diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst index 996bb3c5e16d..9c321c7ddc18 100644 --- a/Documentation/admin-guide/sysctl/vm.rst +++ b/Documentation/admin-guide/sysctl/vm.rst @@ -127,7 +127,8 @@ compaction_proactiveness This tunable takes a value in the range [0, 100] with a default value of 20. This tunable determines how aggressively compaction is done in the -background. Setting it to 0 disables proactive compaction. +background. On write of non zero value to this tunable will immediately +trigger the proactive compaction. Setting it to 0 disables proactive compaction. Note that compaction has a non-trivial system-wide impact as pages belonging to different processes are moved around, which could also lead diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 5734e3cf4792..f1eb9f943357 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -85,6 +85,8 @@ extern int sysctl_compact_memory; extern unsigned int sysctl_compaction_proactiveness; extern int sysctl_compaction_handler(struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos); +extern int compaction_proactiveness_sysctl_handler(struct ctl_table *table, + int write, void *buffer, size_t *length, loff_t *ppos); extern int sysctl_extfrag_threshold; extern int sysctl_compact_unevictable_allowed; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index b002e9b4abc0..6aaebd16ba6d 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -783,6 +783,7 @@ typedef struct pglist_data { enum zone_type kcompactd_highest_zoneidx; wait_queue_head_t kcompactd_wait; struct task_struct *kcompactd; + bool proactive_compact_trigger; #endif /* * This is a per-node reserve of pages that are not available diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 20dfb9427fdf..7aed10c7dea9 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2858,7 +2858,7 @@ static struct ctl_table vm_table[] = { .data = &sysctl_compaction_proactiveness, .maxlen = sizeof(sysctl_compaction_proactiveness), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = compaction_proactiveness_sysctl_handler, .extra1 = SYSCTL_ZERO, .extra2 = &one_hundred, }, diff --git a/mm/compaction.c b/mm/compaction.c index 8f28896cfc6c..8284fb8cb896 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -2650,6 +2650,30 @@ int sysctl_compact_memory; */ unsigned int __read_mostly sysctl_compaction_proactiveness = 20; +int compaction_proactiveness_sysctl_handler(struct ctl_table *table, int write, + void *buffer, size_t *length, loff_t *ppos) +{ + int rc, nid; + + rc = proc_dointvec_minmax(table, write, buffer, length, ppos); + if (rc) + return rc; + + if (write && sysctl_compaction_proactiveness) { + for_each_online_node(nid) { + pg_data_t *pgdat = NODE_DATA(nid); + + if (pgdat->proactive_compact_trigger) + continue; + + pgdat->proactive_compact_trigger = true; + wake_up_interruptible(&pgdat->kcompactd_wait); + } + } + + return 0; +} + /* * This is the entry point for compacting all nodes via * /proc/sys/vm/compact_memory @@ -2694,7 +2718,8 @@ void compaction_unregister_node(struct node *node) static inline bool kcompactd_work_requested(pg_data_t *pgdat) { - return pgdat->kcompactd_max_order > 0 || kthread_should_stop(); + return pgdat->kcompactd_max_order > 0 || kthread_should_stop() || + pgdat->proactive_compact_trigger; } static bool kcompactd_node_suitable(pg_data_t *pgdat) @@ -2847,7 +2872,8 @@ static int kcompactd(void *p) trace_mm_compaction_kcompactd_sleep(pgdat->node_id); if (wait_event_freezable_timeout(pgdat->kcompactd_wait, kcompactd_work_requested(pgdat), - msecs_to_jiffies(HPAGE_FRAG_CHECK_INTERVAL_MSEC))) { + msecs_to_jiffies(HPAGE_FRAG_CHECK_INTERVAL_MSEC)) && + !pgdat->proactive_compact_trigger) { psi_memstall_enter(&pflags); kcompactd_do_work(pgdat); @@ -2859,10 +2885,20 @@ static int kcompactd(void *p) if (should_proactive_compact_node(pgdat)) { unsigned int prev_score, score; - if (proactive_defer) { + /* + * On wakeup of proactive compaction by sysctl + * write, ignore the accumulated defer score. + * Anyway, if the proactive compaction didn't + * make any progress for the new value, it will + * be further deferred by 2^COMPACT_MAX_DEFER_SHIFT + * times. + */ + if (proactive_defer && + !pgdat->proactive_compact_trigger) { proactive_defer--; continue; } + prev_score = fragmentation_score_node(pgdat); proactive_compact_node(pgdat); score = fragmentation_score_node(pgdat); @@ -2873,6 +2909,8 @@ static int kcompactd(void *p) proactive_defer = score < prev_score ? 0 : 1 << COMPACT_MAX_DEFER_SHIFT; } + if (pgdat->proactive_compact_trigger) + pgdat->proactive_compact_trigger = false; } return 0;