From 308fd5a1d7a76c9f2db9b0d970427e87dd39abb4 Mon Sep 17 00:00:00 2001 From: Jiangtian Feng Date: Sat, 30 May 2026 22:07:08 +0800 Subject: [PATCH] anolis: mm: mglru: add per-memcg lru_gen_idle_stats interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #36700 When MGLRU is enabled, kidled refuses to run (the two are mutually exclusive — commit 6edd6f0e370e), so there is currently NO per-memcg cold/hot signal available on an MGLRU system: kidled's memory.idle_page_stats is empty and MGLRU only exposes the raw generation dump via memory.lru_gen (which also has a write side that triggers real aging/eviction). Add a read-only cgroup file memory.lru_gen_idle_stats that reshapes the existing MGLRU generation data into a stable per-memcg cold/hot histogram, without any write side effect and without triggering aging. It reuses the same lru_gen_folio fields as lru_gen_print_memcg() but: - aggregates across NUMA nodes into a single per-memcg view, aligning generations by their distance from max_seq (offset 0 = youngest / hottest, larger offsets = older / colder). For a single-node memcg the offset equals max_seq - seq exactly; across nodes per-offset counts are summed and the reported age is the oldest birth at that offset; - reports anon/file sizes in kB instead of raw page counts. Output format (one line per live generation, youngest first): # gen age_ms anon_kb file_kb 0 1200 4096 16384 1 45000 65536 131072 2 300000 262144 524288 The file is exposed in both the cgroup v2 and v1 (legacy) memory controller sets under CONFIG_LRU_GEN, mirroring how memory.lru_gen is registered, and emits nothing when MGLRU is disabled. This is the first, additive step toward unifying the kidled and MGLRU cold/hot views: it does not touch kidled's frozen idle_page_stats contract, the page-flags layout, or the kidled/MGLRU mutual-exclusion interlock. 
Mapping MGLRU generations onto a richer (per-type / slab) kidled-equivalent histogram, and retiring kidled's full-PFN scan, are left as follow-ups. Signed-off-by: Jiangtian Feng --- include/linux/mmzone.h | 6 ++++ mm/memcontrol.c | 15 ++++++++ mm/vmscan.c | 78 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 99 insertions(+) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index d8949cf4bc36..cfaedf90a5f1 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -590,6 +590,7 @@ void lru_gen_release_memcg(struct mem_cgroup *memcg); void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid); struct seq_file; int lru_gen_print_memcg(struct seq_file *m, struct mem_cgroup *memcg); +int lru_gen_idle_stats_show(struct seq_file *m, struct mem_cgroup *memcg); struct kernfs_open_file; ssize_t lru_gen_memcg_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off); @@ -640,6 +641,11 @@ static inline int lru_gen_print_memcg(struct seq_file *m, struct mem_cgroup *mem return 0; } +static inline int lru_gen_idle_stats_show(struct seq_file *m, struct mem_cgroup *memcg) +{ + return 0; +} + struct kernfs_open_file; static inline ssize_t lru_gen_memcg_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 0b2ddf4a4640..9e533bf43108 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -8691,6 +8691,13 @@ static ssize_t memcg_lru_gen_write(struct kernfs_open_file *of, { return lru_gen_memcg_write(of, buf, nbytes, off); } + +static int memcg_lru_gen_idle_stats_show(struct seq_file *m, void *v) +{ + struct mem_cgroup *memcg = mem_cgroup_from_seq(m); + + return lru_gen_idle_stats_show(m, memcg); +} #endif static struct cftype memory_files[] = { @@ -8930,6 +8937,10 @@ static struct cftype memory_files[] = { .seq_show = memcg_lru_gen_show, .write = memcg_lru_gen_write, }, + { + .name = "lru_gen_idle_stats", + .seq_show = memcg_lru_gen_idle_stats_show, + }, #endif { } /* 
terminate */ }; @@ -10009,6 +10020,10 @@ static struct cftype memsw_files[] = { .seq_show = memcg_lru_gen_show, .write = memcg_lru_gen_write, }, + { + .name = "lru_gen_idle_stats", + .seq_show = memcg_lru_gen_idle_stats_show, + }, #endif { }, /* terminate */ }; diff --git a/mm/vmscan.c b/mm/vmscan.c index 3fe6b3d1a89d..4c9269fc24e4 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -4492,6 +4492,84 @@ int lru_gen_print_memcg(struct seq_file *m, struct mem_cgroup *memcg) return 0; } +/* + * Read-only per-memcg cold/hot histogram derived from the MGLRU + * generations, aggregated across nodes. Unlike lru_gen_print_memcg() this + * has no write side and produces no aging; it just reshapes the existing + * generation data into a stable cold/hot view in kB. + * + * Generations are aligned across nodes by their distance from max_seq: + * offset 0 is the youngest (hottest) generation, larger offsets are older + * (colder). For a single-node memcg the offset equals max_seq - seq exactly; + * across nodes the per-offset counts are summed and the reported age is the + * oldest birth seen at that offset. 
+ */
+int lru_gen_idle_stats_show(struct seq_file *m, struct mem_cgroup *memcg)
+{
+	unsigned long anon_pages[MAX_NR_GENS] = {};
+	unsigned long file_pages[MAX_NR_GENS] = {};
+	unsigned int age_ms[MAX_NR_GENS] = {};
+	bool valid[MAX_NR_GENS] = {};
+	int nid, off;
+
+	/* documented contract: the file is empty while MGLRU is disabled */
+	if (!lru_gen_enabled())
+		return 0;
+
+	for_each_node_state(nid, N_MEMORY) {
+		struct lruvec *lruvec = get_lruvec(memcg, nid);
+		unsigned long seq;
+
+		if (!lruvec)
+			continue;
+
+		DEFINE_MAX_SEQ(lruvec);
+		DEFINE_MIN_SEQ(lruvec);
+
+		seq = evictable_min_seq(min_seq, MAX_SWAPPINESS / 2);
+
+		for (; seq <= max_seq; seq++) {
+			int gen = lru_gen_from_seq(seq);
+			long *np = lruvec->lrugen.nr_pages[gen][LRU_GEN_ANON];
+			long *fp = lruvec->lrugen.nr_pages[gen][LRU_GEN_FILE];
+			unsigned long birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
+			/* read jiffies after birth: a newer birth must not wrap the age */
+			unsigned int ms = jiffies_to_msecs(jiffies - birth);
+			bool anon_valid = seq >= min_seq[LRU_GEN_ANON];
+			bool file_valid = seq >= min_seq[LRU_GEN_FILE];
+			int zone;
+
+			/* youngest generation (max_seq) maps to offset 0 */
+			off = max_seq - seq;
+			if (off < 0 || off >= MAX_NR_GENS)
+				continue;
+
+			valid[off] = true;
+			if (ms > age_ms[off])
+				age_ms[off] = ms;
+
+			for (zone = 0; zone < MAX_NR_ZONES; zone++) {
+				if (anon_valid)
+					anon_pages[off] += max(READ_ONCE(np[zone]), 0L);
+				if (file_valid)
+					file_pages[off] += max(READ_ONCE(fp[zone]), 0L);
+			}
+		}
+	}
+
+	seq_puts(m, "# gen age_ms anon_kb file_kb\n");
+	for (off = 0; off < MAX_NR_GENS; off++) {
+		if (!valid[off])
+			continue;
+
+		seq_printf(m, " %5d %11u %12lu %12lu\n", off, age_ms[off],
+			   anon_pages[off] << (PAGE_SHIFT - 10),
+			   file_pages[off] << (PAGE_SHIFT - 10));
+	}
+
+	return 0;
+}
+
 static int run_aging(struct lruvec *lruvec, unsigned long seq,
 		     int swappiness, bool force_scan);
 
-- 
Gitee