Discussion:
[patch 0/4] mm: memcontrol: remove the page_cgroup->flags field
Johannes Weiner
2014-10-20 15:22:08 UTC
Hi,

This series gets rid of the remaining page_cgroup flags, thus cutting
the memcg per-page overhead down to one pointer.

include/linux/page_cgroup.h | 12 ----
mm/memcontrol.c | 154 ++++++++++++++++++------------------------
2 files changed, 64 insertions(+), 102 deletions(-)
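
For orientation, the end state is easiest to see in the per-page structure
itself. A simplified before/after sketch of struct page_cgroup (the real
hunks are in the individual patches):

struct page_cgroup {                    /* before the series */
        unsigned long flags;            /* PCG_USED, PCG_MEM, PCG_MEMSW */
        struct mem_cgroup *mem_cgroup;
};

struct page_cgroup {                    /* after patch 4/4 */
        struct mem_cgroup *mem_cgroup;  /* NULL means the page is uncharged */
};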

Johannes Weiner
2014-10-20 15:22:10 UTC
Now that mem_cgroup_swapout() fully uncharges the page, every page
that is still in use when reaching mem_cgroup_uncharge() is known to
carry both the memory and the memory+swap charge. Simplify the
uncharge path and remove the PCG_MEMSW page flag accordingly.
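
In other words, once PCG_MEMSW is gone the batched uncharge no longer has to
track memory and memsw counts separately; its core reduces to roughly this
(a sketch of the resulting uncharge_batch() logic, not the literal diff):

        unsigned long nr_pages = nr_anon + nr_file;

        if (!mem_cgroup_is_root(memcg)) {
                page_counter_uncharge(&memcg->memory, nr_pages);
                if (do_swap_account)
                        page_counter_uncharge(&memcg->memsw, nr_pages);
                memcg_oom_recover(memcg);
        }

and the css reference count is dropped by nr_pages instead of
max(nr_mem, nr_memsw).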

Signed-off-by: Johannes Weiner <***@cmpxchg.org>
---
include/linux/page_cgroup.h | 1 -
mm/memcontrol.c | 34 ++++++++++++----------------------
2 files changed, 12 insertions(+), 23 deletions(-)

diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 5c831f1eca79..da62ee2be28b 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -5,7 +5,6 @@ enum {
/* flags for mem_cgroup */
PCG_USED = 0x01, /* This page is charged to a memcg */
PCG_MEM = 0x02, /* This page holds a memory charge */
- PCG_MEMSW = 0x04, /* This page holds a memory+swap charge */
};

struct pglist_data;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7709f17347f3..9bab35fc3e9e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2606,7 +2606,7 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
* have the page locked
*/
pc->mem_cgroup = memcg;
- pc->flags = PCG_USED | PCG_MEM | (do_swap_account ? PCG_MEMSW : 0);
+ pc->flags = PCG_USED | PCG_MEM;

if (lrucare)
unlock_page_lru(page, isolated);
@@ -5815,7 +5815,6 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
if (!PageCgroupUsed(pc))
return;

- VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
memcg = pc->mem_cgroup;

oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
@@ -6010,17 +6009,16 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
}

static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
- unsigned long nr_mem, unsigned long nr_memsw,
unsigned long nr_anon, unsigned long nr_file,
unsigned long nr_huge, struct page *dummy_page)
{
+ unsigned long nr_pages = nr_anon + nr_file;
unsigned long flags;

if (!mem_cgroup_is_root(memcg)) {
- if (nr_mem)
- page_counter_uncharge(&memcg->memory, nr_mem);
- if (nr_memsw)
- page_counter_uncharge(&memcg->memsw, nr_memsw);
+ page_counter_uncharge(&memcg->memory, nr_pages);
+ if (do_swap_account)
+ page_counter_uncharge(&memcg->memsw, nr_pages);
memcg_oom_recover(memcg);
}

@@ -6029,23 +6027,21 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file);
__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge);
__this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout);
- __this_cpu_add(memcg->stat->nr_page_events, nr_anon + nr_file);
+ __this_cpu_add(memcg->stat->nr_page_events, nr_pages);
memcg_check_events(memcg, dummy_page);
local_irq_restore(flags);

if (!mem_cgroup_is_root(memcg))
- css_put_many(&memcg->css, max(nr_mem, nr_memsw));
+ css_put_many(&memcg->css, nr_pages);
}

static void uncharge_list(struct list_head *page_list)
{
struct mem_cgroup *memcg = NULL;
- unsigned long nr_memsw = 0;
unsigned long nr_anon = 0;
unsigned long nr_file = 0;
unsigned long nr_huge = 0;
unsigned long pgpgout = 0;
- unsigned long nr_mem = 0;
struct list_head *next;
struct page *page;

@@ -6072,10 +6068,9 @@ static void uncharge_list(struct list_head *page_list)

if (memcg != pc->mem_cgroup) {
if (memcg) {
- uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw,
- nr_anon, nr_file, nr_huge, page);
- pgpgout = nr_mem = nr_memsw = 0;
- nr_anon = nr_file = nr_huge = 0;
+ uncharge_batch(memcg, pgpgout, nr_anon, nr_file,
+ nr_huge, page);
+ pgpgout = nr_anon = nr_file = nr_huge = 0;
}
memcg = pc->mem_cgroup;
}
@@ -6091,18 +6086,14 @@ static void uncharge_list(struct list_head *page_list)
else
nr_file += nr_pages;

- if (pc->flags & PCG_MEM)
- nr_mem += nr_pages;
- if (pc->flags & PCG_MEMSW)
- nr_memsw += nr_pages;
pc->flags = 0;

pgpgout++;
} while (next != page_list);

if (memcg)
- uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw,
- nr_anon, nr_file, nr_huge, page);
+ uncharge_batch(memcg, pgpgout, nr_anon, nr_file,
+ nr_huge, page);
}

/**
@@ -6187,7 +6178,6 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
return;

VM_BUG_ON_PAGE(!(pc->flags & PCG_MEM), oldpage);
- VM_BUG_ON_PAGE(do_swap_account && !(pc->flags & PCG_MEMSW), oldpage);

if (lrucare)
lock_page_lru(oldpage, &isolated);
--
2.1.2

Kamezawa Hiroyuki
2014-10-22 01:50:25 UTC
Post by Johannes Weiner
Now that mem_cgroup_swapout() fully uncharges the page, every page
that is still in use when reaching mem_cgroup_uncharge() is known to
carry both the memory and the memory+swap charge. Simplify the
uncharge path and remove the PCG_MEMSW page flag accordingly.
Vladimir Davydov
2014-10-22 15:43:02 UTC
Post by Johannes Weiner
Now that mem_cgroup_swapout() fully uncharges the page, every page
that is still in use when reaching mem_cgroup_uncharge() is known to
carry both the memory and the memory+swap charge. Simplify the
uncharge path and remove the PCG_MEMSW page flag accordingly.
Michal Hocko
2014-10-22 15:46:06 UTC
Post by Johannes Weiner
Now that mem_cgroup_swapout() fully uncharges the page, every page
that is still in use when reaching mem_cgroup_uncharge() is known to
carry both the memory and the memory+swap charge. Simplify the
uncharge path and remove the PCG_MEMSW page flag accordingly.
Looks good
--
Michal Hocko
SUSE Labs

Johannes Weiner
2014-10-20 15:22:11 UTC
PCG_MEM is a remnant from an earlier version of 0a31bc97c80c ("mm:
memcontrol: rewrite uncharge API"), used to tell whether migration
cleared a charge while leaving pc->mem_cgroup valid and PCG_USED set.
But in the final version, mem_cgroup_migrate() directly uncharges the
source page, rendering this distinction unnecessary. Remove it.

Signed-off-by: Johannes Weiner <***@cmpxchg.org>
---
include/linux/page_cgroup.h | 1 -
mm/memcontrol.c | 4 +---
2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index da62ee2be28b..97536e685843 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -4,7 +4,6 @@
enum {
/* flags for mem_cgroup */
PCG_USED = 0x01, /* This page is charged to a memcg */
- PCG_MEM = 0x02, /* This page holds a memory charge */
};

struct pglist_data;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9bab35fc3e9e..1d66ac49e702 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2606,7 +2606,7 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
* have the page locked
*/
pc->mem_cgroup = memcg;
- pc->flags = PCG_USED | PCG_MEM;
+ pc->flags = PCG_USED;

if (lrucare)
unlock_page_lru(page, isolated);
@@ -6177,8 +6177,6 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
if (!PageCgroupUsed(pc))
return;

- VM_BUG_ON_PAGE(!(pc->flags & PCG_MEM), oldpage);
-
if (lrucare)
lock_page_lru(oldpage, &isolated);
--
2.1.2

Kamezawa Hiroyuki
2014-10-22 01:52:05 UTC
Post by Johannes Weiner
memcontrol: rewrite uncharge API"), used to tell whether migration
cleared a charge while leaving pc->mem_cgroup valid and PCG_USED set.
But in the final version, mem_cgroup_migrate() directly uncharges the
source page, rendering this distinction unnecessary. Remove it.
Vladimir Davydov
2014-10-22 15:44:57 UTC
Post by Johannes Weiner
memcontrol: rewrite uncharge API"), used to tell whether migration
cleared a charge while leaving pc->mem_cgroup valid and PCG_USED set.
But in the final version, mem_cgroup_migrate() directly uncharges the
source page, rendering this distinction unnecessary. Remove it.
Michal Hocko
2014-10-22 15:49:18 UTC
Post by Johannes Weiner
memcontrol: rewrite uncharge API"), used to tell whether migration
cleared a charge while leaving pc->mem_cgroup valid and PCG_USED set.
But in the final version, mem_cgroup_migrate() directly uncharges the
source page, rendering this distinction unnecessary. Remove it.
--
Michal Hocko
SUSE Labs

Johannes Weiner
2014-10-20 15:22:12 UTC
pc->mem_cgroup had to be left intact after uncharge for the final LRU
removal, and !PCG_USED indicated whether the page was uncharged. But
since 0a31bc97c80c ("mm: memcontrol: rewrite uncharge API") pages are
uncharged after the final LRU removal. Uncharge can simply clear the
pointer and the PCG_USED/PageCgroupUsed sites can test that instead.

Because this is the last page_cgroup flag, this patch reduces the
memcg per-page overhead to a single pointer.
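
Concretely, the "charged?" test and the uncharge itself become plain pointer
operations. A schematic sketch (the helper names below are illustrative only;
the patch open-codes these checks at each site):

struct page_cgroup {
        struct mem_cgroup *mem_cgroup;          /* NULL == uncharged */
};

/* stands in for the old PageCgroupUsed(pc) */
static inline bool page_cgroup_charged(struct page_cgroup *pc)
{
        return pc->mem_cgroup != NULL;
}

/* stands in for the old "pc->flags = 0" on the uncharge side */
static inline void page_cgroup_uncharge(struct page_cgroup *pc)
{
        pc->mem_cgroup = NULL;
}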

Signed-off-by: Johannes Weiner <***@cmpxchg.org>
---
include/linux/page_cgroup.h | 10 -----
mm/memcontrol.c | 107 +++++++++++++++++---------------------------
2 files changed, 42 insertions(+), 75 deletions(-)

diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 97536e685843..1289be6b436c 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -1,11 +1,6 @@
#ifndef __LINUX_PAGE_CGROUP_H
#define __LINUX_PAGE_CGROUP_H

-enum {
- /* flags for mem_cgroup */
- PCG_USED = 0x01, /* This page is charged to a memcg */
-};
-
struct pglist_data;

#ifdef CONFIG_MEMCG
@@ -19,7 +14,6 @@ struct mem_cgroup;
* then the page cgroup for pfn always exists.
*/
struct page_cgroup {
- unsigned long flags;
struct mem_cgroup *mem_cgroup;
};

@@ -39,10 +33,6 @@ static inline void page_cgroup_init(void)

struct page_cgroup *lookup_page_cgroup(struct page *page);

-static inline int PageCgroupUsed(struct page_cgroup *pc)
-{
- return !!(pc->flags & PCG_USED);
-}
#else /* !CONFIG_MEMCG */
struct page_cgroup;

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1d66ac49e702..48d49c6b08d1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1284,14 +1284,12 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone)

pc = lookup_page_cgroup(page);
memcg = pc->mem_cgroup;
-
/*
* Swapcache readahead pages are added to the LRU - and
- * possibly migrated - before they are charged. Ensure
- * pc->mem_cgroup is sane.
+ * possibly migrated - before they are charged.
*/
- if (!PageLRU(page) && !PageCgroupUsed(pc) && memcg != root_mem_cgroup)
- pc->mem_cgroup = memcg = root_mem_cgroup;
+ if (!memcg)
+ memcg = root_mem_cgroup;

mz = mem_cgroup_page_zoneinfo(memcg, page);
lruvec = &mz->lruvec;
@@ -2141,7 +2139,7 @@ void __mem_cgroup_begin_update_page_stat(struct page *page,
pc = lookup_page_cgroup(page);
again:
memcg = pc->mem_cgroup;
- if (unlikely(!memcg || !PageCgroupUsed(pc)))
+ if (unlikely(!memcg))
return;
/*
* If this memory cgroup is not under account moving, we don't
@@ -2154,7 +2152,7 @@ again:
return;

move_lock_mem_cgroup(memcg, flags);
- if (memcg != pc->mem_cgroup || !PageCgroupUsed(pc)) {
+ if (memcg != pc->mem_cgroup) {
move_unlock_mem_cgroup(memcg, flags);
goto again;
}
@@ -2186,7 +2184,7 @@ void mem_cgroup_update_page_stat(struct page *page,

pc = lookup_page_cgroup(page);
memcg = pc->mem_cgroup;
- if (unlikely(!memcg || !PageCgroupUsed(pc)))
+ if (unlikely(!memcg))
return;

this_cpu_add(memcg->stat->count[idx], val);
@@ -2525,9 +2523,10 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
VM_BUG_ON_PAGE(!PageLocked(page), page);

pc = lookup_page_cgroup(page);
- if (PageCgroupUsed(pc)) {
- memcg = pc->mem_cgroup;
- if (memcg && !css_tryget_online(&memcg->css))
+ memcg = pc->mem_cgroup;
+
+ if (memcg) {
+ if (!css_tryget_online(&memcg->css))
memcg = NULL;
} else if (PageSwapCache(page)) {
ent.val = page_private(page);
@@ -2578,7 +2577,7 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
struct page_cgroup *pc = lookup_page_cgroup(page);
int isolated;

- VM_BUG_ON_PAGE(PageCgroupUsed(pc), page);
+ VM_BUG_ON_PAGE(pc->mem_cgroup, page);
/*
* we don't need page_cgroup_lock about tail pages, becase they are not
* accessed by any other context at this point.
@@ -2593,7 +2592,7 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,

/*
* Nobody should be changing or seriously looking at
- * pc->mem_cgroup and pc->flags at this point:
+ * pc->mem_cgroup at this point:
*
* - the page is uncharged
*
@@ -2606,7 +2605,6 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
* have the page locked
*/
pc->mem_cgroup = memcg;
- pc->flags = PCG_USED;

if (lrucare)
unlock_page_lru(page, isolated);
@@ -3120,37 +3118,22 @@ void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg,
memcg_uncharge_kmem(memcg, 1 << order);
return;
}
- /*
- * The page is freshly allocated and not visible to any
- * outside callers yet. Set up pc non-atomically.
- */
pc = lookup_page_cgroup(page);
pc->mem_cgroup = memcg;
- pc->flags = PCG_USED;
}

void __memcg_kmem_uncharge_pages(struct page *page, int order)
{
- struct mem_cgroup *memcg = NULL;
- struct page_cgroup *pc;
-
-
- pc = lookup_page_cgroup(page);
- if (!PageCgroupUsed(pc))
- return;
-
- memcg = pc->mem_cgroup;
- pc->flags = 0;
+ struct page_cgroup *pc = lookup_page_cgroup(page);
+ struct mem_cgroup *memcg = pc->mem_cgroup;

- /*
- * We trust that only if there is a memcg associated with the page, it
- * is a valid allocation
- */
if (!memcg)
return;

VM_BUG_ON_PAGE(mem_cgroup_is_root(memcg), page);
+
memcg_uncharge_kmem(memcg, 1 << order);
+ pc->mem_cgroup = NULL;
}
#else
static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg)
@@ -3168,21 +3151,16 @@ static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg)
*/
void mem_cgroup_split_huge_fixup(struct page *head)
{
- struct page_cgroup *head_pc = lookup_page_cgroup(head);
- struct page_cgroup *pc;
- struct mem_cgroup *memcg;
+ struct page_cgroup *pc = lookup_page_cgroup(head);
int i;

if (mem_cgroup_disabled())
return;

- memcg = head_pc->mem_cgroup;
- for (i = 1; i < HPAGE_PMD_NR; i++) {
- pc = head_pc + i;
- pc->mem_cgroup = memcg;
- pc->flags = head_pc->flags;
- }
- __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE],
+ for (i = 1; i < HPAGE_PMD_NR; i++)
+ pc[i].mem_cgroup = pc[0].mem_cgroup;
+
+ __this_cpu_sub(pc[0].mem_cgroup->stat->count[MEM_CGROUP_STAT_RSS_HUGE],
HPAGE_PMD_NR);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -3232,7 +3210,7 @@ static int mem_cgroup_move_account(struct page *page,
goto out;

ret = -EINVAL;
- if (!PageCgroupUsed(pc) || pc->mem_cgroup != from)
+ if (pc->mem_cgroup != from)
goto out_unlock;

move_lock_mem_cgroup(from, &flags);
@@ -3342,7 +3320,7 @@ static struct page_cgroup *lookup_page_cgroup_used(struct page *page)
* the first time, i.e. during boot or memory hotplug;
* or when mem_cgroup_disabled().
*/
- if (likely(pc) && PageCgroupUsed(pc))
+ if (likely(pc) && pc->mem_cgroup)
return pc;
return NULL;
}
@@ -3360,10 +3338,8 @@ void mem_cgroup_print_bad_page(struct page *page)
struct page_cgroup *pc;

pc = lookup_page_cgroup_used(page);
- if (pc) {
- pr_alert("pc:%p pc->flags:%lx pc->mem_cgroup:%p\n",
- pc, pc->flags, pc->mem_cgroup);
- }
+ if (pc)
+ pr_alert("pc:%p pc->mem_cgroup:%p\n", pc, pc->mem_cgroup);
}
#endif

@@ -5330,7 +5306,7 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
* mem_cgroup_move_account() checks the pc is valid or
* not under LRU exclusion.
*/
- if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
+ if (pc->mem_cgroup == mc.from) {
ret = MC_TARGET_PAGE;
if (target)
target->page = page;
@@ -5366,7 +5342,7 @@ static enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma,
if (!move_anon())
return ret;
pc = lookup_page_cgroup(page);
- if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
+ if (pc->mem_cgroup == mc.from) {
ret = MC_TARGET_PAGE;
if (target) {
get_page(page);
@@ -5810,18 +5786,17 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
return;

pc = lookup_page_cgroup(page);
+ memcg = pc->mem_cgroup;

/* Readahead page, never charged */
- if (!PageCgroupUsed(pc))
+ if (!memcg)
return;

- memcg = pc->mem_cgroup;
-
oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
VM_BUG_ON_PAGE(oldid, page);
mem_cgroup_swap_statistics(memcg, true);

- pc->flags = 0;
+ pc->mem_cgroup = NULL;

if (!mem_cgroup_is_root(memcg))
page_counter_uncharge(&memcg->memory, 1);
@@ -5895,7 +5870,7 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
* the page lock, which serializes swap cache removal, which
* in turn serializes uncharging.
*/
- if (PageCgroupUsed(pc))
+ if (pc->mem_cgroup)
goto out;
}

@@ -6057,13 +6032,13 @@ static void uncharge_list(struct list_head *page_list)
VM_BUG_ON_PAGE(page_count(page), page);

pc = lookup_page_cgroup(page);
- if (!PageCgroupUsed(pc))
+ if (!pc->mem_cgroup)
continue;

/*
* Nobody should be changing or seriously looking at
- * pc->mem_cgroup and pc->flags at this point, we have
- * fully exclusive access to the page.
+ * pc->mem_cgroup at this point, we have fully
+ * exclusive access to the page.
*/

if (memcg != pc->mem_cgroup) {
@@ -6086,7 +6061,7 @@ static void uncharge_list(struct list_head *page_list)
else
nr_file += nr_pages;

- pc->flags = 0;
+ pc->mem_cgroup = NULL;

pgpgout++;
} while (next != page_list);
@@ -6112,7 +6087,7 @@ void mem_cgroup_uncharge(struct page *page)

/* Don't touch page->lru of any random page, pre-check: */
pc = lookup_page_cgroup(page);
- if (!PageCgroupUsed(pc))
+ if (!pc->mem_cgroup)
return;

INIT_LIST_HEAD(&page->lru);
@@ -6148,6 +6123,7 @@ void mem_cgroup_uncharge_list(struct list_head *page_list)
void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
bool lrucare)
{
+ struct mem_cgroup *memcg;
struct page_cgroup *pc;
int isolated;

@@ -6164,7 +6140,7 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,

/* Page cache replacement: new page already charged? */
pc = lookup_page_cgroup(newpage);
- if (PageCgroupUsed(pc))
+ if (pc->mem_cgroup)
return;

/*
@@ -6174,18 +6150,19 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
* reclaim just put back on the LRU but has not released yet.
*/
pc = lookup_page_cgroup(oldpage);
- if (!PageCgroupUsed(pc))
+ memcg = pc->mem_cgroup;
+ if (!memcg)
return;

if (lrucare)
lock_page_lru(oldpage, &isolated);

- pc->flags = 0;
+ pc->mem_cgroup = NULL;

if (lrucare)
unlock_page_lru(oldpage, isolated);

- commit_charge(newpage, pc->mem_cgroup, lrucare);
+ commit_charge(newpage, memcg, lrucare);
}

/*
--
2.1.2
Kamezawa Hiroyuki
2014-10-22 01:54:59 UTC
Post by Johannes Weiner
pc->mem_cgroup had to be left intact after uncharge for the final LRU
removal, and !PCG_USED indicated whether the page was uncharged. But
since 0a31bc97c80c ("mm: memcontrol: rewrite uncharge API") pages are
uncharged after the final LRU removal. Uncharge can simply clear the
pointer and the PCG_USED/PageCgroupUsed sites can test that instead.
Because this is the last page_cgroup flag, this patch reduces the
memcg per-page overhead to a single pointer.
awesome.
Vladimir Davydov
2014-10-22 16:05:12 UTC
Post by Johannes Weiner
pc->mem_cgroup had to be left intact after uncharge for the final LRU
removal, and !PCG_USED indicated whether the page was uncharged. But
since 0a31bc97c80c ("mm: memcontrol: rewrite uncharge API") pages are
uncharged after the final LRU removal. Uncharge can simply clear the
pointer and the PCG_USED/PageCgroupUsed sites can test that instead.
Because this is the last page_cgroup flag, this patch reduces the
memcg per-page overhead to a single pointer.
Michal Hocko
2014-10-22 16:11:40 UTC
Post by Johannes Weiner
pc->mem_cgroup had to be left intact after uncharge for the final LRU
removal, and !PCG_USED indicated whether the page was uncharged. But
since 0a31bc97c80c ("mm: memcontrol: rewrite uncharge API") pages are
uncharged after the final LRU removal. Uncharge can simply clear the
pointer and the PCG_USED/PageCgroupUsed sites can test that instead.
Because this is the last page_cgroup flag, this patch reduces the
memcg per-page overhead to a single pointer.
Nice. I have an old patch which stuck this flag into the page_cgroup
pointer, but this is of course much better!
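An approach like that would presumably have stuffed the flag into the low
bit of the pointer, along these lines (purely illustrative, not from any
posted patch):

#define PCG_USED        0x1UL   /* hypothetical: reuse pointer bit 0 */

static inline bool pc_used(struct page_cgroup *pc)
{
        return (unsigned long)pc->mem_cgroup & PCG_USED;
}

static inline struct mem_cgroup *pc_memcg(struct page_cgroup *pc)
{
        return (struct mem_cgroup *)((unsigned long)pc->mem_cgroup & ~PCG_USED);
}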
Acked-by: Michal Hocko <***@suse.cz>

Just a nit below

[...]
Post by Johannes Weiner
@@ -2525,9 +2523,10 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
memcg = NULL initialization is not needed now
Post by Johannes Weiner
VM_BUG_ON_PAGE(!PageLocked(page), page);
pc = lookup_page_cgroup(page);
- if (PageCgroupUsed(pc)) {
- memcg = pc->mem_cgroup;
- if (memcg && !css_tryget_online(&memcg->css))
+ memcg = pc->mem_cgroup;
+
+ if (memcg) {
+ if (!css_tryget_online(&memcg->css))
memcg = NULL;
} else if (PageSwapCache(page)) {
ent.val = page_private(page);
#else
static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg)
[...]
--
Michal Hocko
SUSE Labs

Johannes Weiner
2014-10-20 15:22:09 UTC
Permalink
mem_cgroup_swapout() is called with exclusive access to the page at
the end of the page's lifetime. Instead of clearing the PCG_MEMSW
flag and deferring the uncharge, just do it right away. This allows
follow-up patches to simplify the uncharge code.

Signed-off-by: Johannes Weiner <***@cmpxchg.org>
---
mm/memcontrol.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bea3fddb3372..7709f17347f3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5799,6 +5799,7 @@ static void __init enable_swap_cgroup(void)
*/
void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
{
+ struct mem_cgroup *memcg;
struct page_cgroup *pc;
unsigned short oldid;

@@ -5815,13 +5816,21 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
return;

VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
+ memcg = pc->mem_cgroup;

- oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup));
+ oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
VM_BUG_ON_PAGE(oldid, page);
+ mem_cgroup_swap_statistics(memcg, true);

- pc->flags &= ~PCG_MEMSW;
- css_get(&pc->mem_cgroup->css);
- mem_cgroup_swap_statistics(pc->mem_cgroup, true);
+ pc->flags = 0;
+
+ if (!mem_cgroup_is_root(memcg))
+ page_counter_uncharge(&memcg->memory, 1);
+
+ local_irq_disable();
+ mem_cgroup_charge_statistics(memcg, page, -1);
+ memcg_check_events(memcg, page);
+ local_irq_enable();
}

/**
--
2.1.2
Kamezawa Hiroyuki
2014-10-21 01:07:52 UTC
Permalink
Post by Johannes Weiner
mem_cgroup_swapout() is called with exclusive access to the page at
the end of the page's lifetime. Instead of clearing the PCG_MEMSW
flag and deferring the uncharge, just do it right away. This allows
follow-up patches to simplify the uncharge code.
---
mm/memcontrol.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bea3fddb3372..7709f17347f3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5799,6 +5799,7 @@ static void __init enable_swap_cgroup(void)
*/
void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
{
+ struct mem_cgroup *memcg;
struct page_cgroup *pc;
unsigned short oldid;
@@ -5815,13 +5816,21 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
return;
VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
shouldn't this be removed?
Post by Johannes Weiner
+ memcg = pc->mem_cgroup;
- oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup));
+ oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
VM_BUG_ON_PAGE(oldid, page);
+ mem_cgroup_swap_statistics(memcg, true);
- pc->flags &= ~PCG_MEMSW;
- css_get(&pc->mem_cgroup->css);
- mem_cgroup_swap_statistics(pc->mem_cgroup, true);
+ pc->flags = 0;
+
+ if (!mem_cgroup_is_root(memcg))
+ page_counter_uncharge(&memcg->memory, 1);
+
+ local_irq_disable();
+ mem_cgroup_charge_statistics(memcg, page, -1);
+ memcg_check_events(memcg, page);
+ local_irq_enable();
}
Reviewed-by: KAMEZAWA Hiroyuki <***@jp.fujitsu.com>


Johannes Weiner
2014-10-21 20:39:07 UTC
Permalink
Post by Kamezawa Hiroyuki
Post by Johannes Weiner
mem_cgroup_swapout() is called with exclusive access to the page at
the end of the page's lifetime. Instead of clearing the PCG_MEMSW
flag and deferring the uncharge, just do it right away. This allows
follow-up patches to simplify the uncharge code.
---
mm/memcontrol.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bea3fddb3372..7709f17347f3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5799,6 +5799,7 @@ static void __init enable_swap_cgroup(void)
*/
void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
{
+ struct mem_cgroup *memcg;
struct page_cgroup *pc;
unsigned short oldid;
@@ -5815,13 +5816,21 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
return;
VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
shouldn't this be removed?
It's still a legitimate check at this point. But it's removed later
in the series when PCG_MEMSW itself goes away.
Thanks!

Vladimir Davydov
2014-10-21 12:52:52 UTC
Permalink
Post by Johannes Weiner
mem_cgroup_swapout() is called with exclusive access to the page at
the end of the page's lifetime. Instead of clearing the PCG_MEMSW
flag and deferring the uncharge, just do it right away. This allows
follow-up patches to simplify the uncharge code.
---
mm/memcontrol.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bea3fddb3372..7709f17347f3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5799,6 +5799,7 @@ static void __init enable_swap_cgroup(void)
*/
void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
{
+ struct mem_cgroup *memcg;
struct page_cgroup *pc;
unsigned short oldid;
@@ -5815,13 +5816,21 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
return;
VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
+ memcg = pc->mem_cgroup;
- oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup));
+ oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
VM_BUG_ON_PAGE(oldid, page);
+ mem_cgroup_swap_statistics(memcg, true);
- pc->flags &= ~PCG_MEMSW;
- css_get(&pc->mem_cgroup->css);
- mem_cgroup_swap_statistics(pc->mem_cgroup, true);
+ pc->flags = 0;
+
+ if (!mem_cgroup_is_root(memcg))
+ page_counter_uncharge(&memcg->memory, 1);
AFAIU it removes batched uncharge of swapped out pages, doesn't it? Will
it affect performance?

Besides, it looks asymmetric with respect to the page cache uncharge
path, where we still defer uncharge to mem_cgroup_uncharge_list(), and I
personally rather dislike this asymmetry.
Post by Johannes Weiner
+
+ local_irq_disable();
+ mem_cgroup_charge_statistics(memcg, page, -1);
+ memcg_check_events(memcg, page);
+ local_irq_enable();
AFAICT mem_cgroup_swapout() is called under mapping->tree_lock with irqs
disabled, so we should use irq_save/restore here.
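Something along these lines, i.e. an untested sketch with the rest of
the hunk kept as posted:

	unsigned long flags;

	/* save and restore the caller's IRQ state instead of enabling unconditionally */
	local_irq_save(flags);
	mem_cgroup_charge_statistics(memcg, page, -1);
	memcg_check_events(memcg, page);
	local_irq_restore(flags);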

Thanks,
Vladimir
Post by Johannes Weiner
}
/**
--
2.1.2
Johannes Weiner
2014-10-21 21:03:28 UTC
Permalink
Post by Vladimir Davydov
Post by Johannes Weiner
mem_cgroup_swapout() is called with exclusive access to the page at
the end of the page's lifetime. Instead of clearing the PCG_MEMSW
flag and deferring the uncharge, just do it right away. This allows
follow-up patches to simplify the uncharge code.
---
mm/memcontrol.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bea3fddb3372..7709f17347f3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5799,6 +5799,7 @@ static void __init enable_swap_cgroup(void)
*/
void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
{
+ struct mem_cgroup *memcg;
struct page_cgroup *pc;
unsigned short oldid;
@@ -5815,13 +5816,21 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
return;
VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
+ memcg = pc->mem_cgroup;
- oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup));
+ oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
VM_BUG_ON_PAGE(oldid, page);
+ mem_cgroup_swap_statistics(memcg, true);
- pc->flags &= ~PCG_MEMSW;
- css_get(&pc->mem_cgroup->css);
- mem_cgroup_swap_statistics(pc->mem_cgroup, true);
+ pc->flags = 0;
+
+ if (!mem_cgroup_is_root(memcg))
+ page_counter_uncharge(&memcg->memory, 1);
AFAIU it removes batched uncharge of swapped out pages, doesn't it? Will
it affect performance?
During swapout and with lockless page counters? I don't think so.
Post by Vladimir Davydov
Besides, it looks asymmetric with respect to the page cache uncharge
path, where we still defer uncharge to mem_cgroup_uncharge_list(), and I
personally rather dislike this asymmetry.
The asymmetry is inherent in the fact that we have memory and
memory+swap accounting, and here a memory charge is transferred out to
swap. Before, the asymmetry was in mem_cgroup_uncharge_list() where
we separate out memory and memsw pages (which the next patch fixes).

So nothing changed, the ugliness was just moved around. I actually
like it better now that it's part of the swap controller, because
that's where the nastiness actually comes from. This will all go away
when we account swap separately. Then, swapped pages can keep their
memory charge until mem_cgroup_uncharge() again and the swap charge
will be completely independent from it. This reshuffling is just
necessary because it allows us to get rid of the per-page flag.
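To illustrate the direction, with a hypothetical per-memcg swap
page_counter (memcg->swap here is an assumption, not something this
series adds) the swapout side could shrink to a sketch like:

	memcg = pc->mem_cgroup;

	oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
	VM_BUG_ON_PAGE(oldid, page);
	mem_cgroup_swap_statistics(memcg, true);

	/* take a swap charge; the memory charge stays until mem_cgroup_uncharge() */
	if (!mem_cgroup_is_root(memcg))
		page_counter_charge(&memcg->swap, 1);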
Post by Vladimir Davydov
Post by Johannes Weiner
+ local_irq_disable();
+ mem_cgroup_charge_statistics(memcg, page, -1);
+ memcg_check_events(memcg, page);
+ local_irq_enable();
AFAICT mem_cgroup_swapout() is called under mapping->tree_lock with irqs
disabled, so we should use irq_save/restore here.
Good catch! I don't think this function actually needs to be called
under the tree_lock, so I'd rather send a follow-up that moves it out.
For now, this should be sufficient:

---
From 3a40bd3b85a70db104ade873007dbb84b5117993 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <***@cmpxchg.org>
Date: Tue, 21 Oct 2014 16:53:14 -0400
Subject: [patch] mm: memcontrol: uncharge pages on swapout fix
Post by Vladimir Davydov
Post by Johannes Weiner
+ local_irq_disable();
+ mem_cgroup_charge_statistics(memcg, page, -1);
+ memcg_check_events(memcg, page);
+ local_irq_enable();
AFAICT mem_cgroup_swapout() is called under mapping->tree_lock with irqs
disabled, so we should use irq_save/restore here.
But this function doesn't actually need to be called under the tree
lock. So for now, simply remove the irq-disabling altogether and rely
on the caller's IRQ state. Later on, we'll move it out from there and
add back the simple, non-saving IRQ-disabling.

Reported-by: Vladimir Davydov <***@parallels.com>
Signed-off-by: Johannes Weiner <***@cmpxchg.org>
---
mm/memcontrol.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8dc46aa9ae8f..c688fb73ff35 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5806,6 +5806,9 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
VM_BUG_ON_PAGE(PageLRU(page), page);
VM_BUG_ON_PAGE(page_count(page), page);

+ /* XXX: caller holds IRQ-safe mapping->tree_lock */
+ VM_BUG_ON(!irqs_disabled());
+
if (!do_swap_account)
return;

@@ -5827,10 +5830,8 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
if (!mem_cgroup_is_root(memcg))
page_counter_uncharge(&memcg->memory, 1);

- local_irq_disable();
mem_cgroup_charge_statistics(memcg, page, -1);
memcg_check_events(memcg, page);
- local_irq_enable();
}

/**
--
2.1.2

Vladimir Davydov
2014-10-22 08:33:53 UTC
Permalink
Post by Johannes Weiner
Post by Vladimir Davydov
Post by Johannes Weiner
mem_cgroup_swapout() is called with exclusive access to the page at
the end of the page's lifetime. Instead of clearing the PCG_MEMSW
flag and deferring the uncharge, just do it right away. This allows
follow-up patches to simplify the uncharge code.
---
mm/memcontrol.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bea3fddb3372..7709f17347f3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5799,6 +5799,7 @@ static void __init enable_swap_cgroup(void)
*/
void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
{
+ struct mem_cgroup *memcg;
struct page_cgroup *pc;
unsigned short oldid;
@@ -5815,13 +5816,21 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
return;
VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
+ memcg = pc->mem_cgroup;
- oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup));
+ oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
VM_BUG_ON_PAGE(oldid, page);
+ mem_cgroup_swap_statistics(memcg, true);
- pc->flags &= ~PCG_MEMSW;
- css_get(&pc->mem_cgroup->css);
- mem_cgroup_swap_statistics(pc->mem_cgroup, true);
+ pc->flags = 0;
+
+ if (!mem_cgroup_is_root(memcg))
+ page_counter_uncharge(&memcg->memory, 1);
AFAIU it removes batched uncharge of swapped out pages, doesn't it? Will
it affect performance?
During swapout and with lockless page counters? I don't think so.
How is this different from paging out page cache? I mean, we can have a lot of
pages in the swap cache that have already been swapped out, and are
waiting to be unmapped, uncharged, and freed, just like usual page
cache. Why do we use batching for file cache pages then?
Post by Johannes Weiner
Post by Vladimir Davydov
Besides, it looks asymmetric with respect to the page cache uncharge
path, where we still defer uncharge to mem_cgroup_uncharge_list(), and I
personally rather dislike this asymmetry.
The asymmetry is inherent in the fact that we have memory and
memory+swap accounting, and here a memory charge is transferred out to
swap. Before, the asymmetry was in mem_cgroup_uncharge_list() where
we separate out memory and memsw pages (which the next patch fixes).
I agree that memsw is inherently asymmetric, but IMO it isn't the case
for swap *cache* vs page *cache*. We handle them similarly - removing
from a mapping, uncharging, freeing. If one wants batching, why
shouldn't the other?
Post by Johannes Weiner
So nothing changed, the ugliness was just moved around. I actually
like it better now that it's part of the swap controller, because
that's where the nastiness actually comes from. This will all go away
when we account swap separately. Then, swapped pages can keep their
memory charge until mem_cgroup_uncharge() again and the swap charge
will be completely independent from it. This reshuffling is just
necessary because it allows us to get rid of the per-page flag.
Do you mean that swap cache uncharge batching will be back soon?
Post by Johannes Weiner
Post by Vladimir Davydov
Post by Johannes Weiner
+ local_irq_disable();
+ mem_cgroup_charge_statistics(memcg, page, -1);
+ memcg_check_events(memcg, page);
+ local_irq_enable();
AFAICT mem_cgroup_swapout() is called under mapping->tree_lock with irqs
disabled, so we should use irq_save/restore here.
Good catch! I don't think this function actually needs to be called
under the tree_lock, so I'd rather send a follow-up that moves it out.
That's exactly what I thought after sending that message.
Post by Johannes Weiner
---
From 3a40bd3b85a70db104ade873007dbb84b5117993 Mon Sep 17 00:00:00 2001
Date: Tue, 21 Oct 2014 16:53:14 -0400
Subject: [patch] mm: memcontrol: uncharge pages on swapout fix
Post by Vladimir Davydov
Post by Johannes Weiner
+ local_irq_disable();
+ mem_cgroup_charge_statistics(memcg, page, -1);
+ memcg_check_events(memcg, page);
+ local_irq_enable();
AFAICT mem_cgroup_swapout() is called under mapping->tree_lock with irqs
disabled, so we should use irq_save/restore here.
But this function doesn't actually need to be called under the tree
lock. So for now, simply remove the irq-disabling altogether and rely
on the caller's IRQ state. Later on, we'll move it out from there and
add back the simple, non-saving IRQ-disabling.
The fix looks good to me.

Acked-by: Vladimir Davydov <***@parallels.com>

But I'm still unsure about the original patch.
Post by Johannes Weiner
---
mm/memcontrol.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8dc46aa9ae8f..c688fb73ff35 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5806,6 +5806,9 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
VM_BUG_ON_PAGE(PageLRU(page), page);
VM_BUG_ON_PAGE(page_count(page), page);
+ /* XXX: caller holds IRQ-safe mapping->tree_lock */
+ VM_BUG_ON(!irqs_disabled());
+
if (!do_swap_account)
return;
@@ -5827,10 +5830,8 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
if (!mem_cgroup_is_root(memcg))
page_counter_uncharge(&memcg->memory, 1);
- local_irq_disable();
mem_cgroup_charge_statistics(memcg, page, -1);
memcg_check_events(memcg, page);
- local_irq_enable();
}
/**
--
2.1.2
Johannes Weiner
2014-10-22 13:20:38 UTC
Permalink
Post by Vladimir Davydov
Post by Johannes Weiner
Post by Vladimir Davydov
Post by Johannes Weiner
mem_cgroup_swapout() is called with exclusive access to the page at
the end of the page's lifetime. Instead of clearing the PCG_MEMSW
flag and deferring the uncharge, just do it right away. This allows
follow-up patches to simplify the uncharge code.
---
mm/memcontrol.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bea3fddb3372..7709f17347f3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5799,6 +5799,7 @@ static void __init enable_swap_cgroup(void)
*/
void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
{
+ struct mem_cgroup *memcg;
struct page_cgroup *pc;
unsigned short oldid;
@@ -5815,13 +5816,21 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
return;
VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
+ memcg = pc->mem_cgroup;
- oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup));
+ oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
VM_BUG_ON_PAGE(oldid, page);
+ mem_cgroup_swap_statistics(memcg, true);
- pc->flags &= ~PCG_MEMSW;
- css_get(&pc->mem_cgroup->css);
- mem_cgroup_swap_statistics(pc->mem_cgroup, true);
+ pc->flags = 0;
+
+ if (!mem_cgroup_is_root(memcg))
+ page_counter_uncharge(&memcg->memory, 1);
AFAIU it removes batched uncharge of swapped out pages, doesn't it? Will
it affect performance?
During swapout and with lockless page counters? I don't think so.
How is this different from paging out page cache? I mean, we can have a lot of
pages in the swap cache that have already been swapped out, and are
waiting to be unmapped, uncharged, and freed, just like usual page
cache. Why do we use batching for file cache pages then?
The batching is mostly for munmap(). We do it for reclaim because
it's convenient, but I don't think an extra word per struct page, just
to batch one (sometimes a few) locked subtractions per swapped-out
page, is a reasonable trade-off.
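For context, the batching in question is the list-based uncharge on
the freeing side; a minimal sketch of that pattern (free_page_batch is
a made-up name, modeled on what the page freeing path does with the
usual free_hot_cold_page_list() helper):

	static void free_page_batch(struct list_head *pages_to_free)
	{
		/* one batched uncharge pass for the whole list */
		mem_cgroup_uncharge_list(pages_to_free);
		/* then hand the pages back to the allocator */
		free_hot_cold_page_list(pages_to_free, false);
	}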
Post by Vladimir Davydov
Post by Johannes Weiner
Post by Vladimir Davydov
Besides, it looks asymmetric with respect to the page cache uncharge
path, where we still defer uncharge to mem_cgroup_uncharge_list(), and I
personally rather dislike this asymmetry.
The asymmetry is inherent in the fact that we have memory and
memory+swap accounting, and here a memory charge is transferred out to
swap. Before, the asymmetry was in mem_cgroup_uncharge_list() where
we separate out memory and memsw pages (which the next patch fixes).
I agree that memsw is inherently asymmetric, but IMO it isn't the case
for swap *cache* vs page *cache*. We handle them similarly - removing
from a mapping, uncharging, freeing. If one wants batching, why
shouldn't the other?
It has to be worth it in practical terms. You can argue symmetry
between swap cache and page cache, but swapping simply is a much
colder path than reclaiming page cache. Our reclaim algorithm avoids
it like the plague.
Post by Vladimir Davydov
Post by Johannes Weiner
So nothing changed, the ugliness was just moved around. I actually
like it better now that it's part of the swap controller, because
that's where the nastiness actually comes from. This will all go away
when we account swap separately. Then, swapped pages can keep their
memory charge until mem_cgroup_uncharge() again and the swap charge
will be completely independent from it. This reshuffling is just
necessary because it allows us to get rid of the per-page flag.
Do you mean that swap cache uncharge batching will be back soon?
Well, yes, once we switch from memsw to a separate swap counter, it
comes automatically. Pages no longer carry two charges, and so the
uncharging of pages doesn't have to distinguish between swapped out
pages and other pages anymore.

Vladimir Davydov
2014-10-22 15:37:25 UTC
Permalink
Post by Johannes Weiner
Post by Vladimir Davydov
Post by Johannes Weiner
Post by Vladimir Davydov
Post by Johannes Weiner
mem_cgroup_swapout() is called with exclusive access to the page at
the end of the page's lifetime. Instead of clearing the PCG_MEMSW
flag and deferring the uncharge, just do it right away. This allows
follow-up patches to simplify the uncharge code.
---
mm/memcontrol.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bea3fddb3372..7709f17347f3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5799,6 +5799,7 @@ static void __init enable_swap_cgroup(void)
*/
void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
{
+ struct mem_cgroup *memcg;
struct page_cgroup *pc;
unsigned short oldid;
@@ -5815,13 +5816,21 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
return;
VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
+ memcg = pc->mem_cgroup;
- oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup));
+ oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
VM_BUG_ON_PAGE(oldid, page);
+ mem_cgroup_swap_statistics(memcg, true);
- pc->flags &= ~PCG_MEMSW;
- css_get(&pc->mem_cgroup->css);
- mem_cgroup_swap_statistics(pc->mem_cgroup, true);
+ pc->flags = 0;
+
+ if (!mem_cgroup_is_root(memcg))
+ page_counter_uncharge(&memcg->memory, 1);
AFAIU it removes batched uncharge of swapped out pages, doesn't it? Will
it affect performance?
During swapout and with lockless page counters? I don't think so.
How is this different from paging out page cache? I mean, we can have a lot of
pages in the swap cache that have already been swapped out, and are
waiting to be unmapped, uncharged, and freed, just like usual page
cache. Why do we use batching for file cache pages then?
The batching is mostly for munmap(). We do it for reclaim because
it's convenient, but I don't think an extra word per struct page, just
to batch one (sometimes a few) locked subtractions per swapped-out
page, is a reasonable trade-off.
Post by Vladimir Davydov
Post by Johannes Weiner
Post by Vladimir Davydov
Besides, it looks asymmetric with respect to the page cache uncharge
path, where we still defer uncharge to mem_cgroup_uncharge_list(), and I
personally rather dislike this asymmetry.
The asymmetry is inherent in the fact that we have memory and
memory+swap accounting, and here a memory charge is transferred out to
swap. Before, the asymmetry was in mem_cgroup_uncharge_list() where
we separate out memory and memsw pages (which the next patch fixes).
I agree that memsw is inherently asymmetric, but IMO it isn't the case
for swap *cache* vs page *cache*. We handle them similarly - removing
from a mapping, uncharging, freeing. If one wants batching, why
shouldn't the other?
It has to be worth it in practical terms. You can argue symmetry
between swap cache and page cache, but swapping simply is a much
colder path than reclaiming page cache. Our reclaim algorithm avoids
it like the plague.
Post by Vladimir Davydov
Post by Johannes Weiner
So nothing changed, the ugliness was just moved around. I actually
like it better now that it's part of the swap controller, because
that's where the nastiness actually comes from. This will all go away
when we account swap separately. Then, swapped pages can keep their
memory charge until mem_cgroup_uncharge() again and the swap charge
will be completely independent from it. This reshuffling is just
necessary because it allows us to get rid of the per-page flag.
Do you mean that swap cache uncharge batching will be back soon?
Well, yes, once we switch from memsw to a separate swap counter, it
comes automatically. Pages no longer carry two charges, and so the
uncharging of pages doesn't have to distinguish between swapped out
pages and other pages anymore.
With this in mind,

Acked-by: Vladimir Davydov <***@parallels.com>

Michal Hocko
2014-10-22 15:34:55 UTC
Permalink
Post by Johannes Weiner
mem_cgroup_swapout() is called with exclusive access to the page at
the end of the page's lifetime. Instead of clearing the PCG_MEMSW
flag and deferring the uncharge, just do it right away. This allows
follow-up patches to simplify the uncharge code.
OK, it makes sense. With the irq fixup
Post by Johannes Weiner
---
mm/memcontrol.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bea3fddb3372..7709f17347f3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5799,6 +5799,7 @@ static void __init enable_swap_cgroup(void)
*/
void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
{
+ struct mem_cgroup *memcg;
struct page_cgroup *pc;
unsigned short oldid;
@@ -5815,13 +5816,21 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
return;
VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
+ memcg = pc->mem_cgroup;
- oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup));
+ oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
VM_BUG_ON_PAGE(oldid, page);
+ mem_cgroup_swap_statistics(memcg, true);
- pc->flags &= ~PCG_MEMSW;
- css_get(&pc->mem_cgroup->css);
- mem_cgroup_swap_statistics(pc->mem_cgroup, true);
+ pc->flags = 0;
+
+ if (!mem_cgroup_is_root(memcg))
+ page_counter_uncharge(&memcg->memory, 1);
+
+ local_irq_disable();
+ mem_cgroup_charge_statistics(memcg, page, -1);
+ memcg_check_events(memcg, page);
+ local_irq_enable();
}
/**
--
2.1.2
--
Michal Hocko
SUSE Labs