merge mm-hotfixes-stable into mm-stable to pick up depended-upon changes

Author:	Andrew Morton
Date:	2023-08-21 14:26:20 -07:00
Commit:	5994eabf3b
41 changed files with 335 additions and 107 deletions

@@ -12474,6 +12474,7 @@ F:	net/mctp/
 MAPLE TREE
 M:	Liam R. Howlett <Liam.Howlett@oracle.com>
+L:	maple-tree@lists.infradead.org
 L:	linux-mm@kvack.org
 S:	Supported
 F:	Documentation/core-api/maple_tree.rst

@@ -145,6 +145,7 @@ static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
 static const struct mm_walk_ops subpage_walk_ops = {
 	.pmd_entry = subpage_walk_pmd_entry,
+	.walk_lock = PGWALK_WRLOCK_VERIFY,
 };
 
 static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,

@@ -102,6 +102,7 @@ static const struct mm_walk_ops pageattr_ops = {
 	.pmd_entry = pageattr_pmd_entry,
 	.pte_entry = pageattr_pte_entry,
 	.pte_hole = pageattr_pte_hole,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,

@@ -2514,6 +2514,7 @@ static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
 static const struct mm_walk_ops thp_split_walk_ops = {
 	.pmd_entry = thp_split_walk_pmd_entry,
+	.walk_lock = PGWALK_WRLOCK_VERIFY,
 };
 
 static inline void thp_split_mm(struct mm_struct *mm)
@@ -2565,6 +2566,7 @@ static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
 static const struct mm_walk_ops zap_zero_walk_ops = {
 	.pmd_entry = __zap_zero_pages,
+	.walk_lock = PGWALK_WRLOCK,
 };
 
 /*
@@ -2655,6 +2657,7 @@ static const struct mm_walk_ops enable_skey_walk_ops = {
 	.hugetlb_entry = __s390_enable_skey_hugetlb,
 	.pte_entry = __s390_enable_skey_pte,
 	.pmd_entry = __s390_enable_skey_pmd,
+	.walk_lock = PGWALK_WRLOCK,
 };
 
 int s390_enable_skey(void)
@@ -2692,6 +2695,7 @@ static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
 static const struct mm_walk_ops reset_cmma_walk_ops = {
 	.pte_entry = __s390_reset_cmma,
+	.walk_lock = PGWALK_WRLOCK,
 };
 
 void s390_reset_cmma(struct mm_struct *mm)
@@ -2728,6 +2732,7 @@ static int s390_gather_pages(pte_t *ptep, unsigned long addr,
 static const struct mm_walk_ops gather_pages_ops = {
 	.pte_entry = s390_gather_pages,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 /*

@@ -1101,9 +1101,17 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty)
 int __nilfs_mark_inode_dirty(struct inode *inode, int flags)
 {
+	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
 	struct buffer_head *ibh;
 	int err;
 
+	/*
+	 * Do not dirty inodes after the log writer has been detached
+	 * and its nilfs_root struct has been freed.
+	 */
+	if (unlikely(nilfs_purging(nilfs)))
+		return 0;
+
 	err = nilfs_load_inode_block(inode, &ibh);
 	if (unlikely(err)) {
 		nilfs_warn(inode->i_sb,

@@ -725,6 +725,11 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
 		struct folio *folio = fbatch.folios[i];
 
 		folio_lock(folio);
+		if (unlikely(folio->mapping != mapping)) {
+			/* Exclude folios removed from the address space */
+			folio_unlock(folio);
+			continue;
+		}
 		head = folio_buffers(folio);
 		if (!head) {
 			create_empty_buffers(&folio->page, i_blocksize(inode), 0);
@@ -2845,6 +2850,7 @@ void nilfs_detach_log_writer(struct super_block *sb)
 		nilfs_segctor_destroy(nilfs->ns_writer);
 		nilfs->ns_writer = NULL;
 	}
+	set_nilfs_purging(nilfs);
 
 	/* Force to free the list of dirty files */
 	spin_lock(&nilfs->ns_inode_lock);
@@ -2857,4 +2863,5 @@ void nilfs_detach_log_writer(struct super_block *sb)
 	up_write(&nilfs->ns_segctor_sem);
 	nilfs_dispose_list(nilfs, &garbage_list, 1);
+	clear_nilfs_purging(nilfs);
 }

@@ -29,6 +29,7 @@ enum {
 	THE_NILFS_DISCONTINUED,	/* 'next' pointer chain has broken */
 	THE_NILFS_GC_RUNNING,	/* gc process is running */
 	THE_NILFS_SB_DIRTY,	/* super block is dirty */
+	THE_NILFS_PURGING,	/* disposing dirty files for cleanup */
 };
 
 /**
@@ -208,6 +209,7 @@ THE_NILFS_FNS(INIT, init)
 THE_NILFS_FNS(DISCONTINUED, discontinued)
 THE_NILFS_FNS(GC_RUNNING, gc_running)
 THE_NILFS_FNS(SB_DIRTY, sb_dirty)
+THE_NILFS_FNS(PURGING, purging)
 
 /*
  * Mount option operations

@@ -309,6 +309,8 @@ static void append_kcore_note(char *notes, size_t *i, const char *name,
 
 static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
 {
+	struct file *file = iocb->ki_filp;
+	char *buf = file->private_data;
 	loff_t *fpos = &iocb->ki_pos;
 	size_t phdrs_offset, notes_offset, data_offset;
 	size_t page_offline_frozen = 1;
@@ -555,10 +557,21 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
 		case KCORE_VMEMMAP:
 		case KCORE_TEXT:
 			/*
-			 * We use _copy_to_iter() to bypass usermode hardening
-			 * which would otherwise prevent this operation.
+			 * Sadly we must use a bounce buffer here to be able to
+			 * make use of copy_from_kernel_nofault(), as these
+			 * memory regions might not always be mapped on all
+			 * architectures.
 			 */
-			if (_copy_to_iter((char *)start, tsz, iter) != tsz) {
+			if (copy_from_kernel_nofault(buf, (void *)start, tsz)) {
+				if (iov_iter_zero(tsz, iter) != tsz) {
+					ret = -EFAULT;
+					goto out;
+				}
+			/*
+			 * We know the bounce buffer is safe to copy from, so
+			 * use _copy_to_iter() directly.
+			 */
+			} else if (_copy_to_iter(buf, tsz, iter) != tsz) {
 				ret = -EFAULT;
 				goto out;
 			}
@@ -595,6 +608,10 @@ static int open_kcore(struct inode *inode, struct file *filp)
 	if (ret)
 		return ret;
 
+	filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!filp->private_data)
+		return -ENOMEM;
+
 	if (kcore_need_update)
 		kcore_update_ram();
 	if (i_size_read(inode) != proc_root_kcore->size) {
@@ -605,9 +622,16 @@ static int open_kcore(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+static int release_kcore(struct inode *inode, struct file *file)
+{
+	kfree(file->private_data);
+	return 0;
+}
+
 static const struct proc_ops kcore_proc_ops = {
 	.proc_read_iter = read_kcore_iter,
 	.proc_open = open_kcore,
+	.proc_release = release_kcore,
 	.proc_lseek = default_llseek,
 };

@@ -571,8 +571,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
 	bool migration = false;
 
 	if (pmd_present(*pmd)) {
-		/* FOLL_DUMP will return -EFAULT on huge zero page */
-		page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP);
+		page = vm_normal_page_pmd(vma, addr, *pmd);
 	} else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) {
 		swp_entry_t entry = pmd_to_swp_entry(*pmd);
@@ -742,12 +741,14 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
 static const struct mm_walk_ops smaps_walk_ops = {
 	.pmd_entry = smaps_pte_range,
 	.hugetlb_entry = smaps_hugetlb_range,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static const struct mm_walk_ops smaps_shmem_walk_ops = {
 	.pmd_entry = smaps_pte_range,
 	.hugetlb_entry = smaps_hugetlb_range,
 	.pte_hole = smaps_pte_hole,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 /*
@@ -1229,6 +1230,7 @@ static int clear_refs_test_walk(unsigned long start, unsigned long end,
 static const struct mm_walk_ops clear_refs_walk_ops = {
 	.pmd_entry = clear_refs_pte_range,
 	.test_walk = clear_refs_test_walk,
+	.walk_lock = PGWALK_WRLOCK,
 };
 
 static ssize_t clear_refs_write(struct file *file, const char __user *buf,
@@ -1606,6 +1608,7 @@ static const struct mm_walk_ops pagemap_ops = {
 	.pmd_entry = pagemap_pmd_range,
 	.pte_hole = pagemap_pte_hole,
 	.hugetlb_entry = pagemap_hugetlb_range,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 /*
@@ -1919,6 +1922,7 @@ static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
 static const struct mm_walk_ops show_numa_ops = {
 	.hugetlb_entry = gather_hugetlb_stats,
 	.pmd_entry = gather_pte_stats,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 /*

@@ -25,9 +25,6 @@ static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
 #endif
 
 vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf);
-struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
-				   unsigned long addr, pmd_t *pmd,
-				   unsigned int flags);
 bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 			   pmd_t *pmd, unsigned long addr, unsigned long next);
 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd,

@@ -3496,15 +3496,24 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
  * Indicates whether GUP can follow a PROT_NONE mapped page, or whether
  * a (NUMA hinting) fault is required.
  */
-static inline bool gup_can_follow_protnone(unsigned int flags)
+static inline bool gup_can_follow_protnone(struct vm_area_struct *vma,
+					   unsigned int flags)
 {
 	/*
-	 * FOLL_FORCE has to be able to make progress even if the VMA is
-	 * inaccessible. Further, FOLL_FORCE access usually does not represent
-	 * application behaviour and we should avoid triggering NUMA hinting
-	 * faults.
+	 * If callers don't want to honor NUMA hinting faults, no need to
+	 * determine if we would actually have to trigger a NUMA hinting fault.
 	 */
-	return flags & FOLL_FORCE;
+	if (!(flags & FOLL_HONOR_NUMA_FAULT))
+		return true;
+
+	/*
+	 * NUMA hinting faults don't apply in inaccessible (PROT_NONE) VMAs.
+	 *
+	 * Requiring a fault here even for inaccessible VMAs would mean that
+	 * FOLL_FORCE cannot make any progress, because handle_mm_fault()
+	 * refuses to process NUMA hinting faults in inaccessible VMAs.
+	 */
+	return !vma_is_accessible(vma);
 }
 
 typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data);

@@ -1356,6 +1356,15 @@ enum {
 	FOLL_PCI_P2PDMA = 1 << 10,
 	/* allow interrupts from generic signals */
 	FOLL_INTERRUPTIBLE = 1 << 11,
+	/*
+	 * Always honor (trigger) NUMA hinting faults.
+	 *
+	 * FOLL_WRITE implicitly honors NUMA hinting faults because a
+	 * PROT_NONE-mapped page is not writable (exceptions with FOLL_FORCE
+	 * apply). get_user_pages_fast_only() always implicitly honors NUMA
+	 * hinting faults.
+	 */
+	FOLL_HONOR_NUMA_FAULT = 1 << 12,
 
 	/* See also internal only FOLL flags in mm/internal.h */
 };

@@ -6,6 +6,16 @@
 
 struct mm_walk;
 
+/* Locking requirement during a page walk. */
+enum page_walk_lock {
+	/* mmap_lock should be locked for read to stabilize the vma tree */
+	PGWALK_RDLOCK = 0,
+	/* vma will be write-locked during the walk */
+	PGWALK_WRLOCK = 1,
+	/* vma is expected to be already write-locked during the walk */
+	PGWALK_WRLOCK_VERIFY = 2,
+};
+
 /**
  * struct mm_walk_ops - callbacks for walk_page_range
  * @pgd_entry:		if set, called for each non-empty PGD (top-level) entry
@@ -66,6 +76,7 @@ struct mm_walk_ops {
 	int (*pre_vma)(unsigned long start, unsigned long end,
 		       struct mm_walk *walk);
 	void (*post_vma)(struct mm_walk *walk);
+	enum page_walk_lock walk_lock;
 };
 
 /*
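The enum and the new mm_walk_ops::walk_lock field above are what every walker in this merge is converted to. As a minimal sketch (not part of this commit; the example_* names are hypothetical), a read-locked walker pairs PGWALK_RDLOCK with mmap_lock held for read by the caller:

/* Hypothetical illustration only -- not part of this commit. */
#include <linux/mm.h>
#include <linux/pagewalk.h>

static int example_pmd_entry(pmd_t *pmd, unsigned long addr,
			     unsigned long next, struct mm_walk *walk)
{
	/* Inspect one PMD range; returning 0 continues the walk. */
	return 0;
}

/* The walker now declares its locking expectation next to its callbacks. */
static const struct mm_walk_ops example_walk_ops = {
	.pmd_entry = example_pmd_entry,
	.walk_lock = PGWALK_RDLOCK,
};

static int example_walk(struct mm_struct *mm, unsigned long start,
			unsigned long end)
{
	int err;

	/* PGWALK_RDLOCK: the caller is expected to hold mmap_lock for read. */
	mmap_read_lock(mm);
	err = walk_page_range(mm, start, end, &example_walk_ops, NULL);
	mmap_read_unlock(mm);
	return err;
}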

@@ -1136,7 +1136,6 @@ static void set_iter_tags(struct radix_tree_iter *iter,
 void __rcu **radix_tree_iter_resume(void __rcu **slot,
 					struct radix_tree_iter *iter)
 {
-	slot++;
 	iter->index = __radix_tree_iter_add(iter, 1);
 	iter->next_index = iter->index;
 	iter->tags = 0;

@@ -1148,7 +1148,7 @@ static ssize_t extract_user_to_sg(struct iov_iter *iter,
 
 failed:
 	while (sgtable->nents > sgtable->orig_nents)
-		put_page(sg_page(&sgtable->sgl[--sgtable->nents]));
+		unpin_user_page(sg_page(&sgtable->sgl[--sgtable->nents]));
 	return res;
 }

@@ -933,11 +933,12 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 
 		/*
 		 * Check if the pageblock has already been marked skipped.
-		 * Only the aligned PFN is checked as the caller isolates
+		 * Only the first PFN is checked as the caller isolates
 		 * COMPACT_CLUSTER_MAX at a time so the second call must
 		 * not falsely conclude that the block should be skipped.
 		 */
-		if (!valid_page && pageblock_aligned(low_pfn)) {
+		if (!valid_page && (pageblock_aligned(low_pfn) ||
+				    low_pfn == cc->zone->zone_start_pfn)) {
 			if (!isolation_suitable(cc, page)) {
 				low_pfn = end_pfn;
 				folio = NULL;
@@ -2030,7 +2031,8 @@ static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
 		 * before making it "skip" so other compaction instances do
 		 * not scan the same block.
 		 */
-		if (pageblock_aligned(low_pfn) &&
+		if ((pageblock_aligned(low_pfn) ||
+		     low_pfn == cc->zone->zone_start_pfn) &&
 		    !fast_find_block && !isolation_suitable(cc, page))
 			continue;

@@ -273,6 +273,7 @@ struct damos_filter *damos_new_filter(enum damos_filter_type type,
 		return NULL;
 	filter->type = type;
 	filter->matching = matching;
+	INIT_LIST_HEAD(&filter->list);
 	return filter;
 }

@@ -389,6 +389,7 @@ out:
 static const struct mm_walk_ops damon_mkold_ops = {
 	.pmd_entry = damon_mkold_pmd_entry,
 	.hugetlb_entry = damon_mkold_hugetlb_entry,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static void damon_va_mkold(struct mm_struct *mm, unsigned long addr)
@@ -532,6 +533,7 @@ out:
 static const struct mm_walk_ops damon_young_ops = {
 	.pmd_entry = damon_young_pmd_entry,
 	.hugetlb_entry = damon_young_hugetlb_entry,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static bool damon_va_young(struct mm_struct *mm, unsigned long addr,

@@ -597,7 +597,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
 	pte = ptep_get(ptep);
 	if (!pte_present(pte))
 		goto no_page;
-	if (pte_protnone(pte) && !gup_can_follow_protnone(flags))
+	if (pte_protnone(pte) && !gup_can_follow_protnone(vma, flags))
 		goto no_page;
 
 	page = vm_normal_page(vma, address, pte);
@@ -714,7 +714,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
 	if (likely(!pmd_trans_huge(pmdval)))
 		return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
 
-	if (pmd_protnone(pmdval) && !gup_can_follow_protnone(flags))
+	if (pmd_protnone(pmdval) && !gup_can_follow_protnone(vma, flags))
 		return no_page_table(vma, flags);
 
 	ptl = pmd_lock(mm, pmd);
@@ -844,6 +844,10 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 	if (WARN_ON_ONCE(foll_flags & FOLL_PIN))
 		return NULL;
 
+	/*
+	 * We never set FOLL_HONOR_NUMA_FAULT because callers don't expect
+	 * to fail on PROT_NONE-mapped pages.
+	 */
 	page = follow_page_mask(vma, address, foll_flags, &ctx);
 	if (ctx.pgmap)
 		put_dev_pagemap(ctx.pgmap);
@@ -2240,6 +2244,13 @@ static bool is_valid_gup_args(struct page **pages, int *locked,
 		gup_flags |= FOLL_UNLOCKABLE;
 	}
 
+	/*
+	 * For now, always trigger NUMA hinting faults. Some GUP users like
+	 * KVM require the hint to be as the calling context of GUP is
+	 * functionally similar to a memory reference from task context.
+	 */
+	gup_flags |= FOLL_HONOR_NUMA_FAULT;
+
 	/* FOLL_GET and FOLL_PIN are mutually exclusive. */
 	if (WARN_ON_ONCE((gup_flags & (FOLL_PIN | FOLL_GET)) ==
 			 (FOLL_PIN | FOLL_GET)))
@@ -2564,7 +2575,14 @@ static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
 		struct page *page;
 		struct folio *folio;
 
-		if (pte_protnone(pte) && !gup_can_follow_protnone(flags))
+		/*
+		 * Always fallback to ordinary GUP on PROT_NONE-mapped pages:
+		 * pte_access_permitted() better should reject these pages
+		 * either way: otherwise, GUP-fast might succeed in
+		 * cases where ordinary GUP would fail due to VMA access
+		 * permissions.
+		 */
+		if (pte_protnone(pte))
 			goto pte_unmap;
 
 		if (!pte_access_permitted(pte, flags & FOLL_WRITE))
@@ -2983,8 +3001,8 @@ static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned lo
 		if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) ||
 			     pmd_devmap(pmd))) {
-			if (pmd_protnone(pmd) &&
-			    !gup_can_follow_protnone(flags))
+			/* See gup_pte_range() */
+			if (pmd_protnone(pmd))
 				return 0;
 
 			if (!gup_huge_pmd(pmd, pmdp, addr, next, flags,
@@ -3164,7 +3182,7 @@ static int internal_get_user_pages_fast(unsigned long start,
 	if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM |
 				       FOLL_FORCE | FOLL_PIN | FOLL_GET |
 				       FOLL_FAST_ONLY | FOLL_NOFAULT |
-				       FOLL_PCI_P2PDMA)))
+				       FOLL_PCI_P2PDMA | FOLL_HONOR_NUMA_FAULT)))
 		return -EINVAL;
 
 	if (gup_flags & FOLL_PIN)

@@ -562,6 +562,7 @@ static const struct mm_walk_ops hmm_walk_ops = {
 	.pte_hole = hmm_vma_walk_hole,
 	.hugetlb_entry = hmm_vma_walk_hugetlb_entry,
 	.test_walk = hmm_vma_walk_test,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 /**

@@ -1467,8 +1467,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 	if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
 		return ERR_PTR(-EFAULT);
 
-	/* Full NUMA hinting faults to serialise migration in fault paths */
-	if (pmd_protnone(*pmd) && !gup_can_follow_protnone(flags))
+	if (pmd_protnone(*pmd) && !gup_can_follow_protnone(vma, flags))
 		return NULL;
 
 	if (!pmd_write(*pmd) && gup_must_unshare(vma, flags, page))

@@ -1580,9 +1580,37 @@ static inline void destroy_compound_gigantic_folio(struct folio *folio,
 						unsigned int order) { }
 #endif
 
+static inline void __clear_hugetlb_destructor(struct hstate *h,
+						struct folio *folio)
+{
+	lockdep_assert_held(&hugetlb_lock);
+
+	/*
+	 * Very subtle
+	 *
+	 * For non-gigantic pages set the destructor to the normal compound
+	 * page dtor. This is needed in case someone takes an additional
+	 * temporary ref to the page, and freeing is delayed until they drop
+	 * their reference.
+	 *
+	 * For gigantic pages set the destructor to the null dtor. This
+	 * destructor will never be called. Before freeing the gigantic
+	 * page destroy_compound_gigantic_folio will turn the folio into a
+	 * simple group of pages. After this the destructor does not
+	 * apply.
+	 *
+	 */
+	if (hstate_is_gigantic(h))
+		folio_set_compound_dtor(folio, NULL_COMPOUND_DTOR);
+	else
+		folio_set_compound_dtor(folio, COMPOUND_PAGE_DTOR);
+}
+
 /*
- * Remove hugetlb folio from lists, and update dtor so that the folio appears
- * as just a compound page.
+ * Remove hugetlb folio from lists.
+ * If vmemmap exists for the folio, update dtor so that the folio appears
+ * as just a compound page. Otherwise, wait until after allocating vmemmap
+ * to update dtor.
  *
  * A reference is held on the folio, except in the case of demote.
  *
@@ -1613,31 +1641,19 @@ static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio,
 	}
 
 	/*
-	 * Very subtle
-	 *
-	 * For non-gigantic pages set the destructor to the normal compound
-	 * page dtor. This is needed in case someone takes an additional
-	 * temporary ref to the page, and freeing is delayed until they drop
-	 * their reference.
-	 *
-	 * For gigantic pages set the destructor to the null dtor. This
-	 * destructor will never be called. Before freeing the gigantic
-	 * page destroy_compound_gigantic_folio will turn the folio into a
-	 * simple group of pages. After this the destructor does not
-	 * apply.
-	 *
-	 * This handles the case where more than one ref is held when and
-	 * after update_and_free_hugetlb_folio is called.
-	 *
+	 * We can only clear the hugetlb destructor after allocating vmemmap
+	 * pages. Otherwise, someone (memory error handling) may try to write
+	 * to tail struct pages.
+	 */
+	if (!folio_test_hugetlb_vmemmap_optimized(folio))
+		__clear_hugetlb_destructor(h, folio);
+
+	/*
 	 * In the case of demote we do not ref count the page as it will soon
 	 * be turned into a page of smaller size.
 	 */
 	if (!demote)
 		folio_ref_unfreeze(folio, 1);
-	if (hstate_is_gigantic(h))
-		folio_set_compound_dtor(folio, NULL_COMPOUND_DTOR);
-	else
-		folio_set_compound_dtor(folio, COMPOUND_PAGE_DTOR);
 
 	h->nr_huge_pages--;
 	h->nr_huge_pages_node[nid]--;
@@ -1706,6 +1722,7 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
 {
 	int i;
 	struct page *subpage;
+	bool clear_dtor = folio_test_hugetlb_vmemmap_optimized(folio);
 
 	if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
 		return;
@@ -1736,6 +1753,16 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
 	if (unlikely(folio_test_hwpoison(folio)))
 		folio_clear_hugetlb_hwpoison(folio);
 
+	/*
+	 * If vmemmap pages were allocated above, then we need to clear the
+	 * hugetlb destructor under the hugetlb lock.
+	 */
+	if (clear_dtor) {
+		spin_lock_irq(&hugetlb_lock);
+		__clear_hugetlb_destructor(h, folio);
+		spin_unlock_irq(&hugetlb_lock);
+	}
+
 	for (i = 0; i < pages_per_huge_page(h); i++) {
 		subpage = folio_page(folio, i);
 		subpage->flags &= ~(1 << PG_locked | 1 << PG_error |

@@ -941,6 +941,13 @@ int migrate_device_coherent_page(struct page *page);
 struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags);
 int __must_check try_grab_page(struct page *page, unsigned int flags);
 
+/*
+ * mm/huge_memory.c
+ */
+struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
+				   unsigned long addr, pmd_t *pmd,
+				   unsigned int flags);
+
 enum {
 	/* mark page accessed */
 	FOLL_TOUCH = 1 << 16,
@@ -1014,6 +1021,16 @@ static inline bool gup_must_unshare(struct vm_area_struct *vma,
 	if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
 		smp_rmb();
 
+	/*
+	 * During GUP-fast we might not get called on the head page for a
+	 * hugetlb page that is mapped using cont-PTE, because GUP-fast does
+	 * not work with the abstracted hugetlb PTEs that always point at the
+	 * head page. For hugetlb, PageAnonExclusive only applies on the head
+	 * page (as it cannot be partially COW-shared), so lookup the head page.
+	 */
+	if (unlikely(!PageHead(page) && PageHuge(page)))
+		page = compound_head(page);
+
 	/*
 	 * Note that PageKsm() pages cannot be exclusive, and consequently,
 	 * cannot get pinned.

@@ -462,6 +462,12 @@ static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long nex
 
 static const struct mm_walk_ops break_ksm_ops = {
 	.pmd_entry = break_ksm_pmd_entry,
+	.walk_lock = PGWALK_RDLOCK,
+};
+
+static const struct mm_walk_ops break_ksm_lock_vma_ops = {
+	.pmd_entry = break_ksm_pmd_entry,
+	.walk_lock = PGWALK_WRLOCK,
 };
 
 /*
@@ -477,16 +483,17 @@ static const struct mm_walk_ops break_ksm_ops = {
  * of the process that owns 'vma'. We also do not want to enforce
  * protection keys here anyway.
  */
-static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
+static int break_ksm(struct vm_area_struct *vma, unsigned long addr, bool lock_vma)
 {
 	vm_fault_t ret = 0;
+	const struct mm_walk_ops *ops = lock_vma ?
+				&break_ksm_lock_vma_ops : &break_ksm_ops;
 
 	do {
 		int ksm_page;
 
 		cond_resched();
-		ksm_page = walk_page_range_vma(vma, addr, addr + 1,
-					       &break_ksm_ops, NULL);
+		ksm_page = walk_page_range_vma(vma, addr, addr + 1, ops, NULL);
 		if (WARN_ON_ONCE(ksm_page < 0))
 			return ksm_page;
 		if (!ksm_page)
@@ -572,7 +579,7 @@ static void break_cow(struct ksm_rmap_item *rmap_item)
 	mmap_read_lock(mm);
 	vma = find_mergeable_vma(mm, addr);
 	if (vma)
-		break_ksm(vma, addr);
+		break_ksm(vma, addr, false);
 	mmap_read_unlock(mm);
 }
@@ -878,7 +885,7 @@ static void remove_trailing_rmap_items(struct ksm_rmap_item **rmap_list)
  * in cmp_and_merge_page on one of the rmap_items we would be removing.
  */
 static int unmerge_ksm_pages(struct vm_area_struct *vma,
-			     unsigned long start, unsigned long end)
+			     unsigned long start, unsigned long end, bool lock_vma)
 {
 	unsigned long addr;
 	int err = 0;
@@ -889,7 +896,7 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma,
 		if (signal_pending(current))
 			err = -ERESTARTSYS;
 		else
-			err = break_ksm(vma, addr);
+			err = break_ksm(vma, addr, lock_vma);
 	}
 	return err;
 }
@@ -1036,7 +1043,7 @@ static int unmerge_and_remove_all_rmap_items(void)
 			if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
 				continue;
 			err = unmerge_ksm_pages(vma,
-						vma->vm_start, vma->vm_end);
+						vma->vm_start, vma->vm_end, false);
 			if (err)
 				goto error;
 		}
@@ -2546,7 +2553,7 @@ static int __ksm_del_vma(struct vm_area_struct *vma)
 		return 0;
 
 	if (vma->anon_vma) {
-		err = unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end);
+		err = unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end, true);
 		if (err)
 			return err;
 	}
@@ -2684,7 +2691,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		return 0;		/* just ignore the advice */
 
 	if (vma->anon_vma) {
-		err = unmerge_ksm_pages(vma, start, end);
+		err = unmerge_ksm_pages(vma, start, end, true);
 		if (err)
 			return err;
 	}
@@ -2800,6 +2807,8 @@ struct page *ksm_might_need_to_copy(struct page *page,
 			anon_vma->root == vma->anon_vma->root) {
 		return page;		/* still no need to copy it */
 	}
+	if (PageHWPoison(page))
+		return ERR_PTR(-EHWPOISON);
 	if (!PageUptodate(page))
 		return page;		/* let do_swap_page report the error */

@@ -232,6 +232,7 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
 
 static const struct mm_walk_ops swapin_walk_ops = {
 	.pmd_entry = swapin_walk_pmd_entry,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static void shmem_swapin_range(struct vm_area_struct *vma,
@@ -537,6 +538,7 @@ regular_folio:
 
 static const struct mm_walk_ops cold_walk_ops = {
 	.pmd_entry = madvise_cold_or_pageout_pte_range,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static void madvise_cold_page_range(struct mmu_gather *tlb,
@@ -760,6 +762,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
 
 static const struct mm_walk_ops madvise_free_walk_ops = {
 	.pmd_entry = madvise_free_pte_range,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static int madvise_free_single_vma(struct vm_area_struct *vma,

@@ -6013,6 +6013,7 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
 
 static const struct mm_walk_ops precharge_walk_ops = {
 	.pmd_entry = mem_cgroup_count_precharge_pte_range,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
@@ -6292,6 +6293,7 @@ put:	/* get_mctgt_type() gets & locks the page */
 
 static const struct mm_walk_ops charge_walk_ops = {
 	.pmd_entry = mem_cgroup_move_charge_pte_range,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static void mem_cgroup_move_charge(void)

@@ -827,6 +827,7 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
 static const struct mm_walk_ops hwp_walk_ops = {
 	.pmd_entry = hwpoison_pte_range,
 	.hugetlb_entry = hwpoison_hugetlb_range,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 /*
@@ -2500,7 +2501,7 @@ int unpoison_memory(unsigned long pfn)
 {
 	struct folio *folio;
 	struct page *p;
-	int ret = -EBUSY;
+	int ret = -EBUSY, ghp;
 	unsigned long count = 1;
 	bool huge = false;
 	static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
@@ -2533,6 +2534,13 @@
 		goto unlock_mutex;
 	}
 
+	if (folio_test_slab(folio) || PageTable(&folio->page) || folio_test_reserved(folio))
+		goto unlock_mutex;
+
+	/*
+	 * Note that folio->_mapcount is overloaded in SLAB, so the simple test
+	 * in folio_mapped() has to be done after folio_test_slab() is checked.
+	 */
 	if (folio_mapped(folio)) {
 		unpoison_pr_info("Unpoison: Someone maps the hwpoison page %#lx\n",
 				 pfn, &unpoison_rs);
@@ -2545,32 +2553,28 @@
 		goto unlock_mutex;
 	}
 
-	if (folio_test_slab(folio) || PageTable(&folio->page) || folio_test_reserved(folio))
-		goto unlock_mutex;
-
-	ret = get_hwpoison_page(p, MF_UNPOISON);
-	if (!ret) {
+	ghp = get_hwpoison_page(p, MF_UNPOISON);
+	if (!ghp) {
 		if (PageHuge(p)) {
 			huge = true;
 			count = folio_free_raw_hwp(folio, false);
-			if (count == 0) {
-				ret = -EBUSY;
+			if (count == 0)
 				goto unlock_mutex;
-			}
 		}
 		ret = folio_test_clear_hwpoison(folio) ? 0 : -EBUSY;
-	} else if (ret < 0) {
-		if (ret == -EHWPOISON) {
+	} else if (ghp < 0) {
+		if (ghp == -EHWPOISON) {
 			ret = put_page_back_buddy(p) ? 0 : -EBUSY;
-		} else
+		} else {
+			ret = ghp;
 			unpoison_pr_info("Unpoison: failed to grab page %#lx\n",
 					 pfn, &unpoison_rs);
+		}
 	} else {
 		if (PageHuge(p)) {
 			huge = true;
 			count = folio_free_raw_hwp(folio, false);
 			if (count == 0) {
-				ret = -EBUSY;
 				folio_put(folio);
 				goto unlock_mutex;
 			}
@@ -2771,11 +2775,14 @@ retry:
 	if (ret > 0) {
 		ret = soft_offline_in_use_page(page);
 	} else if (ret == 0) {
-		if (!page_handle_poison(page, true, false) && try_again) {
+		if (!page_handle_poison(page, true, false)) {
+			if (try_again) {
 				try_again = false;
 				flags &= ~MF_COUNT_INCREASED;
 				goto retry;
 			}
+			ret = -EBUSY;
+		}
 	}
 
 	mutex_unlock(&mf_mutex);

@@ -718,6 +718,14 @@ static const struct mm_walk_ops queue_pages_walk_ops = {
 	.hugetlb_entry = queue_folios_hugetlb,
 	.pmd_entry = queue_folios_pte_range,
 	.test_walk = queue_pages_test_walk,
+	.walk_lock = PGWALK_RDLOCK,
+};
+
+static const struct mm_walk_ops queue_pages_lock_vma_walk_ops = {
+	.hugetlb_entry = queue_folios_hugetlb,
+	.pmd_entry = queue_folios_pte_range,
+	.test_walk = queue_pages_test_walk,
+	.walk_lock = PGWALK_WRLOCK,
 };
 
 /*
@@ -738,7 +746,7 @@ static const struct mm_walk_ops queue_pages_walk_ops = {
 static int
 queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 		nodemask_t *nodes, unsigned long flags,
-		struct list_head *pagelist)
+		struct list_head *pagelist, bool lock_vma)
 {
 	int err;
 	struct queue_pages qp = {
@@ -749,8 +757,10 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 		.end = end,
 		.first = NULL,
 	};
+	const struct mm_walk_ops *ops = lock_vma ?
+			&queue_pages_lock_vma_walk_ops : &queue_pages_walk_ops;
 
-	err = walk_page_range(mm, start, end, &queue_pages_walk_ops, &qp);
+	err = walk_page_range(mm, start, end, ops, &qp);
 
 	if (!qp.first)
 		/* whole range in hole */
@@ -1078,7 +1088,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
 	vma = find_vma(mm, 0);
 	VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)));
 	queue_pages_range(mm, vma->vm_start, mm->task_size, &nmask,
-			flags | MPOL_MF_DISCONTIG_OK, &pagelist);
+			flags | MPOL_MF_DISCONTIG_OK, &pagelist, false);
 
 	if (!list_empty(&pagelist)) {
 		err = migrate_pages(&pagelist, alloc_migration_target, NULL,
@@ -1321,12 +1331,8 @@ static long do_mbind(unsigned long start, unsigned long len,
 	 * Lock the VMAs before scanning for pages to migrate, to ensure we don't
 	 * miss a concurrently inserted page.
 	 */
-	vma_iter_init(&vmi, mm, start);
-	for_each_vma_range(vmi, vma, end)
-		vma_start_write(vma);
-
 	ret = queue_pages_range(mm, start, end, nmask,
-			  flags | MPOL_MF_INVERT, &pagelist);
+			  flags | MPOL_MF_INVERT, &pagelist, true);
 
 	if (ret < 0) {
 		err = ret;

@@ -279,6 +279,7 @@ next:
 static const struct mm_walk_ops migrate_vma_walk_ops = {
 	.pmd_entry = migrate_vma_collect_pmd,
 	.pte_hole = migrate_vma_collect_hole,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 /*

@@ -176,6 +176,7 @@ static const struct mm_walk_ops mincore_walk_ops = {
 	.pmd_entry = mincore_pte_range,
 	.pte_hole = mincore_unmapped_range,
 	.hugetlb_entry = mincore_hugetlb,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 /*

@@ -371,6 +371,7 @@ static void mlock_vma_pages_range(struct vm_area_struct *vma,
 {
 	static const struct mm_walk_ops mlock_walk_ops = {
 		.pmd_entry = mlock_pte_range,
+		.walk_lock = PGWALK_WRLOCK_VERIFY,
 	};
 
 	/*

@@ -568,6 +568,7 @@ static const struct mm_walk_ops prot_none_walk_ops = {
 	.pte_entry = prot_none_pte_entry,
 	.hugetlb_entry = prot_none_hugetlb_entry,
 	.test_walk = prot_none_test,
+	.walk_lock = PGWALK_WRLOCK,
 };
 
 int

@@ -400,6 +400,33 @@ static int __walk_page_range(unsigned long start, unsigned long end,
 	return err;
 }
 
+static inline void process_mm_walk_lock(struct mm_struct *mm,
+					enum page_walk_lock walk_lock)
+{
+	if (walk_lock == PGWALK_RDLOCK)
+		mmap_assert_locked(mm);
+	else
+		mmap_assert_write_locked(mm);
+}
+
+static inline void process_vma_walk_lock(struct vm_area_struct *vma,
+					 enum page_walk_lock walk_lock)
+{
+#ifdef CONFIG_PER_VMA_LOCK
+	switch (walk_lock) {
+	case PGWALK_WRLOCK:
+		vma_start_write(vma);
+		break;
+	case PGWALK_WRLOCK_VERIFY:
+		vma_assert_write_locked(vma);
+		break;
+	case PGWALK_RDLOCK:
+		/* PGWALK_RDLOCK is handled by process_mm_walk_lock */
+		break;
+	}
+#endif
+}
+
 /**
  * walk_page_range - walk page table with caller specific callbacks
  * @mm:		mm_struct representing the target process of page table walk
@@ -459,7 +486,7 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
 	if (!walk.mm)
 		return -EINVAL;
 
-	mmap_assert_locked(walk.mm);
+	process_mm_walk_lock(walk.mm, ops->walk_lock);
 
 	vma = find_vma(walk.mm, start);
 	do {
@@ -474,6 +501,7 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
 			if (ops->pte_hole)
 				err = ops->pte_hole(start, next, -1, &walk);
 		} else { /* inside vma */
+			process_vma_walk_lock(vma, ops->walk_lock);
 			walk.vma = vma;
 			next = min(end, vma->vm_end);
 			vma = find_vma(mm, vma->vm_end);
@@ -549,7 +577,8 @@ int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start,
 	if (start < vma->vm_start || end > vma->vm_end)
 		return -EINVAL;
 
-	mmap_assert_locked(walk.mm);
+	process_mm_walk_lock(walk.mm, ops->walk_lock);
+	process_vma_walk_lock(vma, ops->walk_lock);
 	return __walk_page_range(start, end, &walk);
 }
 
@@ -566,7 +595,8 @@ int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
 	if (!walk.mm)
 		return -EINVAL;
 
-	mmap_assert_locked(walk.mm);
+	process_mm_walk_lock(walk.mm, ops->walk_lock);
+	process_vma_walk_lock(vma, ops->walk_lock);
 	return __walk_page_range(vma->vm_start, vma->vm_end, &walk);
 }

@@ -1745,7 +1745,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 	struct page *swapcache;
 	spinlock_t *ptl;
 	pte_t *pte, new_pte, old_pte;
-	bool hwposioned = false;
+	bool hwpoisoned = PageHWPoison(page);
 	int ret = 1;
 
 	swapcache = page;
@@ -1753,7 +1753,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 	if (unlikely(!page))
 		return -ENOMEM;
 	else if (unlikely(PTR_ERR(page) == -EHWPOISON))
-		hwposioned = true;
+		hwpoisoned = true;
 
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	if (unlikely(!pte || !pte_same_as_swp(ptep_get(pte),
@@ -1764,11 +1764,11 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 
 	old_pte = ptep_get(pte);
 
-	if (unlikely(hwposioned || !PageUptodate(page))) {
+	if (unlikely(hwpoisoned || !PageUptodate(page))) {
 		swp_entry_t swp_entry;
 
 		dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
-		if (hwposioned) {
+		if (hwpoisoned) {
 			swp_entry = make_hwpoison_entry(swapcache);
 			page = swapcache;
 		} else {

@@ -2979,6 +2979,10 @@ void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot)
 		free_vm_area(area);
 		return NULL;
 	}
+
+	flush_cache_vmap((unsigned long)area->addr,
+			(unsigned long)area->addr + count * PAGE_SIZE);
+
 	return area->addr;
 }
 EXPORT_SYMBOL_GPL(vmap_pfn);

@@ -4284,6 +4284,7 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_
 	static const struct mm_walk_ops mm_walk_ops = {
 		.test_walk = should_skip_vma,
 		.p4d_entry = walk_pud_range,
+		.walk_lock = PGWALK_RDLOCK,
 	};
 
 	int err;
@@ -4855,16 +4856,17 @@ void lru_gen_release_memcg(struct mem_cgroup *memcg)
 
 		spin_lock_irq(&pgdat->memcg_lru.lock);
 
-		VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list));
+		if (hlist_nulls_unhashed(&lruvec->lrugen.list))
+			goto unlock;
 
 		gen = lruvec->lrugen.gen;
 
-		hlist_nulls_del_rcu(&lruvec->lrugen.list);
+		hlist_nulls_del_init_rcu(&lruvec->lrugen.list);
 		pgdat->memcg_lru.nr_memcgs[gen]--;
 
 		if (!pgdat->memcg_lru.nr_memcgs[gen] && gen == get_memcg_gen(pgdat->memcg_lru.seq))
 			WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1);
-
+unlock:
 		spin_unlock_irq(&pgdat->memcg_lru.lock);
 	}
 }
@@ -5446,8 +5448,10 @@ restart:
 	rcu_read_lock();
 
 	hlist_nulls_for_each_entry_rcu(lrugen, pos, &pgdat->memcg_lru.fifo[gen][bin], list) {
-		if (op)
+		if (op) {
 			lru_gen_rotate_memcg(lruvec, op);
+			op = 0;
+		}
 
 		mem_cgroup_put(memcg);
 
@@ -5455,7 +5459,7 @@ restart:
 		memcg = lruvec_memcg(lruvec);
 
 		if (!mem_cgroup_tryget(memcg)) {
-			op = 0;
+			lru_gen_release_memcg(memcg);
 			memcg = NULL;
 			continue;
 		}

@@ -1777,6 +1777,7 @@ static void replace_sub_page(struct size_class *class, struct zspage *zspage,
 
 static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
 {
+	struct zs_pool *pool;
 	struct zspage *zspage;
 
 	/*
@@ -1786,9 +1787,10 @@ static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
 	VM_BUG_ON_PAGE(PageIsolated(page), page);
 
 	zspage = get_zspage(page);
-	migrate_write_lock(zspage);
+	pool = zspage->pool;
+	spin_lock(&pool->lock);
 	inc_zspage_isolation(zspage);
-	migrate_write_unlock(zspage);
+	spin_unlock(&pool->lock);
 
 	return true;
 }
@@ -1854,12 +1856,12 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
 	kunmap_atomic(s_addr);
 
 	replace_sub_page(class, zspage, newpage, page);
+	dec_zspage_isolation(zspage);
 	/*
 	 * Since we complete the data copy and set up new zspage structure,
 	 * it's okay to release the pool's lock.
 	 */
 	spin_unlock(&pool->lock);
-	dec_zspage_isolation(zspage);
 	migrate_write_unlock(zspage);
 
 	get_page(newpage);
@@ -1876,14 +1878,16 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
 
 static void zs_page_putback(struct page *page)
 {
+	struct zs_pool *pool;
 	struct zspage *zspage;
 
 	VM_BUG_ON_PAGE(!PageIsolated(page), page);
 
 	zspage = get_zspage(page);
-	migrate_write_lock(zspage);
+	pool = zspage->pool;
+	spin_lock(&pool->lock);
 	dec_zspage_isolation(zspage);
-	migrate_write_unlock(zspage);
+	spin_unlock(&pool->lock);
 }
 
 static const struct movable_operations zsmalloc_mops = {

@@ -177,7 +177,7 @@ void regression1_test(void)
 	nr_threads = 2;
 	pthread_barrier_init(&worker_barrier, NULL, nr_threads);
 
-	threads = malloc(nr_threads * sizeof(pthread_t *));
+	threads = malloc(nr_threads * sizeof(*threads));
 
 	for (i = 0; i < nr_threads; i++) {
 		arg = i;
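The one-line change above fixes an allocation sized by the pointer type instead of the element type. A standalone sketch of the difference (not part of this commit):

/* Hypothetical illustration only -- not part of this commit. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	int nr_threads = 2;
	pthread_t *threads;

	/*
	 * sizeof(pthread_t *) is the size of a pointer, while the array
	 * elements are pthread_t, which may be larger on some platforms;
	 * sizeof(*threads) always matches the element type (and sizeof
	 * does not evaluate its operand, so it is safe before the
	 * assignment below).
	 */
	printf("sizeof(pthread_t *) = %zu, sizeof(*threads) = %zu\n",
	       sizeof(pthread_t *), sizeof(*threads));

	threads = malloc(nr_threads * sizeof(*threads));	/* correct size */
	if (!threads)
		return 1;
	free(threads);
	return 0;
}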

@@ -70,12 +70,16 @@ static int test_kmem_basic(const char *root)
 		goto cleanup;
 
 	cg_write(cg, "memory.high", "1M");
+
+	/* wait for RCU freeing */
+	sleep(1);
+
 	slab1 = cg_read_key_long(cg, "memory.stat", "slab ");
-	if (slab1 <= 0)
+	if (slab1 < 0)
 		goto cleanup;
 
 	current = cg_read_long(cg, "memory.current");
-	if (current <= 0)
+	if (current < 0)
 		goto cleanup;
 
 	if (slab1 < slab0 / 2 && current < slab0 / 2)

@@ -57,9 +57,14 @@ enum {
 #define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
 /* Just the flags we need, copied from mm.h: */
-#define FOLL_WRITE	0x01	/* check pte is writable */
-#define FOLL_LONGTERM	0x10000	/* mapping lifetime is indefinite */
+#ifndef FOLL_WRITE
+#define FOLL_WRITE	0x01	/* check pte is writable */
+#endif
+
+#ifndef FOLL_LONGTERM
+#define FOLL_LONGTERM	0x100	/* mapping lifetime is indefinite */
+#endif
 
 FIXTURE(hmm)
 {
 	int fd;

@@ -831,6 +831,7 @@ int main(int argc, char *argv[])
 			printf("Size must be greater than 0\n");
 			return KSFT_FAIL;
 		}
+		break;
 	case 't':
 	{
 		int tmp = atoi(optarg);
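The added break keeps the size-option handler from falling through into the next case. A standalone sketch of the fallthrough behaviour (not part of this commit; names are hypothetical):

/* Hypothetical illustration only -- not part of this commit. */
#include <stdio.h>

static void handle_option(char opt)
{
	switch (opt) {
	case 's':
		printf("parsed -s\n");
		break;	/* without this break, control falls into case 't' */
	case 't':
		printf("parsed -t\n");
		break;
	}
}

int main(void)
{
	handle_option('s');	/* prints only "parsed -s" with the break in place */
	return 0;
}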