mm/vma: document possible vma->vm_refcnt values and reference comment

The possible vma->vm_refcnt values are confusing and vague, explain in
detail what these can be in a comment describing the vma->vm_refcnt field
and reference this comment in various places that read/write this field.

No functional change intended.

[akpm@linux-foundation.org: fix typo, per Suren]
Link: https://lkml.kernel.org/r/d462e7678c6cc7461f94e5b26c776547d80a67e8.1769198904.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Waiman Long <longman@redhat.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Lorenzo Stoakes 2026-01-23 20:12:12 +00:00 committed by Andrew Morton
parent 25faccd699
commit ef4c0cea1e
3 changed files with 53 additions and 2 deletions

View File

@ -758,7 +758,8 @@ static inline struct anon_vma_name *anon_vma_name_alloc(const char *name)
* set the VM_REFCNT_EXCLUDE_READERS_FLAG in vma->vm_refcnt to indicate to
* vma_start_read() that the reference count should be left alone.
*
* Once the operation is complete, this value is subtracted from vma->vm_refcnt.
* See the comment describing vm_refcnt in vm_area_struct for details as to
* which values the VMA reference count can be.
*/
#define VM_REFCNT_EXCLUDE_READERS_BIT (30)
#define VM_REFCNT_EXCLUDE_READERS_FLAG (1U << VM_REFCNT_EXCLUDE_READERS_BIT)
@ -989,7 +990,44 @@ struct vm_area_struct {
struct vma_numab_state *numab_state; /* NUMA Balancing state */
#endif
#ifdef CONFIG_PER_VMA_LOCK
/* Unstable RCU readers are allowed to read this. */
/*
* Used to keep track of firstly, whether the VMA is attached, secondly,
* if attached, how many read locks are taken, and thirdly, if the
* VM_REFCNT_EXCLUDE_READERS_FLAG is set, whether any read locks held
* are currently in the process of being excluded.
*
* This value can be equal to:
*
* 0 - Detached. IMPORTANT: when the refcnt is zero, readers cannot
* increment it.
*
* 1 - Attached and either unlocked or write-locked. Write locks are
* identified via __is_vma_write_locked() which checks for equality of
* vma->vm_lock_seq and mm->mm_lock_seq.
*
* >1, < VM_REFCNT_EXCLUDE_READERS_FLAG - Read-locked or (unlikely)
* write-locked with other threads having temporarily incremented the
* reference count prior to determining it is write-locked and
* decrementing it again.
*
* VM_REFCNT_EXCLUDE_READERS_FLAG - Detached, pending
* __vma_exit_locked() completion which will decrement the reference
* count to zero. IMPORTANT - at this stage no further readers can
* increment the reference count. It can only be reduced.
*
* VM_REFCNT_EXCLUDE_READERS_FLAG + 1 - A thread is either write-locking
* an attached VMA and has yet to invoke __vma_exit_locked(), OR a
* thread is detaching a VMA and is waiting on a single spurious reader
* in order to decrement the reference count. IMPORTANT - as above, no
* further readers can increment the reference count.
*
* > VM_REFCNT_EXCLUDE_READERS_FLAG + 1 - A thread is either
* write-locking or detaching a VMA and is waiting on readers to
* exit. IMPORTANT - as above, no further readers can increment the
* reference count.
*
* NOTE: Unstable RCU readers are allowed to read this.
*/
refcount_t vm_refcnt ____cacheline_aligned_in_smp;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map vmlock_dep_map;

View File

@ -130,6 +130,9 @@ static inline bool is_vma_writer_only(int refcnt)
* attached. Waiting on a detached vma happens only in
* vma_mark_detached() and is a rare case, therefore most of the time
* there will be no unnecessary wakeup.
*
* See the comment describing the vm_area_struct->vm_refcnt field for
* details of possible refcnt values.
*/
return (refcnt & VM_REFCNT_EXCLUDE_READERS_FLAG) &&
refcnt <= VM_REFCNT_EXCLUDE_READERS_FLAG + 1;
@ -249,6 +252,10 @@ static inline void vma_assert_locked(struct vm_area_struct *vma)
{
unsigned int mm_lock_seq;
/*
* See the comment describing the vm_area_struct->vm_refcnt field for
* details of possible refcnt values.
*/
VM_BUG_ON_VMA(refcount_read(&vma->vm_refcnt) <= 1 &&
!__is_vma_write_locked(vma, &mm_lock_seq), vma);
}

View File

@ -65,6 +65,9 @@ static inline int __vma_enter_locked(struct vm_area_struct *vma,
/*
* If vma is detached then only vma_mark_attached() can raise the
* vm_refcnt. mmap_write_lock prevents racing with vma_mark_attached().
*
* See the comment describing the vm_area_struct->vm_refcnt field for
* details of possible refcnt values.
*/
if (!refcount_add_not_zero(VM_REFCNT_EXCLUDE_READERS_FLAG, &vma->vm_refcnt))
return 0;
@ -137,6 +140,9 @@ void vma_mark_detached(struct vm_area_struct *vma)
* before they check vm_lock_seq, realize the vma is locked and drop
* back the vm_refcnt. That is a narrow window for observing a raised
* vm_refcnt.
*
* See the comment describing the vm_area_struct->vm_refcnt field for
* details of possible refcnt values.
*/
if (unlikely(!refcount_dec_and_test(&vma->vm_refcnt))) {
/* Wait until vma is detached with no readers. */