author	Darrick J. Wong <djwong@kernel.org>	2021-08-06 11:05:43 -0700
committer	Darrick J. Wong <djwong@kernel.org>	2021-08-09 11:13:17 -0700
commit	40b1de007aca4f9ec4ee4322c29f026ebb60ac96 (patch)
tree	0933ecaa5f4f262b63e94f1a8da9bf60e2810ab8 /fs/xfs/xfs_trace.h
parent	xfs: avoid buffer deadlocks when walking fs inodes (diff)
xfs: throttle inode inactivation queuing on memory reclaim
Now that we defer inode inactivation, we've decoupled the process of unlinking or closing an inode from the process of inactivating it. In theory this should lead to better throughput since we now inactivate the queued inodes in batches instead of one at a time.

Unfortunately, one of the primary risks with this decoupling is the loss of rate control feedback between the frontend and background threads. In other words, a rm -rf /* thread can run the system out of memory if it can queue inodes for inactivation and jump to a new CPU faster than the background threads can actually clear the deferred work. The workers can get scheduled off the CPU if they have to do IO, etc.

To solve this problem, we configure a shrinker so that it will activate the /second/ time the shrinkers are called. The custom shrinker will queue all percpu deferred inactivation workers immediately and set a flag to force frontend callers who are releasing a vfs inode to wait for the inactivation workers.

On my test VM with 560M of RAM and a 2TB filesystem, this seems to solve most of the OOMing problem when deleting 10 million inodes.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
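To make the throttling scheme concrete, here is a minimal sketch of a shrinker whose count callback stays quiet on the first reclaim pass and reports work from the second pass onward, and whose scan callback kicks the background work and throttles the frontend. Everything here is hypothetical: example_mount, throttle_frontend, and the single inodegc_work delayed_work (standing in for the real per-cpu workers) are illustrative names, and shrinker registration is omitted because register_shrinker()'s signature varies across kernel versions. The actual implementation lives in fs/xfs/xfs_icache.c and differs in detail.

	#include <linux/kernel.h>
	#include <linux/shrinker.h>
	#include <linux/workqueue.h>

	/* Hypothetical per-mount state for illustration only. */
	struct example_mount {
		struct shrinker		inodegc_shrinker;
		unsigned int		shrinker_hits;	   /* reclaim passes seen */
		bool			throttle_frontend; /* make releasers wait */
		struct delayed_work	inodegc_work;	   /* stand-in for the per-cpu workers */
	};

	static unsigned long
	example_inodegc_shrink_count(struct shrinker *shrink,
			struct shrink_control *sc)
	{
		struct example_mount	*emp = container_of(shrink,
				struct example_mount, inodegc_shrinker);

		/*
		 * Report nothing on the first pass so transient reclaim
		 * does not trigger throttling; a second pass means memory
		 * is still tight, so advertise pending work.
		 */
		if (emp->shrinker_hits++ == 0)
			return 0;
		return 1;
	}

	static unsigned long
	example_inodegc_shrink_scan(struct shrinker *shrink,
			struct shrink_control *sc)
	{
		struct example_mount	*emp = container_of(shrink,
				struct example_mount, inodegc_shrinker);

		/* Force frontend callers releasing vfs inodes to wait. */
		emp->throttle_frontend = true;

		/* Queue the deferred inactivation work immediately. */
		mod_delayed_work(system_unbound_wq, &emp->inodegc_work, 0);

		/* We reclaimed nothing directly; the worker does that. */
		return SHRINK_STOP;
	}

The two-phase count/scan split is what gives the "activate the second time" behavior described above: memory pressure has to persist long enough for reclaim to poll the shrinkers twice before the frontend gets throttled.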
Diffstat (limited to 'fs/xfs/xfs_trace.h')
-rw-r--r--	fs/xfs/xfs_trace.h	37
1 file changed, 36 insertions(+), 1 deletion(-)
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 4a6616490315..57ce91dcc0a6 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -157,6 +157,22 @@ DEFINE_PERAG_REF_EVENT(xfs_perag_put);
DEFINE_PERAG_REF_EVENT(xfs_perag_set_inode_tag);
DEFINE_PERAG_REF_EVENT(xfs_perag_clear_inode_tag);
+TRACE_EVENT(xfs_inodegc_worker,
+ TP_PROTO(struct xfs_mount *mp, unsigned int shrinker_hits),
+ TP_ARGS(mp, shrinker_hits),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned int, shrinker_hits)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->shrinker_hits = shrinker_hits;
+ ),
+ TP_printk("dev %d:%d shrinker_hits %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->shrinker_hits)
+);
+
DECLARE_EVENT_CLASS(xfs_fs_class,
TP_PROTO(struct xfs_mount *mp, void *caller_ip),
TP_ARGS(mp, caller_ip),
@@ -191,7 +207,6 @@ DEFINE_EVENT(xfs_fs_class, name, \
DEFINE_FS_EVENT(xfs_inodegc_flush);
DEFINE_FS_EVENT(xfs_inodegc_start);
DEFINE_FS_EVENT(xfs_inodegc_stop);
-DEFINE_FS_EVENT(xfs_inodegc_worker);
DEFINE_FS_EVENT(xfs_inodegc_queue);
DEFINE_FS_EVENT(xfs_inodegc_throttle);
DEFINE_FS_EVENT(xfs_fs_sync_fs);
@@ -200,6 +215,26 @@ DEFINE_FS_EVENT(xfs_blockgc_stop);
DEFINE_FS_EVENT(xfs_blockgc_worker);
DEFINE_FS_EVENT(xfs_blockgc_flush_all);
+TRACE_EVENT(xfs_inodegc_shrinker_scan,
+ TP_PROTO(struct xfs_mount *mp, struct shrink_control *sc,
+ void *caller_ip),
+ TP_ARGS(mp, sc, caller_ip),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned long, nr_to_scan)
+ __field(void *, caller_ip)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->nr_to_scan = sc->nr_to_scan;
+ __entry->caller_ip = caller_ip;
+ ),
+ TP_printk("dev %d:%d nr_to_scan %lu caller %pS",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->nr_to_scan,
+ __entry->caller_ip)
+);
+
DECLARE_EVENT_CLASS(xfs_ag_class,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno),
TP_ARGS(mp, agno),
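As background on the mechanics: each TRACE_EVENT(name, ...) above generates a trace_name() helper that kernel code calls at the instrumentation point, with arguments matching TP_PROTO. A rough sketch of how the two new tracepoints might be invoked follows; the function bodies are hypothetical stand-ins, not the actual code from fs/xfs/xfs_icache.c.

	#include "xfs_trace.h"

	/* Hypothetical call sites for illustration only. */
	static void example_inodegc_worker(struct xfs_mount *mp,
			unsigned int shrinker_hits)
	{
		/* Record how often the shrinker poked us before this batch. */
		trace_xfs_inodegc_worker(mp, shrinker_hits);
		/* ... inactivate the queued inodes ... */
	}

	static unsigned long example_inodegc_shrink_scan(struct xfs_mount *mp,
			struct shrink_control *sc)
	{
		/* Log the reclaim pressure (nr_to_scan) and who asked. */
		trace_xfs_inodegc_shrinker_scan(mp, sc,
				__builtin_return_address(0));
		/* ... kick the inactivation workers ... */
		return 0;
	}

Note that the worker event now carries a shrinker_hits payload, which is why the patch removes the old DEFINE_FS_EVENT(xfs_inodegc_worker) (a plain xfs_fs_class event with no extra fields) and replaces it with a dedicated TRACE_EVENT.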