author	Kent Overstreet <kent.overstreet@gmail.com>	2019-02-14 18:38:52 -0500
committer	Kent Overstreet <kent.overstreet@linux.dev>	2023-10-22 17:08:16 -0400
commit	768ac63924775d9fe2e76fbb254704d5ee3bcb85 (patch)
tree	ef815d5e731c2a1062ebb27bc1da414098c245a4
parent	bcachefs: Convert bucket invalidation to key marking path (diff)
bcachefs: Add a mechanism for blocking the journal
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
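
For context, a hedged sketch of how the blocking mechanism added here is meant to be used: bch2_journal_block() bumps j->blocked, which makes journal_entry_open() refuse to open new entries (returning -EAGAIN), and then waits for the journal to quiesce; bch2_journal_unblock() drops the count and wakes waiters. The caller name below (do_work_with_journal_blocked) is hypothetical and only illustrates the pairing; only bch2_journal_block()/bch2_journal_unblock() come from this patch.

/* Hypothetical caller, for illustration only: */
static void do_work_with_journal_blocked(struct bch_fs *c)
{
	/* Increment j->blocked and wait until no journal entry is open or in flight: */
	bch2_journal_block(&c->journal);

	/*
	 * ... do work that must not race with new journal entries being
	 * opened ...
	 */

	/* Drop the block count and wake anyone waiting to open an entry: */
	bch2_journal_unblock(&c->journal);
}
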
-rw-r--r--	fs/bcachefs/alloc_foreground.c	2
-rw-r--r--	fs/bcachefs/btree_gc.c	12
-rw-r--r--	fs/bcachefs/buckets.c	94
-rw-r--r--	fs/bcachefs/buckets.h	9
-rw-r--r--	fs/bcachefs/buckets_types.h	28
-rw-r--r--	fs/bcachefs/chardev.c	6
-rw-r--r--	fs/bcachefs/journal.c	44
-rw-r--r--	fs/bcachefs/journal.h	3
-rw-r--r--	fs/bcachefs/journal_types.h	3
-rw-r--r--	fs/bcachefs/recovery.c	2
-rw-r--r--	fs/bcachefs/replicas.c	8
-rw-r--r--	fs/bcachefs/super-io.c	4
-rw-r--r--	fs/bcachefs/sysfs.c	14
13 files changed, 138 insertions, 91 deletions
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index f40fca9328f9..ba0640e3f981 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -724,7 +724,7 @@ static struct write_point *__writepoint_find(struct hlist_head *head,
static inline bool too_many_writepoints(struct bch_fs *c, unsigned factor)
{
u64 stranded = c->write_points_nr * c->bucket_size_max;
- u64 free = bch2_fs_sectors_free(c);
+ u64 free = bch2_fs_usage_read_short(c).free;
return stranded * factor > free;
}
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 922d34abc675..5091966b7b54 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -612,11 +612,11 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
struct bch_fs_usage *src = (void *)
bch2_acc_percpu_u64s((void *) c->usage[1], nr);
- copy_fs_field(s.hidden, "hidden");
- copy_fs_field(s.data, "data");
- copy_fs_field(s.cached, "cached");
- copy_fs_field(s.reserved, "reserved");
- copy_fs_field(s.nr_inodes, "nr_inodes");
+ copy_fs_field(hidden, "hidden");
+ copy_fs_field(data, "data");
+ copy_fs_field(cached, "cached");
+ copy_fs_field(reserved, "reserved");
+ copy_fs_field(nr_inodes, "nr_inodes");
for (i = 0; i < BCH_REPLICAS_MAX; i++)
copy_fs_field(persistent_reserved[i],
@@ -629,7 +629,7 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
bch2_replicas_entry_to_text(&PBUF(buf), e);
- copy_fs_field(data[i], "%s", buf);
+ copy_fs_field(replicas[i], "%s", buf);
}
}
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 3286ee26f7e2..ac54d82f9e11 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -124,7 +124,7 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
usage = (void *) bch2_acc_percpu_u64s((void *) c->usage[0], nr);
for (i = 0; i < BCH_REPLICAS_MAX; i++)
- usage->s.reserved += usage->persistent_reserved[i];
+ usage->reserved += usage->persistent_reserved[i];
for (i = 0; i < c->replicas.nr; i++) {
struct bch_replicas_entry *e =
@@ -133,10 +133,10 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
switch (e->data_type) {
case BCH_DATA_BTREE:
case BCH_DATA_USER:
- usage->s.data += usage->data[i];
+ usage->data += usage->replicas[i];
break;
case BCH_DATA_CACHED:
- usage->s.cached += usage->data[i];
+ usage->cached += usage->replicas[i];
break;
}
}
@@ -144,21 +144,16 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
percpu_up_write(&c->mark_lock);
}
-#define bch2_usage_read_raw(_stats) \
-({ \
- typeof(*this_cpu_ptr(_stats)) _acc; \
- \
- memset(&_acc, 0, sizeof(_acc)); \
- acc_u64s_percpu((u64 *) &_acc, \
- (u64 __percpu *) _stats, \
- sizeof(_acc) / sizeof(u64)); \
- \
- _acc; \
-})
-
struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *c, struct bch_dev *ca)
{
- return bch2_usage_read_raw(ca->usage[0]);
+ struct bch_dev_usage ret;
+
+ memset(&ret, 0, sizeof(ret));
+ acc_u64s_percpu((u64 *) &ret,
+ (u64 __percpu *) ca->usage[0],
+ sizeof(ret) / sizeof(u64));
+
+ return ret;
}
struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *c)
@@ -198,27 +193,44 @@ static u64 avail_factor(u64 r)
return (r << RESERVE_FACTOR) / ((1 << RESERVE_FACTOR) + 1);
}
-u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage fs_usage)
+u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage *fs_usage)
{
- return min(fs_usage.s.hidden +
- fs_usage.s.data +
- reserve_factor(fs_usage.s.reserved +
- fs_usage.s.online_reserved),
+ return min(fs_usage->hidden +
+ fs_usage->data +
+ reserve_factor(fs_usage->reserved +
+ fs_usage->online_reserved),
c->capacity);
}
+static struct bch_fs_usage_short
+__bch2_fs_usage_read_short(struct bch_fs *c)
+{
+ struct bch_fs_usage_short ret;
+ u64 data, reserved;
+
+ ret.capacity = c->capacity -
+ percpu_u64_get(&c->usage[0]->hidden);
+
+ data = percpu_u64_get(&c->usage[0]->data);
+ reserved = percpu_u64_get(&c->usage[0]->reserved) +
+ percpu_u64_get(&c->usage[0]->online_reserved);
+
+ ret.used = min(ret.capacity, data + reserve_factor(reserved));
+ ret.free = ret.capacity - ret.used;
+
+ ret.nr_inodes = percpu_u64_get(&c->usage[0]->nr_inodes);
+
+ return ret;
+}
+
struct bch_fs_usage_short
bch2_fs_usage_read_short(struct bch_fs *c)
{
- struct bch_fs_usage_summarized usage =
- bch2_usage_read_raw(&c->usage[0]->s);
struct bch_fs_usage_short ret;
- ret.capacity = READ_ONCE(c->capacity) - usage.hidden;
- ret.used = min(ret.capacity, usage.data +
- reserve_factor(usage.reserved +
- usage.online_reserved));
- ret.nr_inodes = usage.nr_inodes;
+ percpu_down_read(&c->mark_lock);
+ ret = __bch2_fs_usage_read_short(c);
+ percpu_up_read(&c->mark_lock);
return ret;
}
@@ -257,7 +269,7 @@ int bch2_fs_usage_apply(struct bch_fs *c,
struct bch_fs_usage *fs_usage,
struct disk_reservation *disk_res)
{
- s64 added = fs_usage->s.data + fs_usage->s.reserved;
+ s64 added = fs_usage->data + fs_usage->reserved;
s64 should_not_have_added;
int ret = 0;
@@ -277,7 +289,7 @@ int bch2_fs_usage_apply(struct bch_fs *c,
if (added > 0) {
disk_res->sectors -= added;
- fs_usage->s.online_reserved -= added;
+ fs_usage->online_reserved -= added;
}
preempt_disable();
@@ -295,7 +307,7 @@ static inline void account_bucket(struct bch_fs_usage *fs_usage,
int nr, s64 size)
{
if (type == BCH_DATA_SB || type == BCH_DATA_JOURNAL)
- fs_usage->s.hidden += size;
+ fs_usage->hidden += size;
dev_usage->buckets[type] += nr;
}
@@ -381,10 +393,10 @@ static inline void update_replicas(struct bch_fs *c,
BUG_ON(!sectors);
if (r->data_type == BCH_DATA_CACHED)
- fs_usage->s.cached += sectors;
+ fs_usage->cached += sectors;
else
- fs_usage->s.data += sectors;
- fs_usage->data[idx] += sectors;
+ fs_usage->data += sectors;
+ fs_usage->replicas[idx] += sectors;
}
static inline void update_cached_sectors(struct bch_fs *c,
@@ -911,9 +923,9 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
fs_usage, journal_seq, flags, gc);
case KEY_TYPE_inode:
if (inserting)
- fs_usage->s.nr_inodes++;
+ fs_usage->nr_inodes++;
else
- fs_usage->s.nr_inodes--;
+ fs_usage->nr_inodes--;
return 0;
case KEY_TYPE_reservation: {
unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
@@ -922,7 +934,7 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
replicas = clamp_t(unsigned, replicas, 1,
ARRAY_SIZE(fs_usage->persistent_reserved));
- fs_usage->s.reserved += sectors;
+ fs_usage->reserved += sectors;
fs_usage->persistent_reserved[replicas - 1] += sectors;
return 0;
}
@@ -1074,13 +1086,13 @@ static u64 bch2_recalc_sectors_available(struct bch_fs *c)
{
percpu_u64_set(&c->pcpu->sectors_available, 0);
- return avail_factor(bch2_fs_sectors_free(c));
+ return avail_factor(__bch2_fs_usage_read_short(c).free);
}
void __bch2_disk_reservation_put(struct bch_fs *c, struct disk_reservation *res)
{
percpu_down_read(&c->mark_lock);
- this_cpu_sub(c->usage[0]->s.online_reserved, res->sectors);
+ this_cpu_sub(c->usage[0]->online_reserved, res->sectors);
percpu_up_read(&c->mark_lock);
res->sectors = 0;
@@ -1120,7 +1132,7 @@ int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
out:
pcpu->sectors_available -= sectors;
- this_cpu_add(c->usage[0]->s.online_reserved, sectors);
+ this_cpu_add(c->usage[0]->online_reserved, sectors);
res->sectors += sectors;
preempt_enable();
@@ -1136,7 +1148,7 @@ recalculate:
(flags & BCH_DISK_RESERVATION_NOFAIL)) {
atomic64_set(&c->sectors_available,
max_t(s64, 0, sectors_available - sectors));
- this_cpu_add(c->usage[0]->s.online_reserved, sectors);
+ this_cpu_add(c->usage[0]->online_reserved, sectors);
res->sectors += sectors;
ret = 0;
} else {
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index 973bf605cbd9..67a1d17610f3 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -225,18 +225,11 @@ static inline struct bch_fs_usage *bch2_fs_usage_get_scratch(struct bch_fs *c)
struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *);
-u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage);
+u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage *);
struct bch_fs_usage_short
bch2_fs_usage_read_short(struct bch_fs *);
-static inline u64 bch2_fs_sectors_free(struct bch_fs *c)
-{
- struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c);
-
- return usage.capacity - usage.used;
-}
-
/* key/bucket marking: */
void bch2_bucket_seq_cleanup(struct bch_fs *);
diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h
index 6eaee889f1e1..348d062dd744 100644
--- a/fs/bcachefs/buckets_types.h
+++ b/fs/bcachefs/buckets_types.h
@@ -64,35 +64,33 @@ struct bch_dev_usage {
struct bch_fs_usage {
/* all fields are in units of 512 byte sectors: */
- /* summarized: */
- struct bch_fs_usage_summarized {
- u64 online_reserved;
+ u64 online_reserved;
- /* fields after online_reserved are cleared/recalculated by gc: */
- u64 gc_start[0];
+ /* fields after online_reserved are cleared/recalculated by gc: */
+ u64 gc_start[0];
- u64 hidden;
- u64 data;
- u64 cached;
- u64 reserved;
- u64 nr_inodes;
+ u64 hidden;
+ u64 data;
+ u64 cached;
+ u64 reserved;
+ u64 nr_inodes;
- /* XXX: add stats for compression ratio */
+ /* XXX: add stats for compression ratio */
#if 0
- u64 uncompressed;
- u64 compressed;
+ u64 uncompressed;
+ u64 compressed;
#endif
- } s;
/* broken out: */
u64 persistent_reserved[BCH_REPLICAS_MAX];
- u64 data[];
+ u64 replicas[];
};
struct bch_fs_usage_short {
u64 capacity;
u64 used;
+ u64 free;
u64 nr_inodes;
};
diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c
index f090b61f23f1..5ee38a6a442f 100644
--- a/fs/bcachefs/chardev.c
+++ b/fs/bcachefs/chardev.c
@@ -403,10 +403,10 @@ static long bch2_ioctl_usage(struct bch_fs *c,
if (!src)
return -ENOMEM;
- percpu_up_read(&c->mark_lock);
+ dst.used = bch2_fs_sectors_used(c, src);
+ dst.online_reserved = src->online_reserved;
- dst.used = bch2_fs_sectors_used(c, *src);
- dst.online_reserved = src->s.online_reserved;
+ percpu_up_read(&c->mark_lock);
for (i = 0; i < BCH_REPLICAS_MAX; i++) {
dst.persistent_reserved[i] =
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index dd10f1c993e5..cf4729b7a083 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -212,6 +212,9 @@ static int journal_entry_open(struct journal *j)
lockdep_assert_held(&j->lock);
BUG_ON(journal_entry_is_open(j));
+ if (j->blocked)
+ return -EAGAIN;
+
if (!fifo_free(&j->pin))
return 0;
@@ -287,7 +290,7 @@ static bool __journal_entry_close(struct journal *j)
spin_unlock(&j->lock);
fallthrough;
case JOURNAL_UNLOCKED:
- return true;
+ return false;
}
}
@@ -297,6 +300,22 @@ static bool journal_entry_close(struct journal *j)
return __journal_entry_close(j);
}
+static bool journal_quiesced(struct journal *j)
+{
+ bool ret;
+
+ spin_lock(&j->lock);
+ ret = !j->reservations.prev_buf_unwritten &&
+ !journal_entry_is_open(j);
+ __journal_entry_close(j);
+ return ret;
+}
+
+static void journal_quiesce(struct journal *j)
+{
+ wait_event(j->wait, journal_quiesced(j));
+}
+
static void journal_write_work(struct work_struct *work)
{
struct journal *j = container_of(work, struct journal, write_work.work);
@@ -722,6 +741,26 @@ int bch2_journal_flush(struct journal *j)
return bch2_journal_flush_seq(j, seq);
}
+/* block/unlock the journal: */
+
+void bch2_journal_unblock(struct journal *j)
+{
+ spin_lock(&j->lock);
+ j->blocked--;
+ spin_unlock(&j->lock);
+
+ journal_wake(j);
+}
+
+void bch2_journal_block(struct journal *j)
+{
+ spin_lock(&j->lock);
+ j->blocked++;
+ spin_unlock(&j->lock);
+
+ journal_quiesce(j);
+}
+
/* allocate journal on a device: */
static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
@@ -931,8 +970,7 @@ void bch2_fs_journal_stop(struct journal *j)
c->btree_roots_dirty)
bch2_journal_meta(j);
- BUG_ON(journal_entry_is_open(j) ||
- j->reservations.prev_buf_unwritten);
+ journal_quiesce(j);
BUG_ON(!bch2_journal_error(j) &&
test_bit(JOURNAL_NOT_EMPTY, &j->flags));
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index 6ef34bdae628..5290cdeab585 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -370,6 +370,9 @@ static inline void bch2_journal_set_replay_done(struct journal *j)
set_bit(JOURNAL_REPLAY_DONE, &j->flags);
}
+void bch2_journal_unblock(struct journal *);
+void bch2_journal_block(struct journal *);
+
ssize_t bch2_journal_print_debug(struct journal *, char *);
ssize_t bch2_journal_print_pins(struct journal *, char *);
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index 5f6d2320c5cd..e952eb06eff5 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -142,6 +142,9 @@ struct journal {
spinlock_t lock;
+ /* if nonzero, we may not open a new journal entry: */
+ unsigned blocked;
+
/* Used when waiting because the journal was full */
wait_queue_head_t wait;
struct closure_waitlist async_wait;
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index e28917cf2cec..5ceab8c14d72 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -83,7 +83,7 @@ static int journal_replay_entry_early(struct bch_fs *c,
le64_to_cpu(u->v));
break;
case FS_USAGE_INODES:
- percpu_u64_set(&c->usage[0]->s.nr_inodes,
+ percpu_u64_set(&c->usage[0]->nr_inodes,
le64_to_cpu(u->v));
break;
case FS_USAGE_KEY_VERSION:
diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c
index 6fee8fe37688..03bb6b51d15f 100644
--- a/fs/bcachefs/replicas.c
+++ b/fs/bcachefs/replicas.c
@@ -245,14 +245,14 @@ static void __replicas_table_update(struct bch_fs_usage __percpu *dst_p,
*dst = *src;
for (src_idx = 0; src_idx < src_r->nr; src_idx++) {
- if (!src->data[src_idx])
+ if (!src->replicas[src_idx])
continue;
dst_idx = __replicas_entry_idx(dst_r,
cpu_replicas_entry(src_r, src_idx));
BUG_ON(dst_idx < 0);
- dst->data[dst_idx] = src->data[src_idx];
+ dst->replicas[dst_idx] = src->replicas[src_idx];
}
}
@@ -457,7 +457,7 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret)
if (__replicas_has_entry(&c->replicas_gc, e))
continue;
- v = percpu_u64_get(&c->usage[0]->data[i]);
+ v = percpu_u64_get(&c->usage[0]->replicas[i]);
if (!v)
continue;
@@ -558,7 +558,7 @@ int bch2_replicas_set_usage(struct bch_fs *c,
BUG_ON(ret < 0);
}
- percpu_u64_set(&c->usage[0]->data[idx], sectors);
+ percpu_u64_set(&c->usage[0]->replicas[idx], sectors);
return 0;
}
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index 0b3a761fe93e..66e174d93a9c 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -930,7 +930,7 @@ bch2_journal_super_entries_add_common(struct bch_fs *c,
percpu_down_write(&c->mark_lock);
{
- u64 nr_inodes = percpu_u64_get(&c->usage[0]->s.nr_inodes);
+ u64 nr_inodes = percpu_u64_get(&c->usage[0]->nr_inodes);
struct jset_entry_usage *u =
container_of(entry, struct jset_entry_usage, entry);
@@ -977,7 +977,7 @@ bch2_journal_super_entries_add_common(struct bch_fs *c,
for (i = 0; i < c->replicas.nr; i++) {
struct bch_replicas_entry *e =
cpu_replicas_entry(&c->replicas, i);
- u64 sectors = percpu_u64_get(&c->usage[0]->data[i]);
+ u64 sectors = percpu_u64_get(&c->usage[0]->replicas[i]);
struct jset_entry_data_usage *u =
container_of(entry, struct jset_entry_data_usage, entry);
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index 8ad7b6026d1b..361f7b7addcf 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -244,17 +244,17 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
pr_buf(&out, "capacity:\t\t\t%llu\n", c->capacity);
pr_buf(&out, "hidden:\t\t\t\t%llu\n",
- fs_usage->s.hidden);
+ fs_usage->hidden);
pr_buf(&out, "data:\t\t\t\t%llu\n",
- fs_usage->s.data);
+ fs_usage->data);
pr_buf(&out, "cached:\t\t\t\t%llu\n",
- fs_usage->s.cached);
+ fs_usage->cached);
pr_buf(&out, "reserved:\t\t\t%llu\n",
- fs_usage->s.reserved);
+ fs_usage->reserved);
pr_buf(&out, "nr_inodes:\t\t\t%llu\n",
- fs_usage->s.nr_inodes);
+ fs_usage->nr_inodes);
pr_buf(&out, "online reserved:\t\t%llu\n",
- fs_usage->s.online_reserved);
+ fs_usage->online_reserved);
for (i = 0;
i < ARRAY_SIZE(fs_usage->persistent_reserved);
@@ -270,7 +270,7 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
pr_buf(&out, "\t");
bch2_replicas_entry_to_text(&out, e);
- pr_buf(&out, ":\t%llu\n", fs_usage->data[i]);
+ pr_buf(&out, ":\t%llu\n", fs_usage->replicas[i]);
}
percpu_up_read(&c->mark_lock);