author    Kent Overstreet <kent.overstreet@gmail.com>  2019-04-04 21:53:12 -0400
committer Kent Overstreet <kent.overstreet@linux.dev>  2023-10-22 17:08:20 -0400
commit    1dd7f9d98de0740b42f1ac3f0b1d8af9c76801de
tree      bcc22ad8766da57180ccc67812966aab79434512 /fs/bcachefs
parent    bcachefs: don't lose errors from iterators that have been freed
bcachefs: Rewrite journal_seq_blacklist machinery
Now, we store blacklisted journal sequence numbers in the superblock, not the journal: this greatly simplifies the code, and more importantly the new scheme doesn't require all btree nodes to be visited before starting the journal - instead, we unconditionally blacklist the next 4 journal sequence numbers after an unclean shutdown.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
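The core idea: blacklisted ranges now live in a sorted table derived from the superblock, and a sequence number is blacklisted iff it falls inside some half-open [start, end) range (see bch2_journal_seq_is_blacklisted() below). As a minimal sketch of the lookup semantics - assuming a plain sorted array rather than the eytzinger-ordered table the patch actually uses:

/* Sketch only: simplified lookup over ranges sorted by start.
 * The patch itself uses eytzinger0_find_le() on an eytzinger-ordered table. */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct blacklist_range {
	uint64_t start;	/* first blacklisted seq */
	uint64_t end;	/* one past the last blacklisted seq */
};

static bool seq_is_blacklisted(const struct blacklist_range *t, size_t nr,
			       uint64_t seq)
{
	size_t l = 0, r = nr;

	/* binary search for the last entry with start <= seq: */
	while (l < r) {
		size_t m = l + (r - l) / 2;

		if (t[m].start <= seq)
			l = m + 1;
		else
			r = m;
	}

	return l && seq < t[l - 1].end;
}

So after an unclean shutdown where the last journal entry written was seq 100, recovery adds [101, 105) to this table and restarts the journal at seq 105.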
Diffstat (limited to 'fs/bcachefs')
 fs/bcachefs/bcachefs.h              |  16
 fs/bcachefs/bcachefs_format.h       |  18
 fs/bcachefs/btree_io.c              |  24
 fs/bcachefs/btree_iter.c            |   2
 fs/bcachefs/inode.h                 |   2
 fs/bcachefs/journal.c               |  65
 fs/bcachefs/journal.h               |   4
 fs/bcachefs/journal_io.c            | 108
 fs/bcachefs/journal_io.h            |   1
 fs/bcachefs/journal_seq_blacklist.c | 491
 fs/bcachefs/journal_seq_blacklist.h |  15
 fs/bcachefs/journal_types.h         |  22
 fs/bcachefs/recovery.c              | 154
 fs/bcachefs/super-io.c              |   1
 fs/bcachefs/super.c                 |   9
 15 files changed, 460 insertions(+), 472 deletions(-)
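One more piece of context before the hunks: bch2_journal_seq_blacklist_add() keeps the superblock table coalesced - when a new range swallows an existing entry, blacklist_entry_try_merge() folds it together with touching neighbours, preserving the invariant the new .validate hook checks (start < end within an entry, consecutive entries non-overlapping). A hedged sketch of that coalescing step, reusing the struct from the sketch above:

/* Sketch: coalesce ranges sorted by start, merging any that touch or
 * overlap; returns the new entry count. The patch does the equivalent
 * in place on the superblock field via blacklist_entry_try_merge(). */
static size_t merge_ranges(struct blacklist_range *r, size_t nr)
{
	size_t i, out = 0;

	for (i = 0; i < nr; i++) {
		if (out && r[i].start <= r[out - 1].end) {
			/* extends (or is contained in) the previous range */
			if (r[i].end > r[out - 1].end)
				r[out - 1].end = r[i].end;
		} else {
			r[out++] = r[i];
		}
	}

	return out;
}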
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index d8c487e33592..8acdc7ffeca3 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -185,6 +185,7 @@
#include <linux/closure.h>
#include <linux/kobject.h>
#include <linux/list.h>
+#include <linux/math64.h>
#include <linux/mutex.h>
#include <linux/percpu-refcount.h>
#include <linux/percpu-rwsem.h>
@@ -486,6 +487,7 @@ enum {
BCH_FS_RW,
/* shutdown: */
+ BCH_FS_STOPPING,
BCH_FS_EMERGENCY_RO,
BCH_FS_WRITE_DISABLE_COMPLETE,
@@ -511,6 +513,15 @@ struct bch_fs_pcpu {
u64 sectors_available;
};
+struct journal_seq_blacklist_table {
+ size_t nr;
+ struct journal_seq_blacklist_table_entry {
+ u64 start;
+ u64 end;
+ bool dirty;
+ } entries[0];
+};
+
struct bch_fs {
struct closure cl;
@@ -646,6 +657,11 @@ struct bch_fs {
struct io_clock io_clock[2];
+ /* JOURNAL SEQ BLACKLIST */
+ struct journal_seq_blacklist_table *
+ journal_seq_blacklist_table;
+ struct work_struct journal_seq_blacklist_gc_work;
+
/* ALLOCATOR */
spinlock_t freelist_lock;
struct closure_waitlist freelist_wait;
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 646910a6a4bb..7edc410c5391 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -909,7 +909,8 @@ struct bch_sb_field {
x(quota, 4) \
x(disk_groups, 5) \
x(clean, 6) \
- x(replicas, 7)
+ x(replicas, 7) \
+ x(journal_seq_blacklist, 8)
enum bch_sb_field_type {
#define x(f, nr) BCH_SB_FIELD_##f = nr,
@@ -1124,6 +1125,20 @@ struct bch_sb_field_clean {
};
};
+struct journal_seq_blacklist_entry {
+ __le64 start;
+ __le64 end;
+};
+
+struct bch_sb_field_journal_seq_blacklist {
+ struct bch_sb_field field;
+
+ union {
+ struct journal_seq_blacklist_entry start[0];
+ __u64 _data[0];
+ };
+};
+
/* Superblock: */
/*
@@ -1279,6 +1294,7 @@ enum bch_sb_features {
BCH_FEATURE_ZSTD = 2,
BCH_FEATURE_ATOMIC_NLINK = 3, /* should have gone under compat */
BCH_FEATURE_EC = 4,
+ BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3 = 5,
BCH_FEATURE_NR,
};
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 10b3d53b6ebb..fa261a175f5e 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -770,7 +770,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
struct btree_node *sorted;
struct bkey_packed *k;
struct bset *i;
- bool used_mempool;
+ bool used_mempool, blacklisted;
unsigned u64s;
int ret, retry_read = 0, write = READ;
@@ -844,20 +844,15 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
b->written += sectors;
- ret = bch2_journal_seq_should_ignore(c, le64_to_cpu(i->journal_seq), b);
- if (ret < 0) {
- btree_err(BTREE_ERR_FATAL, c, b, i,
- "insufficient memory");
- goto err;
- }
+ blacklisted = bch2_journal_seq_is_blacklisted(c,
+ le64_to_cpu(i->journal_seq),
+ true);
- if (ret) {
- btree_err_on(first,
- BTREE_ERR_FIXABLE, c, b, i,
- "first btree node bset has blacklisted journal seq");
- if (!first)
- continue;
- }
+ btree_err_on(blacklisted && first,
+ BTREE_ERR_FIXABLE, c, b, i,
+ "first btree node bset has blacklisted journal seq");
+ if (blacklisted && !first)
+ continue;
bch2_btree_node_iter_large_push(iter, b,
i->start,
@@ -930,7 +925,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
out:
mempool_free(iter, &c->fill_iter);
return retry_read;
-err:
fsck_err:
if (ret == BTREE_RETRY_READ) {
retry_read = 1;
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 02eb28bfe9b9..6b9af53a3e77 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -1156,6 +1156,8 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter, unsigned depth)
if (!btree_iter_node(iter, iter->level))
return NULL;
+ bch2_trans_cond_resched(iter->trans);
+
btree_iter_up(iter);
if (!bch2_btree_node_relock(iter, iter->level))
diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h
index ada639c06619..af0c355f2f04 100644
--- a/fs/bcachefs/inode.h
+++ b/fs/bcachefs/inode.h
@@ -4,8 +4,6 @@
#include "opts.h"
-#include <linux/math64.h>
-
extern const char * const bch2_inode_opts[];
const char *bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c);
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index dbecb4072af0..2e84af8a044c 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -988,27 +988,57 @@ void bch2_fs_journal_stop(struct journal *j)
cancel_delayed_work_sync(&j->reclaim_work);
}
-void bch2_fs_journal_start(struct journal *j)
+int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
+ struct list_head *journal_entries)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
- struct journal_seq_blacklist *bl;
- u64 blacklist = 0;
+ struct journal_entry_pin_list *p;
+ struct journal_replay *i;
+ u64 last_seq = cur_seq, nr, seq;
+
+ if (!list_empty(journal_entries))
+ last_seq = le64_to_cpu(list_last_entry(journal_entries,
+ struct journal_replay,
+ list)->j.last_seq);
+
+ nr = cur_seq - last_seq;
+
+ if (nr + 1 > j->pin.size) {
+ free_fifo(&j->pin);
+ init_fifo(&j->pin, roundup_pow_of_two(nr + 1), GFP_KERNEL);
+ if (!j->pin.data) {
+ bch_err(c, "error reallocating journal fifo (%llu open entries)", nr);
+ return -ENOMEM;
+ }
+ }
+
+ j->last_seq_ondisk = last_seq;
+ j->pin.front = last_seq;
+ j->pin.back = cur_seq;
+ atomic64_set(&j->seq, cur_seq - 1);
+
+ fifo_for_each_entry_ptr(p, &j->pin, seq) {
+ INIT_LIST_HEAD(&p->list);
+ INIT_LIST_HEAD(&p->flushed);
+ atomic_set(&p->count, 0);
+ p->devs.nr = 0;
+ }
+
+ list_for_each_entry(i, journal_entries, list) {
+ seq = le64_to_cpu(i->j.seq);
+
+ BUG_ON(seq < last_seq || seq >= cur_seq);
- list_for_each_entry(bl, &j->seq_blacklist, list)
- blacklist = max(blacklist, bl->end);
+ p = journal_seq_pin(j, seq);
+
+ atomic_set(&p->count, 1);
+ p->devs = i->devs;
+ }
spin_lock(&j->lock);
set_bit(JOURNAL_STARTED, &j->flags);
- while (journal_cur_seq(j) < blacklist)
- journal_pin_new_entry(j, 0);
-
- /*
- * __journal_entry_close() only inits the next journal entry when it
- * closes an open journal entry - the very first journal entry gets
- * initialized here:
- */
journal_pin_new_entry(j, 1);
bch2_journal_buf_init(j);
@@ -1017,12 +1047,7 @@ void bch2_fs_journal_start(struct journal *j)
bch2_journal_space_available(j);
spin_unlock(&j->lock);
- /*
- * Adding entries to the next journal entry before allocating space on
- * disk for the next journal entry - this is ok, because these entries
- * only have to go down with the next journal entry we write:
- */
- bch2_journal_seq_blacklist_write(j);
+ return 0;
}
/* init/exit: */
@@ -1090,8 +1115,6 @@ int bch2_fs_journal_init(struct journal *j)
INIT_DELAYED_WORK(&j->write_work, journal_write_work);
INIT_DELAYED_WORK(&j->reclaim_work, bch2_journal_reclaim_work);
init_waitqueue_head(&j->pin_flush_wait);
- mutex_init(&j->blacklist_lock);
- INIT_LIST_HEAD(&j->seq_blacklist);
mutex_init(&j->reclaim_lock);
mutex_init(&j->discard_lock);
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index 809cf25f5a03..3447b4ad462d 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -472,8 +472,10 @@ int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *,
int bch2_dev_journal_alloc(struct bch_dev *);
void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
+
void bch2_fs_journal_stop(struct journal *);
-void bch2_fs_journal_start(struct journal *);
+int bch2_fs_journal_start(struct journal *, u64, struct list_head *);
+
void bch2_dev_journal_exit(struct bch_dev *);
int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *);
void bch2_fs_journal_exit(struct journal *);
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 1293bb66e62c..8010b38114ac 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -10,7 +10,6 @@
#include "journal.h"
#include "journal_io.h"
#include "journal_reclaim.h"
-#include "journal_seq_blacklist.h"
#include "replicas.h"
#include "trace.h"
@@ -655,45 +654,11 @@ void bch2_journal_entries_free(struct list_head *list)
}
}
-int bch2_journal_set_seq(struct bch_fs *c, u64 last_seq, u64 end_seq)
-{
- struct journal *j = &c->journal;
- struct journal_entry_pin_list *p;
- u64 seq, nr = end_seq - last_seq + 1;
-
- if (nr > j->pin.size) {
- free_fifo(&j->pin);
- init_fifo(&j->pin, roundup_pow_of_two(nr), GFP_KERNEL);
- if (!j->pin.data) {
- bch_err(c, "error reallocating journal fifo (%llu open entries)", nr);
- return -ENOMEM;
- }
- }
-
- atomic64_set(&j->seq, end_seq);
- j->last_seq_ondisk = last_seq;
-
- j->pin.front = last_seq;
- j->pin.back = end_seq + 1;
-
- fifo_for_each_entry_ptr(p, &j->pin, seq) {
- INIT_LIST_HEAD(&p->list);
- INIT_LIST_HEAD(&p->flushed);
- atomic_set(&p->count, 0);
- p->devs.nr = 0;
- }
-
- return 0;
-}
-
int bch2_journal_read(struct bch_fs *c, struct list_head *list)
{
- struct journal *j = &c->journal;
struct journal_list jlist;
struct journal_replay *i;
- struct journal_entry_pin_list *p;
struct bch_dev *ca;
- u64 cur_seq, end_seq;
unsigned iter;
size_t keys = 0, entries = 0;
bool degraded = false;
@@ -725,17 +690,12 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
if (jlist.ret)
return jlist.ret;
- if (list_empty(list)){
- bch_err(c, "no journal entries found");
- return BCH_FSCK_REPAIR_IMPOSSIBLE;
- }
-
list_for_each_entry(i, list, list) {
+ struct jset_entry *entry;
+ struct bkey_i *k, *_n;
struct bch_replicas_padded replicas;
char buf[80];
- bch2_devlist_to_replicas(&replicas.e, BCH_DATA_JOURNAL, i->devs);
-
ret = jset_validate_entries(c, &i->j, READ);
if (ret)
goto fsck_err;
@@ -745,6 +705,8 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
* the devices - this is wrong:
*/
+ bch2_devlist_to_replicas(&replicas.e, BCH_DATA_JOURNAL, i->devs);
+
if (!degraded &&
(test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
fsck_err_on(!bch2_replicas_marked(c, &replicas.e, false), c,
@@ -755,68 +717,18 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
if (ret)
return ret;
}
- }
-
- i = list_last_entry(list, struct journal_replay, list);
-
- ret = bch2_journal_set_seq(c,
- le64_to_cpu(i->j.last_seq),
- le64_to_cpu(i->j.seq));
- if (ret)
- return ret;
-
- mutex_lock(&j->blacklist_lock);
-
- list_for_each_entry(i, list, list) {
- p = journal_seq_pin(j, le64_to_cpu(i->j.seq));
-
- atomic_set(&p->count, 1);
- p->devs = i->devs;
-
- if (bch2_journal_seq_blacklist_read(j, i)) {
- mutex_unlock(&j->blacklist_lock);
- return -ENOMEM;
- }
- }
-
- mutex_unlock(&j->blacklist_lock);
-
- cur_seq = journal_last_seq(j);
- end_seq = le64_to_cpu(list_last_entry(list,
- struct journal_replay, list)->j.seq);
-
- list_for_each_entry(i, list, list) {
- struct jset_entry *entry;
- struct bkey_i *k, *_n;
- bool blacklisted;
-
- mutex_lock(&j->blacklist_lock);
- while (cur_seq < le64_to_cpu(i->j.seq) &&
- bch2_journal_seq_blacklist_find(j, cur_seq))
- cur_seq++;
-
- blacklisted = bch2_journal_seq_blacklist_find(j,
- le64_to_cpu(i->j.seq));
- mutex_unlock(&j->blacklist_lock);
-
- fsck_err_on(blacklisted, c,
- "found blacklisted journal entry %llu",
- le64_to_cpu(i->j.seq));
-
- fsck_err_on(le64_to_cpu(i->j.seq) != cur_seq, c,
- "journal entries %llu-%llu missing! (replaying %llu-%llu)",
- cur_seq, le64_to_cpu(i->j.seq) - 1,
- journal_last_seq(j), end_seq);
-
- cur_seq = le64_to_cpu(i->j.seq) + 1;
for_each_jset_key(k, _n, entry, &i->j)
keys++;
entries++;
}
- bch_info(c, "journal read done, %zu keys in %zu entries, seq %llu",
- keys, entries, journal_cur_seq(j));
+ if (!list_empty(list)) {
+ i = list_last_entry(list, struct journal_replay, list);
+
+ bch_info(c, "journal read done, %zu keys in %zu entries, seq %llu",
+ keys, entries, le64_to_cpu(i->j.seq));
+ }
fsck_err:
return ret;
}
diff --git a/fs/bcachefs/journal_io.h b/fs/bcachefs/journal_io.h
index a79c396903f0..4bb174839956 100644
--- a/fs/bcachefs/journal_io.h
+++ b/fs/bcachefs/journal_io.h
@@ -35,7 +35,6 @@ static inline struct jset_entry *__jset_entry_type_next(struct jset *jset,
for_each_jset_entry_type(entry, jset, BCH_JSET_ENTRY_btree_keys) \
vstruct_for_each_safe(entry, k, _n)
-int bch2_journal_set_seq(struct bch_fs *c, u64, u64);
int bch2_journal_read(struct bch_fs *, struct list_head *);
void bch2_journal_entries_free(struct list_head *);
int bch2_journal_replay(struct bch_fs *, struct list_head *);
diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c
index 45c8d38d12de..0df8dfccd5b5 100644
--- a/fs/bcachefs/journal_seq_blacklist.c
+++ b/fs/bcachefs/journal_seq_blacklist.c
@@ -1,13 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
-#include "btree_update.h"
-#include "btree_update_interior.h"
-#include "error.h"
-#include "journal.h"
-#include "journal_io.h"
-#include "journal_reclaim.h"
+#include "btree_iter.h"
+#include "eytzinger.h"
#include "journal_seq_blacklist.h"
+#include "super-io.h"
/*
* journal_seq_blacklist machinery:
@@ -37,327 +34,285 @@
* record that it was blacklisted so that a) on recovery we don't think we have
* missing journal entries and b) so that the btree code continues to ignore
* that bset, until that btree node is rewritten.
- *
- * Blacklisted journal sequence numbers are themselves recorded as entries in
- * the journal.
*/
-/*
- * Called when journal needs to evict a blacklist entry to reclaim space: find
- * any btree nodes that refer to the blacklist journal sequence numbers, and
- * rewrite them:
- */
-static void journal_seq_blacklist_flush(struct journal *j,
- struct journal_entry_pin *pin, u64 seq)
+static unsigned
+blacklist_nr_entries(struct bch_sb_field_journal_seq_blacklist *bl)
{
- struct bch_fs *c =
- container_of(j, struct bch_fs, journal);
- struct journal_seq_blacklist *bl =
- container_of(pin, struct journal_seq_blacklist, pin);
- struct blacklisted_node n;
- struct closure cl;
- unsigned i;
- int ret;
+ return bl
+ ? ((vstruct_end(&bl->field) - (void *) &bl->start[0]) /
+ sizeof(struct journal_seq_blacklist_entry))
+ : 0;
+}
- closure_init_stack(&cl);
+static unsigned sb_blacklist_u64s(unsigned nr)
+{
+ struct bch_sb_field_journal_seq_blacklist *bl;
- for (i = 0;; i++) {
- struct btree_trans trans;
- struct btree_iter *iter;
- struct btree *b;
+ return (sizeof(*bl) + sizeof(bl->start[0]) * nr) / sizeof(u64);
+}
- bch2_trans_init(&trans, c);
+static struct bch_sb_field_journal_seq_blacklist *
+blacklist_entry_try_merge(struct bch_fs *c,
+ struct bch_sb_field_journal_seq_blacklist *bl,
+ unsigned i)
+{
+ unsigned nr = blacklist_nr_entries(bl);
+
+ if (le64_to_cpu(bl->start[i].end) >=
+ le64_to_cpu(bl->start[i + 1].start)) {
+ bl->start[i].end = bl->start[i + 1].end;
+ --nr;
+ memmove(&bl->start[i],
+ &bl->start[i + 1],
+ sizeof(bl->start[0]) * (nr - i));
+
+ bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
+ sb_blacklist_u64s(nr));
+ BUG_ON(!bl);
+ }
- mutex_lock(&j->blacklist_lock);
- if (i >= bl->nr_entries) {
- mutex_unlock(&j->blacklist_lock);
- break;
- }
- n = bl->entries[i];
- mutex_unlock(&j->blacklist_lock);
+ return bl;
+}
- iter = bch2_trans_get_node_iter(&trans, n.btree_id, n.pos,
- 0, 0, 0);
+int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end)
+{
+ struct bch_sb_field_journal_seq_blacklist *bl;
+ unsigned i, nr;
+ int ret = 0;
- b = bch2_btree_iter_peek_node(iter);
+ mutex_lock(&c->sb_lock);
+ bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
+ nr = blacklist_nr_entries(bl);
- /* The node might have already been rewritten: */
+ if (bl) {
+ for (i = 0; i < nr; i++) {
+ struct journal_seq_blacklist_entry *e =
+ bl->start + i;
- if (b->data->keys.seq == n.seq) {
- ret = bch2_btree_node_rewrite(c, iter, n.seq, 0);
- if (ret) {
- bch2_trans_exit(&trans);
- bch2_fs_fatal_error(c,
- "error %i rewriting btree node with blacklisted journal seq",
- ret);
- bch2_journal_halt(j);
- return;
+ if (start == le64_to_cpu(e->start) &&
+ end == le64_to_cpu(e->end))
+ goto out;
+
+ if (start <= le64_to_cpu(e->start) &&
+ end >= le64_to_cpu(e->end)) {
+ e->start = cpu_to_le64(start);
+ e->end = cpu_to_le64(end);
+
+ if (i + 1 < nr)
+ bl = blacklist_entry_try_merge(c,
+ bl, i);
+ if (i)
+ bl = blacklist_entry_try_merge(c,
+ bl, i - 1);
+ goto out_write_sb;
}
}
-
- bch2_trans_exit(&trans);
}
- for (i = 0;; i++) {
- struct btree_update *as;
- struct pending_btree_node_free *d;
-
- mutex_lock(&j->blacklist_lock);
- if (i >= bl->nr_entries) {
- mutex_unlock(&j->blacklist_lock);
- break;
- }
- n = bl->entries[i];
- mutex_unlock(&j->blacklist_lock);
-redo_wait:
- mutex_lock(&c->btree_interior_update_lock);
-
- /*
- * Is the node on the list of pending interior node updates -
- * being freed? If so, wait for that to finish:
- */
- for_each_pending_btree_node_free(c, as, d)
- if (n.seq == d->seq &&
- n.btree_id == d->btree_id &&
- !d->level &&
- !bkey_cmp(n.pos, d->key.k.p)) {
- closure_wait(&as->wait, &cl);
- mutex_unlock(&c->btree_interior_update_lock);
- closure_sync(&cl);
- goto redo_wait;
- }
-
- mutex_unlock(&c->btree_interior_update_lock);
+ bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
+ sb_blacklist_u64s(nr + 1));
+ if (!bl) {
+ ret = -ENOMEM;
+ goto out;
}
- mutex_lock(&j->blacklist_lock);
+ bl->start[nr].start = cpu_to_le64(start);
+ bl->start[nr].end = cpu_to_le64(end);
+out_write_sb:
+ c->disk_sb.sb->features[0] |=
+ 1ULL << BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3;
- bch2_journal_pin_drop(j, &bl->pin);
- list_del(&bl->list);
- kfree(bl->entries);
- kfree(bl);
+ ret = bch2_write_super(c);
+out:
+ mutex_unlock(&c->sb_lock);
- mutex_unlock(&j->blacklist_lock);
+ return ret;
}
-/*
- * Determine if a particular sequence number is blacklisted - if so, return
- * blacklist entry:
- */
-struct journal_seq_blacklist *
-bch2_journal_seq_blacklist_find(struct journal *j, u64 seq)
+static int journal_seq_blacklist_table_cmp(const void *_l,
+ const void *_r, size_t size)
{
- struct journal_seq_blacklist *bl;
+ const struct journal_seq_blacklist_table_entry *l = _l;
+ const struct journal_seq_blacklist_table_entry *r = _r;
- lockdep_assert_held(&j->blacklist_lock);
-
- list_for_each_entry(bl, &j->seq_blacklist, list)
- if (seq >= bl->start && seq <= bl->end)
- return bl;
-
- return NULL;
+ return (l->start > r->start) - (l->start < r->start);
}
-/*
- * Allocate a new, in memory blacklist entry:
- */
-static struct journal_seq_blacklist *
-bch2_journal_seq_blacklisted_new(struct journal *j, u64 start, u64 end)
+bool bch2_journal_seq_is_blacklisted(struct bch_fs *c, u64 seq,
+ bool dirty)
{
- struct journal_seq_blacklist *bl;
+ struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table;
+ struct journal_seq_blacklist_table_entry search = { .start = seq };
+ int idx;
- lockdep_assert_held(&j->blacklist_lock);
+ if (!t)
+ return false;
- /*
- * When we start the journal, bch2_journal_start() will skip over @seq:
- */
+ idx = eytzinger0_find_le(t->entries, t->nr,
+ sizeof(t->entries[0]),
+ journal_seq_blacklist_table_cmp,
+ &search);
+ if (idx < 0)
+ return false;
- bl = kzalloc(sizeof(*bl), GFP_KERNEL);
- if (!bl)
- return NULL;
+ BUG_ON(t->entries[idx].start > seq);
- bl->start = start;
- bl->end = end;
+ if (seq >= t->entries[idx].end)
+ return false;
- list_add_tail(&bl->list, &j->seq_blacklist);
- return bl;
+ if (dirty)
+ t->entries[idx].dirty = true;
+ return true;
}
-/*
- * Returns true if @seq is newer than the most recent journal entry that got
- * written, and data corresponding to @seq should be ignored - also marks @seq
- * as blacklisted so that on future restarts the corresponding data will still
- * be ignored:
- */
-int bch2_journal_seq_should_ignore(struct bch_fs *c, u64 seq, struct btree *b)
+int bch2_blacklist_table_initialize(struct bch_fs *c)
{
- struct journal *j = &c->journal;
- struct journal_seq_blacklist *bl = NULL;
- struct blacklisted_node *n;
- u64 journal_seq;
- int ret = 0;
-
- if (!seq)
- return 0;
+ struct bch_sb_field_journal_seq_blacklist *bl =
+ bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
+ struct journal_seq_blacklist_table *t;
+ unsigned i, nr = blacklist_nr_entries(bl);
- spin_lock(&j->lock);
- journal_seq = journal_cur_seq(j);
- spin_unlock(&j->lock);
+ BUG_ON(c->journal_seq_blacklist_table);
- /* Interior updates aren't journalled: */
- BUG_ON(b->level);
- BUG_ON(seq > journal_seq && test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags));
+ if (!bl)
+ return 0;
- /*
- * Decrease this back to j->seq + 2 when we next rev the on disk format:
- * increasing it temporarily to work around bug in old kernels
- */
- fsck_err_on(seq > journal_seq + 4, c,
- "bset journal seq too far in the future: %llu > %llu",
- seq, journal_seq);
+ t = kzalloc(sizeof(*t) + sizeof(t->entries[0]) * nr,
+ GFP_KERNEL);
+ if (!t)
+ return -ENOMEM;
- if (seq <= journal_seq &&
- list_empty_careful(&j->seq_blacklist))
- return 0;
+ t->nr = nr;
- mutex_lock(&j->blacklist_lock);
-
- if (seq <= journal_seq) {
- bl = bch2_journal_seq_blacklist_find(j, seq);
- if (!bl)
- goto out;
- } else {
- bch_verbose(c, "btree node %u:%llu:%llu has future journal sequence number %llu, blacklisting",
- b->btree_id, b->key.k.p.inode, b->key.k.p.offset, seq);
-
- if (!j->new_blacklist) {
- j->new_blacklist = bch2_journal_seq_blacklisted_new(j,
- journal_seq + 1,
- journal_seq + 1);
- if (!j->new_blacklist) {
- ret = -ENOMEM;
- goto out;
- }
- }
- bl = j->new_blacklist;
- bl->end = max(bl->end, seq);
+ for (i = 0; i < nr; i++) {
+ t->entries[i].start = le64_to_cpu(bl->start[i].start);
+ t->entries[i].end = le64_to_cpu(bl->start[i].end);
}
- for (n = bl->entries; n < bl->entries + bl->nr_entries; n++)
- if (b->data->keys.seq == n->seq &&
- b->btree_id == n->btree_id &&
- !bkey_cmp(b->key.k.p, n->pos))
- goto found_entry;
-
- if (!bl->nr_entries ||
- is_power_of_2(bl->nr_entries)) {
- n = krealloc(bl->entries,
- max_t(size_t, bl->nr_entries * 2, 8) * sizeof(*n),
- GFP_KERNEL);
- if (!n) {
- ret = -ENOMEM;
- goto out;
- }
- bl->entries = n;
- }
+ eytzinger0_sort(t->entries,
+ t->nr,
+ sizeof(t->entries[0]),
+ journal_seq_blacklist_table_cmp,
+ NULL);
- bl->entries[bl->nr_entries++] = (struct blacklisted_node) {
- .seq = b->data->keys.seq,
- .btree_id = b->btree_id,
- .pos = b->key.k.p,
- };
-found_entry:
- ret = 1;
-out:
-fsck_err:
- mutex_unlock(&j->blacklist_lock);
- return ret;
+ c->journal_seq_blacklist_table = t;
+ return 0;
}
-static int __bch2_journal_seq_blacklist_read(struct journal *j,
- struct journal_replay *i,
- u64 start, u64 end)
+static const char *
+bch2_sb_journal_seq_blacklist_validate(struct bch_sb *sb,
+ struct bch_sb_field *f)
{
- struct bch_fs *c = container_of(j, struct bch_fs, journal);
- struct journal_seq_blacklist *bl;
-
- bch_verbose(c, "blacklisting existing journal seq %llu-%llu",
- start, end);
+ struct bch_sb_field_journal_seq_blacklist *bl =
+ field_to_type(f, journal_seq_blacklist);
+ struct journal_seq_blacklist_entry *i;
+ unsigned nr = blacklist_nr_entries(bl);
+
+ for (i = bl->start; i < bl->start + nr; i++) {
+ if (le64_to_cpu(i->start) >=
+ le64_to_cpu(i->end))
+ return "entry start >= end";
+
+ if (i + 1 < bl->start + nr &&
+ le64_to_cpu(i[0].end) >
+ le64_to_cpu(i[1].start))
+ return "entries out of order";
+ }
- bl = bch2_journal_seq_blacklisted_new(j, start, end);
- if (!bl)
- return -ENOMEM;
+ return NULL;
+}
- bch2_journal_pin_add(j, le64_to_cpu(i->j.seq), &bl->pin,
- journal_seq_blacklist_flush);
- return 0;
+static void bch2_sb_journal_seq_blacklist_to_text(struct printbuf *out,
+ struct bch_sb *sb,
+ struct bch_sb_field *f)
+{
+ struct bch_sb_field_journal_seq_blacklist *bl =
+ field_to_type(f, journal_seq_blacklist);
+ struct journal_seq_blacklist_entry *i;
+ unsigned nr = blacklist_nr_entries(bl);
+
+ for (i = bl->start; i < bl->start + nr; i++) {
+ if (i != bl->start)
+ pr_buf(out, " ");
+
+ pr_buf(out, "%llu-%llu",
+ le64_to_cpu(i->start),
+ le64_to_cpu(i->end));
+ }
}
-/*
- * After reading the journal, find existing journal seq blacklist entries and
- * read them into memory:
- */
-int bch2_journal_seq_blacklist_read(struct journal *j,
- struct journal_replay *i)
+const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist = {
+ .validate = bch2_sb_journal_seq_blacklist_validate,
+ .to_text = bch2_sb_journal_seq_blacklist_to_text
+};
+
+void bch2_blacklist_entries_gc(struct work_struct *work)
{
- struct jset_entry *entry;
- int ret = 0;
+ struct bch_fs *c = container_of(work, struct bch_fs,
+ journal_seq_blacklist_gc_work);
+ struct journal_seq_blacklist_table *t;
+ struct bch_sb_field_journal_seq_blacklist *bl;
+ struct journal_seq_blacklist_entry *src, *dst;
+ struct btree_trans trans;
+ unsigned i, nr, new_nr;
+ int ret;
- vstruct_for_each(&i->j, entry) {
- switch (entry->type) {
- case BCH_JSET_ENTRY_blacklist: {
- struct jset_entry_blacklist *bl_entry =
- container_of(entry, struct jset_entry_blacklist, entry);
+ bch2_trans_init(&trans, c);
- ret = __bch2_journal_seq_blacklist_read(j, i,
- le64_to_cpu(bl_entry->seq),
- le64_to_cpu(bl_entry->seq));
- break;
- }
- case BCH_JSET_ENTRY_blacklist_v2: {
- struct jset_entry_blacklist_v2 *bl_entry =
- container_of(entry, struct jset_entry_blacklist_v2, entry);
-
- ret = __bch2_journal_seq_blacklist_read(j, i,
- le64_to_cpu(bl_entry->start),
- le64_to_cpu(bl_entry->end));
- break;
- }
- }
+ for (i = 0; i < BTREE_ID_NR; i++) {
+ struct btree_iter *iter;
+ struct btree *b;
- if (ret)
- break;
+ for_each_btree_node(&trans, iter, i, POS_MIN,
+ BTREE_ITER_PREFETCH, b)
+ if (test_bit(BCH_FS_STOPPING, &c->flags)) {
+ bch2_trans_exit(&trans);
+ return;
+ }
+ bch2_trans_iter_free(&trans, iter);
}
- return ret;
-}
-
-/*
- * After reading the journal and walking the btree, we might have new journal
- * sequence numbers to blacklist - add entries to the next journal entry to be
- * written:
- */
-void bch2_journal_seq_blacklist_write(struct journal *j)
-{
- struct journal_seq_blacklist *bl = j->new_blacklist;
- struct jset_entry_blacklist_v2 *bl_entry;
- struct jset_entry *entry;
+ ret = bch2_trans_exit(&trans);
+ if (ret)
+ return;
+ mutex_lock(&c->sb_lock);
+ bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
if (!bl)
- return;
+ goto out;
- entry = bch2_journal_add_entry_noreservation(journal_cur_buf(j),
- (sizeof(*bl_entry) - sizeof(*entry)) / sizeof(u64));
+ nr = blacklist_nr_entries(bl);
+ dst = bl->start;
- bl_entry = container_of(entry, struct jset_entry_blacklist_v2, entry);
- bl_entry->entry.type = BCH_JSET_ENTRY_blacklist_v2;
- bl_entry->start = cpu_to_le64(bl->start);
- bl_entry->end = cpu_to_le64(bl->end);
+ t = c->journal_seq_blacklist_table;
+ BUG_ON(nr != t->nr);
+
+ for (src = bl->start, i = eytzinger0_first(t->nr);
+ src < bl->start + nr;
+ src++, i = eytzinger0_next(i, nr)) {
+ BUG_ON(t->entries[i].start != le64_to_cpu(src->start));
+ BUG_ON(t->entries[i].end != le64_to_cpu(src->end));
+
+ if (t->entries[i].dirty)
+ *dst++ = *src;
+ }
- bch2_journal_pin_add(j,
- journal_cur_seq(j),
- &bl->pin,
- journal_seq_blacklist_flush);
+ new_nr = dst - bl->start;
- j->new_blacklist = NULL;
+ bch_info(c, "nr blacklist entries was %u, now %u", nr, new_nr);
+
+ if (new_nr != nr) {
+ bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
+ new_nr ? sb_blacklist_u64s(new_nr) : 0);
+ BUG_ON(new_nr && !bl);
+
+ if (!new_nr)
+ c->disk_sb.sb->features[0] &=
+ ~(1ULL << BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3);
+
+ bch2_write_super(c);
+ }
+out:
+ mutex_unlock(&c->sb_lock);
}
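A note on the eytzinger0_* helpers the new lookup and gc code rely on: the table is stored in Eytzinger (BFS) order - children of index i at 2i+1 and 2i+2 - which keeps binary search cache-friendly. A sketch of a find-last-<=-key descent in that layout (a hypothetical standalone helper, not the bcachefs eytzinger0_find_le() implementation):

/* Sketch: search an eytzinger (BFS-order) array for the greatest
 * element <= key; children of node i live at 2*i + 1 and 2*i + 2. */
static int eytzinger_find_le_sketch(const uint64_t *a, size_t nr,
				    uint64_t key)
{
	size_t i = 0;
	int best = -1;

	while (i < nr) {
		if (a[i] <= key) {
			best = (int) i;	/* candidate; a larger one may lie right */
			i = 2 * i + 2;
		} else {
			i = 2 * i + 1;
		}
	}

	return best;	/* eytzinger index of the match, or -1 if none */
}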
diff --git a/fs/bcachefs/journal_seq_blacklist.h b/fs/bcachefs/journal_seq_blacklist.h
index b4a3b270e9d2..03f4b97247fd 100644
--- a/fs/bcachefs/journal_seq_blacklist.h
+++ b/fs/bcachefs/journal_seq_blacklist.h
@@ -2,13 +2,12 @@
#ifndef _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H
#define _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H
-struct journal_replay;
-
-struct journal_seq_blacklist *
-bch2_journal_seq_blacklist_find(struct journal *, u64);
-int bch2_journal_seq_should_ignore(struct bch_fs *, u64, struct btree *);
-int bch2_journal_seq_blacklist_read(struct journal *,
- struct journal_replay *);
-void bch2_journal_seq_blacklist_write(struct journal *);
+bool bch2_journal_seq_is_blacklisted(struct bch_fs *, u64, bool);
+int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64, u64);
+int bch2_blacklist_table_initialize(struct bch_fs *);
+
+extern const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist;
+
+void bch2_blacklist_entries_gc(struct work_struct *);
#endif /* _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H */
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index 85bf5e2706f7..7349b50bc5e7 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -54,24 +54,6 @@ struct journal_entry_pin {
u64 seq;
};
-/* corresponds to a btree node with a blacklisted bset: */
-struct blacklisted_node {
- __le64 seq;
- enum btree_id btree_id;
- struct bpos pos;
-};
-
-struct journal_seq_blacklist {
- struct list_head list;
- u64 start;
- u64 end;
-
- struct journal_entry_pin pin;
-
- struct blacklisted_node *entries;
- size_t nr_entries;
-};
-
struct journal_res {
bool ref;
u8 idx;
@@ -222,10 +204,6 @@ struct journal {
u64 replay_journal_seq;
- struct mutex blacklist_lock;
- struct list_head seq_blacklist;
- struct journal_seq_blacklist *new_blacklist;
-
struct write_point wp;
spinlock_t err_lock;
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 67b4dda9cfeb..9411a1f550f3 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -12,6 +12,7 @@
#include "error.h"
#include "fsck.h"
#include "journal_io.h"
+#include "journal_seq_blacklist.h"
#include "quota.h"
#include "recovery.h"
#include "replicas.h"
@@ -99,18 +100,49 @@ fsck_err:
return ret;
}
+static int
+verify_journal_entries_not_blacklisted_or_missing(struct bch_fs *c,
+ struct list_head *journal)
+{
+ struct journal_replay *i =
+ list_last_entry(journal, struct journal_replay, list);
+ u64 start_seq = le64_to_cpu(i->j.last_seq);
+ u64 end_seq = le64_to_cpu(i->j.seq);
+ u64 seq = start_seq;
+ int ret = 0;
+
+ list_for_each_entry(i, journal, list) {
+ fsck_err_on(seq != le64_to_cpu(i->j.seq), c,
+ "journal entries %llu-%llu missing! (replaying %llu-%llu)",
+ seq, le64_to_cpu(i->j.seq) - 1,
+ start_seq, end_seq);
+
+ seq = le64_to_cpu(i->j.seq);
+
+ fsck_err_on(bch2_journal_seq_is_blacklisted(c, seq, false), c,
+ "found blacklisted journal entry %llu", seq);
+
+ do {
+ seq++;
+ } while (bch2_journal_seq_is_blacklisted(c, seq, false));
+ }
+fsck_err:
+ return ret;
+}
+
static struct bch_sb_field_clean *read_superblock_clean(struct bch_fs *c)
{
struct bch_sb_field_clean *clean, *sb_clean;
-
- if (!c->sb.clean)
- return NULL;
+ int ret;
mutex_lock(&c->sb_lock);
sb_clean = bch2_sb_get_clean(c->disk_sb.sb);
- if (!sb_clean) {
+
+ if (fsck_err_on(!sb_clean, c,
+ "superblock marked clean but clean section not present")) {
+ SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
+ c->sb.clean = false;
mutex_unlock(&c->sb_lock);
- bch_err(c, "superblock marked clean but clean section not present");
return NULL;
}
@@ -128,6 +160,9 @@ static struct bch_sb_field_clean *read_superblock_clean(struct bch_fs *c)
mutex_unlock(&c->sb_lock);
return clean;
+fsck_err:
+ mutex_unlock(&c->sb_lock);
+ return ERR_PTR(ret);
}
static int journal_replay_entry_early(struct bch_fs *c,
@@ -179,14 +214,32 @@ static int journal_replay_entry_early(struct bch_fs *c,
le64_to_cpu(u->v));
break;
}
+ case BCH_JSET_ENTRY_blacklist: {
+ struct jset_entry_blacklist *bl_entry =
+ container_of(entry, struct jset_entry_blacklist, entry);
+
+ ret = bch2_journal_seq_blacklist_add(c,
+ le64_to_cpu(bl_entry->seq),
+ le64_to_cpu(bl_entry->seq) + 1);
+ break;
+ }
+ case BCH_JSET_ENTRY_blacklist_v2: {
+ struct jset_entry_blacklist_v2 *bl_entry =
+ container_of(entry, struct jset_entry_blacklist_v2, entry);
+
+ ret = bch2_journal_seq_blacklist_add(c,
+ le64_to_cpu(bl_entry->start),
+ le64_to_cpu(bl_entry->end) + 1);
+ break;
+ }
}
return ret;
}
-static int load_journal_metadata(struct bch_fs *c,
- struct bch_sb_field_clean *clean,
- struct list_head *journal)
+static int journal_replay_early(struct bch_fs *c,
+ struct bch_sb_field_clean *clean,
+ struct list_head *journal)
{
struct jset_entry *entry;
int ret;
@@ -300,37 +353,76 @@ static bool journal_empty(struct list_head *journal)
int bch2_fs_recovery(struct bch_fs *c)
{
const char *err = "cannot allocate memory";
- struct bch_sb_field_clean *clean;
+ struct bch_sb_field_clean *clean = NULL;
+ u64 journal_seq;
LIST_HEAD(journal);
int ret;
- clean = read_superblock_clean(c);
- if (clean)
+ if (c->sb.clean)
+ clean = read_superblock_clean(c);
+ ret = PTR_ERR_OR_ZERO(clean);
+ if (ret)
+ goto err;
+
+ if (c->sb.clean)
bch_info(c, "recovering from clean shutdown, journal seq %llu",
le64_to_cpu(clean->journal_seq));
- if (!clean || c->opts.fsck) {
+ if (!c->replicas.entries) {
+ bch_info(c, "building replicas info");
+ set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
+ }
+
+ if (!c->sb.clean || c->opts.fsck) {
+ struct jset *j;
+
ret = bch2_journal_read(c, &journal);
if (ret)
goto err;
- ret = verify_superblock_clean(c, &clean,
- &list_last_entry(&journal, struct journal_replay,
- list)->j);
+ fsck_err_on(c->sb.clean && !journal_empty(&journal), c,
+ "filesystem marked clean but journal not empty");
+
+ if (!c->sb.clean && list_empty(&journal)){
+ bch_err(c, "no journal entries found");
+ ret = BCH_FSCK_REPAIR_IMPOSSIBLE;
+ goto err;
+ }
+
+ j = &list_last_entry(&journal, struct journal_replay, list)->j;
+
+ ret = verify_superblock_clean(c, &clean, j);
if (ret)
goto err;
+
+ journal_seq = le64_to_cpu(j->seq) + 1;
} else {
- ret = bch2_journal_set_seq(c,
- le64_to_cpu(clean->journal_seq),
- le64_to_cpu(clean->journal_seq));
- if (ret)
+ journal_seq = le64_to_cpu(clean->journal_seq) + 1;
+ }
+
+ ret = journal_replay_early(c, clean, &journal);
+ if (ret)
+ goto err;
+
+ if (!c->sb.clean) {
+ ret = bch2_journal_seq_blacklist_add(c,
+ journal_seq,
+ journal_seq + 4);
+ if (ret) {
+ bch_err(c, "error creating new journal seq blacklist entry");
goto err;
+ }
+
+ journal_seq += 4;
}
- fsck_err_on(clean && !journal_empty(&journal), c,
- "filesystem marked clean but journal not empty");
+ ret = bch2_blacklist_table_initialize(c);
+
+ ret = verify_journal_entries_not_blacklisted_or_missing(c, &journal);
+ if (ret)
+ goto err;
- ret = load_journal_metadata(c, clean, &journal);
+ ret = bch2_fs_journal_start(&c->journal, journal_seq, &journal);
if (ret)
goto err;
@@ -351,11 +443,6 @@ int bch2_fs_recovery(struct bch_fs *c)
set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
- if (!c->replicas.entries) {
- bch_info(c, "building replicas info");
- set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
- }
-
if (c->opts.fsck ||
!(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) ||
test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) {
@@ -377,13 +464,6 @@ int bch2_fs_recovery(struct bch_fs *c)
if (c->sb.encryption_type && !c->sb.clean)
atomic64_add(1 << 16, &c->key_version);
- /*
- * bch2_fs_journal_start() can't happen sooner, or btree_gc_finish()
- * will give spurious errors about oldest_gen > bucket_gen -
- * this is a hack but oh well.
- */
- bch2_fs_journal_start(&c->journal);
-
if (c->opts.noreplay)
goto out;
@@ -424,6 +504,10 @@ int bch2_fs_recovery(struct bch_fs *c)
SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 0);
}
mutex_unlock(&c->sb_lock);
+
+ if (c->journal_seq_blacklist_table &&
+ c->journal_seq_blacklist_table->nr > 128)
+ queue_work(system_long_wq, &c->journal_seq_blacklist_gc_work);
out:
bch2_journal_entries_free(&journal);
kfree(clean);
@@ -472,7 +556,7 @@ int bch2_fs_initialize(struct bch_fs *c)
* journal_res_get() will crash if called before this has
* set up the journal.pin FIFO and journal.cur pointer:
*/
- bch2_fs_journal_start(&c->journal);
+ bch2_fs_journal_start(&c->journal, 1, &journal);
bch2_journal_set_replay_done(&c->journal);
err = "error going read write";
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index 9fd77e57cafe..7aaa8b785d57 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -7,6 +7,7 @@
#include "error.h"
#include "io.h"
#include "journal.h"
+#include "journal_seq_blacklist.h"
#include "replicas.h"
#include "quota.h"
#include "super-io.h"
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 8c31a9a67eee..27eacb1cd144 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -30,6 +30,7 @@
#include "io.h"
#include "journal.h"
#include "journal_reclaim.h"
+#include "journal_seq_blacklist.h"
#include "move.h"
#include "migrate.h"
#include "movinggc.h"
@@ -468,6 +469,7 @@ static void bch2_fs_free(struct bch_fs *c)
kfree(c->replicas.entries);
kfree(c->replicas_gc.entries);
kfree(rcu_dereference_protected(c->disk_groups, 1));
+ kfree(c->journal_seq_blacklist_table);
if (c->journal_reclaim_wq)
destroy_workqueue(c->journal_reclaim_wq);
@@ -496,6 +498,10 @@ void bch2_fs_stop(struct bch_fs *c)
bch_verbose(c, "shutting down");
+ set_bit(BCH_FS_STOPPING, &c->flags);
+
+ cancel_work_sync(&c->journal_seq_blacklist_gc_work);
+
for_each_member_device(ca, c, i)
if (ca->kobj.state_in_sysfs &&
ca->disk_sb.bdev)
@@ -631,6 +637,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
spin_lock_init(&c->btree_write_error_lock);
INIT_WORK(&c->btree_write_error_work, bch2_btree_write_error_work);
+ INIT_WORK(&c->journal_seq_blacklist_gc_work,
+ bch2_blacklist_entries_gc);
+
INIT_LIST_HEAD(&c->fsck_errors);
mutex_init(&c->fsck_error_lock);