aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/bcachefs/alloc_background.c289
-rw-r--r--fs/bcachefs/alloc_background.h14
-rw-r--r--fs/bcachefs/alloc_foreground.c4
-rw-r--r--fs/bcachefs/bcachefs.h21
-rw-r--r--fs/bcachefs/bcachefs_format.h13
-rw-r--r--fs/bcachefs/bkey.c2
-rw-r--r--fs/bcachefs/bkey_methods.c6
-rw-r--r--fs/bcachefs/bkey_methods.h3
-rw-r--r--fs/bcachefs/btree_iter.c24
-rw-r--r--fs/bcachefs/btree_types.h16
-rw-r--r--fs/bcachefs/chardev.c9
-rw-r--r--fs/bcachefs/debug.c109
-rw-r--r--fs/bcachefs/errcode.h3
-rw-r--r--fs/bcachefs/error.c19
-rw-r--r--fs/bcachefs/error.h7
-rw-r--r--fs/bcachefs/fs-ioctl.c2
-rw-r--r--fs/bcachefs/fs.c21
-rw-r--r--fs/bcachefs/journal.c8
-rw-r--r--fs/bcachefs/journal_io.c20
-rw-r--r--fs/bcachefs/journal_seq_blacklist.c2
-rw-r--r--fs/bcachefs/lru.h3
-rw-r--r--fs/bcachefs/opts.h2
-rw-r--r--fs/bcachefs/recovery.c12
-rw-r--r--fs/bcachefs/sb-downgrade.c2
-rw-r--r--fs/bcachefs/sb-errors.c14
-rw-r--r--fs/bcachefs/sb-errors_format.h559
-rw-r--r--fs/bcachefs/seqmutex.h11
-rw-r--r--fs/bcachefs/snapshot.c12
-rw-r--r--fs/bcachefs/str_hash.h2
-rw-r--r--fs/bcachefs/super-io.c7
-rw-r--r--fs/bcachefs/super.c19
-rw-r--r--fs/btrfs/bio.c4
-rw-r--r--fs/btrfs/block-group.c11
-rw-r--r--fs/btrfs/free-space-cache.c2
-rw-r--r--fs/btrfs/qgroup.c4
-rw-r--r--fs/btrfs/scrub.c24
-rw-r--r--fs/btrfs/tree-log.c43
-rw-r--r--fs/nfs/direct.c2
-rw-r--r--fs/nfsd/netlink.c2
-rw-r--r--fs/nfsd/netlink.h3
-rw-r--r--fs/nfsd/nfsctl.c50
-rw-r--r--fs/nfsd/nfssvc.c1
-rw-r--r--fs/ocfs2/aops.c5
-rw-r--r--fs/ocfs2/journal.c17
-rw-r--r--fs/ocfs2/journal.h2
-rw-r--r--fs/ocfs2/ocfs2_trace.h2
-rw-r--r--fs/open.c4
-rw-r--r--fs/overlayfs/dir.c8
-rw-r--r--fs/overlayfs/export.c6
-rw-r--r--fs/proc/task_mmu.c3
-rw-r--r--fs/smb/client/cifsfs.c2
-rw-r--r--fs/smb/client/cifsglob.h3
-rw-r--r--fs/smb/client/cifssmb.c8
-rw-r--r--fs/smb/client/file.c27
-rw-r--r--fs/smb/client/smb2pdu.c19
-rw-r--r--fs/xfs/xfs_inode.c23
56 files changed, 865 insertions, 645 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index c4b6601f5b74..1de9fac3bcf4 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -29,7 +29,7 @@
#include <linux/sched/task.h>
#include <linux/sort.h>
-static void bch2_discard_one_bucket_fast(struct bch_fs *c, struct bpos bucket);
+static void bch2_discard_one_bucket_fast(struct bch_dev *, u64);
/* Persistent alloc info: */
@@ -259,6 +259,14 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k,
"invalid data type (got %u should be %u)",
a.v->data_type, alloc_data_type(*a.v, a.v->data_type));
+ for (unsigned i = 0; i < 2; i++)
+ bkey_fsck_err_on(a.v->io_time[i] > LRU_TIME_MAX,
+ c, err,
+ alloc_key_io_time_bad,
+ "invalid io_time[%s]: %llu, max %llu",
+ i == READ ? "read" : "write",
+ a.v->io_time[i], LRU_TIME_MAX);
+
switch (a.v->data_type) {
case BCH_DATA_free:
case BCH_DATA_need_gc_gens:
@@ -757,8 +765,8 @@ int bch2_trigger_alloc(struct btree_trans *trans,
alloc_data_type_set(new_a, new_a->data_type);
if (bch2_bucket_sectors_total(*new_a) > bch2_bucket_sectors_total(*old_a)) {
- new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
- new_a->io_time[WRITE]= max_t(u64, 1, atomic64_read(&c->io_clock[WRITE].now));
+ new_a->io_time[READ] = bch2_current_io_time(c, READ);
+ new_a->io_time[WRITE]= bch2_current_io_time(c, WRITE);
SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true);
SET_BCH_ALLOC_V4_NEED_DISCARD(new_a, true);
}
@@ -768,6 +776,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
!bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset)) {
new_a->gen++;
SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false);
+ alloc_data_type_set(new_a, new_a->data_type);
}
if (old_a->data_type != new_a->data_type ||
@@ -781,7 +790,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
if (new_a->data_type == BCH_DATA_cached &&
!new_a->io_time[READ])
- new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
+ new_a->io_time[READ] = bch2_current_io_time(c, READ);
u64 old_lru = alloc_lru_idx_read(*old_a);
u64 new_lru = alloc_lru_idx_read(*new_a);
@@ -882,14 +891,14 @@ int bch2_trigger_alloc(struct btree_trans *trans,
closure_wake_up(&c->freelist_wait);
if (statechange(a->data_type == BCH_DATA_need_discard) &&
- !bch2_bucket_is_open(c, new.k->p.inode, new.k->p.offset) &&
+ !bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset) &&
bucket_flushed(new_a))
- bch2_discard_one_bucket_fast(c, new.k->p);
+ bch2_discard_one_bucket_fast(ca, new.k->p.offset);
if (statechange(a->data_type == BCH_DATA_cached) &&
!bch2_bucket_is_open(c, new.k->p.inode, new.k->p.offset) &&
should_invalidate_buckets(ca, bch2_dev_usage_read(ca)))
- bch2_do_invalidates(c);
+ bch2_dev_do_invalidates(ca);
if (statechange(a->data_type == BCH_DATA_need_gc_gens))
bch2_gc_gens_async(c);
@@ -1579,7 +1588,7 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
if (ret)
goto err;
- a_mut->v.io_time[READ] = atomic64_read(&c->io_clock[READ].now);
+ a_mut->v.io_time[READ] = bch2_current_io_time(c, READ);
ret = bch2_trans_update(trans, alloc_iter,
&a_mut->k_i, BTREE_TRIGGER_norun);
if (ret)
@@ -1627,34 +1636,38 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
return ret;
}
-static int discard_in_flight_add(struct bch_fs *c, struct bpos bucket)
+static int discard_in_flight_add(struct bch_dev *ca, u64 bucket, bool in_progress)
{
int ret;
- mutex_lock(&c->discard_buckets_in_flight_lock);
- darray_for_each(c->discard_buckets_in_flight, i)
- if (bkey_eq(*i, bucket)) {
- ret = -EEXIST;
+ mutex_lock(&ca->discard_buckets_in_flight_lock);
+ darray_for_each(ca->discard_buckets_in_flight, i)
+ if (i->bucket == bucket) {
+ ret = -BCH_ERR_EEXIST_discard_in_flight_add;
goto out;
}
- ret = darray_push(&c->discard_buckets_in_flight, bucket);
+ ret = darray_push(&ca->discard_buckets_in_flight, ((struct discard_in_flight) {
+ .in_progress = in_progress,
+ .bucket = bucket,
+ }));
out:
- mutex_unlock(&c->discard_buckets_in_flight_lock);
+ mutex_unlock(&ca->discard_buckets_in_flight_lock);
return ret;
}
-static void discard_in_flight_remove(struct bch_fs *c, struct bpos bucket)
+static void discard_in_flight_remove(struct bch_dev *ca, u64 bucket)
{
- mutex_lock(&c->discard_buckets_in_flight_lock);
- darray_for_each(c->discard_buckets_in_flight, i)
- if (bkey_eq(*i, bucket)) {
- darray_remove_item(&c->discard_buckets_in_flight, i);
+ mutex_lock(&ca->discard_buckets_in_flight_lock);
+ darray_for_each(ca->discard_buckets_in_flight, i)
+ if (i->bucket == bucket) {
+ BUG_ON(!i->in_progress);
+ darray_remove_item(&ca->discard_buckets_in_flight, i);
goto found;
}
BUG();
found:
- mutex_unlock(&c->discard_buckets_in_flight_lock);
+ mutex_unlock(&ca->discard_buckets_in_flight_lock);
}
struct discard_buckets_state {
@@ -1662,26 +1675,11 @@ struct discard_buckets_state {
u64 open;
u64 need_journal_commit;
u64 discarded;
- struct bch_dev *ca;
u64 need_journal_commit_this_dev;
};
-static void discard_buckets_next_dev(struct bch_fs *c, struct discard_buckets_state *s, struct bch_dev *ca)
-{
- if (s->ca == ca)
- return;
-
- if (s->ca && s->need_journal_commit_this_dev >
- bch2_dev_usage_read(s->ca).d[BCH_DATA_free].buckets)
- bch2_journal_flush_async(&c->journal, NULL);
-
- if (s->ca)
- percpu_ref_put(&s->ca->io_ref);
- s->ca = ca;
- s->need_journal_commit_this_dev = 0;
-}
-
static int bch2_discard_one_bucket(struct btree_trans *trans,
+ struct bch_dev *ca,
struct btree_iter *need_discard_iter,
struct bpos *discard_pos_done,
struct discard_buckets_state *s)
@@ -1695,16 +1693,6 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
bool discard_locked = false;
int ret = 0;
- struct bch_dev *ca = s->ca && s->ca->dev_idx == pos.inode
- ? s->ca
- : bch2_dev_get_ioref(c, pos.inode, WRITE);
- if (!ca) {
- bch2_btree_iter_set_pos(need_discard_iter, POS(pos.inode + 1, 0));
- return 0;
- }
-
- discard_buckets_next_dev(c, s, ca);
-
if (bch2_bucket_is_open_safe(c, pos.inode, pos.offset)) {
s->open++;
goto out;
@@ -1764,7 +1752,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
goto out;
}
- if (discard_in_flight_add(c, SPOS(iter.pos.inode, iter.pos.offset, true)))
+ if (discard_in_flight_add(ca, iter.pos.offset, true))
goto out;
discard_locked = true;
@@ -1788,8 +1776,9 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
}
SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
- alloc_data_type_set(&a->v, a->v.data_type);
write:
+ alloc_data_type_set(&a->v, a->v.data_type);
+
ret = bch2_trans_update(trans, &iter, &a->k_i, 0) ?:
bch2_trans_commit(trans, NULL, NULL,
BCH_WATERMARK_btree|
@@ -1801,7 +1790,7 @@ write:
s->discarded++;
out:
if (discard_locked)
- discard_in_flight_remove(c, iter.pos);
+ discard_in_flight_remove(ca, iter.pos.offset);
s->seen++;
bch2_trans_iter_exit(trans, &iter);
printbuf_exit(&buf);
@@ -1810,7 +1799,8 @@ out:
static void bch2_do_discards_work(struct work_struct *work)
{
- struct bch_fs *c = container_of(work, struct bch_fs, discard_work);
+ struct bch_dev *ca = container_of(work, struct bch_dev, discard_work);
+ struct bch_fs *c = ca->fs;
struct discard_buckets_state s = {};
struct bpos discard_pos_done = POS_MAX;
int ret;
@@ -1821,23 +1811,41 @@ static void bch2_do_discards_work(struct work_struct *work)
* successful commit:
*/
ret = bch2_trans_run(c,
- for_each_btree_key(trans, iter,
- BTREE_ID_need_discard, POS_MIN, 0, k,
- bch2_discard_one_bucket(trans, &iter, &discard_pos_done, &s)));
-
- discard_buckets_next_dev(c, &s, NULL);
+ for_each_btree_key_upto(trans, iter,
+ BTREE_ID_need_discard,
+ POS(ca->dev_idx, 0),
+ POS(ca->dev_idx, U64_MAX), 0, k,
+ bch2_discard_one_bucket(trans, ca, &iter, &discard_pos_done, &s)));
trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded,
bch2_err_str(ret));
bch2_write_ref_put(c, BCH_WRITE_REF_discard);
+ percpu_ref_put(&ca->io_ref);
+}
+
+void bch2_dev_do_discards(struct bch_dev *ca)
+{
+ struct bch_fs *c = ca->fs;
+
+ if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+ return;
+
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard))
+ goto put_ioref;
+
+ if (queue_work(c->write_ref_wq, &ca->discard_work))
+ return;
+
+ bch2_write_ref_put(c, BCH_WRITE_REF_discard);
+put_ioref:
+ percpu_ref_put(&ca->io_ref);
}
void bch2_do_discards(struct bch_fs *c)
{
- if (bch2_write_ref_tryget(c, BCH_WRITE_REF_discard) &&
- !queue_work(c->write_ref_wq, &c->discard_work))
- bch2_write_ref_put(c, BCH_WRITE_REF_discard);
+ for_each_member_device(c, ca)
+ bch2_dev_do_discards(ca);
}
static int bch2_clear_bucket_needs_discard(struct btree_trans *trans, struct bpos bucket)
@@ -1866,68 +1874,69 @@ err:
static void bch2_do_discards_fast_work(struct work_struct *work)
{
- struct bch_fs *c = container_of(work, struct bch_fs, discard_fast_work);
+ struct bch_dev *ca = container_of(work, struct bch_dev, discard_fast_work);
+ struct bch_fs *c = ca->fs;
while (1) {
bool got_bucket = false;
- struct bpos bucket;
- struct bch_dev *ca;
-
- mutex_lock(&c->discard_buckets_in_flight_lock);
- darray_for_each(c->discard_buckets_in_flight, i) {
- if (i->snapshot)
- continue;
+ u64 bucket;
- ca = bch2_dev_get_ioref(c, i->inode, WRITE);
- if (!ca) {
- darray_remove_item(&c->discard_buckets_in_flight, i);
+ mutex_lock(&ca->discard_buckets_in_flight_lock);
+ darray_for_each(ca->discard_buckets_in_flight, i) {
+ if (i->in_progress)
continue;
- }
got_bucket = true;
- bucket = *i;
- i->snapshot = true;
+ bucket = i->bucket;
+ i->in_progress = true;
break;
}
- mutex_unlock(&c->discard_buckets_in_flight_lock);
+ mutex_unlock(&ca->discard_buckets_in_flight_lock);
if (!got_bucket)
break;
if (ca->mi.discard && !c->opts.nochanges)
blkdev_issue_discard(ca->disk_sb.bdev,
- bucket.offset * ca->mi.bucket_size,
+ bucket_to_sector(ca, bucket),
ca->mi.bucket_size,
GFP_KERNEL);
int ret = bch2_trans_do(c, NULL, NULL,
- BCH_WATERMARK_btree|
- BCH_TRANS_COMMIT_no_enospc,
- bch2_clear_bucket_needs_discard(trans, bucket));
+ BCH_WATERMARK_btree|
+ BCH_TRANS_COMMIT_no_enospc,
+ bch2_clear_bucket_needs_discard(trans, POS(ca->dev_idx, bucket)));
bch_err_fn(c, ret);
- percpu_ref_put(&ca->io_ref);
- discard_in_flight_remove(c, bucket);
+ discard_in_flight_remove(ca, bucket);
if (ret)
break;
}
bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
+ percpu_ref_put(&ca->io_ref);
}
-static void bch2_discard_one_bucket_fast(struct bch_fs *c, struct bpos bucket)
+static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket)
{
- rcu_read_lock();
- struct bch_dev *ca = bch2_dev_rcu(c, bucket.inode);
- bool dead = !ca || percpu_ref_is_dying(&ca->io_ref);
- rcu_read_unlock();
+ struct bch_fs *c = ca->fs;
+
+ if (discard_in_flight_add(ca, bucket, false))
+ return;
+
+ if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+ return;
+
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast))
+ goto put_ioref;
+
+ if (queue_work(c->write_ref_wq, &ca->discard_fast_work))
+ return;
- if (!dead &&
- !discard_in_flight_add(c, bucket) &&
- bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast) &&
- !queue_work(c->write_ref_wq, &c->discard_fast_work))
- bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
+ bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
+put_ioref:
+ percpu_ref_put(&ca->io_ref);
}
static int invalidate_one_bucket(struct btree_trans *trans,
@@ -1975,8 +1984,8 @@ static int invalidate_one_bucket(struct btree_trans *trans,
a->v.data_type = 0;
a->v.dirty_sectors = 0;
a->v.cached_sectors = 0;
- a->v.io_time[READ] = atomic64_read(&c->io_clock[READ].now);
- a->v.io_time[WRITE] = atomic64_read(&c->io_clock[WRITE].now);
+ a->v.io_time[READ] = bch2_current_io_time(c, READ);
+ a->v.io_time[WRITE] = bch2_current_io_time(c, WRITE);
ret = bch2_trans_commit(trans, NULL, NULL,
BCH_WATERMARK_btree|
@@ -2011,9 +2020,25 @@ err:
goto out;
}
+static struct bkey_s_c next_lru_key(struct btree_trans *trans, struct btree_iter *iter,
+ struct bch_dev *ca, bool *wrapped)
+{
+ struct bkey_s_c k;
+again:
+ k = bch2_btree_iter_peek_upto(iter, lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX));
+ if (!k.k && !*wrapped) {
+ bch2_btree_iter_set_pos(iter, lru_pos(ca->dev_idx, 0, 0));
+ *wrapped = true;
+ goto again;
+ }
+
+ return k;
+}
+
static void bch2_do_invalidates_work(struct work_struct *work)
{
- struct bch_fs *c = container_of(work, struct bch_fs, invalidate_work);
+ struct bch_dev *ca = container_of(work, struct bch_dev, invalidate_work);
+ struct bch_fs *c = ca->fs;
struct btree_trans *trans = bch2_trans_get(c);
int ret = 0;
@@ -2021,31 +2046,63 @@ static void bch2_do_invalidates_work(struct work_struct *work)
if (ret)
goto err;
- for_each_member_device(c, ca) {
- s64 nr_to_invalidate =
- should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
+ s64 nr_to_invalidate =
+ should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
+ struct btree_iter iter;
+ bool wrapped = false;
- ret = for_each_btree_key_upto(trans, iter, BTREE_ID_lru,
- lru_pos(ca->dev_idx, 0, 0),
- lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX),
- BTREE_ITER_intent, k,
- invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate));
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_lru,
+ lru_pos(ca->dev_idx, 0,
+ ((bch2_current_io_time(c, READ) + U32_MAX) &
+ LRU_TIME_MAX)), 0);
- if (ret < 0) {
- bch2_dev_put(ca);
+ while (true) {
+ bch2_trans_begin(trans);
+
+ struct bkey_s_c k = next_lru_key(trans, &iter, ca, &wrapped);
+ ret = bkey_err(k);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ continue;
+ if (ret)
break;
- }
+ if (!k.k)
+ break;
+
+ ret = invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate);
+ if (ret)
+ break;
+
+ bch2_btree_iter_advance(&iter);
}
+ bch2_trans_iter_exit(trans, &iter);
err:
bch2_trans_put(trans);
bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
+ percpu_ref_put(&ca->io_ref);
+}
+
+void bch2_dev_do_invalidates(struct bch_dev *ca)
+{
+ struct bch_fs *c = ca->fs;
+
+ if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+ return;
+
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate))
+ goto put_ioref;
+
+ if (queue_work(c->write_ref_wq, &ca->invalidate_work))
+ return;
+
+ bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
+put_ioref:
+ percpu_ref_put(&ca->io_ref);
}
void bch2_do_invalidates(struct bch_fs *c)
{
- if (bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate) &&
- !queue_work(c->write_ref_wq, &c->invalidate_work))
- bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
+ for_each_member_device(c, ca)
+ bch2_dev_do_invalidates(ca);
}
int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
@@ -2204,7 +2261,7 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
if (ret)
return ret;
- now = atomic64_read(&c->io_clock[rw].now);
+ now = bch2_current_io_time(c, rw);
if (a->v.io_time[rw] == now)
goto out;
@@ -2361,16 +2418,20 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
set_bit(ca->dev_idx, c->rw_devs[i].d);
}
-void bch2_fs_allocator_background_exit(struct bch_fs *c)
+void bch2_dev_allocator_background_exit(struct bch_dev *ca)
+{
+ darray_exit(&ca->discard_buckets_in_flight);
+}
+
+void bch2_dev_allocator_background_init(struct bch_dev *ca)
{
- darray_exit(&c->discard_buckets_in_flight);
+ mutex_init(&ca->discard_buckets_in_flight_lock);
+ INIT_WORK(&ca->discard_work, bch2_do_discards_work);
+ INIT_WORK(&ca->discard_fast_work, bch2_do_discards_fast_work);
+ INIT_WORK(&ca->invalidate_work, bch2_do_invalidates_work);
}
void bch2_fs_allocator_background_init(struct bch_fs *c)
{
spin_lock_init(&c->freelist_lock);
- mutex_init(&c->discard_buckets_in_flight_lock);
- INIT_WORK(&c->discard_work, bch2_do_discards_work);
- INIT_WORK(&c->discard_fast_work, bch2_do_discards_fast_work);
- INIT_WORK(&c->invalidate_work, bch2_do_invalidates_work);
}
diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h
index ae31a94be6f9..ba2c5557a3f0 100644
--- a/fs/bcachefs/alloc_background.h
+++ b/fs/bcachefs/alloc_background.h
@@ -141,7 +141,13 @@ static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a,
!bch2_bucket_sectors_fragmented(ca, a))
return 0;
- u64 d = bch2_bucket_sectors_dirty(a);
+ /*
+ * avoid overflowing LRU_TIME_BITS on a corrupted fs, when
+ * bucket_sectors_dirty is (much) bigger than bucket_size
+ */
+ u64 d = min(bch2_bucket_sectors_dirty(a),
+ ca->mi.bucket_size);
+
return div_u64(d * (1ULL << 31), ca->mi.bucket_size);
}
@@ -269,6 +275,7 @@ int bch2_trigger_alloc(struct btree_trans *, enum btree_id, unsigned,
enum btree_iter_update_trigger_flags);
int bch2_check_alloc_info(struct bch_fs *);
int bch2_check_alloc_to_lru_refs(struct bch_fs *);
+void bch2_dev_do_discards(struct bch_dev *);
void bch2_do_discards(struct bch_fs *);
static inline u64 should_invalidate_buckets(struct bch_dev *ca,
@@ -283,6 +290,7 @@ static inline u64 should_invalidate_buckets(struct bch_dev *ca,
return clamp_t(s64, want_free - free, 0, u.d[BCH_DATA_cached].buckets);
}
+void bch2_dev_do_invalidates(struct bch_dev *);
void bch2_do_invalidates(struct bch_fs *);
static inline struct bch_backpointer *alloc_v4_backpointers(struct bch_alloc_v4 *a)
@@ -306,7 +314,9 @@ u64 bch2_min_rw_member_capacity(struct bch_fs *);
void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *);
void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *);
-void bch2_fs_allocator_background_exit(struct bch_fs *);
+void bch2_dev_allocator_background_exit(struct bch_dev *);
+void bch2_dev_allocator_background_init(struct bch_dev *);
+
void bch2_fs_allocator_background_init(struct bch_fs *);
#endif /* _BCACHEFS_ALLOC_BACKGROUND_H */
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 927a5f300b30..9d3d64746a5b 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -621,13 +621,13 @@ again:
avail = dev_buckets_free(ca, *usage, watermark);
if (usage->d[BCH_DATA_need_discard].buckets > avail)
- bch2_do_discards(c);
+ bch2_dev_do_discards(ca);
if (usage->d[BCH_DATA_need_gc_gens].buckets > avail)
bch2_gc_gens_async(c);
if (should_invalidate_buckets(ca, *usage))
- bch2_do_invalidates(c);
+ bch2_dev_do_invalidates(ca);
if (!avail) {
if (cl && !waiting) {
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 2992a644d822..1106fec6e155 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -493,6 +493,11 @@ struct io_count {
u64 sectors[2][BCH_DATA_NR];
};
+struct discard_in_flight {
+ bool in_progress:1;
+ u64 bucket:63;
+};
+
struct bch_dev {
struct kobject kobj;
#ifdef CONFIG_BCACHEFS_DEBUG
@@ -554,6 +559,12 @@ struct bch_dev {
size_t inc_gen_really_needs_gc;
size_t buckets_waiting_on_journal;
+ struct work_struct invalidate_work;
+ struct work_struct discard_work;
+ struct mutex discard_buckets_in_flight_lock;
+ DARRAY(struct discard_in_flight) discard_buckets_in_flight;
+ struct work_struct discard_fast_work;
+
atomic64_t rebalance_work;
struct journal_device journal;
@@ -915,11 +926,6 @@ struct bch_fs {
unsigned write_points_nr;
struct buckets_waiting_for_journal buckets_waiting_for_journal;
- struct work_struct invalidate_work;
- struct work_struct discard_work;
- struct mutex discard_buckets_in_flight_lock;
- DARRAY(struct bpos) discard_buckets_in_flight;
- struct work_struct discard_fast_work;
/* GARBAGE COLLECTION */
struct work_struct gc_gens_work;
@@ -1214,6 +1220,11 @@ static inline s64 bch2_current_time(const struct bch_fs *c)
return timespec_to_bch2_time(c, now);
}
+static inline u64 bch2_current_io_time(const struct bch_fs *c, int rw)
+{
+ return max(1ULL, (u64) atomic64_read(&c->io_clock[rw].now) & LRU_TIME_MAX);
+}
+
static inline struct stdio_redirect *bch2_fs_stdio_redirect(struct bch_fs *c)
{
struct stdio_redirect *stdio = c->stdio;
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 90c12fe2a2cd..e3b1bde489c3 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -476,6 +476,9 @@ struct bch_lru {
#define LRU_ID_STRIPES (1U << 16)
+#define LRU_TIME_BITS 48
+#define LRU_TIME_MAX ((1ULL << LRU_TIME_BITS) - 1)
+
/* Optional/variable size superblock sections: */
struct bch_sb_field {
@@ -987,8 +990,9 @@ enum bch_version_upgrade_opts {
#define BCH_ERROR_ACTIONS() \
x(continue, 0) \
- x(ro, 1) \
- x(panic, 2)
+ x(fix_safe, 1) \
+ x(panic, 2) \
+ x(ro, 3)
enum bch_error_actions {
#define x(t, n) BCH_ON_ERROR_##t = n,
@@ -1382,9 +1386,10 @@ enum btree_id {
/*
* Maximum number of btrees that we will _ever_ have under the current scheme,
- * where we refer to them with bitfields
+ * where we refer to them with 64 bit bitfields - and we also need a bit for
+ * the interior btree node type:
*/
-#define BTREE_ID_NR_MAX 64
+#define BTREE_ID_NR_MAX 63
static inline bool btree_id_is_alloc(enum btree_id id)
{
diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c
index f46978e5cb7c..94a1d1982fa8 100644
--- a/fs/bcachefs/bkey.c
+++ b/fs/bcachefs/bkey.c
@@ -1064,7 +1064,7 @@ void bch2_bkey_swab_key(const struct bkey_format *_f, struct bkey_packed *k)
{
const struct bkey_format *f = bkey_packed(k) ? _f : &bch2_bkey_format_current;
u8 *l = k->key_start;
- u8 *h = (u8 *) (k->_data + f->key_u64s) - 1;
+ u8 *h = (u8 *) ((u64 *) k->_data + f->key_u64s) - 1;
while (l < h) {
swap(*l, *h);
diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c
index c2c3dae52186..bd32aac05192 100644
--- a/fs/bcachefs/bkey_methods.c
+++ b/fs/bcachefs/bkey_methods.c
@@ -398,8 +398,12 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id,
for (i = 0; i < nr_compat; i++)
switch (!write ? i : nr_compat - 1 - i) {
case 0:
- if (big_endian != CPU_BIG_ENDIAN)
+ if (big_endian != CPU_BIG_ENDIAN) {
+ bch2_bkey_swab_key(f, k);
+ } else if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
bch2_bkey_swab_key(f, k);
+ bch2_bkey_swab_key(f, k);
+ }
break;
case 1:
if (version < bcachefs_metadata_version_bkey_renumber)
diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h
index 726ef7483763..baef0722f5fb 100644
--- a/fs/bcachefs/bkey_methods.h
+++ b/fs/bcachefs/bkey_methods.h
@@ -129,7 +129,8 @@ static inline void bch2_bkey_compat(unsigned level, enum btree_id btree_id,
struct bkey_packed *k)
{
if (version < bcachefs_metadata_version_current ||
- big_endian != CPU_BIG_ENDIAN)
+ big_endian != CPU_BIG_ENDIAN ||
+ IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
__bch2_bkey_compat(level, btree_id, version,
big_endian, write, f, k);
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 3694c600a3ad..0ed9e6574fcd 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -3130,7 +3130,6 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
trans = mempool_alloc(&c->btree_trans_pool, GFP_NOFS);
memset(trans, 0, sizeof(*trans));
- closure_init_stack(&trans->ref);
seqmutex_lock(&c->btree_trans_lock);
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
@@ -3150,15 +3149,10 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
BUG_ON(pos_task &&
pid == pos_task->pid &&
pos->locked);
-
- if (pos_task && pid < pos_task->pid) {
- list_add_tail(&trans->list, &pos->list);
- goto list_add_done;
- }
}
}
- list_add_tail(&trans->list, &c->btree_trans_list);
-list_add_done:
+
+ list_add(&trans->list, &c->btree_trans_list);
seqmutex_unlock(&c->btree_trans_lock);
got_trans:
trans->c = c;
@@ -3199,6 +3193,8 @@ got_trans:
trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
trans->srcu_lock_time = jiffies;
trans->srcu_held = true;
+
+ closure_init_stack_release(&trans->ref);
return trans;
}
@@ -3235,7 +3231,6 @@ void bch2_trans_put(struct btree_trans *trans)
trans_for_each_update(trans, i)
__btree_path_put(trans->paths + i->path, true);
trans->nr_updates = 0;
- trans->locking_wait.task = NULL;
check_btree_paths_leaked(trans);
@@ -3256,6 +3251,13 @@ void bch2_trans_put(struct btree_trans *trans)
if (unlikely(trans->journal_replay_not_finished))
bch2_journal_keys_put(c);
+ /*
+ * trans->ref protects trans->locking_wait.task, btree_paths array; used
+ * by cycle detector
+ */
+ closure_return_sync(&trans->ref);
+ trans->locking_wait.task = NULL;
+
unsigned long *paths_allocated = trans->paths_allocated;
trans->paths_allocated = NULL;
trans->paths = NULL;
@@ -3273,8 +3275,6 @@ void bch2_trans_put(struct btree_trans *trans)
trans = this_cpu_xchg(c->btree_trans_bufs->trans, trans);
if (trans) {
- closure_sync(&trans->ref);
-
seqmutex_lock(&c->btree_trans_lock);
list_del(&trans->list);
seqmutex_unlock(&c->btree_trans_lock);
@@ -3380,8 +3380,6 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
per_cpu_ptr(c->btree_trans_bufs, cpu)->trans;
if (trans) {
- closure_sync(&trans->ref);
-
seqmutex_lock(&c->btree_trans_lock);
list_del(&trans->list);
seqmutex_unlock(&c->btree_trans_lock);
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index d63db4fefe73..87f485e9c552 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -761,13 +761,13 @@ static inline bool btree_node_type_needs_gc(enum btree_node_type type)
static inline bool btree_node_type_is_extents(enum btree_node_type type)
{
- const unsigned mask = 0
+ const u64 mask = 0
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_EXTENTS)) << (nr + 1))
BCH_BTREE_IDS()
#undef x
;
- return (1U << type) & mask;
+ return BIT_ULL(type) & mask;
}
static inline bool btree_id_is_extents(enum btree_id btree)
@@ -777,35 +777,35 @@ static inline bool btree_id_is_extents(enum btree_id btree)
static inline bool btree_type_has_snapshots(enum btree_id id)
{
- const unsigned mask = 0
+ const u64 mask = 0
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_SNAPSHOTS)) << nr)
BCH_BTREE_IDS()
#undef x
;
- return (1U << id) & mask;
+ return BIT_ULL(id) & mask;
}
static inline bool btree_type_has_snapshot_field(enum btree_id id)
{
- const unsigned mask = 0
+ const u64 mask = 0
#define x(name, nr, flags, ...) |((!!((flags) & (BTREE_ID_SNAPSHOT_FIELD|BTREE_ID_SNAPSHOTS))) << nr)
BCH_BTREE_IDS()
#undef x
;
- return (1U << id) & mask;
+ return BIT_ULL(id) & mask;
}
static inline bool btree_type_has_ptrs(enum btree_id id)
{
- const unsigned mask = 0
+ const u64 mask = 0
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_DATA)) << nr)
BCH_BTREE_IDS()
#undef x
;
- return (1U << id) & mask;
+ return BIT_ULL(id) & mask;
}
struct btree_root {
diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c
index 9e54323f0f5f..6d82e1165adc 100644
--- a/fs/bcachefs/chardev.c
+++ b/fs/bcachefs/chardev.c
@@ -216,7 +216,8 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a
ret = PTR_ERR_OR_ZERO(optstr) ?:
bch2_parse_mount_opts(NULL, &thr->opts, optstr);
- kfree(optstr);
+ if (!IS_ERR(optstr))
+ kfree(optstr);
if (ret)
goto err;
@@ -319,7 +320,8 @@ static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg)
return ret;
ret = bch2_dev_add(c, path);
- kfree(path);
+ if (!IS_ERR(path))
+ kfree(path);
return ret;
}
@@ -850,7 +852,8 @@ static long bch2_ioctl_fsck_online(struct bch_fs *c,
ret = PTR_ERR_OR_ZERO(optstr) ?:
bch2_parse_mount_opts(c, &thr->opts, optstr);
- kfree(optstr);
+ if (!IS_ERR(optstr))
+ kfree(optstr);
if (ret)
goto err;
diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c
index 51cbf3928361..f0d4727c4dc2 100644
--- a/fs/bcachefs/debug.c
+++ b/fs/bcachefs/debug.c
@@ -568,6 +568,32 @@ static const struct file_operations cached_btree_nodes_ops = {
.read = bch2_cached_btree_nodes_read,
};
+typedef int (*list_cmp_fn)(const struct list_head *l, const struct list_head *r);
+
+static void list_sort(struct list_head *head, list_cmp_fn cmp)
+{
+ struct list_head *pos;
+
+ list_for_each(pos, head)
+ while (!list_is_last(pos, head) &&
+ cmp(pos, pos->next) > 0) {
+ struct list_head *pos2, *next = pos->next;
+
+ list_del(next);
+ list_for_each(pos2, head)
+ if (cmp(next, pos2) < 0)
+ goto pos_found;
+ BUG();
+pos_found:
+ list_add_tail(next, pos2);
+ }
+}
+
+static int list_ptr_order_cmp(const struct list_head *l, const struct list_head *r)
+{
+ return cmp_int(l, r);
+}
+
static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
size_t size, loff_t *ppos)
{
@@ -575,41 +601,39 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
struct bch_fs *c = i->c;
struct btree_trans *trans;
ssize_t ret = 0;
- u32 seq;
i->ubuf = buf;
i->size = size;
i->ret = 0;
restart:
seqmutex_lock(&c->btree_trans_lock);
- list_for_each_entry(trans, &c->btree_trans_list, list) {
- struct task_struct *task = READ_ONCE(trans->locking_wait.task);
+ list_sort(&c->btree_trans_list, list_ptr_order_cmp);
- if (!task || task->pid <= i->iter)
+ list_for_each_entry(trans, &c->btree_trans_list, list) {
+ if ((ulong) trans < i->iter)
continue;
- closure_get(&trans->ref);
- seq = seqmutex_seq(&c->btree_trans_lock);
- seqmutex_unlock(&c->btree_trans_lock);
+ i->iter = (ulong) trans;
- ret = flush_buf(i);
- if (ret) {
- closure_put(&trans->ref);
- goto unlocked;
- }
+ if (!closure_get_not_zero(&trans->ref))
+ continue;
+
+ u32 seq = seqmutex_unlock(&c->btree_trans_lock);
bch2_btree_trans_to_text(&i->buf, trans);
prt_printf(&i->buf, "backtrace:\n");
printbuf_indent_add(&i->buf, 2);
- bch2_prt_task_backtrace(&i->buf, task, 0, GFP_KERNEL);
+ bch2_prt_task_backtrace(&i->buf, trans->locking_wait.task, 0, GFP_KERNEL);
printbuf_indent_sub(&i->buf, 2);
prt_newline(&i->buf);
- i->iter = task->pid;
-
closure_put(&trans->ref);
+ ret = flush_buf(i);
+ if (ret)
+ goto unlocked;
+
if (!seqmutex_relock(&c->btree_trans_lock, seq))
goto restart;
}
@@ -804,50 +828,55 @@ static const struct file_operations btree_transaction_stats_op = {
.read = btree_transaction_stats_read,
};
-static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,
- size_t size, loff_t *ppos)
+/* walk btree transactions until we find a deadlock and print it */
+static void btree_deadlock_to_text(struct printbuf *out, struct bch_fs *c)
{
- struct dump_iter *i = file->private_data;
- struct bch_fs *c = i->c;
struct btree_trans *trans;
- ssize_t ret = 0;
- u32 seq;
-
- i->ubuf = buf;
- i->size = size;
- i->ret = 0;
-
- if (i->iter)
- goto out;
+ pid_t iter = 0;
restart:
seqmutex_lock(&c->btree_trans_lock);
list_for_each_entry(trans, &c->btree_trans_list, list) {
struct task_struct *task = READ_ONCE(trans->locking_wait.task);
- if (!task || task->pid <= i->iter)
+ if (!task || task->pid <= iter)
continue;
- closure_get(&trans->ref);
- seq = seqmutex_seq(&c->btree_trans_lock);
- seqmutex_unlock(&c->btree_trans_lock);
+ iter = task->pid;
- ret = flush_buf(i);
- if (ret) {
- closure_put(&trans->ref);
- goto out;
- }
+ if (!closure_get_not_zero(&trans->ref))
+ continue;
- bch2_check_for_deadlock(trans, &i->buf);
+ u32 seq = seqmutex_unlock(&c->btree_trans_lock);
- i->iter = task->pid;
+ bool found = bch2_check_for_deadlock(trans, out) != 0;
closure_put(&trans->ref);
+ if (found)
+ return;
+
if (!seqmutex_relock(&c->btree_trans_lock, seq))
goto restart;
}
seqmutex_unlock(&c->btree_trans_lock);
-out:
+}
+
+static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ struct dump_iter *i = file->private_data;
+ struct bch_fs *c = i->c;
+ ssize_t ret = 0;
+
+ i->ubuf = buf;
+ i->size = size;
+ i->ret = 0;
+
+ if (!i->iter) {
+ btree_deadlock_to_text(&i->buf, c);
+ i->iter++;
+ }
+
if (i->buf.allocation_failure)
ret = -ENOMEM;
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index dbe35b80bc0b..58612abf7927 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -116,6 +116,9 @@
x(ENOENT, ENOENT_dev_idx_not_found) \
x(ENOTEMPTY, ENOTEMPTY_dir_not_empty) \
x(ENOTEMPTY, ENOTEMPTY_subvol_not_empty) \
+ x(EEXIST, EEXIST_str_hash_set) \
+ x(EEXIST, EEXIST_discard_in_flight_add) \
+ x(EEXIST, EEXIST_subvolume_create) \
x(0, open_buckets_empty) \
x(0, freelist_empty) \
x(BCH_ERR_freelist_empty, no_buckets_found) \
diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
index c66eeffcd7f2..d95c40f1b6af 100644
--- a/fs/bcachefs/error.c
+++ b/fs/bcachefs/error.c
@@ -15,6 +15,7 @@ bool bch2_inconsistent_error(struct bch_fs *c)
switch (c->opts.errors) {
case BCH_ON_ERROR_continue:
return false;
+ case BCH_ON_ERROR_fix_safe:
case BCH_ON_ERROR_ro:
if (bch2_fs_emergency_read_only(c))
bch_err(c, "inconsistency detected - emergency read only at journal seq %llu",
@@ -191,6 +192,12 @@ static void prt_actioning(struct printbuf *out, const char *action)
prt_str(out, "ing");
}
+static const u8 fsck_flags_extra[] = {
+#define x(t, n, flags) [BCH_FSCK_ERR_##t] = flags,
+ BCH_SB_ERRS()
+#undef x
+};
+
int bch2_fsck_err(struct bch_fs *c,
enum bch_fsck_flags flags,
enum bch_sb_error_id err,
@@ -203,6 +210,9 @@ int bch2_fsck_err(struct bch_fs *c,
int ret = -BCH_ERR_fsck_ignore;
const char *action_orig = "fix?", *action = action_orig;
+ if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra)))
+ flags |= fsck_flags_extra[err];
+
if ((flags & FSCK_CAN_FIX) &&
test_bit(err, c->sb.errors_silent))
return -BCH_ERR_fsck_fix;
@@ -265,7 +275,14 @@ int bch2_fsck_err(struct bch_fs *c,
prt_printf(out, bch2_log_msg(c, ""));
#endif
- if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
+ if ((flags & FSCK_CAN_FIX) &&
+ (flags & FSCK_AUTOFIX) &&
+ (c->opts.errors == BCH_ON_ERROR_continue ||
+ c->opts.errors == BCH_ON_ERROR_fix_safe)) {
+ prt_str(out, ", ");
+ prt_actioning(out, action);
+ ret = -BCH_ERR_fsck_fix;
+ } else if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
if (c->opts.errors != BCH_ON_ERROR_continue ||
!(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) {
prt_str(out, ", shutting down");
diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h
index 36caedf72d89..777711504c35 100644
--- a/fs/bcachefs/error.h
+++ b/fs/bcachefs/error.h
@@ -108,13 +108,6 @@ struct fsck_err_state {
char *last_msg;
};
-enum bch_fsck_flags {
- FSCK_CAN_FIX = 1 << 0,
- FSCK_CAN_IGNORE = 1 << 1,
- FSCK_NEED_FSCK = 1 << 2,
- FSCK_NO_RATELIMIT = 1 << 3,
-};
-
#define fsck_err_count(_c, _err) bch2_sb_err_count(_c, BCH_FSCK_ERR_##_err)
__printf(4, 5) __cold
diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c
index 3551a737181b..79a0c8732bce 100644
--- a/fs/bcachefs/fs-ioctl.c
+++ b/fs/bcachefs/fs-ioctl.c
@@ -373,7 +373,7 @@ retry:
}
if (dst_dentry->d_inode) {
- error = -EEXIST;
+ error = -BCH_ERR_EEXIST_subvolume_create;
goto err3;
}
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 77126992dba8..f9c9a95d7d4c 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -188,6 +188,12 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino
BUG_ON(!old);
if (unlikely(old != inode)) {
+ /*
+ * bcachefs doesn't use I_NEW; we have no use for it since we
+ * only insert fully created inodes in the inode hash table. But
+ * discard_new_inode() expects it to be set...
+ */
+ inode->v.i_flags |= I_NEW;
discard_new_inode(&inode->v);
inode = old;
} else {
@@ -195,8 +201,10 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino
list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
mutex_unlock(&c->vfs_inodes_lock);
/*
- * we really don't want insert_inode_locked2() to be setting
- * I_NEW...
+ * Again, I_NEW makes no sense for bcachefs. This is only needed
+ * for clearing I_NEW, but since the inode was already fully
+ * created and initialized we didn't actually want
+ * inode_insert5() to set it for us.
*/
unlock_new_inode(&inode->v);
}
@@ -1157,6 +1165,7 @@ static const struct file_operations bch_file_operations = {
.read_iter = bch2_read_iter,
.write_iter = bch2_write_iter,
.mmap = bch2_mmap,
+ .get_unmapped_area = thp_get_unmapped_area,
.fsync = bch2_fsync,
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
@@ -1488,11 +1497,6 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
bch2_iget5_set(&inode->v, &inum);
bch2_inode_update_after_write(trans, inode, bi, ~0);
- if (BCH_SUBVOLUME_SNAP(subvol))
- set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
- else
- clear_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
-
inode->v.i_blocks = bi->bi_sectors;
inode->v.i_ino = bi->bi_inum;
inode->v.i_rdev = bi->bi_dev;
@@ -1504,6 +1508,9 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
inode->ei_qid = bch_qid(bi);
inode->ei_subvol = inum.subvol;
+ if (BCH_SUBVOLUME_SNAP(subvol))
+ set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
+
inode->v.i_mapping->a_ops = &bch_address_space_operations;
switch (inode->v.i_mode & S_IFMT) {
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index adec8e1ea73e..13669dd0e375 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -1167,6 +1167,9 @@ void bch2_dev_journal_stop(struct journal *j, struct bch_dev *ca)
void bch2_fs_journal_stop(struct journal *j)
{
+ if (!test_bit(JOURNAL_running, &j->flags))
+ return;
+
bch2_journal_reclaim_stop(j);
bch2_journal_flush_all_pins(j);
@@ -1518,6 +1521,11 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64
struct journal_entry_pin *pin;
spin_lock(&j->lock);
+ if (!test_bit(JOURNAL_running, &j->flags)) {
+ spin_unlock(&j->lock);
+ return true;
+ }
+
*seq = max(*seq, j->pin.front);
if (*seq >= j->pin.back) {
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index cdcb1ad49af4..db24ce21b2ac 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -1677,6 +1677,13 @@ static CLOSURE_CALLBACK(journal_write_done)
mod_delayed_work(j->wq, &j->write_work, max(0L, delta));
}
+ /*
+ * We don't typically trigger journal writes from her - the next journal
+ * write will be triggered immediately after the previous one is
+ * allocated, in bch2_journal_write() - but the journal write error path
+ * is special:
+ */
+ bch2_journal_do_writes(j);
spin_unlock(&j->lock);
}
@@ -1967,7 +1974,6 @@ CLOSURE_CALLBACK(bch2_journal_write)
struct journal *j = container_of(w, struct journal, buf[w->idx]);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_replicas_padded replicas;
- struct printbuf journal_debug_buf = PRINTBUF;
unsigned nr_rw_members = 0;
int ret;
@@ -2011,11 +2017,15 @@ CLOSURE_CALLBACK(bch2_journal_write)
}
if (ret) {
- __bch2_journal_debug_to_text(&journal_debug_buf, j);
+ struct printbuf buf = PRINTBUF;
+ buf.atomic++;
+
+ prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write: %s"),
+ bch2_err_str(ret));
+ __bch2_journal_debug_to_text(&buf, j);
spin_unlock(&j->lock);
- bch_err(c, "Unable to allocate journal write:\n%s",
- journal_debug_buf.buf);
- printbuf_exit(&journal_debug_buf);
+ bch2_print_string_as_lines(KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
goto err;
}
diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c
index ed4846709611..1f25c111c54c 100644
--- a/fs/bcachefs/journal_seq_blacklist.c
+++ b/fs/bcachefs/journal_seq_blacklist.c
@@ -232,7 +232,7 @@ bool bch2_blacklist_entries_gc(struct bch_fs *c)
BUG_ON(nr != t->nr);
unsigned i;
- for (src = bl->start, i = eytzinger0_first(t->nr);
+ for (src = bl->start, i = t->nr == 0 ? 0 : eytzinger0_first(t->nr);
src < bl->start + nr;
src++, i = eytzinger0_next(i, nr)) {
BUG_ON(t->entries[i].start != le64_to_cpu(src->start));
diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h
index fb11ab0dd00e..bd71ba77de07 100644
--- a/fs/bcachefs/lru.h
+++ b/fs/bcachefs/lru.h
@@ -2,9 +2,6 @@
#ifndef _BCACHEFS_LRU_H
#define _BCACHEFS_LRU_H
-#define LRU_TIME_BITS 48
-#define LRU_TIME_MAX ((1ULL << LRU_TIME_BITS) - 1)
-
static inline u64 lru_pos_id(struct bpos pos)
{
return pos.inode >> LRU_TIME_BITS;
diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h
index 25530e0bb2f3..b197ec90d4cb 100644
--- a/fs/bcachefs/opts.h
+++ b/fs/bcachefs/opts.h
@@ -137,7 +137,7 @@ enum fsck_err_opts {
x(errors, u8, \
OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
OPT_STR(bch2_error_actions), \
- BCH_SB_ERROR_ACTION, BCH_ON_ERROR_ro, \
+ BCH_SB_ERROR_ACTION, BCH_ON_ERROR_fix_safe, \
NULL, "Action to take on filesystem error") \
x(metadata_replicas, u8, \
OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index cf513fc79ce4..1f9d044ed920 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -326,6 +326,12 @@ static int journal_replay_entry_early(struct bch_fs *c,
case BCH_JSET_ENTRY_btree_root: {
struct btree_root *r;
+ if (fsck_err_on(entry->btree_id >= BTREE_ID_NR_MAX,
+ c, invalid_btree_id,
+ "invalid btree id %u (max %u)",
+ entry->btree_id, BTREE_ID_NR_MAX))
+ return 0;
+
while (entry->btree_id >= c->btree_roots_extra.nr + BTREE_ID_NR) {
ret = darray_push(&c->btree_roots_extra, (struct btree_root) { NULL });
if (ret)
@@ -415,7 +421,7 @@ static int journal_replay_entry_early(struct bch_fs *c,
atomic64_set(&c->io_clock[clock->rw].now, le64_to_cpu(clock->time));
}
}
-
+fsck_err:
return ret;
}
@@ -658,10 +664,10 @@ int bch2_fs_recovery(struct bch_fs *c)
if (check_version_upgrade(c))
write_sb = true;
+ c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
+
if (write_sb)
bch2_write_super(c);
-
- c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
mutex_unlock(&c->sb_lock);
if (c->opts.fsck && IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c
index 3fb23e399ffb..4710b61631f0 100644
--- a/fs/bcachefs/sb-downgrade.c
+++ b/fs/bcachefs/sb-downgrade.c
@@ -228,7 +228,7 @@ int bch2_sb_downgrade_update(struct bch_fs *c)
dst = (void *) &darray_top(table);
dst->version = cpu_to_le16(src->version);
- dst->recovery_passes[0] = cpu_to_le64(src->recovery_passes);
+ dst->recovery_passes[0] = cpu_to_le64(bch2_recovery_passes_to_stable(src->recovery_passes));
dst->recovery_passes[1] = 0;
dst->nr_errors = cpu_to_le16(src->nr_errors);
for (unsigned i = 0; i < src->nr_errors; i++)
diff --git a/fs/bcachefs/sb-errors.c b/fs/bcachefs/sb-errors.c
index bda33e59e226..c1270d790e43 100644
--- a/fs/bcachefs/sb-errors.c
+++ b/fs/bcachefs/sb-errors.c
@@ -110,19 +110,25 @@ out:
void bch2_sb_errors_from_cpu(struct bch_fs *c)
{
bch_sb_errors_cpu *src = &c->fsck_error_counts;
- struct bch_sb_field_errors *dst =
- bch2_sb_field_resize(&c->disk_sb, errors,
- bch2_sb_field_errors_u64s(src->nr));
+ struct bch_sb_field_errors *dst;
unsigned i;
+ mutex_lock(&c->fsck_error_counts_lock);
+
+ dst = bch2_sb_field_resize(&c->disk_sb, errors,
+ bch2_sb_field_errors_u64s(src->nr));
+
if (!dst)
- return;
+ goto err;
for (i = 0; i < src->nr; i++) {
SET_BCH_SB_ERROR_ENTRY_ID(&dst->entries[i], src->data[i].id);
SET_BCH_SB_ERROR_ENTRY_NR(&dst->entries[i], src->data[i].nr);
dst->entries[i].last_error_time = cpu_to_le64(src->data[i].last_error_time);
}
+
+err:
+ mutex_unlock(&c->fsck_error_counts_lock);
}
static int bch2_sb_errors_to_cpu(struct bch_fs *c)
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index 84d2763bd597..d6f35a99c429 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -2,281 +2,294 @@
#ifndef _BCACHEFS_SB_ERRORS_FORMAT_H
#define _BCACHEFS_SB_ERRORS_FORMAT_H
-#define BCH_SB_ERRS() \
- x(clean_but_journal_not_empty, 0) \
- x(dirty_but_no_journal_entries, 1) \
- x(dirty_but_no_journal_entries_post_drop_nonflushes, 2) \
- x(sb_clean_journal_seq_mismatch, 3) \
- x(sb_clean_btree_root_mismatch, 4) \
- x(sb_clean_missing, 5) \
- x(jset_unsupported_version, 6) \
- x(jset_unknown_csum, 7) \
- x(jset_last_seq_newer_than_seq, 8) \
- x(jset_past_bucket_end, 9) \
- x(jset_seq_blacklisted, 10) \
- x(journal_entries_missing, 11) \
- x(journal_entry_replicas_not_marked, 12) \
- x(journal_entry_past_jset_end, 13) \
- x(journal_entry_replicas_data_mismatch, 14) \
- x(journal_entry_bkey_u64s_0, 15) \
- x(journal_entry_bkey_past_end, 16) \
- x(journal_entry_bkey_bad_format, 17) \
- x(journal_entry_bkey_invalid, 18) \
- x(journal_entry_btree_root_bad_size, 19) \
- x(journal_entry_blacklist_bad_size, 20) \
- x(journal_entry_blacklist_v2_bad_size, 21) \
- x(journal_entry_blacklist_v2_start_past_end, 22) \
- x(journal_entry_usage_bad_size, 23) \
- x(journal_entry_data_usage_bad_size, 24) \
- x(journal_entry_clock_bad_size, 25) \
- x(journal_entry_clock_bad_rw, 26) \
- x(journal_entry_dev_usage_bad_size, 27) \
- x(journal_entry_dev_usage_bad_dev, 28) \
- x(journal_entry_dev_usage_bad_pad, 29) \
- x(btree_node_unreadable, 30) \
- x(btree_node_fault_injected, 31) \
- x(btree_node_bad_magic, 32) \
- x(btree_node_bad_seq, 33) \
- x(btree_node_unsupported_version, 34) \
- x(btree_node_bset_older_than_sb_min, 35) \
- x(btree_node_bset_newer_than_sb, 36) \
- x(btree_node_data_missing, 37) \
- x(btree_node_bset_after_end, 38) \
- x(btree_node_replicas_sectors_written_mismatch, 39) \
- x(btree_node_replicas_data_mismatch, 40) \
- x(bset_unknown_csum, 41) \
- x(bset_bad_csum, 42) \
- x(bset_past_end_of_btree_node, 43) \
- x(bset_wrong_sector_offset, 44) \
- x(bset_empty, 45) \
- x(bset_bad_seq, 46) \
- x(bset_blacklisted_journal_seq, 47) \
- x(first_bset_blacklisted_journal_seq, 48) \
- x(btree_node_bad_btree, 49) \
- x(btree_node_bad_level, 50) \
- x(btree_node_bad_min_key, 51) \
- x(btree_node_bad_max_key, 52) \
- x(btree_node_bad_format, 53) \
- x(btree_node_bkey_past_bset_end, 54) \
- x(btree_node_bkey_bad_format, 55) \
- x(btree_node_bad_bkey, 56) \
- x(btree_node_bkey_out_of_order, 57) \
- x(btree_root_bkey_invalid, 58) \
- x(btree_root_read_error, 59) \
- x(btree_root_bad_min_key, 60) \
- x(btree_root_bad_max_key, 61) \
- x(btree_node_read_error, 62) \
- x(btree_node_topology_bad_min_key, 63) \
- x(btree_node_topology_bad_max_key, 64) \
- x(btree_node_topology_overwritten_by_prev_node, 65) \
- x(btree_node_topology_overwritten_by_next_node, 66) \
- x(btree_node_topology_interior_node_empty, 67) \
- x(fs_usage_hidden_wrong, 68) \
- x(fs_usage_btree_wrong, 69) \
- x(fs_usage_data_wrong, 70) \
- x(fs_usage_cached_wrong, 71) \
- x(fs_usage_reserved_wrong, 72) \
- x(fs_usage_persistent_reserved_wrong, 73) \
- x(fs_usage_nr_inodes_wrong, 74) \
- x(fs_usage_replicas_wrong, 75) \
- x(dev_usage_buckets_wrong, 76) \
- x(dev_usage_sectors_wrong, 77) \
- x(dev_usage_fragmented_wrong, 78) \
- x(dev_usage_buckets_ec_wrong, 79) \
- x(bkey_version_in_future, 80) \
- x(bkey_u64s_too_small, 81) \
- x(bkey_invalid_type_for_btree, 82) \
- x(bkey_extent_size_zero, 83) \
- x(bkey_extent_size_greater_than_offset, 84) \
- x(bkey_size_nonzero, 85) \
- x(bkey_snapshot_nonzero, 86) \
- x(bkey_snapshot_zero, 87) \
- x(bkey_at_pos_max, 88) \
- x(bkey_before_start_of_btree_node, 89) \
- x(bkey_after_end_of_btree_node, 90) \
- x(bkey_val_size_nonzero, 91) \
- x(bkey_val_size_too_small, 92) \
- x(alloc_v1_val_size_bad, 93) \
- x(alloc_v2_unpack_error, 94) \
- x(alloc_v3_unpack_error, 95) \
- x(alloc_v4_val_size_bad, 96) \
- x(alloc_v4_backpointers_start_bad, 97) \
- x(alloc_key_data_type_bad, 98) \
- x(alloc_key_empty_but_have_data, 99) \
- x(alloc_key_dirty_sectors_0, 100) \
- x(alloc_key_data_type_inconsistency, 101) \
- x(alloc_key_to_missing_dev_bucket, 102) \
- x(alloc_key_cached_inconsistency, 103) \
- x(alloc_key_cached_but_read_time_zero, 104) \
- x(alloc_key_to_missing_lru_entry, 105) \
- x(alloc_key_data_type_wrong, 106) \
- x(alloc_key_gen_wrong, 107) \
- x(alloc_key_dirty_sectors_wrong, 108) \
- x(alloc_key_cached_sectors_wrong, 109) \
- x(alloc_key_stripe_wrong, 110) \
- x(alloc_key_stripe_redundancy_wrong, 111) \
- x(bucket_sector_count_overflow, 112) \
- x(bucket_metadata_type_mismatch, 113) \
- x(need_discard_key_wrong, 114) \
- x(freespace_key_wrong, 115) \
- x(freespace_hole_missing, 116) \
- x(bucket_gens_val_size_bad, 117) \
- x(bucket_gens_key_wrong, 118) \
- x(bucket_gens_hole_wrong, 119) \
- x(bucket_gens_to_invalid_dev, 120) \
- x(bucket_gens_to_invalid_buckets, 121) \
- x(bucket_gens_nonzero_for_invalid_buckets, 122) \
- x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \
- x(need_discard_freespace_key_bad, 124) \
- x(backpointer_bucket_offset_wrong, 125) \
- x(backpointer_to_missing_device, 126) \
- x(backpointer_to_missing_alloc, 127) \
- x(backpointer_to_missing_ptr, 128) \
- x(lru_entry_at_time_0, 129) \
- x(lru_entry_to_invalid_bucket, 130) \
- x(lru_entry_bad, 131) \
- x(btree_ptr_val_too_big, 132) \
- x(btree_ptr_v2_val_too_big, 133) \
- x(btree_ptr_has_non_ptr, 134) \
- x(extent_ptrs_invalid_entry, 135) \
- x(extent_ptrs_no_ptrs, 136) \
- x(extent_ptrs_too_many_ptrs, 137) \
- x(extent_ptrs_redundant_crc, 138) \
- x(extent_ptrs_redundant_stripe, 139) \
- x(extent_ptrs_unwritten, 140) \
- x(extent_ptrs_written_and_unwritten, 141) \
- x(ptr_to_invalid_device, 142) \
- x(ptr_to_duplicate_device, 143) \
- x(ptr_after_last_bucket, 144) \
- x(ptr_before_first_bucket, 145) \
- x(ptr_spans_multiple_buckets, 146) \
- x(ptr_to_missing_backpointer, 147) \
- x(ptr_to_missing_alloc_key, 148) \
- x(ptr_to_missing_replicas_entry, 149) \
- x(ptr_to_missing_stripe, 150) \
- x(ptr_to_incorrect_stripe, 151) \
- x(ptr_gen_newer_than_bucket_gen, 152) \
- x(ptr_too_stale, 153) \
- x(stale_dirty_ptr, 154) \
- x(ptr_bucket_data_type_mismatch, 155) \
- x(ptr_cached_and_erasure_coded, 156) \
- x(ptr_crc_uncompressed_size_too_small, 157) \
- x(ptr_crc_csum_type_unknown, 158) \
- x(ptr_crc_compression_type_unknown, 159) \
- x(ptr_crc_redundant, 160) \
- x(ptr_crc_uncompressed_size_too_big, 161) \
- x(ptr_crc_nonce_mismatch, 162) \
- x(ptr_stripe_redundant, 163) \
- x(reservation_key_nr_replicas_invalid, 164) \
- x(reflink_v_refcount_wrong, 165) \
- x(reflink_p_to_missing_reflink_v, 166) \
- x(stripe_pos_bad, 167) \
- x(stripe_val_size_bad, 168) \
- x(stripe_sector_count_wrong, 169) \
- x(snapshot_tree_pos_bad, 170) \
- x(snapshot_tree_to_missing_snapshot, 171) \
- x(snapshot_tree_to_missing_subvol, 172) \
- x(snapshot_tree_to_wrong_subvol, 173) \
- x(snapshot_tree_to_snapshot_subvol, 174) \
- x(snapshot_pos_bad, 175) \
- x(snapshot_parent_bad, 176) \
- x(snapshot_children_not_normalized, 177) \
- x(snapshot_child_duplicate, 178) \
- x(snapshot_child_bad, 179) \
- x(snapshot_skiplist_not_normalized, 180) \
- x(snapshot_skiplist_bad, 181) \
- x(snapshot_should_not_have_subvol, 182) \
- x(snapshot_to_bad_snapshot_tree, 183) \
- x(snapshot_bad_depth, 184) \
- x(snapshot_bad_skiplist, 185) \
- x(subvol_pos_bad, 186) \
- x(subvol_not_master_and_not_snapshot, 187) \
- x(subvol_to_missing_root, 188) \
- x(subvol_root_wrong_bi_subvol, 189) \
- x(bkey_in_missing_snapshot, 190) \
- x(inode_pos_inode_nonzero, 191) \
- x(inode_pos_blockdev_range, 192) \
- x(inode_unpack_error, 193) \
- x(inode_str_hash_invalid, 194) \
- x(inode_v3_fields_start_bad, 195) \
- x(inode_snapshot_mismatch, 196) \
- x(inode_unlinked_but_clean, 197) \
- x(inode_unlinked_but_nlink_nonzero, 198) \
- x(inode_checksum_type_invalid, 199) \
- x(inode_compression_type_invalid, 200) \
- x(inode_subvol_root_but_not_dir, 201) \
- x(inode_i_size_dirty_but_clean, 202) \
- x(inode_i_sectors_dirty_but_clean, 203) \
- x(inode_i_sectors_wrong, 204) \
- x(inode_dir_wrong_nlink, 205) \
- x(inode_dir_multiple_links, 206) \
- x(inode_multiple_links_but_nlink_0, 207) \
- x(inode_wrong_backpointer, 208) \
- x(inode_wrong_nlink, 209) \
- x(inode_unreachable, 210) \
- x(deleted_inode_but_clean, 211) \
- x(deleted_inode_missing, 212) \
- x(deleted_inode_is_dir, 213) \
- x(deleted_inode_not_unlinked, 214) \
- x(extent_overlapping, 215) \
- x(extent_in_missing_inode, 216) \
- x(extent_in_non_reg_inode, 217) \
- x(extent_past_end_of_inode, 218) \
- x(dirent_empty_name, 219) \
- x(dirent_val_too_big, 220) \
- x(dirent_name_too_long, 221) \
- x(dirent_name_embedded_nul, 222) \
- x(dirent_name_dot_or_dotdot, 223) \
- x(dirent_name_has_slash, 224) \
- x(dirent_d_type_wrong, 225) \
- x(inode_bi_parent_wrong, 226) \
- x(dirent_in_missing_dir_inode, 227) \
- x(dirent_in_non_dir_inode, 228) \
- x(dirent_to_missing_inode, 229) \
- x(dirent_to_missing_subvol, 230) \
- x(dirent_to_itself, 231) \
- x(quota_type_invalid, 232) \
- x(xattr_val_size_too_small, 233) \
- x(xattr_val_size_too_big, 234) \
- x(xattr_invalid_type, 235) \
- x(xattr_name_invalid_chars, 236) \
- x(xattr_in_missing_inode, 237) \
- x(root_subvol_missing, 238) \
- x(root_dir_missing, 239) \
- x(root_inode_not_dir, 240) \
- x(dir_loop, 241) \
- x(hash_table_key_duplicate, 242) \
- x(hash_table_key_wrong_offset, 243) \
- x(unlinked_inode_not_on_deleted_list, 244) \
- x(reflink_p_front_pad_bad, 245) \
- x(journal_entry_dup_same_device, 246) \
- x(inode_bi_subvol_missing, 247) \
- x(inode_bi_subvol_wrong, 248) \
- x(inode_points_to_missing_dirent, 249) \
- x(inode_points_to_wrong_dirent, 250) \
- x(inode_bi_parent_nonzero, 251) \
- x(dirent_to_missing_parent_subvol, 252) \
- x(dirent_not_visible_in_parent_subvol, 253) \
- x(subvol_fs_path_parent_wrong, 254) \
- x(subvol_root_fs_path_parent_nonzero, 255) \
- x(subvol_children_not_set, 256) \
- x(subvol_children_bad, 257) \
- x(subvol_loop, 258) \
- x(subvol_unreachable, 259) \
- x(btree_node_bkey_bad_u64s, 260) \
- x(btree_node_topology_empty_interior_node, 261) \
- x(btree_ptr_v2_min_key_bad, 262) \
- x(btree_root_unreadable_and_scan_found_nothing, 263) \
- x(snapshot_node_missing, 264) \
- x(dup_backpointer_to_bad_csum_extent, 265) \
- x(btree_bitmap_not_marked, 266) \
- x(sb_clean_entry_overrun, 267) \
- x(btree_ptr_v2_written_0, 268) \
- x(subvol_snapshot_bad, 269) \
- x(subvol_inode_bad, 270)
+enum bch_fsck_flags {
+ FSCK_CAN_FIX = 1 << 0,
+ FSCK_CAN_IGNORE = 1 << 1,
+ FSCK_NEED_FSCK = 1 << 2,
+ FSCK_NO_RATELIMIT = 1 << 3,
+ FSCK_AUTOFIX = 1 << 4,
+};
+
+#define BCH_SB_ERRS() \
+ x(clean_but_journal_not_empty, 0, 0) \
+ x(dirty_but_no_journal_entries, 1, 0) \
+ x(dirty_but_no_journal_entries_post_drop_nonflushes, 2, 0) \
+ x(sb_clean_journal_seq_mismatch, 3, 0) \
+ x(sb_clean_btree_root_mismatch, 4, 0) \
+ x(sb_clean_missing, 5, 0) \
+ x(jset_unsupported_version, 6, 0) \
+ x(jset_unknown_csum, 7, 0) \
+ x(jset_last_seq_newer_than_seq, 8, 0) \
+ x(jset_past_bucket_end, 9, 0) \
+ x(jset_seq_blacklisted, 10, 0) \
+ x(journal_entries_missing, 11, 0) \
+ x(journal_entry_replicas_not_marked, 12, 0) \
+ x(journal_entry_past_jset_end, 13, 0) \
+ x(journal_entry_replicas_data_mismatch, 14, 0) \
+ x(journal_entry_bkey_u64s_0, 15, 0) \
+ x(journal_entry_bkey_past_end, 16, 0) \
+ x(journal_entry_bkey_bad_format, 17, 0) \
+ x(journal_entry_bkey_invalid, 18, 0) \
+ x(journal_entry_btree_root_bad_size, 19, 0) \
+ x(journal_entry_blacklist_bad_size, 20, 0) \
+ x(journal_entry_blacklist_v2_bad_size, 21, 0) \
+ x(journal_entry_blacklist_v2_start_past_end, 22, 0) \
+ x(journal_entry_usage_bad_size, 23, 0) \
+ x(journal_entry_data_usage_bad_size, 24, 0) \
+ x(journal_entry_clock_bad_size, 25, 0) \
+ x(journal_entry_clock_bad_rw, 26, 0) \
+ x(journal_entry_dev_usage_bad_size, 27, 0) \
+ x(journal_entry_dev_usage_bad_dev, 28, 0) \
+ x(journal_entry_dev_usage_bad_pad, 29, 0) \
+ x(btree_node_unreadable, 30, 0) \
+ x(btree_node_fault_injected, 31, 0) \
+ x(btree_node_bad_magic, 32, 0) \
+ x(btree_node_bad_seq, 33, 0) \
+ x(btree_node_unsupported_version, 34, 0) \
+ x(btree_node_bset_older_than_sb_min, 35, 0) \
+ x(btree_node_bset_newer_than_sb, 36, 0) \
+ x(btree_node_data_missing, 37, 0) \
+ x(btree_node_bset_after_end, 38, 0) \
+ x(btree_node_replicas_sectors_written_mismatch, 39, 0) \
+ x(btree_node_replicas_data_mismatch, 40, 0) \
+ x(bset_unknown_csum, 41, 0) \
+ x(bset_bad_csum, 42, 0) \
+ x(bset_past_end_of_btree_node, 43, 0) \
+ x(bset_wrong_sector_offset, 44, 0) \
+ x(bset_empty, 45, 0) \
+ x(bset_bad_seq, 46, 0) \
+ x(bset_blacklisted_journal_seq, 47, 0) \
+ x(first_bset_blacklisted_journal_seq, 48, 0) \
+ x(btree_node_bad_btree, 49, 0) \
+ x(btree_node_bad_level, 50, 0) \
+ x(btree_node_bad_min_key, 51, 0) \
+ x(btree_node_bad_max_key, 52, 0) \
+ x(btree_node_bad_format, 53, 0) \
+ x(btree_node_bkey_past_bset_end, 54, 0) \
+ x(btree_node_bkey_bad_format, 55, 0) \
+ x(btree_node_bad_bkey, 56, 0) \
+ x(btree_node_bkey_out_of_order, 57, 0) \
+ x(btree_root_bkey_invalid, 58, 0) \
+ x(btree_root_read_error, 59, 0) \
+ x(btree_root_bad_min_key, 60, 0) \
+ x(btree_root_bad_max_key, 61, 0) \
+ x(btree_node_read_error, 62, 0) \
+ x(btree_node_topology_bad_min_key, 63, 0) \
+ x(btree_node_topology_bad_max_key, 64, 0) \
+ x(btree_node_topology_overwritten_by_prev_node, 65, 0) \
+ x(btree_node_topology_overwritten_by_next_node, 66, 0) \
+ x(btree_node_topology_interior_node_empty, 67, 0) \
+ x(fs_usage_hidden_wrong, 68, FSCK_AUTOFIX) \
+ x(fs_usage_btree_wrong, 69, FSCK_AUTOFIX) \
+ x(fs_usage_data_wrong, 70, FSCK_AUTOFIX) \
+ x(fs_usage_cached_wrong, 71, FSCK_AUTOFIX) \
+ x(fs_usage_reserved_wrong, 72, FSCK_AUTOFIX) \
+ x(fs_usage_persistent_reserved_wrong, 73, FSCK_AUTOFIX) \
+ x(fs_usage_nr_inodes_wrong, 74, FSCK_AUTOFIX) \
+ x(fs_usage_replicas_wrong, 75, FSCK_AUTOFIX) \
+ x(dev_usage_buckets_wrong, 76, FSCK_AUTOFIX) \
+ x(dev_usage_sectors_wrong, 77, FSCK_AUTOFIX) \
+ x(dev_usage_fragmented_wrong, 78, FSCK_AUTOFIX) \
+ x(dev_usage_buckets_ec_wrong, 79, FSCK_AUTOFIX) \
+ x(bkey_version_in_future, 80, 0) \
+ x(bkey_u64s_too_small, 81, 0) \
+ x(bkey_invalid_type_for_btree, 82, 0) \
+ x(bkey_extent_size_zero, 83, 0) \
+ x(bkey_extent_size_greater_than_offset, 84, 0) \
+ x(bkey_size_nonzero, 85, 0) \
+ x(bkey_snapshot_nonzero, 86, 0) \
+ x(bkey_snapshot_zero, 87, 0) \
+ x(bkey_at_pos_max, 88, 0) \
+ x(bkey_before_start_of_btree_node, 89, 0) \
+ x(bkey_after_end_of_btree_node, 90, 0) \
+ x(bkey_val_size_nonzero, 91, 0) \
+ x(bkey_val_size_too_small, 92, 0) \
+ x(alloc_v1_val_size_bad, 93, 0) \
+ x(alloc_v2_unpack_error, 94, 0) \
+ x(alloc_v3_unpack_error, 95, 0) \
+ x(alloc_v4_val_size_bad, 96, 0) \
+ x(alloc_v4_backpointers_start_bad, 97, 0) \
+ x(alloc_key_data_type_bad, 98, 0) \
+ x(alloc_key_empty_but_have_data, 99, 0) \
+ x(alloc_key_dirty_sectors_0, 100, 0) \
+ x(alloc_key_data_type_inconsistency, 101, 0) \
+ x(alloc_key_to_missing_dev_bucket, 102, 0) \
+ x(alloc_key_cached_inconsistency, 103, 0) \
+ x(alloc_key_cached_but_read_time_zero, 104, 0) \
+ x(alloc_key_to_missing_lru_entry, 105, 0) \
+ x(alloc_key_data_type_wrong, 106, FSCK_AUTOFIX) \
+ x(alloc_key_gen_wrong, 107, FSCK_AUTOFIX) \
+ x(alloc_key_dirty_sectors_wrong, 108, FSCK_AUTOFIX) \
+ x(alloc_key_cached_sectors_wrong, 109, FSCK_AUTOFIX) \
+ x(alloc_key_stripe_wrong, 110, FSCK_AUTOFIX) \
+ x(alloc_key_stripe_redundancy_wrong, 111, FSCK_AUTOFIX) \
+ x(bucket_sector_count_overflow, 112, 0) \
+ x(bucket_metadata_type_mismatch, 113, 0) \
+ x(need_discard_key_wrong, 114, 0) \
+ x(freespace_key_wrong, 115, 0) \
+ x(freespace_hole_missing, 116, 0) \
+ x(bucket_gens_val_size_bad, 117, 0) \
+ x(bucket_gens_key_wrong, 118, 0) \
+ x(bucket_gens_hole_wrong, 119, 0) \
+ x(bucket_gens_to_invalid_dev, 120, 0) \
+ x(bucket_gens_to_invalid_buckets, 121, 0) \
+ x(bucket_gens_nonzero_for_invalid_buckets, 122, 0) \
+ x(need_discard_freespace_key_to_invalid_dev_bucket, 123, 0) \
+ x(need_discard_freespace_key_bad, 124, 0) \
+ x(backpointer_bucket_offset_wrong, 125, 0) \
+ x(backpointer_to_missing_device, 126, 0) \
+ x(backpointer_to_missing_alloc, 127, 0) \
+ x(backpointer_to_missing_ptr, 128, 0) \
+ x(lru_entry_at_time_0, 129, 0) \
+ x(lru_entry_to_invalid_bucket, 130, 0) \
+ x(lru_entry_bad, 131, 0) \
+ x(btree_ptr_val_too_big, 132, 0) \
+ x(btree_ptr_v2_val_too_big, 133, 0) \
+ x(btree_ptr_has_non_ptr, 134, 0) \
+ x(extent_ptrs_invalid_entry, 135, 0) \
+ x(extent_ptrs_no_ptrs, 136, 0) \
+ x(extent_ptrs_too_many_ptrs, 137, 0) \
+ x(extent_ptrs_redundant_crc, 138, 0) \
+ x(extent_ptrs_redundant_stripe, 139, 0) \
+ x(extent_ptrs_unwritten, 140, 0) \
+ x(extent_ptrs_written_and_unwritten, 141, 0) \
+ x(ptr_to_invalid_device, 142, 0) \
+ x(ptr_to_duplicate_device, 143, 0) \
+ x(ptr_after_last_bucket, 144, 0) \
+ x(ptr_before_first_bucket, 145, 0) \
+ x(ptr_spans_multiple_buckets, 146, 0) \
+ x(ptr_to_missing_backpointer, 147, 0) \
+ x(ptr_to_missing_alloc_key, 148, 0) \
+ x(ptr_to_missing_replicas_entry, 149, 0) \
+ x(ptr_to_missing_stripe, 150, 0) \
+ x(ptr_to_incorrect_stripe, 151, 0) \
+ x(ptr_gen_newer_than_bucket_gen, 152, 0) \
+ x(ptr_too_stale, 153, 0) \
+ x(stale_dirty_ptr, 154, 0) \
+ x(ptr_bucket_data_type_mismatch, 155, 0) \
+ x(ptr_cached_and_erasure_coded, 156, 0) \
+ x(ptr_crc_uncompressed_size_too_small, 157, 0) \
+ x(ptr_crc_csum_type_unknown, 158, 0) \
+ x(ptr_crc_compression_type_unknown, 159, 0) \
+ x(ptr_crc_redundant, 160, 0) \
+ x(ptr_crc_uncompressed_size_too_big, 161, 0) \
+ x(ptr_crc_nonce_mismatch, 162, 0) \
+ x(ptr_stripe_redundant, 163, 0) \
+ x(reservation_key_nr_replicas_invalid, 164, 0) \
+ x(reflink_v_refcount_wrong, 165, 0) \
+ x(reflink_p_to_missing_reflink_v, 166, 0) \
+ x(stripe_pos_bad, 167, 0) \
+ x(stripe_val_size_bad, 168, 0) \
+ x(stripe_sector_count_wrong, 169, 0) \
+ x(snapshot_tree_pos_bad, 170, 0) \
+ x(snapshot_tree_to_missing_snapshot, 171, 0) \
+ x(snapshot_tree_to_missing_subvol, 172, 0) \
+ x(snapshot_tree_to_wrong_subvol, 173, 0) \
+ x(snapshot_tree_to_snapshot_subvol, 174, 0) \
+ x(snapshot_pos_bad, 175, 0) \
+ x(snapshot_parent_bad, 176, 0) \
+ x(snapshot_children_not_normalized, 177, 0) \
+ x(snapshot_child_duplicate, 178, 0) \
+ x(snapshot_child_bad, 179, 0) \
+ x(snapshot_skiplist_not_normalized, 180, 0) \
+ x(snapshot_skiplist_bad, 181, 0) \
+ x(snapshot_should_not_have_subvol, 182, 0) \
+ x(snapshot_to_bad_snapshot_tree, 183, 0) \
+ x(snapshot_bad_depth, 184, 0) \
+ x(snapshot_bad_skiplist, 185, 0) \
+ x(subvol_pos_bad, 186, 0) \
+ x(subvol_not_master_and_not_snapshot, 187, 0) \
+ x(subvol_to_missing_root, 188, 0) \
+ x(subvol_root_wrong_bi_subvol, 189, 0) \
+ x(bkey_in_missing_snapshot, 190, 0) \
+ x(inode_pos_inode_nonzero, 191, 0) \
+ x(inode_pos_blockdev_range, 192, 0) \
+ x(inode_unpack_error, 193, 0) \
+ x(inode_str_hash_invalid, 194, 0) \
+ x(inode_v3_fields_start_bad, 195, 0) \
+ x(inode_snapshot_mismatch, 196, 0) \
+ x(inode_unlinked_but_clean, 197, 0) \
+ x(inode_unlinked_but_nlink_nonzero, 198, 0) \
+ x(inode_checksum_type_invalid, 199, 0) \
+ x(inode_compression_type_invalid, 200, 0) \
+ x(inode_subvol_root_but_not_dir, 201, 0) \
+ x(inode_i_size_dirty_but_clean, 202, 0) \
+ x(inode_i_sectors_dirty_but_clean, 203, 0) \
+ x(inode_i_sectors_wrong, 204, 0) \
+ x(inode_dir_wrong_nlink, 205, 0) \
+ x(inode_dir_multiple_links, 206, 0) \
+ x(inode_multiple_links_but_nlink_0, 207, 0) \
+ x(inode_wrong_backpointer, 208, 0) \
+ x(inode_wrong_nlink, 209, 0) \
+ x(inode_unreachable, 210, 0) \
+ x(deleted_inode_but_clean, 211, 0) \
+ x(deleted_inode_missing, 212, 0) \
+ x(deleted_inode_is_dir, 213, 0) \
+ x(deleted_inode_not_unlinked, 214, 0) \
+ x(extent_overlapping, 215, 0) \
+ x(extent_in_missing_inode, 216, 0) \
+ x(extent_in_non_reg_inode, 217, 0) \
+ x(extent_past_end_of_inode, 218, 0) \
+ x(dirent_empty_name, 219, 0) \
+ x(dirent_val_too_big, 220, 0) \
+ x(dirent_name_too_long, 221, 0) \
+ x(dirent_name_embedded_nul, 222, 0) \
+ x(dirent_name_dot_or_dotdot, 223, 0) \
+ x(dirent_name_has_slash, 224, 0) \
+ x(dirent_d_type_wrong, 225, 0) \
+ x(inode_bi_parent_wrong, 226, 0) \
+ x(dirent_in_missing_dir_inode, 227, 0) \
+ x(dirent_in_non_dir_inode, 228, 0) \
+ x(dirent_to_missing_inode, 229, 0) \
+ x(dirent_to_missing_subvol, 230, 0) \
+ x(dirent_to_itself, 231, 0) \
+ x(quota_type_invalid, 232, 0) \
+ x(xattr_val_size_too_small, 233, 0) \
+ x(xattr_val_size_too_big, 234, 0) \
+ x(xattr_invalid_type, 235, 0) \
+ x(xattr_name_invalid_chars, 236, 0) \
+ x(xattr_in_missing_inode, 237, 0) \
+ x(root_subvol_missing, 238, 0) \
+ x(root_dir_missing, 239, 0) \
+ x(root_inode_not_dir, 240, 0) \
+ x(dir_loop, 241, 0) \
+ x(hash_table_key_duplicate, 242, 0) \
+ x(hash_table_key_wrong_offset, 243, 0) \
+ x(unlinked_inode_not_on_deleted_list, 244, 0) \
+ x(reflink_p_front_pad_bad, 245, 0) \
+ x(journal_entry_dup_same_device, 246, 0) \
+ x(inode_bi_subvol_missing, 247, 0) \
+ x(inode_bi_subvol_wrong, 248, 0) \
+ x(inode_points_to_missing_dirent, 249, 0) \
+ x(inode_points_to_wrong_dirent, 250, 0) \
+ x(inode_bi_parent_nonzero, 251, 0) \
+ x(dirent_to_missing_parent_subvol, 252, 0) \
+ x(dirent_not_visible_in_parent_subvol, 253, 0) \
+ x(subvol_fs_path_parent_wrong, 254, 0) \
+ x(subvol_root_fs_path_parent_nonzero, 255, 0) \
+ x(subvol_children_not_set, 256, 0) \
+ x(subvol_children_bad, 257, 0) \
+ x(subvol_loop, 258, 0) \
+ x(subvol_unreachable, 259, 0) \
+ x(btree_node_bkey_bad_u64s, 260, 0) \
+ x(btree_node_topology_empty_interior_node, 261, 0) \
+ x(btree_ptr_v2_min_key_bad, 262, 0) \
+ x(btree_root_unreadable_and_scan_found_nothing, 263, 0) \
+ x(snapshot_node_missing, 264, 0) \
+ x(dup_backpointer_to_bad_csum_extent, 265, 0) \
+ x(btree_bitmap_not_marked, 266, 0) \
+ x(sb_clean_entry_overrun, 267, 0) \
+ x(btree_ptr_v2_written_0, 268, 0) \
+ x(subvol_snapshot_bad, 269, 0) \
+ x(subvol_inode_bad, 270, 0) \
+ x(alloc_key_stripe_sectors_wrong, 271, 0) \
+ x(accounting_mismatch, 272, 0) \
+ x(accounting_replicas_not_marked, 273, 0) \
+ x(invalid_btree_id, 274, 0) \
+ x(alloc_key_io_time_bad, 275, 0)
enum bch_sb_error_id {
-#define x(t, n) BCH_FSCK_ERR_##t = n,
+#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
BCH_SB_ERRS()
#undef x
BCH_SB_ERR_MAX
diff --git a/fs/bcachefs/seqmutex.h b/fs/bcachefs/seqmutex.h
index c1860d8163fb..c4b3d8d3f414 100644
--- a/fs/bcachefs/seqmutex.h
+++ b/fs/bcachefs/seqmutex.h
@@ -19,17 +19,14 @@ static inline bool seqmutex_trylock(struct seqmutex *lock)
static inline void seqmutex_lock(struct seqmutex *lock)
{
mutex_lock(&lock->lock);
-}
-
-static inline void seqmutex_unlock(struct seqmutex *lock)
-{
lock->seq++;
- mutex_unlock(&lock->lock);
}
-static inline u32 seqmutex_seq(struct seqmutex *lock)
+static inline u32 seqmutex_unlock(struct seqmutex *lock)
{
- return lock->seq;
+ u32 seq = lock->seq;
+ mutex_unlock(&lock->lock);
+ return seq;
}
static inline bool seqmutex_relock(struct seqmutex *lock, u32 seq)
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index 51918acfd726..24023d6a9698 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -168,6 +168,9 @@ static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id)
size_t new_bytes = kmalloc_size_roundup(struct_size(new, s, idx + 1));
size_t new_size = (new_bytes - sizeof(*new)) / sizeof(new->s[0]);
+ if (unlikely(new_bytes > INT_MAX))
+ return NULL;
+
new = kvzalloc(new_bytes, GFP_KERNEL);
if (!new)
return NULL;
@@ -1565,13 +1568,6 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags))
return 0;
- if (!test_bit(BCH_FS_started, &c->flags)) {
- ret = bch2_fs_read_write_early(c);
- bch_err_msg(c, ret, "deleting dead snapshots: error going rw");
- if (ret)
- return ret;
- }
-
trans = bch2_trans_get(c);
/*
@@ -1687,6 +1683,8 @@ void bch2_delete_dead_snapshots_work(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work);
+ set_worker_desc("bcachefs-delete-dead-snapshots/%s", c->name);
+
bch2_delete_dead_snapshots(c);
bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
}
diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h
index cbad9b27874f..c8c266cb5797 100644
--- a/fs/bcachefs/str_hash.h
+++ b/fs/bcachefs/str_hash.h
@@ -300,7 +300,7 @@ not_found:
if (!found && (flags & STR_HASH_must_replace)) {
ret = -BCH_ERR_ENOENT_str_hash_set_must_replace;
} else if (found && (flags & STR_HASH_must_create)) {
- ret = -EEXIST;
+ ret = -BCH_ERR_EEXIST_str_hash_set;
} else {
if (!found && slot.path)
swap(iter, slot);
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index 055478d21e9e..b156fc85b8a3 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -649,9 +649,10 @@ reread:
bytes = vstruct_bytes(sb->sb);
- if (bytes > 512ULL << min(BCH_SB_LAYOUT_SIZE_BITS_MAX, sb->sb->layout.sb_max_size_bits)) {
- prt_printf(err, "Invalid superblock: too big (got %zu bytes, layout max %lu)",
- bytes, 512UL << sb->sb->layout.sb_max_size_bits);
+ u64 sb_size = 512ULL << min(BCH_SB_LAYOUT_SIZE_BITS_MAX, sb->sb->layout.sb_max_size_bits);
+ if (bytes > sb_size) {
+ prt_printf(err, "Invalid superblock: too big (got %zu bytes, layout max %llu)",
+ bytes, sb_size);
return -BCH_ERR_invalid_sb_too_big;
}
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 65e239d32915..fb906467201e 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -536,7 +536,6 @@ static void __bch2_fs_free(struct bch_fs *c)
bch2_find_btree_nodes_exit(&c->found_btree_nodes);
bch2_free_pending_node_rewrites(c);
- bch2_fs_allocator_background_exit(c);
bch2_fs_sb_errors_exit(c);
bch2_fs_counters_exit(c);
bch2_fs_snapshots_exit(c);
@@ -912,9 +911,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
bch2_io_clock_init(&c->io_clock[WRITE]) ?:
bch2_fs_journal_init(&c->journal) ?:
bch2_fs_replicas_init(c) ?:
+ bch2_fs_btree_iter_init(c) ?:
bch2_fs_btree_cache_init(c) ?:
bch2_fs_btree_key_cache_init(&c->btree_key_cache) ?:
- bch2_fs_btree_iter_init(c) ?:
bch2_fs_btree_interior_update_init(c) ?:
bch2_fs_buckets_waiting_for_journal_init(c) ?:
bch2_fs_btree_write_buffer_init(c) ?:
@@ -931,12 +930,13 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
if (ret)
goto err;
- for (i = 0; i < c->sb.nr_devices; i++)
- if (bch2_member_exists(c->disk_sb.sb, i) &&
- bch2_dev_alloc(c, i)) {
- ret = -EEXIST;
+ for (i = 0; i < c->sb.nr_devices; i++) {
+ if (!bch2_member_exists(c->disk_sb.sb, i))
+ continue;
+ ret = bch2_dev_alloc(c, i);
+ if (ret)
goto err;
- }
+ }
bch2_journal_entry_res_resize(&c->journal,
&c->btree_root_journal_res,
@@ -1194,6 +1194,7 @@ static void bch2_dev_free(struct bch_dev *ca)
kfree(ca->buckets_nouse);
bch2_free_super(&ca->disk_sb);
+ bch2_dev_allocator_background_exit(ca);
bch2_dev_journal_exit(ca);
free_percpu(ca->io_done);
@@ -1316,6 +1317,8 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
atomic_long_set(&ca->ref, 1);
#endif
+ bch2_dev_allocator_background_init(ca);
+
if (percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete,
PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
!(ca->sb_read_scratch = (void *) __get_free_page(GFP_KERNEL)) ||
@@ -1528,6 +1531,7 @@ static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
* The allocator thread itself allocates btree nodes, so stop it first:
*/
bch2_dev_allocator_remove(c, ca);
+ bch2_recalc_capacity(c);
bch2_dev_journal_stop(&c->journal, ca);
}
@@ -1539,6 +1543,7 @@ static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c);
+ bch2_dev_do_discards(ca);
}
int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 477f350a8bd0..e3a57196b0ee 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -741,7 +741,9 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
ret = btrfs_bio_csum(bbio);
if (ret)
goto fail_put_bio;
- } else if (use_append) {
+ } else if (use_append ||
+ (btrfs_is_zoned(fs_info) && inode &&
+ inode->flags & BTRFS_INODE_NODATASUM)) {
ret = btrfs_alloc_dummy_sum(bbio);
if (ret)
goto fail_put_bio;
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 1e09aeea69c2..1a66be33bb04 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1785,6 +1785,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
container_of(work, struct btrfs_fs_info, reclaim_bgs_work);
struct btrfs_block_group *bg;
struct btrfs_space_info *space_info;
+ LIST_HEAD(retry_list);
if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
return;
@@ -1921,8 +1922,11 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
}
next:
- if (ret)
- btrfs_mark_bg_to_reclaim(bg);
+ if (ret) {
+ /* Refcount held by the reclaim_bgs list after splice. */
+ btrfs_get_block_group(bg);
+ list_add_tail(&bg->bg_list, &retry_list);
+ }
btrfs_put_block_group(bg);
mutex_unlock(&fs_info->reclaim_bgs_lock);
@@ -1942,6 +1946,9 @@ next:
spin_unlock(&fs_info->unused_bgs_lock);
mutex_unlock(&fs_info->reclaim_bgs_lock);
end:
+ spin_lock(&fs_info->unused_bgs_lock);
+ list_splice_tail(&retry_list, &fs_info->reclaim_bgs);
+ spin_unlock(&fs_info->unused_bgs_lock);
btrfs_exclop_finish(fs_info);
sb_end_write(fs_info->sb);
}
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 3ab8dea5036b..dabc3d0793cf 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2697,7 +2697,7 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
u64 offset = bytenr - block_group->start;
u64 to_free, to_unusable;
int bg_reclaim_threshold = 0;
- bool initial = (size == block_group->length);
+ bool initial = ((size == block_group->length) && (block_group->alloc_offset == 0));
u64 reclaimable_unusable;
WARN_ON(!initial && offset + size > block_group->zone_capacity);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index fc2a7ea26354..bf0f81d59b6b 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1351,7 +1351,7 @@ static int flush_reservations(struct btrfs_fs_info *fs_info)
int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
{
- struct btrfs_root *quota_root;
+ struct btrfs_root *quota_root = NULL;
struct btrfs_trans_handle *trans = NULL;
int ret = 0;
@@ -1449,9 +1449,9 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
btrfs_free_tree_block(trans, btrfs_root_id(quota_root),
quota_root->node, 0, 1);
- btrfs_put_root(quota_root);
out:
+ btrfs_put_root(quota_root);
mutex_unlock(&fs_info->qgroup_ioctl_lock);
if (ret && trans)
btrfs_end_transaction(trans);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index afd6932f5e89..d7caa3732f07 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1688,20 +1688,24 @@ static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx,
(i << fs_info->sectorsize_bits);
int err;
- bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ,
- fs_info, scrub_read_endio, stripe);
- bbio->bio.bi_iter.bi_sector = logical >> SECTOR_SHIFT;
-
io_stripe.is_scrub = true;
+ stripe_len = (nr_sectors - i) << fs_info->sectorsize_bits;
+ /*
+ * For RST cases, we need to manually split the bbio to
+ * follow the RST boundary.
+ */
err = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
- &stripe_len, &bioc, &io_stripe,
- &mirror);
+ &stripe_len, &bioc, &io_stripe, &mirror);
btrfs_put_bioc(bioc);
- if (err) {
- btrfs_bio_end_io(bbio,
- errno_to_blk_status(err));
- return;
+ if (err < 0) {
+ set_bit(i, &stripe->io_error_bitmap);
+ set_bit(i, &stripe->error_bitmap);
+ continue;
}
+
+ bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ,
+ fs_info, scrub_read_endio, stripe);
+ bbio->bio.bi_iter.bi_sector = logical >> SECTOR_SHIFT;
}
__bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 26a2e5aa08e9..0bce1d45e252 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -138,6 +138,25 @@ static void wait_log_commit(struct btrfs_root *root, int transid);
* and once to do all the other items.
*/
+static struct inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *root)
+{
+ unsigned int nofs_flag;
+ struct inode *inode;
+
+ /*
+ * We're holding a transaction handle whether we are logging or
+ * replaying a log tree, so we must make sure NOFS semantics apply
+ * because btrfs_alloc_inode() may be triggered and it uses GFP_KERNEL
+ * to allocate an inode, which can recurse back into the filesystem and
+ * attempt a transaction commit, resulting in a deadlock.
+ */
+ nofs_flag = memalloc_nofs_save();
+ inode = btrfs_iget(root->fs_info->sb, objectid, root);
+ memalloc_nofs_restore(nofs_flag);
+
+ return inode;
+}
+
/*
* start a sub transaction and setup the log tree
* this increments the log tree writer count to make the people
@@ -600,7 +619,7 @@ static noinline struct inode *read_one_inode(struct btrfs_root *root,
{
struct inode *inode;
- inode = btrfs_iget(root->fs_info->sb, objectid, root);
+ inode = btrfs_iget_logging(objectid, root);
if (IS_ERR(inode))
inode = NULL;
return inode;
@@ -5438,7 +5457,6 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
struct btrfs_log_ctx *ctx)
{
struct btrfs_root *root = start_inode->root;
- struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_path *path;
LIST_HEAD(dir_list);
struct btrfs_dir_list *dir_elem;
@@ -5499,7 +5517,7 @@ again:
continue;
btrfs_release_path(path);
- di_inode = btrfs_iget(fs_info->sb, di_key.objectid, root);
+ di_inode = btrfs_iget_logging(di_key.objectid, root);
if (IS_ERR(di_inode)) {
ret = PTR_ERR(di_inode);
goto out;
@@ -5559,7 +5577,7 @@ again:
btrfs_add_delayed_iput(curr_inode);
curr_inode = NULL;
- vfs_inode = btrfs_iget(fs_info->sb, ino, root);
+ vfs_inode = btrfs_iget_logging(ino, root);
if (IS_ERR(vfs_inode)) {
ret = PTR_ERR(vfs_inode);
break;
@@ -5654,7 +5672,7 @@ static int add_conflicting_inode(struct btrfs_trans_handle *trans,
if (ctx->num_conflict_inodes >= MAX_CONFLICT_INODES)
return BTRFS_LOG_FORCE_COMMIT;
- inode = btrfs_iget(root->fs_info->sb, ino, root);
+ inode = btrfs_iget_logging(ino, root);
/*
* If the other inode that had a conflicting dir entry was deleted in
* the current transaction then we either:
@@ -5755,7 +5773,6 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_log_ctx *ctx)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
int ret = 0;
/*
@@ -5786,7 +5803,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
list_del(&curr->list);
kfree(curr);
- inode = btrfs_iget(fs_info->sb, ino, root);
+ inode = btrfs_iget_logging(ino, root);
/*
* If the other inode that had a conflicting dir entry was
* deleted in the current transaction, we need to log its parent
@@ -5797,7 +5814,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
if (ret != -ENOENT)
break;
- inode = btrfs_iget(fs_info->sb, parent, root);
+ inode = btrfs_iget_logging(parent, root);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
break;
@@ -6319,7 +6336,6 @@ static int log_new_delayed_dentries(struct btrfs_trans_handle *trans,
struct btrfs_log_ctx *ctx)
{
const bool orig_log_new_dentries = ctx->log_new_dentries;
- struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_delayed_item *item;
int ret = 0;
@@ -6345,7 +6361,7 @@ static int log_new_delayed_dentries(struct btrfs_trans_handle *trans,
if (key.type == BTRFS_ROOT_ITEM_KEY)
continue;
- di_inode = btrfs_iget(fs_info->sb, key.objectid, inode->root);
+ di_inode = btrfs_iget_logging(key.objectid, inode->root);
if (IS_ERR(di_inode)) {
ret = PTR_ERR(di_inode);
break;
@@ -6729,7 +6745,6 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode,
struct btrfs_log_ctx *ctx)
{
- struct btrfs_fs_info *fs_info = trans->fs_info;
int ret;
struct btrfs_path *path;
struct btrfs_key key;
@@ -6794,8 +6809,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
cur_offset = item_size;
}
- dir_inode = btrfs_iget(fs_info->sb, inode_key.objectid,
- root);
+ dir_inode = btrfs_iget_logging(inode_key.objectid, root);
/*
* If the parent inode was deleted, return an error to
* fallback to a transaction commit. This is to prevent
@@ -6857,7 +6871,6 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans,
btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
while (true) {
- struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *leaf;
int slot;
struct btrfs_key search_key;
@@ -6872,7 +6885,7 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans,
search_key.objectid = found_key.offset;
search_key.type = BTRFS_INODE_ITEM_KEY;
search_key.offset = 0;
- inode = btrfs_iget(fs_info->sb, ino, root);
+ inode = btrfs_iget_logging(ino, root);
if (IS_ERR(inode))
return PTR_ERR(inode);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index bb2f583eb28b..90079ca134dd 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -141,8 +141,6 @@ int nfs_swap_rw(struct kiocb *iocb, struct iov_iter *iter)
{
ssize_t ret;
- VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE);
-
if (iov_iter_rw(iter) == READ)
ret = nfs_file_direct_read(iocb, iter, true);
else
diff --git a/fs/nfsd/netlink.c b/fs/nfsd/netlink.c
index 62d2586d9902..529a75ecf22e 100644
--- a/fs/nfsd/netlink.c
+++ b/fs/nfsd/netlink.c
@@ -44,9 +44,7 @@ static const struct nla_policy nfsd_listener_set_nl_policy[NFSD_A_SERVER_SOCK_AD
static const struct genl_split_ops nfsd_nl_ops[] = {
{
.cmd = NFSD_CMD_RPC_STATUS_GET,
- .start = nfsd_nl_rpc_status_get_start,
.dumpit = nfsd_nl_rpc_status_get_dumpit,
- .done = nfsd_nl_rpc_status_get_done,
.flags = GENL_CMD_CAP_DUMP,
},
{
diff --git a/fs/nfsd/netlink.h b/fs/nfsd/netlink.h
index e3724637d64d..2e132ef328f8 100644
--- a/fs/nfsd/netlink.h
+++ b/fs/nfsd/netlink.h
@@ -15,9 +15,6 @@
extern const struct nla_policy nfsd_sock_nl_policy[NFSD_A_SOCK_TRANSPORT_NAME + 1];
extern const struct nla_policy nfsd_version_nl_policy[NFSD_A_VERSION_ENABLED + 1];
-int nfsd_nl_rpc_status_get_start(struct netlink_callback *cb);
-int nfsd_nl_rpc_status_get_done(struct netlink_callback *cb);
-
int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb);
int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info);
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 202140df8f82..c848ebe5d08f 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1460,28 +1460,6 @@ static int create_proc_exports_entry(void)
unsigned int nfsd_net_id;
-/**
- * nfsd_nl_rpc_status_get_start - Prepare rpc_status_get dumpit
- * @cb: netlink metadata and command arguments
- *
- * Return values:
- * %0: The rpc_status_get command may proceed
- * %-ENODEV: There is no NFSD running in this namespace
- */
-int nfsd_nl_rpc_status_get_start(struct netlink_callback *cb)
-{
- struct nfsd_net *nn = net_generic(sock_net(cb->skb->sk), nfsd_net_id);
- int ret = -ENODEV;
-
- mutex_lock(&nfsd_mutex);
- if (nn->nfsd_serv)
- ret = 0;
- else
- mutex_unlock(&nfsd_mutex);
-
- return ret;
-}
-
static int nfsd_genl_rpc_status_compose_msg(struct sk_buff *skb,
struct netlink_callback *cb,
struct nfsd_genl_rqstp *rqstp)
@@ -1558,8 +1536,16 @@ static int nfsd_genl_rpc_status_compose_msg(struct sk_buff *skb,
int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb)
{
- struct nfsd_net *nn = net_generic(sock_net(skb->sk), nfsd_net_id);
int i, ret, rqstp_index = 0;
+ struct nfsd_net *nn;
+
+ mutex_lock(&nfsd_mutex);
+
+ nn = net_generic(sock_net(skb->sk), nfsd_net_id);
+ if (!nn->nfsd_serv) {
+ ret = -ENODEV;
+ goto out_unlock;
+ }
rcu_read_lock();
@@ -1636,22 +1622,10 @@ int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb,
ret = skb->len;
out:
rcu_read_unlock();
-
- return ret;
-}
-
-/**
- * nfsd_nl_rpc_status_get_done - rpc_status_get dumpit post-processing
- * @cb: netlink metadata and command arguments
- *
- * Return values:
- * %0: Success
- */
-int nfsd_nl_rpc_status_get_done(struct netlink_callback *cb)
-{
+out_unlock:
mutex_unlock(&nfsd_mutex);
- return 0;
+ return ret;
}
/**
@@ -2195,6 +2169,8 @@ static __net_init int nfsd_net_init(struct net *net)
nn->nfsd_svcstats.program = &nfsd_program;
nn->nfsd_versions = NULL;
nn->nfsd4_minorversions = NULL;
+ nn->nfsd_info.mutex = &nfsd_mutex;
+ nn->nfsd_serv = NULL;
nfsd4_init_leases_net(nn);
get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key));
seqlock_init(&nn->writeverf_lock);
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index cd9a6a1a9fc8..89d7918de7b1 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -672,7 +672,6 @@ int nfsd_create_serv(struct net *net)
return error;
}
spin_lock(&nfsd_notifier_lock);
- nn->nfsd_info.mutex = &nfsd_mutex;
nn->nfsd_serv = serv;
spin_unlock(&nfsd_notifier_lock);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index f0467d3b3c88..6be175a1ab3c 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -2366,6 +2366,11 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
}
list_for_each_entry(ue, &dwc->dw_zero_list, ue_node) {
+ ret = ocfs2_assure_trans_credits(handle, credits);
+ if (ret < 0) {
+ mlog_errno(ret);
+ break;
+ }
ret = ocfs2_mark_extent_written(inode, &et, handle,
ue->ue_cpos, 1,
ue->ue_phys,
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 86807086b2df..530fba34f6d3 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -446,6 +446,23 @@ bail:
}
/*
+ * Make sure handle has at least 'nblocks' credits available. If it does not
+ * have that many credits available, we will try to extend the handle to have
+ * enough credits. If that fails, we will restart transaction to have enough
+ * credits. Similar notes regarding data consistency and locking implications
+ * as for ocfs2_extend_trans() apply here.
+ */
+int ocfs2_assure_trans_credits(handle_t *handle, int nblocks)
+{
+ int old_nblks = jbd2_handle_buffer_credits(handle);
+
+ trace_ocfs2_assure_trans_credits(old_nblks);
+ if (old_nblks >= nblocks)
+ return 0;
+ return ocfs2_extend_trans(handle, nblocks - old_nblks);
+}
+
+/*
* If we have fewer than thresh credits, extend by OCFS2_MAX_TRANS_DATA.
* If that fails, restart the transaction & regain write access for the
* buffer head which is used for metadata modifications.
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 41c9fe7e62f9..e3c3a35dc5e0 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -243,6 +243,8 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb,
int ocfs2_commit_trans(struct ocfs2_super *osb,
handle_t *handle);
int ocfs2_extend_trans(handle_t *handle, int nblocks);
+int ocfs2_assure_trans_credits(handle_t *handle,
+ int nblocks);
int ocfs2_allocate_extend_trans(handle_t *handle,
int thresh);
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h
index 60e208b01c8d..0511c69c9fde 100644
--- a/fs/ocfs2/ocfs2_trace.h
+++ b/fs/ocfs2/ocfs2_trace.h
@@ -2577,6 +2577,8 @@ DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_commit_cache_end);
DEFINE_OCFS2_INT_INT_EVENT(ocfs2_extend_trans);
+DEFINE_OCFS2_INT_EVENT(ocfs2_assure_trans_credits);
+
DEFINE_OCFS2_INT_EVENT(ocfs2_extend_trans_restart);
DEFINE_OCFS2_INT_INT_EVENT(ocfs2_allocate_extend_trans);
diff --git a/fs/open.c b/fs/open.c
index 89cafb572061..50e45bc7c4d8 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -202,13 +202,13 @@ long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
return error;
}
-SYSCALL_DEFINE2(ftruncate, unsigned int, fd, unsigned long, length)
+SYSCALL_DEFINE2(ftruncate, unsigned int, fd, off_t, length)
{
return do_sys_ftruncate(fd, length, 1);
}
#ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_ulong_t, length)
+COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_off_t, length)
{
return do_sys_ftruncate(fd, length, 1);
}
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 116f542442dd..ab65e98a1def 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -1314,10 +1314,6 @@ static int ovl_create_tmpfile(struct file *file, struct dentry *dentry,
int flags = file->f_flags | OVL_OPEN_FLAGS;
int err;
- err = ovl_copy_up(dentry->d_parent);
- if (err)
- return err;
-
old_cred = ovl_override_creds(dentry->d_sb);
err = ovl_setup_cred_for_create(dentry, inode, mode, old_cred);
if (err)
@@ -1360,6 +1356,10 @@ static int ovl_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
if (!OVL_FS(dentry->d_sb)->tmpfile)
return -EOPNOTSUPP;
+ err = ovl_copy_up(dentry->d_parent);
+ if (err)
+ return err;
+
err = ovl_want_write(dentry);
if (err)
return err;
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 063409069f56..5868cb222955 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -181,6 +181,10 @@ static int ovl_check_encode_origin(struct dentry *dentry)
struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
bool decodable = ofs->config.nfs_export;
+ /* No upper layer? */
+ if (!ovl_upper_mnt(ofs))
+ return 1;
+
/* Lower file handle for non-upper non-decodable */
if (!ovl_dentry_upper(dentry) && !decodable)
return 1;
@@ -209,7 +213,7 @@ static int ovl_check_encode_origin(struct dentry *dentry)
* ovl_connect_layer() will try to make origin's layer "connected" by
* copying up a "connectable" ancestor.
*/
- if (d_is_dir(dentry) && ovl_upper_mnt(ofs) && decodable)
+ if (d_is_dir(dentry) && decodable)
return ovl_connect_layer(dentry);
/* Lower file handle for indexed and non-upper dir/non-dir */
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index f8d35f993fe5..71e5039d940d 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -707,6 +707,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
#ifdef CONFIG_X86_USER_SHADOW_STACK
[ilog2(VM_SHADOW_STACK)] = "ss",
#endif
+#ifdef CONFIG_64BIT
+ [ilog2(VM_SEALED)] = "sl",
+#endif
};
size_t i;
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index bb86fc0641d8..6397fdefd876 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -134,7 +134,7 @@ module_param(enable_oplocks, bool, 0644);
MODULE_PARM_DESC(enable_oplocks, "Enable or disable oplocks. Default: y/Y/1");
module_param(enable_gcm_256, bool, 0644);
-MODULE_PARM_DESC(enable_gcm_256, "Enable requesting strongest (256 bit) GCM encryption. Default: n/N/0");
+MODULE_PARM_DESC(enable_gcm_256, "Enable requesting strongest (256 bit) GCM encryption. Default: y/Y/0");
module_param(require_gcm_256, bool, 0644);
MODULE_PARM_DESC(require_gcm_256, "Require strongest (256 bit) GCM encryption. Default: n/N/0");
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 73482734a8d8..557b68e99d0a 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -1494,6 +1494,8 @@ struct cifs_aio_ctx {
struct cifs_io_request {
struct netfs_io_request rreq;
struct cifsFileInfo *cfile;
+ struct TCP_Server_Info *server;
+ pid_t pid;
};
/* asynchronous read support */
@@ -1504,7 +1506,6 @@ struct cifs_io_subrequest {
struct cifs_io_request *req;
};
ssize_t got_bytes;
- pid_t pid;
unsigned int xid;
int result;
bool have_xid;
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index 25e9ab947c17..595c4b673707 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -1345,8 +1345,8 @@ cifs_async_readv(struct cifs_io_subrequest *rdata)
if (rc)
return rc;
- smb->hdr.Pid = cpu_to_le16((__u16)rdata->pid);
- smb->hdr.PidHigh = cpu_to_le16((__u16)(rdata->pid >> 16));
+ smb->hdr.Pid = cpu_to_le16((__u16)rdata->req->pid);
+ smb->hdr.PidHigh = cpu_to_le16((__u16)(rdata->req->pid >> 16));
smb->AndXCommand = 0xFF; /* none */
smb->Fid = rdata->req->cfile->fid.netfid;
@@ -1689,8 +1689,8 @@ cifs_async_writev(struct cifs_io_subrequest *wdata)
if (rc)
goto async_writev_out;
- smb->hdr.Pid = cpu_to_le16((__u16)wdata->pid);
- smb->hdr.PidHigh = cpu_to_le16((__u16)(wdata->pid >> 16));
+ smb->hdr.Pid = cpu_to_le16((__u16)wdata->req->pid);
+ smb->hdr.PidHigh = cpu_to_le16((__u16)(wdata->req->pid >> 16));
smb->AndXCommand = 0xFF; /* none */
smb->Fid = wdata->req->cfile->fid.netfid;
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index 9d5c2440abfc..f1f2573bb18d 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -134,17 +134,15 @@ fail:
static bool cifs_clamp_length(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
- struct TCP_Server_Info *server;
struct cifs_io_subrequest *rdata = container_of(subreq, struct cifs_io_subrequest, subreq);
struct cifs_io_request *req = container_of(subreq->rreq, struct cifs_io_request, rreq);
+ struct TCP_Server_Info *server = req->server;
struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb);
size_t rsize = 0;
int rc;
rdata->xid = get_xid();
rdata->have_xid = true;
-
- server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses);
rdata->server = server;
if (cifs_sb->ctx->rsize == 0)
@@ -179,15 +177,8 @@ static void cifs_req_issue_read(struct netfs_io_subrequest *subreq)
struct netfs_io_request *rreq = subreq->rreq;
struct cifs_io_subrequest *rdata = container_of(subreq, struct cifs_io_subrequest, subreq);
struct cifs_io_request *req = container_of(subreq->rreq, struct cifs_io_request, rreq);
- struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb);
- pid_t pid;
int rc = 0;
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
- pid = req->cfile->pid;
- else
- pid = current->tgid; // Ummm... This may be a workqueue
-
cifs_dbg(FYI, "%s: op=%08x[%x] mapping=%p len=%zu/%zu\n",
__func__, rreq->debug_id, subreq->debug_index, rreq->mapping,
subreq->transferred, subreq->len);
@@ -201,16 +192,8 @@ static void cifs_req_issue_read(struct netfs_io_subrequest *subreq)
}
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
- rdata->pid = pid;
-
- rc = adjust_credits(rdata->server, &rdata->credits, rdata->subreq.len);
- if (!rc) {
- if (rdata->req->cfile->invalidHandle)
- rc = -EAGAIN;
- else
- rc = rdata->server->ops->async_readv(rdata);
- }
+ rc = rdata->server->ops->async_readv(rdata);
out:
if (rc)
netfs_subreq_terminated(subreq, rc, false);
@@ -245,11 +228,15 @@ static int cifs_init_request(struct netfs_io_request *rreq, struct file *file)
rreq->rsize = cifs_sb->ctx->rsize;
rreq->wsize = cifs_sb->ctx->wsize;
+ req->pid = current->tgid; // Ummm... This may be a workqueue
if (file) {
open_file = file->private_data;
rreq->netfs_priv = file->private_data;
req->cfile = cifsFileInfo_get(open_file);
+ req->server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses);
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
+ req->pid = req->cfile->pid;
} else if (rreq->origin != NETFS_WRITEBACK) {
WARN_ON_ONCE(1);
return -EIO;
@@ -3200,8 +3187,6 @@ static int cifs_swap_rw(struct kiocb *iocb, struct iov_iter *iter)
{
ssize_t ret;
- WARN_ON_ONCE(iov_iter_count(iter) != PAGE_SIZE);
-
if (iov_iter_rw(iter) == READ)
ret = netfs_unbuffered_read_iter_locked(iocb, iter);
else
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 38a06e8a0f90..2ae2dbb6202b 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -4484,6 +4484,16 @@ smb2_new_read_req(void **buf, unsigned int *total_len,
return rc;
}
+static void smb2_readv_worker(struct work_struct *work)
+{
+ struct cifs_io_subrequest *rdata =
+ container_of(work, struct cifs_io_subrequest, subreq.work);
+
+ netfs_subreq_terminated(&rdata->subreq,
+ (rdata->result == 0 || rdata->result == -EAGAIN) ?
+ rdata->got_bytes : rdata->result, true);
+}
+
static void
smb2_readv_callback(struct mid_q_entry *mid)
{
@@ -4578,9 +4588,8 @@ smb2_readv_callback(struct mid_q_entry *mid)
rdata->result = 0;
}
rdata->credits.value = 0;
- netfs_subreq_terminated(&rdata->subreq,
- (rdata->result == 0 || rdata->result == -EAGAIN) ?
- rdata->got_bytes : rdata->result, true);
+ INIT_WORK(&rdata->subreq.work, smb2_readv_worker);
+ queue_work(cifsiod_wq, &rdata->subreq.work);
release_mid(mid);
add_credits(server, &credits, 0);
}
@@ -4612,7 +4621,7 @@ smb2_async_readv(struct cifs_io_subrequest *rdata)
io_parms.length = rdata->subreq.len;
io_parms.persistent_fid = rdata->req->cfile->fid.persistent_fid;
io_parms.volatile_fid = rdata->req->cfile->fid.volatile_fid;
- io_parms.pid = rdata->pid;
+ io_parms.pid = rdata->req->pid;
rc = smb2_new_read_req(
(void **) &buf, &total_len, &io_parms, rdata, 0, 0);
@@ -4864,7 +4873,7 @@ smb2_async_writev(struct cifs_io_subrequest *wdata)
.length = wdata->subreq.len,
.persistent_fid = wdata->req->cfile->fid.persistent_fid,
.volatile_fid = wdata->req->cfile->fid.volatile_fid,
- .pid = wdata->pid,
+ .pid = wdata->req->pid,
};
io_parms = &_io_parms;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 58fb7a5062e1..f36091e1e7f5 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2548,11 +2548,26 @@ xfs_ifree_cluster(
* This buffer may not have been correctly initialised as we
* didn't read it from disk. That's not important because we are
* only using to mark the buffer as stale in the log, and to
- * attach stale cached inodes on it. That means it will never be
- * dispatched for IO. If it is, we want to know about it, and we
- * want it to fail. We can acheive this by adding a write
- * verifier to the buffer.
+ * attach stale cached inodes on it.
+ *
+ * For the inode that triggered the cluster freeing, this
+ * attachment may occur in xfs_inode_item_precommit() after we
+ * have marked this buffer stale. If this buffer was not in
+ * memory before xfs_ifree_cluster() started, it will not be
+ * marked XBF_DONE and this will cause problems later in
+ * xfs_inode_item_precommit() when we trip over a (stale, !done)
+ * buffer to attached to the transaction.
+ *
+ * Hence we have to mark the buffer as XFS_DONE here. This is
+ * safe because we are also marking the buffer as XBF_STALE and
+ * XFS_BLI_STALE. That means it will never be dispatched for
+ * IO and it won't be unlocked until the cluster freeing has
+ * been committed to the journal and the buffer unpinned. If it
+ * is written, we want to know about it, and we want it to
+ * fail. We can acheive this by adding a write verifier to the
+ * buffer.
*/
+ bp->b_flags |= XBF_DONE;
bp->b_ops = &xfs_inode_buf_ops;
/*