aboutsummaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_log.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_log.c')
-rw-r--r--fs/xfs/xfs_log.c288
1 files changed, 190 insertions, 98 deletions
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index aaadee0969c9..b49ccf5c1d75 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -268,7 +268,7 @@ xlog_grant_head_wait(
__set_current_state(TASK_UNINTERRUPTIBLE);
spin_unlock(&head->lock);
- XFS_STATS_INC(xs_sleep_logspace);
+ XFS_STATS_INC(log->l_mp, xs_sleep_logspace);
trace_xfs_log_grant_sleep(log, tic);
schedule();
@@ -379,7 +379,7 @@ xfs_log_regrant(
if (XLOG_FORCED_SHUTDOWN(log))
return -EIO;
- XFS_STATS_INC(xs_try_logspace);
+ XFS_STATS_INC(mp, xs_try_logspace);
/*
* This is a new transaction on the ticket, so we need to change the
@@ -448,7 +448,7 @@ xfs_log_reserve(
if (XLOG_FORCED_SHUTDOWN(log))
return -EIO;
- XFS_STATS_INC(xs_try_logspace);
+ XFS_STATS_INC(mp, xs_try_logspace);
ASSERT(*ticp == NULL);
tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent,
@@ -1188,10 +1188,16 @@ xlog_iodone(xfs_buf_t *bp)
int aborted = 0;
/*
- * Race to shutdown the filesystem if we see an error.
+ * Race to shutdown the filesystem if we see an error or the iclog is in
+ * IOABORT state. The IOABORT state is only set in DEBUG mode to inject
+ * CRC errors into log recovery.
*/
- if (XFS_TEST_ERROR(bp->b_error, l->l_mp,
- XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) {
+ if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR,
+ XFS_RANDOM_IODONE_IOERR) ||
+ iclog->ic_state & XLOG_STATE_IOABORT) {
+ if (iclog->ic_state & XLOG_STATE_IOABORT)
+ iclog->ic_state &= ~XLOG_STATE_IOABORT;
+
xfs_buf_ioerror_alert(bp, __func__);
xfs_buf_stale(bp);
xfs_force_shutdown(l->l_mp, SHUTDOWN_LOG_IO_ERROR);
@@ -1206,7 +1212,7 @@ xlog_iodone(xfs_buf_t *bp)
}
/* log I/O is always issued ASYNC */
- ASSERT(XFS_BUF_ISASYNC(bp));
+ ASSERT(bp->b_flags & XBF_ASYNC);
xlog_state_done_syncing(iclog, aborted);
/*
@@ -1768,7 +1774,7 @@ xlog_sync(
int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb);
int size;
- XFS_STATS_INC(xs_log_writes);
+ XFS_STATS_INC(log->l_mp, xs_log_writes);
ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
/* Add for LR header */
@@ -1805,7 +1811,7 @@ xlog_sync(
bp = iclog->ic_bp;
XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn)));
- XFS_STATS_ADD(xs_log_blocks, BTOBB(count));
+ XFS_STATS_ADD(log->l_mp, xs_log_blocks, BTOBB(count));
/* Do we need to split this write into 2 parts? */
if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) {
@@ -1838,12 +1844,28 @@ xlog_sync(
/* calculcate the checksum */
iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header,
iclog->ic_datap, size);
+#ifdef DEBUG
+ /*
+ * Intentionally corrupt the log record CRC based on the error injection
+ * frequency, if defined. This facilitates testing log recovery in the
+ * event of torn writes. Hence, set the IOABORT state to abort the log
+ * write on I/O completion and shutdown the fs. The subsequent mount
+ * detects the bad CRC and attempts to recover.
+ */
+ if (log->l_badcrc_factor &&
+ (prandom_u32() % log->l_badcrc_factor == 0)) {
+ iclog->ic_header.h_crc &= 0xAAAAAAAA;
+ iclog->ic_state |= XLOG_STATE_IOABORT;
+ xfs_warn(log->l_mp,
+ "Intentionally corrupted log record at LSN 0x%llx. Shutdown imminent.",
+ be64_to_cpu(iclog->ic_header.h_lsn));
+ }
+#endif
bp->b_io_length = BTOBB(count);
bp->b_fspriv = iclog;
- XFS_BUF_ZEROFLAGS(bp);
- XFS_BUF_ASYNC(bp);
- bp->b_flags |= XBF_SYNCIO;
+ bp->b_flags &= ~(XBF_FUA | XBF_FLUSH);
+ bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE);
if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) {
bp->b_flags |= XBF_FUA;
@@ -1870,12 +1892,11 @@ xlog_sync(
/* account for log which doesn't start at block #0 */
XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
+
/*
* Don't call xfs_bwrite here. We do log-syncs even when the filesystem
* is shutting down.
*/
- XFS_BUF_WRITE(bp);
-
error = xlog_bdstrat(bp);
if (error) {
xfs_buf_ioerror_alert(bp, "xlog_sync");
@@ -1887,9 +1908,8 @@ xlog_sync(
xfs_buf_associate_memory(bp,
(char *)&iclog->ic_header + count, split);
bp->b_fspriv = iclog;
- XFS_BUF_ZEROFLAGS(bp);
- XFS_BUF_ASYNC(bp);
- bp->b_flags |= XBF_SYNCIO;
+ bp->b_flags &= ~(XBF_FUA | XBF_FLUSH);
+ bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE);
if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
bp->b_flags |= XBF_FUA;
@@ -1898,7 +1918,6 @@ xlog_sync(
/* account for internal log which doesn't start at block #0 */
XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
- XFS_BUF_WRITE(bp);
error = xlog_bdstrat(bp);
if (error) {
xfs_buf_ioerror_alert(bp, "xlog_sync (split)");
@@ -1989,75 +2008,81 @@ xlog_print_tic_res(
uint ophdr_spc = ticket->t_res_num_ophdrs * (uint)sizeof(xlog_op_header_t);
/* match with XLOG_REG_TYPE_* in xfs_log.h */
- static char *res_type_str[XLOG_REG_TYPE_MAX] = {
- "bformat",
- "bchunk",
- "efi_format",
- "efd_format",
- "iformat",
- "icore",
- "iext",
- "ibroot",
- "ilocal",
- "iattr_ext",
- "iattr_broot",
- "iattr_local",
- "qformat",
- "dquot",
- "quotaoff",
- "LR header",
- "unmount",
- "commit",
- "trans header"
+#define REG_TYPE_STR(type, str) [XLOG_REG_TYPE_##type] = str
+ static char *res_type_str[XLOG_REG_TYPE_MAX + 1] = {
+ REG_TYPE_STR(BFORMAT, "bformat"),
+ REG_TYPE_STR(BCHUNK, "bchunk"),
+ REG_TYPE_STR(EFI_FORMAT, "efi_format"),
+ REG_TYPE_STR(EFD_FORMAT, "efd_format"),
+ REG_TYPE_STR(IFORMAT, "iformat"),
+ REG_TYPE_STR(ICORE, "icore"),
+ REG_TYPE_STR(IEXT, "iext"),
+ REG_TYPE_STR(IBROOT, "ibroot"),
+ REG_TYPE_STR(ILOCAL, "ilocal"),
+ REG_TYPE_STR(IATTR_EXT, "iattr_ext"),
+ REG_TYPE_STR(IATTR_BROOT, "iattr_broot"),
+ REG_TYPE_STR(IATTR_LOCAL, "iattr_local"),
+ REG_TYPE_STR(QFORMAT, "qformat"),
+ REG_TYPE_STR(DQUOT, "dquot"),
+ REG_TYPE_STR(QUOTAOFF, "quotaoff"),
+ REG_TYPE_STR(LRHEADER, "LR header"),
+ REG_TYPE_STR(UNMOUNT, "unmount"),
+ REG_TYPE_STR(COMMIT, "commit"),
+ REG_TYPE_STR(TRANSHDR, "trans header"),
+ REG_TYPE_STR(ICREATE, "inode create")
};
+#undef REG_TYPE_STR
+#define TRANS_TYPE_STR(type) [XFS_TRANS_##type] = #type
static char *trans_type_str[XFS_TRANS_TYPE_MAX] = {
- "SETATTR_NOT_SIZE",
- "SETATTR_SIZE",
- "INACTIVE",
- "CREATE",
- "CREATE_TRUNC",
- "TRUNCATE_FILE",
- "REMOVE",
- "LINK",
- "RENAME",
- "MKDIR",
- "RMDIR",
- "SYMLINK",
- "SET_DMATTRS",
- "GROWFS",
- "STRAT_WRITE",
- "DIOSTRAT",
- "WRITE_SYNC",
- "WRITEID",
- "ADDAFORK",
- "ATTRINVAL",
- "ATRUNCATE",
- "ATTR_SET",
- "ATTR_RM",
- "ATTR_FLAG",
- "CLEAR_AGI_BUCKET",
- "QM_SBCHANGE",
- "DUMMY1",
- "DUMMY2",
- "QM_QUOTAOFF",
- "QM_DQALLOC",
- "QM_SETQLIM",
- "QM_DQCLUSTER",
- "QM_QINOCREATE",
- "QM_QUOTAOFF_END",
- "SB_UNIT",
- "FSYNC_TS",
- "GROWFSRT_ALLOC",
- "GROWFSRT_ZERO",
- "GROWFSRT_FREE",
- "SWAPEXT"
+ TRANS_TYPE_STR(SETATTR_NOT_SIZE),
+ TRANS_TYPE_STR(SETATTR_SIZE),
+ TRANS_TYPE_STR(INACTIVE),
+ TRANS_TYPE_STR(CREATE),
+ TRANS_TYPE_STR(CREATE_TRUNC),
+ TRANS_TYPE_STR(TRUNCATE_FILE),
+ TRANS_TYPE_STR(REMOVE),
+ TRANS_TYPE_STR(LINK),
+ TRANS_TYPE_STR(RENAME),
+ TRANS_TYPE_STR(MKDIR),
+ TRANS_TYPE_STR(RMDIR),
+ TRANS_TYPE_STR(SYMLINK),
+ TRANS_TYPE_STR(SET_DMATTRS),
+ TRANS_TYPE_STR(GROWFS),
+ TRANS_TYPE_STR(STRAT_WRITE),
+ TRANS_TYPE_STR(DIOSTRAT),
+ TRANS_TYPE_STR(WRITEID),
+ TRANS_TYPE_STR(ADDAFORK),
+ TRANS_TYPE_STR(ATTRINVAL),
+ TRANS_TYPE_STR(ATRUNCATE),
+ TRANS_TYPE_STR(ATTR_SET),
+ TRANS_TYPE_STR(ATTR_RM),
+ TRANS_TYPE_STR(ATTR_FLAG),
+ TRANS_TYPE_STR(CLEAR_AGI_BUCKET),
+ TRANS_TYPE_STR(SB_CHANGE),
+ TRANS_TYPE_STR(DUMMY1),
+ TRANS_TYPE_STR(DUMMY2),
+ TRANS_TYPE_STR(QM_QUOTAOFF),
+ TRANS_TYPE_STR(QM_DQALLOC),
+ TRANS_TYPE_STR(QM_SETQLIM),
+ TRANS_TYPE_STR(QM_DQCLUSTER),
+ TRANS_TYPE_STR(QM_QINOCREATE),
+ TRANS_TYPE_STR(QM_QUOTAOFF_END),
+ TRANS_TYPE_STR(FSYNC_TS),
+ TRANS_TYPE_STR(GROWFSRT_ALLOC),
+ TRANS_TYPE_STR(GROWFSRT_ZERO),
+ TRANS_TYPE_STR(GROWFSRT_FREE),
+ TRANS_TYPE_STR(SWAPEXT),
+ TRANS_TYPE_STR(CHECKPOINT),
+ TRANS_TYPE_STR(ICREATE),
+ TRANS_TYPE_STR(CREATE_TMPFILE)
};
+#undef TRANS_TYPE_STR
xfs_warn(mp, "xlog_write: reservation summary:");
xfs_warn(mp, " trans type = %s (%u)",
((ticket->t_trans_type <= 0 ||
ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ?
- "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]),
+ "bad-trans-type" : trans_type_str[ticket->t_trans_type]),
ticket->t_trans_type);
xfs_warn(mp, " unit res = %d bytes",
ticket->t_unit_res);
@@ -2076,7 +2101,7 @@ xlog_print_tic_res(
uint r_type = ticket->t_res_arr[i].r_type;
xfs_warn(mp, "region[%u]: %s - %u bytes", i,
((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ?
- "bad-rtype" : res_type_str[r_type-1]),
+ "bad-rtype" : res_type_str[r_type]),
ticket->t_res_arr[i].r_len);
}
@@ -2422,11 +2447,20 @@ xlog_write(
&partial_copy_len);
xlog_verify_dest_ptr(log, ptr);
- /* copy region */
+ /*
+ * Copy region.
+ *
+ * Unmount records just log an opheader, so can have
+ * empty payloads with no data region to copy. Hence we
+ * only copy the payload if the vector says it has data
+ * to copy.
+ */
ASSERT(copy_len >= 0);
- memcpy(ptr, reg->i_addr + copy_off, copy_len);
- xlog_write_adv_cnt(&ptr, &len, &log_offset, copy_len);
-
+ if (copy_len > 0) {
+ memcpy(ptr, reg->i_addr + copy_off, copy_len);
+ xlog_write_adv_cnt(&ptr, &len, &log_offset,
+ copy_len);
+ }
copy_len += start_rec_copy + sizeof(xlog_op_header_t);
record_cnt++;
data_cnt += contwr ? copy_len : 0;
@@ -2782,11 +2816,19 @@ xlog_state_do_callback(
}
} while (!ioerrors && loopdidcallbacks);
+#ifdef DEBUG
/*
- * make one last gasp attempt to see if iclogs are being left in
- * limbo..
+ * Make one last gasp attempt to see if iclogs are being left in limbo.
+ * If the above loop finds an iclog earlier than the current iclog and
+ * in one of the syncing states, the current iclog is put into
+ * DO_CALLBACK and the callbacks are deferred to the completion of the
+ * earlier iclog. Walk the iclogs in order and make sure that no iclog
+ * is in DO_CALLBACK unless an earlier iclog is in one of the syncing
+ * states.
+ *
+ * Note that SYNCING|IOABORT is a valid state so we cannot just check
+ * for ic_state == SYNCING.
*/
-#ifdef DEBUG
if (funcdidcallbacks) {
first_iclog = iclog = log->l_iclog;
do {
@@ -2801,7 +2843,7 @@ xlog_state_do_callback(
* IOERROR - give up hope all ye who enter here
*/
if (iclog->ic_state == XLOG_STATE_WANT_SYNC ||
- iclog->ic_state == XLOG_STATE_SYNCING ||
+ iclog->ic_state & XLOG_STATE_SYNCING ||
iclog->ic_state == XLOG_STATE_DONE_SYNC ||
iclog->ic_state == XLOG_STATE_IOERROR )
break;
@@ -2913,7 +2955,7 @@ restart:
iclog = log->l_iclog;
if (iclog->ic_state != XLOG_STATE_ACTIVE) {
- XFS_STATS_INC(xs_log_noiclogs);
+ XFS_STATS_INC(log->l_mp, xs_log_noiclogs);
/* Wait for log writes to have flushed */
xlog_wait(&log->l_flush_wait, &log->l_icloglock);
@@ -3165,11 +3207,19 @@ xlog_state_switch_iclogs(
}
if (log->l_curr_block >= log->l_logBBsize) {
+ /*
+ * Rewind the current block before the cycle is bumped to make
+ * sure that the combined LSN never transiently moves forward
+ * when the log wraps to the next cycle. This is to support the
+ * unlocked sample of these fields from xlog_valid_lsn(). Most
+ * other cases should acquire l_icloglock.
+ */
+ log->l_curr_block -= log->l_logBBsize;
+ ASSERT(log->l_curr_block >= 0);
+ smp_wmb();
log->l_curr_cycle++;
if (log->l_curr_cycle == XLOG_HEADER_MAGIC_NUM)
log->l_curr_cycle++;
- log->l_curr_block -= log->l_logBBsize;
- ASSERT(log->l_curr_block >= 0);
}
ASSERT(iclog == log->l_iclog);
log->l_iclog = iclog->ic_next;
@@ -3212,7 +3262,7 @@ _xfs_log_force(
struct xlog_in_core *iclog;
xfs_lsn_t lsn;
- XFS_STATS_INC(xs_log_force);
+ XFS_STATS_INC(mp, xs_log_force);
xlog_cil_force(log);
@@ -3297,7 +3347,7 @@ maybe_sleep:
spin_unlock(&log->l_icloglock);
return -EIO;
}
- XFS_STATS_INC(xs_log_force_sleep);
+ XFS_STATS_INC(mp, xs_log_force_sleep);
xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
/*
* No need to grab the log lock here since we're
@@ -3362,7 +3412,7 @@ _xfs_log_force_lsn(
ASSERT(lsn != 0);
- XFS_STATS_INC(xs_log_force);
+ XFS_STATS_INC(mp, xs_log_force);
lsn = xlog_cil_force_lsn(log, lsn);
if (lsn == NULLCOMMITLSN)
@@ -3411,7 +3461,7 @@ try_again:
(XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) {
ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
- XFS_STATS_INC(xs_log_force_sleep);
+ XFS_STATS_INC(mp, xs_log_force_sleep);
xlog_wait(&iclog->ic_prev->ic_write_wait,
&log->l_icloglock);
@@ -3441,7 +3491,7 @@ try_again:
spin_unlock(&log->l_icloglock);
return -EIO;
}
- XFS_STATS_INC(xs_log_force_sleep);
+ XFS_STATS_INC(mp, xs_log_force_sleep);
xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
/*
* No need to grab the log lock here since we're
@@ -3929,7 +3979,7 @@ xfs_log_force_umount(
log->l_flags & XLOG_ACTIVE_RECOVERY) {
mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
if (mp->m_sb_bp)
- XFS_BUF_DONE(mp->m_sb_bp);
+ mp->m_sb_bp->b_flags |= XBF_DONE;
return 0;
}
@@ -3959,7 +4009,7 @@ xfs_log_force_umount(
spin_lock(&log->l_icloglock);
mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
if (mp->m_sb_bp)
- XFS_BUF_DONE(mp->m_sb_bp);
+ mp->m_sb_bp->b_flags |= XBF_DONE;
/*
* Mark the log and the iclogs with IO error flags to prevent any
@@ -4023,3 +4073,45 @@ xlog_iclogs_empty(
return 1;
}
+/*
+ * Verify that an LSN stamped into a piece of metadata is valid. This is
+ * intended for use in read verifiers on v5 superblocks.
+ */
+bool
+xfs_log_check_lsn(
+ struct xfs_mount *mp,
+ xfs_lsn_t lsn)
+{
+ struct xlog *log = mp->m_log;
+ bool valid;
+
+ /*
+ * norecovery mode skips mount-time log processing and unconditionally
+ * resets the in-core LSN. We can't validate in this mode, but
+ * modifications are not allowed anyways so just return true.
+ */
+ if (mp->m_flags & XFS_MOUNT_NORECOVERY)
+ return true;
+
+ /*
+ * Some metadata LSNs are initialized to NULL (e.g., the agfl). This is
+ * handled by recovery and thus safe to ignore here.
+ */
+ if (lsn == NULLCOMMITLSN)
+ return true;
+
+ valid = xlog_valid_lsn(mp->m_log, lsn);
+
+ /* warn the user about what's gone wrong before verifier failure */
+ if (!valid) {
+ spin_lock(&log->l_icloglock);
+ xfs_warn(mp,
+"Corruption warning: Metadata has LSN (%d:%d) ahead of current LSN (%d:%d). "
+"Please unmount and run xfs_repair (>= v4.3) to resolve.",
+ CYCLE_LSN(lsn), BLOCK_LSN(lsn),
+ log->l_curr_cycle, log->l_curr_block);
+ spin_unlock(&log->l_icloglock);
+ }
+
+ return valid;
+}