From 780f6a9afe8b0e303406a39f6968cf1daa6c3d51 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 4 Jan 2023 13:20:52 -0800
Subject: lib: zstd: Fix -Wstringop-overflow warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the following -Wstringop-overflow warning when building with GCC 11+:

lib/zstd/decompress/huf_decompress.c: In function ‘HUF_readDTableX2_wksp’:
lib/zstd/decompress/huf_decompress.c:700:5: warning: ‘HUF_fillDTableX2.constprop’ accessing 624 bytes in a region of size 52 [-Wstringop-overflow=]
  700 |     HUF_fillDTableX2(dt, maxTableLog,
      |     ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  701 |                    wksp->sortedSymbol, sizeOfSort,
      |                    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  702 |                    wksp->rankStart0, wksp->rankVal, maxW,
      |                    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  703 |                    tableLog+1,
      |                    ~~~~~~~~~~~
  704 |                    wksp->calleeWksp, sizeof(wksp->calleeWksp) / sizeof(U32));
      |
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
lib/zstd/decompress/huf_decompress.c:700:5: note: referencing argument 6 of type ‘U32 (*)[13]’ {aka ‘unsigned int (*)[13]’}
lib/zstd/decompress/huf_decompress.c:571:13: note: in a call to function ‘HUF_fillDTableX2.constprop’
  571 | static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
      |             ^~~~~~~~~~~~~~~~

by using pointer notation instead of array notation.

This is one of the last remaining warnings to be fixed before globally
enabling -Wstringop-overflow.

Co-developed-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Cc: Nick Terrell <terrelln@fb.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Nick Terrell <terrelln@fb.com>
---
 lib/zstd/decompress/huf_decompress.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/zstd/decompress/huf_decompress.c b/lib/zstd/decompress/huf_decompress.c
index 89b269a641c7..60958afebc41 100644
--- a/lib/zstd/decompress/huf_decompress.c
+++ b/lib/zstd/decompress/huf_decompress.c
@@ -985,7 +985,7 @@ static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 targetLog, const U32
 
 static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
                            const sortedSymbol_t* sortedList,
-                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+                           const U32* rankStart, rankValCol_t *rankValOrigin, const U32 maxWeight,
                            const U32 nbBitsBaseline)
 {
     U32* const rankVal = rankValOrigin[0];
-- 
cgit v1.2.3


From 038505c41f0aad26ef101f4f7f6e111531c3914f Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Wed, 15 Feb 2023 15:19:17 -0800
Subject: lib: zstd: Backport fix for in-place decompression

Backport the relevant part of upstream commit 5b266196 [0].

This fixes in-place decompression for x86-64 kernel decompression. It
uses a bound of 131072 + (uncompressed_size >> 8), which can be violated
after upstream commit 6a7ede3d [1], as zstd can use part of the output
buffer as temporary storage, and without this patch needs a bound of
~262144.

The fix is for zstd to detect that the input and output buffers overlap,
so that zstd knows it can't use the overlapping portion of the output
buffer as tempoary storage. If the margin is not large enough, this will
ensure that zstd will fail the decompression, rather than overwriting
part of the input data, and causing corruption.

This fix has been landed upstream and is in release v1.5.4. That commit
also adds unit and fuzz tests to verify that the margin we use is
respected, and correct. That means that the fix is well tested upstream.

I have not been able to reproduce the potential bug in x86-64 kernel
decompression locally, nor have I recieved reports of failures to
decompress the kernel. It is possible that compression saves enough
space to make it very hard for the issue to appear.

I've boot tested the zstd compressed kernel on x86-64 and i386 with this
patch, which uses in-place decompression, and sanity tested zstd compression
in btrfs / squashfs to make sure that we don't see any issues, but other
uses of zstd shouldn't be affected, because they don't use in-place
decompression.

Thanks to Vasily Gorbik <gor@linux.ibm.com> for debugging a related issue
on s390, which was triggered by the same commit, but was a bug in how
__decompress() was called [2]. And to Sasha Levin <sashal@kernel.org>
for the CC alerting me of the issue.

[0] https://github.com/facebook/zstd/commit/5b266196a41e6a15e21bd4f0eeab43b938db1d90
[1] https://github.com/facebook/zstd/commit/6a7ede3dfccbf3e0a5928b4224a039c260dcff72
[2] https://lore.kernel.org/r/patch-1.thread-41c676.git-41c676c2d153.your-ad-here.call-01675030179-ext-9637@work.hours

CC: Vasily Gorbik <gor@linux.ibm.com>
CC: Heiko Carstens <hca@linux.ibm.com>
CC: Sasha Levin <sashal@kernel.org>
CC: Yann Collet <cyan@fb.com>
Signed-off-by: Nick Terrell <terrelln@fb.com>
---
 lib/zstd/decompress/zstd_decompress.c | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/zstd/decompress/zstd_decompress.c b/lib/zstd/decompress/zstd_decompress.c
index b9b935a9f5c0..6b3177c94711 100644
--- a/lib/zstd/decompress/zstd_decompress.c
+++ b/lib/zstd/decompress/zstd_decompress.c
@@ -798,7 +798,7 @@ static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
         if (srcSize == 0) return 0;
         RETURN_ERROR(dstBuffer_null, "");
     }
-    ZSTD_memcpy(dst, src, srcSize);
+    ZSTD_memmove(dst, src, srcSize);
     return srcSize;
 }
 
@@ -858,6 +858,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
 
     /* Loop on each block */
     while (1) {
+        BYTE* oBlockEnd = oend;
         size_t decodedSize;
         blockProperties_t blockProperties;
         size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties);
@@ -867,16 +868,34 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
         remainingSrcSize -= ZSTD_blockHeaderSize;
         RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, "");
 
+        if (ip >= op && ip < oBlockEnd) {
+            /* We are decompressing in-place. Limit the output pointer so that we
+             * don't overwrite the block that we are currently reading. This will
+             * fail decompression if the input & output pointers aren't spaced
+             * far enough apart.
+             *
+             * This is important to set, even when the pointers are far enough
+             * apart, because ZSTD_decompressBlock_internal() can decide to store
+             * literals in the output buffer, after the block it is decompressing.
+             * Since we don't want anything to overwrite our input, we have to tell
+             * ZSTD_decompressBlock_internal to never write past ip.
+             *
+             * See ZSTD_allocateLiteralsBuffer() for reference.
+             */
+            oBlockEnd = op + (ip - op);
+        }
+
         switch(blockProperties.blockType)
         {
         case bt_compressed:
-            decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1, not_streaming);
+            decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, /* frame */ 1, not_streaming);
             break;
         case bt_raw :
+            /* Use oend instead of oBlockEnd because this function is safe to overlap. It uses memmove. */
             decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize);
             break;
         case bt_rle :
-            decodedSize = ZSTD_setRleBlock(op, (size_t)(oend-op), *ip, blockProperties.origSize);
+            decodedSize = ZSTD_setRleBlock(op, (size_t)(oBlockEnd-op), *ip, blockProperties.origSize);
             break;
         case bt_reserved :
         default:
-- 
cgit v1.2.3


From 6906598f1ce93761716d780b6e3f171e13f0f4ce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= <j.neuschaefer@gmx.net>
Date: Sun, 29 Jan 2023 14:14:36 +0100
Subject: zstd: Fix definition of assert()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

assert(x) should emit a warning if x is false. WARN_ON(x) emits a
warning if x is true. Thus, assert(x) should be defined as WARN_ON(!x)
rather than WARN_ON(x).

Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Signed-off-by: Nick Terrell <terrelln@fb.com>
---
 lib/zstd/common/zstd_deps.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/zstd/common/zstd_deps.h b/lib/zstd/common/zstd_deps.h
index 7a5bf44839c9..f06df065dec0 100644
--- a/lib/zstd/common/zstd_deps.h
+++ b/lib/zstd/common/zstd_deps.h
@@ -84,7 +84,7 @@ static uint64_t ZSTD_div64(uint64_t dividend, uint32_t divisor) {
 
 #include <linux/kernel.h>
 
-#define assert(x) WARN_ON((x))
+#define assert(x) WARN_ON(!(x))
 
 #endif /* ZSTD_DEPS_ASSERT */
 #endif /* ZSTD_DEPS_NEED_ASSERT */
-- 
cgit v1.2.3


From 1470afefc3c42df5d1662f87d079b46651bdc95b Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Wed, 15 Mar 2023 17:31:02 -0700
Subject: cpumask: introduce for_each_cpu_or

Equivalent of for_each_cpu_and, except it ORs the two masks together
so it iterates all the CPUs present in either mask.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 lib/find_bit.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'lib')

diff --git a/lib/find_bit.c b/lib/find_bit.c
index c10920e66788..32f99e9a670e 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -182,6 +182,15 @@ unsigned long _find_next_andnot_bit(const unsigned long *addr1, const unsigned l
 EXPORT_SYMBOL(_find_next_andnot_bit);
 #endif
 
+#ifndef find_next_or_bit
+unsigned long _find_next_or_bit(const unsigned long *addr1, const unsigned long *addr2,
+					unsigned long nbits, unsigned long start)
+{
+	return FIND_NEXT_BIT(addr1[idx] | addr2[idx], /* nop */, nbits, start);
+}
+EXPORT_SYMBOL(_find_next_or_bit);
+#endif
+
 #ifndef find_next_zero_bit
 unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits,
 					 unsigned long start)
-- 
cgit v1.2.3


From 8b57b11cca88f397035a95b9e12b03511847b0e8 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Wed, 15 Mar 2023 17:31:02 -0700
Subject: pcpcntrs: fix dying cpu summation race

In commit f689054aace2 ("percpu_counter: add percpu_counter_sum_all
interface") a race condition between a cpu dying and
percpu_counter_sum() iterating online CPUs was identified. The
solution was to iterate all possible CPUs for summation via
percpu_counter_sum_all().

We recently had a percpu_counter_sum() call in XFS trip over this
same race condition and it fired a debug assert because the
filesystem was unmounting and the counter *should* be zero just
before we destroy it. That was reported here:

https://lore.kernel.org/linux-kernel/20230314090649.326642-1-yebin@huaweicloud.com/

likely as a result of running generic/648 which exercises
filesystems in the presence of CPU online/offline events.

The solution to use percpu_counter_sum_all() is an awful one. We
use percpu counters and percpu_counter_sum() for accurate and
reliable threshold detection for space management, so a summation
race condition during these operations can result in overcommit of
available space and that may result in filesystem shutdowns.

As percpu_counter_sum_all() iterates all possible CPUs rather than
just those online or even those present, the mask can include CPUs
that aren't even installed in the machine, or in the case of
machines that can hot-plug CPU capable nodes, even have physical
sockets present in the machine.

Fundamentally, this race condition is caused by the CPU being
offlined being removed from the cpu_online_mask before the notifier
that cleans up per-cpu state is run. Hence percpu_counter_sum() will
not sum the count for a cpu currently being taken offline,
regardless of whether the notifier has run or not. This is
the root cause of the bug.

The percpu counter notifier iterates all the registered counters,
locks the counter and moves the percpu count to the global sum.
This is serialised against other operations that move the percpu
counter to the global sum as well as percpu_counter_sum() operations
that sum the percpu counts while holding the counter lock.

Hence the notifier is safe to run concurrently with sum operations,
and the only thing we actually need to care about is that
percpu_counter_sum() iterates dying CPUs. That's trivial to do,
and when there are no CPUs dying, it has no addition overhead except
for a cpumask_or() operation.

This change makes percpu_counter_sum() always do the right thing in
the presence of CPU hot unplug events and makes
percpu_counter_sum_all() unnecessary. This, in turn, means that
filesystems like XFS, ext4, and btrfs don't have to work out when
they should use percpu_counter_sum() vs percpu_counter_sum_all() in
their space accounting algorithms

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 lib/percpu_counter.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index dba56c5c1837..0e096311e0c0 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -131,7 +131,7 @@ static s64 __percpu_counter_sum_mask(struct percpu_counter *fbc,
 
 	raw_spin_lock_irqsave(&fbc->lock, flags);
 	ret = fbc->count;
-	for_each_cpu(cpu, cpu_mask) {
+	for_each_cpu_or(cpu, cpu_online_mask, cpu_mask) {
 		s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
 		ret += *pcount;
 	}
@@ -141,11 +141,20 @@ static s64 __percpu_counter_sum_mask(struct percpu_counter *fbc,
 
 /*
  * Add up all the per-cpu counts, return the result.  This is a more accurate
- * but much slower version of percpu_counter_read_positive()
+ * but much slower version of percpu_counter_read_positive().
+ *
+ * We use the cpu mask of (cpu_online_mask | cpu_dying_mask) to capture sums
+ * from CPUs that are in the process of being taken offline. Dying cpus have
+ * been removed from the online mask, but may not have had the hotplug dead
+ * notifier called to fold the percpu count back into the global counter sum.
+ * By including dying CPUs in the iteration mask, we avoid this race condition
+ * so __percpu_counter_sum() just does the right thing when CPUs are being taken
+ * offline.
  */
 s64 __percpu_counter_sum(struct percpu_counter *fbc)
 {
-	return __percpu_counter_sum_mask(fbc, cpu_online_mask);
+
+	return __percpu_counter_sum_mask(fbc, cpu_dying_mask);
 }
 EXPORT_SYMBOL(__percpu_counter_sum);
 
-- 
cgit v1.2.3


From e9b60c7f97130795c7aa81a649ae4b93a172a277 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Wed, 15 Mar 2023 17:31:03 -0700
Subject: pcpcntr: remove percpu_counter_sum_all()

percpu_counter_sum_all() is now redundant as the race condition it
was invented to handle is now dealt with by percpu_counter_sum()
directly and all users of percpu_counter_sum_all() have been
removed.

Remove it.

This effectively reverts the changes made in f689054aace2
("percpu_counter: add percpu_counter_sum_all interface") except for
the cpumask iteration that fixes percpu_counter_sum() made earlier
in this series.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 lib/percpu_counter.c | 40 +++++++++++-----------------------------
 1 file changed, 11 insertions(+), 29 deletions(-)

(limited to 'lib')

diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 0e096311e0c0..5004463c4f9f 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -122,23 +122,6 @@ void percpu_counter_sync(struct percpu_counter *fbc)
 }
 EXPORT_SYMBOL(percpu_counter_sync);
 
-static s64 __percpu_counter_sum_mask(struct percpu_counter *fbc,
-			      const struct cpumask *cpu_mask)
-{
-	s64 ret;
-	int cpu;
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&fbc->lock, flags);
-	ret = fbc->count;
-	for_each_cpu_or(cpu, cpu_online_mask, cpu_mask) {
-		s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
-		ret += *pcount;
-	}
-	raw_spin_unlock_irqrestore(&fbc->lock, flags);
-	return ret;
-}
-
 /*
  * Add up all the per-cpu counts, return the result.  This is a more accurate
  * but much slower version of percpu_counter_read_positive().
@@ -153,22 +136,21 @@ static s64 __percpu_counter_sum_mask(struct percpu_counter *fbc,
  */
 s64 __percpu_counter_sum(struct percpu_counter *fbc)
 {
+	s64 ret;
+	int cpu;
+	unsigned long flags;
 
-	return __percpu_counter_sum_mask(fbc, cpu_dying_mask);
+	raw_spin_lock_irqsave(&fbc->lock, flags);
+	ret = fbc->count;
+	for_each_cpu_or(cpu, cpu_online_mask, cpu_dying_mask) {
+		s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
+		ret += *pcount;
+	}
+	raw_spin_unlock_irqrestore(&fbc->lock, flags);
+	return ret;
 }
 EXPORT_SYMBOL(__percpu_counter_sum);
 
-/*
- * This is slower version of percpu_counter_sum as it traverses all possible
- * cpus. Use this only in the cases where accurate data is needed in the
- * presense of CPUs getting offlined.
- */
-s64 percpu_counter_sum_all(struct percpu_counter *fbc)
-{
-	return __percpu_counter_sum_mask(fbc, cpu_possible_mask);
-}
-EXPORT_SYMBOL(percpu_counter_sum_all);
-
 int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp,
 			  struct lock_class_key *key)
 {
-- 
cgit v1.2.3


From 0fa99fdfe1b38da396d0b2d1496a823bcd0ebea0 Mon Sep 17 00:00:00 2001
From: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Date: Tue, 7 Mar 2023 13:02:46 -0500
Subject: maple_tree: fix mas_skip_node() end slot detection

Patch series "Fix mas_skip_node() for mas_empty_area()", v2.

mas_empty_area() was incorrectly returning an error when there was room.
The issue was tracked down to mas_skip_node() using the incorrect
end-of-slot count.  Instead of using the nodes hard limit, the limit of
data should be used.

mas_skip_node() was also setting the min and max to that of the child
node, which was unnecessary.  Within these limits being set, there was
also a bug that corrupted the maple state's max if the offset was set to
the maximum node pivot.  The bug was without consequence unless there was
a sufficient gap in the next child node which would cause an error to be
returned.

This patch set fixes these errors by removing the limit setting from
mas_skip_node() and uses the mas_data_end() for slot limits, and adds
tests for all failures discovered.


This patch (of 2):

mas_skip_node() is used to move the maple state to the node with a higher
limit.  It does this by walking up the tree and increasing the slot count.
Since slot count may not be able to be increased, it may need to walk up
multiple times to find room to walk right to a higher limit node.  The
limit of slots that was being used was the node limit and not the last
location of data in the node.  This would cause the maple state to be
shifted outside actual data and enter an error state, thus returning
-EBUSY.

The result of the incorrect error state means that mas_awalk() would
return an error instead of finding the allocation space.

The fix is to use mas_data_end() in mas_skip_node() to detect the nodes
data end point and continue walking the tree up until it is safe to move
to a node with a higher limit.

The walk up the tree also sets the maple state limits so remove the buggy
code from mas_skip_node().  Setting the limits had the unfortunate side
effect of triggering another bug if the parent node was full and the there
was no suitable gap in the second last child, but room in the next child.

mas_skip_node() may also be passed a maple state in an error state from
mas_anode_descend() when no allocations are available.  Return on such an
error state immediately.

Link: https://lkml.kernel.org/r/20230307180247.2220303-1-Liam.Howlett@oracle.com
Link: https://lkml.kernel.org/r/20230307180247.2220303-2-Liam.Howlett@oracle.com
Fixes: 54a611b60590 ("Maple Tree: add new data structure")
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Reported-by: Snild Dolkow <snild@sony.com>
  Link: https://lore.kernel.org/linux-mm/cb8dc31a-fef2-1d09-f133-e9f7b9f9e77a@sony.com/
Tested-by: Snild Dolkow <snild@sony.com>
Cc: Peng Zhang <zhangpeng.00@bytedance.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/maple_tree.c | 24 +++++-------------------
 1 file changed, 5 insertions(+), 19 deletions(-)

(limited to 'lib')

diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 646297cae5d1..9e2735cbc2b4 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -5099,35 +5099,21 @@ static inline bool mas_rewind_node(struct ma_state *mas)
  */
 static inline bool mas_skip_node(struct ma_state *mas)
 {
-	unsigned char slot, slot_count;
-	unsigned long *pivots;
-	enum maple_type mt;
+	if (mas_is_err(mas))
+		return false;
 
-	mt = mte_node_type(mas->node);
-	slot_count = mt_slots[mt] - 1;
 	do {
 		if (mte_is_root(mas->node)) {
-			slot = mas->offset;
-			if (slot > slot_count) {
+			if (mas->offset >= mas_data_end(mas)) {
 				mas_set_err(mas, -EBUSY);
 				return false;
 			}
 		} else {
 			mas_ascend(mas);
-			slot = mas->offset;
-			mt = mte_node_type(mas->node);
-			slot_count = mt_slots[mt] - 1;
 		}
-	} while (slot > slot_count);
-
-	mas->offset = ++slot;
-	pivots = ma_pivots(mas_mn(mas), mt);
-	if (slot > 0)
-		mas->min = pivots[slot - 1] + 1;
-
-	if (slot <= slot_count)
-		mas->max = pivots[slot];
+	} while (mas->offset >= mas_data_end(mas));
 
+	mas->offset++;
 	return true;
 }
 
-- 
cgit v1.2.3


From 4bd6dded6318dc8e2514d74868c1f8fb38b61a60 Mon Sep 17 00:00:00 2001
From: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Date: Tue, 7 Mar 2023 13:02:47 -0500
Subject: test_maple_tree: add more testing for mas_empty_area()

Test robust filling of an entire area of the tree, then test one beyond.
This is to test the walking back up the tree at the end of nodes and error
condition.  Test inspired by the reproducer code provided by Snild Dolkow.

The last test in the function tests for the case of a corrupted maple
state caused by the incorrect limits set during mas_skip_node().  There
needs to be a gap in the second last child and last child, but the search
must rule out the second last child's gap.  This would avoid correcting
the maple state to the correct max limit and return an error.

Link: https://lkml.kernel.org/r/20230307180247.2220303-3-Liam.Howlett@oracle.com
Cc: Snild Dolkow <snild@sony.com>
Link: https://lore.kernel.org/linux-mm/cb8dc31a-fef2-1d09-f133-e9f7b9f9e77a@sony.com/
Fixes: e15e06a83923 ("lib/test_maple_tree: add testing for maple tree")
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Peng Zhang <zhangpeng.00@bytedance.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/test_maple_tree.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

(limited to 'lib')

diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c
index 3d19b1f78d71..f1db333270e9 100644
--- a/lib/test_maple_tree.c
+++ b/lib/test_maple_tree.c
@@ -2670,6 +2670,49 @@ static noinline void check_empty_area_window(struct maple_tree *mt)
 	rcu_read_unlock();
 }
 
+static noinline void check_empty_area_fill(struct maple_tree *mt)
+{
+	const unsigned long max = 0x25D78000;
+	unsigned long size;
+	int loop, shift;
+	MA_STATE(mas, mt, 0, 0);
+
+	mt_set_non_kernel(99999);
+	for (shift = 12; shift <= 16; shift++) {
+		loop = 5000;
+		size = 1 << shift;
+		while (loop--) {
+			mas_set(&mas, 0);
+			mas_lock(&mas);
+			MT_BUG_ON(mt, mas_empty_area(&mas, 0, max, size) != 0);
+			MT_BUG_ON(mt, mas.last != mas.index + size - 1);
+			mas_store_gfp(&mas, (void *)size, GFP_KERNEL);
+			mas_unlock(&mas);
+			mas_reset(&mas);
+		}
+	}
+
+	/* No space left. */
+	size = 0x1000;
+	rcu_read_lock();
+	MT_BUG_ON(mt, mas_empty_area(&mas, 0, max, size) != -EBUSY);
+	rcu_read_unlock();
+
+	/* Fill a depth 3 node to the maximum */
+	for (unsigned long i = 629440511; i <= 629440800; i += 6)
+		mtree_store_range(mt, i, i + 5, (void *)i, GFP_KERNEL);
+	/* Make space in the second-last depth 4 node */
+	mtree_erase(mt, 631668735);
+	/* Make space in the last depth 4 node */
+	mtree_erase(mt, 629506047);
+	mas_reset(&mas);
+	/* Search from just after the gap in the second-last depth 4 */
+	rcu_read_lock();
+	MT_BUG_ON(mt, mas_empty_area(&mas, 629506048, 690000000, 0x5000) != 0);
+	rcu_read_unlock();
+	mt_set_non_kernel(0);
+}
+
 static DEFINE_MTREE(tree);
 static int maple_tree_seed(void)
 {
@@ -2926,6 +2969,11 @@ static int maple_tree_seed(void)
 	check_empty_area_window(&tree);
 	mtree_destroy(&tree);
 
+	mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+	check_empty_area_fill(&tree);
+	mtree_destroy(&tree);
+
+
 #if defined(BENCH)
 skip:
 #endif
-- 
cgit v1.2.3


From 13684e966d46283e0e89b6a4941596dc52b18bf3 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 15 Mar 2023 15:28:17 +0100
Subject: lib: dhry: fix unstable smp_processor_id(_) usage

When running the in-kernel Dhrystone benchmark with
CONFIG_DEBUG_PREEMPT=y:

    BUG: using smp_processor_id() in preemptible [00000000] code: bash/938

Fix this by not using smp_processor_id() directly, but instead wrapping
the whole benchmark inside a get_cpu()/put_cpu() pair.  This makes sure
the whole benchmark is run on the same CPU core, and the reported values
are consistent.

Link: https://lkml.kernel.org/r/b0d29932bb24ad82cea7f821e295c898e9657be0.1678890070.git.geert+renesas@glider.be
Fixes: d5528cc16893f1f6 ("lib: add Dhrystone benchmark test")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reported-by: Tobias Klausmann <klausman@schwarzvogel.de>
  Link: https://bugzilla.kernel.org/show_bug.cgi?id=217179
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/dhry_run.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/dhry_run.c b/lib/dhry_run.c
index f9d33efa6d09..f15ac666e9d3 100644
--- a/lib/dhry_run.c
+++ b/lib/dhry_run.c
@@ -31,6 +31,7 @@ MODULE_PARM_DESC(iterations,
 
 static void dhry_benchmark(void)
 {
+	unsigned int cpu = get_cpu();
 	int i, n;
 
 	if (iterations > 0) {
@@ -45,9 +46,10 @@ static void dhry_benchmark(void)
 	}
 
 report:
+	put_cpu();
 	if (n >= 0)
-		pr_info("CPU%u: Dhrystones per Second: %d (%d DMIPS)\n",
-			smp_processor_id(), n, n / DHRY_VAX);
+		pr_info("CPU%u: Dhrystones per Second: %d (%d DMIPS)\n", cpu,
+			n, n / DHRY_VAX);
 	else if (n == -EAGAIN)
 		pr_err("Please increase the number of iterations\n");
 	else
-- 
cgit v1.2.3