From a1d5f71259e40235fdf32b3e41cb14bb6b001f64 Mon Sep 17 00:00:00 2001 From: Pablo Marquez Tello <pablo.tello@arm.com> Date: Thu, 2 Apr 2026 10:42:57 +0100 Subject: [PATCH] fix: Do not mutate shared _gemm_output_3d in CpuGemmConv2d::run() CpuGemmConv2d::run() was mutating the shared member _gemm_output_3d by extending its padding before soft_init()/import_memory(). When the same operator instance is reused across runs, this can cause later extend_padding() calls to fail. It is also unsafe when the operator is used from multiple threads. Use a local TensorInfo copy in run() for padding extension and soft_init()/import_memory(), leaving _gemm_output_3d unchanged. Added a new test: RepeatedRunDoesNotReuseImportedGemm3dTensorInfo. Change-Id: I3e4e2d25cabf85724ecf126b1c93df6733ee7d48 Signed-off-by: Pablo Marquez Tello <pablo.tello@arm.com> --- src/cpu/operators/CpuGemmConv2d.cpp | 6 +- tests/validation/NEON/ConvolutionLayer.cpp | 64 +++++++++++++++++++++- 2 files changed, 67 insertions(+), 3 deletions(-) diff --git a/src/cpu/operators/CpuGemmConv2d.cpp b/src/cpu/operators/CpuGemmConv2d.cpp index c856456de4..f51cb761cc 100644 --- a/src/cpu/operators/CpuGemmConv2d.cpp +++ b/src/cpu/operators/CpuGemmConv2d.cpp @@ -939,8 +939,10 @@ void CpuGemmConv2d::run(ITensorPack &tensors) // Handle the case where output has top/bottom padding const ITensor *out_to_use = out_has_padding ? 
gemm_output.get() : dst; Tensor gemm3d; - _gemm_output_3d.extend_padding(out_to_use->info()->padding()); - gemm3d.allocator()->soft_init(_gemm_output_3d); + TensorInfo gemm3d_info(_gemm_output_3d); + gemm3d_info.set_is_resizable(true); + gemm3d_info.extend_padding(out_to_use->info()->padding()); + gemm3d.allocator()->soft_init(gemm3d_info); gemm3d.allocator()->import_memory(out_to_use->buffer()); auto gemm_output_to_use = gemm_output.get(); diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp index c8948391f9..5d3817959c 100644 --- a/tests/validation/NEON/ConvolutionLayer.cpp +++ b/tests/validation/NEON/ConvolutionLayer.cpp @@ -1437,7 +1437,69 @@ TEST_CASE(MemoryInjection, framework::DatasetMode::ALL) } } -/** Test case for memory injection in @ref NEGEMMConvolutionLayer. +/** Regression test for repeated runs in cpu::CpuGemmConv2d. + * + * Configure the operator once and execute it twice with injected memory. + * + * Checks performed in order: + * - The first run does not throw + * - The second run does not throw + * - Both runs compute the same output + */ +TEST_CASE(RepeatedRunDoesNotReuseImportedGemm3dTensorInfo, framework::DatasetMode::ALL) +{ + auto conv = std::make_unique<cpu::CpuGemmConv2d>(); + const auto src_info = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NCHW); + const auto weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NCHW); + const auto bias_info = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NCHW); + auto dst_info = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NCHW); + const auto conv_info = PadStrideInfo(1, 1, 0, 0, 2, 2, DimensionRoundingType::FLOOR); + WeightsInfo weights_info(false, 3U, 3U, 1U); + conv->configure(&src_info, &weight_info, &bias_info, &dst_info, conv_info, weights_info); + + auto src = create_tensor<Tensor>(src_info); + auto weight = create_tensor<Tensor>(weight_info); + auto bias = create_tensor<Tensor>(bias_info); + 
src.allocator()->allocate(); + weight.allocator()->allocate(); + bias.allocator()->allocate(); + + ITensorPack run_pack{ + {TensorType::ACL_SRC_0, &src}, {TensorType::ACL_SRC_1, &weight}, {TensorType::ACL_SRC_2, &bias}}; + ITensorPack prep_pack{{TensorType::ACL_SRC_1, &weight}, {TensorType::ACL_SRC_2, &bias}}; + + auto mg = MemoryGroup{}; + auto ws = manage_workspace<Tensor>(conv->workspace(), mg, run_pack, prep_pack); + + auto run_conv = [&](Tensor &dst) -> bool + { + run_pack.add_tensor(TensorType::ACL_DST, &dst); + + library->fill_tensor_value(Accessor(src), 1.f); + library->fill_tensor_value(Accessor(weight), 2.f); + library->fill_tensor_value(Accessor(bias), 3.f); + conv->prepare(prep_pack); + conv->run(run_pack); + return true; + }; + + auto result_0 = create_tensor<Tensor>(dst_info); + auto result_1 = create_tensor<Tensor>(dst_info); + result_0.allocator()->allocate(); + result_1.allocator()->allocate(); + + ARM_COMPUTE_EXPECT_NO_THROW(run_conv(result_0), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT_NO_THROW(run_conv(result_1), framework::LogLevel::ERRORS); + + for (size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) + { + ARM_COMPUTE_EXPECT(reinterpret_cast<float *>(result_0.buffer())[i] == + reinterpret_cast<float *>(result_1.buffer())[i], + framework::LogLevel::ERRORS); + } +} + +/** Test case for memory injection in NEGEMMConvolutionLayer. + * + * Make sure @ref NEGEMMConvolutionLayer still works through injecting the memory at configure time using the old API. *