Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions src/cpu/operators/CpuGemmConv2d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -939,8 +939,10 @@ void CpuGemmConv2d::run(ITensorPack &tensors)
// Handle the case where output has top/bottom padding
const ITensor *out_to_use = out_has_padding ? gemm_output.get() : dst;
Tensor gemm3d;
_gemm_output_3d.extend_padding(out_to_use->info()->padding());
gemm3d.allocator()->soft_init(_gemm_output_3d);
TensorInfo gemm3d_info(_gemm_output_3d);
gemm3d_info.set_is_resizable(true);
gemm3d_info.extend_padding(out_to_use->info()->padding());
gemm3d.allocator()->soft_init(gemm3d_info);
gemm3d.allocator()->import_memory(out_to_use->buffer());
auto gemm_output_to_use = gemm_output.get();

Expand Down
64 changes: 63 additions & 1 deletion tests/validation/NEON/ConvolutionLayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1437,7 +1437,69 @@ TEST_CASE(MemoryInjection, framework::DatasetMode::ALL)
}
}

/** Regression test for repeated runs in cpu::CpuGemmConv2d.
 *
 * Configure the operator once and execute it twice with injected memory.
 * Guards against run() mutating cached state (e.g. reusing an imported
 * gemm3d TensorInfo) so that a second execution diverges from the first.
 *
 * Checks performed in order:
 * - The first run does not throw
 * - The second run does not throw
 * - Both runs compute the same output
 */
TEST_CASE(RepeatedRunDoesNotReuseImportedGemm3dTensorInfo, framework::DatasetMode::ALL)
{
    // Configure a small F32 NCHW convolution once; the same operator object is run twice below.
    auto       conv        = std::make_unique<cpu::CpuGemmConv2d>();
    const auto src_info    = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NCHW);
    const auto weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NCHW);
    const auto bias_info   = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NCHW);
    auto       dst_info    = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NCHW);
    const auto conv_info   = PadStrideInfo(1, 1, 0, 0, 2, 2, DimensionRoundingType::FLOOR);
    WeightsInfo weights_info(false, 3U, 3U, 1U);
    conv->configure(&src_info, &weight_info, &bias_info, &dst_info, conv_info, weights_info);

    auto src    = create_tensor<Tensor>(src_info);
    auto weight = create_tensor<Tensor>(weight_info);
    auto bias   = create_tensor<Tensor>(bias_info);
    src.allocator()->allocate();
    weight.allocator()->allocate();
    bias.allocator()->allocate();

    // Memory is injected through tensor packs rather than owned by the operator.
    ITensorPack run_pack{
        {TensorType::ACL_SRC_0, &src}, {TensorType::ACL_SRC_1, &weight}, {TensorType::ACL_SRC_2, &bias}};
    ITensorPack prep_pack{{TensorType::ACL_SRC_1, &weight}, {TensorType::ACL_SRC_2, &bias}};

    auto mg = MemoryGroup{};
    auto ws = manage_workspace<Tensor>(conv->workspace(), mg, run_pack, prep_pack);

    // Fill inputs with constants and execute prepare()+run() into the given destination.
    // NOTE: re-adding ACL_DST replaces the previous destination entry in the pack.
    auto run_conv = [&](Tensor &dst) -> bool
    {
        run_pack.add_tensor(TensorType::ACL_DST, &dst);

        library->fill_tensor_value(Accessor(src), 1.f);
        library->fill_tensor_value(Accessor(weight), 2.f);
        library->fill_tensor_value(Accessor(bias), 3.f);
        conv->prepare(prep_pack);
        conv->run(run_pack);
        return true;
    };

    auto result_0 = create_tensor<Tensor>(dst_info);
    auto result_1 = create_tensor<Tensor>(dst_info);
    result_0.allocator()->allocate();
    result_1.allocator()->allocate();

    ARM_COMPUTE_EXPECT_NO_THROW(run_conv(result_0), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT_NO_THROW(run_conv(result_1), framework::LogLevel::ERRORS);

    // Element-wise comparison of the two runs' outputs.
    // NOTE(review): raw buffer() indexing assumes both results have identical,
    // padding-free layouts — they share dst_info, so the layouts match.
    for (size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
    {
        ARM_COMPUTE_EXPECT(reinterpret_cast<float *>(result_0.buffer())[i] ==
                               reinterpret_cast<float *>(result_1.buffer())[i],
                           framework::LogLevel::ERRORS);
    }
}

/** Test case for memory injection in NEGEMMConvolutionLayer.
*
* Make sure @ref NEGEMMConvolutionLayer still works through injecting the memory at configure time using the old API.
*
Expand Down