diff --git a/core/stop/residual_norm.cpp b/core/stop/residual_norm.cpp index c962784033a..499665331a0 100644 --- a/core/stop/residual_norm.cpp +++ b/core/stop/residual_norm.cpp @@ -1,10 +1,14 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause #include "ginkgo/core/stop/residual_norm.hpp" +#include + #include +#include +#include #include "core/base/dispatch_helper.hpp" #include "core/components/fill_array_kernels.hpp" @@ -183,9 +187,15 @@ bool ResidualNormBase::check_impl( auto exec = this->get_executor(); norm_dispatch( [&](auto dense_b, auto dense_x) { - auto dense_r = dense_b->clone(); - system_matrix_->apply(neg_one_, dense_x, one_, dense_r); - dense_r->compute_norm2(u_dense_tau_, reduction_tmp_); + using dense_b_type = std::remove_cv_t< + std::remove_reference_t>; + if (!this->rhs_) { + this->rhs_ = dense_b_type::create_with_config_of(dense_b); + } + auto rhs = as(this->rhs_); + rhs->copy_from(dense_b); + system_matrix_->apply(neg_one_, dense_x, one_, rhs); + rhs->compute_norm2(u_dense_tau_, reduction_tmp_); }, b_.get(), updater.solution_); dense_tau = u_dense_tau_.get(); diff --git a/include/ginkgo/core/stop/residual_norm.hpp b/include/ginkgo/core/stop/residual_norm.hpp index c7f240950fa..210d01676f4 100644 --- a/include/ginkgo/core/stop/residual_norm.hpp +++ b/include/ginkgo/core/stop/residual_norm.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -86,6 +86,8 @@ class ResidualNormBase std::shared_ptr neg_one_{}; // workspace for reduction mutable gko::array reduction_tmp_; + // temporary rhs for residual computation + mutable std::shared_ptr rhs_{}; }; diff --git a/reference/test/stop/residual_norm_kernels.cpp b/reference/test/stop/residual_norm_kernels.cpp index e7eef0565d2..bcf723cadbf 100644 --- a/reference/test/stop/residual_norm_kernels.cpp +++ b/reference/test/stop/residual_norm_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -13,7 +13,19 @@ #include "core/test/utils.hpp" -namespace { +class AllocationLogger : public gko::log::Logger { +public: + mutable int count = 0; + +protected: + void on_allocation_completed(const gko::Executor* exec, + const gko::size_type& num_bytes, + const gko::uintptr& location) const override + { + std::cout << num_bytes << std::endl; + ++count; + } +}; template @@ -42,7 +54,7 @@ class ResidualNorm : public ::testing::Test { std::unique_ptr::Factory> rhs_factory_; std::unique_ptr::Factory> rel_factory_; std::unique_ptr::Factory> abs_factory_; - std::shared_ptr exec_; + std::shared_ptr exec_; }; TYPED_TEST_SUITE(ResidualNorm, gko::test::ValueTypes, TypenameNameGenerator); @@ -417,6 +429,111 @@ TYPED_TEST(ResidualNorm, SelfCalculatesAndWaitsTillResidualGoal) } +TYPED_TEST(ResidualNorm, SelfCalculatesWithoutReallocation) +{ + using Mtx = typename TestFixture::Mtx; + using NormVector = typename TestFixture::NormVector; + using T = TypeParam; + using T_nc = gko::remove_complex; + auto initial_res = gko::initialize({100.0}, this->exec_); + auto system_mtx = share(gko::initialize({1.0}, this->exec_)); + + T rhs_val = 10.0; + std::shared_ptr rhs = + gko::initialize({rhs_val}, this->exec_); + auto rhs_criterion = this->rhs_factory_->generate(system_mtx, rhs, nullptr, + initial_res.get()); + auto rel_criterion = this->rel_factory_->generate(system_mtx, rhs, nullptr, + initial_res.get()); + auto abs_criterion = this->abs_factory_->generate(system_mtx, rhs, nullptr, + initial_res.get()); + { + auto solution = gko::initialize({rhs_val - T{10.0}}, this->exec_); + auto rhs_norm = gko::initialize({100.0}, this->exec_); + gko::as(rhs)->compute_norm2(rhs_norm); + constexpr gko::uint8 RelativeStoppingId{1}; + bool one_changed{}; + gko::array stop_status(this->exec_, 1); + stop_status.get_data()[0].reset(); + auto logger = std::make_shared(); + + ASSERT_FALSE(rhs_criterion->update().solution(solution).check( + RelativeStoppingId, true, &stop_status, &one_changed)); + + solution->at(0) = rhs_val - r::value * T{1.1} * rhs_norm->at(0); + ASSERT_FALSE(rhs_criterion->update().solution(solution).check( + RelativeStoppingId, true, &stop_status, &one_changed)); + ASSERT_EQ(stop_status.get_data()[0].has_converged(), false); + ASSERT_EQ(one_changed, false); + + solution->at(0) = rhs_val - r::value * T{0.5} * rhs_norm->at(0); + this->exec_->add_logger(logger); + ASSERT_TRUE(rhs_criterion->update().solution(solution).check( + RelativeStoppingId, true, &stop_status, &one_changed)); + ASSERT_EQ(stop_status.get_data()[0].has_converged(), true); + ASSERT_EQ(one_changed, true); + ASSERT_EQ(logger->count, 0); + this->exec_->remove_logger(logger); + } + { + T initial_norm = 100.0; + auto solution = + gko::initialize({rhs_val - initial_norm}, this->exec_); + constexpr gko::uint8 RelativeStoppingId{1}; + bool one_changed{}; + gko::array stop_status(this->exec_, 1); + stop_status.get_data()[0].reset(); + auto logger = std::make_shared(); + + ASSERT_FALSE(rel_criterion->update().solution(solution).check( + RelativeStoppingId, true, &stop_status, &one_changed)); + + solution->at(0) = rhs_val - r::value * T{1.1} * initial_norm; + ASSERT_FALSE(rel_criterion->update().solution(solution).check( + RelativeStoppingId, true, &stop_status, &one_changed)); + ASSERT_EQ(stop_status.get_data()[0].has_converged(), false); + ASSERT_EQ(one_changed, false); + + solution->at(0) = rhs_val - r::value * T{0.5} * initial_norm; + this->exec_->add_logger(logger); + ASSERT_TRUE(rel_criterion->update().solution(solution).check( + RelativeStoppingId, true, &stop_status, &one_changed)); + ASSERT_EQ(stop_status.get_data()[0].has_converged(), true); + ASSERT_EQ(one_changed, true); + ASSERT_EQ(logger->count, 0); + this->exec_->remove_logger(logger); + } + { + auto solution = gko::initialize({rhs_val - T{100.0}}, this->exec_); + constexpr gko::uint8 RelativeStoppingId{1}; + bool one_changed{}; + gko::array stop_status(this->exec_, 1); + stop_status.get_data()[0].reset(); + auto logger = std::make_shared(); + + ASSERT_FALSE(abs_criterion->update().solution(solution).check( + RelativeStoppingId, true, &stop_status, &one_changed)); + + // TODO FIXME: NVHPC calculates different result of rhs - r*1.2 from + // rhs - tmp = rhs - (r * 1.2). https://godbolt.org/z/GrGE9PE67 + solution->at(0) = rhs_val - r::value * T{1.4}; + ASSERT_FALSE(abs_criterion->update().solution(solution).check( + RelativeStoppingId, true, &stop_status, &one_changed)); + ASSERT_EQ(stop_status.get_data()[0].has_converged(), false); + ASSERT_EQ(one_changed, false); + + solution->at(0) = rhs_val - r::value * T{0.5}; + this->exec_->add_logger(logger); + ASSERT_TRUE(abs_criterion->update().solution(solution).check( + RelativeStoppingId, true, &stop_status, &one_changed)); + ASSERT_EQ(stop_status.get_data()[0].has_converged(), true); + ASSERT_EQ(one_changed, true); + ASSERT_EQ(logger->count, 0); + this->exec_->remove_logger(logger); + } +} + + TYPED_TEST(ResidualNorm, WaitsTillResidualGoalMultipleRHS) { using Mtx = typename TestFixture::Mtx; @@ -1079,6 +1196,3 @@ TYPED_TEST(ResidualNormWithAbsolute, WaitsTillResidualGoalMultipleRHS) ASSERT_EQ(stop_status.get_data()[1].has_converged(), true); ASSERT_EQ(one_changed, true); } - - -} // namespace