aboutsummaryrefslogtreecommitdiff
path: root/absl/container/internal/layout_benchmark.cc
diff options
context:
space:
mode:
authorAbseil Team <absl-team@google.com>2024-03-18 07:11:24 -0700
committerCopybara-Service <copybara-worker@google.com>2024-03-18 07:12:47 -0700
commit5839a14828f31f1c2876003677e0ce8e28544b1f (patch)
treec0285fce5021925046aea550c72989ada735204f /absl/container/internal/layout_benchmark.cc
parent56d3f227154af7a646302ae4a0891f14a2d3616b (diff)
downloadabseil-5839a14828f31f1c2876003677e0ce8e28544b1f.tar.gz
abseil-5839a14828f31f1c2876003677e0ce8e28544b1f.tar.bz2
abseil-5839a14828f31f1c2876003677e0ce8e28544b1f.zip
Add a feature to container_internal::Layout that lets you specify some array sizes at compile-time as template parameters. This can make offset and size calculations faster.
In particular, it seems to always improve the performance of AllocSize(), and it sometimes improves the performance of other functions, e.g. when the Layout object outlives the function that created it. PiperOrigin-RevId: 616817169 Change-Id: Id1d318d7d2af68783f9f59090d89c642be6ae558
Diffstat (limited to 'absl/container/internal/layout_benchmark.cc')
-rw-r--r--absl/container/internal/layout_benchmark.cc178
1 files changed, 176 insertions, 2 deletions
diff --git a/absl/container/internal/layout_benchmark.cc b/absl/container/internal/layout_benchmark.cc
index 3af35e33..a8fbfa7b 100644
--- a/absl/container/internal/layout_benchmark.cc
+++ b/absl/container/internal/layout_benchmark.cc
@@ -15,6 +15,9 @@
// Every benchmark should have the same performance as the corresponding
// headroom benchmark.
+#include <cstddef>
+#include <cstdint>
+
#include "absl/base/internal/raw_logging.h"
#include "absl/container/internal/layout.h"
#include "benchmark/benchmark.h"
@@ -28,6 +31,8 @@ using ::benchmark::DoNotOptimize;
using Int128 = int64_t[2];
+constexpr size_t MyAlign(size_t n, size_t m) { return (n + m - 1) & ~(m - 1); }
+
// This benchmark provides the upper bound on performance for BM_OffsetConstant.
template <size_t Offset, class... Ts>
void BM_OffsetConstantHeadroom(benchmark::State& state) {
@@ -37,6 +42,15 @@ void BM_OffsetConstantHeadroom(benchmark::State& state) {
}
template <size_t Offset, class... Ts>
+void BM_OffsetConstantStatic(benchmark::State& state) {
+ using L = typename Layout<Ts...>::template WithStaticSizes<3, 5, 7>;
+ ABSL_RAW_CHECK(L::Partial().template Offset<3>() == Offset, "Invalid offset");
+ for (auto _ : state) {
+ DoNotOptimize(L::Partial().template Offset<3>());
+ }
+}
+
+template <size_t Offset, class... Ts>
void BM_OffsetConstant(benchmark::State& state) {
using L = Layout<Ts...>;
ABSL_RAW_CHECK(L::Partial(3, 5, 7).template Offset<3>() == Offset,
@@ -46,14 +60,75 @@ void BM_OffsetConstant(benchmark::State& state) {
}
}
+template <size_t Offset, class... Ts>
+void BM_OffsetConstantIndirect(benchmark::State& state) {
+ using L = Layout<Ts...>;
+ auto p = L::Partial(3, 5, 7);
+ ABSL_RAW_CHECK(p.template Offset<3>() == Offset, "Invalid offset");
+ for (auto _ : state) {
+ DoNotOptimize(p);
+ DoNotOptimize(p.template Offset<3>());
+ }
+}
+
+template <class... Ts>
+size_t PartialOffset(size_t k);
+
+template <>
+size_t PartialOffset<int8_t, int16_t, int32_t, Int128>(size_t k) {
+ constexpr size_t o = MyAlign(MyAlign(3 * 1, 2) + 5 * 2, 4);
+ // return Align(o + k * 4, 8);
+ return (o + k * 4 + 7) & ~7U;
+}
+
+template <>
+size_t PartialOffset<Int128, int32_t, int16_t, int8_t>(size_t k) {
+ // No alignment is necessary.
+ return 3 * 16 + 5 * 4 + k * 2;
+}
+
+// This benchmark provides the upper bound on performance for BM_OffsetVariable.
+template <size_t Offset, class... Ts>
+void BM_OffsetPartialHeadroom(benchmark::State& state) {
+ size_t k = 7;
+ ABSL_RAW_CHECK(PartialOffset<Ts...>(k) == Offset, "Invalid offset");
+ for (auto _ : state) {
+ DoNotOptimize(k);
+ DoNotOptimize(PartialOffset<Ts...>(k));
+ }
+}
+
+template <size_t Offset, class... Ts>
+void BM_OffsetPartialStatic(benchmark::State& state) {
+ using L = typename Layout<Ts...>::template WithStaticSizes<3, 5>;
+ size_t k = 7;
+ ABSL_RAW_CHECK(L::Partial(k).template Offset<3>() == Offset,
+ "Invalid offset");
+ for (auto _ : state) {
+ DoNotOptimize(k);
+ DoNotOptimize(L::Partial(k).template Offset<3>());
+ }
+}
+
+template <size_t Offset, class... Ts>
+void BM_OffsetPartial(benchmark::State& state) {
+ using L = Layout<Ts...>;
+ size_t k = 7;
+ ABSL_RAW_CHECK(L::Partial(3, 5, k).template Offset<3>() == Offset,
+ "Invalid offset");
+ for (auto _ : state) {
+ DoNotOptimize(k);
+ DoNotOptimize(L::Partial(3, 5, k).template Offset<3>());
+ }
+}
+
template <class... Ts>
size_t VariableOffset(size_t n, size_t m, size_t k);
template <>
size_t VariableOffset<int8_t, int16_t, int32_t, Int128>(size_t n, size_t m,
size_t k) {
- auto Align = [](size_t n, size_t m) { return (n + m - 1) & ~(m - 1); };
- return Align(Align(Align(n * 1, 2) + m * 2, 4) + k * 4, 8);
+ return MyAlign(MyAlign(MyAlign(n * 1, 2) + m * 2, 4) + k * 4, 8);
}
template <>
@@ -94,6 +169,75 @@ void BM_OffsetVariable(benchmark::State& state) {
}
}
+template <class... Ts>
+size_t AllocSize(size_t x);
+
+template <>
+size_t AllocSize<int8_t, int16_t, int32_t, Int128>(size_t x) {
+ constexpr size_t o =
+ Layout<int8_t, int16_t, int32_t, Int128>::Partial(3, 5, 7)
+ .template Offset<Int128>();
+ return o + sizeof(Int128) * x;
+}
+
+template <>
+size_t AllocSize<Int128, int32_t, int16_t, int8_t>(size_t x) {
+ constexpr size_t o =
+ Layout<Int128, int32_t, int16_t, int8_t>::Partial(3, 5, 7)
+ .template Offset<int8_t>();
+ return o + sizeof(int8_t) * x;
+}
+
+// This benchmark provides the upper bound on performance for BM_AllocSize
+template <size_t Size, class... Ts>
+void BM_AllocSizeHeadroom(benchmark::State& state) {
+ size_t x = 9;
+ ABSL_RAW_CHECK(AllocSize<Ts...>(x) == Size, "Invalid size");
+ for (auto _ : state) {
+ DoNotOptimize(x);
+ DoNotOptimize(AllocSize<Ts...>(x));
+ }
+}
+
+template <size_t Size, class... Ts>
+void BM_AllocSizeStatic(benchmark::State& state) {
+ using L = typename Layout<Ts...>::template WithStaticSizes<3, 5, 7>;
+ size_t x = 9;
+ ABSL_RAW_CHECK(L(x).AllocSize() == Size, "Invalid offset");
+ for (auto _ : state) {
+ DoNotOptimize(x);
+ DoNotOptimize(L(x).AllocSize());
+ }
+}
+
+template <size_t Size, class... Ts>
+void BM_AllocSize(benchmark::State& state) {
+ using L = Layout<Ts...>;
+ size_t n = 3;
+ size_t m = 5;
+ size_t k = 7;
+ size_t x = 9;
+ ABSL_RAW_CHECK(L(n, m, k, x).AllocSize() == Size, "Invalid offset");
+ for (auto _ : state) {
+ DoNotOptimize(n);
+ DoNotOptimize(m);
+ DoNotOptimize(k);
+ DoNotOptimize(x);
+ DoNotOptimize(L(n, m, k, x).AllocSize());
+ }
+}
+
+template <size_t Size, class... Ts>
+void BM_AllocSizeIndirect(benchmark::State& state) {
+ using L = Layout<Ts...>;
+ auto l = L(3, 5, 7, 9);
+ ABSL_RAW_CHECK(l.AllocSize() == Size, "Invalid offset");
+ for (auto _ : state) {
+ DoNotOptimize(l);
+ DoNotOptimize(l.AllocSize());
+ }
+}
+
// Run all benchmarks in two modes:
//
// Layout with padding: int8_t[3], int16_t[5], int32_t[7], Int128[?].
@@ -106,16 +250,46 @@ void BM_OffsetVariable(benchmark::State& state) {
OFFSET_BENCHMARK(BM_OffsetConstantHeadroom, 48, int8_t, int16_t, int32_t,
Int128);
+OFFSET_BENCHMARK(BM_OffsetConstantStatic, 48, int8_t, int16_t, int32_t, Int128);
OFFSET_BENCHMARK(BM_OffsetConstant, 48, int8_t, int16_t, int32_t, Int128);
+OFFSET_BENCHMARK(BM_OffsetConstantIndirect, 48, int8_t, int16_t, int32_t,
+ Int128);
+
OFFSET_BENCHMARK(BM_OffsetConstantHeadroom, 82, Int128, int32_t, int16_t,
int8_t);
+OFFSET_BENCHMARK(BM_OffsetConstantStatic, 82, Int128, int32_t, int16_t, int8_t);
OFFSET_BENCHMARK(BM_OffsetConstant, 82, Int128, int32_t, int16_t, int8_t);
+OFFSET_BENCHMARK(BM_OffsetConstantIndirect, 82, Int128, int32_t, int16_t,
+ int8_t);
+
+OFFSET_BENCHMARK(BM_OffsetPartialHeadroom, 48, int8_t, int16_t, int32_t,
+ Int128);
+OFFSET_BENCHMARK(BM_OffsetPartialStatic, 48, int8_t, int16_t, int32_t, Int128);
+OFFSET_BENCHMARK(BM_OffsetPartial, 48, int8_t, int16_t, int32_t, Int128);
+
+OFFSET_BENCHMARK(BM_OffsetPartialHeadroom, 82, Int128, int32_t, int16_t,
+ int8_t);
+OFFSET_BENCHMARK(BM_OffsetPartialStatic, 82, Int128, int32_t, int16_t, int8_t);
+OFFSET_BENCHMARK(BM_OffsetPartial, 82, Int128, int32_t, int16_t, int8_t);
+
OFFSET_BENCHMARK(BM_OffsetVariableHeadroom, 48, int8_t, int16_t, int32_t,
Int128);
OFFSET_BENCHMARK(BM_OffsetVariable, 48, int8_t, int16_t, int32_t, Int128);
+
OFFSET_BENCHMARK(BM_OffsetVariableHeadroom, 82, Int128, int32_t, int16_t,
int8_t);
OFFSET_BENCHMARK(BM_OffsetVariable, 82, Int128, int32_t, int16_t, int8_t);
+
+OFFSET_BENCHMARK(BM_AllocSizeHeadroom, 192, int8_t, int16_t, int32_t, Int128);
+OFFSET_BENCHMARK(BM_AllocSizeStatic, 192, int8_t, int16_t, int32_t, Int128);
+OFFSET_BENCHMARK(BM_AllocSize, 192, int8_t, int16_t, int32_t, Int128);
+OFFSET_BENCHMARK(BM_AllocSizeIndirect, 192, int8_t, int16_t, int32_t, Int128);
+
+OFFSET_BENCHMARK(BM_AllocSizeHeadroom, 91, Int128, int32_t, int16_t, int8_t);
+OFFSET_BENCHMARK(BM_AllocSizeStatic, 91, Int128, int32_t, int16_t, int8_t);
+OFFSET_BENCHMARK(BM_AllocSize, 91, Int128, int32_t, int16_t, int8_t);
+OFFSET_BENCHMARK(BM_AllocSizeIndirect, 91, Int128, int32_t, int16_t, int8_t);
+
} // namespace
} // namespace container_internal
ABSL_NAMESPACE_END