about summary refs log tree commit diff
path: root/absl/crc/internal/crc_x86_arm_combined.cc
diff options
context:
space:
mode:
Diffstat (limited to 'absl/crc/internal/crc_x86_arm_combined.cc')
-rw-r--r-- absl/crc/internal/crc_x86_arm_combined.cc 18
1 files changed, 11 insertions, 7 deletions
diff --git a/absl/crc/internal/crc_x86_arm_combined.cc b/absl/crc/internal/crc_x86_arm_combined.cc
index 51eff4ed..79dace34 100644
--- a/absl/crc/internal/crc_x86_arm_combined.cc
+++ b/absl/crc/internal/crc_x86_arm_combined.cc
@@ -101,13 +101,17 @@ constexpr size_t kMediumCutoff = 2048;
namespace {
uint32_t multiply(uint32_t a, uint32_t b) {
- V128 shifts = V128_From2x64(0, 1);
- V128 power = V128_From2x64(0, a);
- V128 crc = V128_From2x64(0, b);
+ V128 power = V128_From64WithZeroFill(a);
+ V128 crc = V128_From64WithZeroFill(b);
V128 res = V128_PMulLow(power, crc);
- // Combine crc values
- res = V128_ShiftLeft64(res, shifts);
+ // Combine crc values.
+ //
+ // Adding res to itself is equivalent to multiplying by 2,
+ // or shifting left by 1. Addition is used as not all compilers
+ // are able to generate optimal code without this hint.
+ // https://godbolt.org/z/rr3fMnf39
+ res = V128_Add64(res, res);
return static_cast<uint32_t>(V128_Extract32<1>(res)) ^
CRC32_u32(0, static_cast<uint32_t>(V128_Low64(res)));
}
@@ -444,11 +448,11 @@ class CRC32AcceleratedX86ARMCombinedMultipleStreams
V128 magic = *(reinterpret_cast<const V128*>(kClmulConstants) + bs - 1);
- V128 tmp = V128_From2x64(0, l64);
+ V128 tmp = V128_From64WithZeroFill(l64);
V128 res1 = V128_PMulLow(tmp, magic);
- tmp = V128_From2x64(0, l641);
+ tmp = V128_From64WithZeroFill(l641);
V128 res2 = V128_PMul10(tmp, magic);
V128 x = V128_Xor(res1, res2);