Diffstat (limited to 'absl/strings/internal/cord_internal.h')
-rw-r--r-- | absl/strings/internal/cord_internal.h | 400
1 file changed, 330 insertions, 70 deletions
diff --git a/absl/strings/internal/cord_internal.h b/absl/strings/internal/cord_internal.h
index b50fb79a..e6f0d544 100644
--- a/absl/strings/internal/cord_internal.h
+++ b/absl/strings/internal/cord_internal.h
@@ -27,9 +27,20 @@
 #include "absl/base/internal/invoke.h"
 #include "absl/base/optimization.h"
 #include "absl/container/internal/compressed_tuple.h"
+#include "absl/container/internal/container_memory.h"
 #include "absl/meta/type_traits.h"
 #include "absl/strings/string_view.h"

+// We can only add poisoning if we can detect consteval executions.
+#if defined(ABSL_HAVE_CONSTANT_EVALUATED) && \
+    (defined(ABSL_HAVE_ADDRESS_SANITIZER) || \
+     defined(ABSL_HAVE_MEMORY_SANITIZER))
+#define ABSL_INTERNAL_CORD_HAVE_SANITIZER 1
+#endif
+
+#define ABSL_CORD_INTERNAL_NO_SANITIZE \
+  ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY
+
 namespace absl {
 ABSL_NAMESPACE_BEGIN
 namespace cord_internal {
@@ -91,6 +102,46 @@ enum Constants {
 // Emits a fatal error "Unexpected node type: xyz" and aborts the program.
 ABSL_ATTRIBUTE_NORETURN void LogFatalNodeType(CordRep* rep);

+// Fast implementation of memmove for up to 15 bytes. This implementation is
+// safe for overlapping regions. If nullify_tail is true, the destination is
+// padded with '\0' up to 15 bytes.
+template <bool nullify_tail = false>
+inline void SmallMemmove(char* dst, const char* src, size_t n) {
+  if (n >= 8) {
+    assert(n <= 15);
+    uint64_t buf1;
+    uint64_t buf2;
+    memcpy(&buf1, src, 8);
+    memcpy(&buf2, src + n - 8, 8);
+    if (nullify_tail) {
+      memset(dst + 7, 0, 8);
+    }
+    memcpy(dst, &buf1, 8);
+    memcpy(dst + n - 8, &buf2, 8);
+  } else if (n >= 4) {
+    uint32_t buf1;
+    uint32_t buf2;
+    memcpy(&buf1, src, 4);
+    memcpy(&buf2, src + n - 4, 4);
+    if (nullify_tail) {
+      memset(dst + 4, 0, 4);
+      memset(dst + 7, 0, 8);
+    }
+    memcpy(dst, &buf1, 4);
+    memcpy(dst + n - 4, &buf2, 4);
+  } else {
+    if (n != 0) {
+      dst[0] = src[0];
+      dst[n / 2] = src[n / 2];
+      dst[n - 1] = src[n - 1];
+    }
+    if (nullify_tail) {
+      memset(dst + 7, 0, 8);
+      memset(dst + n, 0, 8);
+    }
+  }
+}
+
 // Compact class for tracking the reference count and state flags for CordRep
 // instances. Data is stored in an atomic int32_t for compactness and speed.
 class RefcountAndFlags {
@@ -129,8 +180,9 @@ class RefcountAndFlags {
   }

   // Returns the current reference count using acquire semantics.
-  inline int32_t Get() const {
-    return count_.load(std::memory_order_acquire) >> kNumFlags;
+  inline size_t Get() const {
+    return static_cast<size_t>(count_.load(std::memory_order_acquire) >>
+                               kNumFlags);
   }

   // Returns whether the atomic integer is 1.
@@ -224,7 +276,11 @@ struct CordRep {
       : length(l), refcount(immortal), tag(EXTERNAL), storage{} {}

   // The following three fields have to be less than 32 bytes since
-  // that is the smallest supported flat node size.
+  // that is the smallest supported flat node size. Some code optimizations
+  // rely on the specific layout of these fields. Notably: the non-trivial
+  // field `refcount` being preceded by `length`, and being tailed by POD data
+  // members only.
+  // # LINT.IfChange
   size_t length;
   RefcountAndFlags refcount;
   // If tag < FLAT, it represents CordRepKind and indicates the type of node.
@@ -240,6 +296,7 @@ struct CordRep {
   // allocate room for these in the derived class, as not all compilers reuse
   // padding space from the base class (clang and gcc do, MSVC does not, etc)
   uint8_t storage[3];
+  // # LINT.ThenChange(cord_rep_btree.h:copy_raw)

   // Returns true if this instance's tag matches the requested type.
   constexpr bool IsRing() const { return tag == RING; }
@@ -422,25 +479,25 @@ constexpr char GetOrNull(absl::string_view data, size_t pos) {
   return pos < data.size() ? data[pos] : '\0';
 }

-// We store cordz_info as 64 bit pointer value in big endian format. This
-// guarantees that the least significant byte of cordz_info matches the last
-// byte of the inline data representation in as_chars_, which holds the inlined
+// We store cordz_info as 64 bit pointer value in little endian format. This
+// guarantees that the least significant byte of cordz_info matches the first
+// byte of the inline data representation in `data`, which holds the inlined
 // size or the 'is_tree' bit.
 using cordz_info_t = int64_t;

 // Assert that the `cordz_info` pointer value perfectly overlaps the last half
-// of `as_chars_` and can hold a pointer value.
+// of `data` and can hold a pointer value.
 static_assert(sizeof(cordz_info_t) * 2 == kMaxInline + 1, "");
 static_assert(sizeof(cordz_info_t) >= sizeof(intptr_t), "");

-// BigEndianByte() creates a big endian representation of 'value', i.e.: a big
-// endian value where the last byte in the host's representation holds 'value`,
-// with all other bytes being 0.
-static constexpr cordz_info_t BigEndianByte(unsigned char value) {
+// LittleEndianByte() creates a little endian representation of 'value', i.e.:
+// a little endian value where the first byte in the host's representation
+// holds 'value`, with all other bytes being 0.
+static constexpr cordz_info_t LittleEndianByte(unsigned char value) {
 #if defined(ABSL_IS_BIG_ENDIAN)
-  return value;
-#else
   return static_cast<cordz_info_t>(value) << ((sizeof(cordz_info_t) - 1) * 8);
+#else
+  return value;
 #endif
 }

@@ -449,38 +506,80 @@ class InlineData {
   // DefaultInitType forces the use of the default initialization constructor.
   enum DefaultInitType { kDefaultInit };

-  // kNullCordzInfo holds the big endian representation of intptr_t(1)
+  // kNullCordzInfo holds the little endian representation of intptr_t(1)
   // This is the 'null' / initial value of 'cordz_info'. The null value
   // is specifically big endian 1 as with 64-bit pointers, the last
   // byte of cordz_info overlaps with the last byte holding the tag.
-  static constexpr cordz_info_t kNullCordzInfo = BigEndianByte(1);
-
-  constexpr InlineData() : as_chars_{0} {}
-  explicit InlineData(DefaultInitType) {}
-  explicit constexpr InlineData(CordRep* rep) : as_tree_(rep) {}
-  explicit constexpr InlineData(absl::string_view chars)
-      : as_chars_{
-            GetOrNull(chars, 0), GetOrNull(chars, 1),
-            GetOrNull(chars, 2), GetOrNull(chars, 3),
-            GetOrNull(chars, 4), GetOrNull(chars, 5),
-            GetOrNull(chars, 6), GetOrNull(chars, 7),
-            GetOrNull(chars, 8), GetOrNull(chars, 9),
-            GetOrNull(chars, 10), GetOrNull(chars, 11),
-            GetOrNull(chars, 12), GetOrNull(chars, 13),
-            GetOrNull(chars, 14), static_cast<char>((chars.size() << 1))} {}
+  static constexpr cordz_info_t kNullCordzInfo = LittleEndianByte(1);
+
+  // kTagOffset contains the offset of the control byte / tag. This constant is
+  // intended mostly for debugging purposes: do not remove this constant as it
+  // is actively inspected and used by gdb pretty printing code.
+  static constexpr size_t kTagOffset = 0;
+
+  // Implement `~InlineData()` conditionally: we only need this destructor to
+  // unpoison poisoned instances under *SAN, and it will only compile correctly
+  // if the current compiler supports `absl::is_constant_evaluated()`.
+#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
+  ~InlineData() noexcept { unpoison(); }
+#endif
+
+  constexpr InlineData() noexcept { poison_this(); }
+
+  explicit InlineData(DefaultInitType) noexcept : rep_(kDefaultInit) {
+    poison_this();
+  }
+
+  explicit InlineData(CordRep* rep) noexcept : rep_(rep) {
+    ABSL_ASSERT(rep != nullptr);
+  }
+
+  // Explicit constexpr constructor to create a constexpr InlineData
+  // value. Creates an inlined SSO value if `rep` is null, otherwise
+  // creates a tree instance value.
+  constexpr InlineData(absl::string_view sv, CordRep* rep) noexcept
+      : rep_(rep ? Rep(rep) : Rep(sv)) {
+    poison();
+  }
+
+  constexpr InlineData(const InlineData& rhs) noexcept;
+  InlineData& operator=(const InlineData& rhs) noexcept;
+
+  friend bool operator==(const InlineData& lhs, const InlineData& rhs) {
+#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
+    const Rep l = lhs.rep_.SanitizerSafeCopy();
+    const Rep r = rhs.rep_.SanitizerSafeCopy();
+    return memcmp(&l, &r, sizeof(l)) == 0;
+#else
+    return memcmp(&lhs, &rhs, sizeof(lhs)) == 0;
+#endif
+  }
+  friend bool operator!=(const InlineData& lhs, const InlineData& rhs) {
+    return !operator==(lhs, rhs);
+  }
+
+  // Poisons the unused inlined SSO data if the current instance
+  // is inlined, else un-poisons the entire instance.
+  constexpr void poison();
+
+  // Un-poisons this instance.
+  constexpr void unpoison();
+
+  // Poisons the current instance. This is used on default initialization.
+  constexpr void poison_this();

   // Returns true if the current instance is empty.
   // The 'empty value' is an inlined data value of zero length.
-  bool is_empty() const { return tag() == 0; }
+  bool is_empty() const { return rep_.tag() == 0; }

   // Returns true if the current instance holds a tree value.
-  bool is_tree() const { return (tag() & 1) != 0; }
+  bool is_tree() const { return (rep_.tag() & 1) != 0; }

   // Returns true if the current instance holds a cordz_info value.
   // Requires the current instance to hold a tree value.
   bool is_profiled() const {
     assert(is_tree());
-    return as_tree_.cordz_info != kNullCordzInfo;
+    return rep_.cordz_info() != kNullCordzInfo;
   }

   // Returns true if either of the provided instances hold a cordz_info value.
@@ -489,7 +588,7 @@ class InlineData {
   static bool is_either_profiled(const InlineData& data1,
                                  const InlineData& data2) {
     assert(data1.is_tree() && data2.is_tree());
-    return (data1.as_tree_.cordz_info | data2.as_tree_.cordz_info) !=
+    return (data1.rep_.cordz_info() | data2.rep_.cordz_info()) !=
            kNullCordzInfo;
   }

@@ -498,8 +597,8 @@ class InlineData {
   // Requires the current instance to hold a tree value.
   CordzInfo* cordz_info() const {
     assert(is_tree());
-    intptr_t info = static_cast<intptr_t>(
-        absl::big_endian::ToHost64(static_cast<uint64_t>(as_tree_.cordz_info)));
+    intptr_t info = static_cast<intptr_t>(absl::little_endian::ToHost64(
+        static_cast<uint64_t>(rep_.cordz_info())));
     assert(info & 1);
     return reinterpret_cast<CordzInfo*>(info - 1);
   }
@@ -510,21 +609,21 @@ class InlineData {
   void set_cordz_info(CordzInfo* cordz_info) {
     assert(is_tree());
     uintptr_t info = reinterpret_cast<uintptr_t>(cordz_info) | 1;
-    as_tree_.cordz_info =
-        static_cast<cordz_info_t>(absl::big_endian::FromHost64(info));
+    rep_.set_cordz_info(
+        static_cast<cordz_info_t>(absl::little_endian::FromHost64(info)));
   }

   // Resets the current cordz_info to null / empty.
   void clear_cordz_info() {
     assert(is_tree());
-    as_tree_.cordz_info = kNullCordzInfo;
+    rep_.set_cordz_info(kNullCordzInfo);
   }

   // Returns a read only pointer to the character data inside this instance.
   // Requires the current instance to hold inline data.
   const char* as_chars() const {
     assert(!is_tree());
-    return as_chars_;
+    return rep_.as_chars();
   }

   // Returns a mutable pointer to the character data inside this instance.
@@ -542,20 +641,33 @@ class InlineData {
   //
   // It's an error to read from the returned pointer without a preceding write
   // if the current instance does not hold inline data, i.e.: is_tree() == true.
-  char* as_chars() { return as_chars_; }
+  char* as_chars() { return rep_.as_chars(); }

   // Returns the tree value of this value.
   // Requires the current instance to hold a tree value.
   CordRep* as_tree() const {
     assert(is_tree());
-    return as_tree_.rep;
+    return rep_.tree();
+  }
+
+  void set_inline_data(const char* data, size_t n) {
+    ABSL_ASSERT(n <= kMaxInline);
+    unpoison();
+    rep_.set_tag(static_cast<int8_t>(n << 1));
+    SmallMemmove<true>(rep_.as_chars(), data, n);
+    poison();
+  }
+
+  void copy_max_inline_to(char* dst) const {
+    assert(!is_tree());
+    memcpy(dst, rep_.SanitizerSafeCopy().as_chars(), kMaxInline);
   }

   // Initialize this instance to holding the tree value `rep`,
   // initializing the cordz_info to null, i.e.: 'not profiled'.
   void make_tree(CordRep* rep) {
-    as_tree_.rep = rep;
-    as_tree_.cordz_info = kNullCordzInfo;
+    unpoison();
+    rep_.make_tree(rep);
   }

   // Set the tree value of this instance to 'rep`.
@@ -563,54 +675,202 @@ class InlineData {
   // Does not affect the value of cordz_info.
   void set_tree(CordRep* rep) {
     assert(is_tree());
-    as_tree_.rep = rep;
+    rep_.set_tree(rep);
   }

   // Returns the size of the inlined character data inside this instance.
   // Requires the current instance to hold inline data.
-  size_t inline_size() const {
-    assert(!is_tree());
-    return tag() >> 1;
-  }
+  size_t inline_size() const { return rep_.inline_size(); }

   // Sets the size of the inlined character data inside this instance.
   // Requires `size` to be <= kMaxInline.
   // See the documentation on 'as_chars()' for more information and examples.
   void set_inline_size(size_t size) {
-    ABSL_ASSERT(size <= kMaxInline);
-    tag() = static_cast<char>(size << 1);
+    unpoison();
+    rep_.set_inline_size(size);
+    poison();
+  }
+
+  // Compares 'this' inlined data with rhs. The comparison is a straightforward
+  // lexicographic comparison. `Compare()` returns values as follows:
+  //
+  //   -1  'this' InlineData instance is smaller
+  //    0  the InlineData instances are equal
+  //    1  'this' InlineData instance larger
+  int Compare(const InlineData& rhs) const {
+    return Compare(rep_.SanitizerSafeCopy(), rhs.rep_.SanitizerSafeCopy());
   }

  private:
-  // See cordz_info_t for forced alignment and size of `cordz_info` details.
-  struct AsTree {
-    explicit constexpr AsTree(absl::cord_internal::CordRep* tree)
-        : rep(tree), cordz_info(kNullCordzInfo) {}
-    // This union uses up extra space so that whether rep is 32 or 64 bits,
-    // cordz_info will still start at the eighth byte, and the last
-    // byte of cordz_info will still be the last byte of InlineData.
-    union {
+  struct Rep {
+    // See cordz_info_t for forced alignment and size of `cordz_info` details.
+    struct AsTree {
+      explicit constexpr AsTree(absl::cord_internal::CordRep* tree)
+          : rep(tree) {}
+      cordz_info_t cordz_info = kNullCordzInfo;
       absl::cord_internal::CordRep* rep;
-      cordz_info_t unused_aligner;
     };
-    cordz_info_t cordz_info;
-  };
-  char& tag() { return reinterpret_cast<char*>(this)[kMaxInline]; }
-  char tag() const { return reinterpret_cast<const char*>(this)[kMaxInline]; }
+
+    explicit Rep(DefaultInitType) {}
+    constexpr Rep() : data{0} {}
+    constexpr Rep(const Rep&) = default;
+    constexpr Rep& operator=(const Rep&) = default;
+
+    explicit constexpr Rep(CordRep* rep) : as_tree(rep) {}
+
+    explicit constexpr Rep(absl::string_view chars)
+        : data{static_cast<char>((chars.size() << 1)),
+               GetOrNull(chars, 0),
+               GetOrNull(chars, 1),
+               GetOrNull(chars, 2),
+               GetOrNull(chars, 3),
+               GetOrNull(chars, 4),
+               GetOrNull(chars, 5),
+               GetOrNull(chars, 6),
+               GetOrNull(chars, 7),
+               GetOrNull(chars, 8),
+               GetOrNull(chars, 9),
+               GetOrNull(chars, 10),
+               GetOrNull(chars, 11),
+               GetOrNull(chars, 12),
+               GetOrNull(chars, 13),
+               GetOrNull(chars, 14)} {}
+
+    // Disable sanitizer as we must always be able to read `tag`.
+    ABSL_CORD_INTERNAL_NO_SANITIZE
+    int8_t tag() const { return reinterpret_cast<const int8_t*>(this)[0]; }
+    void set_tag(int8_t rhs) { reinterpret_cast<int8_t*>(this)[0] = rhs; }
+
+    char* as_chars() { return data + 1; }
+    const char* as_chars() const { return data + 1; }
+
+    bool is_tree() const { return (tag() & 1) != 0; }
+
+    size_t inline_size() const {
+      ABSL_ASSERT(!is_tree());
+      return static_cast<size_t>(tag()) >> 1;
+    }
+
+    void set_inline_size(size_t size) {
+      ABSL_ASSERT(size <= kMaxInline);
+      set_tag(static_cast<int8_t>(size << 1));
+    }
+
+    CordRep* tree() const { return as_tree.rep; }
+    void set_tree(CordRep* rhs) { as_tree.rep = rhs; }
+
+    cordz_info_t cordz_info() const { return as_tree.cordz_info; }
+    void set_cordz_info(cordz_info_t rhs) { as_tree.cordz_info = rhs; }
+
+    void make_tree(CordRep* tree) {
+      as_tree.rep = tree;
+      as_tree.cordz_info = kNullCordzInfo;
+    }
+
+#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
+    constexpr Rep SanitizerSafeCopy() const {
+      if (!absl::is_constant_evaluated()) {
+        Rep res;
+        if (is_tree()) {
+          res = *this;
+        } else {
+          res.set_tag(tag());
+          memcpy(res.as_chars(), as_chars(), inline_size());
+        }
+        return res;
+      } else {
+        return *this;
+      }
+    }
+#else
+    constexpr const Rep& SanitizerSafeCopy() const { return *this; }
+#endif

-  // If the data has length <= kMaxInline, we store it in `as_chars_`, and
-  // store the size in the last char of `as_chars_` shifted left + 1.
-  // Else we store it in a tree and store a pointer to that tree in
-  // `as_tree_.rep` and store a tag in `tagged_size`.
-  union {
-    char as_chars_[kMaxInline + 1];
-    AsTree as_tree_;
+    // If the data has length <= kMaxInline, we store it in `data`, and
+    // store the size in the first char of `data` shifted left + 1.
+    // Else we store it in a tree and store a pointer to that tree in
+    // `as_tree.rep` with a tagged pointer to make `tag() & 1` non zero.
+    union {
+      char data[kMaxInline + 1];
+      AsTree as_tree;
+    };
   };
+
+  // Private implementation of `Compare()`
+  static inline int Compare(const Rep& lhs, const Rep& rhs) {
+    uint64_t x, y;
+    memcpy(&x, lhs.as_chars(), sizeof(x));
+    memcpy(&y, rhs.as_chars(), sizeof(y));
+    if (x == y) {
+      memcpy(&x, lhs.as_chars() + 7, sizeof(x));
+      memcpy(&y, rhs.as_chars() + 7, sizeof(y));
+      if (x == y) {
+        if (lhs.inline_size() == rhs.inline_size()) return 0;
+        return lhs.inline_size() < rhs.inline_size() ? -1 : 1;
+      }
+    }
+    x = absl::big_endian::FromHost64(x);
+    y = absl::big_endian::FromHost64(y);
+    return x < y ? -1 : 1;
+  }
+
+  Rep rep_;
 };

 static_assert(sizeof(InlineData) == kMaxInline + 1, "");

+#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
+
+constexpr InlineData::InlineData(const InlineData& rhs) noexcept
+    : rep_(rhs.rep_.SanitizerSafeCopy()) {
+  poison();
+}
+
+inline InlineData& InlineData::operator=(const InlineData& rhs) noexcept {
+  unpoison();
+  rep_ = rhs.rep_.SanitizerSafeCopy();
+  poison();
+  return *this;
+}
+
+constexpr void InlineData::poison_this() {
+  if (!absl::is_constant_evaluated()) {
+    container_internal::SanitizerPoisonObject(this);
+  }
+}
+
+constexpr void InlineData::unpoison() {
+  if (!absl::is_constant_evaluated()) {
+    container_internal::SanitizerUnpoisonObject(this);
+  }
+}
+
+constexpr void InlineData::poison() {
+  if (!absl::is_constant_evaluated()) {
+    if (is_tree()) {
+      container_internal::SanitizerUnpoisonObject(this);
+    } else if (const size_t size = inline_size()) {
+      if (size < kMaxInline) {
+        const char* end = rep_.as_chars() + size;
+        container_internal::SanitizerPoisonMemoryRegion(end, kMaxInline - size);
+      }
+    } else {
+      container_internal::SanitizerPoisonObject(this);
+    }
+  }
+}
+
+#else  // ABSL_INTERNAL_CORD_HAVE_SANITIZER
+
+constexpr InlineData::InlineData(const InlineData&) noexcept = default;
+inline InlineData& InlineData::operator=(const InlineData&) noexcept = default;
+
+constexpr void InlineData::poison_this() {}
+constexpr void InlineData::unpoison() {}
+constexpr void InlineData::poison() {}
+
+#endif  // ABSL_INTERNAL_CORD_HAVE_SANITIZER
+
 inline CordRepSubstring* CordRep::substring() {
   assert(IsSubstring());
   return static_cast<CordRepSubstring*>(this);
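
The SmallMemmove template added in the first hunk copies up to 15 bytes with a handful of fixed-size loads and stores instead of a byte loop. The key detail for the 8..15 byte case is that both 8-byte source words are read before anything is written, which keeps the copy correct when source and destination overlap. A minimal standalone sketch of that trick (illustrative only, not part of the diff; CopyUpTo15 is a made-up name):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>

// Copies n bytes (8 <= n <= 15) between possibly overlapping buffers. Both
// 8-byte words are loaded before either store, so an overlapping destination
// cannot clobber source bytes that are still needed.
static void CopyUpTo15(char* dst, const char* src, std::size_t n) {
  assert(n >= 8 && n <= 15);
  std::uint64_t head;
  std::uint64_t tail;
  std::memcpy(&head, src, 8);          // bytes [0, 8)
  std::memcpy(&tail, src + n - 8, 8);  // bytes [n - 8, n), overlaps head
  std::memcpy(dst, &head, 8);
  std::memcpy(dst + n - 8, &tail, 8);
}

int main() {
  char buf[16] = "abcdefghijklmno";
  CopyUpTo15(buf + 1, buf, 14);  // shift the first 14 bytes right by one
  assert(std::memcmp(buf, "aabcdefghijklmn", 15) == 0);
  return 0;
}

In the diff itself, set_inline_data() calls SmallMemmove<true>, so the unused tail of the inline buffer is zero-filled; that keeps the full 16-byte representation well defined for the memcmp-based operator== and the word-wise Compare() added further down.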
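The change from BigEndianByte to LittleEndianByte moves the control byte to offset 0 (kTagOffset = 0). In the new Rep, an inline value stores `size << 1` in the first byte, so its low bit is 0, while a tree value stores cordz_info, a pointer-derived value with bit 0 forced to 1, in little-endian byte order, so the same byte reads as odd. A standalone sketch of that invariant (illustrative only, not Abseil code):

#include <cassert>
#include <cstdint>

int main() {
  unsigned char storage[16] = {};

  // Inline mode: 11 bytes of data -> tag byte holds 11 << 1 = 22 (low bit 0).
  storage[0] = static_cast<unsigned char>(11u << 1);
  assert((storage[0] & 1) == 0);    // reads as "inline"
  assert((storage[0] >> 1) == 11);  // the size round-trips

  // Tree mode: a pointer-derived value with bit 0 set, written into the low
  // half of the buffer in little-endian byte order (least significant byte
  // first). The odd low byte lands at offset 0, so "tag & 1" becomes 1.
  const std::uint64_t cordz_info = 0x00007f1234567890ULL | 1u;
  for (int i = 0; i < 8; ++i) {
    storage[i] = static_cast<unsigned char>((cordz_info >> (8 * i)) & 0xff);
  }
  assert((storage[0] & 1) == 1);    // reads as "tree"
  return 0;
}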
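The private Compare() helper covers the 15 inline bytes with two overlapping 8-byte windows, [0, 8) and [7, 15), and orders each window as a big-endian integer, since big-endian integer order agrees with lexicographic byte order; only when both windows compare equal does the real code additionally break ties on inline_size(). A minimal sketch of the windowing idea for zero-padded buffers (illustrative only, not the Abseil implementation):

#include <cassert>
#include <cstdint>
#include <cstring>

// Reads 8 bytes as a big-endian integer (host independent).
static std::uint64_t LoadBigEndian64(const unsigned char* p) {
  std::uint64_t v = 0;
  for (int i = 0; i < 8; ++i) v = (v << 8) | p[i];
  return v;
}

// Lexicographic comparison of two 15-byte, zero-padded buffers.
static int Compare15(const unsigned char* a, const unsigned char* b) {
  std::uint64_t x = LoadBigEndian64(a);
  std::uint64_t y = LoadBigEndian64(b);
  if (x != y) return x < y ? -1 : 1;
  x = LoadBigEndian64(a + 7);  // second window shares byte 7 with the first
  y = LoadBigEndian64(b + 7);
  if (x != y) return x < y ? -1 : 1;
  return 0;
}

int main() {
  const unsigned char a[15] = {'c', 'o', 'r', 'd'};
  const unsigned char b[15] = {'c', 'o', 'r', 'e'};
  assert(Compare15(a, b) < 0 && std::memcmp(a, b, 15) < 0);
  assert(Compare15(a, a) == 0);
  return 0;
}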
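When ABSL_INTERNAL_CORD_HAVE_SANITIZER is defined, poison() marks the unused portion of the inline buffer (or the whole object) as inaccessible so that ASan or MSan reports reads past the logical size, and unpoison() must run before those bytes are legitimately rewritten. A rough standalone illustration of the same pattern using the public AddressSanitizer macros (assumes <sanitizer/asan_interface.h> is available; the header itself goes through the absl::container_internal helpers instead):

#include <sanitizer/asan_interface.h>

#include <cstring>

// A 16-byte small-string layout similar in spirit to InlineData: one tag byte
// followed by 15 bytes of inline storage.
struct SmallBuffer {
  char tag = 0;
  char data[15] = {};
};

int main() {
  SmallBuffer buf;
  std::memcpy(buf.data, "hello", 5);
  buf.tag = static_cast<char>(5 << 1);

  // Only data[0..4] is meaningful; poison the rest so a read past the logical
  // size is reported when the program is built with -fsanitize=address.
  // Without ASan these macros expand to no-ops.
  ASAN_POISON_MEMORY_REGION(buf.data + 5, sizeof(buf.data) - 5);

  // ... reads of buf.data[0..4] are fine here; buf.data[5] would be flagged.

  // Unpoison before growing the string or letting the object go out of scope.
  ASAN_UNPOISON_MEMORY_REGION(buf.data + 5, sizeof(buf.data) - 5);
  return 0;
}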