1 files changed, 145 insertions, 8 deletions
diff --git a/absl/strings/internal/str_format/arg.cc b/absl/strings/internal/str_format/arg.cc
index c0a9a28e..eeb21081 100644
--- a/absl/strings/internal/str_format/arg.cc
+++ b/absl/strings/internal/str_format/arg.cc
@@ -18,15 +18,28 @@
 //
 #include "absl/strings/internal/str_format/arg.h"
 
+#include <algorithm>
 #include <cassert>
-#include <cerrno>
+#include <cstddef>
+#include <cstdint>
 #include <cstdlib>
+#include <cstring>
+#include <cwchar>
 #include <string>
 #include <type_traits>
 
-#include "absl/base/port.h"
+#include "absl/base/config.h"
+#include "absl/base/optimization.h"
+#include "absl/container/fixed_array.h"
+#include "absl/numeric/int128.h"
+#include "absl/strings/internal/str_format/extension.h"
 #include "absl/strings/internal/str_format/float_conversion.h"
 #include "absl/strings/numbers.h"
+#include "absl/strings/string_view.h"
+
+#if defined(ABSL_HAVE_STD_STRING_VIEW)
+#include <string_view>
+#endif
 
 namespace absl {
 ABSL_NAMESPACE_BEGIN
@@ -298,6 +311,83 @@ inline bool ConvertStringArg(string_view v, const FormatConversionSpecImpl conv,
                                conv.has_left_flag());
 }
 
+struct ShiftState {
+  bool saw_high_surrogate = false;  // A high surrogate awaits its low half.
+  uint8_t bits = 0;                 // Low 2 bits saved from that surrogate.
+};
+
+// Encodes the wide character `wc` (a UTF-32 code point or a UTF-16 code unit)
+// as UTF-8 into `buf`, which is assumed to have room for the output (at most 4
+// chars per call). A UTF-16 surrogate pair spans two calls sharing `s`: the
+// high surrogate emits the first two bytes of the 4-byte sequence, the low one
+// the last two. Returns the number of chars written, or
+// `static_cast<size_t>(-1)` for an unpaired low surrogate or a value above
+// U+10FFFF. Like std::wcrtomb(), but always UTF-8 regardless of the locale.
+inline size_t WideToUtf8(wchar_t wc, char *buf, ShiftState &s) {
+  const auto v = static_cast<uint32_t>(wc);
+  if (v < 0x80) {  // ASCII: one byte.
+    *buf = static_cast<char>(v);
+    return 1;
+  } else if (v < 0x800) {  // Two bytes.
+    *buf++ = static_cast<char>(0xc0 | (v >> 6));
+    *buf = static_cast<char>(0x80 | (v & 0x3f));
+    return 2;
+  } else if (v < 0xd800 || (v - 0xe000) < 0x2000) {  // BMP minus surrogates.
+    *buf++ = static_cast<char>(0xe0 | (v >> 12));
+    *buf++ = static_cast<char>(0x80 | ((v >> 6) & 0x3f));
+    *buf = static_cast<char>(0x80 | (v & 0x3f));
+    return 3;
+  } else if ((v - 0x10000) < 0x100000) {  // U+10000..U+10FFFF: four bytes.
+    *buf++ = static_cast<char>(0xf0 | (v >> 18));
+    *buf++ = static_cast<char>(0x80 | ((v >> 12) & 0x3f));
+    *buf++ = static_cast<char>(0x80 | ((v >> 6) & 0x3f));
+    *buf = static_cast<char>(0x80 | (v & 0x3f));
+    return 4;
+  } else if (v < 0xdc00) {  // UTF-16 high surrogate: first two of four bytes.
+    s.saw_high_surrogate = true;
+    s.bits = static_cast<uint8_t>(v & 0x3);  // Needed by the third byte.
+    const uint8_t high_bits = ((v >> 6) & 0xf) + 1;  // Adds back 0x10000.
+    *buf++ = static_cast<char>(0xf0 | (high_bits >> 2));
+    *buf =
+        static_cast<char>(0x80 | static_cast<uint8_t>((high_bits & 0x3) << 4) |
+                          static_cast<uint8_t>((v >> 2) & 0xf));
+    return 2;
+  } else if (v < 0xe000 && s.saw_high_surrogate) {  // Matching low surrogate.
+    *buf++ = static_cast<char>(0x80 | static_cast<uint8_t>(s.bits << 4) |
+                               static_cast<uint8_t>((v >> 6) & 0xf));
+    *buf = static_cast<char>(0x80 | (v & 0x3f));
+    s.saw_high_surrogate = false;
+    s.bits = 0;
+    return 2;
+  } else {  // Unpaired low surrogate or out-of-range value.
+    return static_cast<size_t>(-1);
+  }
+}
+
+// Converts `len` wide characters at `v` to UTF-8 and formats them as a string.
+inline bool ConvertStringArg(const wchar_t *v, size_t len,
+                             const FormatConversionSpecImpl conv,
+                             FormatSinkImpl *sink) {
+  FixedArray<char> utf8(len * 4);  // At most 4 chars per wide character.
+  ShiftState state;
+  size_t used = 0;
+  for (size_t pos = 0; pos != len; ++pos) {
+    const size_t n = WideToUtf8(v[pos], &utf8[used], state);
+    if (n == static_cast<size_t>(-1)) return false;  // Not encodable.
+    used += n;
+  }
+  return ConvertStringArg(string_view(utf8.data(), used), conv, sink);
+}
+
+bool ConvertWCharTImpl(wchar_t v, const FormatConversionSpecImpl conv,
+                       FormatSinkImpl *sink) {
+  char utf8[4];      // One code point is at most four UTF-8 bytes.
+  ShiftState state;  // Left pending only if `v` is a high surrogate.
+  const size_t n = WideToUtf8(v, utf8, state);
+  if (n == static_cast<size_t>(-1) || state.saw_high_surrogate) return false;
+  return ConvertStringArg(string_view(utf8, n), conv, sink);
+}
+
 }  // namespace
 
 bool ConvertBoolArg(bool v, FormatSinkImpl *sink) {
@@ -316,11 +406,14 @@ bool ConvertIntArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl *sink) {
 
   // This odd casting is due to a bug in -Wswitch behavior in gcc49 which causes
   // it to complain about a switch/case type mismatch, even though both are
-  // FormatConverionChar.  Likely this is because at this point
+  // FormatConversionChar.  Likely this is because at this point
   // FormatConversionChar is declared, but not defined.
   switch (static_cast<uint8_t>(conv.conversion_char())) {
     case static_cast<uint8_t>(FormatConversionCharInternal::c):
-      return ConvertCharImpl(static_cast<char>(v), conv, sink);
+      return (std::is_same<T, wchar_t>::value ||
+              (conv.length_mod() == LengthMod::l))
+                 ? ConvertWCharTImpl(static_cast<wchar_t>(v), conv, sink)
+                 : ConvertCharImpl(static_cast<char>(v), conv, sink);
 
     case static_cast<uint8_t>(FormatConversionCharInternal::o):
       as_digits.PrintAsOct(static_cast<U>(v));
@@ -372,6 +465,8 @@ template bool ConvertIntArg<signed char>(signed char v,
 template bool ConvertIntArg<unsigned char>(unsigned char v,
                                            FormatConversionSpecImpl conv,
                                            FormatSinkImpl *sink);
+template bool ConvertIntArg<wchar_t>(wchar_t v, FormatConversionSpecImpl conv,
+                                     FormatSinkImpl *sink);
 template bool ConvertIntArg<short>(short v,  // NOLINT
                                    FormatConversionSpecImpl conv,
                                    FormatSinkImpl *sink);
@@ -403,16 +498,29 @@ StringConvertResult FormatConvertImpl(const std::string &v,
   return {ConvertStringArg(v, conv, sink)};
 }
 
+StringConvertResult FormatConvertImpl(const std::wstring &v,
+                                      const FormatConversionSpecImpl conv,
+                                      FormatSinkImpl *sink) {
+  return {ConvertStringArg(v.data(), v.size(), conv, sink)};  // Wide -> UTF-8.
+}
+
 StringConvertResult FormatConvertImpl(string_view v,
                                       const FormatConversionSpecImpl conv,
                                       FormatSinkImpl *sink) {
   return {ConvertStringArg(v, conv, sink)};
 }
 
-ArgConvertResult<FormatConversionCharSetUnion(
-    FormatConversionCharSetInternal::s, FormatConversionCharSetInternal::p)>
-FormatConvertImpl(const char *v, const FormatConversionSpecImpl conv,
-                  FormatSinkImpl *sink) {
+#if defined(ABSL_HAVE_STD_STRING_VIEW)
+StringConvertResult FormatConvertImpl(std::wstring_view v,
+                                      const FormatConversionSpecImpl conv,
+                                      FormatSinkImpl* sink) {
+  return {ConvertStringArg(v.data(), v.size(), conv, sink)};  // Wide -> UTF-8.
+}
+#endif  // defined(ABSL_HAVE_STD_STRING_VIEW)
+
+StringPtrConvertResult FormatConvertImpl(const char* v,
+                                         const FormatConversionSpecImpl conv,
+                                         FormatSinkImpl* sink) {
   if (conv.conversion_char() == FormatConversionCharInternal::p)
     return {FormatConvertImpl(VoidPtr(v), conv, sink).value};
   size_t len;
@@ -427,6 +535,30 @@ FormatConvertImpl(const char *v, const FormatConversionSpecImpl conv,
   return {ConvertStringArg(string_view(v, len), conv, sink)};
 }
 
+// Formats a wide C string (`%p` prints the pointer itself). A null pointer is
+// formatted as empty; a precision bounds how far `v` is scanned for the NUL.
+StringPtrConvertResult FormatConvertImpl(const wchar_t* v,
+                                         const FormatConversionSpecImpl conv,
+                                         FormatSinkImpl* sink) {
+  if (conv.conversion_char() == FormatConversionCharInternal::p) {
+    return {FormatConvertImpl(VoidPtr(v), conv, sink).value};
+  }
+  size_t len = 0;
+  if (v != nullptr) {
+    const auto precision = conv.precision();
+    len = precision < 0
+              ? std::wcslen(v)
+              : static_cast<size_t>(std::find(v, v + precision, L'\0') - v);
+  }
+  return {ConvertStringArg(v, len, conv, sink)};
+}
+
+StringPtrConvertResult FormatConvertImpl(std::nullptr_t,  // Like null char*.
+                                         const FormatConversionSpecImpl conv,
+                                         FormatSinkImpl* sink) {
+  return FormatConvertImpl(static_cast<const char*>(nullptr), conv, sink);
+}
+
 // ==================== Raw pointers ====================
 ArgConvertResult<FormatConversionCharSetInternal::p> FormatConvertImpl(
     VoidPtr v, const FormatConversionSpecImpl conv, FormatSinkImpl *sink) {
@@ -461,6 +593,11 @@ CharConvertResult FormatConvertImpl(char v, const FormatConversionSpecImpl conv,
                                     FormatSinkImpl *sink) {
   return {ConvertIntArg(v, conv, sink)};
 }
+CharConvertResult FormatConvertImpl(wchar_t v,
+                                    const FormatConversionSpecImpl conv,
+                                    FormatSinkImpl* sink) {
+  return {ConvertIntArg(v, conv, sink)};  // `%c` is emitted as UTF-8.
+}
 
 // ==================== Ints ====================
 IntegralConvertResult FormatConvertImpl(signed char v,