winapi_utf8
convert.cpp
1 // Copyright (c) 2020 Egor Tensin <Egor.Tensin@gmail.com>
2 // This file is part of the "winapi-utf8" project.
3 // For details, see https://github.com/egor-tensin/winapi-utf8.
4 // Distributed under the MIT License.
5 
6 #include <winapi/utf8.hpp>
7 
8 #include <windows.h>
9 
10 #include <cstddef>
11 #include <cstdint>
12 #include <sstream>
13 #include <stdexcept>
14 #include <string>
15 #include <vector>
16 
17 namespace winapi {
18 namespace {
19 
20 std::runtime_error error(const char* function, DWORD code) {
21  std::ostringstream oss;
22  oss << "Function " << function << " failed with error code " << code;
23  return std::runtime_error{oss.str()};
24 }
25 
// Converts a std::size_t to int32_t without truncation.
//
// Returns true and stores the converted value in dest when src is no larger
// than INT32_MAX; otherwise returns false and leaves dest untouched.
bool size_t_to_int(std::size_t src, int32_t& dest) {
    const bool fits = src <= static_cast<uint32_t>(INT32_MAX);
    if (fits) {
        dest = static_cast<int32_t>(src);
    }
    return fits;
}
32 
// Converts an int32_t to std::size_t.
//
// Returns true and stores the converted value in dest when src is
// non-negative and not greater than SIZE_MAX; otherwise returns false and
// leaves dest untouched.
bool int_to_size_t(int32_t src, std::size_t& dest) {
    const bool representable = src >= 0 && static_cast<uint32_t>(src) <= SIZE_MAX;
    if (!representable) {
        return false;
    }
    dest = static_cast<std::size_t>(src);
    return true;
}
39 
40 int32_t convert_input_bytes_to_bytes(std::size_t nb) {
41  int32_t real_nb = 0;
42 
43  if (!size_t_to_int(nb, real_nb)) {
44  std::ostringstream oss;
45  oss << "Input buffer is too large at " << nb << " bytes";
46  throw std::runtime_error{oss.str()};
47  }
48 
49  return real_nb;
50 }
51 
52 int32_t convert_input_bytes_to_chars(std::size_t nb) {
53  if (nb % sizeof(WCHAR) != 0) {
54  std::ostringstream oss;
55  oss << "Buffer size invalid at " << nb << " bytes";
56  throw std::runtime_error{oss.str()};
57  }
58 
59  const std::size_t nch = nb / sizeof(WCHAR);
60  int32_t real_nch = 0;
61 
62  if (!size_t_to_int(nch, real_nch)) {
63  std::ostringstream oss;
64  oss << "Input buffer is too large at " << nch << " characters";
65  throw std::runtime_error{oss.str()};
66  }
67 
68  return real_nch;
69 }
70 
71 template <typename CharT>
72 std::vector<CharT> output_buffer(int32_t size) {
73  std::size_t real_size = 0;
74 
75  if (!int_to_size_t(size, real_size)) {
76  std::ostringstream oss;
77  oss << "Buffer size invalid at " << size << " bytes";
78  throw std::runtime_error{oss.str()};
79  }
80 
81  std::vector<CharT> buffer;
82  buffer.resize(real_size);
83  return buffer;
84 }
85 
86 template <typename CharT>
87 void verify_output(const std::vector<CharT>& expected, int32_t _actual_size) {
88  std::size_t actual_size = 0;
89 
90  if (!int_to_size_t(_actual_size, actual_size) || expected.size() != actual_size) {
91  std::ostringstream oss;
92  oss << "Expected output length " << expected.size() << ", got " << _actual_size;
93  throw std::runtime_error{oss.str()};
94  }
95 }
96 
97 } // namespace
98 
99 std::wstring widen(const std::string& src) {
100  return widen(src.c_str(), src.size());
101 }
102 
103 std::wstring widen(const void* src, std::size_t in_nb) {
104  const DWORD flags = MB_ERR_INVALID_CHARS;
105 
106  const auto in_data = reinterpret_cast<const char*>(src);
107  const auto real_in_nb = convert_input_bytes_to_bytes(in_nb);
108 
109  auto out_nch = ::MultiByteToWideChar(CP_UTF8, flags, in_data, real_in_nb, NULL, 0);
110 
111  if (out_nch == 0) {
112  throw error("MultiByteToWideChar", GetLastError());
113  }
114 
115  static_assert(sizeof(wchar_t) == sizeof(WCHAR), "wchar_t != WCHAR");
116  auto out = output_buffer<wchar_t>(out_nch);
117 
118  out_nch = ::MultiByteToWideChar(CP_UTF8, flags, in_data, real_in_nb, out.data(), out_nch);
119 
120  if (out_nch == 0) {
121  throw error("MultiByteToWideChar", GetLastError());
122  }
123 
124  verify_output(out, out_nch);
125  return {out.data(), out.size()};
126 }
127 
128 std::string narrow(const std::wstring& src) {
129  static_assert(sizeof(wchar_t) == sizeof(WCHAR), "wchar_t != WCHAR");
130  return narrow(src.c_str(), src.size() * sizeof(std::wstring::value_type));
131 }
132 
133 std::string narrow(const std::u16string& src) {
134  return narrow(src.c_str(), src.size() * sizeof(std::u16string::value_type));
135 }
136 
137 std::string narrow(const void* src, std::size_t in_nb) {
138  const DWORD flags = WC_ERR_INVALID_CHARS;
139 
140  const auto in_data = reinterpret_cast<const wchar_t*>(src);
141  const auto in_nch = convert_input_bytes_to_chars(in_nb);
142 
143  auto out_nb = ::WideCharToMultiByte(CP_UTF8, flags, in_data, in_nch, NULL, 0, NULL, NULL);
144 
145  if (out_nb == 0) {
146  throw error("WideCharToMultiByte", GetLastError());
147  }
148 
149  auto out = output_buffer<char>(out_nb);
150 
151  out_nb = ::WideCharToMultiByte(CP_UTF8, flags, in_data, in_nch, out.data(), out_nb, NULL, NULL);
152 
153  if (out_nb == 0) {
154  throw error("WideCharToMultiByte", GetLastError());
155  }
156 
157  verify_output(out, out_nb);
158  return {out.data(), out.size()};
159 }
160 
161 } // namespace winapi
UTF-8 <-> UTF-16 conversion functions.
std::wstring widen(const std::string &)
Definition: convert.cpp:99
std::string narrow(const std::wstring &)
Definition: convert.cpp:128