// codecvt.h (21 KB)
  1. // Locale support (codecvt) -*- C++ -*-
  2. // Copyright (C) 2000-2015 Free Software Foundation, Inc.
  3. //
  4. // This file is part of the GNU ISO C++ Library. This library is free
  5. // software; you can redistribute it and/or modify it under the
  6. // terms of the GNU General Public License as published by the
  7. // Free Software Foundation; either version 3, or (at your option)
  8. // any later version.
  9. // This library is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU General Public License for more details.
  13. // Under Section 7 of GPL version 3, you are granted additional
  14. // permissions described in the GCC Runtime Library Exception, version
  15. // 3.1, as published by the Free Software Foundation.
  16. // You should have received a copy of the GNU General Public License and
  17. // a copy of the GCC Runtime Library Exception along with this program;
  18. // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  19. // <http://www.gnu.org/licenses/>.
  20. /** @file bits/codecvt.h
  21. * This is an internal header file, included by other library headers.
  22. * Do not attempt to use it directly. @headername{locale}
  23. */
  24. //
  25. // ISO C++ 14882: 22.2.1.5 Template class codecvt
  26. //
  27. // Written by Benjamin Kosnik <bkoz@redhat.com>
  28. #ifndef _CODECVT_H
  29. #define _CODECVT_H 1
  30. #pragma GCC system_header
  31. namespace std _GLIBCXX_VISIBILITY(default)
  32. {
  33. _GLIBCXX_BEGIN_NAMESPACE_VERSION
  /// Empty base class for codecvt facet [22.2.1.5].
  ///
  /// Carries no data or functions; it only provides the @c result
  /// enumeration returned by the codecvt conversion functions.
  class codecvt_base
  {
  public:
    /// Outcome of a conversion or unshift request.
    enum result
    {
      ok,      ///< All of the input was converted.
      partial, ///< Input ended early or the output had insufficient space.
      error,   ///< The conversion failed.
      noconv   ///< No conversion was necessary.
    };
  };
  46. /**
  47. * @brief Common base for codecvt functions.
  48. *
  49. * This template class provides implementations of the public functions
  50. * that forward to the protected virtual functions.
  51. *
  52. * This template also provides abstract stubs for the protected virtual
  53. * functions.
  54. */
  55. template<typename _InternT, typename _ExternT, typename _StateT>
  56. class __codecvt_abstract_base
  57. : public locale::facet, public codecvt_base
  58. {
  59. public:
  60. // Types:
  61. typedef codecvt_base::result result;
  62. typedef _InternT intern_type;
  63. typedef _ExternT extern_type;
  64. typedef _StateT state_type;
  65. // 22.2.1.5.1 codecvt members
  66. /**
  67. * @brief Convert from internal to external character set.
  68. *
  69. * Converts input string of intern_type to output string of
  70. * extern_type. This is analogous to wcsrtombs. It does this by
  71. * calling codecvt::do_out.
  72. *
  73. * The source and destination character sets are determined by the
  74. * facet's locale, internal and external types.
  75. *
  76. * The characters in [from,from_end) are converted and written to
  77. * [to,to_end). from_next and to_next are set to point to the
  78. * character following the last successfully converted character,
  79. * respectively. If the result needed no conversion, from_next and
  80. * to_next are not affected.
  81. *
  82. * The @a state argument should be initialized if the input is at the
  83. * beginning and carried from a previous call if continuing
  84. * conversion. There are no guarantees about how @a state is used.
  85. *
  86. * The result returned is a member of codecvt_base::result. If
  87. * all the input is converted, returns codecvt_base::ok. If no
  88. * conversion is necessary, returns codecvt_base::noconv. If
  89. * the input ends early or there is insufficient space in the
  90. * output, returns codecvt_base::partial. Otherwise the
  91. * conversion failed and codecvt_base::error is returned.
  92. *
  93. * @param __state Persistent conversion state data.
  94. * @param __from Start of input.
  95. * @param __from_end End of input.
  96. * @param __from_next Returns start of unconverted data.
  97. * @param __to Start of output buffer.
  98. * @param __to_end End of output buffer.
  99. * @param __to_next Returns start of unused output area.
  100. * @return codecvt_base::result.
  101. */
  102. result
  103. out(state_type& __state, const intern_type* __from,
  104. const intern_type* __from_end, const intern_type*& __from_next,
  105. extern_type* __to, extern_type* __to_end,
  106. extern_type*& __to_next) const
  107. {
  108. return this->do_out(__state, __from, __from_end, __from_next,
  109. __to, __to_end, __to_next);
  110. }
  111. /**
  112. * @brief Reset conversion state.
  113. *
  114. * Writes characters to output that would restore @a state to initial
  115. * conditions. The idea is that if a partial conversion occurs, then
  116. * the converting the characters written by this function would leave
  117. * the state in initial conditions, rather than partial conversion
  118. * state. It does this by calling codecvt::do_unshift().
  119. *
  120. * For example, if 4 external characters always converted to 1 internal
  121. * character, and input to in() had 6 external characters with state
  122. * saved, this function would write two characters to the output and
  123. * set the state to initialized conditions.
  124. *
  125. * The source and destination character sets are determined by the
  126. * facet's locale, internal and external types.
  127. *
  128. * The result returned is a member of codecvt_base::result. If the
  129. * state could be reset and data written, returns codecvt_base::ok. If
  130. * no conversion is necessary, returns codecvt_base::noconv. If the
  131. * output has insufficient space, returns codecvt_base::partial.
  132. * Otherwise the reset failed and codecvt_base::error is returned.
  133. *
  134. * @param __state Persistent conversion state data.
  135. * @param __to Start of output buffer.
  136. * @param __to_end End of output buffer.
  137. * @param __to_next Returns start of unused output area.
  138. * @return codecvt_base::result.
  139. */
  140. result
  141. unshift(state_type& __state, extern_type* __to, extern_type* __to_end,
  142. extern_type*& __to_next) const
  143. { return this->do_unshift(__state, __to,__to_end,__to_next); }
  144. /**
  145. * @brief Convert from external to internal character set.
  146. *
  147. * Converts input string of extern_type to output string of
  148. * intern_type. This is analogous to mbsrtowcs. It does this by
  149. * calling codecvt::do_in.
  150. *
  151. * The source and destination character sets are determined by the
  152. * facet's locale, internal and external types.
  153. *
  154. * The characters in [from,from_end) are converted and written to
  155. * [to,to_end). from_next and to_next are set to point to the
  156. * character following the last successfully converted character,
  157. * respectively. If the result needed no conversion, from_next and
  158. * to_next are not affected.
  159. *
  160. * The @a state argument should be initialized if the input is at the
  161. * beginning and carried from a previous call if continuing
  162. * conversion. There are no guarantees about how @a state is used.
  163. *
  164. * The result returned is a member of codecvt_base::result. If
  165. * all the input is converted, returns codecvt_base::ok. If no
  166. * conversion is necessary, returns codecvt_base::noconv. If
  167. * the input ends early or there is insufficient space in the
  168. * output, returns codecvt_base::partial. Otherwise the
  169. * conversion failed and codecvt_base::error is returned.
  170. *
  171. * @param __state Persistent conversion state data.
  172. * @param __from Start of input.
  173. * @param __from_end End of input.
  174. * @param __from_next Returns start of unconverted data.
  175. * @param __to Start of output buffer.
  176. * @param __to_end End of output buffer.
  177. * @param __to_next Returns start of unused output area.
  178. * @return codecvt_base::result.
  179. */
  180. result
  181. in(state_type& __state, const extern_type* __from,
  182. const extern_type* __from_end, const extern_type*& __from_next,
  183. intern_type* __to, intern_type* __to_end,
  184. intern_type*& __to_next) const
  185. {
  186. return this->do_in(__state, __from, __from_end, __from_next,
  187. __to, __to_end, __to_next);
  188. }
  189. int
  190. encoding() const throw()
  191. { return this->do_encoding(); }
  192. bool
  193. always_noconv() const throw()
  194. { return this->do_always_noconv(); }
  195. int
  196. length(state_type& __state, const extern_type* __from,
  197. const extern_type* __end, size_t __max) const
  198. { return this->do_length(__state, __from, __end, __max); }
  199. int
  200. max_length() const throw()
  201. { return this->do_max_length(); }
  202. protected:
  203. explicit
  204. __codecvt_abstract_base(size_t __refs = 0) : locale::facet(__refs) { }
  205. virtual
  206. ~__codecvt_abstract_base() { }
  207. /**
  208. * @brief Convert from internal to external character set.
  209. *
  210. * Converts input string of intern_type to output string of
  211. * extern_type. This function is a hook for derived classes to change
  212. * the value returned. @see out for more information.
  213. */
  214. virtual result
  215. do_out(state_type& __state, const intern_type* __from,
  216. const intern_type* __from_end, const intern_type*& __from_next,
  217. extern_type* __to, extern_type* __to_end,
  218. extern_type*& __to_next) const = 0;
  219. virtual result
  220. do_unshift(state_type& __state, extern_type* __to,
  221. extern_type* __to_end, extern_type*& __to_next) const = 0;
  222. virtual result
  223. do_in(state_type& __state, const extern_type* __from,
  224. const extern_type* __from_end, const extern_type*& __from_next,
  225. intern_type* __to, intern_type* __to_end,
  226. intern_type*& __to_next) const = 0;
  227. virtual int
  228. do_encoding() const throw() = 0;
  229. virtual bool
  230. do_always_noconv() const throw() = 0;
  231. virtual int
  232. do_length(state_type&, const extern_type* __from,
  233. const extern_type* __end, size_t __max) const = 0;
  234. virtual int
  235. do_max_length() const throw() = 0;
  236. };
  237. /**
  238. * @brief Primary class template codecvt.
  239. * @ingroup locales
  240. *
  241. * NB: Generic, mostly useless implementation.
  242. *
  243. */
  244. template<typename _InternT, typename _ExternT, typename _StateT>
  245. class codecvt
  246. : public __codecvt_abstract_base<_InternT, _ExternT, _StateT>
  247. {
  248. public:
  249. // Types:
  250. typedef codecvt_base::result result;
  251. typedef _InternT intern_type;
  252. typedef _ExternT extern_type;
  253. typedef _StateT state_type;
  254. protected:
  255. __c_locale _M_c_locale_codecvt;
  256. public:
  257. static locale::id id;
  258. explicit
  259. codecvt(size_t __refs = 0)
  260. : __codecvt_abstract_base<_InternT, _ExternT, _StateT> (__refs),
  261. _M_c_locale_codecvt(0)
  262. { }
  263. explicit
  264. codecvt(__c_locale __cloc, size_t __refs = 0);
  265. protected:
  266. virtual
  267. ~codecvt() { }
  268. virtual result
  269. do_out(state_type& __state, const intern_type* __from,
  270. const intern_type* __from_end, const intern_type*& __from_next,
  271. extern_type* __to, extern_type* __to_end,
  272. extern_type*& __to_next) const;
  273. virtual result
  274. do_unshift(state_type& __state, extern_type* __to,
  275. extern_type* __to_end, extern_type*& __to_next) const;
  276. virtual result
  277. do_in(state_type& __state, const extern_type* __from,
  278. const extern_type* __from_end, const extern_type*& __from_next,
  279. intern_type* __to, intern_type* __to_end,
  280. intern_type*& __to_next) const;
  281. virtual int
  282. do_encoding() const throw();
  283. virtual bool
  284. do_always_noconv() const throw();
  285. virtual int
  286. do_length(state_type&, const extern_type* __from,
  287. const extern_type* __end, size_t __max) const;
  288. virtual int
  289. do_max_length() const throw();
  290. };
  291. template<typename _InternT, typename _ExternT, typename _StateT>
  292. locale::id codecvt<_InternT, _ExternT, _StateT>::id;
  293. /// class codecvt<char, char, mbstate_t> specialization.
  294. template<>
  295. class codecvt<char, char, mbstate_t>
  296. : public __codecvt_abstract_base<char, char, mbstate_t>
  297. {
  298. friend class messages<char>;
  299. public:
  300. // Types:
  301. typedef char intern_type;
  302. typedef char extern_type;
  303. typedef mbstate_t state_type;
  304. protected:
  305. __c_locale _M_c_locale_codecvt;
  306. public:
  307. static locale::id id;
  308. explicit
  309. codecvt(size_t __refs = 0);
  310. explicit
  311. codecvt(__c_locale __cloc, size_t __refs = 0);
  312. protected:
  313. virtual
  314. ~codecvt();
  315. virtual result
  316. do_out(state_type& __state, const intern_type* __from,
  317. const intern_type* __from_end, const intern_type*& __from_next,
  318. extern_type* __to, extern_type* __to_end,
  319. extern_type*& __to_next) const;
  320. virtual result
  321. do_unshift(state_type& __state, extern_type* __to,
  322. extern_type* __to_end, extern_type*& __to_next) const;
  323. virtual result
  324. do_in(state_type& __state, const extern_type* __from,
  325. const extern_type* __from_end, const extern_type*& __from_next,
  326. intern_type* __to, intern_type* __to_end,
  327. intern_type*& __to_next) const;
  328. virtual int
  329. do_encoding() const throw();
  330. virtual bool
  331. do_always_noconv() const throw();
  332. virtual int
  333. do_length(state_type&, const extern_type* __from,
  334. const extern_type* __end, size_t __max) const;
  335. virtual int
  336. do_max_length() const throw();
  337. };
  338. #ifdef _GLIBCXX_USE_WCHAR_T
  339. /** @brief Class codecvt<wchar_t, char, mbstate_t> specialization.
  340. *
  341. * Converts between narrow and wide characters in the native character set
  342. */
  343. template<>
  344. class codecvt<wchar_t, char, mbstate_t>
  345. : public __codecvt_abstract_base<wchar_t, char, mbstate_t>
  346. {
  347. friend class messages<wchar_t>;
  348. public:
  349. // Types:
  350. typedef wchar_t intern_type;
  351. typedef char extern_type;
  352. typedef mbstate_t state_type;
  353. protected:
  354. __c_locale _M_c_locale_codecvt;
  355. public:
  356. static locale::id id;
  357. explicit
  358. codecvt(size_t __refs = 0);
  359. explicit
  360. codecvt(__c_locale __cloc, size_t __refs = 0);
  361. protected:
  362. virtual
  363. ~codecvt();
  364. virtual result
  365. do_out(state_type& __state, const intern_type* __from,
  366. const intern_type* __from_end, const intern_type*& __from_next,
  367. extern_type* __to, extern_type* __to_end,
  368. extern_type*& __to_next) const;
  369. virtual result
  370. do_unshift(state_type& __state,
  371. extern_type* __to, extern_type* __to_end,
  372. extern_type*& __to_next) const;
  373. virtual result
  374. do_in(state_type& __state,
  375. const extern_type* __from, const extern_type* __from_end,
  376. const extern_type*& __from_next,
  377. intern_type* __to, intern_type* __to_end,
  378. intern_type*& __to_next) const;
  379. virtual
  380. int do_encoding() const throw();
  381. virtual
  382. bool do_always_noconv() const throw();
  383. virtual
  384. int do_length(state_type&, const extern_type* __from,
  385. const extern_type* __end, size_t __max) const;
  386. virtual int
  387. do_max_length() const throw();
  388. };
  389. #endif //_GLIBCXX_USE_WCHAR_T
  390. #if __cplusplus >= 201103L
  391. #ifdef _GLIBCXX_USE_C99_STDINT_TR1
  392. /** @brief Class codecvt<char16_t, char, mbstate_t> specialization.
  393. *
  394. * Converts between UTF-16 and UTF-8.
  395. */
  396. template<>
  397. class codecvt<char16_t, char, mbstate_t>
  398. : public __codecvt_abstract_base<char16_t, char, mbstate_t>
  399. {
  400. public:
  401. // Types:
  402. typedef char16_t intern_type;
  403. typedef char extern_type;
  404. typedef mbstate_t state_type;
  405. public:
  406. static locale::id id;
  407. explicit
  408. codecvt(size_t __refs = 0)
  409. : __codecvt_abstract_base<char16_t, char, mbstate_t>(__refs) { }
  410. protected:
  411. virtual
  412. ~codecvt();
  413. virtual result
  414. do_out(state_type& __state, const intern_type* __from,
  415. const intern_type* __from_end, const intern_type*& __from_next,
  416. extern_type* __to, extern_type* __to_end,
  417. extern_type*& __to_next) const;
  418. virtual result
  419. do_unshift(state_type& __state,
  420. extern_type* __to, extern_type* __to_end,
  421. extern_type*& __to_next) const;
  422. virtual result
  423. do_in(state_type& __state,
  424. const extern_type* __from, const extern_type* __from_end,
  425. const extern_type*& __from_next,
  426. intern_type* __to, intern_type* __to_end,
  427. intern_type*& __to_next) const;
  428. virtual
  429. int do_encoding() const throw();
  430. virtual
  431. bool do_always_noconv() const throw();
  432. virtual
  433. int do_length(state_type&, const extern_type* __from,
  434. const extern_type* __end, size_t __max) const;
  435. virtual int
  436. do_max_length() const throw();
  437. };
  438. /** @brief Class codecvt<char32_t, char, mbstate_t> specialization.
  439. *
  440. * Converts between UTF-32 and UTF-8.
  441. */
  442. template<>
  443. class codecvt<char32_t, char, mbstate_t>
  444. : public __codecvt_abstract_base<char32_t, char, mbstate_t>
  445. {
  446. public:
  447. // Types:
  448. typedef char32_t intern_type;
  449. typedef char extern_type;
  450. typedef mbstate_t state_type;
  451. public:
  452. static locale::id id;
  453. explicit
  454. codecvt(size_t __refs = 0)
  455. : __codecvt_abstract_base<char32_t, char, mbstate_t>(__refs) { }
  456. protected:
  457. virtual
  458. ~codecvt();
  459. virtual result
  460. do_out(state_type& __state, const intern_type* __from,
  461. const intern_type* __from_end, const intern_type*& __from_next,
  462. extern_type* __to, extern_type* __to_end,
  463. extern_type*& __to_next) const;
  464. virtual result
  465. do_unshift(state_type& __state,
  466. extern_type* __to, extern_type* __to_end,
  467. extern_type*& __to_next) const;
  468. virtual result
  469. do_in(state_type& __state,
  470. const extern_type* __from, const extern_type* __from_end,
  471. const extern_type*& __from_next,
  472. intern_type* __to, intern_type* __to_end,
  473. intern_type*& __to_next) const;
  474. virtual
  475. int do_encoding() const throw();
  476. virtual
  477. bool do_always_noconv() const throw();
  478. virtual
  479. int do_length(state_type&, const extern_type* __from,
  480. const extern_type* __end, size_t __max) const;
  481. virtual int
  482. do_max_length() const throw();
  483. };
  484. #endif // _GLIBCXX_USE_C99_STDINT_TR1
  485. #endif // C++11
  486. /// class codecvt_byname [22.2.1.6].
  487. template<typename _InternT, typename _ExternT, typename _StateT>
  488. class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT>
  489. {
  490. public:
  491. explicit
  492. codecvt_byname(const char* __s, size_t __refs = 0)
  493. : codecvt<_InternT, _ExternT, _StateT>(__refs)
  494. {
  495. if (__builtin_strcmp(__s, "C") != 0
  496. && __builtin_strcmp(__s, "POSIX") != 0)
  497. {
  498. this->_S_destroy_c_locale(this->_M_c_locale_codecvt);
  499. this->_S_create_c_locale(this->_M_c_locale_codecvt, __s);
  500. }
  501. }
  502. #if __cplusplus >= 201103L
  503. explicit
  504. codecvt_byname(const string& __s, size_t __refs = 0)
  505. : codecvt_byname(__s.c_str(), __refs) { }
  506. #endif
  507. protected:
  508. virtual
  509. ~codecvt_byname() { }
  510. };
  511. #if __cplusplus >= 201103L && defined(_GLIBCXX_USE_C99_STDINT_TR1)
  512. template<>
  513. class codecvt_byname<char16_t, char, mbstate_t>
  514. : public codecvt<char16_t, char, mbstate_t>
  515. {
  516. public:
  517. explicit
  518. codecvt_byname(const char* __s, size_t __refs = 0)
  519. : codecvt<char16_t, char, mbstate_t>(__refs) { }
  520. explicit
  521. codecvt_byname(const string& __s, size_t __refs = 0)
  522. : codecvt_byname(__s.c_str(), __refs) { }
  523. protected:
  524. virtual
  525. ~codecvt_byname() { }
  526. };
  527. template<>
  528. class codecvt_byname<char32_t, char, mbstate_t>
  529. : public codecvt<char32_t, char, mbstate_t>
  530. {
  531. public:
  532. explicit
  533. codecvt_byname(const char* __s, size_t __refs = 0)
  534. : codecvt<char32_t, char, mbstate_t>(__refs) { }
  535. explicit
  536. codecvt_byname(const string& __s, size_t __refs = 0)
  537. : codecvt_byname(__s.c_str(), __refs) { }
  538. protected:
  539. virtual
  540. ~codecvt_byname() { }
  541. };
  542. #endif
  // Inhibit implicit instantiations for required instantiations,
  // which are defined via explicit instantiations elsewhere.
#if _GLIBCXX_EXTERN_TEMPLATE
  // Narrow-character facet and its use_facet/has_facet accessors.
  extern template class codecvt_byname<char, char, mbstate_t>;

  extern template
    const codecvt<char, char, mbstate_t>&
    use_facet<codecvt<char, char, mbstate_t> >(const locale&);

  extern template
    bool
    has_facet<codecvt<char, char, mbstate_t> >(const locale&);

#ifdef _GLIBCXX_USE_WCHAR_T
  // Wide-character counterparts, only when wchar_t support is enabled.
  extern template class codecvt_byname<wchar_t, char, mbstate_t>;

  extern template
    const codecvt<wchar_t, char, mbstate_t>&
    use_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);

  extern template
    bool
    has_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
#endif

#if __cplusplus >= 201103L && defined(_GLIBCXX_USE_C99_STDINT_TR1)
  // char16_t / char32_t facets (C++11 and later).
  extern template class codecvt_byname<char16_t, char, mbstate_t>;
  extern template class codecvt_byname<char32_t, char, mbstate_t>;
#endif

#endif
  567. _GLIBCXX_END_NAMESPACE_VERSION
  568. } // namespace std
  569. #endif // _CODECVT_H