libstdc++
|
// Locale support (codecvt) -*- C++ -*-

// Copyright (C) 2000-2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

//
// ISO C++ 14882: 22.2.1.5 Template class codecvt
//

// Written by Benjamin Kosnik <bkoz@redhat.com>

/** @file ext/codecvt_specializations.h
 *  This file is a GNU extension to the Standard C++ Library.
 */

#ifndef _EXT_CODECVT_SPECIALIZATIONS_H
#define _EXT_CODECVT_SPECIALIZATIONS_H 1

#include <bits/c++config.h>
#include <locale>
#include <iconv.h>
#include <cerrno>

namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION

  /// Extension to use iconv for dealing with character encodings.
  // This includes conversions and comparisons between various character
  // sets.  This object encapsulates data that may need to be shared between
  // char_traits, codecvt and ctype.
  //
  // The object owns two iconv conversion descriptors (one per
  // direction) and closes them in its destructor.  Copying duplicates
  // the encoding names, byte-order markers and byte ratio and opens a
  // fresh pair of descriptors; it does not carry over any conversion
  // state held inside the source object's descriptors.
  class encoding_state
  {
  public:
    // Types:
    // NB: A conversion descriptor subsumes and enhances the
    // functionality of a simple state type such as mbstate_t.
    typedef iconv_t     descriptor_type;

  protected:
    // Name of internal character set encoding.
    std::string         _M_int_enc;

    // Name of external character set encoding.
    std::string         _M_ext_enc;

    // Conversion descriptor from the external encoding to the
    // internal encoding.
    descriptor_type     _M_in_desc;

    // Conversion descriptor from the internal encoding to the
    // external encoding.
    descriptor_type     _M_out_desc;

    // The byte-order marker for the external encoding, if necessary.
    int                 _M_ext_bom;

    // The byte-order marker for the internal encoding, if necessary.
    int                 _M_int_bom;

    // Number of external bytes needed to construct one complete
    // character in the internal encoding.
    // NB: -1 indicates variable, or stateful, encodings.
    int                 _M_bytes;

  public:
    // Creates a state with no encodings and no descriptors; good()
    // is false for such an object.
    explicit
    encoding_state()
    : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0), _M_bytes(0)
    { }

    // Creates a state converting between the internal encoding __int
    // and the external encoding __ext (both iconv encoding names).
    // __ibom and __ebom are the internal and external byte-order
    // markers (0 for none) and __bytes is the value reported by
    // character_ratio().
    // Throws std::runtime_error (via std::__throw_runtime_error) if
    // either iconv descriptor cannot be opened.
    explicit
    encoding_state(const char* __int, const char* __ext,
                   int __ibom = 0, int __ebom = 0, int __bytes = 1)
    : _M_int_enc(__int), _M_ext_enc(__ext), _M_in_desc(0), _M_out_desc(0),
      _M_ext_bom(__ebom), _M_int_bom(__ibom), _M_bytes(__bytes)
    { init(); }

    // 21.1.2 traits typedefs
    // p4
    // typedef STATE_T state_type
    // requires: state_type shall meet the requirements of
    // CopyConstructible types (20.1.3)
    // NB: This does not preserve the actual state of the conversion
    // descriptor member, but it does duplicate the encoding
    // information.
    encoding_state(const encoding_state& __obj) : _M_in_desc(0), _M_out_desc(0)
    { construct(__obj); }

    // Need assignment operator as well.
    // Self-assignment is a no-op, so it neither closes and re-opens
    // the descriptors nor risks throwing.
    encoding_state&
    operator=(const encoding_state& __obj)
    {
      if (this != &__obj)
        construct(__obj);
      return *this;
    }

    ~encoding_state()
    { destroy(); }

    // True when both descriptors are open, i.e. neither null nor the
    // iconv error value (iconv_t)(-1).
    bool
    good() const throw()
    {
      const descriptor_type __err = (iconv_t)(-1);
      bool __test = _M_in_desc && _M_in_desc != __err;
      __test &= _M_out_desc && _M_out_desc != __err;
      return __test;
    }

    // Returns the byte ratio given at construction.
    int
    character_ratio() const
    { return _M_bytes; }

    // Returns a copy of the internal encoding name.
    const std::string
    internal_encoding() const
    { return _M_int_enc; }

    // Returns the internal byte-order marker (0 if none).
    int
    internal_bom() const
    { return _M_int_bom; }

    // Returns a copy of the external encoding name.
    const std::string
    external_encoding() const
    { return _M_ext_enc; }

    // Returns the external byte-order marker (0 if none).
    int
    external_bom() const
    { return _M_ext_bom; }

    // Descriptor converting external -> internal.
    const descriptor_type&
    in_descriptor() const
    { return _M_in_desc; }

    // Descriptor converting internal -> external.
    const descriptor_type&
    out_descriptor() const
    { return _M_out_desc; }

  protected:
    // Opens whichever descriptor is still null, provided both
    // encoding names are non-empty.
    // If either iconv_open call fails, every descriptor this object
    // holds is released and reset to null before throwing.  A
    // constructor that throws never runs the destructor, so without
    // this an already-opened descriptor would leak.
    void
    init()
    {
      const descriptor_type __err = (iconv_t)(-1);
      const bool __have_encodings = _M_int_enc.size() && _M_ext_enc.size();
      if (!_M_in_desc && __have_encodings)
        {
          _M_in_desc = iconv_open(_M_int_enc.c_str(), _M_ext_enc.c_str());
          if (_M_in_desc == __err)
            {
              _M_in_desc = 0;
              destroy();
              std::__throw_runtime_error(__N("encoding_state::_M_init "
                                    "creating iconv input descriptor failed"));
            }
        }
      if (!_M_out_desc && __have_encodings)
        {
          _M_out_desc = iconv_open(_M_ext_enc.c_str(), _M_int_enc.c_str());
          if (_M_out_desc == __err)
            {
              _M_out_desc = 0;
              destroy();
              std::__throw_runtime_error(__N("encoding_state::_M_init "
                                  "creating iconv output descriptor failed"));
            }
        }
    }

    // Shared body of copy construction and assignment: drops the
    // current descriptors, copies the encoding information from
    // __obj and opens new descriptors for it.
    void
    construct(const encoding_state& __obj)
    {
      destroy();
      _M_int_enc = __obj._M_int_enc;
      _M_ext_enc = __obj._M_ext_enc;
      _M_ext_bom = __obj._M_ext_bom;
      _M_int_bom = __obj._M_int_bom;
      _M_bytes = __obj._M_bytes;
      init();
    }

    // Closes any open descriptor and resets it to null.
    void
    destroy() throw()
    {
      const descriptor_type __err = (iconv_t)(-1);
      if (_M_in_desc && _M_in_desc != __err)
        {
          iconv_close(_M_in_desc);
          _M_in_desc = 0;
        }
      if (_M_out_desc && _M_out_desc != __err)
        {
          iconv_close(_M_out_desc);
          _M_out_desc = 0;
        }
    }
  };

  /// encoding_char_traits
  // Custom traits type with encoding_state for the state type, and the
  // associated fpos<encoding_state> for the position type, all other
  // bits equivalent to the required char_traits instantiations.
  template<typename _CharT>
    struct encoding_char_traits : public std::char_traits<_CharT>
    {
      typedef encoding_state                            state_type;
      typedef typename std::fpos<state_type>            pos_type;
    };

_GLIBCXX_END_NAMESPACE_VERSION
} // namespace


namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION

  using __gnu_cxx::encoding_state;

  /// codecvt<InternT, _ExternT, encoding_state> specialization.
  // This partial specialization takes advantage of iconv to provide
  // code conversions between a large number of character encodings.
  template<typename _InternT, typename _ExternT>
    class codecvt<_InternT, _ExternT, encoding_state>
    : public __codecvt_abstract_base<_InternT, _ExternT, encoding_state>
    {
    public:
      // Types:
      typedef codecvt_base::result                      result;
      typedef _InternT                                  intern_type;
      typedef _ExternT                                  extern_type;
      typedef __gnu_cxx::encoding_state                 state_type;
      typedef state_type::descriptor_type               descriptor_type;

      // Data Members:
      static locale::id                 id;

      explicit
      codecvt(size_t __refs = 0)
      : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs)
      { }

      // NB: __enc is accepted but not used; the facet keeps no state
      // of its own and every conversion works on the state passed in.
      explicit
      codecvt(state_type& __enc, size_t __refs = 0)
      : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs)
      { }

     protected:
      virtual
      ~codecvt() { }

      virtual result
      do_out(state_type& __state, const intern_type* __from,
             const intern_type* __from_end, const intern_type*& __from_next,
             extern_type* __to, extern_type* __to_end,
             extern_type*& __to_next) const;

      virtual result
      do_unshift(state_type& __state, extern_type* __to,
                 extern_type* __to_end, extern_type*& __to_next) const;

      virtual result
      do_in(state_type& __state, const extern_type* __from,
            const extern_type* __from_end, const extern_type*& __from_next,
            intern_type* __to, intern_type* __to_end,
            intern_type*& __to_next) const;

      virtual int
      do_encoding() const throw();

      virtual bool
      do_always_noconv() const throw();

      virtual int
      do_length(state_type&, const extern_type* __from,
                const extern_type* __end, size_t __max) const;

      virtual int
      do_max_length() const throw();
    };

  template<typename _InternT, typename _ExternT>
    locale::id
    codecvt<_InternT, _ExternT, encoding_state>::id;

  // This adaptor works around the signature problems of the second
  // argument to iconv():  SUSv2 and others use 'const char**', but glibc 2.2
  // uses 'char**', which matches the POSIX 1003.1-2001 standard.
  // Using this adaptor, g++ will do the work for us.
  template<typename _Tp>
    inline size_t
    __iconv_adaptor(size_t(*__func)(iconv_t, _Tp, size_t*, char**, size_t*),
                    iconv_t __cd, char** __inbuf, size_t* __inbytes,
                    char** __outbuf, size_t* __outbytes)
    { return __func(__cd, (_Tp)__inbuf, __inbytes, __outbuf, __outbytes); }

  // Converts [__from, __from_end) from the internal to the external
  // encoding, writing to [__to, __to_end).
  // Returns ok when iconv succeeds, partial when iconv fails after
  // consuming some input, and error otherwise (including when __state
  // is not good()).  __from_next and __to_next are set for ok and
  // partial only.
  template<typename _InternT, typename _ExternT>
    codecvt_base::result
    codecvt<_InternT, _ExternT, encoding_state>::
    do_out(state_type& __state, const intern_type* __from,
           const intern_type* __from_end, const intern_type*& __from_next,
           extern_type* __to, extern_type* __to_end,
           extern_type*& __to_next) const
    {
      result __ret = codecvt_base::error;
      if (__state.good())
        {
          const descriptor_type& __desc = __state.out_descriptor();
          const size_t __fmultiple = sizeof(intern_type);
          const size_t __tmultiple = sizeof(extern_type);
          size_t __tbytes = __tmultiple * (__to_end - __to);
          const size_t __size = __from_end - __from;

          // Argument list for iconv specifies a byte sequence. Thus,
          // all to/from arrays must be brutally casted to char*.
          char* __cto = reinterpret_cast<char*>(__to);
          char* __cfrom;

          // Some encodings need a byte order marker as the first item
          // in the byte stream, to designate endian-ness.  The default
          // value for the byte order marker is NULL, so if this is
          // the case, it's not necessary and we can just go on our
          // merry way.  An empty input range is also passed through
          // unchanged, so no marker is emitted for it.
          const int __int_bom = __state.internal_bom();
          const bool __use_bom = __int_bom && __size;

          // Total bytes handed to iconv.  When a marker is prepended
          // it must be counted too, otherwise the last input element
          // would never be seen by iconv.
          const size_t __ftotal
            = __fmultiple * (__use_bom ? __size + 1 : __size);
          size_t __fbytes = __ftotal;

          if (__use_bom)
            {
              // NB: scratch copy lives on the stack (alloca), so its
              // size grows with the input length.
              intern_type* __cfixed = static_cast<intern_type*>
                (__builtin_alloca(sizeof(intern_type) * (__size + 1)));
              __cfixed[0] = static_cast<intern_type>(__int_bom);
              char_traits<intern_type>::copy(__cfixed + 1, __from, __size);
              __cfrom = reinterpret_cast<char*>(__cfixed);
            }
          else
            {
              intern_type* __cfixed = const_cast<intern_type*>(__from);
              __cfrom = reinterpret_cast<char*>(__cfixed);
            }

          const size_t __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
                                                &__fbytes, &__cto, &__tbytes);

          const bool __converted = __conv != size_t(-1);
          if (__converted || __fbytes < __ftotal)
            {
              if (__use_bom)
                {
                  // iconv advanced through the scratch copy; translate
                  // the number of consumed elements (less the marker)
                  // back into the caller's buffer.
                  const size_t __used = (__ftotal - __fbytes) / __fmultiple;
                  __from_next = __from + (__used ? __used - 1 : 0);
                }
              else
                __from_next = reinterpret_cast<const intern_type*>(__cfrom);
              __to_next = reinterpret_cast<extern_type*>(__cto);
              __ret = __converted ? codecvt_base::ok : codecvt_base::partial;
            }
          else
            __ret = codecvt_base::error;
        }
      return __ret;
    }

  // Writes to [__to, __to_end) whatever sequence is needed to return
  // the external (output) stream to its initial shift state, and
  // resets the output descriptor.
  // Returns noconv when nothing had to be written, ok when the
  // sequence was written, partial when iconv reports the buffer is
  // too small (E2BIG), and error otherwise (including when __state is
  // not good()).
  template<typename _InternT, typename _ExternT>
    codecvt_base::result
    codecvt<_InternT, _ExternT, encoding_state>::
    do_unshift(state_type& __state, extern_type* __to,
               extern_type* __to_end, extern_type*& __to_next) const
    {
      result __ret = codecvt_base::error;
      if (__state.good())
        {
          // The shift state of the external sequence lives in the
          // descriptor used by do_out, and the destination holds
          // extern_type elements.
          const descriptor_type& __desc = __state.out_descriptor();
          const size_t __tmultiple = sizeof(extern_type);
          const size_t __tavail = __tmultiple * (__to_end - __to);
          size_t __tlen = __tavail;

          // Argument list for iconv specifies a byte sequence. Thus,
          // all to/from arrays must be brutally casted to char*.
          char* __cto = reinterpret_cast<char*>(__to);
          size_t __conv = __iconv_adaptor(iconv,__desc, 0, 0,
                                          &__cto, &__tlen);

          if (__conv != size_t(-1))
            {
              __to_next = reinterpret_cast<extern_type*>(__cto);
              if (__tlen == __tavail)
                __ret = codecvt_base::noconv;
              else
                __ret = codecvt_base::ok;
            }
          else if (errno == E2BIG)
            {
              __to_next = reinterpret_cast<extern_type*>(__cto);
              __ret = codecvt_base::partial;
            }
          else
            __ret = codecvt_base::error;
        }
      return __ret;
    }

  // Converts [__from, __from_end) from the external to the internal
  // encoding, writing to [__to, __to_end).
  // Returns ok when iconv succeeds, partial when iconv fails after
  // consuming some input, and error otherwise (including when __state
  // is not good()).  __from_next and __to_next are set for ok and
  // partial only.
  template<typename _InternT, typename _ExternT>
    codecvt_base::result
    codecvt<_InternT, _ExternT, encoding_state>::
    do_in(state_type& __state, const extern_type* __from,
          const extern_type* __from_end, const extern_type*& __from_next,
          intern_type* __to, intern_type* __to_end,
          intern_type*& __to_next) const
    {
      result __ret = codecvt_base::error;
      if (__state.good())
        {
          const descriptor_type& __desc = __state.in_descriptor();
          const size_t __fmultiple = sizeof(extern_type);
          const size_t __tmultiple = sizeof(intern_type);
          size_t __tlen = __tmultiple * (__to_end - __to);
          const size_t __size = __from_end - __from;

          // Argument list for iconv specifies a byte sequence. Thus,
          // all to/from arrays must be brutally casted to char*.
          char* __cto = reinterpret_cast<char*>(__to);
          char* __cfrom;

          // Some encodings need a byte order marker as the first item
          // in the byte stream, to designate endian-ness.  The default
          // value for the byte order marker is NULL, so if this is
          // the case, it's not necessary and we can just go on our
          // merry way.  An empty input range is also passed through
          // unchanged.
          const int __ext_bom = __state.external_bom();
          const bool __use_bom = __ext_bom && __size;

          // Total bytes handed to iconv, marker included when present.
          const size_t __ftotal
            = __fmultiple * (__use_bom ? __size + 1 : __size);
          size_t __flen = __ftotal;

          if (__use_bom)
            {
              // NB: scratch copy lives on the stack (alloca), so its
              // size grows with the input length.
              extern_type* __cfixed = static_cast<extern_type*>
                (__builtin_alloca(sizeof(extern_type) * (__size + 1)));
              __cfixed[0] = static_cast<extern_type>(__ext_bom);
              char_traits<extern_type>::copy(__cfixed + 1, __from, __size);
              __cfrom = reinterpret_cast<char*>(__cfixed);
            }
          else
            {
              extern_type* __cfixed = const_cast<extern_type*>(__from);
              __cfrom = reinterpret_cast<char*>(__cfixed);
            }

          const size_t __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
                                                &__flen, &__cto, &__tlen);

          // Progress is measured in bytes on both sides of the
          // comparison, exactly as in do_out.
          const bool __converted = __conv != size_t(-1);
          if (__converted || __flen < __ftotal)
            {
              if (__use_bom)
                {
                  // Translate the position in the scratch copy back
                  // into the caller's buffer, discounting the marker.
                  const size_t __used = (__ftotal - __flen) / __fmultiple;
                  __from_next = __from + (__used ? __used - 1 : 0);
                }
              else
                __from_next = reinterpret_cast<const extern_type*>(__cfrom);
              __to_next = reinterpret_cast<intern_type*>(__cto);
              __ret = __converted ? codecvt_base::ok : codecvt_base::partial;
            }
          else
            __ret = codecvt_base::error;
        }
      return __ret;
    }

  // Returns sizeof(_InternT) / sizeof(_ExternT) when the external
  // type is no wider than the internal type, and 0 otherwise.
  template<typename _InternT, typename _ExternT>
    int
    codecvt<_InternT, _ExternT, encoding_state>::
    do_encoding() const throw()
    {
      int __ret = 0;
      if (sizeof(_ExternT) <= sizeof(_InternT))
        __ret = sizeof(_InternT) / sizeof(_ExternT);
      return __ret;
    }

  // This facet never reports that conversion can be skipped.
  template<typename _InternT, typename _ExternT>
    bool
    codecvt<_InternT, _ExternT, encoding_state>::
    do_always_noconv() const throw()
    { return false; }

  // Returns the smaller of __max and the number of external elements
  // in [__from, __end); no conversion is attempted.
  template<typename _InternT, typename _ExternT>
    int
    codecvt<_InternT, _ExternT, encoding_state>::
    do_length(state_type&, const extern_type* __from,
              const extern_type* __end, size_t __max) const
    { return std::min(__max, static_cast<size_t>(__end - __from)); }

  // _GLIBCXX_RESOLVE_LIB_DEFECTS
  // 74.  Garbled text for codecvt::do_max_length
  template<typename _InternT, typename _ExternT>
    int
    codecvt<_InternT, _ExternT, encoding_state>::
    do_max_length() const throw()
    { return 1; }

_GLIBCXX_END_NAMESPACE_VERSION
} // namespace

#endif