libstdc++
codecvt.h
Go to the documentation of this file.
00001 // Locale support (codecvt) -*- C++ -*-
00002 
00003 // Copyright (C) 2000-2013 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the
00007 // terms of the GNU General Public License as published by the
00008 // Free Software Foundation; either version 3, or (at your option)
00009 // any later version.
00010 
00011 // This library is distributed in the hope that it will be useful,
00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014 // GNU General Public License for more details.
00015 
00016 // Under Section 7 of GPL version 3, you are granted additional
00017 // permissions described in the GCC Runtime Library Exception, version
00018 // 3.1, as published by the Free Software Foundation.
00019 
00020 // You should have received a copy of the GNU General Public License and
00021 // a copy of the GCC Runtime Library Exception along with this program;
00022 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00023 // <http://www.gnu.org/licenses/>.
00024 
00025 /** @file bits/codecvt.h
00026  *  This is an internal header file, included by other library headers.
00027  *  Do not attempt to use it directly. @headername{locale}
00028  */
00029 
00030 //
00031 // ISO C++ 14882: 22.2.1.5 Template class codecvt
00032 //
00033 
00034 // Written by Benjamin Kosnik <bkoz@redhat.com>
00035 
00036 #ifndef _CODECVT_H
00037 #define _CODECVT_H 1
00038 
00039 #pragma GCC system_header
00040 
00041 namespace std _GLIBCXX_VISIBILITY(default)
00042 {
00043 _GLIBCXX_BEGIN_NAMESPACE_VERSION
00044 
00045   /// Base class for the codecvt facet [22.2.1.5].
      ///
      /// It has no data members or member functions; it only supplies the
      /// @c result enumeration used as the return type of the conversions.
00046   class codecvt_base
00047   {
00048   public:
      /// Outcome of a conversion or unshift request.
00049     enum result
00050     {
00051       ok,       ///< The request was carried out completely.
00052       partial,  ///< Input ended early or the output area was too small.
00053       error,    ///< The request failed.
00054       noconv    ///< No conversion was necessary.
00055     };
00056   };
00057 
00058   /**
00059    *  @brief  Common base for codecvt functions.
00060    *
00061    *  This template class provides implementations of the public functions
00062    *  that forward to the protected virtual functions.
00063    *
00064    *  This template also declares those protected virtual functions as
00065    *  pure virtual, so it cannot be instantiated on its own.
         *
         *  @tparam _InternT  Internal character type, exposed as intern_type.
         *  @tparam _ExternT  External character type, exposed as extern_type.
         *  @tparam _StateT   Conversion state type, exposed as state_type.
00066   */
00067   template<typename _InternT, typename _ExternT, typename _StateT>
00068     class __codecvt_abstract_base
00069     : public locale::facet, public codecvt_base
00070     {
00071     public:
00072       // Types:
00073       typedef codecvt_base::result  result;
00074       typedef _InternT          intern_type;
00075       typedef _ExternT          extern_type;
00076       typedef _StateT           state_type;
00077 
00078       // 22.2.1.5.1 codecvt members
00079       /**
00080        *  @brief  Convert from internal to external character set.
00081        *
00082        *  Converts input string of intern_type to output string of
00083        *  extern_type.  This is analogous to wcsrtombs.  It does this by
00084        *  calling codecvt::do_out.
00085        *
00086        *  The source and destination character sets are determined by the
00087        *  facet's locale, internal and external types.
00088        *
00089        *  The characters in [from,from_end) are converted and written to
00090        *  [to,to_end).  from_next and to_next are set to point to the
00091        *  character following the last successfully converted character,
00092        *  respectively.  If the result needed no conversion, from_next and
00093        *  to_next are not affected.
00094        *
00095        *  The @a state argument should be initialized if the input is at the
00096        *  beginning and carried from a previous call if continuing
00097        *  conversion.  There are no guarantees about how @a state is used.
00098        *
00099        *  The result returned is a member of codecvt_base::result.  If
00100        *  all the input is converted, returns codecvt_base::ok.  If no
00101        *  conversion is necessary, returns codecvt_base::noconv.  If
00102        *  the input ends early or there is insufficient space in the
00103        *  output, returns codecvt_base::partial.  Otherwise the
00104        *  conversion failed and codecvt_base::error is returned.
00105        *
00106        *  @param  __state  Persistent conversion state data.
00107        *  @param  __from  Start of input.
00108        *  @param  __from_end  End of input.
00109        *  @param  __from_next  Returns start of unconverted data.
00110        *  @param  __to  Start of output buffer.
00111        *  @param  __to_end  End of output buffer.
00112        *  @param  __to_next  Returns start of unused output area.
00113        *  @return  codecvt_base::result.
00114       */
00115       result
00116       out(state_type& __state, const intern_type* __from,
00117       const intern_type* __from_end, const intern_type*& __from_next,
00118       extern_type* __to, extern_type* __to_end,
00119       extern_type*& __to_next) const
00120       {
00121     return this->do_out(__state, __from, __from_end, __from_next,
00122                 __to, __to_end, __to_next);
00123       }
00124 
00125       /**
00126        *  @brief  Reset conversion state.
00127        *
00128        *  Writes characters to output that would restore @a state to initial
00129        *  conditions.  The idea is that if a partial conversion occurs, then
00130        *  converting the characters written by this function would leave
00131        *  the state in initial conditions, rather than partial conversion
00132        *  state.  It does this by calling codecvt::do_unshift().
00133        *
00134        *  For example, if 4 external characters always converted to 1 internal
00135        *  character, and input to in() had 6 external characters with state
00136        *  saved, this function would write two characters to the output and
00137        *  set the state to initialized conditions.
00138        *
00139        *  The source and destination character sets are determined by the
00140        *  facet's locale, internal and external types.
00141        *
00142        *  The result returned is a member of codecvt_base::result.  If the
00143        *  state could be reset and data written, returns codecvt_base::ok.  If
00144        *  no conversion is necessary, returns codecvt_base::noconv.  If the
00145        *  output has insufficient space, returns codecvt_base::partial.
00146        *  Otherwise the reset failed and codecvt_base::error is returned.
00147        *
00148        *  @param  __state  Persistent conversion state data.
00149        *  @param  __to  Start of output buffer.
00150        *  @param  __to_end  End of output buffer.
00151        *  @param  __to_next  Returns start of unused output area.
00152        *  @return  codecvt_base::result.
00153       */
00154       result
00155       unshift(state_type& __state, extern_type* __to, extern_type* __to_end,
00156           extern_type*& __to_next) const
00157       { return this->do_unshift(__state, __to,__to_end,__to_next); }
00158 
00159       /**
00160        *  @brief  Convert from external to internal character set.
00161        *
00162        *  Converts input string of extern_type to output string of
00163        *  intern_type.  This is analogous to mbsrtowcs.  It does this by
00164        *  calling codecvt::do_in.
00165        *
00166        *  The source and destination character sets are determined by the
00167        *  facet's locale, internal and external types.
00168        *
00169        *  The characters in [from,from_end) are converted and written to
00170        *  [to,to_end).  from_next and to_next are set to point to the
00171        *  character following the last successfully converted character,
00172        *  respectively.  If the result needed no conversion, from_next and
00173        *  to_next are not affected.
00174        *
00175        *  The @a state argument should be initialized if the input is at the
00176        *  beginning and carried from a previous call if continuing
00177        *  conversion.  There are no guarantees about how @a state is used.
00178        *
00179        *  The result returned is a member of codecvt_base::result.  If
00180        *  all the input is converted, returns codecvt_base::ok.  If no
00181        *  conversion is necessary, returns codecvt_base::noconv.  If
00182        *  the input ends early or there is insufficient space in the
00183        *  output, returns codecvt_base::partial.  Otherwise the
00184        *  conversion failed and codecvt_base::error is returned.
00185        *
00186        *  @param  __state  Persistent conversion state data.
00187        *  @param  __from  Start of input.
00188        *  @param  __from_end  End of input.
00189        *  @param  __from_next  Returns start of unconverted data.
00190        *  @param  __to  Start of output buffer.
00191        *  @param  __to_end  End of output buffer.
00192        *  @param  __to_next  Returns start of unused output area.
00193        *  @return  codecvt_base::result.
00194       */
00195       result
00196       in(state_type& __state, const extern_type* __from,
00197      const extern_type* __from_end, const extern_type*& __from_next,
00198      intern_type* __to, intern_type* __to_end,
00199      intern_type*& __to_next) const
00200       {
00201     return this->do_in(__state, __from, __from_end, __from_next,
00202                __to, __to_end, __to_next);
00203       }
00204 
            /// Returns whatever the protected virtual do_encoding() returns.
            /// The meaning of the value is set by the derived facet (see the
            /// ISO C++ section cited above).
00205       int
00206       encoding() const throw()
00207       { return this->do_encoding(); }
00208 
            /// Returns whatever the protected virtual do_always_noconv()
            /// returns.
00209       bool
00210       always_noconv() const throw()
00211       { return this->do_always_noconv(); }
00212 
            /// Passes all four arguments unchanged to the protected virtual
            /// do_length() and returns its result.
00213       int
00214       length(state_type& __state, const extern_type* __from,
00215          const extern_type* __end, size_t __max) const
00216       { return this->do_length(__state, __from, __end, __max); }
00217 
            /// Returns whatever the protected virtual do_max_length() returns.
00218       int
00219       max_length() const throw()
00220       { return this->do_max_length(); }
00221 
00222     protected:
            /// Protected constructor; hands @a __refs on to locale::facet.
00223       explicit
00224       __codecvt_abstract_base(size_t __refs = 0) : locale::facet(__refs) { }
00225 
            /// Protected virtual destructor with an empty body.
00226       virtual
00227       ~__codecvt_abstract_base() { }
00228 
00229       /**
00230        *  @brief  Convert from internal to external character set.
00231        *
00232        *  Converts input string of intern_type to output string of
00233        *  extern_type.  This function is a hook for derived classes to change
00234        *  the value returned.  @see out for more information.
00235       */
00236       virtual result
00237       do_out(state_type& __state, const intern_type* __from,
00238          const intern_type* __from_end, const intern_type*& __from_next,
00239          extern_type* __to, extern_type* __to_end,
00240          extern_type*& __to_next) const = 0;
00241 
            /// Pure virtual hook called by unshift() with the same arguments.
00242       virtual result
00243       do_unshift(state_type& __state, extern_type* __to,
00244          extern_type* __to_end, extern_type*& __to_next) const = 0;
00245 
            /// Pure virtual hook called by in() with the same arguments.
00246       virtual result
00247       do_in(state_type& __state, const extern_type* __from,
00248         const extern_type* __from_end, const extern_type*& __from_next,
00249         intern_type* __to, intern_type* __to_end,
00250         intern_type*& __to_next) const = 0;
00251 
            /// Pure virtual hook called by encoding().
00252       virtual int
00253       do_encoding() const throw() = 0;
00254 
            /// Pure virtual hook called by always_noconv().
00255       virtual bool
00256       do_always_noconv() const throw() = 0;
00257 
            /// Pure virtual hook called by length().  The state parameter is
            /// left unnamed in this declaration.
00258       virtual int
00259       do_length(state_type&, const extern_type* __from,
00260         const extern_type* __end, size_t __max) const = 0;
00261 
            /// Pure virtual hook called by max_length().
00262       virtual int
00263       do_max_length() const throw() = 0;
00264     };
00265 
00266 
00267 
00268   /**
00269    *  @brief  Primary class template codecvt.
00270    *  @ingroup locales
00271    *
00272    *  NB: Generic, mostly useless implementation.
00273    *
         *  The size_t constructor and the destructor are defined inline.  The
         *  (__c_locale, size_t) constructor and the do_* overrides are only
         *  declared in this header.
00274   */
00275    template<typename _InternT, typename _ExternT, typename _StateT>
00276     class codecvt
00277     : public __codecvt_abstract_base<_InternT, _ExternT, _StateT>
00278     {
00279     public:
00280       // Types:
00281       typedef codecvt_base::result  result;
00282       typedef _InternT          intern_type;
00283       typedef _ExternT          extern_type;
00284       typedef _StateT           state_type;
00285 
00286     protected:
            // Handle of type __c_locale (a type declared outside this header);
            // the size_t constructor below sets it to 0.
00287       __c_locale            _M_c_locale_codecvt;
00288 
00289     public:
            // Static locale::id member; its definition follows the class.
00290       static locale::id         id;
00291 
            /// Constructs the facet, passing @a __refs to the abstract base and
            /// setting _M_c_locale_codecvt to 0.
00292       explicit
00293       codecvt(size_t __refs = 0)
00294       : __codecvt_abstract_base<_InternT, _ExternT, _StateT> (__refs),
00295     _M_c_locale_codecvt(0)
00296       { }
00297 
            /// Declared only; no definition for the primary template appears
            /// in this header.
00298       explicit
00299       codecvt(__c_locale __cloc, size_t __refs = 0);
00300 
00301     protected:
            /// Protected virtual destructor with an empty body.
00302       virtual
00303       ~codecvt() { }
00304 
            // The declarations below override the pure virtual hooks of
            // __codecvt_abstract_base with identical signatures.
00305       virtual result
00306       do_out(state_type& __state, const intern_type* __from,
00307          const intern_type* __from_end, const intern_type*& __from_next,
00308          extern_type* __to, extern_type* __to_end,
00309          extern_type*& __to_next) const;
00310 
00311       virtual result
00312       do_unshift(state_type& __state, extern_type* __to,
00313          extern_type* __to_end, extern_type*& __to_next) const;
00314 
00315       virtual result
00316       do_in(state_type& __state, const extern_type* __from,
00317         const extern_type* __from_end, const extern_type*& __from_next,
00318         intern_type* __to, intern_type* __to_end,
00319         intern_type*& __to_next) const;
00320 
00321       virtual int
00322       do_encoding() const throw();
00323 
00324       virtual bool
00325       do_always_noconv() const throw();
00326 
00327       virtual int
00328       do_length(state_type&, const extern_type* __from,
00329         const extern_type* __end, size_t __max) const;
00330 
00331       virtual int
00332       do_max_length() const throw();
00333     };
00334 
      // Out-of-class definition of the static id member of the primary template.
00335   template<typename _InternT, typename _ExternT, typename _StateT>
00336     locale::id codecvt<_InternT, _ExternT, _StateT>::id;
00337 
00338   /// class codecvt<char, char, mbstate_t> specialization.
      ///
      /// Every member function here, including both constructors and the
      /// destructor, is declared only; no definitions appear in this header.
00339   template<>
00340     class codecvt<char, char, mbstate_t>
00341     : public __codecvt_abstract_base<char, char, mbstate_t>
00342     {
00343     public:
00344       // Types:
            // (result is not re-declared here; it comes from the base class.)
00345       typedef char          intern_type;
00346       typedef char          extern_type;
00347       typedef mbstate_t         state_type;
00348 
00349     protected:
            // Handle of type __c_locale (a type declared outside this header).
00350       __c_locale            _M_c_locale_codecvt;
00351 
00352     public:
            // Static locale::id member; not defined in this header.
00353       static locale::id id;
00354 
00355       explicit
00356       codecvt(size_t __refs = 0);
00357 
00358       explicit
00359       codecvt(__c_locale __cloc, size_t __refs = 0);
00360 
00361     protected:
00362       virtual
00363       ~codecvt();
00364 
            // The declarations below override the pure virtual hooks of
            // __codecvt_abstract_base<char, char, mbstate_t>.
00365       virtual result
00366       do_out(state_type& __state, const intern_type* __from,
00367          const intern_type* __from_end, const intern_type*& __from_next,
00368          extern_type* __to, extern_type* __to_end,
00369          extern_type*& __to_next) const;
00370 
00371       virtual result
00372       do_unshift(state_type& __state, extern_type* __to,
00373          extern_type* __to_end, extern_type*& __to_next) const;
00374 
00375       virtual result
00376       do_in(state_type& __state, const extern_type* __from,
00377         const extern_type* __from_end, const extern_type*& __from_next,
00378         intern_type* __to, intern_type* __to_end,
00379         intern_type*& __to_next) const;
00380 
00381       virtual int
00382       do_encoding() const throw();
00383 
00384       virtual bool
00385       do_always_noconv() const throw();
00386 
00387       virtual int
00388       do_length(state_type&, const extern_type* __from,
00389         const extern_type* __end, size_t __max) const;
00390 
00391       virtual int
00392       do_max_length() const throw();
00393   };
00394 
00395 #ifdef _GLIBCXX_USE_WCHAR_T
00396   /// class codecvt<wchar_t, char, mbstate_t> specialization.
      ///
      /// Only present when _GLIBCXX_USE_WCHAR_T is defined.  Every member
      /// function here, including both constructors and the destructor, is
      /// declared only; no definitions appear in this header.
00397   template<>
00398     class codecvt<wchar_t, char, mbstate_t>
00399     : public __codecvt_abstract_base<wchar_t, char, mbstate_t>
00400     {
00401     public:
00402       // Types:
            // (result is not re-declared here; it comes from the base class.)
00403       typedef wchar_t           intern_type;
00404       typedef char          extern_type;
00405       typedef mbstate_t         state_type;
00406 
00407     protected:
            // Handle of type __c_locale (a type declared outside this header).
00408       __c_locale            _M_c_locale_codecvt;
00409 
00410     public:
            // Static locale::id member; not defined in this header.
00411       static locale::id         id;
00412 
00413       explicit
00414       codecvt(size_t __refs = 0);
00415 
00416       explicit
00417       codecvt(__c_locale __cloc, size_t __refs = 0);
00418 
00419     protected:
00420       virtual
00421       ~codecvt();
00422 
            // The declarations below override the pure virtual hooks of
            // __codecvt_abstract_base<wchar_t, char, mbstate_t>.
00423       virtual result
00424       do_out(state_type& __state, const intern_type* __from,
00425          const intern_type* __from_end, const intern_type*& __from_next,
00426          extern_type* __to, extern_type* __to_end,
00427          extern_type*& __to_next) const;
00428 
00429       virtual result
00430       do_unshift(state_type& __state,
00431          extern_type* __to, extern_type* __to_end,
00432          extern_type*& __to_next) const;
00433 
00434       virtual result
00435       do_in(state_type& __state,
00436          const extern_type* __from, const extern_type* __from_end,
00437          const extern_type*& __from_next,
00438          intern_type* __to, intern_type* __to_end,
00439          intern_type*& __to_next) const;
00440 
00441       virtual
00442       int do_encoding() const throw();
00443 
00444       virtual
00445       bool do_always_noconv() const throw();
00446 
00447       virtual
00448       int do_length(state_type&, const extern_type* __from,
00449             const extern_type* __end, size_t __max) const;
00450 
00451       virtual int
00452       do_max_length() const throw();
00453     };
00454 #endif //_GLIBCXX_USE_WCHAR_T
00455 
00456   /// class codecvt_byname [22.2.1.6].
      ///
      /// Derives from codecvt with the same template arguments and adds a
      /// constructor that takes a locale name.
00457   template<typename _InternT, typename _ExternT, typename _StateT>
00458     class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT>
00459     {
00460     public:
            /**
             *  @brief  Construct from a locale name.
             *
             *  The codecvt base is first constructed with @a __refs.  If
             *  @a __s compares equal to "C" or to "POSIX" nothing further
             *  happens.  Otherwise the base's _M_c_locale_codecvt is passed
             *  to _S_destroy_c_locale and then to _S_create_c_locale
             *  together with @a __s.
             *
             *  @param  __s  Locale name; it is handed to __builtin_strcmp
             *               without a null check.
             *  @param  __refs  Forwarded to the codecvt base constructor.
            */
00461       explicit
00462       codecvt_byname(const char* __s, size_t __refs = 0)
00463       : codecvt<_InternT, _ExternT, _StateT>(__refs)
00464       {
00465     if (__builtin_strcmp(__s, "C") != 0
00466         && __builtin_strcmp(__s, "POSIX") != 0)
00467       {
            // Drop the handle set up by the base constructor, then build a
            // new one for the requested name.
00468         this->_S_destroy_c_locale(this->_M_c_locale_codecvt);
00469         this->_S_create_c_locale(this->_M_c_locale_codecvt, __s);
00470       }
00471       }
00472 
00473     protected:
            /// Protected virtual destructor with an empty body.
00474       virtual
00475       ~codecvt_byname() { }
00476     };
00477 
00478   // Suppress implicit instantiation of the specializations listed below;
00479   // they are defined via explicit instantiations elsewhere.
00480 #if _GLIBCXX_EXTERN_TEMPLATE
00481   extern template class codecvt_byname<char, char, mbstate_t>;
00482 
00483   extern template
00484     const codecvt<char, char, mbstate_t>&
00485     use_facet<codecvt<char, char, mbstate_t> >(const locale&);
00486 
00487   extern template
00488     bool
00489     has_facet<codecvt<char, char, mbstate_t> >(const locale&);
00490 
00491 #ifdef _GLIBCXX_USE_WCHAR_T
00492   extern template class codecvt_byname<wchar_t, char, mbstate_t>;
00493 
00494   extern template
00495     const codecvt<wchar_t, char, mbstate_t>&
00496     use_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
00497 
00498   extern template
00499     bool
00500     has_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
00501 #endif // _GLIBCXX_USE_WCHAR_T
00502 #endif // _GLIBCXX_EXTERN_TEMPLATE
00503 
00504 _GLIBCXX_END_NAMESPACE_VERSION
00505 } // namespace std
00506 
00507 #endif // _CODECVT_H