libtranscript
|
/* Copyright (C) 2011-2012 G.P. Halkes
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License version 3, as
   published by the Free Software Foundation.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
#ifdef UTF_ENDIAN_H_VERSION
/* Template fragment: the includer defines UTF_ENDIAN_H_VERSION to a suffix and
   every ALT(name) below becomes name<suffix>. The two-level paste makes sure
   UTF_ENDIAN_H_VERSION is macro-expanded before it is glued onto the name.
   The CHECK_* macros, the put16/put32/get16/get32 helpers and the
   TRANSCRIPT_* values are expected to be provided by the including file. */
#define ALT_PASTE_RAW(x, y) x ## y
#define ALT_PASTE(x, y) ALT_PASTE_RAW(x, y)
#define ALT(x) ALT_PASTE(x, UTF_ENDIAN_H_VERSION)

/** Write a codepoint as UTF-16 in the byte order selected by the suffix.
    @param codepoint The codepoint to write.
    @param outbuf Pointer to the output position; advanced by 2 or 4 on success.
    @param outbuflimit End of the output buffer (used by CHECK_OUTBYTESLEFT,
        which is defined by the includer).
    @return TRANSCRIPT_SUCCESS on success; otherwise whatever
        CHECK_CODEPOINT_RANGE or CHECK_OUTBYTESLEFT return on failure.
*/
static transcript_error_t ALT(put_utf16)(uint_fast32_t codepoint, char **outbuf, const char *outbuflimit) {
    CHECK_CODEPOINT_RANGE();
    /* Everything up to and including U+FFFF fits in a single 16-bit unit.
       Only codepoints from U+10000 upwards need a surrogate pair. */
    if (codepoint < UINT32_C(0x10000)) {
        CHECK_OUTBYTESLEFT(2);
        ALT(put16)(codepoint, (unsigned char *) *outbuf);
        *outbuf += 2;
    } else {
        CHECK_OUTBYTESLEFT(4);
        /* Split the 20-bit offset above U+10000 into a high and a low half. */
        codepoint -= UINT32_C(0x10000);
        ALT(put16)(UINT32_C(0xd800) + (codepoint >> 10), (unsigned char *) *outbuf);
        ALT(put16)(UINT32_C(0xdc00) + (codepoint & 0x3ff), ((unsigned char *) *outbuf) + 2);
        *outbuf += 4;
    }
    return TRANSCRIPT_SUCCESS;
}

/** Write a codepoint as UTF-32 in the byte order selected by the suffix.
    @param codepoint The codepoint to write.
    @param outbuf Pointer to the output position; advanced by 4 on success.
    @param outbuflimit End of the output buffer (used by CHECK_OUTBYTESLEFT,
        which is defined by the includer).
    @return TRANSCRIPT_SUCCESS on success; otherwise whatever
        CHECK_CODEPOINT_RANGE or CHECK_OUTBYTESLEFT return on failure.
*/
static transcript_error_t ALT(put_utf32)(uint_fast32_t codepoint, char **outbuf, const char *outbuflimit) {
    CHECK_CODEPOINT_RANGE();

    CHECK_OUTBYTESLEFT(4);
    ALT(put32)(codepoint, (unsigned char *) *outbuf);
    *outbuf += 4;
    return TRANSCRIPT_SUCCESS;
}

/** Read one codepoint from UTF-16 input in the byte order selected by the suffix.
    @param inbuf Pointer to the input position; advanced past the consumed
        bytes only when a codepoint is returned.
    @param inbuflimit End of the input buffer.
    @param skip When non-zero, validity checks are bypassed: an unpaired high
        surrogate is consumed on its own (2 bytes) and returned as is, and
        lone low surrogates are returned unchanged.
    @return The codepoint, TRANSCRIPT_UTF_INCOMPLETE when the buffer ends
        before a complete unit or pair, or TRANSCRIPT_UTF_ILLEGAL for an
        unpaired surrogate when @a skip is zero. CHECK_CODEPOINT_ILLEGAL
        (defined by the includer) may return as well.
*/
static uint_fast32_t ALT(get_utf16)(const char **inbuf, const char *inbuflimit, bool_t skip) {
    uint_fast32_t codepoint, masked_codepoint;

    /* Compare the remaining length instead of forming *inbuf + 2, which could
       point beyond one-past-the-end of the buffer. */
    if (inbuflimit - *inbuf < 2) {
        return TRANSCRIPT_UTF_INCOMPLETE;
    }

    codepoint = ALT(get16)((const unsigned char *) *inbuf);
    masked_codepoint = codepoint & UINT32_C(0xfc00);

    if (masked_codepoint == UINT32_C(0xd800)) {
        uint_fast32_t next_codepoint;

        /* Codepoint is a high surrogate, so a second unit is required. */
        if (inbuflimit - *inbuf < 4) {
            return TRANSCRIPT_UTF_INCOMPLETE;
        }

        next_codepoint = ALT(get16)(((const unsigned char *) *inbuf) + 2);
        if ((next_codepoint & UINT32_C(0xfc00)) != UINT32_C(0xdc00)) {
            /* Next unit is not a low surrogate. */
            if (!skip) {
                return TRANSCRIPT_UTF_ILLEGAL;
            }

            /* Only skip the high surrogate; the next unit is handled by the
               following call. */
            *inbuf += 2;
            return codepoint;
        }

        /* Combine the two 10-bit halves and add back the U+10000 offset. */
        codepoint -= UINT32_C(0xd800);
        codepoint <<= 10;
        codepoint += next_codepoint - UINT32_C(0xdc00);
        codepoint += UINT32_C(0x10000);

        if (!skip) {
            CHECK_CODEPOINT_ILLEGAL();
        }
        *inbuf += 4;
        return codepoint;
    }

    if (!skip) {
        if (masked_codepoint == UINT32_C(0xdc00)) {
            /* Codepoint is a low surrogate without a preceding high surrogate. */
            return TRANSCRIPT_UTF_ILLEGAL;
        }
        CHECK_CODEPOINT_ILLEGAL();
    }

    *inbuf += 2;
    return codepoint;
}

/** Read one codepoint from UTF-32 input in the byte order selected by the suffix.
    @param inbuf Pointer to the input position; advanced by 4 when a
        codepoint is returned.
    @param inbuflimit End of the input buffer.
    @param skip When non-zero, the CHECK_CODEPOINT_ILLEGAL and
        CHECK_CODEPOINT_SURROGATES checks are bypassed.
    @return The codepoint, or TRANSCRIPT_UTF_INCOMPLETE when fewer than 4
        bytes remain. The CHECK_* macros (defined by the includer) may return
        as well.
*/
static uint_fast32_t ALT(get_utf32)(const char **inbuf, const char *inbuflimit, bool_t skip) {
    uint32_t codepoint;

    /* Compare the remaining length instead of forming *inbuf + 4, which could
       point beyond one-past-the-end of the buffer. */
    if (inbuflimit - *inbuf < 4) {
        return TRANSCRIPT_UTF_INCOMPLETE;
    }

    codepoint = ALT(get32)((const unsigned char *) *inbuf);
    if (!skip) {
        CHECK_CODEPOINT_ILLEGAL();
        CHECK_CODEPOINT_SURROGATES();
    }

    *inbuf += 4;
    return codepoint;
}

#undef ALT
#undef ALT_PASTE
#undef ALT_PASTE_RAW
#endif