libt3window
/home/gertjan/projects/tilde/window/src/terminal_detection.h
/* Copyright (C) 2011 G.P. Halkes
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License version 3, as
   published by the Free Software Foundation.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

00015 #if defined(GENERATE_STRINGS)
00016 #define TEST(_str, _code) send_test_string(_str);
00017 #elif defined(GENERATE_CODE)
00018 #define TEST(_str, _code) if (test++ == report_nr) { _code }
00019 {
00020         int test = 0;
00021 #endif
00022 
00023 /*======================
00024   = Define TESTs below =
00025   ======================*/
00026 /* The tests defined here can use the variable column to check the reported width
00027    of the given test string. Test are executed in the order presented here.
00028 */
00029 //FIXME: test for more encodings here
00030 //FIXME: extend the GB18030 testing
00031 
00032 /*=== Basic character set detection ===*/
00033 
00034 /* This string should be 3 characters wide, if UTF-8 is supported. All characters are from
00035    Unicode version 1.1, so they should be supported if UTF-8 is supported at all. EUC type
00036    terminals will report length of 6 and single byte encodings will report 8.
00037 
00038    U+00E5 LATIN SMALL LETTER A WITH RING ABOVE,
00039    U+0E3F THAI CURRENCY SYMBOL BAHT, U+2592 MEDIUM SHADE */
00040 TEST("\xc3\xa5\xe0\xb8\xbf\xe2\x96\x92",
00041         if (column == 3)
00042                 _t3_term_encoding = _T3_TERM_UTF8;
00043         else if (column == 6)
00044                 _t3_term_encoding = _T3_TERM_CJK;
00045 )
00046 
00047 /* Test for GB18030. For EUC type encodings, this will be length two because the
00048    bytes with the high bit set will be ignored. For UTF-8, the characters with
00049    the high bit set will be replaced by the replacement character, thus reporing
00050    the widht as 4.
00051    U+00DE LATIN CAPITAL LETTER THORN */
00052 TEST("\x81\x30\x89\x37",
00053         if (_t3_term_encoding == _T3_TERM_UNKNOWN) {
00054                 if (column == 1)
00055                         _t3_term_encoding = _T3_TERM_GB18030;
00056                 if (column == 2)
00057                         _t3_term_encoding = _T3_TERM_GBK; //FIXME: or GB2312 for some encoding of it
00058                 else if (column == 4)
00059                         _t3_term_encoding = _T3_TERM_SINGLE_BYTE;
00060         }
00061 )
00062 
00063 /*=== Combining character sequences ===*/
00064 
00065 /* [4.0] U+002E FULL STOP / U+0350 COMBINING RIGHT ARROWHEAD ABOVE */
00066 TEST("\x2e\xcd\x90", /* UTF-8 version */
00067         if (_t3_term_encoding == _T3_TERM_UTF8 && column == 1)
00068                 _t3_term_combining = T3_UNICODE_40;
00069 )
00070 TEST("\x2e\x81\x30\xc4\x36", /* GB-18030 version */
00071         if (_t3_term_encoding == _T3_TERM_GB18030)
00072                 _t3_term_combining = T3_UNICODE_40;
00073 )
00074 
00075 /* [4.1] U+002E FULL STOP / U+0358 COMBINING DOT ABOVE RIGHT */
00076 TEST("\x2e\xcd\x98",
00077         if (_t3_term_encoding == _T3_TERM_UTF8 && column == 1 && _t3_term_combining == T3_UNICODE_40)
00078                 _t3_term_combining = T3_UNICODE_41;
00079 )
00080 /* [5.0] U+002E FULL STOP / U+1DC4 COMBINING MACRON-ACUTE */
00081 TEST("\x2e\xe1\xb7\x84",
00082         if (_t3_term_encoding == _T3_TERM_UTF8 && column == 1 && _t3_term_combining == T3_UNICODE_41)
00083                 _t3_term_combining = T3_UNICODE_50;
00084 )
00085 /* [5.1] U+002E FULL STOP / U+0487 COMBINING CYRILLIC POKRYTIE */
00086 TEST("\x2e\xd2\x87",
00087         if (_t3_term_encoding == _T3_TERM_UTF8 && column == 1 && _t3_term_combining == T3_UNICODE_50)
00088                 _t3_term_combining = T3_UNICODE_51;
00089 )
00090 /* [5.2] U+002E FULL STOP / U+081B SAMARITAN MARK EPENTHETIC YUT */
00091 TEST("\x2e\xe0\xa0\x9b",
00092         if (_t3_term_encoding == _T3_TERM_UTF8 && column == 1 && _t3_term_combining == T3_UNICODE_51)
00093                 _t3_term_combining = T3_UNICODE_52;
00094 )
00095 /* [6.0] U+002E FULL STOP / U+0859 MANDAIC AFFRICATION MARK */
00096 TEST("\x2e\xe0\xa1\x99",
00097         if (_t3_term_encoding == _T3_TERM_UTF8 && column == 1 && _t3_term_combining == T3_UNICODE_52)
00098                 _t3_term_combining = T3_UNICODE_60;
00099 )
00100 
00101 /*=== Double-width character sequences ===*/
00102 
00103 /* [1.1] U+5208 CJK UNIFIED IDEOGRAPH-5208, [4.0] U+FE47 PRESENTATION FORM FOR VERTICAL LEFT SQUARE BRACKET */
00104 TEST("\xe5\x88\x88\xef\xb9\x87",
00105         if (_t3_term_encoding == _T3_TERM_UTF8 && column == 4)
00106                 _t3_term_double_width = T3_UNICODE_40;
00107 )
00108 /* [4.1] U+FE10 PRESENTATION FORM FOR VERTICAL COMMA */
00109 TEST("\xef\xb8\x90",
00110         if (_t3_term_encoding == _T3_TERM_UTF8 && column == 2 && _t3_term_double_width == T3_UNICODE_40)
00111                 _t3_term_double_width = T3_UNICODE_41;
00112 )
00113 /* No new wide characters were introduced in Unicode 5.0. */
00114 /* [5.1] U+31DC CJK STROKE PZ */
00115 TEST("\xe3\x87\x9c",
00116         if (_t3_term_encoding == _T3_TERM_UTF8 && column == 2 && _t3_term_double_width == T3_UNICODE_41)
00117                 _t3_term_double_width = T3_UNICODE_51;
00118 )
00119 /* [5.2] U+3244 CIRCLED IDEOGRAPH QUESTION */
00120 TEST("\xe3\x89\x84",
00121         if (_t3_term_encoding == _T3_TERM_UTF8 && column == 2 && _t3_term_double_width == T3_UNICODE_51)
00122                 _t3_term_double_width = T3_UNICODE_52;
00123 )
00124 /* [6.0] U+31B8 BOPOMOFO LETTER GH */
00125 TEST("\xe3\x86\xb8",
00126         if (_t3_term_encoding == _T3_TERM_UTF8 && column == 2 && _t3_term_double_width == T3_UNICODE_52)
00127                 _t3_term_double_width = T3_UNICODE_60;
00128 )
00129 
00130 
00131 /*==============================================
00132   = Do NOT define any TESTs beyond this point. =
00133   ==============================================*/
00134 
00135 #if defined(GENERATE_CODE)
00136         if (detecting_terminal_capabilities && test - 1 == report_nr) {
00137                 detecting_terminal_capabilities = t3_false;
00138                 finish_detection();
00139                 result = t3_true;
00140         }
00141 }
00142 #endif
00143 #undef TEST
 All Data Structures Variables