ICU 51.2  51.2
unistr.h
Go to the documentation of this file.
1 /*
2 **********************************************************************
3 * Copyright (C) 1998-2013, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 *
7 * File unistr.h
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 09/25/98 stephen Creation.
13 * 11/11/98 stephen Changed per 11/9 code review.
14 * 04/20/99 stephen Overhauled per 4/16 code review.
15 * 11/18/99 aliu Made to inherit from Replaceable. Added method
16 * handleReplaceBetween(); other methods unchanged.
17 * 06/25/01 grhoten Remove dependency on iostream.
18 ******************************************************************************
19 */
20 
21 #ifndef UNISTR_H
22 #define UNISTR_H
23 
29 #include "unicode/utypes.h"
30 #include "unicode/rep.h"
31 #include "unicode/std_string.h"
32 #include "unicode/stringpiece.h"
33 #include "unicode/bytestream.h"
34 #include "unicode/ucasemap.h"
35 
36 struct UConverter; // unicode/ucnv.h
37 class StringThreadTest;
38 
// Option bit 0x8000, defined here only when no earlier header has defined it.
// NOTE(review): presumably this selects code point order in the comparison
// APIs that take an options word -- confirm against ustring.h / unorm.h.
39 #ifndef U_COMPARE_CODE_POINT_ORDER
40 /* see also ustring.h and unorm.h */
46 #define U_COMPARE_CODE_POINT_ORDER 0x8000
47 #endif
48 
// Local declaration of u_strlen(), emitted only while USTRING_H is undefined.
// NOTE(review): USTRING_H is presumably the include guard of unicode/ustring.h,
// which would otherwise supply this declaration -- confirm.
49 #ifndef USTRING_H
50 
53 U_STABLE int32_t U_EXPORT2
54 u_strlen(const UChar *s);
55 #endif
56 
57 #ifndef U_HIDE_INTERNAL_API
58 
63 #ifndef U_STRING_CASE_MAPPER_DEFINED
64 #define U_STRING_CASE_MAPPER_DEFINED
65 
70 typedef int32_t U_CALLCONV
72  UChar *dest, int32_t destCapacity,
73  const UChar *src, int32_t srcLength,
74  UErrorCode *pErrorCode);
75 
76 #endif
77 #endif /* U_HIDE_INTERNAL_API */
78 
80 
81 class BreakIterator; // unicode/brkiter.h
82 class Locale; // unicode/locid.h
83 class StringCharacterIterator;
84 class UnicodeStringAppendable; // unicode/appendable.h
85 
86 /* The <iostream> include has been moved to unicode/ustream.h */
87 
// Shorthand for the kInvariant tag value of UnicodeString::EInvariant.
98 #define US_INV icu::UnicodeString::kInvariant
99 
// UNICODE_STRING(cs, _length): builds an icu::UnicodeString from the literal cs.
// The first three variants call the (isTerminated, text, textLength)
// constructor with isTerminated=TRUE, differing only in how cs is turned into
// a const UChar *: via U_DECLARE_UTF16, via an L"" wide literal, or via a
// plain cast. The fallback instead calls the (src, length, EInvariant)
// constructor with US_INV.
117 #if defined(U_DECLARE_UTF16)
118 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
119 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
120 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length)
121 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
122 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length)
123 #else
124 # define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV)
125 #endif
126 
// Same as UNICODE_STRING with a length argument of -1.
// NOTE(review): -1 presumably means "determine the length from the
// terminator" -- confirm against the constructor documentation.
140 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
141 
// Declaration-specifier macro. Code that includes this header may predefine
// it; otherwise it expands to `explicit` when any of the ICU library
// implementation macros is defined and to nothing in all other builds.
// NOTE(review): no use of this macro is visible in this part of the file;
// it is presumably applied to the single-character constructors -- confirm.
149 #ifndef UNISTR_FROM_CHAR_EXPLICIT
150 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
151  // Auto-"explicit" in ICU library code.
152 # define UNISTR_FROM_CHAR_EXPLICIT explicit
153 # else
154  // Empty by default for source code compatibility.
155 # define UNISTR_FROM_CHAR_EXPLICIT
156 # endif
157 #endif
158 
// Same scheme as UNISTR_FROM_CHAR_EXPLICIT. Within this file it prefixes the
// UnicodeString(const char *codepageData) constructor declaration.
169 #ifndef UNISTR_FROM_STRING_EXPLICIT
170 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
171  // Auto-"explicit" in ICU library code.
172 # define UNISTR_FROM_STRING_EXPLICIT explicit
173 # else
174  // Empty by default for source code compatibility.
175 # define UNISTR_FROM_STRING_EXPLICIT
176 # endif
177 #endif
178 
249 {
250 public:
251 
// Tag enum with the single value kInvariant. It is the type of the trailing
// `inv` parameter of the extract(start, startLength, target, targetCapacity, inv)
// overload and of the UnicodeString(src, length, inv) constructor, so passing
// kInvariant (or US_INV) selects those overloads.
260  enum EInvariant {
265  kInvariant
266  };
267 
268  //========================================
269  // Read-only operations
270  //========================================
271 
272  /* Comparison - bitwise only - for international comparison use collation */
273 
281  inline UBool operator== (const UnicodeString& text) const;
282 
290  inline UBool operator!= (const UnicodeString& text) const;
291 
299  inline UBool operator> (const UnicodeString& text) const;
300 
308  inline UBool operator< (const UnicodeString& text) const;
309 
317  inline UBool operator>= (const UnicodeString& text) const;
318 
326  inline UBool operator<= (const UnicodeString& text) const;
327 
339  inline int8_t compare(const UnicodeString& text) const;
340 
355  inline int8_t compare(int32_t start,
356  int32_t length,
357  const UnicodeString& text) const;
358 
376  inline int8_t compare(int32_t start,
377  int32_t length,
378  const UnicodeString& srcText,
379  int32_t srcStart,
380  int32_t srcLength) const;
381 
394  inline int8_t compare(const UChar *srcChars,
395  int32_t srcLength) const;
396 
411  inline int8_t compare(int32_t start,
412  int32_t length,
413  const UChar *srcChars) const;
414 
432  inline int8_t compare(int32_t start,
433  int32_t length,
434  const UChar *srcChars,
435  int32_t srcStart,
436  int32_t srcLength) const;
437 
455  inline int8_t compareBetween(int32_t start,
456  int32_t limit,
457  const UnicodeString& srcText,
458  int32_t srcStart,
459  int32_t srcLimit) const;
460 
478  inline int8_t compareCodePointOrder(const UnicodeString& text) const;
479 
499  inline int8_t compareCodePointOrder(int32_t start,
500  int32_t length,
501  const UnicodeString& srcText) const;
502 
524  inline int8_t compareCodePointOrder(int32_t start,
525  int32_t length,
526  const UnicodeString& srcText,
527  int32_t srcStart,
528  int32_t srcLength) const;
529 
548  inline int8_t compareCodePointOrder(const UChar *srcChars,
549  int32_t srcLength) const;
550 
570  inline int8_t compareCodePointOrder(int32_t start,
571  int32_t length,
572  const UChar *srcChars) const;
573 
595  inline int8_t compareCodePointOrder(int32_t start,
596  int32_t length,
597  const UChar *srcChars,
598  int32_t srcStart,
599  int32_t srcLength) const;
600 
622  inline int8_t compareCodePointOrderBetween(int32_t start,
623  int32_t limit,
624  const UnicodeString& srcText,
625  int32_t srcStart,
626  int32_t srcLimit) const;
627 
646  inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
647 
668  inline int8_t caseCompare(int32_t start,
669  int32_t length,
670  const UnicodeString& srcText,
671  uint32_t options) const;
672 
695  inline int8_t caseCompare(int32_t start,
696  int32_t length,
697  const UnicodeString& srcText,
698  int32_t srcStart,
699  int32_t srcLength,
700  uint32_t options) const;
701 
721  inline int8_t caseCompare(const UChar *srcChars,
722  int32_t srcLength,
723  uint32_t options) const;
724 
745  inline int8_t caseCompare(int32_t start,
746  int32_t length,
747  const UChar *srcChars,
748  uint32_t options) const;
749 
772  inline int8_t caseCompare(int32_t start,
773  int32_t length,
774  const UChar *srcChars,
775  int32_t srcStart,
776  int32_t srcLength,
777  uint32_t options) const;
778 
801  inline int8_t caseCompareBetween(int32_t start,
802  int32_t limit,
803  const UnicodeString& srcText,
804  int32_t srcStart,
805  int32_t srcLimit,
806  uint32_t options) const;
807 
815  inline UBool startsWith(const UnicodeString& text) const;
816 
827  inline UBool startsWith(const UnicodeString& srcText,
828  int32_t srcStart,
829  int32_t srcLength) const;
830 
839  inline UBool startsWith(const UChar *srcChars,
840  int32_t srcLength) const;
841 
851  inline UBool startsWith(const UChar *srcChars,
852  int32_t srcStart,
853  int32_t srcLength) const;
854 
862  inline UBool endsWith(const UnicodeString& text) const;
863 
874  inline UBool endsWith(const UnicodeString& srcText,
875  int32_t srcStart,
876  int32_t srcLength) const;
877 
886  inline UBool endsWith(const UChar *srcChars,
887  int32_t srcLength) const;
888 
899  inline UBool endsWith(const UChar *srcChars,
900  int32_t srcStart,
901  int32_t srcLength) const;
902 
903 
904  /* Searching - bitwise only */
905 
914  inline int32_t indexOf(const UnicodeString& text) const;
915 
925  inline int32_t indexOf(const UnicodeString& text,
926  int32_t start) const;
927 
939  inline int32_t indexOf(const UnicodeString& text,
940  int32_t start,
941  int32_t length) const;
942 
959  inline int32_t indexOf(const UnicodeString& srcText,
960  int32_t srcStart,
961  int32_t srcLength,
962  int32_t start,
963  int32_t length) const;
964 
976  inline int32_t indexOf(const UChar *srcChars,
977  int32_t srcLength,
978  int32_t start) const;
979 
992  inline int32_t indexOf(const UChar *srcChars,
993  int32_t srcLength,
994  int32_t start,
995  int32_t length) const;
996 
1013  int32_t indexOf(const UChar *srcChars,
1014  int32_t srcStart,
1015  int32_t srcLength,
1016  int32_t start,
1017  int32_t length) const;
1018 
1026  inline int32_t indexOf(UChar c) const;
1027 
1036  inline int32_t indexOf(UChar32 c) const;
1037 
1046  inline int32_t indexOf(UChar c,
1047  int32_t start) const;
1048 
1058  inline int32_t indexOf(UChar32 c,
1059  int32_t start) const;
1060 
1071  inline int32_t indexOf(UChar c,
1072  int32_t start,
1073  int32_t length) const;
1074 
1086  inline int32_t indexOf(UChar32 c,
1087  int32_t start,
1088  int32_t length) const;
1089 
1098  inline int32_t lastIndexOf(const UnicodeString& text) const;
1099 
1109  inline int32_t lastIndexOf(const UnicodeString& text,
1110  int32_t start) const;
1111 
1123  inline int32_t lastIndexOf(const UnicodeString& text,
1124  int32_t start,
1125  int32_t length) const;
1126 
1143  inline int32_t lastIndexOf(const UnicodeString& srcText,
1144  int32_t srcStart,
1145  int32_t srcLength,
1146  int32_t start,
1147  int32_t length) const;
1148 
1159  inline int32_t lastIndexOf(const UChar *srcChars,
1160  int32_t srcLength,
1161  int32_t start) const;
1162 
1175  inline int32_t lastIndexOf(const UChar *srcChars,
1176  int32_t srcLength,
1177  int32_t start,
1178  int32_t length) const;
1179 
1196  int32_t lastIndexOf(const UChar *srcChars,
1197  int32_t srcStart,
1198  int32_t srcLength,
1199  int32_t start,
1200  int32_t length) const;
1201 
1209  inline int32_t lastIndexOf(UChar c) const;
1210 
1219  inline int32_t lastIndexOf(UChar32 c) const;
1220 
1229  inline int32_t lastIndexOf(UChar c,
1230  int32_t start) const;
1231 
1241  inline int32_t lastIndexOf(UChar32 c,
1242  int32_t start) const;
1243 
1254  inline int32_t lastIndexOf(UChar c,
1255  int32_t start,
1256  int32_t length) const;
1257 
1269  inline int32_t lastIndexOf(UChar32 c,
1270  int32_t start,
1271  int32_t length) const;
1272 
1273 
1274  /* Character access */
1275 
1284  inline UChar charAt(int32_t offset) const;
1285 
1293  inline UChar operator[] (int32_t offset) const;
1294 
1306  UChar32 char32At(int32_t offset) const;
1307 
1323  int32_t getChar32Start(int32_t offset) const;
1324 
1341  int32_t getChar32Limit(int32_t offset) const;
1342 
1393  int32_t moveIndex32(int32_t index, int32_t delta) const;
1394 
1395  /* Substring extraction */
1396 
1412  inline void extract(int32_t start,
1413  int32_t length,
1414  UChar *dst,
1415  int32_t dstStart = 0) const;
1416 
1438  int32_t
1439  extract(UChar *dest, int32_t destCapacity,
1440  UErrorCode &errorCode) const;
1441 
1452  inline void extract(int32_t start,
1453  int32_t length,
1454  UnicodeString& target) const;
1455 
1467  inline void extractBetween(int32_t start,
1468  int32_t limit,
1469  UChar *dst,
1470  int32_t dstStart = 0) const;
1471 
1481  virtual void extractBetween(int32_t start,
1482  int32_t limit,
1483  UnicodeString& target) const;
1484 
1506  int32_t extract(int32_t start,
1507  int32_t startLength,
1508  char *target,
1509  int32_t targetCapacity,
1510  enum EInvariant inv) const;
1511 
1512 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1513 
1533  int32_t extract(int32_t start,
1534  int32_t startLength,
1535  char *target,
1536  uint32_t targetLength) const;
1537 
1538 #endif
1539 
1540 #if !UCONFIG_NO_CONVERSION
1541 
1567  inline int32_t extract(int32_t start,
1568  int32_t startLength,
1569  char *target,
1570  const char *codepage = 0) const;
1571 
1601  int32_t extract(int32_t start,
1602  int32_t startLength,
1603  char *target,
1604  uint32_t targetLength,
1605  const char *codepage) const;
1606 
1624  int32_t extract(char *dest, int32_t destCapacity,
1625  UConverter *cnv,
1626  UErrorCode &errorCode) const;
1627 
1628 #endif
1629 
1643  UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
1644 
1655  inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
1656 
1668  void toUTF8(ByteSink &sink) const;
1669 
1670 #if U_HAVE_STD_STRING
1671 
1684  template<typename StringClass>
1685  StringClass &toUTF8String(StringClass &result) const {
1686  StringByteSink<StringClass> sbs(&result);
1687  toUTF8(sbs);
1688  return result;
1689  }
1690 
1691 #endif
1692 
1708  int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1709 
1710  /* Length operations */
1711 
1720  inline int32_t length(void) const;
1721 
1735  int32_t
1736  countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1737 
1761  UBool
1762  hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1763 
1769  inline UBool isEmpty(void) const;
1770 
1780  inline int32_t getCapacity(void) const;
1781 
1782  /* Other operations */
1783 
1789  inline int32_t hashCode(void) const;
1790 
1803  inline UBool isBogus(void) const;
1804 
1805 
1806  //========================================
1807  // Write operations
1808  //========================================
1809 
1810  /* Assignment operations */
1811 
1819  UnicodeString &operator=(const UnicodeString &srcText);
1820 
1843  UnicodeString &fastCopyFrom(const UnicodeString &src);
1844 
1852  inline UnicodeString& operator= (UChar ch);
1853 
1861  inline UnicodeString& operator= (UChar32 ch);
1862 
1874  inline UnicodeString& setTo(const UnicodeString& srcText,
1875  int32_t srcStart);
1876 
1890  inline UnicodeString& setTo(const UnicodeString& srcText,
1891  int32_t srcStart,
1892  int32_t srcLength);
1893 
1902  inline UnicodeString& setTo(const UnicodeString& srcText);
1903 
1912  inline UnicodeString& setTo(const UChar *srcChars,
1913  int32_t srcLength);
1914 
1923  UnicodeString& setTo(UChar srcChar);
1924 
1933  UnicodeString& setTo(UChar32 srcChar);
1934 
1958  UnicodeString &setTo(UBool isTerminated,
1959  const UChar *text,
1960  int32_t textLength);
1961 
1981  UnicodeString &setTo(UChar *buffer,
1982  int32_t buffLength,
1983  int32_t buffCapacity);
1984 
2025  void setToBogus();
2026 
2034  UnicodeString& setCharAt(int32_t offset,
2035  UChar ch);
2036 
2037 
2038  /* Append operations */
2039 
2047  inline UnicodeString& operator+= (UChar ch);
2048 
2056  inline UnicodeString& operator+= (UChar32 ch);
2057 
2065  inline UnicodeString& operator+= (const UnicodeString& srcText);
2066 
2081  inline UnicodeString& append(const UnicodeString& srcText,
2082  int32_t srcStart,
2083  int32_t srcLength);
2084 
2092  inline UnicodeString& append(const UnicodeString& srcText);
2093 
2107  inline UnicodeString& append(const UChar *srcChars,
2108  int32_t srcStart,
2109  int32_t srcLength);
2110 
2120  inline UnicodeString& append(const UChar *srcChars,
2121  int32_t srcLength);
2122 
2129  inline UnicodeString& append(UChar srcChar);
2130 
2137  UnicodeString& append(UChar32 srcChar);
2138 
2139 
2140  /* Insert operations */
2141 
2155  inline UnicodeString& insert(int32_t start,
2156  const UnicodeString& srcText,
2157  int32_t srcStart,
2158  int32_t srcLength);
2159 
2168  inline UnicodeString& insert(int32_t start,
2169  const UnicodeString& srcText);
2170 
2184  inline UnicodeString& insert(int32_t start,
2185  const UChar *srcChars,
2186  int32_t srcStart,
2187  int32_t srcLength);
2188 
2198  inline UnicodeString& insert(int32_t start,
2199  const UChar *srcChars,
2200  int32_t srcLength);
2201 
2210  inline UnicodeString& insert(int32_t start,
2211  UChar srcChar);
2212 
2221  inline UnicodeString& insert(int32_t start,
2222  UChar32 srcChar);
2223 
2224 
2225  /* Replace operations */
2226 
2244  UnicodeString& replace(int32_t start,
2245  int32_t length,
2246  const UnicodeString& srcText,
2247  int32_t srcStart,
2248  int32_t srcLength);
2249 
2262  UnicodeString& replace(int32_t start,
2263  int32_t length,
2264  const UnicodeString& srcText);
2265 
2283  UnicodeString& replace(int32_t start,
2284  int32_t length,
2285  const UChar *srcChars,
2286  int32_t srcStart,
2287  int32_t srcLength);
2288 
2301  inline UnicodeString& replace(int32_t start,
2302  int32_t length,
2303  const UChar *srcChars,
2304  int32_t srcLength);
2305 
2317  inline UnicodeString& replace(int32_t start,
2318  int32_t length,
2319  UChar srcChar);
2320 
2332  UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
2333 
2343  inline UnicodeString& replaceBetween(int32_t start,
2344  int32_t limit,
2345  const UnicodeString& srcText);
2346 
2361  inline UnicodeString& replaceBetween(int32_t start,
2362  int32_t limit,
2363  const UnicodeString& srcText,
2364  int32_t srcStart,
2365  int32_t srcLimit);
2366 
2377  virtual void handleReplaceBetween(int32_t start,
2378  int32_t limit,
2379  const UnicodeString& text);
2380 
2386  virtual UBool hasMetaData() const;
2387 
2403  virtual void copy(int32_t start, int32_t limit, int32_t dest);
2404 
2405  /* Search and replace operations */
2406 
2415  inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2416  const UnicodeString& newText);
2417 
2429  inline UnicodeString& findAndReplace(int32_t start,
2430  int32_t length,
2431  const UnicodeString& oldText,
2432  const UnicodeString& newText);
2433 
2451  UnicodeString& findAndReplace(int32_t start,
2452  int32_t length,
2453  const UnicodeString& oldText,
2454  int32_t oldStart,
2455  int32_t oldLength,
2456  const UnicodeString& newText,
2457  int32_t newStart,
2458  int32_t newLength);
2459 
2460 
2461  /* Remove operations */
2462 
2468  inline UnicodeString& remove(void);
2469 
2478  inline UnicodeString& remove(int32_t start,
2479  int32_t length = (int32_t)INT32_MAX);
2480 
2489  inline UnicodeString& removeBetween(int32_t start,
2490  int32_t limit = (int32_t)INT32_MAX);
2491 
2501  inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
2502 
2503  /* Length operations */
2504 
2516  UBool padLeading(int32_t targetLength,
2517  UChar padChar = 0x0020);
2518 
2530  UBool padTrailing(int32_t targetLength,
2531  UChar padChar = 0x0020);
2532 
2539  inline UBool truncate(int32_t targetLength);
2540 
2546  UnicodeString& trim(void);
2547 
2548 
2549  /* Miscellaneous operations */
2550 
2556  inline UnicodeString& reverse(void);
2557 
2566  inline UnicodeString& reverse(int32_t start,
2567  int32_t length);
2568 
2575  UnicodeString& toUpper(void);
2576 
2584  UnicodeString& toUpper(const Locale& locale);
2585 
2592  UnicodeString& toLower(void);
2593 
2601  UnicodeString& toLower(const Locale& locale);
2602 
2603 #if !UCONFIG_NO_BREAK_ITERATION
2604 
2631  UnicodeString &toTitle(BreakIterator *titleIter);
2632 
2660  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2661 
2693  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
2694 
2695 #endif
2696 
2710  UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2711 
2712  //========================================
2713  // Access to the internal buffer
2714  //========================================
2715 
2759  UChar *getBuffer(int32_t minCapacity);
2760 
2781  void releaseBuffer(int32_t newLength=-1);
2782 
2813  inline const UChar *getBuffer() const;
2814 
2848  inline const UChar *getTerminatedBuffer();
2849 
2850  //========================================
2851  // Constructors
2852  //========================================
2853 
2857  inline UnicodeString();
2858 
2870  UnicodeString(int32_t capacity, UChar32 c, int32_t count);
2871 
2882 
2893 
2905 
2913  UnicodeString(const UChar *text,
2914  int32_t textLength);
2915 
2938  UnicodeString(UBool isTerminated,
2939  const UChar *text,
2940  int32_t textLength);
2941 
2960  UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
2961 
2962 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
2963 
2983  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
2984 
2993  UnicodeString(const char *codepageData, int32_t dataLength);
2994 
2995 #endif
2996 
2997 #if !UCONFIG_NO_CONVERSION
2998 
3016  UnicodeString(const char *codepageData, const char *codepage);
3017 
3035  UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
3036 
3058  UnicodeString(
3059  const char *src, int32_t srcLength,
3060  UConverter *cnv,
3061  UErrorCode &errorCode);
3062 
3063 #endif
3064 
3089  UnicodeString(const char *src, int32_t length, enum EInvariant inv);
3090 
3091 
3097  UnicodeString(const UnicodeString& that);
3098 
3105  UnicodeString(const UnicodeString& src, int32_t srcStart);
3106 
3114  UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3115 
3132  virtual Replaceable *clone() const;
3133 
3137  virtual ~UnicodeString();
3138 
3152  static UnicodeString fromUTF8(const StringPiece &utf8);
3153 
3165  static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
3166 
3167  /* Miscellaneous operations */
3168 
3203  UnicodeString unescape() const;
3204 
3224  UChar32 unescapeAt(int32_t &offset) const;
3225 
3231  static UClassID U_EXPORT2 getStaticClassID();
3232 
3238  virtual UClassID getDynamicClassID() const;
3239 
3240  //========================================
3241  // Implementation methods
3242  //========================================
3243 
3244 protected:
3249  virtual int32_t getLength() const;
3250 
3256  virtual UChar getCharAt(int32_t offset) const;
3257 
3263  virtual UChar32 getChar32At(int32_t offset) const;
3264 
3265 private:
3266  // For char* constructors. Could be made public.
3267  UnicodeString &setToUTF8(const StringPiece &utf8);
3268  // For extract(char*).
3269  // We could make a toUTF8(target, capacity, errorCode) public but not
3270  // this version: New API will be cleaner if we make callers create substrings
3271  // rather than having start+length on every method,
3272  // and it should take a UErrorCode&.
3273  int32_t
3274  toUTF8(int32_t start, int32_t len,
3275  char *target, int32_t capacity) const;
3276 
3281  UBool doEquals(const UnicodeString &text, int32_t len) const;
3282 
3283  inline int8_t
3284  doCompare(int32_t start,
3285  int32_t length,
3286  const UnicodeString& srcText,
3287  int32_t srcStart,
3288  int32_t srcLength) const;
3289 
3290  int8_t doCompare(int32_t start,
3291  int32_t length,
3292  const UChar *srcChars,
3293  int32_t srcStart,
3294  int32_t srcLength) const;
3295 
3296  inline int8_t
3297  doCompareCodePointOrder(int32_t start,
3298  int32_t length,
3299  const UnicodeString& srcText,
3300  int32_t srcStart,
3301  int32_t srcLength) const;
3302 
3303  int8_t doCompareCodePointOrder(int32_t start,
3304  int32_t length,
3305  const UChar *srcChars,
3306  int32_t srcStart,
3307  int32_t srcLength) const;
3308 
3309  inline int8_t
3310  doCaseCompare(int32_t start,
3311  int32_t length,
3312  const UnicodeString &srcText,
3313  int32_t srcStart,
3314  int32_t srcLength,
3315  uint32_t options) const;
3316 
3317  int8_t
3318  doCaseCompare(int32_t start,
3319  int32_t length,
3320  const UChar *srcChars,
3321  int32_t srcStart,
3322  int32_t srcLength,
3323  uint32_t options) const;
3324 
3325  int32_t doIndexOf(UChar c,
3326  int32_t start,
3327  int32_t length) const;
3328 
3329  int32_t doIndexOf(UChar32 c,
3330  int32_t start,
3331  int32_t length) const;
3332 
3333  int32_t doLastIndexOf(UChar c,
3334  int32_t start,
3335  int32_t length) const;
3336 
3337  int32_t doLastIndexOf(UChar32 c,
3338  int32_t start,
3339  int32_t length) const;
3340 
3341  void doExtract(int32_t start,
3342  int32_t length,
3343  UChar *dst,
3344  int32_t dstStart) const;
3345 
3346  inline void doExtract(int32_t start,
3347  int32_t length,
3348  UnicodeString& target) const;
3349 
3350  inline UChar doCharAt(int32_t offset) const;
3351 
3352  UnicodeString& doReplace(int32_t start,
3353  int32_t length,
3354  const UnicodeString& srcText,
3355  int32_t srcStart,
3356  int32_t srcLength);
3357 
3358  UnicodeString& doReplace(int32_t start,
3359  int32_t length,
3360  const UChar *srcChars,
3361  int32_t srcStart,
3362  int32_t srcLength);
3363 
3364  UnicodeString& doReverse(int32_t start,
3365  int32_t length);
3366 
3367  // calculate hash code
3368  int32_t doHashCode(void) const;
3369 
3370  // get pointer to start of array
3371  // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
3372  inline UChar* getArrayStart(void);
3373  inline const UChar* getArrayStart(void) const;
3374 
3375  // A UnicodeString object (not necessarily its current buffer)
3376  // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
3377  inline UBool isWritable() const;
3378 
3379  // Is the current buffer writable?
3380  inline UBool isBufferWritable() const;
3381 
3382  // None of the following does releaseArray().
3383  inline void setLength(int32_t len); // sets only fShortLength and fLength
3384  inline void setToEmpty(); // sets fFlags=kShortString
3385  inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
3386 
3387  // allocate the array; result may be fStackBuffer
3388  // sets refCount to 1 if appropriate
3389  // sets fArray, fCapacity, and fFlags
3390  // returns boolean for success or failure
3391  UBool allocate(int32_t capacity);
3392 
3393  // release the array if owned
3394  void releaseArray(void);
3395 
3396  // turn a bogus string into an empty one
3397  void unBogus();
3398 
3399  // implements assigment operator, copy constructor, and fastCopyFrom()
3400  UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
3401 
3402  // Pin start and limit to acceptable values.
3403  inline void pinIndex(int32_t& start) const;
3404  inline void pinIndices(int32_t& start,
3405  int32_t& length) const;
3406 
3407 #if !UCONFIG_NO_CONVERSION
3408 
3409  /* Internal extract() using UConverter. */
3410  int32_t doExtract(int32_t start, int32_t length,
3411  char *dest, int32_t destCapacity,
3412  UConverter *cnv,
3413  UErrorCode &errorCode) const;
3414 
3415  /*
3416  * Real constructor for converting from codepage data.
3417  * It assumes that it is called with !fRefCounted.
3418  *
3419  * If <code>codepage==0</code>, then the default converter
3420  * is used for the platform encoding.
3421  * If <code>codepage</code> is an empty string (<code>""</code>),
3422  * then a simple conversion is performed on the codepage-invariant
3423  * subset ("invariant characters") of the platform encoding. See utypes.h.
3424  */
3425  void doCodepageCreate(const char *codepageData,
3426  int32_t dataLength,
3427  const char *codepage);
3428 
3429  /*
3430  * Worker function for creating a UnicodeString from
3431  * a codepage string using a UConverter.
3432  */
3433  void
3434  doCodepageCreate(const char *codepageData,
3435  int32_t dataLength,
3436  UConverter *converter,
3437  UErrorCode &status);
3438 
3439 #endif
3440 
3441  /*
3442  * This function is called when write access to the array
3443  * is necessary.
3444  *
3445  * We need to make a copy of the array if
3446  * the buffer is read-only, or
3447  * the buffer is refCounted (shared), and refCount>1, or
3448  * the buffer is too small.
3449  *
3450  * Return FALSE if memory could not be allocated.
3451  */
3452  UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
3453  int32_t growCapacity = -1,
3454  UBool doCopyArray = TRUE,
3455  int32_t **pBufferToDelete = 0,
3456  UBool forceClone = FALSE);
3457 
3463  UnicodeString &
3464  caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper);
3465 
3466  // ref counting
3467  void addRef(void);
3468  int32_t removeRef(void);
3469  int32_t refCount(void) const;
3470 
3471  // constants
// Private integral constants of the class: the stack-buffer size, a few
// sentinel values, the individual bit values stored in fFlags, and named
// combinations of those bits for the storage modes.
3472  enum {
3473  // Set the stack buffer size so that sizeof(UnicodeString) is,
3474  // naturally (without padding), a multiple of sizeof(pointer).
3475  US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings
3476  kInvalidUChar=0xffff, // invalid UChar index
3477  kGrowSize=128, // grow size for this buffer
3478  kInvalidHashCode=0, // invalid hash code
3479  kEmptyHashCode=1, // hash code for empty string
3480 
3481  // bit flag values for fFlags
3482  kIsBogus=1, // this string is bogus, i.e., not valid or NULL
3483  kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields
3484  kRefCounted=4, // there is a refCount field before the characters in fArray
3485  kBufferIsReadonly=8,// do not write to this buffer
3486  kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
3487  // and releaseBuffer(newLength) must be called
3488 
3489  // combined values for convenience
3490  kShortString=kUsingStackBuffer,
3491  kLongString=kRefCounted,
3492  kReadonlyAlias=kBufferIsReadonly,
3493  kWritableAlias=0
3494  };
3495 
3496  friend class StringThreadTest;
3497  friend class UnicodeStringAppendable;
3498 
3499  union StackBufferOrFields; // forward declaration necessary before friend declaration
3500  friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
3501 
3502  /*
3503  * The following are all the class fields that are stored
3504  * in each UnicodeString object.
3505  * Note that UnicodeString has virtual functions,
3506  * therefore there is an implicit vtable pointer
3507  * as the first real field.
3508  * The fields should be aligned such that no padding is necessary.
3509  * On 32-bit machines, the size should be 32 bytes,
3510  * on 64-bit machines (8-byte pointers), it should be 40 bytes.
3511  *
3512  * We use a hack to achieve this.
3513  *
3514  * With at least some compilers, each of the following is forced to
3515  * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
3516  * rounded up with additional padding if the fields do not already fit that requirement:
3517  * - sizeof(class UnicodeString)
3518  * - offsetof(UnicodeString, fUnion)
3519  * - sizeof(fUnion)
3520  * - sizeof(fFields)
3521  *
3522  * In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars)
3523  * which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines.
3524  * (Padding at the end of fFields is ok:
3525  * As long as there is no padding after fStackBuffer, it is not wasted space.)
3526  *
3527  * We further assume that the compiler does not reorder the fields,
3528  * so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion,
3529  * with at most some padding (but no other field) in between.
3530  * (Padding there would be wasted space, but functionally harmless.)
3531  *
3532  * We use a few more sizeof(pointer)'s chunks of space with
3533  * fRestOfStackBuffer, fShortLength and fFlags,
3534  * to get up exactly to the intended sizeof(UnicodeString).
3535  */
3536  // (implicit) *vtable;
// Overlapping storage for the two representations of the string contents.
// Which member is live is decided by the kUsingStackBuffer bit of fFlags.
// getArrayStart() and getCapacity() read fStackBuffer / US_STACKBUF_SIZE when
// the bit is set, and fFields.fArray / fFields.fCapacity when it is clear.
3537  union StackBufferOrFields {
3538  // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
3539  // else fFields is used
3540  UChar fStackBuffer[8]; // buffer for short strings, together with fRestOfStackBuffer
3541  struct {
3542  UChar *fArray; // the Unicode data
3543  int32_t fCapacity; // capacity of fArray (in UChars)
3544  int32_t fLength; // number of characters in fArray if >127; else undefined
3545  } fFields;
3546  } fUnion;
3547  UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8];
3548  int8_t fShortLength; // 0..127: length <0: real length is in fUnion.fFields.fLength
3549  uint8_t fFlags; // bit flags: see constants above
3550 };
3551 
// Free binary operator+ taking two UnicodeString operands by const reference
// and returning a new UnicodeString by value. Only declared here; the
// definition is not part of this header section.
// NOTE(review): presumably returns the concatenation s1 followed by s2 -- confirm.
3560 U_COMMON_API UnicodeString U_EXPORT2
3561 operator+ (const UnicodeString &s1, const UnicodeString &s2);
3562 
3563 //========================================
3564 // Inline members
3565 //========================================
3566 
3567 //========================================
3568 // Privates
3569 //========================================
3570 
3571 inline void
3572 UnicodeString::pinIndex(int32_t& start) const
3573 {
3574  // pin index
3575  if(start < 0) {
3576  start = 0;
3577  } else if(start > length()) {
3578  start = length();
3579  }
3580 }
3581 
3582 inline void
3583 UnicodeString::pinIndices(int32_t& start,
3584  int32_t& _length) const
3585 {
3586  // pin indices
3587  int32_t len = length();
3588  if(start < 0) {
3589  start = 0;
3590  } else if(start > len) {
3591  start = len;
3592  }
3593  if(_length < 0) {
3594  _length = 0;
3595  } else if(_length > (len - start)) {
3596  _length = (len - start);
3597  }
3598 }
3599 
3600 inline UChar*
3601 UnicodeString::getArrayStart()
3602 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
3603 
3604 inline const UChar*
3605 UnicodeString::getArrayStart() const
3606 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
3607 
3608 //========================================
3609 // Default constructor
3610 //========================================
3611 
3612 inline
3613 UnicodeString::UnicodeString()
3614  : fShortLength(0),
3615  fFlags(kShortString)
3616 {}
3617 
3618 //========================================
3619 // Read-only implementation methods
3620 //========================================
// Returns fShortLength when it is non-negative, otherwise fUnion.fFields.fLength.
// NOTE(review): declarator line absent from this listing (presumably UnicodeString::length() const) - restore from the upstream header.
3621 inline int32_t
3623 { return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }
3624 
// Returns US_STACKBUF_SIZE while kUsingStackBuffer is set, otherwise fUnion.fFields.fCapacity.
// NOTE(review): declarator line absent from this listing (presumably UnicodeString::getCapacity() const) - restore from the upstream header.
3625 inline int32_t
3627 { return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }
3628 
// Forwards to doHashCode() and returns its result.
// NOTE(review): declarator line absent from this listing (presumably UnicodeString::hashCode() const) - restore from the upstream header.
3629 inline int32_t
3631 { return doHashCode(); }
3632 
// Returns the kIsBogus bit of fFlags cast to UBool (nonzero when the bit is set).
// NOTE(review): declarator line absent from this listing (presumably UnicodeString::isBogus() const) - restore from the upstream header.
3633 inline UBool
3635 { return (UBool)(fFlags & kIsBogus); }
3636 
3637 inline UBool
3638 UnicodeString::isWritable() const
3639 { return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }
3640 
3641 inline UBool
3642 UnicodeString::isBufferWritable() const
3643 {
3644  return (UBool)(
3645  !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
3646  (!(fFlags&kRefCounted) || refCount()==1));
3647 }
3648 
// Returns 0 when kIsBogus or kOpenGetBuffer is set; otherwise returns
// fUnion.fStackBuffer (kUsingStackBuffer set) or fUnion.fFields.fArray.
// NOTE(review): the declarator line, including the opening brace of the body, is
// absent from this listing (presumably UnicodeString::getBuffer() const {) - restore from the upstream header.
3649 inline const UChar *
3651  if(fFlags&(kIsBogus|kOpenGetBuffer)) {
3652  return 0;
3653  } else if(fFlags&kUsingStackBuffer) {
3654  return fUnion.fStackBuffer;
3655  } else {
3656  return fUnion.fFields.fArray;
3657  }
3658 }
3659 
3660 //========================================
3661 // Read-only alias methods
3662 //========================================
3663 inline int8_t
3664 UnicodeString::doCompare(int32_t start,
3665  int32_t thisLength,
3666  const UnicodeString& srcText,
3667  int32_t srcStart,
3668  int32_t srcLength) const
3669 {
3670  if(srcText.isBogus()) {
3671  return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3672  } else {
3673  srcText.pinIndices(srcStart, srcLength);
3674  return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3675  }
3676 }
3677 
// Equality test: a bogus receiver equals only a bogus 'text'; otherwise 'text' must be
// non-bogus, have the same length, and doEquals(text, len) must be true.
// NOTE(review): declarator line absent from this listing (presumably UnicodeString::operator== (const UnicodeString& text) const) - restore from the upstream header.
3678 inline UBool
3680 {
3681  if(isBogus()) {
3682  return text.isBogus();
3683  } else {
3684  int32_t len = length(), textLength = text.length();
3685  return !text.isBogus() && len == textLength && doEquals(text, len);
3686  }
3687 }
3688 
// Returns the logical negation of operator==(text).
// NOTE(review): declarator line absent from this listing (presumably UnicodeString::operator!= (const UnicodeString& text) const) - restore from the upstream header.
3689 inline UBool
3691 { return (! operator==(text)); }
3692 
// True when doCompare() of the whole of this string against the whole of 'text' returns 1.
// NOTE(review): declarator line absent from this listing (presumably operator> (const UnicodeString& text) const) - restore from the upstream header.
3693 inline UBool
3695 { return doCompare(0, length(), text, 0, text.length()) == 1; }
3696 
// True when doCompare() of the whole of this string against the whole of 'text' returns -1.
// NOTE(review): declarator line absent from this listing (presumably operator< (const UnicodeString& text) const) - restore from the upstream header.
3697 inline UBool
3699 { return doCompare(0, length(), text, 0, text.length()) == -1; }
3700 
// True when doCompare() of the whole of this string against the whole of 'text' returns anything but -1.
// NOTE(review): declarator line absent from this listing (presumably operator>= (const UnicodeString& text) const) - restore from the upstream header.
3701 inline UBool
3703 { return doCompare(0, length(), text, 0, text.length()) != -1; }
3704 
// True when doCompare() of the whole of this string against the whole of 'text' returns anything but 1.
// NOTE(review): declarator line absent from this listing (presumably operator<= (const UnicodeString& text) const) - restore from the upstream header.
3705 inline UBool
3707 { return doCompare(0, length(), text, 0, text.length()) != 1; }
3708 
// Returns doCompare() of the whole of this string against the whole of 'text'.
// NOTE(review): declarator line absent from this listing (presumably UnicodeString::compare(const UnicodeString& text) const) - restore from the upstream header.
3709 inline int8_t
3711 { return doCompare(0, length(), text, 0, text.length()); }
3712 
// Returns doCompare() of the range (start, _length) of this string against the whole of srcText.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::compare(int32_t start,) - restore from the upstream header.
3713 inline int8_t
3715  int32_t _length,
3716  const UnicodeString& srcText) const
3717 { return doCompare(start, _length, srcText, 0, srcText.length()); }
3718 
// Returns doCompare() of the whole of this string against srcChars[0, srcLength).
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::compare(const UChar *srcChars,) - restore from the upstream header.
3719 inline int8_t
3721  int32_t srcLength) const
3722 { return doCompare(0, length(), srcChars, 0, srcLength); }
3723 
// Returns doCompare() of the range (start, _length) of this string against the range (srcStart, srcLength) of srcText.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::compare(int32_t start,) - restore from the upstream header.
3724 inline int8_t
3726  int32_t _length,
3727  const UnicodeString& srcText,
3728  int32_t srcStart,
3729  int32_t srcLength) const
3730 { return doCompare(start, _length, srcText, srcStart, srcLength); }
3731 
// Returns doCompare() of the range (start, _length) of this string against srcChars;
// _length is also passed as the source length, with source offset 0.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::compare(int32_t start,) - restore from the upstream header.
3732 inline int8_t
3734  int32_t _length,
3735  const UChar *srcChars) const
3736 { return doCompare(start, _length, srcChars, 0, _length); }
3737 
// Returns doCompare() of the range (start, _length) of this string against the range (srcStart, srcLength) of srcChars.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::compare(int32_t start,) - restore from the upstream header.
3738 inline int8_t
3740  int32_t _length,
3741  const UChar *srcChars,
3742  int32_t srcStart,
3743  int32_t srcLength) const
3744 { return doCompare(start, _length, srcChars, srcStart, srcLength); }
3745 
// Start/limit variant: converts both ranges to (start, length) form by subtracting
// (limit - start, srcLimit - srcStart) and returns doCompare() on them.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::compareBetween(int32_t start,) - restore from the upstream header.
3746 inline int8_t
3748  int32_t limit,
3749  const UnicodeString& srcText,
3750  int32_t srcStart,
3751  int32_t srcLimit) const
3752 { return doCompare(start, limit - start,
3753  srcText, srcStart, srcLimit - srcStart); }
3754 
3755 inline int8_t
3756 UnicodeString::doCompareCodePointOrder(int32_t start,
3757  int32_t thisLength,
3758  const UnicodeString& srcText,
3759  int32_t srcStart,
3760  int32_t srcLength) const
3761 {
3762  if(srcText.isBogus()) {
3763  return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3764  } else {
3765  srcText.pinIndices(srcStart, srcLength);
3766  return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3767  }
3768 }
3769 
// Returns doCompareCodePointOrder() of the whole of this string against the whole of 'text'.
// NOTE(review): declarator line absent from this listing (presumably UnicodeString::compareCodePointOrder(const UnicodeString& text) const) - restore from the upstream header.
3770 inline int8_t
3772 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
3773 
// Returns doCompareCodePointOrder() of the range (start, _length) of this string against the whole of srcText.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::compareCodePointOrder(int32_t start,) - restore from the upstream header.
3774 inline int8_t
3776  int32_t _length,
3777  const UnicodeString& srcText) const
3778 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
3779 
// Returns doCompareCodePointOrder() of the whole of this string against srcChars[0, srcLength).
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::compareCodePointOrder(const UChar *srcChars,) - restore from the upstream header.
3780 inline int8_t
3782  int32_t srcLength) const
3783 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
3784 
// Returns doCompareCodePointOrder() of the range (start, _length) of this string against the range (srcStart, srcLength) of srcText.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::compareCodePointOrder(int32_t start,) - restore from the upstream header.
3785 inline int8_t
3787  int32_t _length,
3788  const UnicodeString& srcText,
3789  int32_t srcStart,
3790  int32_t srcLength) const
3791 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
3792 
// Returns doCompareCodePointOrder() of the range (start, _length) of this string against srcChars;
// _length is also passed as the source length, with source offset 0.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::compareCodePointOrder(int32_t start,) - restore from the upstream header.
3793 inline int8_t
3795  int32_t _length,
3796  const UChar *srcChars) const
3797 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
3798 
// Returns doCompareCodePointOrder() of the range (start, _length) of this string against the range (srcStart, srcLength) of srcChars.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::compareCodePointOrder(int32_t start,) - restore from the upstream header.
3799 inline int8_t
3801  int32_t _length,
3802  const UChar *srcChars,
3803  int32_t srcStart,
3804  int32_t srcLength) const
3805 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
3806 
// Start/limit variant: converts both ranges to (start, length) form
// (limit - start, srcLimit - srcStart) and returns doCompareCodePointOrder() on them.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::compareCodePointOrderBetween(int32_t start,) - restore from the upstream header.
3807 inline int8_t
3809  int32_t limit,
3810  const UnicodeString& srcText,
3811  int32_t srcStart,
3812  int32_t srcLimit) const
3813 { return doCompareCodePointOrder(start, limit - start,
3814  srcText, srcStart, srcLimit - srcStart); }
3815 
3816 inline int8_t
3817 UnicodeString::doCaseCompare(int32_t start,
3818  int32_t thisLength,
3819  const UnicodeString &srcText,
3820  int32_t srcStart,
3821  int32_t srcLength,
3822  uint32_t options) const
3823 {
3824  if(srcText.isBogus()) {
3825  return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3826  } else {
3827  srcText.pinIndices(srcStart, srcLength);
3828  return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
3829  }
3830 }
3831 
3832 inline int8_t
3833 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
3834  return doCaseCompare(0, length(), text, 0, text.length(), options);
3835 }
3836 
// Returns doCaseCompare() of the range (start, _length) of this string against the whole of srcText.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::caseCompare(int32_t start,) - restore from the upstream header.
3837 inline int8_t
3839  int32_t _length,
3840  const UnicodeString &srcText,
3841  uint32_t options) const {
3842  return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
3843 }
3844 
// Returns doCaseCompare() of the whole of this string against srcChars[0, srcLength).
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::caseCompare(const UChar *srcChars,) - restore from the upstream header.
3845 inline int8_t
3847  int32_t srcLength,
3848  uint32_t options) const {
3849  return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
3850 }
3851 
// Returns doCaseCompare() of the range (start, _length) of this string against the range (srcStart, srcLength) of srcText.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::caseCompare(int32_t start,) - restore from the upstream header.
3852 inline int8_t
3854  int32_t _length,
3855  const UnicodeString &srcText,
3856  int32_t srcStart,
3857  int32_t srcLength,
3858  uint32_t options) const {
3859  return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
3860 }
3861 
// Returns doCaseCompare() of the range (start, _length) of this string against srcChars;
// _length is also passed as the source length, with source offset 0.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::caseCompare(int32_t start,) - restore from the upstream header.
3862 inline int8_t
3864  int32_t _length,
3865  const UChar *srcChars,
3866  uint32_t options) const {
3867  return doCaseCompare(start, _length, srcChars, 0, _length, options);
3868 }
3869 
// Returns doCaseCompare() of the range (start, _length) of this string against the range (srcStart, srcLength) of srcChars.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::caseCompare(int32_t start,) - restore from the upstream header.
3870 inline int8_t
3872  int32_t _length,
3873  const UChar *srcChars,
3874  int32_t srcStart,
3875  int32_t srcLength,
3876  uint32_t options) const {
3877  return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
3878 }
3879 
// Start/limit variant: converts both ranges to (start, length) form
// (limit - start, srcLimit - srcStart) and returns doCaseCompare() on them.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::caseCompareBetween(int32_t start,) - restore from the upstream header.
3880 inline int8_t
3882  int32_t limit,
3883  const UnicodeString &srcText,
3884  int32_t srcStart,
3885  int32_t srcLimit,
3886  uint32_t options) const {
3887  return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
3888 }
3889 
// Searches for a range of srcText: returns -1 when srcText is bogus or the pinned source
// range is empty; otherwise forwards to the UChar-array indexOf() overload.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::indexOf(const UnicodeString& srcText,) - restore from the upstream header.
3890 inline int32_t
3892  int32_t srcStart,
3893  int32_t srcLength,
3894  int32_t start,
3895  int32_t _length) const
3896 {
3897  if(!srcText.isBogus()) {
3898  srcText.pinIndices(srcStart, srcLength);
3899  if(srcLength > 0) {
3900  return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
3901  }
3902  }
3903  return -1;
3904 }
3905 
// Searches the whole of this string for the whole of 'text' via the five-argument indexOf().
// NOTE(review): declarator line absent from this listing (presumably UnicodeString::indexOf(const UnicodeString& text) const) - restore from the upstream header.
3906 inline int32_t
3908 { return indexOf(text, 0, text.length(), 0, length()); }
3909 
// Pins 'start' to this string, then searches from there to the end for the whole of 'text'.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::indexOf(const UnicodeString& text,) - restore from the upstream header.
3910 inline int32_t
3912  int32_t start) const {
3913  pinIndex(start);
3914  return indexOf(text, 0, text.length(), start, length() - start);
3915 }
3916 
// Searches the range (start, _length) of this string for the whole of 'text'.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::indexOf(const UnicodeString& text,) - restore from the upstream header.
3917 inline int32_t
3919  int32_t start,
3920  int32_t _length) const
3921 { return indexOf(text, 0, text.length(), start, _length); }
3922 
// Pins 'start' to this string, then searches from there to the end for srcChars[0, srcLength).
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::indexOf(const UChar *srcChars,) - restore from the upstream header.
3923 inline int32_t
3925  int32_t srcLength,
3926  int32_t start) const {
3927  pinIndex(start);
3928  return indexOf(srcChars, 0, srcLength, start, length() - start);
3929 }
3930 
// Searches the range (start, _length) of this string for srcChars[0, srcLength).
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::indexOf(const UChar *srcChars,) - restore from the upstream header.
3931 inline int32_t
3933  int32_t srcLength,
3934  int32_t start,
3935  int32_t _length) const
3936 { return indexOf(srcChars, 0, srcLength, start, _length); }
3937 
// Forwards a single-character search over the range (start, _length) to doIndexOf().
// NOTE(review): declarator line with the function name and the type of 'c' is absent from this listing
// (presumably the UChar overload of UnicodeString::indexOf) - restore from the upstream header.
3938 inline int32_t
3940  int32_t start,
3941  int32_t _length) const
3942 { return doIndexOf(c, start, _length); }
3943 
// Forwards a single-character search over the range (start, _length) to doIndexOf().
// NOTE(review): declarator line with the function name and the type of 'c' is absent from this listing
// (presumably the UChar32 overload of UnicodeString::indexOf) - restore from the upstream header.
3944 inline int32_t
3946  int32_t start,
3947  int32_t _length) const
3948 { return doIndexOf(c, start, _length); }
3949 
// Searches the whole of this string for 'c' via doIndexOf().
// NOTE(review): declarator line absent from this listing (presumably UnicodeString::indexOf(UChar c) const) - restore from the upstream header.
3950 inline int32_t
3952 { return doIndexOf(c, 0, length()); }
3953 
// Searches the whole of this string for 'c' via the three-argument indexOf().
// NOTE(review): declarator line absent from this listing (presumably UnicodeString::indexOf(UChar32 c) const) - restore from the upstream header.
3954 inline int32_t
3956 { return indexOf(c, 0, length()); }
3957 
// Pins 'start' to this string, then searches from there to the end for 'c' via doIndexOf().
// NOTE(review): declarator line with the function name and the type of 'c' is absent from this listing
// (presumably the UChar overload of UnicodeString::indexOf) - restore from the upstream header.
3958 inline int32_t
3960  int32_t start) const {
3961  pinIndex(start);
3962  return doIndexOf(c, start, length() - start);
3963 }
3964 
// Pins 'start' to this string, then searches from there to the end for 'c' via the three-argument indexOf().
// NOTE(review): declarator line with the function name and the type of 'c' is absent from this listing
// (presumably the UChar32 overload of UnicodeString::indexOf) - restore from the upstream header.
3965 inline int32_t
3967  int32_t start) const {
3968  pinIndex(start);
3969  return indexOf(c, start, length() - start);
3970 }
3971 
// Backward search of the range (start, _length) of this string for srcChars[0, srcLength).
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::lastIndexOf(const UChar *srcChars,) - restore from the upstream header.
3972 inline int32_t
3974  int32_t srcLength,
3975  int32_t start,
3976  int32_t _length) const
3977 { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
3978 
// Pins 'start' to this string, then calls the five-argument lastIndexOf() for srcChars[0, srcLength)
// over the range from 'start' to the end.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::lastIndexOf(const UChar *srcChars,) - restore from the upstream header.
3979 inline int32_t
3981  int32_t srcLength,
3982  int32_t start) const {
3983  pinIndex(start);
3984  return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
3985 }
3986 
// Backward search for a range of srcText: returns -1 when srcText is bogus or the pinned source
// range is empty; otherwise forwards to the UChar-array lastIndexOf() overload.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::lastIndexOf(const UnicodeString& srcText,) - restore from the upstream header.
3987 inline int32_t
3989  int32_t srcStart,
3990  int32_t srcLength,
3991  int32_t start,
3992  int32_t _length) const
3993 {
3994  if(!srcText.isBogus()) {
3995  srcText.pinIndices(srcStart, srcLength);
3996  if(srcLength > 0) {
3997  return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
3998  }
3999  }
4000  return -1;
4001 }
4002 
// Backward search of the range (start, _length) of this string for the whole of 'text'.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::lastIndexOf(const UnicodeString& text,) - restore from the upstream header.
4003 inline int32_t
4005  int32_t start,
4006  int32_t _length) const
4007 { return lastIndexOf(text, 0, text.length(), start, _length); }
4008 
// Pins 'start' to this string, then calls the five-argument lastIndexOf() for the whole of 'text'
// over the range from 'start' to the end.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::lastIndexOf(const UnicodeString& text,) - restore from the upstream header.
4009 inline int32_t
4011  int32_t start) const {
4012  pinIndex(start);
4013  return lastIndexOf(text, 0, text.length(), start, length() - start);
4014 }
4015 
// Backward search of the whole of this string for the whole of 'text'.
// NOTE(review): declarator line absent from this listing (presumably UnicodeString::lastIndexOf(const UnicodeString& text) const) - restore from the upstream header.
4016 inline int32_t
4018 { return lastIndexOf(text, 0, text.length(), 0, length()); }
4019 
// Forwards a single-character backward search over the range (start, _length) to doLastIndexOf().
// NOTE(review): declarator line with the function name and the type of 'c' is absent from this listing
// (presumably the UChar overload of UnicodeString::lastIndexOf) - restore from the upstream header.
4020 inline int32_t
4022  int32_t start,
4023  int32_t _length) const
4024 { return doLastIndexOf(c, start, _length); }
4025 
// Forwards a single-character backward search over the range (start, _length) to doLastIndexOf().
// NOTE(review): declarator line with the function name and the type of 'c' is absent from this listing
// (presumably the UChar32 overload of UnicodeString::lastIndexOf) - restore from the upstream header.
4026 inline int32_t
4028  int32_t start,
4029  int32_t _length) const {
4030  return doLastIndexOf(c, start, _length);
4031 }
4032 
// Backward search of the whole of this string for 'c' via doLastIndexOf().
// NOTE(review): declarator line absent from this listing (presumably UnicodeString::lastIndexOf(UChar c) const) - restore from the upstream header.
4033 inline int32_t
4035 { return doLastIndexOf(c, 0, length()); }
4036 
// Backward search of the whole of this string for 'c' via the three-argument lastIndexOf().
// NOTE(review): the declarator line, including the opening brace of the body, is absent from this
// listing (presumably UnicodeString::lastIndexOf(UChar32 c) const {) - restore from the upstream header.
4037 inline int32_t
4039  return lastIndexOf(c, 0, length());
4040 }
4041 
// Pins 'start' to this string, then calls doLastIndexOf() for 'c' over the range from 'start' to the end.
// NOTE(review): declarator line with the function name and the type of 'c' is absent from this listing
// (presumably the UChar overload of UnicodeString::lastIndexOf) - restore from the upstream header.
4042 inline int32_t
4044  int32_t start) const {
4045  pinIndex(start);
4046  return doLastIndexOf(c, start, length() - start);
4047 }
4048 
// Pins 'start' to this string, then calls the three-argument lastIndexOf() for 'c' over the range from 'start' to the end.
// NOTE(review): declarator line with the function name and the type of 'c' is absent from this listing
// (presumably the UChar32 overload of UnicodeString::lastIndexOf) - restore from the upstream header.
4049 inline int32_t
4051  int32_t start) const {
4052  pinIndex(start);
4053  return lastIndexOf(c, start, length() - start);
4054 }
4055 
// True when compare() of the first text.length() units of this string against the whole of 'text' returns 0.
// NOTE(review): declarator line absent from this listing (presumably UnicodeString::startsWith(const UnicodeString& text) const) - restore from the upstream header.
4056 inline UBool
4058 { return compare(0, text.length(), text, 0, text.length()) == 0; }
4059 
// True when doCompare() of the first srcLength units of this string against the range
// (srcStart, srcLength) of srcText returns 0.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::startsWith(const UnicodeString& srcText,) - restore from the upstream header.
4060 inline UBool
4062  int32_t srcStart,
4063  int32_t srcLength) const
4064 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
4065 
4066 inline UBool
4067 UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const {
4068  if(srcLength < 0) {
4069  srcLength = u_strlen(srcChars);
4070  }
4071  return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
4072 }
4073 
4074 inline UBool
4075 UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const {
4076  if(srcLength < 0) {
4077  srcLength = u_strlen(srcChars);
4078  }
4079  return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
4080 }
4081 
// True when doCompare() of the last text.length() units of this string (starting at
// length() - text.length()) against the whole of 'text' returns 0.
// NOTE(review): declarator line absent from this listing (presumably UnicodeString::endsWith(const UnicodeString& text) const) - restore from the upstream header.
4082 inline UBool
4084 { return doCompare(length() - text.length(), text.length(),
4085  text, 0, text.length()) == 0; }
4086 
// Pins (srcStart, srcLength) to srcText, then tests whether the last srcLength units of this
// string compare equal (doCompare() == 0) to that range of srcText.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::endsWith(const UnicodeString& srcText,) - restore from the upstream header.
4087 inline UBool
4089  int32_t srcStart,
4090  int32_t srcLength) const {
4091  srcText.pinIndices(srcStart, srcLength);
4092  return doCompare(length() - srcLength, srcLength,
4093  srcText, srcStart, srcLength) == 0;
4094 }
4095 
// Tests whether the last srcLength units of this string compare equal (doCompare() == 0) to
// srcChars[0, srcLength); a negative srcLength is replaced by u_strlen(srcChars).
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::endsWith(const UChar *srcChars,) - restore from the upstream header.
4096 inline UBool
4098  int32_t srcLength) const {
4099  if(srcLength < 0) {
4100  srcLength = u_strlen(srcChars);
4101  }
4102  return doCompare(length() - srcLength, srcLength,
4103  srcChars, 0, srcLength) == 0;
4104 }
4105 
// Tests whether the last srcLength units of this string compare equal (doCompare() == 0) to the
// range (srcStart, srcLength) of srcChars; a negative srcLength is replaced by
// u_strlen(srcChars + srcStart), i.e. measured from the first compared unit.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::endsWith(const UChar *srcChars,) - restore from the upstream header.
4106 inline UBool
4108  int32_t srcStart,
4109  int32_t srcLength) const {
4110  if(srcLength < 0) {
4111  srcLength = u_strlen(srcChars + srcStart);
4112  }
4113  return doCompare(length() - srcLength, srcLength,
4114  srcChars, srcStart, srcLength) == 0;
4115 }
4116 
4117 //========================================
4118 // replace
4119 //========================================
// Replaces the range (start, _length) of this string with the whole of srcText via doReplace().
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::replace(int32_t start,) - restore from the upstream header.
4120 inline UnicodeString&
4122  int32_t _length,
4123  const UnicodeString& srcText)
4124 { return doReplace(start, _length, srcText, 0, srcText.length()); }
4125 
// Replaces the range (start, _length) of this string with the range (srcStart, srcLength) of srcText via doReplace().
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::replace(int32_t start,) - restore from the upstream header.
4126 inline UnicodeString&
4128  int32_t _length,
4129  const UnicodeString& srcText,
4130  int32_t srcStart,
4131  int32_t srcLength)
4132 { return doReplace(start, _length, srcText, srcStart, srcLength); }
4133 
// Replaces the range (start, _length) of this string with srcChars[0, srcLength) via doReplace().
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::replace(int32_t start,) - restore from the upstream header.
4134 inline UnicodeString&
4136  int32_t _length,
4137  const UChar *srcChars,
4138  int32_t srcLength)
4139 { return doReplace(start, _length, srcChars, 0, srcLength); }
4140 
// Replaces the range (start, _length) of this string with the range (srcStart, srcLength) of srcChars via doReplace().
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::replace(int32_t start,) - restore from the upstream header.
4141 inline UnicodeString&
4143  int32_t _length,
4144  const UChar *srcChars,
4145  int32_t srcStart,
4146  int32_t srcLength)
4147 { return doReplace(start, _length, srcChars, srcStart, srcLength); }
4148 
// Replaces the range (start, _length) of this string with the single unit srcChar,
// passed to doReplace() as a one-element array.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::replace(int32_t start,) - restore from the upstream header.
4149 inline UnicodeString&
4151  int32_t _length,
4152  UChar srcChar)
4153 { return doReplace(start, _length, &srcChar, 0, 1); }
4154 
// Start/limit variant: replaces (start, limit - start) of this string with the whole of srcText via doReplace().
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::replaceBetween(int32_t start,) - restore from the upstream header.
4155 inline UnicodeString&
4157  int32_t limit,
4158  const UnicodeString& srcText)
4159 { return doReplace(start, limit - start, srcText, 0, srcText.length()); }
4160 
// Start/limit variant: replaces (start, limit - start) of this string with
// (srcStart, srcLimit - srcStart) of srcText via doReplace().
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::replaceBetween(int32_t start,) - restore from the upstream header.
4161 inline UnicodeString&
4163  int32_t limit,
4164  const UnicodeString& srcText,
4165  int32_t srcStart,
4166  int32_t srcLimit)
4167 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4168 
// Forwards to the eight-argument findAndReplace() with the whole of this string as the search
// range and the whole of oldText / newText as pattern and replacement.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::findAndReplace(const UnicodeString& oldText,) - restore from the upstream header.
4169 inline UnicodeString&
4171  const UnicodeString& newText)
4172 { return findAndReplace(0, length(), oldText, 0, oldText.length(),
4173  newText, 0, newText.length()); }
4174 
// Forwards to the eight-argument findAndReplace() with (start, _length) as the search range
// and the whole of oldText / newText as pattern and replacement.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::findAndReplace(int32_t start,) - restore from the upstream header.
4175 inline UnicodeString&
4177  int32_t _length,
4178  const UnicodeString& oldText,
4179  const UnicodeString& newText)
4180 { return findAndReplace(start, _length, oldText, 0, oldText.length(),
4181  newText, 0, newText.length()); }
4182 
4183 // ============================
4184 // extract
4185 // ============================
4186 inline void
4187 UnicodeString::doExtract(int32_t start,
4188  int32_t _length,
4189  UnicodeString& target) const
4190 { target.replace(0, target.length(), *this, start, _length); }
4191 
// Forwards (start, _length, target, targetStart) unchanged to the UChar-array doExtract() overload.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::extract(int32_t start,) - restore from the upstream header.
4192 inline void
4194  int32_t _length,
4195  UChar *target,
4196  int32_t targetStart) const
4197 { doExtract(start, _length, target, targetStart); }
4198 
// Forwards (start, _length, target) unchanged to the UnicodeString doExtract() overload.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::extract(int32_t start,) - restore from the upstream header.
4199 inline void
4201  int32_t _length,
4202  UnicodeString& target) const
4203 { doExtract(start, _length, target); }
4204 
4205 #if !UCONFIG_NO_CONVERSION
4206 
// Forwards to the five-argument extract(), passing 0xffffffff as the destination size when
// dst is non-null and 0 when dst is null.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::extract(int32_t start,) - restore from the upstream header.
4207 inline int32_t
4209  int32_t _length,
4210  char *dst,
4211  const char *codepage) const
4212 
4213 {
4214  // This dstSize value will be checked explicitly
4215  return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
4216 }
4217 
4218 #endif
4219 
// Start/limit variant: pins both 'start' and 'limit' to this string, then calls
// doExtract(start, limit - start, dst, dstStart).
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::extractBetween(int32_t start,) - restore from the upstream header.
4220 inline void
4222  int32_t limit,
4223  UChar *dst,
4224  int32_t dstStart) const {
4225  pinIndex(start);
4226  pinIndex(limit);
4227  doExtract(start, limit - start, dst, dstStart);
4228 }
4229 
4230 inline UnicodeString
4231 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
4232  return tempSubString(start, limit - start);
4233 }
4234 
4235 inline UChar
4236 UnicodeString::doCharAt(int32_t offset) const
4237 {
4238  if((uint32_t)offset < (uint32_t)length()) {
4239  return getArrayStart()[offset];
4240  } else {
4241  return kInvalidUChar;
4242  }
4243 }
4244 
4245 inline UChar
4246 UnicodeString::charAt(int32_t offset) const
4247 { return doCharAt(offset); }
4248 
4249 inline UChar
4250 UnicodeString::operator[] (int32_t offset) const
4251 { return doCharAt(offset); }
4252 
// True when fShortLength is 0.
// NOTE(review): the declarator line, including the opening brace of the body, is absent from this
// listing (presumably UnicodeString::isEmpty() const {) - restore from the upstream header.
4253 inline UBool
4255  return fShortLength == 0;
4256 }
4257 
4258 //========================================
4259 // Write implementation methods
4260 //========================================
4261 inline void
4262 UnicodeString::setLength(int32_t len) {
4263  if(len <= 127) {
4264  fShortLength = (int8_t)len;
4265  } else {
4266  fShortLength = (int8_t)-1;
4267  fUnion.fFields.fLength = len;
4268  }
4269 }
4270 
4271 inline void
4272 UnicodeString::setToEmpty() {
4273  fShortLength = 0;
4274  fFlags = kShortString;
4275 }
4276 
4277 inline void
4278 UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
4279  setLength(len);
4280  fUnion.fFields.fArray = array;
4281  fUnion.fFields.fCapacity = capacity;
4282 }
4283 
// Returns a pointer to the string contents followed by a NUL, or 0 on failure.
// - Not writable (isWritable() false): returns 0.
// - Spare capacity and the buffer is not shared (not kRefCounted, or refCount()==1):
//   writes the NUL at [len] unless the buffer is read-only, and returns the array.
// - Otherwise asks cloneArrayIfNeeded(len+1) for a buffer, writes the NUL there,
//   and returns it; returns 0 when that call fails.
// NOTE(review): the declarator line, including the opening brace of the body, is absent from this
// listing (presumably UnicodeString::getTerminatedBuffer() {) - restore from the upstream header.
// NOTE(review): the inner comment sentence about a shared buffer (refCount()>1) receiving the NUL
// does not match the enclosing condition, which already excludes shared ref-counted buffers.
4284 inline const UChar *
4286  if(!isWritable()) {
4287  return 0;
4288  } else {
4289  UChar *array = getArrayStart();
4290  int32_t len = length();
4291  if(len < getCapacity() && ((fFlags&kRefCounted) == 0 || refCount() == 1)) {
4292  /*
4293  * kRefCounted: Do not write the NUL if the buffer is shared.
4294  * That is mostly safe, except when the length of one copy was modified
4295  * without copy-on-write, e.g., via truncate(newLength) or remove(void).
4296  * Then the NUL would be written into the middle of another copy's string.
4297  */
4298  if(!(fFlags&kBufferIsReadonly)) {
4299  /*
4300  * We must not write to a readonly buffer, but it is known to be
4301  * NUL-terminated if len<capacity.
4302  * A shared, allocated buffer (refCount()>1) must not have its contents
4303  * modified, but the NUL at [len] is beyond the string contents,
4304  * and multiple string objects and threads writing the same NUL into the
4305  * same location is harmless.
4306  * In all other cases, the buffer is fully writable and it is anyway safe
4307  * to write the NUL.
4308  *
4309  * Note: An earlier version of this code tested whether there is a NUL
4310  * at [len] already, but, while safe, it generated lots of warnings from
4311  * tools like valgrind and Purify.
4312  */
4313  array[len] = 0;
4314  }
4315  return array;
4316  } else if(cloneArrayIfNeeded(len+1)) {
4317  array = getArrayStart();
4318  array[len] = 0;
4319  return array;
4320  } else {
4321  return 0;
4322  }
4323  }
4324 }
4325 
// Replaces the whole current contents with the single unit 'ch' via doReplace().
// NOTE(review): declarator line absent from this listing (presumably UnicodeString::operator= (UChar ch)) - restore from the upstream header.
4326 inline UnicodeString&
4328 { return doReplace(0, length(), &ch, 0, 1); }
4329 
// Replaces the whole current contents with 'ch' via replace(0, length(), ch).
// NOTE(review): declarator line absent from this listing (presumably UnicodeString::operator= (UChar32 ch)) - restore from the upstream header.
4330 inline UnicodeString&
4332 { return replace(0, length(), ch); }
4333 
// Calls unBogus(), then replaces the whole current contents with the range
// (srcStart, srcLength) of srcText via doReplace().
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::setTo(const UnicodeString& srcText,) - restore from the upstream header.
4334 inline UnicodeString&
4336  int32_t srcStart,
4337  int32_t srcLength)
4338 {
4339  unBogus();
4340  return doReplace(0, length(), srcText, srcStart, srcLength);
4341 }
4342 
// Calls unBogus(), pins srcStart to srcText, then replaces the whole current contents with
// srcText from srcStart to its end via doReplace().
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::setTo(const UnicodeString& srcText,) - restore from the upstream header.
4343 inline UnicodeString&
4345  int32_t srcStart)
4346 {
4347  unBogus();
4348  srcText.pinIndex(srcStart);
4349  return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
4350 }
4351 
// Forwards to copyFrom(srcText) and returns its result.
// NOTE(review): declarator line absent from this listing (presumably UnicodeString::setTo(const UnicodeString& srcText)) - restore from the upstream header.
4352 inline UnicodeString&
4354 {
4355  return copyFrom(srcText);
4356 }
4357 
4358 inline UnicodeString&
4359 UnicodeString::setTo(const UChar *srcChars,
4360  int32_t srcLength)
4361 {
4362  unBogus();
4363  return doReplace(0, length(), srcChars, 0, srcLength);
4364 }
4365 
// Calls unBogus(), then replaces the whole current contents with the single unit srcChar,
// passed to doReplace() as a one-element array.
// NOTE(review): declarator line absent from this listing (presumably UnicodeString::setTo(UChar srcChar)) - restore from the upstream header.
4366 inline UnicodeString&
4368 {
4369  unBogus();
4370  return doReplace(0, length(), &srcChar, 0, 1);
4371 }
4372 
// Calls unBogus(), then replaces the whole current contents with srcChar via replace(0, length(), srcChar).
// NOTE(review): declarator line absent from this listing (presumably UnicodeString::setTo(UChar32 srcChar)) - restore from the upstream header.
4373 inline UnicodeString&
4375 {
4376  unBogus();
4377  return replace(0, length(), srcChar);
4378 }
4379 
// Appends the range (srcStart, srcLength) of srcText: doReplace() at offset length() with a replaced length of 0.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::append(const UnicodeString& srcText,) - restore from the upstream header.
4380 inline UnicodeString&
4382  int32_t srcStart,
4383  int32_t srcLength)
4384 { return doReplace(length(), 0, srcText, srcStart, srcLength); }
4385 
// Appends the whole of srcText: doReplace() at offset length() with a replaced length of 0.
// NOTE(review): declarator line absent from this listing (presumably UnicodeString::append(const UnicodeString& srcText)) - restore from the upstream header.
4386 inline UnicodeString&
4388 { return doReplace(length(), 0, srcText, 0, srcText.length()); }
4389 
// Appends the range (srcStart, srcLength) of srcChars: doReplace() at offset length() with a replaced length of 0.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::append(const UChar *srcChars,) - restore from the upstream header.
4390 inline UnicodeString&
4392  int32_t srcStart,
4393  int32_t srcLength)
4394 { return doReplace(length(), 0, srcChars, srcStart, srcLength); }
4395 
// Appends srcChars[0, srcLength): doReplace() at offset length() with a replaced length of 0.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::append(const UChar *srcChars,) - restore from the upstream header.
4396 inline UnicodeString&
4398  int32_t srcLength)
4399 { return doReplace(length(), 0, srcChars, 0, srcLength); }
4400 
// Appends the single unit srcChar, passed to doReplace() as a one-element array.
// NOTE(review): declarator line absent from this listing (presumably UnicodeString::append(UChar srcChar)) - restore from the upstream header.
4401 inline UnicodeString&
4403 { return doReplace(length(), 0, &srcChar, 0, 1); }
4404 
// Appends the single unit 'ch', passed to doReplace() as a one-element array.
// NOTE(review): declarator line absent from this listing (presumably UnicodeString::operator+= (UChar ch)) - restore from the upstream header.
4405 inline UnicodeString&
4407 { return doReplace(length(), 0, &ch, 0, 1); }
4408 
// Appends 'ch' by forwarding to append(ch).
// NOTE(review): the declarator line, including the opening brace of the body, is absent from this
// listing (presumably UnicodeString::operator+= (UChar32 ch) {) - restore from the upstream header.
4409 inline UnicodeString&
4411  return append(ch);
4412 }
4413 
// Appends the whole of srcText: doReplace() at offset length() with a replaced length of 0.
// NOTE(review): declarator line absent from this listing (presumably UnicodeString::operator+= (const UnicodeString& srcText)) - restore from the upstream header.
4414 inline UnicodeString&
4416 { return doReplace(length(), 0, srcText, 0, srcText.length()); }
4417 
// Inserts the range (srcStart, srcLength) of srcText at 'start': doReplace() with a replaced length of 0.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::insert(int32_t start,) - restore from the upstream header.
4418 inline UnicodeString&
4420  const UnicodeString& srcText,
4421  int32_t srcStart,
4422  int32_t srcLength)
4423 { return doReplace(start, 0, srcText, srcStart, srcLength); }
4424 
// Inserts the whole of srcText at 'start': doReplace() with a replaced length of 0.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::insert(int32_t start,) - restore from the upstream header.
4425 inline UnicodeString&
4427  const UnicodeString& srcText)
4428 { return doReplace(start, 0, srcText, 0, srcText.length()); }
4429 
// Inserts the range (srcStart, srcLength) of srcChars at 'start': doReplace() with a replaced length of 0.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::insert(int32_t start,) - restore from the upstream header.
4430 inline UnicodeString&
4432  const UChar *srcChars,
4433  int32_t srcStart,
4434  int32_t srcLength)
4435 { return doReplace(start, 0, srcChars, srcStart, srcLength); }
4436 
// Inserts srcChars[0, srcLength) at 'start': doReplace() with a replaced length of 0.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::insert(int32_t start,) - restore from the upstream header.
4437 inline UnicodeString&
4439  const UChar *srcChars,
4440  int32_t srcLength)
4441 { return doReplace(start, 0, srcChars, 0, srcLength); }
4442 
// Inserts the single unit srcChar at 'start', passed to doReplace() as a one-element array.
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::insert(int32_t start,) - restore from the upstream header.
4443 inline UnicodeString&
4445  UChar srcChar)
4446 { return doReplace(start, 0, &srcChar, 0, 1); }
4447 
// Inserts the code point srcChar at 'start' via replace(start, 0, srcChar).
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::insert(int32_t start,) - restore from the upstream header.
4448 inline UnicodeString&
4450  UChar32 srcChar)
4451 { return replace(start, 0, srcChar); }
4452 
4453 
// Empties the string and returns *this: calls setToEmpty() when kIsBogus or kBufferIsReadonly
// is set, otherwise only resets fShortLength to 0 (flags and buffer are left untouched).
// NOTE(review): declarator line absent from this listing (presumably UnicodeString::remove()) - restore from the upstream header.
4454 inline UnicodeString&
4456 {
4457  // remove() of a bogus string makes the string empty and non-bogus
4458  // we also un-alias a read-only alias to deal with NUL-termination
4459  // issues with getTerminatedBuffer()
4460  if(fFlags & (kIsBogus|kBufferIsReadonly)) {
4461  setToEmpty();
4462  } else {
4463  fShortLength = 0;
4464  }
4465  return *this;
4466 }
4467 
// Removes the range (start, _length): when start <= 0 and _length == INT32_MAX it takes the
// remove() path; otherwise it replaces the range with nothing via doReplace().
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::remove(int32_t start,) - restore from the upstream header.
4468 inline UnicodeString&
4470  int32_t _length)
4471 {
4472  if(start <= 0 && _length == INT32_MAX) {
4473  // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
4474  return remove();
4475  }
4476  return doReplace(start, _length, NULL, 0, 0);
4477 }
4478 
// Start/limit variant: replaces (start, limit - start) with nothing via doReplace().
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::removeBetween(int32_t start,) - restore from the upstream header.
4479 inline UnicodeString&
4481  int32_t limit)
4482 { return doReplace(start, limit - start, NULL, 0, 0); }
4483 
4484 inline UnicodeString &
4485 UnicodeString::retainBetween(int32_t start, int32_t limit) {
4486  truncate(limit);
4487  return doReplace(0, start, NULL, 0, 0);
4488 }
4489 
4490 inline UBool
4491 UnicodeString::truncate(int32_t targetLength)
4492 {
4493  if(isBogus() && targetLength == 0) {
4494  // truncate(0) of a bogus string makes the string empty and non-bogus
4495  unBogus();
4496  return FALSE;
4497  } else if((uint32_t)targetLength < (uint32_t)length()) {
4498  setLength(targetLength);
4499  if(fFlags&kBufferIsReadonly) {
4500  fUnion.fFields.fCapacity = targetLength; // not NUL-terminated any more
4501  }
4502  return TRUE;
4503  } else {
4504  return FALSE;
4505  }
4506 }
4507 
// Reverses the whole string via doReverse(0, length()).
// NOTE(review): declarator line absent from this listing (presumably UnicodeString::reverse()) - restore from the upstream header.
4508 inline UnicodeString&
4510 { return doReverse(0, length()); }
4511 
// Reverses the range (start, _length) via doReverse().
// NOTE(review): declarator line with the function name and first parameter is absent from this listing (presumably UnicodeString::reverse(int32_t start,) - restore from the upstream header.
4512 inline UnicodeString&
4514  int32_t _length)
4515 { return doReverse(start, _length); }
4516 
4518 
4519 #endif