• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files

unistr.h

Go to the documentation of this file.
00001 /*
00002 **********************************************************************
00003 *   Copyright (C) 1998-2008, International Business Machines
00004 *   Corporation and others.  All Rights Reserved.
00005 **********************************************************************
00006 *
00007 * File unistr.h
00008 *
00009 * Modification History:
00010 *
00011 *   Date        Name        Description
00012 *   09/25/98    stephen     Creation.
00013 *   11/11/98    stephen     Changed per 11/9 code review.
00014 *   04/20/99    stephen     Overhauled per 4/16 code review.
00015 *   11/18/99    aliu        Made to inherit from Replaceable.  Added method
00016 *                           handleReplaceBetween(); other methods unchanged.
00017 *   06/25/01    grhoten     Remove dependency on iostream.
00018 ******************************************************************************
00019 */
00020 
00021 #ifndef UNISTR_H
00022 #define UNISTR_H
00023 
00029 #include "unicode/rep.h"
00030 
00031 struct UConverter;          // unicode/ucnv.h
00032 class  StringThreadTest;
00033 
// Fallback definition: U_COMPARE_CODE_POINT_ORDER is defined here as 0x8000 only when
// no previously included header has already defined it.
// NOTE(review): presumably the value has to stay identical to the definitions in
// ustring.h and unorm.h that the comment below points at -- confirm when changing it.
00034 #ifndef U_COMPARE_CODE_POINT_ORDER
00035 /* see also ustring.h and unorm.h */
00041 #define U_COMPARE_CODE_POINT_ORDER  0x8000
00042 #endif
00043 
// Local declaration of u_strlen(), which takes a const UChar * and returns an int32_t.
// It is only emitted when USTRING_H is not defined.
// NOTE(review): USTRING_H is presumably the include guard of unicode/ustring.h, so this
// lets the header use u_strlen() without including that file -- confirm.
00044 #ifndef USTRING_H
00045 
00048 U_STABLE int32_t U_EXPORT2
00049 u_strlen(const UChar *s);
00050 #endif
00051 
00052 U_NAMESPACE_BEGIN
00053 
00054 class Locale;               // unicode/locid.h
00055 class StringCharacterIterator;
00056 class BreakIterator;        // unicode/brkiter.h
00057 
00058 /* The <iostream> include has been moved to unicode/ustream.h */
00059 
// US_INV: shorthand for the namespace-qualified enumerator UnicodeString::kInvariant.
// It is the third argument of the fallback UNICODE_STRING expansion further down.
00070 #define US_INV U_NAMESPACE_QUALIFIER UnicodeString::kInvariant
00071 
// UNICODE_STRING(cs, _length): expands to a UnicodeString constructor call for cs with
// length _length. The expansion depends on the build configuration:
//   1. U_DECLARE_UTF16 is defined: U_DECLARE_UTF16(cs) is cast to const UChar * and
//      passed, with TRUE as first argument, to the (UBool, const UChar *, int32_t) constructor.
//   2. wchar_t and UChar have the same size and either the charset family is ASCII or
//      UChar is 2 bytes with U_WCHAR_IS_UTF16 defined: the token L is pasted onto cs
//      (so cs must be a plain string literal) and the wide literal is cast to const UChar *.
//   3. UChar is 1 byte and the charset family is ASCII: cs itself is cast to const UChar *.
//   4. Otherwise: the (const char *, int32_t, EInvariant) constructor is used with US_INV.
00089 #if defined(U_DECLARE_UTF16)
00090 #   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
00091 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
00092 #   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)L ## cs, _length)
00093 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
00094 #   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)cs, _length)
00095 #else
00096 #   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(cs, _length, US_INV)
00097 #endif
00098 
// UNICODE_STRING_SIMPLE(cs): convenience form of UNICODE_STRING that passes -1 as the length.
// NOTE(review): -1 presumably means "cs is NUL-terminated, compute the length" -- confirm.
00112 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
00113 
00183 class U_COMMON_API UnicodeString : public Replaceable
00184 {
00185 public:
00186 
00195   enum EInvariant {
00200     kInvariant
00201   };
00202 
00203   //========================================
00204   // Read-only operations
00205   //========================================
00206 
00207   /* Comparison - bitwise only - for international comparison use collation */
00208 
00216   inline UBool operator== (const UnicodeString& text) const;
00217 
00225   inline UBool operator!= (const UnicodeString& text) const;
00226 
00234   inline UBool operator> (const UnicodeString& text) const;
00235 
00243   inline UBool operator< (const UnicodeString& text) const;
00244 
00252   inline UBool operator>= (const UnicodeString& text) const;
00253 
00261   inline UBool operator<= (const UnicodeString& text) const;
00262 
00274   inline int8_t compare(const UnicodeString& text) const;
00275 
00290   inline int8_t compare(int32_t start,
00291          int32_t length,
00292          const UnicodeString& text) const;
00293 
00311    inline int8_t compare(int32_t start,
00312          int32_t length,
00313          const UnicodeString& srcText,
00314          int32_t srcStart,
00315          int32_t srcLength) const;
00316 
00329   inline int8_t compare(const UChar *srcChars,
00330          int32_t srcLength) const;
00331 
00346   inline int8_t compare(int32_t start,
00347          int32_t length,
00348          const UChar *srcChars) const;
00349 
00367   inline int8_t compare(int32_t start,
00368          int32_t length,
00369          const UChar *srcChars,
00370          int32_t srcStart,
00371          int32_t srcLength) const;
00372 
00390   inline int8_t compareBetween(int32_t start,
00391             int32_t limit,
00392             const UnicodeString& srcText,
00393             int32_t srcStart,
00394             int32_t srcLimit) const;
00395 
00413   inline int8_t compareCodePointOrder(const UnicodeString& text) const;
00414 
00434   inline int8_t compareCodePointOrder(int32_t start,
00435                                       int32_t length,
00436                                       const UnicodeString& srcText) const;
00437 
00459    inline int8_t compareCodePointOrder(int32_t start,
00460                                        int32_t length,
00461                                        const UnicodeString& srcText,
00462                                        int32_t srcStart,
00463                                        int32_t srcLength) const;
00464 
00483   inline int8_t compareCodePointOrder(const UChar *srcChars,
00484                                       int32_t srcLength) const;
00485 
00505   inline int8_t compareCodePointOrder(int32_t start,
00506                                       int32_t length,
00507                                       const UChar *srcChars) const;
00508 
00530   inline int8_t compareCodePointOrder(int32_t start,
00531                                       int32_t length,
00532                                       const UChar *srcChars,
00533                                       int32_t srcStart,
00534                                       int32_t srcLength) const;
00535 
00557   inline int8_t compareCodePointOrderBetween(int32_t start,
00558                                              int32_t limit,
00559                                              const UnicodeString& srcText,
00560                                              int32_t srcStart,
00561                                              int32_t srcLimit) const;
00562 
00581   inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
00582 
00603   inline int8_t caseCompare(int32_t start,
00604          int32_t length,
00605          const UnicodeString& srcText,
00606          uint32_t options) const;
00607 
00630   inline int8_t caseCompare(int32_t start,
00631          int32_t length,
00632          const UnicodeString& srcText,
00633          int32_t srcStart,
00634          int32_t srcLength,
00635          uint32_t options) const;
00636 
00656   inline int8_t caseCompare(const UChar *srcChars,
00657          int32_t srcLength,
00658          uint32_t options) const;
00659 
00680   inline int8_t caseCompare(int32_t start,
00681          int32_t length,
00682          const UChar *srcChars,
00683          uint32_t options) const;
00684 
00707   inline int8_t caseCompare(int32_t start,
00708          int32_t length,
00709          const UChar *srcChars,
00710          int32_t srcStart,
00711          int32_t srcLength,
00712          uint32_t options) const;
00713 
00736   inline int8_t caseCompareBetween(int32_t start,
00737             int32_t limit,
00738             const UnicodeString& srcText,
00739             int32_t srcStart,
00740             int32_t srcLimit,
00741             uint32_t options) const;
00742 
00750   inline UBool startsWith(const UnicodeString& text) const;
00751 
00762   inline UBool startsWith(const UnicodeString& srcText,
00763             int32_t srcStart,
00764             int32_t srcLength) const;
00765 
00774   inline UBool startsWith(const UChar *srcChars,
00775             int32_t srcLength) const;
00776 
00786   inline UBool startsWith(const UChar *srcChars,
00787             int32_t srcStart,
00788             int32_t srcLength) const;
00789 
00797   inline UBool endsWith(const UnicodeString& text) const;
00798 
00809   inline UBool endsWith(const UnicodeString& srcText,
00810           int32_t srcStart,
00811           int32_t srcLength) const;
00812 
00821   inline UBool endsWith(const UChar *srcChars,
00822           int32_t srcLength) const;
00823 
00834   inline UBool endsWith(const UChar *srcChars,
00835           int32_t srcStart,
00836           int32_t srcLength) const;
00837 
00838 
00839   /* Searching - bitwise only */
00840 
00849   inline int32_t indexOf(const UnicodeString& text) const;
00850 
00860   inline int32_t indexOf(const UnicodeString& text,
00861               int32_t start) const;
00862 
00874   inline int32_t indexOf(const UnicodeString& text,
00875               int32_t start,
00876               int32_t length) const;
00877 
00894   inline int32_t indexOf(const UnicodeString& srcText,
00895               int32_t srcStart,
00896               int32_t srcLength,
00897               int32_t start,
00898               int32_t length) const;
00899 
00911   inline int32_t indexOf(const UChar *srcChars,
00912               int32_t srcLength,
00913               int32_t start) const;
00914 
00927   inline int32_t indexOf(const UChar *srcChars,
00928               int32_t srcLength,
00929               int32_t start,
00930               int32_t length) const;
00931 
00948   int32_t indexOf(const UChar *srcChars,
00949               int32_t srcStart,
00950               int32_t srcLength,
00951               int32_t start,
00952               int32_t length) const;
00953 
00961   inline int32_t indexOf(UChar c) const;
00962 
00971   inline int32_t indexOf(UChar32 c) const;
00972 
00981   inline int32_t indexOf(UChar c,
00982               int32_t start) const;
00983 
00993   inline int32_t indexOf(UChar32 c,
00994               int32_t start) const;
00995 
01006   inline int32_t indexOf(UChar c,
01007               int32_t start,
01008               int32_t length) const;
01009 
01021   inline int32_t indexOf(UChar32 c,
01022               int32_t start,
01023               int32_t length) const;
01024 
01033   inline int32_t lastIndexOf(const UnicodeString& text) const;
01034 
01044   inline int32_t lastIndexOf(const UnicodeString& text,
01045               int32_t start) const;
01046 
01058   inline int32_t lastIndexOf(const UnicodeString& text,
01059               int32_t start,
01060               int32_t length) const;
01061 
01078   inline int32_t lastIndexOf(const UnicodeString& srcText,
01079               int32_t srcStart,
01080               int32_t srcLength,
01081               int32_t start,
01082               int32_t length) const;
01083 
01094   inline int32_t lastIndexOf(const UChar *srcChars,
01095               int32_t srcLength,
01096               int32_t start) const;
01097 
01110   inline int32_t lastIndexOf(const UChar *srcChars,
01111               int32_t srcLength,
01112               int32_t start,
01113               int32_t length) const;
01114 
01131   int32_t lastIndexOf(const UChar *srcChars,
01132               int32_t srcStart,
01133               int32_t srcLength,
01134               int32_t start,
01135               int32_t length) const;
01136 
01144   inline int32_t lastIndexOf(UChar c) const;
01145 
01154   inline int32_t lastIndexOf(UChar32 c) const;
01155 
01164   inline int32_t lastIndexOf(UChar c,
01165               int32_t start) const;
01166 
01176   inline int32_t lastIndexOf(UChar32 c,
01177               int32_t start) const;
01178 
01189   inline int32_t lastIndexOf(UChar c,
01190               int32_t start,
01191               int32_t length) const;
01192 
01204   inline int32_t lastIndexOf(UChar32 c,
01205               int32_t start,
01206               int32_t length) const;
01207 
01208 
01209   /* Character access */
01210 
01219   inline UChar charAt(int32_t offset) const;
01220 
01228   inline UChar operator[] (int32_t offset) const;
01229 
01241   inline UChar32 char32At(int32_t offset) const;
01242 
01258   inline int32_t getChar32Start(int32_t offset) const;
01259 
01276   inline int32_t getChar32Limit(int32_t offset) const;
01277 
01328   int32_t moveIndex32(int32_t index, int32_t delta) const;
01329 
01330   /* Substring extraction */
01331 
01347   inline void extract(int32_t start,
01348            int32_t length,
01349            UChar *dst,
01350            int32_t dstStart = 0) const;
01351 
01373   int32_t
01374   extract(UChar *dest, int32_t destCapacity,
01375           UErrorCode &errorCode) const;
01376 
01387   inline void extract(int32_t start,
01388            int32_t length,
01389            UnicodeString& target) const;
01390 
01402   inline void extractBetween(int32_t start,
01403               int32_t limit,
01404               UChar *dst,
01405               int32_t dstStart = 0) const;
01406 
01416   virtual void extractBetween(int32_t start,
01417               int32_t limit,
01418               UnicodeString& target) const;
01419 
01441   int32_t extract(int32_t start,
01442            int32_t startLength,
01443            char *target,
01444            int32_t targetCapacity,
01445            enum EInvariant inv) const;
01446 
01447 #if !UCONFIG_NO_CONVERSION
01448 
01474   inline int32_t extract(int32_t start,
01475                  int32_t startLength,
01476                  char *target,
01477                  const char *codepage = 0) const;
01478 
01508   int32_t extract(int32_t start,
01509            int32_t startLength,
01510            char *target,
01511            uint32_t targetLength,
01512            const char *codepage = 0) const;
01513 
01531   int32_t extract(char *dest, int32_t destCapacity,
01532                   UConverter *cnv,
01533                   UErrorCode &errorCode) const;
01534 
01535 #endif
01536 
01537   /* Length operations */
01538 
01547   inline int32_t length(void) const;
01548 
01562   int32_t
01563   countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
01564 
01588   UBool
01589   hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
01590 
01596   inline UBool isEmpty(void) const;
01597 
01607   inline int32_t getCapacity(void) const;
01608 
01609   /* Other operations */
01610 
01616   inline int32_t hashCode(void) const;
01617 
01629   inline UBool isBogus(void) const;
01630 
01631 
01632   //========================================
01633   // Write operations
01634   //========================================
01635 
01636   /* Assignment operations */
01637 
01645   UnicodeString &operator=(const UnicodeString &srcText);
01646 
01667   UnicodeString &fastCopyFrom(const UnicodeString &src);
01668 
01676   inline UnicodeString& operator= (UChar ch);
01677 
01685   inline UnicodeString& operator= (UChar32 ch);
01686 
01698   inline UnicodeString& setTo(const UnicodeString& srcText,
01699                int32_t srcStart);
01700 
01714   inline UnicodeString& setTo(const UnicodeString& srcText,
01715                int32_t srcStart,
01716                int32_t srcLength);
01717 
01726   inline UnicodeString& setTo(const UnicodeString& srcText);
01727 
01736   inline UnicodeString& setTo(const UChar *srcChars,
01737                int32_t srcLength);
01738 
01747   UnicodeString& setTo(UChar srcChar);
01748 
01757   UnicodeString& setTo(UChar32 srcChar);
01758 
01779   UnicodeString &setTo(UBool isTerminated,
01780                        const UChar *text,
01781                        int32_t textLength);
01782 
01802   UnicodeString &setTo(UChar *buffer,
01803                        int32_t buffLength,
01804                        int32_t buffCapacity);
01805 
01846   void setToBogus();
01847 
01855   UnicodeString& setCharAt(int32_t offset,
01856                UChar ch);
01857 
01858 
01859   /* Append operations */
01860 
01868  inline  UnicodeString& operator+= (UChar ch);
01869 
01877  inline  UnicodeString& operator+= (UChar32 ch);
01878 
01887   inline UnicodeString& operator+= (const UnicodeString& srcText);
01888 
01903   inline UnicodeString& append(const UnicodeString& srcText,
01904             int32_t srcStart,
01905             int32_t srcLength);
01906 
01914   inline UnicodeString& append(const UnicodeString& srcText);
01915 
01929   inline UnicodeString& append(const UChar *srcChars,
01930             int32_t srcStart,
01931             int32_t srcLength);
01932 
01941   inline UnicodeString& append(const UChar *srcChars,
01942             int32_t srcLength);
01943 
01950   inline UnicodeString& append(UChar srcChar);
01951 
01958   inline UnicodeString& append(UChar32 srcChar);
01959 
01960 
01961   /* Insert operations */
01962 
01976   inline UnicodeString& insert(int32_t start,
01977             const UnicodeString& srcText,
01978             int32_t srcStart,
01979             int32_t srcLength);
01980 
01989   inline UnicodeString& insert(int32_t start,
01990             const UnicodeString& srcText);
01991 
02005   inline UnicodeString& insert(int32_t start,
02006             const UChar *srcChars,
02007             int32_t srcStart,
02008             int32_t srcLength);
02009 
02019   inline UnicodeString& insert(int32_t start,
02020             const UChar *srcChars,
02021             int32_t srcLength);
02022 
02031   inline UnicodeString& insert(int32_t start,
02032             UChar srcChar);
02033 
02042   inline UnicodeString& insert(int32_t start,
02043             UChar32 srcChar);
02044 
02045 
02046   /* Replace operations */
02047 
02065   UnicodeString& replace(int32_t start,
02066              int32_t length,
02067              const UnicodeString& srcText,
02068              int32_t srcStart,
02069              int32_t srcLength);
02070 
02083   UnicodeString& replace(int32_t start,
02084              int32_t length,
02085              const UnicodeString& srcText);
02086 
02104   UnicodeString& replace(int32_t start,
02105              int32_t length,
02106              const UChar *srcChars,
02107              int32_t srcStart,
02108              int32_t srcLength);
02109 
02122   inline UnicodeString& replace(int32_t start,
02123              int32_t length,
02124              const UChar *srcChars,
02125              int32_t srcLength);
02126 
02138   inline UnicodeString& replace(int32_t start,
02139              int32_t length,
02140              UChar srcChar);
02141 
02153   inline UnicodeString& replace(int32_t start,
02154              int32_t length,
02155              UChar32 srcChar);
02156 
02166   inline UnicodeString& replaceBetween(int32_t start,
02167                 int32_t limit,
02168                 const UnicodeString& srcText);
02169 
02184   inline UnicodeString& replaceBetween(int32_t start,
02185                 int32_t limit,
02186                 const UnicodeString& srcText,
02187                 int32_t srcStart,
02188                 int32_t srcLimit);
02189 
02200   virtual void handleReplaceBetween(int32_t start,
02201                                     int32_t limit,
02202                                     const UnicodeString& text);
02203 
02209   virtual UBool hasMetaData() const;
02210 
02226   virtual void copy(int32_t start, int32_t limit, int32_t dest);
02227 
02228   /* Search and replace operations */
02229 
02238   inline UnicodeString& findAndReplace(const UnicodeString& oldText,
02239                 const UnicodeString& newText);
02240 
02252   inline UnicodeString& findAndReplace(int32_t start,
02253                 int32_t length,
02254                 const UnicodeString& oldText,
02255                 const UnicodeString& newText);
02256 
02274   UnicodeString& findAndReplace(int32_t start,
02275                 int32_t length,
02276                 const UnicodeString& oldText,
02277                 int32_t oldStart,
02278                 int32_t oldLength,
02279                 const UnicodeString& newText,
02280                 int32_t newStart,
02281                 int32_t newLength);
02282 
02283 
02284   /* Remove operations */
02285 
02291   inline UnicodeString& remove(void);
02292 
02301   inline UnicodeString& remove(int32_t start,
02302                                int32_t length = (int32_t)INT32_MAX);
02303 
02312   inline UnicodeString& removeBetween(int32_t start,
02313                                       int32_t limit = (int32_t)INT32_MAX);
02314 
02315 
02316   /* Length operations */
02317 
02329   UBool padLeading(int32_t targetLength,
02330                     UChar padChar = 0x0020);
02331 
02343   UBool padTrailing(int32_t targetLength,
02344                      UChar padChar = 0x0020);
02345 
02352   inline UBool truncate(int32_t targetLength);
02353 
02359   UnicodeString& trim(void);
02360 
02361 
02362   /* Miscellaneous operations */
02363 
02369   inline UnicodeString& reverse(void);
02370 
02379   inline UnicodeString& reverse(int32_t start,
02380              int32_t length);
02381 
02388   UnicodeString& toUpper(void);
02389 
02397   UnicodeString& toUpper(const Locale& locale);
02398 
02405   UnicodeString& toLower(void);
02406 
02414   UnicodeString& toLower(const Locale& locale);
02415 
02416 #if !UCONFIG_NO_BREAK_ITERATION
02417 
02444   UnicodeString &toTitle(BreakIterator *titleIter);
02445 
02473   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
02474 
02506   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
02507 
02508 #endif
02509 
02521   UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
02522 
02523   //========================================
02524   // Access to the internal buffer
02525   //========================================
02526 
02570   UChar *getBuffer(int32_t minCapacity);
02571 
02592   void releaseBuffer(int32_t newLength=-1);
02593 
02624   inline const UChar *getBuffer() const;
02625 
02659   inline const UChar *getTerminatedBuffer();
02660 
02661   //========================================
02662   // Constructors
02663   //========================================
02664 
02668   UnicodeString();
02669 
02681   UnicodeString(int32_t capacity, UChar32 c, int32_t count);
02682 
02688   UnicodeString(UChar ch);
02689 
02695   UnicodeString(UChar32 ch);
02696 
02703   UnicodeString(const UChar *text);
02704 
02712   UnicodeString(const UChar *text,
02713         int32_t textLength);
02714 
02734   UnicodeString(UBool isTerminated,
02735                 const UChar *text,
02736                 int32_t textLength);
02737 
02756   UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
02757 
02758 #if !UCONFIG_NO_CONVERSION
02759 
02777   UnicodeString(const char *codepageData,
02778         const char *codepage = 0);
02779 
02797   UnicodeString(const char *codepageData,
02798         int32_t dataLength,
02799         const char *codepage = 0);
02800 
02822   UnicodeString(
02823         const char *src, int32_t srcLength,
02824         UConverter *cnv,
02825         UErrorCode &errorCode);
02826 
02827 #endif
02828 
02853   UnicodeString(const char *src, int32_t length, enum EInvariant inv);
02854 
02855 
02861   UnicodeString(const UnicodeString& that);
02862 
02869   UnicodeString(const UnicodeString& src, int32_t srcStart);
02870 
02878   UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
02879 
02896   virtual Replaceable *clone() const;
02897 
02901   virtual ~UnicodeString();
02902 
02903 
02904   /* Miscellaneous operations */
02905 
02940   UnicodeString unescape() const;
02941 
02961   UChar32 unescapeAt(int32_t &offset) const;
02962 
02968   static UClassID U_EXPORT2 getStaticClassID();
02969 
02975   virtual UClassID getDynamicClassID() const;
02976 
02977   //========================================
02978   // Implementation methods
02979   //========================================
02980 
02981 protected:
02986   virtual int32_t getLength() const;
02987 
02993   virtual UChar getCharAt(int32_t offset) const;
02994 
03000   virtual UChar32 getChar32At(int32_t offset) const;
03001 
03002 private:
03003 
03004   inline int8_t
03005   doCompare(int32_t start,
03006            int32_t length,
03007            const UnicodeString& srcText,
03008            int32_t srcStart,
03009            int32_t srcLength) const;
03010 
03011   int8_t doCompare(int32_t start,
03012            int32_t length,
03013            const UChar *srcChars,
03014            int32_t srcStart,
03015            int32_t srcLength) const;
03016 
03017   inline int8_t
03018   doCompareCodePointOrder(int32_t start,
03019                           int32_t length,
03020                           const UnicodeString& srcText,
03021                           int32_t srcStart,
03022                           int32_t srcLength) const;
03023 
03024   int8_t doCompareCodePointOrder(int32_t start,
03025                                  int32_t length,
03026                                  const UChar *srcChars,
03027                                  int32_t srcStart,
03028                                  int32_t srcLength) const;
03029 
03030   inline int8_t
03031   doCaseCompare(int32_t start,
03032                 int32_t length,
03033                 const UnicodeString &srcText,
03034                 int32_t srcStart,
03035                 int32_t srcLength,
03036                 uint32_t options) const;
03037 
03038   int8_t
03039   doCaseCompare(int32_t start,
03040                 int32_t length,
03041                 const UChar *srcChars,
03042                 int32_t srcStart,
03043                 int32_t srcLength,
03044                 uint32_t options) const;
03045 
03046   int32_t doIndexOf(UChar c,
03047             int32_t start,
03048             int32_t length) const;
03049 
03050   int32_t doIndexOf(UChar32 c,
03051                         int32_t start,
03052                         int32_t length) const;
03053 
03054   int32_t doLastIndexOf(UChar c,
03055                 int32_t start,
03056                 int32_t length) const;
03057 
03058   int32_t doLastIndexOf(UChar32 c,
03059                             int32_t start,
03060                             int32_t length) const;
03061 
03062   void doExtract(int32_t start,
03063          int32_t length,
03064          UChar *dst,
03065          int32_t dstStart) const;
03066 
03067   inline void doExtract(int32_t start,
03068          int32_t length,
03069          UnicodeString& target) const;
03070 
03071   inline UChar doCharAt(int32_t offset)  const;
03072 
03073   UnicodeString& doReplace(int32_t start,
03074                int32_t length,
03075                const UnicodeString& srcText,
03076                int32_t srcStart,
03077                int32_t srcLength);
03078 
03079   UnicodeString& doReplace(int32_t start,
03080                int32_t length,
03081                const UChar *srcChars,
03082                int32_t srcStart,
03083                int32_t srcLength);
03084 
03085   UnicodeString& doReverse(int32_t start,
03086                int32_t length);
03087 
03088   // calculate hash code
03089   int32_t doHashCode(void) const;
03090 
03091   // get pointer to start of array
03092   // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
03093   inline UChar* getArrayStart(void);
03094   inline const UChar* getArrayStart(void) const;
03095 
03096   // A UnicodeString object (not necessarily its current buffer)
03097   // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
03098   inline UBool isWritable() const;
03099 
03100   // Is the current buffer writable?
03101   inline UBool isBufferWritable() const;
03102 
03103   // None of the following does releaseArray().
03104   inline void setLength(int32_t len);        // sets only fShortLength and fLength
03105   inline void setToEmpty();                  // sets fFlags=kShortString
03106   inline void setToStackBuffer(int32_t len); // sets fFlags=kShortString
03107   inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
03108 
03109   // allocate the array; result may be fStackBuffer
03110   // sets refCount to 1 if appropriate
03111   // sets fArray, fCapacity, and fFlags
03112   // returns boolean for success or failure
03113   UBool allocate(int32_t capacity);
03114 
03115   // release the array if owned
03116   void releaseArray(void);
03117 
03118   // turn a bogus string into an empty one
03119   void unBogus();
03120 
03121   // implements assigment operator, copy constructor, and fastCopyFrom()
03122   UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
03123 
03124   // Pin start and limit to acceptable values.
03125   inline void pinIndex(int32_t& start) const;
03126   inline void pinIndices(int32_t& start,
03127                          int32_t& length) const;
03128 
03129 #if !UCONFIG_NO_CONVERSION
03130 
03131   /* Internal extract() using UConverter. */
03132   int32_t doExtract(int32_t start, int32_t length,
03133                     char *dest, int32_t destCapacity,
03134                     UConverter *cnv,
03135                     UErrorCode &errorCode) const;
03136 
03137   /*
03138    * Real constructor for converting from codepage data.
03139    * It assumes that it is called with !fRefCounted.
03140    *
03141    * If <code>codepage==0</code>, then the default converter
03142    * is used for the platform encoding.
03143    * If <code>codepage</code> is an empty string (<code>""</code>),
03144    * then a simple conversion is performed on the codepage-invariant
03145    * subset ("invariant characters") of the platform encoding. See utypes.h.
03146    */
03147   void doCodepageCreate(const char *codepageData,
03148                         int32_t dataLength,
03149                         const char *codepage);
03150 
03151   /*
03152    * Worker function for creating a UnicodeString from
03153    * a codepage string using a UConverter.
03154    */
03155   void
03156   doCodepageCreate(const char *codepageData,
03157                    int32_t dataLength,
03158                    UConverter *converter,
03159                    UErrorCode &status);
03160 
03161 #endif
03162 
03163   /*
03164    * This function is called when write access to the array
03165    * is necessary.
03166    *
03167    * We need to make a copy of the array if
03168    * the buffer is read-only, or
03169    * the buffer is refCounted (shared), and refCount>1, or
03170    * the buffer is too small.
03171    *
03172    * Return FALSE if memory could not be allocated.
03173    */
03174   UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
03175                             int32_t growCapacity = -1,
03176                             UBool doCopyArray = TRUE,
03177                             int32_t **pBufferToDelete = 0,
03178                             UBool forceClone = FALSE);
03179 
03180   // common function for case mappings
03181   UnicodeString &
03182   caseMap(BreakIterator *titleIter,
03183           const char *locale,
03184           uint32_t options,
03185           int32_t toWhichCase);
03186 
03187   // ref counting
03188   void addRef(void);
03189   int32_t removeRef(void);
03190   int32_t refCount(void) const;
03191 
03192   // constants
03193   enum {
03194     // Set the stack buffer size so that sizeof(UnicodeString) is a multiple of sizeof(pointer);
03195     // the sums below are vtable pointer + fShortLength + fFlags + fStackBuffer (2-byte UChars):
03196     // 32-bit pointers: 4+1+1+13*2 = 32 bytes;  64-bit pointers: 8+1+1+15*2 = 40 bytes
03197     US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for small strings, in UChars (it dimensions fStackBuffer)
03198     kInvalidUChar=0xffff, // invalid UChar (NOTE(review): the original comment said "invalid UChar index" - confirm intended use)
03199     kGrowSize=128, // grow size for this buffer
03200     kInvalidHashCode=0, // invalid hash code
03201     kEmptyHashCode=1, // hash code for empty string
03202 
03203     // bit flag values for fFlags (each is a distinct single bit)
03204     kIsBogus=1,         // this string is bogus, i.e., not valid or NULL
03205     kUsingStackBuffer=2,// the characters live in fUnion.fStackBuffer rather than fUnion.fFields.fArray (see getArrayStart())
03206     kRefCounted=4,      // there is a refCount field before the characters in fArray
03207     kBufferIsReadonly=8,// do not write to this buffer
03208     kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),
03209                         // and releaseBuffer(newLength) must be called
03210 
03211     // combined values for convenience: one name per kind of buffer
03212     kShortString=kUsingStackBuffer,
03213     kLongString=kRefCounted,
03214     kReadonlyAlias=kBufferIsReadonly,
03215     kWritableAlias=0    // no flag bits set
03216   };
03217 
03218   friend class StringThreadTest;    // StringThreadTest is only forward-declared at the top of this header
03219 
03220   union StackBufferOrFields;        // forward declaration necessary before friend declaration
03221   friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion (it dimensions fStackBuffer)
03222 
03223   /*
03224    * The following are all the class fields that are stored
03225    * in each UnicodeString object.
03226    * Note that UnicodeString has virtual functions,
03227    * therefore there is an implicit vtable pointer
03228    * as the first real field.
03229    * The fields should be aligned such that no padding is
03230    * necessary, mostly by having larger types first;
03231    * reordering or resizing them changes the totals below.
03232    * Expected size: 32 bytes on 32-bit machines, 40 bytes on 64-bit machines (8-byte pointers).
03233    */
03234   // (implicit) *vtable;
03235   int8_t    fShortLength;   // >=0 (0..127): the length itself;  <0: real length is in fUnion.fFields.fLength (see length())
03236   uint8_t   fFlags;         // bit flags: see constants above
03237   union StackBufferOrFields {
03238     // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
03239     // else fFields is used
03240     UChar     fStackBuffer [US_STACKBUF_SIZE]; // buffer for small strings
03241     struct {
03242       uint16_t  fPadding;   // align the following field at 8B (32b pointers) or 12B (64b)
03243       int32_t   fLength;    // number of characters in fArray if >127; else undefined
03244       UChar     *fArray;    // the Unicode data (aligned at 12B (32b pointers) or 16B (64b))
03245       int32_t   fCapacity;  // capacity of fArray, returned by getCapacity() (NOTE(review): presumably counted in UChars like US_STACKBUF_SIZE - confirm)
03246     } fFields;
03247   } fUnion;
03248 };
03249 
// Binary operator+ taking two UnicodeString operands by const reference and
// returning a UnicodeString by value.
// NOTE(review): presumably the result is the concatenation of s1 and s2; the
// implementation is not part of this header section - confirm.
03258 U_COMMON_API UnicodeString U_EXPORT2
03259 operator+ (const UnicodeString &s1, const UnicodeString &s2);
03260 
03261 //========================================
03262 // Inline members
03263 //========================================
03264 
03265 //========================================
03266 // Privates
03267 //========================================
03268 
03269 inline void
03270 UnicodeString::pinIndex(int32_t& start) const
03271 {
03272   // pin index
03273   if(start < 0) {
03274     start = 0;
03275   } else if(start > length()) {
03276     start = length();
03277   }
03278 }
03279 
03280 inline void
03281 UnicodeString::pinIndices(int32_t& start,
03282                           int32_t& _length) const
03283 {
03284   // pin indices
03285   int32_t len = length();
03286   if(start < 0) {
03287     start = 0;
03288   } else if(start > len) {
03289     start = len;
03290   }
03291   if(_length < 0) {
03292     _length = 0;
03293   } else if(_length > (len - start)) {
03294     _length = (len - start);
03295   }
03296 }
03297 
03298 inline UChar*
03299 UnicodeString::getArrayStart()
03300 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
03301 
03302 inline const UChar*
03303 UnicodeString::getArrayStart() const
03304 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
03305 
03306 //========================================
03307 // Read-only implementation methods
03308 //========================================
03309 inline int32_t
03310 UnicodeString::length() const
03311 { return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }
03312 
03313 inline int32_t
03314 UnicodeString::getCapacity() const
03315 { return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }
03316 
03317 inline int32_t
03318 UnicodeString::hashCode() const
03319 { return doHashCode(); }
03320 
03321 inline UBool
03322 UnicodeString::isBogus() const
03323 { return (UBool)(fFlags & kIsBogus); }
03324 
03325 inline UBool
03326 UnicodeString::isWritable() const
03327 { return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }
03328 
03329 inline UBool
03330 UnicodeString::isBufferWritable() const
03331 {
03332   return (UBool)(
03333       !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
03334       (!(fFlags&kRefCounted) || refCount()==1));
03335 }
03336 
03337 inline const UChar *
03338 UnicodeString::getBuffer() const {
03339   if(fFlags&(kIsBogus|kOpenGetBuffer)) {
03340     return 0;
03341   } else if(fFlags&kUsingStackBuffer) {
03342     return fUnion.fStackBuffer;
03343   } else {
03344     return fUnion.fFields.fArray;
03345   }
03346 }
03347 
03348 //=====================================