ICU 60.2  60.2
translit.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 1999-2014, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 * Date Name Description
9 * 11/17/99 aliu Creation.
10 **********************************************************************
11 */
12 #ifndef TRANSLIT_H
13 #define TRANSLIT_H
14 
15 #include "unicode/utypes.h"
16 
22 #if !UCONFIG_NO_TRANSLITERATION
23 
24 #include "unicode/uobject.h"
25 #include "unicode/unistr.h"
26 #include "unicode/parseerr.h"
27 #include "unicode/utrans.h" // UTransPosition, UTransDirection
28 #include "unicode/strenum.h"
29 
31 
32 class UnicodeFilter;
33 class UnicodeSet;
34 class CompoundTransliterator;
35 class TransliteratorParser;
36 class NormalizationTransliterator;
37 class TransliteratorIDParser;
38 
244 
245 private:
246 
250  UnicodeString ID;
251 
258  UnicodeFilter* filter;
259 
260  int32_t maximumContextLength;
261 
262  public:
263 
// A context value for a factory function: either an integer or a pointer,
// passed by value. As a union, the two members share the same storage.
269  union Token {
// This token, interpreted as a 32-bit integer.
274  int32_t integer;
// This token, interpreted as a native pointer.
279  void* pointer;
280  };
281 
282 #ifndef U_HIDE_INTERNAL_API
283 
288  inline static Token integerToken(int32_t);
289 
295  inline static Token pointerToken(void*);
296 #endif /* U_HIDE_INTERNAL_API */
297 
// Pointer-to-function type for a factory: a function that creates and returns
// a Transliterator. It takes the transliterator ID and a Token context value.
// NOTE(review): presumably `context` is the Token given to registerFactory()
// alongside the factory — confirm against the implementation.
313  typedef Transliterator* (U_EXPORT2 *Factory)(const UnicodeString& ID, Token context);
314 
315 protected:
316 
326  Transliterator(const UnicodeString& ID, UnicodeFilter* adoptedFilter);
327 
333 
338  Transliterator& operator=(const Transliterator&);
339 
351  static Transliterator* createBasicInstance(const UnicodeString& id,
352  const UnicodeString* canon);
353 
354  friend class TransliteratorParser; // for parseID()
355  friend class TransliteratorIDParser; // for createBasicInstance()
356  friend class TransliteratorAlias; // for setID()
357 
358 public:
359 
364  virtual ~Transliterator();
365 
380  virtual Transliterator* clone() const;
381 
397  virtual int32_t transliterate(Replaceable& text,
398  int32_t start, int32_t limit) const;
399 
405  virtual void transliterate(Replaceable& text) const;
406 
471  virtual void transliterate(Replaceable& text, UTransPosition& index,
472  const UnicodeString& insertion,
473  UErrorCode& status) const;
474 
490  virtual void transliterate(Replaceable& text, UTransPosition& index,
491  UChar32 insertion,
492  UErrorCode& status) const;
493 
507  virtual void transliterate(Replaceable& text, UTransPosition& index,
508  UErrorCode& status) const;
509 
521  virtual void finishTransliteration(Replaceable& text,
522  UTransPosition& index) const;
523 
524 private:
525 
541  void _transliterate(Replaceable& text,
542  UTransPosition& index,
543  const UnicodeString* insertion,
544  UErrorCode &status) const;
545 
546 protected:
547 
627  virtual void handleTransliterate(Replaceable& text,
628  UTransPosition& pos,
629  UBool incremental) const = 0;
630 
631 public:
643  virtual void filteredTransliterate(Replaceable& text,
644  UTransPosition& index,
645  UBool incremental) const;
646 
647 private:
648 
676  virtual void filteredTransliterate(Replaceable& text,
677  UTransPosition& index,
678  UBool incremental,
679  UBool rollback) const;
680 
681 public:
682 
696  int32_t getMaximumContextLength(void) const;
697 
698 protected:
699 
706  void setMaximumContextLength(int32_t maxContextLength);
707 
708 public:
709 
720  virtual const UnicodeString& getID(void) const;
721 
731  static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,
732  UnicodeString& result);
733 
755  static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,
756  const Locale& inLocale,
757  UnicodeString& result);
758 
766  const UnicodeFilter* getFilter(void) const;
767 
777  UnicodeFilter* orphanFilter(void);
778 
789  void adoptFilter(UnicodeFilter* adoptedFilter);
790 
810  Transliterator* createInverse(UErrorCode& status) const;
811 
828  static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,
829  UTransDirection dir,
830  UParseError& parseError,
831  UErrorCode& status);
832 
843  static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,
844  UTransDirection dir,
845  UErrorCode& status);
846 
862  static Transliterator* U_EXPORT2 createFromRules(const UnicodeString& ID,
863  const UnicodeString& rules,
864  UTransDirection dir,
865  UParseError& parseError,
866  UErrorCode& status);
867 
879  virtual UnicodeString& toRules(UnicodeString& result,
880  UBool escapeUnprintable) const;
881 
894  int32_t countElements() const;
895 
915  const Transliterator& getElement(int32_t index, UErrorCode& ec) const;
916 
932  UnicodeSet& getSourceSet(UnicodeSet& result) const;
933 
948  virtual void handleGetSourceSet(UnicodeSet& result) const;
949 
963  virtual UnicodeSet& getTargetSet(UnicodeSet& result) const;
964 
965 public:
966 
983  static void U_EXPORT2 registerFactory(const UnicodeString& id,
984  Factory factory,
985  Token context);
986 
1008  static void U_EXPORT2 registerInstance(Transliterator* adoptedObj);
1009 
1024  static void U_EXPORT2 registerAlias(const UnicodeString& aliasID,
1025  const UnicodeString& realID);
1026 
1027 protected:
1028 
1029 #ifndef U_HIDE_INTERNAL_API
1030 
1039  static void _registerFactory(const UnicodeString& id,
1040  Factory factory,
1041  Token context);
1042 
1046  static void _registerInstance(Transliterator* adoptedObj);
1047 
1051  static void _registerAlias(const UnicodeString& aliasID, const UnicodeString& realID);
1052 
1086  static void _registerSpecialInverse(const UnicodeString& target,
1087  const UnicodeString& inverseTarget,
1088  UBool bidirectional);
1089 #endif /* U_HIDE_INTERNAL_API */
1090 
1091 public:
1092 
1110  static void U_EXPORT2 unregister(const UnicodeString& ID);
1111 
1112 public:
1113 
1123  static StringEnumeration* U_EXPORT2 getAvailableIDs(UErrorCode& ec);
1124 
1130  static int32_t U_EXPORT2 countAvailableSources(void);
1131 
1141  static UnicodeString& U_EXPORT2 getAvailableSource(int32_t index,
1142  UnicodeString& result);
1143 
1152  static int32_t U_EXPORT2 countAvailableTargets(const UnicodeString& source);
1153 
1165  static UnicodeString& U_EXPORT2 getAvailableTarget(int32_t index,
1166  const UnicodeString& source,
1167  UnicodeString& result);
1168 
1176  static int32_t U_EXPORT2 countAvailableVariants(const UnicodeString& source,
1177  const UnicodeString& target);
1178 
1192  static UnicodeString& U_EXPORT2 getAvailableVariant(int32_t index,
1193  const UnicodeString& source,
1194  const UnicodeString& target,
1195  UnicodeString& result);
1196 
1197 protected:
1198 
1199 #ifndef U_HIDE_INTERNAL_API
1200 
1204  static int32_t _countAvailableSources(void);
1205 
1210  static UnicodeString& _getAvailableSource(int32_t index,
1211  UnicodeString& result);
1212 
1217  static int32_t _countAvailableTargets(const UnicodeString& source);
1218 
1223  static UnicodeString& _getAvailableTarget(int32_t index,
1224  const UnicodeString& source,
1225  UnicodeString& result);
1226 
1231  static int32_t _countAvailableVariants(const UnicodeString& source,
1232  const UnicodeString& target);
1233 
1238  static UnicodeString& _getAvailableVariant(int32_t index,
1239  const UnicodeString& source,
1240  const UnicodeString& target,
1241  UnicodeString& result);
1242 #endif /* U_HIDE_INTERNAL_API */
1243 
1244 protected:
1245 
1252  void setID(const UnicodeString& id);
1253 
1254 public:
1255 
1266  static UClassID U_EXPORT2 getStaticClassID(void);
1267 
1283  virtual UClassID getDynamicClassID(void) const = 0;
1284 
1285 private:
1286  static UBool initializeRegistry(UErrorCode &status);
1287 
1288 public:
1289 #ifndef U_HIDE_OBSOLETE_API
1290 
1297  static int32_t U_EXPORT2 countAvailableIDs(void);
1298 
1311  static const UnicodeString& U_EXPORT2 getAvailableID(int32_t index);
1312 #endif /* U_HIDE_OBSOLETE_API */
1313 };
1314 
// Accessor: returns the current value of the private maximumContextLength member.
1315 inline int32_t Transliterator::getMaximumContextLength(void) const {
1316  return maximumContextLength;
1317 }
1318 
// Replaces this transliterator's ID member with a copy of `id`.
// After the assignment, one zero code unit is appended and the string is then
// truncated by one, so the length ends up the same as right after the copy.
// NOTE(review): presumably the zero unit remains in the buffer just past the
// logical end, which is what makes the ID NUL-terminated — confirm against
// UnicodeString's append()/truncate() behavior.
1319 inline void Transliterator::setID(const UnicodeString& id) {
1320  ID = id;
1321  // NUL-terminate the ID string, which is a non-aliased copy.
1322  ID.append((char16_t)0);
1323  ID.truncate(ID.length()-1);
1324 }
1325 
1326 #ifndef U_HIDE_INTERNAL_API
// Builds a Token whose `integer` member is set to `i` and returns it by value.
// Only compiled when U_HIDE_INTERNAL_API is not defined (see enclosing #ifndef).
1327 inline Transliterator::Token Transliterator::integerToken(int32_t i) {
1328  Token t;
1329  t.integer = i;
1330  return t;
1331 }
1332 
// Builds a Token whose `pointer` member is set to `p` and returns it by value.
// The pointer is stored as-is; nothing here copies or takes ownership of what
// it points to.
// Only compiled when U_HIDE_INTERNAL_API is not defined (see enclosing #ifndef).
1333 inline Transliterator::Token Transliterator::pointerToken(void* p) {
1334  Token t;
1335  t.pointer = p;
1336  return t;
1337 }
1338 #endif /* U_HIDE_INTERNAL_API */
1339 
1341 
1342 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
1343 
1344 #endif
Transliterator *(* Factory)(const UnicodeString &ID, Token context)
A function that creates and returns a Transliterator.
Definition: translit.h:313
Position structure for utrans_transIncremental() incremental transliteration.
Definition: utrans.h:122
C++ API: Unicode String.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:93
Base class for 'pure' C++ implementations of uenum api.
Definition: strenum.h:57
Transliterator is an abstract class that transliterates text from one format to another.
Definition: translit.h:243
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside...
Definition: utypes.h:360
A context integer or pointer for a factory function, passed by value.
Definition: translit.h:269
Replaceable is an abstract base class representing a string of characters that supports the replaceme...
Definition: rep.h:73
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:131
UBool truncate(int32_t targetLength)
Truncate this UnicodeString to the targetLength.
Definition: unistr.h:4782
UnicodeFilter defines a protocol for selecting a subset of the full range (U+0000 to U+10FFFF) of Uni...
Definition: unifilt.h:61
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:396
C API: Transliterator.
UnicodeString & append(const UnicodeString &srcText, int32_t srcStart, int32_t srcLength)
Append the characters in srcText in the range [srcStart, srcStart + srcLength) to the UnicodeString o...
Definition: unistr.h:4674
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:278
C++ API: Common ICU base class UObject.
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:132
C API: Parse Error Information.
int32_t integer
This token, interpreted as a 32-bit integer.
Definition: translit.h:274
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:396
C++ API: String Enumeration.
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:58
Basic definitions for ICU, for both C and C++ APIs.
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:286
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:223
UTransDirection
Direction constant indicating the direction in a transliterator, e.g., the forward or reverse rules o...
Definition: utrans.h:80
int8_t UBool
The ICU boolean type.
Definition: umachine.h:236
void * pointer
This token, interpreted as a native pointer.
Definition: translit.h:279
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:188