ICU 59.1
uset.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2002-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: uset.h
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2002mar07
16 * created by: Markus W. Scherer
17 *
18 * C version of UnicodeSet.
19 */
20 
21 
29 #ifndef __USET_H__
30 #define __USET_H__
31 
32 #include "unicode/utypes.h"
33 #include "unicode/uchar.h"
34 #include "unicode/localpointer.h"
35 
36 #ifndef UCNV_H
37 struct USet;
43 typedef struct USet USet;
44 #endif
45 
51 enum {
57 
85 
95 };
96 
152 typedef enum USetSpanCondition {
201 #ifndef U_HIDE_DEPRECATED_API
202 
207 #endif // U_HIDE_DEPRECATED_API
209 
210 enum {
218 };
219 
225 typedef struct USerializedSet {
230  const uint16_t *array;
235  int32_t bmpLength;
240  int32_t length;
247 
248 /*********************************************************************
249  * USet API
250  *********************************************************************/
251 
259 U_STABLE USet* U_EXPORT2
260 uset_openEmpty(void);
261 
272 U_STABLE USet* U_EXPORT2
273 uset_open(UChar32 start, UChar32 end);
274 
284 U_STABLE USet* U_EXPORT2
285 uset_openPattern(const UChar* pattern, int32_t patternLength,
286  UErrorCode* ec);
287 
299 U_STABLE USet* U_EXPORT2
300 uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
301  uint32_t options,
302  UErrorCode* ec);
303 
310 U_STABLE void U_EXPORT2
311 uset_close(USet* set);
312 
313 #if U_SHOW_CPLUSPLUS_API
314 
316 
327 
329 
330 #endif
331 
341 U_STABLE USet * U_EXPORT2
342 uset_clone(const USet *set);
343 
353 U_STABLE UBool U_EXPORT2
354 uset_isFrozen(const USet *set);
355 
370 U_STABLE void U_EXPORT2
371 uset_freeze(USet *set);
372 
383 U_STABLE USet * U_EXPORT2
384 uset_cloneAsThawed(const USet *set);
385 
395 U_STABLE void U_EXPORT2
396 uset_set(USet* set,
397  UChar32 start, UChar32 end);
398 
420 U_STABLE int32_t U_EXPORT2
422  const UChar *pattern, int32_t patternLength,
423  uint32_t options,
424  UErrorCode *status);
425 
448 U_STABLE void U_EXPORT2
450  UProperty prop, int32_t value, UErrorCode* ec);
451 
487 U_STABLE void U_EXPORT2
489  const UChar *prop, int32_t propLength,
490  const UChar *value, int32_t valueLength,
491  UErrorCode* ec);
492 
502 U_STABLE UBool U_EXPORT2
503 uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
504  int32_t pos);
505 
521 U_STABLE int32_t U_EXPORT2
522 uset_toPattern(const USet* set,
523  UChar* result, int32_t resultCapacity,
524  UBool escapeUnprintable,
525  UErrorCode* ec);
526 
535 U_STABLE void U_EXPORT2
536 uset_add(USet* set, UChar32 c);
537 
550 U_STABLE void U_EXPORT2
551 uset_addAll(USet* set, const USet *additionalSet);
552 
562 U_STABLE void U_EXPORT2
563 uset_addRange(USet* set, UChar32 start, UChar32 end);
564 
574 U_STABLE void U_EXPORT2
575 uset_addString(USet* set, const UChar* str, int32_t strLen);
576 
586 U_STABLE void U_EXPORT2
587 uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
588 
597 U_STABLE void U_EXPORT2
598 uset_remove(USet* set, UChar32 c);
599 
609 U_STABLE void U_EXPORT2
610 uset_removeRange(USet* set, UChar32 start, UChar32 end);
611 
621 U_STABLE void U_EXPORT2
622 uset_removeString(USet* set, const UChar* str, int32_t strLen);
623 
635 U_STABLE void U_EXPORT2
636 uset_removeAll(USet* set, const USet* removeSet);
637 
652 U_STABLE void U_EXPORT2
653 uset_retain(USet* set, UChar32 start, UChar32 end);
654 
667 U_STABLE void U_EXPORT2
668 uset_retainAll(USet* set, const USet* retain);
669 
678 U_STABLE void U_EXPORT2
679 uset_compact(USet* set);
680 
689 U_STABLE void U_EXPORT2
690 uset_complement(USet* set);
691 
703 U_STABLE void U_EXPORT2
704 uset_complementAll(USet* set, const USet* complement);
705 
713 U_STABLE void U_EXPORT2
714 uset_clear(USet* set);
715 
742 U_STABLE void U_EXPORT2
743 uset_closeOver(USet* set, int32_t attributes);
744 
751 U_STABLE void U_EXPORT2
753 
761 U_STABLE UBool U_EXPORT2
762 uset_isEmpty(const USet* set);
763 
772 U_STABLE UBool U_EXPORT2
773 uset_contains(const USet* set, UChar32 c);
774 
784 U_STABLE UBool U_EXPORT2
785 uset_containsRange(const USet* set, UChar32 start, UChar32 end);
786 
795 U_STABLE UBool U_EXPORT2
796 uset_containsString(const USet* set, const UChar* str, int32_t strLen);
797 
808 U_STABLE int32_t U_EXPORT2
809 uset_indexOf(const USet* set, UChar32 c);
810 
821 U_STABLE UChar32 U_EXPORT2
822 uset_charAt(const USet* set, int32_t charIndex);
823 
832 U_STABLE int32_t U_EXPORT2
833 uset_size(const USet* set);
834 
843 U_STABLE int32_t U_EXPORT2
844 uset_getItemCount(const USet* set);
845 
864 U_STABLE int32_t U_EXPORT2
865 uset_getItem(const USet* set, int32_t itemIndex,
866  UChar32* start, UChar32* end,
867  UChar* str, int32_t strCapacity,
868  UErrorCode* ec);
869 
878 U_STABLE UBool U_EXPORT2
879 uset_containsAll(const USet* set1, const USet* set2);
880 
891 U_STABLE UBool U_EXPORT2
892 uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
893 
902 U_STABLE UBool U_EXPORT2
903 uset_containsNone(const USet* set1, const USet* set2);
904 
913 U_STABLE UBool U_EXPORT2
914 uset_containsSome(const USet* set1, const USet* set2);
915 
935 U_STABLE int32_t U_EXPORT2
936 uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
937 
956 U_STABLE int32_t U_EXPORT2
957 uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
958 
978 U_STABLE int32_t U_EXPORT2
979 uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
980 
999 U_STABLE int32_t U_EXPORT2
1000 uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1001 
1010 U_STABLE UBool U_EXPORT2
1011 uset_equals(const USet* set1, const USet* set2);
1012 
1013 /*********************************************************************
1014  * Serialized set API
1015  *********************************************************************/
1016 
1066 U_STABLE int32_t U_EXPORT2
1067 uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
1068 
1077 U_STABLE UBool U_EXPORT2
1078 uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
1079 
1087 U_STABLE void U_EXPORT2
1089 
1098 U_STABLE UBool U_EXPORT2
1100 
1110 U_STABLE int32_t U_EXPORT2
1112 
1126 U_STABLE UBool U_EXPORT2
1127 uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
1128  UChar32* pStart, UChar32* pEnd);
1129 
1130 #endif
UBool uset_containsRange(const USet *set, UChar32 start, UChar32 end)
Returns TRUE if the given USet contains all characters c where start <= c && c <= end...
uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]
A small buffer for the array to reduce memory allocations.
Definition: uset.h:245
void uset_removeAll(USet *set, const USet *removeSet)
Removes from this set all of its elements that are contained in the specified set.
void uset_clear(USet *set)
Removes all of the elements from this set.
UBool uset_containsAllCodePoints(const USet *set, const UChar *str, int32_t strLen)
Returns true if this set contains all the characters of the given string.
USet * uset_clone(const USet *set)
Returns a copy of this object.
int32_t uset_applyPattern(USet *set, const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *status)
Modifies the set to represent the set specified by the given pattern.
void uset_retain(USet *set, UChar32 start, UChar32 end)
Retain only the elements in this set that are contained in the specified range.
UBool uset_getSerializedRange(const USerializedSet *set, int32_t rangeIndex, UChar32 *pStart, UChar32 *pEnd)
Returns a range of characters contained in the given serialized set.
void uset_addAll(USet *set, const USet *additionalSet)
Adds all of the elements in the specified set to this set if they're not already present.
Spans the longest substring that is a concatenation of set elements (characters or strings)...
Definition: uset.h:180
UBool uset_resemblesPattern(const UChar *pattern, int32_t patternLength, int32_t pos)
Return true if the given position, in the given pattern, appears to be the start of a UnicodeSet pattern.
One more than the last span condition.
Definition: uset.h:206
void uset_set(USet *set, UChar32 start, UChar32 end)
Causes the USet object to represent the range start - end.
const uint16_t * array
The serialized Unicode Set.
Definition: uset.h:230
Capacity of USerializedSet::staticArray.
Definition: uset.h:217
int32_t uset_size(const USet *set)
Returns the number of characters and strings contained in the given USet.
Enable case insensitive matching.
Definition: uset.h:84
USet * uset_cloneAsThawed(const USet *set)
Clone the set and make the clone mutable.
void uset_removeString(USet *set, const UChar *str, int32_t strLen)
Removes the given string from the given USet.
"Smart pointer" class, closes a USet via uset_close().
void uset_addAllCodePoints(USet *set, const UChar *str, int32_t strLen)
Adds each of the characters in this string to the set.
UChar32 uset_charAt(const USet *set, int32_t charIndex)
Returns the character at the given index within this set, where the set is ordered by ascending code point.
UBool uset_contains(const USet *set, UChar32 c)
Returns TRUE if the given USet contains the given character.
UBool uset_isEmpty(const USet *set)
Returns TRUE if the given USet contains no characters and no strings.
int32_t uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
void uset_removeAllStrings(USet *set)
Remove all strings from this set.
void uset_compact(USet *set)
Reallocate this object's internal structures to take up the least possible space, without changing this object's value.
void uset_applyPropertyAlias(USet *set, const UChar *prop, int32_t propLength, const UChar *value, int32_t valueLength, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given property...
void uset_freeze(USet *set)
Freeze the set (make it immutable).
USet * uset_openPattern(const UChar *pattern, int32_t patternLength, UErrorCode *ec)
Creates a set from the given pattern.
struct USerializedSet USerializedSet
A serialized form of a Unicode set.
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:131
USet * uset_openEmpty(void)
Create an empty USet object.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:496
UBool uset_containsAll(const USet *set1, const USet *set2)
Returns true if set1 contains all the characters and strings of set2.
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
void uset_complementAll(USet *set, const USet *complement)
Complements in this set all elements contained in the specified set.
int32_t uset_getItemCount(const USet *set)
Returns the number of items in this set.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:396
void uset_removeRange(USet *set, UChar32 start, UChar32 end)
Removes the given range of characters from the given USet.
void uset_add(USet *set, UChar32 c)
Adds the given character to the given USet.
UBool uset_equals(const USet *set1, const USet *set2)
Returns true if set1 contains all of the characters and strings of set2, and vice versa.
int32_t uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
UBool uset_containsString(const USet *set, const UChar *str, int32_t strLen)
Returns TRUE if the given USet contains the given string.
int32_t uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
Enable case insensitive matching.
Definition: uset.h:94
void uset_applyIntPropertyValue(USet *set, UProperty prop, int32_t value, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given binary or enum...
void uset_addString(USet *set, const UChar *str, int32_t strLen)
Adds the given string to the given USet.
USet * uset_openPatternOptions(const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *ec)
Creates a set from the given pattern.
int32_t uset_getSerializedRangeCount(const USerializedSet *set)
Returns the number of disjoint ranges of characters contained in the given serialized set...
USetSpanCondition
Argument values for whether span() and similar functions continue while the current character is cont...
Definition: uset.h:152
C API: Unicode Properties.
int32_t uset_getItem(const USet *set, int32_t itemIndex, UChar32 *start, UChar32 *end, UChar *str, int32_t strCapacity, UErrorCode *ec)
Returns an item of this set.
uint16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:349
void uset_complement(USet *set)
Inverts this set.
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:132
UBool uset_containsSome(const USet *set1, const USet *set2)
Returns true if set1 contains some of the characters and strings of set2.
struct USet USet
Definition: ucnv.h:69
int32_t length
The total length of the array.
Definition: uset.h:240
void uset_retainAll(USet *set, const USet *retain)
Retains only the elements in this set that are contained in the specified set.
Ignore white space within patterns unless quoted or escaped.
Definition: uset.h:56
UProperty
Selection constants for Unicode properties.
Definition: uchar.h:163
UBool uset_serializedContains(const USerializedSet *set, UChar32 c)
Returns TRUE if the given USerializedSet contains the given character.
int32_t uset_toPattern(const USet *set, UChar *result, int32_t resultCapacity, UBool escapeUnprintable, UErrorCode *ec)
Returns a string representation of this set.
USet * uset_open(UChar32 start, UChar32 end)
Creates a USet object that contains the range of characters start..end, inclusive.
void uset_remove(USet *set, UChar32 c)
Removes the given character from the given USet.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:396
UBool uset_containsNone(const USet *set1, const USet *set2)
Returns true if set1 contains none of the characters and strings of set2.
UBool uset_isFrozen(const USet *set)
Determines whether the set has been frozen (made immutable) or not.
void uset_addRange(USet *set, UChar32 start, UChar32 end)
Adds the given range of characters to the given USet.
int32_t uset_indexOf(const USet *set, UChar32 c)
Returns the index of the given character within this set, where the set is ordered by ascending code point.
Basic definitions for ICU, for both C and C++ APIs.
int32_t bmpLength
The length of the array that contains BMP characters.
Definition: uset.h:235
A serialized form of a Unicode set.
Definition: uset.h:225
Continues a span() while there is a set element at the current position.
Definition: uset.h:200
void uset_setSerializedToOne(USerializedSet *fillSet, UChar32 c)
Set the USerializedSet to contain the given character (and nothing else).
UBool uset_getSerializedSet(USerializedSet *fillSet, const uint16_t *src, int32_t srcLength)
Given a serialized array, fill in the given serialized set object.
int32_t uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
int32_t uset_serialize(const USet *set, uint16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Serializes this set into an array of 16-bit integers.
void uset_closeOver(USet *set, int32_t attributes)
Close this set over the given attribute.
Continues a span() while there is no set element at the current position.
Definition: uset.h:165
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:111
int8_t UBool
The ICU boolean type.
Definition: umachine.h:236
void uset_close(USet *set)
Disposes of the storage used by a USet object.