ICU 59.1  59.1
uregex.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 2004-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 * file name: uregex.h
9 * encoding: UTF-8
10 * indentation:4
11 *
12 * created on: 2004mar09
13 * created by: Andy Heninger
14 *
15 * ICU Regular Expressions, API for C
16 */
17 
25 #ifndef UREGEX_H
26 #define UREGEX_H
27 
28 #include "unicode/utext.h"
29 #include "unicode/utypes.h"
30 
31 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
32 
33 #include "unicode/localpointer.h"
34 #include "unicode/parseerr.h"
35 
36 struct URegularExpression;
43 
44 
49 typedef enum URegexpFlag{
50 
51 #ifndef U_HIDE_DRAFT_API
52 
56 #endif /* U_HIDE_DRAFT_API */
57 
59 
62 
66 
78 
84 
91 
99  UREGEX_UWORD = 256,
100 
109 
110 } URegexpFlag;
111 
134 U_STABLE URegularExpression * U_EXPORT2
135 uregex_open( const UChar *pattern,
136  int32_t patternLength,
137  uint32_t flags,
138  UParseError *pe,
139  UErrorCode *status);
140 
164 U_STABLE URegularExpression * U_EXPORT2
165 uregex_openUText(UText *pattern,
166  uint32_t flags,
167  UParseError *pe,
168  UErrorCode *status);
169 
193 #if !UCONFIG_NO_CONVERSION
194 U_STABLE URegularExpression * U_EXPORT2
195 uregex_openC( const char *pattern,
196  uint32_t flags,
197  UParseError *pe,
198  UErrorCode *status);
199 #endif
200 
201 
202 
210 U_STABLE void U_EXPORT2
212 
213 #if U_SHOW_CPLUSPLUS_API
214 
216 
227 
229 
230 #endif
231 
250 U_STABLE URegularExpression * U_EXPORT2
251 uregex_clone(const URegularExpression *regexp, UErrorCode *status);
252 
270 U_STABLE const UChar * U_EXPORT2
271 uregex_pattern(const URegularExpression *regexp,
272  int32_t *patLength,
273  UErrorCode *status);
274 
286 U_STABLE UText * U_EXPORT2
288  UErrorCode *status);
289 
298 U_STABLE int32_t U_EXPORT2
299 uregex_flags(const URegularExpression *regexp,
300  UErrorCode *status);
301 
302 
323 U_STABLE void U_EXPORT2
325  const UChar *text,
326  int32_t textLength,
327  UErrorCode *status);
328 
329 
346 U_STABLE void U_EXPORT2
348  UText *text,
349  UErrorCode *status);
350 
371 U_STABLE const UChar * U_EXPORT2
373  int32_t *textLength,
374  UErrorCode *status);
375 
392 U_STABLE UText * U_EXPORT2
394  UText *dest,
395  UErrorCode *status);
396 
422 U_STABLE void U_EXPORT2
424  UText *text,
425  UErrorCode *status);
426 
447 U_STABLE UBool U_EXPORT2
449  int32_t startIndex,
450  UErrorCode *status);
451 
473 U_STABLE UBool U_EXPORT2
475  int64_t startIndex,
476  UErrorCode *status);
477 
501 U_STABLE UBool U_EXPORT2
503  int32_t startIndex,
504  UErrorCode *status);
505 
530 U_STABLE UBool U_EXPORT2
532  int64_t startIndex,
533  UErrorCode *status);
534 
554 U_STABLE UBool U_EXPORT2
556  int32_t startIndex,
557  UErrorCode *status);
558 
579 U_STABLE UBool U_EXPORT2
581  int64_t startIndex,
582  UErrorCode *status);
583 
597 U_STABLE UBool U_EXPORT2
599  UErrorCode *status);
600 
608 U_STABLE int32_t U_EXPORT2
610  UErrorCode *status);
611 
628 U_STABLE int32_t U_EXPORT2
630  const UChar *groupName,
631  int32_t nameLength,
632  UErrorCode *status);
633 
634 
652 U_STABLE int32_t U_EXPORT2
654  const char *groupName,
655  int32_t nameLength,
656  UErrorCode *status);
657 
674 U_STABLE int32_t U_EXPORT2
676  int32_t groupNum,
677  UChar *dest,
678  int32_t destCapacity,
679  UErrorCode *status);
680 
703 U_STABLE UText * U_EXPORT2
705  int32_t groupNum,
706  UText *dest,
707  int64_t *groupLength,
708  UErrorCode *status);
709 
724 U_STABLE int32_t U_EXPORT2
726  int32_t groupNum,
727  UErrorCode *status);
728 
744 U_STABLE int64_t U_EXPORT2
746  int32_t groupNum,
747  UErrorCode *status);
748 
762 U_STABLE int32_t U_EXPORT2
764  int32_t groupNum,
765  UErrorCode *status);
766 
781 U_STABLE int64_t U_EXPORT2
783  int32_t groupNum,
784  UErrorCode *status);
785 
799 U_STABLE void U_EXPORT2
801  int32_t index,
802  UErrorCode *status);
803 
818 U_STABLE void U_EXPORT2
820  int64_t index,
821  UErrorCode *status);
822 
843 U_STABLE void U_EXPORT2
845  int32_t regionStart,
846  int32_t regionLimit,
847  UErrorCode *status);
848 
870 U_STABLE void U_EXPORT2
872  int64_t regionStart,
873  int64_t regionLimit,
874  UErrorCode *status);
875 
890 U_STABLE void U_EXPORT2
892  int64_t regionStart,
893  int64_t regionLimit,
894  int64_t startIndex,
895  UErrorCode *status);
896 
906 U_STABLE int32_t U_EXPORT2
908  UErrorCode *status);
909 
920 U_STABLE int64_t U_EXPORT2
922  UErrorCode *status);
923 
934 U_STABLE int32_t U_EXPORT2
936  UErrorCode *status);
937 
949 U_STABLE int64_t U_EXPORT2
951  UErrorCode *status);
952 
963 U_STABLE UBool U_EXPORT2
965  UErrorCode *status);
966 
967 
987 U_STABLE void U_EXPORT2
989  UBool b,
990  UErrorCode *status);
991 
992 
1002 U_STABLE UBool U_EXPORT2
1004  UErrorCode *status);
1005 
1006 
1020 U_STABLE void U_EXPORT2
1022  UBool b,
1023  UErrorCode *status);
1024 
1035 U_STABLE UBool U_EXPORT2
1036 uregex_hitEnd(const URegularExpression *regexp,
1037  UErrorCode *status);
1038 
1050 U_STABLE UBool U_EXPORT2
1052  UErrorCode *status);
1053 
1054 
1055 
1056 
1057 
1082 U_STABLE int32_t U_EXPORT2
1084  const UChar *replacementText,
1085  int32_t replacementLength,
1086  UChar *destBuf,
1087  int32_t destCapacity,
1088  UErrorCode *status);
1089 
1111 U_STABLE UText * U_EXPORT2
1113  UText *replacement,
1114  UText *dest,
1115  UErrorCode *status);
1116 
1141 U_STABLE int32_t U_EXPORT2
1143  const UChar *replacementText,
1144  int32_t replacementLength,
1145  UChar *destBuf,
1146  int32_t destCapacity,
1147  UErrorCode *status);
1148 
1170 U_STABLE UText * U_EXPORT2
1172  UText *replacement,
1173  UText *dest,
1174  UErrorCode *status);
1175 
1222 U_STABLE int32_t U_EXPORT2
1224  const UChar *replacementText,
1225  int32_t replacementLength,
1226  UChar **destBuf,
1227  int32_t *destCapacity,
1228  UErrorCode *status);
1229 
1252 U_STABLE void U_EXPORT2
1254  UText *replacementText,
1255  UText *dest,
1256  UErrorCode *status);
1257 
1282 U_STABLE int32_t U_EXPORT2
1284  UChar **destBuf,
1285  int32_t *destCapacity,
1286  UErrorCode *status);
1287 
1306 U_STABLE UText * U_EXPORT2
1308  UText *dest,
1309  UErrorCode *status);
1310 
1362 U_STABLE int32_t U_EXPORT2
1364  UChar *destBuf,
1365  int32_t destCapacity,
1366  int32_t *requiredCapacity,
1367  UChar *destFields[],
1368  int32_t destFieldsCapacity,
1369  UErrorCode *status);
1370 
1397 U_STABLE int32_t U_EXPORT2
1399  UText *destFields[],
1400  int32_t destFieldsCapacity,
1401  UErrorCode *status);
1402 
1425 U_STABLE void U_EXPORT2
1427  int32_t limit,
1428  UErrorCode *status);
1429 
1439 U_STABLE int32_t U_EXPORT2
1441  UErrorCode *status);
1442 
1463 U_STABLE void U_EXPORT2
1465  int32_t limit,
1466  UErrorCode *status);
1467 
1475 U_STABLE int32_t U_EXPORT2
1477  UErrorCode *status);
1478 
1479 
1500  const void *context,
1501  int32_t steps);
1503 
1518 U_STABLE void U_EXPORT2
1520  URegexMatchCallback *callback,
1521  const void *context,
1522  UErrorCode *status);
1523 
1524 
1536 U_STABLE void U_EXPORT2
1538  URegexMatchCallback **callback,
1539  const void **context,
1540  UErrorCode *status);
1541 
1574  const void *context,
1575  int64_t matchIndex);
1577 
1578 
1590 U_STABLE void U_EXPORT2
1592  URegexFindProgressCallback *callback,
1593  const void *context,
1594  UErrorCode *status);
1595 
1607 U_STABLE void U_EXPORT2
1609  URegexFindProgressCallback **callback,
1610  const void **context,
1611  UErrorCode *status);
1612 
1613 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
1614 #endif /* UREGEX_H */
UText * uregex_getUText(URegularExpression *regexp, UText *dest, UErrorCode *status)
Get the subject text that is currently associated with this regular expression object.
int64_t uregex_regionEnd64(const URegularExpression *regexp, UErrorCode *status)
64bit version of uregex_regionEnd.
void uregex_setText(URegularExpression *regexp, const UChar *text, int32_t textLength, UErrorCode *status)
Set the subject text string upon which the regular expression will look for matches.
UBool uregex_lookingAt64(URegularExpression *regexp, int64_t startIndex, UErrorCode *status)
64bit version of uregex_lookingAt.
int32_t uregex_groupNumberFromCName(URegularExpression *regexp, const char *groupName, int32_t nameLength, UErrorCode *status)
Get the group number corresponding to a named capture group.
URegularExpression * uregex_openC(const char *pattern, uint32_t flags, UParseError *pe, UErrorCode *status)
Open (compile) an ICU regular expression.
Forces normalization of pattern and strings.
Definition: uregex.h:55
int32_t uregex_getTimeLimit(const URegularExpression *regexp, UErrorCode *status)
Get the time limit for matches with this URegularExpression.
"Smart pointer" class, closes a URegularExpression via uregex_close().
int32_t uregex_groupCount(URegularExpression *regexp, UErrorCode *status)
Get the number of capturing groups in this regular expression's pattern.
int32_t uregex_splitUText(URegularExpression *regexp, UText *destFields[], int32_t destFieldsCapacity, UErrorCode *status)
Split a string into fields.
void uregex_setUText(URegularExpression *regexp, UText *text, UErrorCode *status)
Set the subject text string upon which the regular expression will look for matches.
Control behavior of "$" and "^". If set, recognize line terminators within string, otherwise...
Definition: uregex.h:83
void uregex_reset(URegularExpression *regexp, int32_t index, UErrorCode *status)
Reset any saved state from the previous match.
UBool uregex_find(URegularExpression *regexp, int32_t startIndex, UErrorCode *status)
Find the first matching substring of the input string that matches the pattern.
void uregex_getMatchCallback(const URegularExpression *regexp, URegexMatchCallback **callback, const void **context, UErrorCode *status)
Get the callback function for this URegularExpression.
int32_t uregex_appendTail(URegularExpression *regexp, UChar **destBuf, int32_t *destCapacity, UErrorCode *status)
As the final step in a find-and-replace operation, append the remainder of the input string...
int32_t uregex_regionEnd(const URegularExpression *regexp, UErrorCode *status)
Reports the end index (exclusive) of the matching region for this URegularExpression.
If set, '.' matches line terminators, otherwise '.' matching stops at line end.
Definition: uregex.h:65
Unix-only line endings.
Definition: uregex.h:90
UBool uregex_requireEnd(const URegularExpression *regexp, UErrorCode *status)
Return TRUE if the most recent match succeeded and additional input could cause it to fail...
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
Definition: platform.h:871
int32_t uregex_replaceAll(URegularExpression *regexp, const UChar *replacementText, int32_t replacementLength, UChar *destBuf, int32_t destCapacity, UErrorCode *status)
Replaces every substring of the input that matches the pattern with the given replacement string...
void uregex_setRegion(URegularExpression *regexp, int32_t regionStart, int32_t regionLimit, UErrorCode *status)
Sets the limits of the matching region for this URegularExpression.
int32_t uregex_split(URegularExpression *regexp, UChar *destBuf, int32_t destCapacity, int32_t *requiredCapacity, UChar *destFields[], int32_t destFieldsCapacity, UErrorCode *status)
Split a string into fields.
int32_t uregex_end(URegularExpression *regexp, int32_t groupNum, UErrorCode *status)
Returns the index in the input string of the position following the end of the text matched by the sp...
UBool uregex_matches64(URegularExpression *regexp, int64_t startIndex, UErrorCode *status)
64bit version of uregex_matches.
UBool URegexFindProgressCallback(const void *context, int64_t matchIndex)
Function pointer for a regular expression find callback function.
Definition: uregex.h:1573
UBool uregex_matches(URegularExpression *regexp, int32_t startIndex, UErrorCode *status)
Attempts to match the input string against the pattern.
URegularExpression * uregex_open(const UChar *pattern, int32_t patternLength, uint32_t flags, UParseError *pe, UErrorCode *status)
Open (compile) an ICU regular expression.
void uregex_close(URegularExpression *regexp)
Close the regular expression, recovering all resources (memory) it was holding.
int32_t uregex_groupNumberFromName(URegularExpression *regexp, const UChar *groupName, int32_t nameLength, UErrorCode *status)
Get the group number corresponding to a named capture group.
C API: Abstract Unicode Text API.
int64_t uregex_start64(URegularExpression *regexp, int32_t groupNum, UErrorCode *status)
64bit version of uregex_start.
UBool uregex_findNext(URegularExpression *regexp, UErrorCode *status)
Find the next pattern match in the input string.
void uregex_setFindProgressCallback(URegularExpression *regexp, URegexFindProgressCallback *callback, const void *context, UErrorCode *status)
Set the find progress callback function for this URegularExpression.
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
Definition: umachine.h:84
int32_t uregex_getStackLimit(const URegularExpression *regexp, UErrorCode *status)
Get the size of the heap storage available for use by the backtracking stack.
URegularExpression * uregex_clone(const URegularExpression *regexp, UErrorCode *status)
Make a copy of a compiled regular expression.
URegexpFlag
Constants for Regular Expression Match Modes.
Definition: uregex.h:49
void uregex_setStackLimit(URegularExpression *regexp, int32_t limit, UErrorCode *status)
Set the amount of heap storage available for use by the match backtracking stack. ...
Allow white space and comments within patterns.
Definition: uregex.h:61
void uregex_useAnchoringBounds(URegularExpression *regexp, UBool b, UErrorCode *status)
Set whether this URegularExpression is using Anchoring Bounds for its region.
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:131
int32_t uregex_appendReplacement(URegularExpression *regexp, const UChar *replacementText, int32_t replacementLength, UChar **destBuf, int32_t *destCapacity, UErrorCode *status)
Implements a replace operation intended to be used as part of an incremental find-and-replace.
void uregex_appendReplacementUText(URegularExpression *regexp, UText *replacementText, UText *dest, UErrorCode *status)
Implements a replace operation intended to be used as part of an incremental find-and-replace.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:496
UBool uregex_hasTransparentBounds(const URegularExpression *regexp, UErrorCode *status)
Queries the transparency of region bounds for this URegularExpression.
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
const UChar * uregex_pattern(const URegularExpression *regexp, int32_t *patLength, UErrorCode *status)
Returns a pointer to the source form of the pattern for this regular expression.
UBool uregex_find64(URegularExpression *regexp, int64_t startIndex, UErrorCode *status)
64bit version of uregex_find.
void uregex_setRegionAndStart(URegularExpression *regexp, int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode *status)
Set the matching region and the starting index for subsequent matches in a single operation...
void uregex_reset64(URegularExpression *regexp, int64_t index, UErrorCode *status)
64bit version of uregex_reset.
void uregex_setMatchCallback(URegularExpression *regexp, URegexMatchCallback *callback, const void *context, UErrorCode *status)
Set a callback function for this URegularExpression.
int32_t uregex_flags(const URegularExpression *regexp, UErrorCode *status)
Get the match mode flags that were specified when compiling this regular expression.
struct URegularExpression URegularExpression
Structure representing a compiled regular expression, plus the results of a match operation...
Definition: uregex.h:42
void uregex_getFindProgressCallback(const URegularExpression *regexp, URegexFindProgressCallback **callback, const void **context, UErrorCode *status)
Get the find progress callback function for this URegularExpression.
UText * uregex_replaceFirstUText(URegularExpression *regexp, UText *replacement, UText *dest, UErrorCode *status)
Replaces the first substring of the input that matches the pattern with the given replacement string...
uint16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:349
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
Definition: umachine.h:85
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:132
UBool URegexMatchCallback(const void *context, int32_t steps)
Function pointer for a regular expression matching callback function.
Definition: uregex.h:1499
UBool uregex_hasAnchoringBounds(const URegularExpression *regexp, UErrorCode *status)
Return true if this URegularExpression is using anchoring bounds.
int32_t uregex_regionStart(const URegularExpression *regexp, UErrorCode *status)
Reports the start index of the matching region.
C API: Parse Error Information.
Unicode word boundaries.
Definition: uregex.h:99
UText * uregex_groupUText(URegularExpression *regexp, int32_t groupNum, UText *dest, int64_t *groupLength, UErrorCode *status)
Returns a shallow immutable clone of the entire input string with the current index set to the beginn...
UText * uregex_appendTailUText(URegularExpression *regexp, UText *dest, UErrorCode *status)
As the final step in a find-and-replace operation, append the remainder of the input string...
int32_t uregex_group(URegularExpression *regexp, int32_t groupNum, UChar *dest, int32_t destCapacity, UErrorCode *status)
Extract the string for the specified matching expression or subexpression.
Error on Unrecognized backslash escapes.
Definition: uregex.h:108
UText * uregex_patternUText(const URegularExpression *regexp, UErrorCode *status)
Returns the source text of the pattern for this regular expression.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:396
If set, treat the entire pattern as a literal string.
Definition: uregex.h:77
void uregex_useTransparentBounds(URegularExpression *regexp, UBool b, UErrorCode *status)
Sets the transparency of region bounds for this URegularExpression.
UText struct.
Definition: utext.h:1345
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:58
Basic definitions for ICU, for both C and C++ APIs.
int32_t uregex_start(URegularExpression *regexp, int32_t groupNum, UErrorCode *status)
Returns the index in the input string of the start of the text matched by the specified capture group...
void uregex_refreshUText(URegularExpression *regexp, UText *text, UErrorCode *status)
Set the subject text string upon which the regular expression is looking for matches without changing...
int64_t uregex_regionStart64(const URegularExpression *regexp, UErrorCode *status)
64bit version of uregex_regionStart.
Enable case insensitive matching.
Definition: uregex.h:58
const UChar * uregex_getText(URegularExpression *regexp, int32_t *textLength, UErrorCode *status)
Get the subject text that is currently associated with this regular expression object.
URegularExpression * uregex_openUText(UText *pattern, uint32_t flags, UParseError *pe, UErrorCode *status)
Open (compile) an ICU regular expression.
UBool uregex_hitEnd(const URegularExpression *regexp, UErrorCode *status)
Return TRUE if the most recent matching operation touched the end of the text being processed...
void uregex_setTimeLimit(URegularExpression *regexp, int32_t limit, UErrorCode *status)
Set a processing time limit for match operations with this URegularExpression.
UText * uregex_replaceAllUText(URegularExpression *regexp, UText *replacement, UText *dest, UErrorCode *status)
Replaces every substring of the input that matches the pattern with the given replacement string...
int32_t uregex_replaceFirst(URegularExpression *regexp, const UChar *replacementText, int32_t replacementLength, UChar *destBuf, int32_t destCapacity, UErrorCode *status)
Replaces the first substring of the input that matches the pattern with the given replacement string...
int64_t uregex_end64(URegularExpression *regexp, int32_t groupNum, UErrorCode *status)
64bit version of uregex_end.
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:111
void uregex_setRegion64(URegularExpression *regexp, int64_t regionStart, int64_t regionLimit, UErrorCode *status)
64bit version of uregex_setRegion.
int8_t UBool
The ICU boolean type.
Definition: umachine.h:236
UBool uregex_lookingAt(URegularExpression *regexp, int32_t startIndex, UErrorCode *status)
Attempts to match the input string, starting from the specified index, against the pattern...