ICU 59.1  59.1
uspoof.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ***************************************************************************
5 * Copyright (C) 2008-2016, International Business Machines Corporation
6 * and others. All Rights Reserved.
7 ***************************************************************************
8 * file name: uspoof.h
9 * encoding: UTF-8
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2008Feb13
14 * created by: Andy Heninger
15 *
16 * Unicode Spoof Detection
17 */
18 
19 #ifndef USPOOF_H
20 #define USPOOF_H
21 
22 #include "unicode/utypes.h"
23 #include "unicode/uset.h"
24 #include "unicode/parseerr.h"
25 #include "unicode/localpointer.h"
26 
27 #if !UCONFIG_NO_NORMALIZATION
28 
29 
30 #if U_SHOW_CPLUSPLUS_API
31 #include "unicode/unistr.h"
32 #include "unicode/uniset.h"
33 #endif
34 
35 
370 struct USpoofChecker;
371 typedef struct USpoofChecker USpoofChecker;
373 #ifndef U_HIDE_DRAFT_API
374 
377 struct USpoofCheckResult;
382 #endif /* U_HIDE_DRAFT_API */
383 
391 typedef enum USpoofChecks {
401 
411 
421 
422 #ifndef U_HIDE_DRAFT_API
423 
433 #endif /* U_HIDE_DRAFT_API */
434 
435 #ifndef U_HIDE_DEPRECATED_API
436 
442 #endif /* U_HIDE_DEPRECATED_API */
443 
458 
459 #ifndef U_HIDE_DEPRECATED_API
460 
466 #endif /* U_HIDE_DEPRECATED_API */
467 
475 
482 
490 
497 
510  USPOOF_AUX_INFO = 0x40000000
511 
512  } USpoofChecks;
513 
514 
524  typedef enum URestrictionLevel {
531  USPOOF_ASCII = 0x10000000,
572  USPOOF_UNRESTRICTIVE = 0x60000000,
579 #ifndef U_HIDE_INTERNAL_API
580 
585 #endif /* U_HIDE_INTERNAL_API */
587 
598 U_STABLE USpoofChecker * U_EXPORT2
599 uspoof_open(UErrorCode *status);
600 
601 
623 U_STABLE USpoofChecker * U_EXPORT2
624 uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength,
625  UErrorCode *pErrorCode);
626 
657 U_STABLE USpoofChecker * U_EXPORT2
658 uspoof_openFromSource(const char *confusables, int32_t confusablesLen,
659  const char *confusablesWholeScript, int32_t confusablesWholeScriptLen,
660  int32_t *errType, UParseError *pe, UErrorCode *status);
661 
662 
668 U_STABLE void U_EXPORT2
669 uspoof_close(USpoofChecker *sc);
670 
671 #if U_SHOW_CPLUSPLUS_API
672 
674 
685 
687 
688 #endif
689 
699 U_STABLE USpoofChecker * U_EXPORT2
700 uspoof_clone(const USpoofChecker *sc, UErrorCode *status);
701 
702 
740 U_STABLE void U_EXPORT2
741 uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status);
742 
754 U_STABLE int32_t U_EXPORT2
755 uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status);
756 
768 U_STABLE void U_EXPORT2
769 uspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel);
770 
771 
779 U_STABLE URestrictionLevel U_EXPORT2
780 uspoof_getRestrictionLevel(const USpoofChecker *sc);
781 
824 U_STABLE void U_EXPORT2
825 uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status);
826 
848 U_STABLE const char * U_EXPORT2
849 uspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status);
850 
851 
870 U_STABLE void U_EXPORT2
871 uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status);
872 
873 
894 U_STABLE const USet * U_EXPORT2
895 uspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status);
896 
897 
898 #if U_SHOW_CPLUSPLUS_API
899 
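917 U_STABLE void U_EXPORT2
918 uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const icu::UnicodeSet *chars, UErrorCode *status);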
919 
920 
941 U_STABLE const icu::UnicodeSet * U_EXPORT2
942 uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status);
943 #endif
944 
945 
978 U_STABLE int32_t U_EXPORT2
979 uspoof_check(const USpoofChecker *sc,
980  const UChar *id, int32_t length,
981  int32_t *position,
982  UErrorCode *status);
983 
984 
1017 U_STABLE int32_t U_EXPORT2
1018 uspoof_checkUTF8(const USpoofChecker *sc,
1019  const char *id, int32_t length,
1020  int32_t *position,
1021  UErrorCode *status);
1022 
1023 
1024 #if U_SHOW_CPLUSPLUS_API
1025 
1053 U_STABLE int32_t U_EXPORT2
1054 uspoof_checkUnicodeString(const USpoofChecker *sc,
1055  const icu::UnicodeString &id,
1056  int32_t *position,
1057  UErrorCode *status);
1058 #endif
1059 
1060 
1061 #ifndef U_HIDE_DRAFT_API
1062 
1090 U_DRAFT int32_t U_EXPORT2
1091 uspoof_check2(const USpoofChecker *sc,
1092  const UChar* id, int32_t length,
1093  USpoofCheckResult* checkResult,
1094  UErrorCode *status);
1095 
1127 U_DRAFT int32_t U_EXPORT2
1128 uspoof_check2UTF8(const USpoofChecker *sc,
1129  const char *id, int32_t length,
1130  USpoofCheckResult* checkResult,
1131  UErrorCode *status);
1132 
1133 #if U_SHOW_CPLUSPLUS_API
1134 
1159 U_DRAFT int32_t U_EXPORT2
1160 uspoof_check2UnicodeString(const USpoofChecker *sc,
1161  const icu::UnicodeString &id,
1162  USpoofCheckResult* checkResult,
1163  UErrorCode *status);
1164 #endif
1165 
1184 U_DRAFT USpoofCheckResult* U_EXPORT2
1185 uspoof_openCheckResult(UErrorCode *status);
1186 
1194 U_DRAFT void U_EXPORT2
1195 uspoof_closeCheckResult(USpoofCheckResult *checkResult);
1196 
1197 #if U_SHOW_CPLUSPLUS_API
1198 
1200 
1211 
1213 
1214 #endif
1215 
1230 U_DRAFT int32_t U_EXPORT2
1231 uspoof_getCheckResultChecks(const USpoofCheckResult *checkResult, UErrorCode *status);
1232 
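1243 U_DRAFT URestrictionLevel U_EXPORT2
1244 uspoof_getCheckResultRestrictionLevel(const USpoofCheckResult *checkResult, UErrorCode *status);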
1245 
1257 U_DRAFT const USet* U_EXPORT2
1258 uspoof_getCheckResultNumerics(const USpoofCheckResult *checkResult, UErrorCode *status);
1259 #endif /* U_HIDE_DRAFT_API */
1260 
1261 
1305 U_STABLE int32_t U_EXPORT2
1306 uspoof_areConfusable(const USpoofChecker *sc,
1307  const UChar *id1, int32_t length1,
1308  const UChar *id2, int32_t length2,
1309  UErrorCode *status);
1310 
1311 
1312 
1338 U_STABLE int32_t U_EXPORT2
1339 uspoof_areConfusableUTF8(const USpoofChecker *sc,
1340  const char *id1, int32_t length1,
1341  const char *id2, int32_t length2,
1342  UErrorCode *status);
1343 
1344 
1345 
1346 
1347 #if U_SHOW_CPLUSPLUS_API
1348 
1369 U_STABLE int32_t U_EXPORT2
1370 uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
1371  const icu::UnicodeString &s1,
1372  const icu::UnicodeString &s2,
1373  UErrorCode *status);
1374 #endif
1375 
1376 
1408 U_STABLE int32_t U_EXPORT2
1409 uspoof_getSkeleton(const USpoofChecker *sc,
1410  uint32_t type,
1411  const UChar *id, int32_t length,
1412  UChar *dest, int32_t destCapacity,
1413  UErrorCode *status);
1414 
1448 U_STABLE int32_t U_EXPORT2
1449 uspoof_getSkeletonUTF8(const USpoofChecker *sc,
1450  uint32_t type,
1451  const char *id, int32_t length,
1452  char *dest, int32_t destCapacity,
1453  UErrorCode *status);
1454 
1455 #if U_SHOW_CPLUSPLUS_API
1456 
1479 U_I18N_API icu::UnicodeString & U_EXPORT2
1480 uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
1481  uint32_t type,
1482  const icu::UnicodeString &id,
1483  icu::UnicodeString &dest,
1484  UErrorCode *status);
1485 #endif /* U_SHOW_CPLUSPLUS_API */
1486 
1499 U_STABLE const USet * U_EXPORT2
1500 uspoof_getInclusionSet(UErrorCode *status);
1501 
1514 U_STABLE const USet * U_EXPORT2
1515 uspoof_getRecommendedSet(UErrorCode *status);
1516 
1517 #if U_SHOW_CPLUSPLUS_API
1518 
1531 U_STABLE const icu::UnicodeSet * U_EXPORT2
1532 uspoof_getInclusionUnicodeSet(UErrorCode *status);
1533 
1546 U_STABLE const icu::UnicodeSet * U_EXPORT2
1547 uspoof_getRecommendedUnicodeSet(UErrorCode *status);
1548 
1549 #endif /* U_SHOW_CPLUSPLUS_API */
1550 
1573 U_STABLE int32_t U_EXPORT2
1574 uspoof_serialize(USpoofChecker *sc,
1575  void *data, int32_t capacity,
1576  UErrorCode *status);
1577 
1578 
1579 #endif
1580 
1581 #endif /* USPOOF_H */
void uspoof_close(USpoofChecker *sc)
Close a Spoof Checker, freeing any memory that was being held by its implementation.
U_STABLE const icu::UnicodeSet *U_EXPORT2 uspoof_getInclusionUnicodeSet(UErrorCode *status)
Get the set of Candidate Characters for Inclusion in Identifiers, as defined in http://unicode.org/Public/security/latest/xidmodifications.txt and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
URestrictionLevel
Constants from UAX #39 for use in uspoof_setRestrictionLevel, and for returned identifier restriction...
Definition: uspoof.h:524
Check an identifier for the presence of invisible characters, such as zero-width spaces, or character sequences that are likely not to display, such as multiple occurrences of the same non-spacing mark.
Definition: uspoof.h:474
Check that an identifier contains only characters from a single script (plus chars from the common an...
Definition: uspoof.h:465
U_STABLE int32_t U_EXPORT2 uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status)
Get the set of checks that this Spoof Checker has been configured to perform.
struct USpoofCheckResult USpoofCheckResult
Definition: uspoof.h:381
USpoofChecker * uspoof_clone(const USpoofChecker *sc, UErrorCode *status)
Clone a Spoof Checker.
All characters in the string are in the identifier profile and all characters in the string are in th...
Definition: uspoof.h:531
U_I18N_API icu::UnicodeString &U_EXPORT2 uspoof_getSkeletonUnicodeString(const USpoofChecker *sc, uint32_t type, const icu::UnicodeString &id, icu::UnicodeString &dest, UErrorCode *status)
Get the "skeleton" for an identifier.
Check that an identifier contains only characters from a specified set of acceptable characters...
Definition: uspoof.h:481
All characters in the string are in the identifier profile.
Definition: uspoof.h:566
USpoofChecks
Enum for the kinds of checks that USpoofChecker can perform.
Definition: uspoof.h:391
U_DRAFT int32_t U_EXPORT2 uspoof_check2UnicodeString(const USpoofChecker *sc, const icu::UnicodeString &id, USpoofCheckResult *checkResult, UErrorCode *status)
Check the specified string for possible security issues.
C++ API: Unicode String.
USpoofChecker * uspoof_open(UErrorCode *status)
Create a Unicode Spoof Checker, configured to perform all checks except for USPOOF_LOCALE_LIMIT and U...
U_DRAFT USpoofCheckResult *U_EXPORT2 uspoof_openCheckResult(UErrorCode *status)
Create a USpoofCheckResult, used by the uspoof_check2 class of functions to return information about ...
Enable this flag in uspoof_setChecks to turn on all types of confusables.
Definition: uspoof.h:432
USpoofChecker * uspoof_openFromSource(const char *confusables, int32_t confusablesLen, const char *confusablesWholeScript, int32_t confusablesWholeScriptLen, int32_t *errType, UParseError *pe, UErrorCode *status)
Open a Spoof Checker from the source form of the spoof data.
U_STABLE const USet *U_EXPORT2 uspoof_getRecommendedSet(UErrorCode *status)
Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined in http:/...
C API: Unicode Set.
U_STABLE void U_EXPORT2 uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status)
Limit the acceptable characters to those specified by a Unicode Set.
Enable all spoof checks.
Definition: uspoof.h:496
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside...
Definition: utypes.h:360
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:131
U_DRAFT const USet *U_EXPORT2 uspoof_getCheckResultNumerics(const USpoofCheckResult *checkResult, UErrorCode *status)
Gets the set of numerics found in the string, if the USPOOF_MIXED_NUMBERS check was enabled; otherwis...
U_STABLE const char *U_EXPORT2 uspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status)
Get a list of locales for the scripts that are acceptable in strings to be checked.
This flag is deprecated and no longer affects the behavior of SpoofChecker.
Definition: uspoof.h:441
When performing the two-string uspoof_areConfusable test, this flag in the return value indicates tha...
Definition: uspoof.h:420
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:496
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
U_STABLE const icu::UnicodeSet *U_EXPORT2 uspoof_getRecommendedUnicodeSet(UErrorCode *status)
Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined in http:/...
U_STABLE void U_EXPORT2 uspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel)
Set the loosest restriction level allowed for strings.
The string classifies as Highly Restrictive, or all characters in the string are in the identifier pr...
Definition: uspoof.h:560
Check that an identifier does not mix numbers from different numbering systems.
Definition: uspoof.h:489
U_DRAFT int32_t U_EXPORT2 uspoof_check2UTF8(const USpoofChecker *sc, const char *id, int32_t length, USpoofCheckResult *checkResult, UErrorCode *status)
Check the specified string for possible security issues.
The string classifies as ASCII-Only, or all characters in the string are in the identifier profile an...
Definition: uspoof.h:538
The string classifies as Single Script, or all characters in the string are in the identifier profile...
Definition: uspoof.h:552
U_STABLE int32_t U_EXPORT2 uspoof_getSkeleton(const USpoofChecker *sc, uint32_t type, const UChar *id, int32_t length, UChar *dest, int32_t destCapacity, UErrorCode *status)
Get the "skeleton" for an identifier.
U_STABLE int32_t U_EXPORT2 uspoof_areConfusableUTF8(const USpoofChecker *sc, const char *id1, int32_t length1, const char *id2, int32_t length2, UErrorCode *status)
A version of uspoof_areConfusable accepting strings in UTF-8 format.
USpoofChecker * uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength, UErrorCode *pErrorCode)
Open a Spoof checker from its serialized form, stored in 32-bit-aligned memory.
U_DRAFT URestrictionLevel U_EXPORT2 uspoof_getCheckResultRestrictionLevel(const USpoofCheckResult *checkResult, UErrorCode *status)
Gets the restriction level that the text meets, if the USPOOF_RESTRICTION_LEVEL check was enabled; ot...
U_STABLE void U_EXPORT2 uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const icu::UnicodeSet *chars, UErrorCode *status)
Limit the acceptable characters to those specified by a Unicode Set.
U_STABLE void U_EXPORT2 uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status)
Limit characters that are acceptable in identifiers being checked to those normally used with the lan...
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:278
struct USpoofChecker USpoofChecker
typedef for C of USpoofChecker
Definition: uspoof.h:371
U_DRAFT int32_t U_EXPORT2 uspoof_check2(const USpoofChecker *sc, const UChar *id, int32_t length, USpoofCheckResult *checkResult, UErrorCode *status)
Check the specified string for possible security issues.
"Smart pointer" class, closes a USpoofChecker via uspoof_close().
When performing the two-string uspoof_areConfusable test, this flag in the return value indicates tha...
Definition: uspoof.h:400
uint16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:349
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:132
struct USet USet
Definition: ucnv.h:69
C API: Parse Error Information.
U_STABLE const icu::UnicodeSet *U_EXPORT2 uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status)
Get a UnicodeSet for the characters permitted in an identifier.
U_STABLE URestrictionLevel U_EXPORT2 uspoof_getRestrictionLevel(const USpoofChecker *sc)
Get the Restriction Level that will be tested if the checks include USPOOF_RESTRICTION_LEVEL.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:396
When performing the two-string uspoof_areConfusable test, this flag in the return value indicates tha...
Definition: uspoof.h:410
U_NAMESPACE_END U_DRAFT int32_t U_EXPORT2 uspoof_getCheckResultChecks(const USpoofCheckResult *checkResult, UErrorCode *status)
Indicates which of the spoof check(s) have failed.
U_STABLE int32_t U_EXPORT2 uspoof_checkUTF8(const USpoofChecker *sc, const char *id, int32_t length, int32_t *position, UErrorCode *status)
Check the specified string for possible security issues.
U_STABLE void U_EXPORT2 uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status)
Specify the bitmask of checks that will be performed by uspoof_check.
U_STABLE int32_t U_EXPORT2 uspoof_check(const USpoofChecker *sc, const UChar *id, int32_t length, int32_t *position, UErrorCode *status)
Check the specified string for possible security issues.
Enable the return of auxiliary (non-error) information in the upper bits of the check results value...
Definition: uspoof.h:510
U_STABLE int32_t U_EXPORT2 uspoof_checkUnicodeString(const USpoofChecker *sc, const icu::UnicodeString &id, int32_t *position, UErrorCode *status)
Check the specified string for possible security issues.
Check that an identifier is no looser than the specified RestrictionLevel.
Definition: uspoof.h:457
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:58
Basic definitions for ICU, for both C and C++ APIs.
U_STABLE const USet *U_EXPORT2 uspoof_getInclusionSet(UErrorCode *status)
Get the set of Candidate Characters for Inclusion in Identifiers, as defined in http://unicode.org/Public/security/latest/xidmodifications.txt and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
Mask for selecting the Restriction Level bits from the return value of uspoof_check.
Definition: uspoof.h:578
U_STABLE int32_t U_EXPORT2 uspoof_serialize(USpoofChecker *sc, void *data, int32_t capacity, UErrorCode *status)
Serialize the data for a spoof detector into a chunk of memory.
U_STABLE int32_t U_EXPORT2 uspoof_areConfusableUnicodeString(const USpoofChecker *sc, const icu::UnicodeString &s1, const icu::UnicodeString &s2, UErrorCode *status)
A version of uspoof_areConfusable accepting UnicodeStrings.
Any valid identifiers, including characters outside of the Identifier Profile.
Definition: uspoof.h:572
U_STABLE const USet *U_EXPORT2 uspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status)
Get a USet for the characters permitted in an identifier.
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:296
"Smart pointer" class, closes a USpoofCheckResult via uspoof_closeCheckResult.
#define U_DRAFT
This is used to declare a function as a draft public ICU C API.
Definition: umachine.h:113
U_STABLE int32_t U_EXPORT2 uspoof_areConfusable(const USpoofChecker *sc, const UChar *id1, int32_t length1, const UChar *id2, int32_t length2, UErrorCode *status)
Check whether two specified strings are visually confusable.
U_STABLE int32_t U_EXPORT2 uspoof_getSkeletonUTF8(const USpoofChecker *sc, uint32_t type, const char *id, int32_t length, char *dest, int32_t destCapacity, UErrorCode *status)
Get the "skeleton" for an identifier.
An undefined restriction level.
Definition: uspoof.h:584
U_DRAFT void U_EXPORT2 uspoof_closeCheckResult(USpoofCheckResult *checkResult)
Close a USpoofCheckResult, freeing any memory that was being held by its implementation.
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:111
C++ API: Unicode Set.