
Collation Examples
Simple Collation Sample Customization
The following program demonstrates how to compare strings and create sort keys using a collator opened for a specified locale (en_US in these samples).
In C:
#include <stdio.h>
#include <memory.h>
#include <string.h>
#include "unicode/ustring.h"
#include "unicode/utypes.h"
#include "unicode/uloc.h"
#include "unicode/ucol.h"
#define MAXBUFFERSIZE 100
#define BIGBUFFERSIZE 5000
UBool collateWithLocaleInC(const char* locale, UErrorCode *status)
{
UChar dispName [MAXBUFFERSIZE];
int32_t bufferLen = 0;
UChar source [MAXBUFFERSIZE];
UChar target [MAXBUFFERSIZE];
UCollationResult result = UCOL_EQUAL;
uint8_t sourceKeyArray [MAXBUFFERSIZE];
uint8_t targetKeyArray [MAXBUFFERSIZE];
int32_t sourceKeyOut = 0,
targetKeyOut = 0;
UCollator *myCollator = 0;
if (U_FAILURE(*status))
{
return FALSE;
}
u_uastrcpy(source, "This is a test.");
u_uastrcpy(target, "THIS IS A TEST.");
myCollator = ucol_open(locale, status);
if (U_FAILURE(*status)){
bufferLen = uloc_getDisplayName(locale, 0, dispName, MAXBUFFERSIZE, status);
/*Report the error with display name... */
fprintf(stderr,
"Failed to create the collator for : \"%s\"\n", dispName);
return FALSE;
}
result = ucol_strcoll(myCollator, source, u_strlen(source), target, u_strlen(target));
/* result is 1, secondary differences only for ignorable space characters*/
if (result != UCOL_LESS)
{
fprintf(stderr,
"Comparing two strings with only secondary differences in C failed.\n");
return FALSE;
}
/* To compare them with just primary differences */
ucol_setStrength(myCollator, UCOL_PRIMARY);
result = ucol_strcoll(myCollator, source, u_strlen(source), target, u_strlen(target));
/* result is 0 */
if (result != 0)
{
fprintf(stderr,
"Comparing two strings with no differences in C failed.\n");
return FALSE;
}
|
In C++:
#include <stdio.h>
#include "unicode/unistr.h"
#include "unicode/utypes.h"
#include "unicode/locid.h"
#include "unicode/coll.h"
#include "unicode/tblcoll.h"
#include "unicode/coleitr.h"
#include "unicode/sortkey.h"
UBool collateWithLocaleInCPP(const Locale& locale, UErrorCode& status)
{
UnicodeString dispName;
UnicodeString source("This is a test.");
UnicodeString target("THIS IS A TEST.");
Collator::EComparisonResult result = Collator::EQUAL;
CollationKey sourceKey;
CollationKey targetKey;
Collator *myCollator = 0;
if (U_FAILURE(status))
{
return FALSE;
}
myCollator = Collator::createInstance(locale, status);
if (U_FAILURE(status)){
locale.getDisplayName(dispName);
/*Report the error with display name... */
fprintf(stderr,
"%s: Failed to create the collator for : \"%s\"\n", dispName);
return FALSE;
}
result = myCollator->compare(source, target);
/* result is 1, secondary differences only for ignorable space characters*/
if (result != UCOL_LESS)
{
fprintf(stderr,
"Comparing two strings with only secondary differences in C failed.\n");
return FALSE;
}
/* To compare them with just primary differences */
myCollator->setStrength(Collator::PRIMARY);
result = myCollator->compare(source, target);
/* result is 0 */
if (result != 0)
{
fprintf(stderr,
"Comparing two strings with no differences in C failed.\n");
return FALSE;
}
/* Now, do the same comparison with keys */
myCollator->getCollationKey(source, sourceKey, status);
myCollator->getCollationKey(target, targetKey, status);
result = Collator::EQUAL;
|
Main Function:
extern "C" UBool collateWithLocaleInC(const char* locale, UErrorCode *status);
/**
 * Driver for the collation samples.
 *
 * Runs the C++ sample (collateWithLocaleInCPP) and then the C sample
 * (collateWithLocaleInC), both for the en_US locale, and prints one line
 * per sample saying whether it worked. Success messages go to stdout,
 * failure messages to stderr.
 *
 * @return Always 0; failures are only reported on stderr.
 */
int main()
{
    UErrorCode status = U_ZERO_ERROR;

    fprintf(stdout, "\n");
    if (collateWithLocaleInCPP(Locale("en", "US"), status) != TRUE)
    {
        fprintf(stderr,
                "Collate with locale in C++ failed.\n");
    } else
    {
        fprintf(stdout, "Collate with Locale C++ example worked!!\n");
    }

    /* Reset the error code so the C sample starts from a clean state,
       regardless of what the C++ sample left in it. */
    status = U_ZERO_ERROR;
    fprintf(stdout, "\n");
    if (collateWithLocaleInC("en_US", &status) != TRUE)
    {
        /* The format string must not contain a "%s" conversion here:
           no argument is supplied, and a conversion without a matching
           argument is undefined behaviour. */
        fprintf(stderr,
                "Collate with locale in C failed.\n");
    } else
    {
        fprintf(stdout, "Collate with Locale C example worked!!\n");
    }
    return 0;
}
|
In Java:
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.CollationElementIterator;
import com.ibm.icu.text.CollationKey;
import java.util.Locale;
public class CollateExample
{
public static void main(String arg[])
{
CollateExample example = new CollateExample();
try {
if (!example.collateWithLocale(Locale.US)) {
System.err.println("Collate with locale example failed.");
}
else {
System.out.println("Collate with Locale example worked!!");
}
} catch (Exception e) {
System.err.println("Collating with locale failed");
e.printStackTrace();
}
}
public boolean collateWithLocale(Locale locale) throws Exception
{
String source = "This is a test.";
String target = "THIS IS A TEST.";
Collator myCollator = Collator.getInstance(locale);
|
Language-sensitive searching
String searching is a well-researched area, and there are algorithms that can optimize the searching process. Perhaps the best is the Boyer-Moore method. For a full description of the concepts behind the sample programs, please see Laura Werner's text searching article (http://icu-project.org/docs/papers/efficient_text_searching_in_java.html ).
The source of the language-sensitive text searching based on ICU Collation Service can be found on the Internet at http://source.icu-project.org/repos/icu/icu/trunk/source/i18n/usearch.cpp .
Using large buffers to manage sort keys
A good solution for the problem of not knowing the sort key size in advance is to allocate a large buffer and store all the sort keys there, while keeping a list of indexes or pointers to that buffer.
The following sample code takes a pointer to an array of UChar pointers and an array of key indexes. It allocates and fills a buffer with sort keys and returns the maximum size of a sort key. Once you have done this for your strings, you just need to allocate fields of that maximum size and copy your sort keys from the buffer into the fields.
uint32_t |
Copyright (c) 2000 - 2008 IBM and Others - PDF Version - Feedback: http://icu-project.org/contacts.html
User Guide for ICU v4.0 Generated 2008-09-11.
