Skip to content
Snippets Groups Projects
Commit 148a6dee authored by Michael Paquier's avatar Michael Paquier
Browse files

pg_sasl_prepare: Implement decomposition of Hangul characters

The Unicode documentation is rather specific on this matter:
http://unicode.org/reports/tr15/tr15-18.html#Hangul
parent 98f3a3cd
No related branches found
No related tags found
No related merge requests found
Loading
Loading
@@ -93,7 +93,6 @@ get_code_entry(uint32 code)
sizeof(pg_utf_decomposition),
conv_compare);
 
Assert(entry != NULL);
return entry;
}
 
Loading
Loading
@@ -108,6 +107,26 @@ get_decomposed_size(uint32 code)
int size = 0;
int i;
 
/*
* Fast path for Hangul characters not stored in tables to save memory
* as decomposition is algorithmic.
* See http://unicode.org/reports/tr15/tr15-18.html, annex 10 for details
* on the matter.
*/
if (code >= 0xAC00 && code < 0xD7A4)
{
uint32 l, v, t, hindex;
hindex = code - 0xAC00;
l = 0x1100 + hindex / (21 * 28);
v = 0x1161 + (hindex % (21 * 28)) / 28;
t = hindex % 28;
if (t != 0)
return 3;
return 2;
}
entry = get_code_entry(code);
 
/* Just count current code if no other decompositions */
Loading
Loading
@@ -143,6 +162,36 @@ decompose_code(uint32 code, int **result, int *current)
pg_utf_decomposition *entry;
int i;
 
/*
* Fast path for Hangul characters not stored in tables to save memory
* as decomposition is algorithmic.
* See http://unicode.org/reports/tr15/tr15-18.html, annex 10 for details
* on the matter.
*/
if (code >= 0xAC00 && code < 0xD7A4)
{
uint32 l, v, t, hindex;
int *res = *result;
hindex = code - 0xAC00;
l = 0x1100 + hindex / (21 * 28);
v = 0x1161 + (hindex % (21 * 28)) / 28;
t = hindex % 28;
res[*current] = l;
(*current)++;
res[*current] = v;
(*current)++;
if (t != 0)
{
res[*current] = 0x11A7 + t;
(*current)++;
}
return;
}
entry = get_code_entry(code);
 
/*
Loading
Loading
@@ -233,6 +282,14 @@ pg_sasl_prepare(PG_FUNCTION_ARGS)
pg_utf_decomposition *prevEntry = get_code_entry(prev);
pg_utf_decomposition *nextEntry = get_code_entry(next);
 
/*
* If no entries are found, the character used is either a Hangul
* character or a character with a class of 0 and no decompositions,
* so move on to the next result.
*/
if (prevEntry == NULL || nextEntry == NULL)
continue;
/*
* Per Unicode (http://unicode.org/reports/tr15/tr15-18.html) annex 4,
* a sequence of two adjacent characters in a string is an exchangeable
Loading
Loading
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment