Skip to content
Snippets Groups Projects
Commit 2c46337f authored by Michael Paquier's avatar Michael Paquier
Browse files

pg_sasl_prepare: Move decomposition tables into sub-tables

The main table now tracks the length of each decomposition. That length,
together with the character code, is then used to look up the decomposition
itself in a set of sub-tables divided by decomposition size. This brings the
binary size down from 2.4MB to 120kB, or 20 times less. I am not sure it can
be reduced much further, as no space is wasted now.
parent 2af81dfe
No related branches found
No related tags found
No related merge requests found
Loading
Loading
@@ -69,14 +69,18 @@ print $OUTPUT <<HEADER;
*/
typedef struct
{
uint32 utf; /* UTF-8 */
uint8 class; /* combining class of character */
uint32 codes[18]; /* decomposition codes */
uint32 utf; /* UTF-8 */
uint8 class; /* combining class of character */
uint8 dec_size; /* size of decomposition code list */
} pg_utf_decomposition;
 
/* conversion table */
HEADER
print $OUTPUT "static const pg_utf_decomposition SASLPrepConv[ $input_lines ] = {\n";
print $OUTPUT "static const pg_utf_decomposition SASLPrepConv[ $input_lines ] =\n{\n";
# Hash for decomposition tables made of string arrays (one for each
# character decomposition, classified by size).
my %decomp_tabs = ();
 
my $first_item = 1;
while ( my $line = <$INPUT> )
Loading
Loading
@@ -107,18 +111,33 @@ while ( my $line = <$INPUT> )
print $OUTPUT ",\n";
}
 
# Remove decomposition type if any, keep only character codes and
# then print them.
$decom =~ s/\<[^][]*\>//g;
my @decom_elts = split(" ", $decom);
# Now print a single entry in the conversion table.
print $OUTPUT "\t{";
# Code number
print $OUTPUT "0x$code, ";
# Combining class
print $OUTPUT "$class, {";
print $OUTPUT "$class, ";
# Decomposition size
# Print size of decomposition
my $decom_size = scalar(@decom_elts);
 
# Remove decomposition type if any, keep only character codes and
# then print them.
$decom =~ s/\<[^][]*\>//g;
my @decom_elts = split(" ", $decom);
print $OUTPUT "$decom_size}";
# If the character has no decomposition we are done.
next if $decom_size == 0;
# Now save the decompositions into a dedicated area that will
# be written afterwards. First build the entry dedicated to
# a sub-table with the code and decomposition.
my $first_decom = 1;
my $decomp_string = "{";
# Code number
$decomp_string .= "0x$code, {";
foreach(@decom_elts)
{
if ($first_decom)
Loading
Loading
@@ -127,14 +146,53 @@ while ( my $line = <$INPUT> )
}
else
{
print $OUTPUT ", ";
$decomp_string .= ", ";
}
my $decom_data = get_hexa_code($_);
print $OUTPUT "0x$decom_data";
$decomp_string .= "0x$decom_data";
}
$decomp_string .= "}}";
# Store it in its dedicated list.
push(@{ $decomp_tabs{$decom_size} }, $decomp_string);
}
print $OUTPUT "\n};\n\n\n";
# Print the decomposition tables, one sub-table per decomposition size.
#
# Every string stored in %decomp_tabs is already a complete C initializer
# of the form "{0xCODE, {0x..., ...}}", so entries are emitted as-is and
# only the table itself has to be closed afterwards.  Sizes are sorted
# numerically so the sub-tables come out in natural order (1, 2, ..., 18)
# rather than in string order (1, 18, 2, ...).
foreach my $decomp_size (sort { $a <=> $b } keys %decomp_tabs)
{
	my @decomp_entries = @{ $decomp_tabs{$decomp_size} };
	my $decomp_length = scalar(@decomp_entries);

	# First print the header: the struct type dedicated to this size,
	# followed by the opening of the sub-table.
	print $OUTPUT <<HEADER;
\n\n/* Decomposition table with entries of list length of $decomp_size */
typedef struct
{
uint32 utf; /* UTF-8 */
uint32 decomp[$decomp_size]; /* decomposition codes */
} pg_utf_decomposition_size_$decomp_size;
static const pg_utf_decomposition_size_$decomp_size UtfDecomp_$decomp_size [ $decomp_length ] =
{
HEADER

	# Print each entry, one per line, separated by commas.  A size only
	# exists as a key once an entry has been pushed for it, so the list
	# is never empty here.
	print $OUTPUT "\t" . join(",\n\t", @decomp_entries);

	# Close the sub-table.  The entries carry their own closing braces,
	# so nothing else must be printed before the terminator.
	print $OUTPUT "\n};\n";
}

# Buffered write errors only surface at close time, so check it for the
# generated file.
close $OUTPUT or die "could not close output file: $!";
close $INPUT;
Loading
Loading
@@ -56,7 +56,7 @@ new_intArrayType(int num)
}
 
/*
* comparison routine for bsearch()
* comparison routine for bsearch() of main conversion table.
* this routine is intended for UTF8 code -> conversion entry
*/
static int
Loading
Loading
@@ -70,8 +70,31 @@ conv_compare(const void *p1, const void *p2)
}
 
/*
* Get the entry corresponding to code in the conversion table. This
* is useful to avoid repeating the calls to bsearch everywhere.
* Set of comparison functions for sub-tables.
*/
/*
 * CONV_COMPARE_SIZE(type)
 *
 * Define conv_compare_size_<type>(), a comparison routine for bsearch()
 * on the sub-table whose elements are pg_utf_decomposition_size_<type>.
 * p1 is the search key, a pointer to the uint32 code being looked up;
 * p2 points to a sub-table element, compared through its "utf" field.
 * Returns 1, 0 or -1 if the key is greater than, equal to or lower than
 * the element.
 *
 * The macro expands to a complete function definition, so it must not be
 * followed by a semicolon: a stray ';' at file scope is an empty
 * declaration, which ISO C does not allow.
 */
#define CONV_COMPARE_SIZE(type) \
static int \
conv_compare_size_##type(const void *p1, const void *p2) \
{ \
	uint32 v1 = *(const uint32 *) p1; \
	uint32 v2 = ((const pg_utf_decomposition_size_##type *) p2)->utf; \
	return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1); \
}

/* Update this list if new sub-tables are present in utf8_table.h */
CONV_COMPARE_SIZE(1)
CONV_COMPARE_SIZE(2)
CONV_COMPARE_SIZE(3)
CONV_COMPARE_SIZE(4)
CONV_COMPARE_SIZE(5)
CONV_COMPARE_SIZE(6)
CONV_COMPARE_SIZE(7)
CONV_COMPARE_SIZE(8)
CONV_COMPARE_SIZE(18)
/*
* Get the entry corresponding to code in the main conversion table.
* This is useful to avoid repeating the calls to bsearch everywhere.
*/
static pg_utf_decomposition *
get_code_entry(uint32 code)
Loading
Loading
@@ -96,6 +119,55 @@ get_code_entry(uint32 code)
return entry;
}
 
/*
 * Using an entry from the main decomposition table, return a pointer to
 * the array holding its decomposition codes.
 *
 * The sub-table to search is selected by entry->dec_size, and the array
 * returned holds exactly that many codes.  The caller is expected to pass
 * only entries with a non-zero dec_size.
 *
 * Returns NULL if there is no sub-table for this size or if the code is
 * missing from its sub-table.  Both cases mean that the main table and the
 * sub-tables are inconsistent, so they are also trapped by assertions.
 */

/*
 * Look up "code" in the sub-table for decompositions of the given size and
 * store the decomposition found, if any, into "result".  Both variables are
 * taken from the scope of the caller.  Wrapped in do { } while (0) so that
 * an invocation behaves as a single statement.
 */
#define CONV_SEARCH_SIZE(type) \
	do { \
		const pg_utf_decomposition_size_##type *item; \
		item = bsearch(&code, \
					   UtfDecomp_##type, \
					   lengthof(UtfDecomp_##type), \
					   sizeof(pg_utf_decomposition_size_##type), \
					   conv_compare_size_##type); \
		Assert(item != NULL); \
		if (item != NULL) \
			result = (uint32 *) item->decomp; \
	} while (0)

static uint32 *
get_code_decomposition(pg_utf_decomposition *entry)
{
	uint32		code = entry->utf;
	uint32	   *result = NULL;

	/* Keep this list in sync with the sub-tables of utf8_table.h */
	switch (entry->dec_size)
	{
		case 1:
			CONV_SEARCH_SIZE(1);
			break;
		case 2:
			CONV_SEARCH_SIZE(2);
			break;
		case 3:
			CONV_SEARCH_SIZE(3);
			break;
		case 4:
			CONV_SEARCH_SIZE(4);
			break;
		case 5:
			CONV_SEARCH_SIZE(5);
			break;
		case 6:
			CONV_SEARCH_SIZE(6);
			break;
		case 7:
			CONV_SEARCH_SIZE(7);
			break;
		case 8:
			CONV_SEARCH_SIZE(8);
			break;
		case 18:
			CONV_SEARCH_SIZE(18);
			break;
		default:
			/* no sub-table exists for this decomposition size */
			Assert(false);
			break;
	}

	return result;
}
/*
* Recursively look at the number of elements in the conversion table
* to calculate how many characters are used for the given code.
Loading
Loading
@@ -106,6 +178,7 @@ get_decomposed_size(uint32 code)
pg_utf_decomposition *entry;
int size = 0;
int i;
uint32 *decomp;
 
/*
* Fast path for Hangul characters not stored in tables to save memory
Loading
Loading
@@ -131,22 +204,20 @@ get_decomposed_size(uint32 code)
 
/*
* Just count current code if no other decompositions. A NULL entry
* is equivalent to a character with class 0 and no decompositions,
* so just leave.
* is equivalent to a character with class 0 and no decompositions.
*/
if (entry == NULL || entry->codes[0] == 0x0)
if (entry == NULL || entry->dec_size == 0)
return 1;
 
/*
* If this entry has other decomposition codes look at them as well.
* First get its decomposition in the list of tables available.
*/
for (i = 0; i < lengthof(SASLPrepConv[0].codes); i++)
decomp = get_code_decomposition(entry);
for (i = 0; i < entry->dec_size; i++)
{
uint32 lcode = entry->codes[i];
uint32 lcode = decomp[i];
 
/* Leave if no more decompositions */
if (lcode == 0x0)
break;
size += get_decomposed_size(lcode);
}
 
Loading
Loading
@@ -165,6 +236,7 @@ decompose_code(uint32 code, int **result, int *current)
{
pg_utf_decomposition *entry;
int i;
uint32 *decomp;
 
/*
* Fast path for Hangul characters not stored in tables to save memory
Loading
Loading
@@ -204,7 +276,7 @@ decompose_code(uint32 code, int **result, int *current)
* to a character with class 0 and no decompositions, so just leave
* also in this case.
*/
if (entry == NULL || entry->codes[0] == 0x0)
if (entry == NULL || entry->dec_size == 0)
{
int *res = *result;
 
Loading
Loading
@@ -216,13 +288,12 @@ decompose_code(uint32 code, int **result, int *current)
/*
* If this entry has other decomposition codes look at them as well.
*/
for (i = 0; i < lengthof(SASLPrepConv->codes); i++)
decomp = get_code_decomposition(entry);
for (i = 0; i < entry->dec_size; i++)
{
uint32 lcode = entry->codes[i];
uint32 lcode = decomp[i];
 
/* Leave if no more decompositions */
if (lcode == 0x0)
break;
decompose_code(lcode, result, current);
}
}
Loading
Loading
@@ -509,7 +580,7 @@ utf8_conv_table(PG_FUNCTION_ARGS)
Datum values[3];
bool nulls[3];
pg_utf_decomposition entry = SASLPrepConv[i];
int size, count;
int count;
ArrayType *decomp = NULL;
int *decomp_ptr = NULL;
 
Loading
Loading
@@ -523,21 +594,19 @@ utf8_conv_table(PG_FUNCTION_ARGS)
values[1] = Int16GetDatum((int16) entry.class);
 
/* decomposition array */
size = 0;
for (count = 0; count < lengthof(entry.codes); count++)
{
if (entry.codes[count] == 0x0)
break;
size++;
}
if (size == 0)
if (entry.dec_size == 0)
nulls[2] = true;
else
{
decomp = new_intArrayType(size);
uint32 *entry_decomp;
/* Get decomposition of entry */
entry_decomp = get_code_decomposition(&entry);
decomp = new_intArrayType(entry.dec_size);
decomp_ptr = ARRPTR(decomp);
for (count = 0; count < size; count++)
decomp_ptr[count] = (int) entry.codes[count];
for (count = 0; count < entry.dec_size; count++)
decomp_ptr[count] = (int) entry_decomp[count];
values[2] = PointerGetDatum(decomp);
}
 
Loading
Loading
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment