pg_sasl_prepare: Use upstream implementation of SASLprep

No need to maintain the same code twice.

pg_sasl_prepare: Use upstream implementation of SASLprep
a4111375 · Michael Paquier · bed5a92c · a4111375 · a4111375 · bed5a92c
Commit a4111375 authored 7 years ago by Michael Paquier
--- a/pg_sasl_prepare/Makefile
+++ b/pg_sasl_prepare/Makefile
@@ -4,16 +4,6 @@ EXTENSION = pg_sasl_prepare
 DATA = pg_sasl_prepare--1.0.sql
 PGFILEDESC = "pg_sasl_prepare - SASLprepare for UTF-8 strings"
  
-DOWNLOAD = wget -O $@ --no-use-server-timestamps
-
-all: utf8_table pg_sasl_prepare.so
-
-UnicodeData.txt:
-	$(DOWNLOAD) http://unicode.org/Public/UNIDATA/UnicodeData.txt
-
-utf8_table: UnicodeData.txt
-	$(PERL) generate_conv.pl UnicodeData.txt utf8_table.h
-
 PG_CONFIG = pg_config
 PGXS := $(shell $(PG_CONFIG) --pgxs)
 include $(PGXS)
--- a/pg_sasl_prepare/README
+++ b/pg_sasl_prepare/README
 pg_sasl_prepare
 ===============
  
-Implementation of RFC4013, SASLprepare to work on strings for SCRAM
-authentication protocol. A couple of utilities are included as well
-to manipulate UTF-8 strings as array of integers. UnicodeData.txt
-is as well used as a base to generate a conversion table that Postgres
-can directly refer to for decomposition and class.
+Simple utility wrapper on top of the PostgreSQL implementation of SASLprep,
+as specified in RFC 4013.
--- a/pg_sasl_prepare/generate_conv.pl
+++ b/pg_sasl_prepare/generate_conv.pl
-# Generate a conversion table using a Unicode data file as input, saving
-# in the output as a header file in the location specified by the caller
-# of this script.
-
-use strict;
-use warnings;
-
-use utf8;
-use open ':std', ':encoding(UTF-8)';
-
-# Convert a single unicode character using code given by caller to
-# hexadecimal and return it to caller. This is useful to treat the
-# first or sixth columns of UnicodeData.txt and print it in
-# hexadecimal format.
-sub get_hexa_code
-{
-	my $code = shift;
-
-	# First generate a unicode string, and then convert it.
-	my $s = sprintf("\\u%s", $code);
-	$s =~ s/\\u(....)/chr(hex($1))/eg;
-	# Encode it to get the set of bytes wanted.
-	utf8::encode($s);
-
-	# Compute result
-	my $result = "";
-	for (my $key = 0; $key < length($s); $key++)
-	{
-		my $char = substr($s, $key, 1);
-		$char = sprintf("%x", ord($char));
-		$result = $result . $char;
-	}
-
-	return $result;
-}
-
-die "Usage: $0 INPUT_FILE OUTPUT_PUT\n" if @ARGV != 2;
-my $input_file = $ARGV[0];
-my $output_file = $ARGV[1];
-
-# Script-specific and post composition that need to be excluded from the tables
-# generated per http://www.unicode.org/reports/tr15/.
-my @no_recomp_codes = (
-	'0958',  # DEVANAGARI LETTER QA
-	'0959',  # DEVANAGARI LETTER KHHA
-	'095A',  # DEVANAGARI LETTER GHHA
-	'095B',  # DEVANAGARI LETTER ZA
-	'095C',  # DEVANAGARI LETTER DDDHA
-	'095D',  # DEVANAGARI LETTER RHA
-	'095E',  # DEVANAGARI LETTER FA
-	'095F',  # DEVANAGARI LETTER YYA
-	'09DC',  # BENGALI LETTER RRA
-	'09DD',  # BENGALI LETTER RHA
-	'09DF',  # BENGALI LETTER YYA
-	'0A33',  # GURMUKHI LETTER LLA
-	'0A36',  # GURMUKHI LETTER SHA
-	'0A59',  # GURMUKHI LETTER KHHA
-	'0A5A',  # GURMUKHI LETTER GHHA
-	'0A5B',  # GURMUKHI LETTER ZA
-	'0A5E',  # GURMUKHI LETTER FA
-	'0B5C',  # ORIYA LETTER RRA
-	'0B5D',  # ORIYA LETTER RHA
-	'0F43',  # TIBETAN LETTER GHA
-	'0F4D',  # TIBETAN LETTER DDHA
-	'0F52',  # TIBETAN LETTER DHA
-	'0F57',  # TIBETAN LETTER BHA
-	'0F5C',  # TIBETAN LETTER DZHA
-	'0F69',  # TIBETAN LETTER KSSA
-	'0F76',  # TIBETAN VOWEL SIGN VOCALIC R
-	'0F78',  # TIBETAN VOWEL SIGN VOCALIC L
-	'0F93',  # TIBETAN SUBJOINED LETTER GHA
-	'0F9D',  # TIBETAN SUBJOINED LETTER DDHA
-	'0FA2',  # TIBETAN SUBJOINED LETTER DHA
-	'0FA7',  # TIBETAN SUBJOINED LETTER BHA
-	'0FAC',  # TIBETAN SUBJOINED LETTER DZHA
-	'0FB9',  # TIBETAN SUBJOINED LETTER KSSA
-	'FB1D',  # HEBREW LETTER YOD WITH HIRIQ:
-	'FB1F',  # HEBREW LIGATURE YIDDISH YOD YOD PATAH
-	'FB2A',  # HEBREW LETTER SHIN WITH SHIN DOT
-	'FB2B',  # HEBREW LETTER SHIN WITH SIN DOT
-	'FB2C',  # HEBREW LETTER SHIN WITH DAGESH AND SHIN DOT
-	'FB2D',  # HEBREW LETTER SHIN WITH DAGESH AND SIN DOT
-	'FB2E',  # HEBREW LETTER ALEF WITH PATAH
-	'FB2F',  # HEBREW LETTER ALEF WITH QAMATS
-	'FB30',  # HEBREW LETTER ALEF WITH MAPIQ
-	'FB31',  # HEBREW LETTER BET WITH DAGESH
-	'FB32',  # HEBREW LETTER GIMEL WITH DAGESH
-	'FB33',  # HEBREW LETTER DALET WITH DAGESH
-	'FB34',  # HEBREW LETTER HE WITH MAPIQ
-	'FB35',  # HEBREW LETTER VAV WITH DAGESH
-	'FB36',  # HEBREW LETTER ZAYIN WITH DAGESH
-	'FB38',  # HEBREW LETTER TET WITH DAGESH
-	'FB39',  # HEBREW LETTER YOD WITH DAGESH
-	'FB3A',  # HEBREW LETTER FINAL KAF WITH DAGESH
-	'FB3B',  # HEBREW LETTER KAF WITH DAGESH
-	'FB3C',  # HEBREW LETTER LAMED WITH DAGESH
-	'FB3E',  # HEBREW LETTER MEM WITH DAGESH
-	'FB40',  # HEBREW LETTER NUN WITH DAGESH
-	'FB41',  # HEBREW LETTER SAMEKH WITH DAGESH
-	'FB43',  # HEBREW LETTER FINAL PE WITH DAGESH
-	'FB44',  # HEBREW LETTER PE WITH DAGESH
-	'FB46',  # HEBREW LETTER TSADI WITH DAGESH
-	'FB47',  # HEBREW LETTER QOF WITH DAGESH
-	'FB48',  # HEBREW LETTER RESH WITH DAGESH
-	'FB49',  # HEBREW LETTER SHIN WITH DAGESH
-	'FB4A',  # HEBREW LETTER TAV WITH DAGESH
-	'FB4B',  # HEBREW LETTER VAV WITH HOLAM
-	'FB4C',  # HEBREW LETTER BET WITH RAFE
-	'FB4D',  # HEBREW LETTER KAF WITH RAFE
-	'FB4E',  # HEBREW LETTER PE WITH RAFE
-	# post composition exclusion
-	'2ADC',  #  FORKING
-	'1D15E', # MUSICAL SYMBOL HALF NOTE
-	'1D15F', # MUSICAL SYMBOL QUARTER NOTE
-	'1D160', # MUSICAL SYMBOL EIGHTH NOTE
-	'1D161', # MUSICAL SYMBOL SIXTEENTH NOTE
-	'1D162', # MUSICAL SYMBOL THIRTY-SECOND NOTE
-	'1D163', # MUSICAL SYMBOL SIXTY-FOURTH NOTE
-	'1D164', # MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
-	'1D1BB', # MUSICAL SYMBOL MINIMA
-	'1D1BC', # MUSICAL SYMBOL MINIMA BLACK
-	'1D1BD', # MUSICAL SYMBOL SEMIMINIMA WHITE
-	'1D1BE', # MUSICAL SYMBOL SEMIMINIMA BLACK
-	'1D1BF', # MUSICAL SYMBOL FUSA WHITE
-	'1D1C0'  # MUSICAL SYMBOL FUSA BLACK
-    );
-
-# Count number of lines in input file to get size of table.
-my $input_lines = 0;
-open(my $FH, $input_file) or die "Could not open input file $input_file: $!.";
-while (my $line = <$FH>)
-{
-	my @elts = split(';', $line);
-	my $code = get_hexa_code($elts[0]);
-
-	# Skip codes longer than 4 bytes, or 8 characters.
-	next if length($code) > 8;
-
-	# Skip codes that cannot be composed
-	my $found_no_recomp = 0;
-	foreach my $lcode  (@no_recomp_codes)
-	{
-		if ($lcode eq $elts[0])
-		{
-			$found_no_recomp = 1;
-			last;
-		}
-	}
-	next if $found_no_recomp;
-
-	# Skip characters with no decompositions and a class of 0.
-	next if $elts[3] eq '0' && $elts[5] eq '';
-
-	$input_lines++;
-}
-close $FH;
-
-# Open the input file and treat it line by line, one for each Unicode
-# character.
-open my $INPUT, $input_file or die "Could not open input file $input_file: $!";
-open my $OUTPUT, "> $output_file" or die "Could not open output file $output_file: $!\n";
-
-# Print header of output file.
-print $OUTPUT <<HEADER;
-/*
- * File auto-generated from generate_conv.pl, do not edit. There is
- * deliberately not an #ifndef PG_UTF8_TABLE_H here.
- */
-typedef struct
-{
-	uint32	utf;		/* UTF-8 */
-	uint8	class;		/* combining class of character */
-	uint8	dec_size;	/* size of decomposition code list */
-} pg_utf_decomposition;
-
-/* conversion table */
-HEADER
-print $OUTPUT "static const pg_utf_decomposition SASLPrepConv[ $input_lines ] =\n{\n";
-
-# Hash for decomposition tables made of string arrays (one for each
-# character decomposition, classified by size).
-my %decomp_tabs = ();
-
-my $first_item = 1;
-while ( my $line = <$INPUT> )
-{
-	# Split the line wanted and get the fields needed:
-	# - Unicode number
-	# - Combining class
-	# - Decomposition table
-	my @elts = split(';', $line);
-	my $code = get_hexa_code($elts[0]);
-	my $class = sprintf("0x%02x", $elts[3]);
-	my $decom = $elts[5];
-
-	# Skip codes longer than 4 bytes, or 8 characters.
-	next if length($code) > 8;
-
-	# Skip characters with no decompositions and a class of 0.
-	# to reduce the table size.
-	next if $elts[3] eq '0' && $elts[5] eq '';
-
-	# Skip codes that cannot be composed
-	my $found_no_recomp = 0;
-	foreach my $lcode  (@no_recomp_codes)
-	{
-		if ($lcode eq $elts[0])
-		{
-			$found_no_recomp = 1;
-			last;
-		}
-	}
-	next if $found_no_recomp;
-
-	# Print a comma for all items except the first one.
-	if ($first_item)
-	{
-	    $first_item = 0;
-	}
-	else
-	{
-	    print $OUTPUT ",\n";
-	}
-
-	# Remove decomposition type if any, keep only character codes and
-	# then print them.
-	$decom =~ s/\<[^][]*\>//g;
-	my @decom_elts = split(" ", $decom);
-
-	# Now print a single entry in the conversion table.
-	print $OUTPUT "\t{";
-	# Code number
-	print $OUTPUT "0x$code, ";
-	# Combining class
-	print $OUTPUT "$class, ";
-	# Decomposition size
-	# Print size of decomposition
-	my $decom_size = scalar(@decom_elts);
-
-	print $OUTPUT "$decom_size}";
-
-	# If the character has no decomposition we are done.
-	next if $decom_size == 0;
-
-	# Now save the decompositions into a dedicated area that will
-	# be written afterwards.  First build the entry dedicated to
-	# a sub-table with the code and decomposition.
-	my $first_decom = 1;
-	my $decomp_string = "{";
-	# Code number
-	$decomp_string .= "0x$code, {";
-	foreach(@decom_elts)
-	{
-		if ($first_decom)
-		{
-		    $first_decom = 0;
-		}
-		else
-		{
-		    $decomp_string .= ", ";
-		}
-		my $decom_data = get_hexa_code($_);
-		$decomp_string .= "0x$decom_data";
-	}
-	$decomp_string .= "}}";
-	# Store it in its dedicated list.
-	push(@{ $decomp_tabs{$decom_size} }, $decomp_string);
-}
-
-print $OUTPUT "\n};\n\n\n";
-
-# Print the decomposition tables by size.
-foreach my $decomp_size (sort keys %decomp_tabs )
-{
-	my @decomp_entries = @{ $decomp_tabs{$decomp_size}};
-	my $decomp_length = scalar(@decomp_entries);
-
-	# First print the header.
-	print $OUTPUT <<HEADER;
-\n\n/* Decomposition table with entries of list length of $decomp_size */
-typedef struct
-{
-	uint32	utf;		/* UTF-8 */
-	uint32	decomp[$decomp_size];	/* size of decomposition code list */
-} pg_utf_decomposition_size_$decomp_size;
-
-static const pg_utf_decomposition_size_$decomp_size UtfDecomp_$decomp_size [ $decomp_length ] =
-{
-HEADER
-
-	$first_item = 1;
-	# Print each entry.
-	foreach(@decomp_entries)
-	{
-		if ($first_item)
-		{
-		    $first_item = 0;
-		}
-		else
-		{
-		    print $OUTPUT ",\n";
-		}
-		print $OUTPUT "\t$_";
-	}
-	print $OUTPUT "\n};\n";
-}
-
-close $OUTPUT;
-close $INPUT;
--- a/pg_sasl_prepare/pg_sasl_prepare--1.0.sql
+++ b/pg_sasl_prepare/pg_sasl_prepare--1.0.sql
@@ -4,27 +4,7 @@
 \echo Use "CREATE EXTENSION pg_sasl_prepare" to load this file. \quit
  
 -- This is a pg_sasl_prepare
-CREATE FUNCTION pg_sasl_prepare(_int4)
-RETURNS _int4
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-- Conversion functions
-CREATE FUNCTION utf8_to_array(text)
-RETURNS _int4
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
-
-CREATE FUNCTION array_to_utf8(_int4)
+CREATE FUNCTION pg_sasl_prepare(text)
 RETURNS text
 AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
-
-- Conversion table fetch
-CREATE OR REPLACE FUNCTION utf8_conv_table(
-    OUT code int,
-    OUT class smallint,
-    OUT decomposition _int4)
-RETURNS SETOF record
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
+LANGUAGE C STRICT;
--- a/pg_sasl_prepare/pg_sasl_prepare.c
+++ b/pg_sasl_prepare/pg_sasl_prepare.c
 /*-------------------------------------------------------------------------
 *
 * pg_sasl_prepare.c
- *		Set of functions for a minimal extension template
+ *		Wrapper on top of upstream implementation of SASLprep, changing
+ *		a UTF-8 string into a prepared string for a SCRAM exchange.
 *
 * Copyright (c) 1996-2017, PostgreSQL Global Development Group
 *
@@ -13,729 +14,37 @@
  
 #include "postgres.h"
  
-#include "access/tupdesc.h"
-#include "catalog/pg_type.h"
+#include "common/saslprep.h"
 #include "fmgr.h"
 #include "funcapi.h"
 #include "mb/pg_wchar.h"
-#include "miscadmin.h"
-#include "nodes/execnodes.h"
-#include "utils/array.h"
 #include "utils/builtins.h"
-#include "utils/tuplestore.h"
-
-/* local includes */
-#include "utf8_table.h"
  
 PG_MODULE_MAGIC;
  
-/* Utilities for array manipulation */
-#define ARRPTR(x)  ((int32 *) ARR_DATA_PTR(x))
-#define ARRNELEMS(x)  ArrayGetNItems(ARR_NDIM(x), ARR_DIMS(x))
-
-/* Constants for calculations with Hangul characters */
-#define SBASE		0xEAB080	/* U+AC00 */
-#define LBASE		0xE18480	/* U+1100 */
-#define VBASE		0xE185A1	/* U+1161 */
-#define TBASE		0xE186A7	/* U+11A7 */
-#define LCOUNT		19
-#define VCOUNT		21
-#define TCOUNT		28
-#define NCOUNT		VCOUNT * TCOUNT
-#define SCOUNT		LCOUNT * NCOUNT
-
-/*
- * Create a new int array with room for "num" elements.
- * Taken from contrib/intarray/.
- */
-static ArrayType *
-new_intArrayType(int num)
-{
-	ArrayType  *r;
-	int		 nbytes = ARR_OVERHEAD_NONULLS(1) + sizeof(int) * num;
-
-	r = (ArrayType *) palloc0(nbytes);
-
-	SET_VARSIZE(r, nbytes);
-	ARR_NDIM(r) = 1;
-	r->dataoffset = 0;		/* marker for no null bitmap */
-	ARR_ELEMTYPE(r) = INT4OID;
-	ARR_DIMS(r)[0] = num;
-	ARR_LBOUND(r)[0] = 1;
-
-	return r;
-}
-
-/*
- * comparison routine for bsearch() of main conversion table.
- * this routine is intended for UTF8 code -> conversion entry
- */
-static int
-conv_compare(const void *p1, const void *p2)
-{
-	uint32		v1, v2;
-
-	v1 = *(const uint32 *) p1;
-	v2 = ((const pg_utf_decomposition *) p2)->utf;
-	return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
-}
-
-/*
- * Set of comparison functions for sub-tables.
- */
-#define CONV_COMPARE_SIZE(type)									\
-static int														\
-conv_compare_size_##type(const void *p1, const void *p2)		\
-{																\
-	uint32		v1, v2;											\
-	v1 = *(const uint32 *) p1;									\
-	v2 = ((const pg_utf_decomposition_size_##type *) p2)->utf;	\
-	return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);				\
-}
-/* Update this list of new sub-tables are present in utf8_table.h */
-CONV_COMPARE_SIZE(1);
-CONV_COMPARE_SIZE(2);
-CONV_COMPARE_SIZE(3);
-CONV_COMPARE_SIZE(4);
-CONV_COMPARE_SIZE(5);
-CONV_COMPARE_SIZE(6);
-CONV_COMPARE_SIZE(7);
-CONV_COMPARE_SIZE(8);
-CONV_COMPARE_SIZE(18);
-
-/*
- * Get the entry corresponding to code in the main conversion table.
- * This is useful to avoid repeating the calls to bsearch everywhere.
- */
-static pg_utf_decomposition *
-get_code_entry(uint32 code)
-{
-	pg_utf_decomposition *entry;
-
-	/*
-	 * bsearch() works as follows:
-	 * - a key to check for matches.
-	 * - a pointer pointing to the base of the conversion table.
-	 * - number of elements in the array to look for,
-	 * - size of an array element.
-	 * - comparison function.
-	 * If a match cannot be found, NULL is returned.
-	 */
-	entry = bsearch(&code,
-					(void *) SASLPrepConv,
-					lengthof(SASLPrepConv),
-					sizeof(pg_utf_decomposition),
-					conv_compare);
-
-	return entry;
-}
-
-/*
- * Using an entry from the main decomposition table, return an
- * array which is a pointer to the decomposition.
- */
-#define CONV_SEARCH_SIZE(type)					\
-{												\
-	pg_utf_decomposition_size_##type *item;		\
-	uint32	*result;							\
-	item = bsearch(&code,						\
-		(void *) UtfDecomp_##type,				\
-		lengthof(UtfDecomp_##type),				\
-		sizeof(pg_utf_decomposition_size_##type), \
-		conv_compare_size_##type);				\
-	result = item->decomp;						\
-	return result;								\
-} while(0);
-static uint32 *
-get_code_decomposition(pg_utf_decomposition *entry)
-{
-	uint32	code = entry->utf;
-
-	switch (entry->dec_size)
-	{
-		case 1:
-			CONV_SEARCH_SIZE(1);
-		case 2:
-			CONV_SEARCH_SIZE(2);
-		case 3:
-			CONV_SEARCH_SIZE(3);
-		case 4:
-			CONV_SEARCH_SIZE(4);
-		case 5:
-			CONV_SEARCH_SIZE(5);
-		case 6:
-			CONV_SEARCH_SIZE(6);
-		case 7:
-			CONV_SEARCH_SIZE(7);
-		case 8:
-			CONV_SEARCH_SIZE(8);
-		case 18:
-			CONV_SEARCH_SIZE(18);
-		default:
-			Assert(false);
-	}
-
-	/* should not come here */
-	return NULL;
-}
-
-/*
- * Recursively look at the number of elements in the conversion table
- * to calculate how many characters are used for the given code.
- */
-static int
-get_decomposed_size(uint32 code)
-{
-	pg_utf_decomposition *entry;
-	int		size = 0;
-	int		i;
-	uint32 *decomp;
-
-	/*
-	 * Fast path for Hangul characters not stored in tables to save memory
-	 * as decomposition is algorithmic.
-	 * See http://unicode.org/reports/tr15/tr15-18.html, annex 10 for details
-	 * on the matter.
-	 */
-	if (code >= SBASE && code < SBASE + SCOUNT)
-	{
-		uint32	tindex, sindex;
-
-		sindex = code - SBASE;
-		tindex = sindex % TCOUNT;
-
-		if (tindex != 0)
-			return 3;
-		return 2;
-	}
-
-	entry = get_code_entry(code);
-
-	/*
-	 * Just count current code if no other decompositions.  A NULL entry
-	 * is equivalent to a character with class 0 and no decompositions.
-	 */
-	if (entry == NULL || entry->dec_size == 0)
-		return 1;
-
-	/*
-	 * If this entry has other decomposition codes look at them as well.
-	 * First get its decomposition in the list of tables available.
-	 */
-	decomp = get_code_decomposition(entry);
-	for (i = 0; i < entry->dec_size; i++)
-	{
-		uint32 lcode = decomp[i];
-
-		size += get_decomposed_size(lcode);
-	}
-
-	return size;
-}
-
-/*
- * Recompose a set of characters. For hangul characters, the calculation
- * is algorithmic. For others, an inverse lookup at the decomposition
- * table is necessary. Returns true if a recomposition can be done, and
- * false otherwise.
- */
-static bool
-recompose_code(uint32 start, uint32 code, uint32 *result)
-{
-	/* No need to care about ascii characters */
-	if (start <= 0x7f || code <= 0x7f)
-		return false;
-
-	/* Hangul characters go here */
-	if (start >= LBASE && start < LBASE + LCOUNT &&
-		code >= VBASE && code < VBASE + VCOUNT)
-	{
-		*result = ((start - LBASE) * VCOUNT + code - VBASE) * TCOUNT + SBASE;
-		return true;
-	}
-	else if (start >= SBASE && start < (SBASE + SCOUNT) &&
-			 ((start - SBASE) % TCOUNT) == 0 &&
-			 code >= TBASE && code < (TBASE + TCOUNT))
-	{
-		*result = start + code - TBASE;
-		return true;
-	}
-	else
-	{
-		int i;
-
-		/*
-		 * Do an inverse lookup of the decomposition tables to see if
-		 * anything matches. The comparison just needs to be a perfect
-		 * match on the sub-table of size two, because the start character
-		 * has already been recomposed partially.
-		 */
-		for (i = 0; i < lengthof(UtfDecomp_2); i++)
-		{
-			pg_utf_decomposition_size_2 entry = UtfDecomp_2[i];
-			if (start == entry.decomp[0] &&
-				code == entry.decomp[1])
-			{
-				*result = entry.utf;
-				return true;
-			}
-		}
-    }
-
-	return false;
-}
-
-/*
- * Decompose the given code into the array given by caller. The
- * decomposition begins at the position given by caller, saving one
- * lookup at the conversion table. The current position needs to be
- * updated here to let the caller know from where to continue filling
- * in the array result.
- */
-static void
-decompose_code(uint32 code, int **result, int *current)
-{
-	pg_utf_decomposition *entry;
-	int		i;
-	uint32 *decomp;
-
-	/*
-	 * Fast path for Hangul characters not stored in tables to save memory
-	 * as decomposition is algorithmic.
-	 * See http://unicode.org/reports/tr15/tr15-18.html, annex 10 for details
-	 * on the matter.
-	 */
-	if (code >= SBASE && code < SBASE + SCOUNT)
-	{
-		uint32	l, v, tindex, sindex;
-		int   *res = *result;
-
-		sindex = code - SBASE;
-		l = LBASE + sindex / (VCOUNT * TCOUNT);
-		v = VBASE + (sindex % (VCOUNT * TCOUNT)) / TCOUNT;
-		tindex = sindex % TCOUNT;
-
-		res[*current] = l;
-		(*current)++;
-		res[*current] = v;
-		(*current)++;
-
-		if (tindex != 0)
-		{
-			res[*current] = TBASE + tindex;
-			(*current)++;
-		}
-
-		return;
-	}
-
-	entry = get_code_entry(code);
-
-	/*
-	 * Just fill in with the current decomposition if there are no
-	 * decomposition codes to recurse to.  A NULL entry is equivalent
-	 * to a character with class 0 and no decompositions, so just leave
-	 * also in this case.
-	 */
-	if (entry == NULL || entry->dec_size == 0)
-	{
-		int *res = *result;
-
-		res[*current] = (int) code;
-		(*current)++;
-		return;
-	}
-
-	/*
-	 * If this entry has other decomposition codes look at them as well.
-	 */
-	decomp = get_code_decomposition(entry);
-	for (i = 0; i < entry->dec_size; i++)
-	{
-		uint32 lcode = decomp[i];
-
-		/* Leave if no more decompositions */
-		decompose_code(lcode, result, current);
-	}
-}
-
-
 /*
 * pg_sasl_prepare
 *
- * Perform SASLprepare (NKFC) on a integer array identifying individual
- * multibyte UTF-8 characters.
+ * Perform SASLprep (which includes NFKC normalization) on a UTF-8
+ * text string. This is a simple wrapper on top of the PostgreSQL
+ * implementation.
 */
 PG_FUNCTION_INFO_V1(pg_sasl_prepare);
 Datum
 pg_sasl_prepare(PG_FUNCTION_ARGS)
 {
-	ArrayType  *input = PG_GETARG_ARRAYTYPE_P(0);
-	int		   *input_ptr = ARRPTR(input);
-	ArrayType  *result;
-	int		   *result_ptr;
-	int		   *decomp_ptr;
-	int		   *recomp_ptr;
-	int			count;
-	int			size = 0;
-	int			decomp_size = 0;
-	int			recomp_size = 0;
-	/* variables for recomposition */
-	int			last_class;
-	int			starter_pos;
-	int			target_pos;
-	uint32		starter_ch;
-
-	/* First do the compatibility decomposition */
-
-	/*
-	 * Look recursively at the convertion table to understand the number
-	 * of elements that need to be created.
-	 */
-	for (count = 0; count < ARRNELEMS(input); count++)
-	{
-		uint32 code = input_ptr[count];
-
-		/*
-		 * Recursively look at the conversion table to determine into
-		 * how many characters the given code need to be decomposed.
-		 */
-		decomp_size += get_decomposed_size(code);
-	}
-
-	/*
-	 * Now fill in each entry recursively. This needs a second pass on
-	 * the conversion table.
-	 */
-	decomp_ptr = (int *) palloc(decomp_size * sizeof(int));
-	size = 0;
-	for (count = 0; count < ARRNELEMS(input); count++)
-	{
-		uint32 code = input_ptr[count];
-
-		decompose_code(code, &decomp_ptr, &size);
-
-		/*
-		 * XXX: Is it necessary to reorder the combining marks here?
-		 */
-	}
-
-	/*
-	 * Now that the decomposition is done, apply the combining class
-	 * for each multibyte character.
-	 */
-	for (count = 1; count < decomp_size; count++)
-	{
-		uint32	prev = decomp_ptr[count - 1];
-		uint32	next = decomp_ptr[count];
-		uint32	tmp;
-		pg_utf_decomposition *prevEntry = get_code_entry(prev);
-		pg_utf_decomposition *nextEntry = get_code_entry(next);
-
-		/*
-		 * If no entries are found, the character used is either an Hangul
-		 * character or a character with a class of 0 and no decompositions,
-		 * so move to next result.
-		 */
-		if (prevEntry == NULL || nextEntry == NULL)
-			continue;
-
-		/*
-		 * Per Unicode (http://unicode.org/reports/tr15/tr15-18.html) annex 4,
-		 * a sequence of two adjacent characters in a string is an exchangeable
-		 * pair if the combining class (from the Unicode Character Database)
-		 * for the first character is greater than the combining class for the
-		 * second, and the second is not a starter.  A character is a starter
-		 * if its combining class is 0.
-		 */
-		if (nextEntry->class == 0x0 || prevEntry->class == 0x0)
-			continue;
-
-		if (prevEntry->class <= nextEntry->class)
-			continue;
-
-		/* exchange can happen */
-		tmp = decomp_ptr[count - 1];
-		decomp_ptr[count - 1] = decomp_ptr[count];
-		decomp_ptr[count] = tmp;
-
-		/* backtrack to check again */
-		if (count > 1)
-			count -= 2;
-	}
-
-	/*
-	 * The last phase of NFKC is the recomposition of the multibyte string
-	 * that has been reordered previously using combining classes. The
-	 * recomposed string cannot be longer than the decomposed one, so
-	 * make the allocation of the recomposed string based on that assumption.
-	 */
-	recomp_ptr = (int *) palloc(decomp_size * sizeof(int));
-	last_class = -1;	 /* this eliminates a special check */
-	starter_pos = 0;
-	target_pos = 1;
-	starter_ch = recomp_ptr[0] = decomp_ptr[0];
-
-	for (count = 1; count < decomp_size; count++)
-	{
-		uint32 ch = (uint32) decomp_ptr[count];
-		pg_utf_decomposition *ch_entry = get_code_entry(ch);
-		int			ch_class = ch_entry == NULL ? 0 : ch_entry->class;
-		pg_wchar	composite;
-
-		if (last_class < ch_class &&
-			recompose_code(starter_ch, ch, &composite))
-		{
-			recomp_ptr[starter_pos] = composite;
-			starter_ch = composite;
-		}
-		else if (ch_class == 0)
-		{
-			starter_pos = target_pos;
-			starter_ch  = ch;
-			last_class  = -1;
-			recomp_ptr[target_pos++] = ch;
-		}
-		else
-		{
-			last_class = ch_class;
-			recomp_ptr[target_pos++] = ch;
-		}
-	}
-
-	recomp_size = target_pos;
-
-	/* And finally fill-in the result */
-	result = new_intArrayType(recomp_size);
-	result_ptr = ARRPTR(result);
-	memcpy(result_ptr, recomp_ptr, recomp_size * sizeof(uint32));
-	PG_RETURN_POINTER(result);
-}
-
-/*
- * utf8_to_array
- * Convert a UTF-8 string into an integer array.
- */
-PG_FUNCTION_INFO_V1(utf8_to_array);
-Datum
-utf8_to_array(PG_FUNCTION_ARGS)
-{
-	char	   *input = text_to_cstring(PG_GETARG_TEXT_PP(0));
-	ArrayType  *result;
-	int		   *result_ptr;
-	int			size = 0;
-	int			count;
-	int			encoding = GetDatabaseEncoding();
-	const unsigned char *utf = (unsigned char *) input;
+	char	   *password = text_to_cstring(PG_GETARG_TEXT_PP(0));
+	char	   *prep_password = NULL;
  
-	if (encoding != PG_UTF8)
+	if (GetDatabaseEncoding() != PG_UTF8)
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 				 errmsg("Database encoding is not UTF-8")));
  
-	/*
-	 * Calculate the array size first by doing a first pass on the UTF-8 string
-	 */
-	while (*utf)
-	{
-		int l;
-
-		l = pg_utf_mblen(utf);
-
-		if (!pg_utf8_islegal(utf, l))
-			elog(ERROR, "incorrect utf-8 input");
-
-		size++;
-		utf += l;
-	}
-
-	/*
-	 * And now fill in the array with all the data from each character by
-	 * doing a second pass.
-	 */
-	result = new_intArrayType(size);
-	result_ptr = ARRPTR(result);
-	utf = (unsigned char *) input;
-	count = 0;
-	while (*utf)
-	{
-		uint32	iutf = 0;
-		int		l;
-
-		l = pg_utf_mblen(utf);
-
-		/* Calculate entry for character input for conversion table lookup */
-		if (l == 1)
-		{
-			iutf = *utf++;
-		}
-		else if (l == 2)
-		{
-			iutf = *utf++ << 8;
-			iutf |= *utf++;
-		}
-		else if (l == 3)
-		{
-			iutf = *utf++ << 16;
-			iutf |= *utf++ << 8;
-			iutf |= *utf++;
-		}
-		else if (l == 4)
-		{
-			iutf = *utf++ << 24;
-			iutf |= *utf++ << 16;
-			iutf |= *utf++ << 8;
-			iutf |= *utf++;
-		}
-		else
-			elog(ERROR, "incorrect multibyte length %d", l);
-
-		/* Let's not care about any signing */
-		result_ptr[count++] = (int32) iutf;
-	}
-
-	Assert(count == ARRNELEMS(result));
-
-	PG_RETURN_POINTER(result);
-}
-
-/*
- * array_to_utf8
- * Convert a UTF-8 string into an integer array.
- */
-PG_FUNCTION_INFO_V1(array_to_utf8);
-Datum
-array_to_utf8(PG_FUNCTION_ARGS)
-{
-	ArrayType	   *input = PG_GETARG_ARRAYTYPE_P(0);
-	int			   *input_ptr = ARRPTR(input);
-	char		   *result;
-	int				size = 0;
-	int				count = 0;
-	int				i;
-
-	/*
-	 * Do a first pass on the array elements to calculate the size of the
-	 * string to return.
-	 */
-	for (i = 0; i < ARRNELEMS(input); i++)
-	{
-		uint32 code = input_ptr[i];
-
-		if (code & 0xff000000)
-			size++;
-		if (code & 0x00ff0000)
-			size++;
-		if (code & 0x0000ff00)
-			size++;
-		if (code & 0x000000ff)
-			size++;
-	}
-
-	/* Now fill in the string */
-	result = palloc0(size + 1);
-	for (i = 0; i < ARRNELEMS(input); i++)
-	{
-		uint32 code = input_ptr[i];
-
-		if (code & 0xff000000)
-			result[count++] = code >> 24;
-		if (code & 0x00ff0000)
-			result[count++] = code >> 16;
-		if (code & 0x0000ff00)
-			result[count++] = code >> 8;
-		if (code & 0x000000ff)
-			result[count++] = code;
-	}
-	result[count] = '\0';
-
-	Assert(count == size);
-	PG_RETURN_TEXT_P(cstring_to_text(result));
-}
-
-/*
- * utf8_conv_table
- * Return a full copy of the UTF-8 conversion table.
- */
-PG_FUNCTION_INFO_V1(utf8_conv_table);
-Datum
-utf8_conv_table(PG_FUNCTION_ARGS)
-{
-	TupleDesc		tupdesc;
-	Tuplestorestate *tupstore;
-	ReturnSetInfo  *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
-	MemoryContext	per_query_ctx;
-	MemoryContext	oldcontext;
-	int				i;
-
-	/* check to see if caller supports us returning a tuplestore */
-	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
+	if (pg_saslprep(password, &prep_password) != SASLPREP_SUCCESS)
 		ereport(ERROR,
-				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-				 errmsg("set-valued function called in context that cannot accept a set")));
-	if (!(rsinfo->allowedModes & SFRM_Materialize))
-		ereport(ERROR,
-				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-				 errmsg("materialize mode required, but it is not " \
-						"allowed in this context")));
-
-	per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
-	oldcontext = MemoryContextSwitchTo(per_query_ctx);
-
-	/* Build tuple descriptor */
-	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
-		elog(ERROR, "return type must be a row type");
-
-	tupstore = tuplestore_begin_heap(true, false, work_mem);
-	rsinfo->returnMode = SFRM_Materialize;
-	rsinfo->setResult = tupstore;
-	rsinfo->setDesc = tupdesc;
-	MemoryContextSwitchTo(oldcontext);
-
-	/* Print out all the values on the table */
-	for (i = 0; i < lengthof(SASLPrepConv); i++)
-	{
-		Datum		values[3];
-		bool		nulls[3];
-		pg_utf_decomposition entry = SASLPrepConv[i];
-		int			count;
-		ArrayType  *decomp = NULL;
-		int		   *decomp_ptr = NULL;
-
-		MemSet(values, 0, sizeof(values));
-		MemSet(nulls, 0, sizeof(nulls));
-
-		/* Fill in values, code first */
-		values[0] = Int32GetDatum(entry.utf);
-
-		/* class */
-		values[1] = Int16GetDatum((int16) entry.class);
-
-		/* decomposition array */
-		if (entry.dec_size == 0)
-			nulls[2] = true;
-		else
-		{
-			uint32     *entry_decomp;
-
-			/* Get decomposition of entry */
-			entry_decomp = get_code_decomposition(&entry);
-
-			decomp = new_intArrayType(entry.dec_size);
-			decomp_ptr = ARRPTR(decomp);
-			for (count = 0; count < entry.dec_size; count++)
-				decomp_ptr[count] = (int) entry_decomp[count];
-			values[2] = PointerGetDatum(decomp);
-		}
-
-		/* Save tuple values */
-		tuplestore_putvalues(tupstore, tupdesc, values, nulls);
-		if (decomp != NULL)
-			pfree(decomp);
-	}
-
-	/* clean up and return the tuplestore */
-	tuplestore_donestoring(tupstore);
+				(errcode(ERRCODE_INTERNAL_ERROR),
+				 errmsg("Error while processing SASLprep")));
  
-	return (Datum) 0;
+	PG_RETURN_TEXT_P(cstring_to_text(prep_password));
 }