Merge branch '1-initial-icu' into '1-initial-implementation'

Use icu4c for encoding to UTF-8. See merge request !3

Merge branch '1-initial-icu' into '1-initial-implementation'
e9abdd57 · Nick Thomas · f04f4f3e · 038f1a77 · e9abdd57 · e9abdd57
Commit e9abdd57 authored 7 years ago by Nick Thomas
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -6,10 +6,12 @@
    ELASTIC_CONNECTION_INFO: '{"url":["http://elasticsearch:9200"]}'
  stage: test
  script:
+    - apt-get update && apt-get -yy install libicu-dev
    - make setup
    - make format
-    - make test
    - make cover
+    - make
+    - make test
  
 test 1.7:
  <<: *test

--- a/indexer/encoding.go
+++ b/indexer/encoding.go
 package indexer
  
 import (
-	"fmt"
+	"github.com/goodsign/icu"
+)
  
-	"github.com/saintfish/chardet"
-	"golang.org/x/text/encoding/ianaindex"
+var (
+	detector  *icu.CharsetDetector
+	converter = icu.NewCharsetConverter(maxBlobSize)
 )
  
-var detector = chardet.NewTextDetector()
+func init() {
+	var err error
+	detector, err = icu.NewCharsetDetector()
+	if err != nil {
+		panic(err)
+	}
+}
  
 func tryEncodeString(s string) string {
 	encoded, err := encodeString(s)
@@ -34,39 +42,15 @@ func encodeString(s string) (string, error) {
  
 // encodeBytes converts a byte slice from an arbitrary encoding to UTF-8.
 // The charset is guessed with the package-level ICU detector and the first
 // reported match is handed to the package-level converter. Any detector or
 // converter error is returned together with an empty string.
 func encodeBytes(b []byte) (string, error) {
-	best, err := detector.DetectBest(b)
+	matches, err := detector.GuessCharset(b)
 	if err != nil {
 		return "", err
 	}
  
-	charset := best.Charset
-
-	// chardet has some incompatibilities with ianaindex
-	switch charset {
-	case "UTF-8":
-		return string(b), nil
-	case "GB-18030":
-		charset = "GB18030"
-	case "IBM420_ltr", "IBM420_rtl":
-		charset = "cp420"
-	case "IBM424_ltr", "IBM424_rtl":
-		charset = "IBM424"
-	}
-
-	encoding, err := ianaindex.IANA.Encoding(charset)
-	if err != nil {
-		return "", fmt.Errorf("Encoding %q: %v", charset, err)
-	}
-
-	// TODO(nick): Does this actually mean 'nothing to do'?
-	if encoding == nil {
-		return string(b), nil
-	}
-
-	decoded, err := encoding.NewDecoder().Bytes(b)
+	utf8, err := converter.ConvertToUtf8(b, matches[0].Charset) // NOTE(review): indexes matches[0] without a length check; confirm GuessCharset never returns an empty slice alongside a nil error
 	if err != nil {
 		return "", err
 	}
  
-	return string(decoded), nil
+	return string(utf8), nil
 }
--- a/vendor/github.com/goodsign/icu/LICENCE
+++ b/vendor/github.com/goodsign/icu/LICENCE
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 
+SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT 
+OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
--- a/vendor/github.com/goodsign/icu/LICENCE_icu
+++ b/vendor/github.com/goodsign/icu/LICENCE_icu
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+   "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
+<title>ICU License - ICU 1.8.1 and later</title>
+</head>
+
+<body BGCOLOR="#ffffff">
+<h2>ICU License - ICU 1.8.1 and later</h2>
+
+<p>COPYRIGHT AND PERMISSION NOTICE</p>
+
+<p>
+Copyright (c) 1995-2012 International Business Machines Corporation and others
+</p>
+<p>
+All rights reserved.
+</p>
+<p>
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, and/or sell
+copies of the Software, and to permit persons
+to whom the Software is furnished to do so, provided that the above
+copyright notice(s) and this permission notice appear in all copies
+of the Software and that both the above copyright notice(s) and this
+permission notice appear in supporting documentation.
+</p>
+<p>
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL
+THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM,
+OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER
+RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
+USE OR PERFORMANCE OF THIS SOFTWARE.
+</p>
+<p>
+Except as contained in this notice, the name of a copyright holder shall not be
+used in advertising or otherwise to promote the sale, use or other dealings in
+this Software without prior written authorization of the copyright holder.
+</p>
+
+<hr style="color:gray;background-color:gray">
+<p><small>
+All trademarks and registered trademarks mentioned herein are the property of their respective owners.
+</small></p>
+
+<hr style="height:3px;color:black;background-color:black">
+
+<h2>Third-Party Software Licenses</h2>
+This section contains third-party software notices and/or additional terms for licensed
+third-party software components included within ICU libraries.
+
+<h3>1. Unicode Data Files and Software</h3>
+
+<h3 align="center"><a name="Exhibit1">EXHIBIT 1</a><br>
+UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE</h3>
+<blockquote>
+<p>Unicode Data Files include all data files under the directories 
+<a href="http://www.unicode.org/Public/">http://www.unicode.org/Public/</a>, 
+<a href="http://www.unicode.org/reports/">http://www.unicode.org/reports/</a>, 
+and
+<a title="http://www.unicode.org/cldr/data/" onClick="return top.js.OpenExtLink(window,event,this)" target="_blank" href="http://www.unicode.org/cldr/data/">
+http://www.unicode.org/cldr/data/</a>. Unicode Data Files do not include PDF online code charts under the directory  <a href="http://www.unicode.org/Public/">http://www.unicode.org/Public/</a>. Software includes any source code 
+published in the Unicode Standard or under the directories <a href="http://www.unicode.org/Public/">http://www.unicode.org/Public/</a>,
+<a href="http://www.unicode.org/reports/">http://www.unicode.org/reports/</a>, 
+and
+<a title="http://www.unicode.org/cldr/data/" onClick="return top.js.OpenExtLink(window,event,this)" target="_blank" href="http://www.unicode.org/cldr/data/">
+http://www.unicode.org/cldr/data/</a>.</p>
+
+<p>NOTICE TO USER: Carefully read the following legal agreement. BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"), YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE.</p>
+<p>COPYRIGHT AND PERMISSION NOTICE</p>
+
+<p>Copyright © 1991-2012 Unicode, Inc. All rights reserved. Distributed under the Terms of Use in 
+<a href="http://www.unicode.org/copyright.html">http://www.unicode.org/copyright.html</a>.</p>
+
+<p>Permission is hereby granted, free of charge, to any person obtaining a copy of the Unicode data files and 
+any associated documentation (the "Data Files") or Unicode software and any associated documentation (the "Software") to deal in the Data Files or Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, and/or sell copies of the Data Files or Software, and to permit persons to whom the Data Files or Software are furnished to do so, provided that (a) the above copyright notice(s) and this permission notice appear 
+with all copies of the Data Files or Software, (b) both the above copyright notice(s) and this permission notice appear in associated documentation, and (c) there is clear notice in each modified Data File or in the Software as well as in the documentation associated with the Data File(s) or Software that the data or software has been modified.</p>
+
+<p>THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THE DATA FILES OR SOFTWARE.</p>
+
+<p>Except as contained in this notice, the name of a copyright holder shall not be used in advertising or otherwise to promote the sale, use or other dealings in these Data Files or Software without prior written authorization of the copyright holder.</p>
+
+            <hr width="80%">
+
+<p>Unicode and the Unicode logo are trademarks of Unicode, Inc. in the United States and other countries. All third party trademarks referenced herein are the property of their respective owners.</p>
+
+
+</blockquote>
+
+<h3>2. Chinese/Japanese Word Break Dictionary Data (cjdict.txt)</h3>
+<pre>
+ #    The Google Chrome software developed by Google is licensed under the BSD license. Other software included in this distribution is provided under other licenses, as set forth below.
+ #  
+ #  The BSD License
+ #  http://opensource.org/licenses/bsd-license.php 
+ #  Copyright (C) 2006-2008, Google Inc.
+ #  
+ #  All rights reserved.
+ #  
+ #  Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+ #  
+ #  Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+ #  Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+ #  Neither the name of  Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+ #   
+ #  
+ #  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ #  
+ #                                               
+ #  The word list in cjdict.txt are generated by combining three word lists listed
+ #  below with further processing for compound word breaking. The frequency is generated
+ #  with an iterative training against Google web corpora. 
+ #  
+ #  * Libtabe (Chinese)
+ #    - https://sourceforge.net/project/?group_id=1519
+ #    - Its license terms and conditions are shown below.
+ #  
+ #  * IPADIC (Japanese)
+ #    - http://chasen.aist-nara.ac.jp/chasen/distribution.html
+ #    - Its license terms and conditions are shown below.
+ #  
+ #  ---------COPYING.libtabe ---- BEGIN--------------------
+ #  
+ #  /*
+ #   * Copyrighy (c) 1999 TaBE Project.
+ #   * Copyright (c) 1999 Pai-Hsiang Hsiao.
+ #   * All rights reserved.
+ #   *
+ #   * Redistribution and use in source and binary forms, with or without
+ #   * modification, are permitted provided that the following conditions
+ #   * are met:
+ #   *
+ #   * . Redistributions of source code must retain the above copyright
+ #   *   notice, this list of conditions and the following disclaimer.
+ #   * . Redistributions in binary form must reproduce the above copyright
+ #   *   notice, this list of conditions and the following disclaimer in
+ #   *   the documentation and/or other materials provided with the
+ #   *   distribution.
+ #   * . Neither the name of the TaBE Project nor the names of its
+ #   *   contributors may be used to endorse or promote products derived
+ #   *   from this software without specific prior written permission.
+ #   *
+ #   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ #   * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ #   * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ #   * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ #   * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ #   * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ #   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ #   * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ #   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ #   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ #   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ #   * OF THE POSSIBILITY OF SUCH DAMAGE.
+ #   */
+ #  
+ #  /*
+ #   * Copyright (c) 1999 Computer Systems and Communication Lab,
+ #   *                    Institute of Information Science, Academia Sinica.
+ #   * All rights reserved.
+ #   *
+ #   * Redistribution and use in source and binary forms, with or without
+ #   * modification, are permitted provided that the following conditions
+ #   * are met:
+ #   *
+ #   * . Redistributions of source code must retain the above copyright
+ #   *   notice, this list of conditions and the following disclaimer.
+ #   * . Redistributions in binary form must reproduce the above copyright
+ #   *   notice, this list of conditions and the following disclaimer in
+ #   *   the documentation and/or other materials provided with the
+ #   *   distribution.
+ #   * . Neither the name of the Computer Systems and Communication Lab
+ #   *   nor the names of its contributors may be used to endorse or
+ #   *   promote products derived from this software without specific
+ #   *   prior written permission.
+ #   *
+ #   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ #   * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ #   * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ #   * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ #   * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ #   * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ #   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ #   * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ #   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ #   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ #   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ #   * OF THE POSSIBILITY OF SUCH DAMAGE.
+ #   */
+ #  
+ #  Copyright 1996 Chih-Hao Tsai @ Beckman Institute, University of Illinois
+ #  c-tsai4@uiuc.edu  http://casper.beckman.uiuc.edu/~c-tsai4
+ #  
+ #  ---------------COPYING.libtabe-----END------------------------------------
+ #  
+ #  
+ #  ---------------COPYING.ipadic-----BEGIN------------------------------------
+ #  
+ #  Copyright 2000, 2001, 2002, 2003 Nara Institute of Science
+ #  and Technology.  All Rights Reserved.
+ #  
+ #  Use, reproduction, and distribution of this software is permitted.
+ #  Any copy of this software, whether in its original form or modified,
+ #  must include both the above copyright notice and the following
+ #  paragraphs.
+ #  
+ #  Nara Institute of Science and Technology (NAIST),
+ #  the copyright holders, disclaims all warranties with regard to this
+ #  software, including all implied warranties of merchantability and
+ #  fitness, in no event shall NAIST be liable for
+ #  any special, indirect or consequential damages or any damages
+ #  whatsoever resulting from loss of use, data or profits, whether in an
+ #  action of contract, negligence or other tortuous action, arising out
+ #  of or in connection with the use or performance of this software.
+ #  
+ #  A large portion of the dictionary entries
+ #  originate from ICOT Free Software.  The following conditions for ICOT
+ #  Free Software applies to the current dictionary as well.
+ #  
+ #  Each User may also freely distribute the Program, whether in its
+ #  original form or modified, to any third party or parties, PROVIDED
+ #  that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear
+ #  on, or be attached to, the Program, which is distributed substantially
+ #  in the same form as set out herein and that such intended
+ #  distribution, if actually made, will neither violate or otherwise
+ #  contravene any of the laws and regulations of the countries having
+ #  jurisdiction over the User or the intended distribution itself.
+ #  
+ #  NO WARRANTY
+ #  
+ #  The program was produced on an experimental basis in the course of the
+ #  research and development conducted during the project and is provided
+ #  to users as so produced on an experimental basis.  Accordingly, the
+ #  program is provided without any warranty whatsoever, whether express,
+ #  implied, statutory or otherwise.  The term "warranty" used herein
+ #  includes, but is not limited to, any warranty of the quality,
+ #  performance, merchantability and fitness for a particular purpose of
+ #  the program and the nonexistence of any infringement or violation of
+ #  any right of any third party.
+ #  
+ #  Each user of the program will agree and understand, and be deemed to
+ #  have agreed and understood, that there is no warranty whatsoever for
+ #  the program and, accordingly, the entire risk arising from or
+ #  otherwise connected with the program is assumed by the user.
+ #  
+ #  Therefore, neither ICOT, the copyright holder, or any other
+ #  organization that participated in or was otherwise related to the
+ #  development of the program and their respective officials, directors,
+ #  officers and other employees shall be held liable for any and all
+ #  damages, including, without limitation, general, special, incidental
+ #  and consequential damages, arising out of or otherwise in connection
+ #  with the use or inability to use the program or any product, material
+ #  or result produced or otherwise obtained by using the program,
+ #  regardless of whether they have been advised of, or otherwise had
+ #  knowledge of, the possibility of such damages at any time during the
+ #  project or thereafter.  Each user will be deemed to have agreed to the
+ #  foregoing by his or her commencement of use of the program.  The term
+ #  "use" as used herein includes, but is not limited to, the use,
+ #  modification, copying and distribution of the program and the
+ #  production of secondary products from the program.
+ #  
+ #  In the case where the program, whether in its original form or
+ #  modified, was distributed or delivered to or received by a user from
+ #  any person, organization or entity other than ICOT, unless it makes or
+ #  grants independently of ICOT any specific warranty to the user in
+ #  writing, such person, organization or entity, will also be exempted
+ #  from and not be held liable to the user for any such damages as noted
+ #  above as far as the program is concerned.
+ #  
+ #  ---------------COPYING.ipadic-----END------------------------------------
+</pre>
+
+<h3>3. Time Zone Database</h3>
+<p>ICU uses the public domain data and code derived from <a href="http://www.iana.org/time-zones">
+Time Zone Database</a> for its time zone support. The ownership of the TZ database is explained
+in <a href="http://tools.ietf.org/html/rfc6557">BCP 175: Procedure for Maintaining the Time Zone
+Database</a> section 7.<p>
+
+<pre>
+7.  Database Ownership
+
+   The TZ database itself is not an IETF Contribution or an IETF
+   document.  Rather it is a pre-existing and regularly updated work
+   that is in the public domain, and is intended to remain in the public
+   domain.  Therefore, BCPs 78 [RFC5378] and 79 [RFC3979] do not apply
+   to the TZ Database or contributions that individuals make to it.
+   Should any claims be made and substantiated against the TZ Database,
+   the organization that is providing the IANA Considerations defined in
+   this RFC, under the memorandum of understanding with the IETF,
+   currently ICANN, may act in accordance with all competent court
+   orders.  No ownership claims will be made by ICANN or the IETF Trust
+   on the database or the code.  Any person making a contribution to the
+   database or code waives all rights to future claims in that
+   contribution or in the TZ Database.
+
+</pre>
+
+
+</body>
+</html>
\ No newline at end of file
--- a/vendor/github.com/goodsign/icu/README.md
+++ b/vendor/github.com/goodsign/icu/README.md
+About
+==========
+
+Cgo binding for icu4c C library detection and conversion functions. Guaranteed compatibility with version 50.1.
+
+Installation
+==========
+
+Installation consists of several simple steps. They may be a bit different on your target system (e.g. require more permissions) so adapt them to the parameters of your system.
+
+### Install build-essential
+
+Make sure you have **build-essential** installed. Otherwise the icu4c build will fail at the configuration stage.
+
+Installation example using apt-get (Ubuntu):
+
+```
+sudo apt-get install build-essential
+```
+
+### Install pkg-config
+
+Make sure you have **pkg-config** installed.
+
+Installation example using apt-get (Ubuntu):
+
+```
+sudo apt-get install pkg-config
+```
+
+### Get icu4c C library code
+
+Download and unarchive original icu4c archive from [icu download section](http://site.icu-project.org/download).
+
+Example (for version 50.1):
+
+```
+wget http://download.icu-project.org/files/icu4c/50.1/icu4c-50_1-src.tgz
+tar -zxvf icu4c-50_1-src.tgz
+mv -i ./icu ~/where-you-store-libs
+```
+
+NOTE: If this link is not working or there are some problems with downloading, there is a stable version 50.1 snapshot saved in [Github Downloads](https://github.com/downloads/goodsign/icu/icu4c-50_1-src.tgz).
+
+### Build and install icu4c C library
+
+From the directory, where you unarchived icu4c, run:
+
+```
+cd source
+./configure
+make
+sudo make install
+sudo ldconfig
+```
+
+### Install Go wrapper
+
+```
+go get github.com/goodsign/icu
+go test github.com/goodsign/icu (must PASS)
+```
+
+Installation notes
+==========
+
+* Make sure that you have your local library paths set correctly and that installation was successful. Otherwise, **go build** or **go test** may fail.
+
+* icu4c is installed in your local library directory (e.g. **/usr/local/lib**) and puts its libraries there. This path should be registered in your system (using ldconfig or exporting LD_LIBRARY_PATH, etc.) or the linker would fail.
+
+* icu4c installs its header files to local include folders (e.g. **/usr/local/include/unicode**) so there is no need to have additional .h files with this package, but the system must be properly set up to detect .h files in those directories.
+
+Usage
+==========
+
+Note: check icu documentation for returned encoding identifiers.
+
+Detector
+----------
+
+```go
+// Create detector
+detector, err := NewCharsetDetector()
+    
+if err != nil {
+    //... Handle error ...
+}
+defer detector.Close()
+
+// Guess encoding
+encMatches, err := detector.GuessCharset(encodedText)
+
+if err != nil {
+    //... Handle error ...
+}
+
+// Get charset with max confidence (goes first)
+maxenc := encMatches[0].Charset
+
+// Use maxenc. 
+// ...
+```
+
+Converter
+----------
+
+```go
+...
+
+// Create converter
+converter := NewCharsetConverter(DefaultMaxTextSize)
+
+// Convert to utf-8
+converted, err := converter.ConvertToUtf8(encodedText, maxenc)
+
+if nil != err {
+    //... Handle error ...
+}
+```
+
+Usage notes
+==========
+
+* Check **NewCharsetConverter** func comments for details on max text size parameter.
+* Often you would use detector and converter in pair. So, the 'converter' usage example actually continues the 'detector' example and uses the 'maxenc' result from it.
+
+More info
+----------
+
+For more information on ICU, refer to the original [website](http://site.icu-project.org/), which contains links to the theory and other details.
+
+icu4c Licence
+==========
+
+ICU is released under a nonrestrictive open source license that is suitable for use with both commercial software and with other open source or free software.
+
+[LICENCE file](https://github.com/goodsign/icu/blob/master/LICENCE_icu)
+
+Licence
+==========
+
+The goodsign/icu binding is released under the [BSD Licence](http://opensource.org/licenses/bsd-license.php)
+
+[LICENCE file](https://github.com/goodsign/icu/blob/master/LICENCE)
\ No newline at end of file
--- a/vendor/github.com/goodsign/icu/c_bridge.c
+++ b/vendor/github.com/goodsign/icu/c_bridge.c
+#include "c_bridge.h"
+#include <string.h>
+#include <unicode/utypes.h>
+#include <unicode/ucsdet.h>
+#include <stdlib.h>
+#include <unicode/ucnv.h>
+
+// See description in c_bridge.h
+//
+// Returns the number of entries written to matchBuffer, or 0 as soon as any
+// ICU call leaves *status different from U_ZERO_ERROR.
+const int detectCharset(void        *detector,
+                        void        *input,
+                        int         input_len,
+                        int         *status,
+                        MatchData   *matchBuffer,
+                        int         matchBufferSize) {
+
+    UCharsetDetector *csd = (UCharsetDetector*)detector;
+
+    // Hand the raw input bytes to the detector.
+    ucsdet_setText(csd, (char*)input, input_len, status);
+    if (*status != U_ZERO_ERROR) {
+        return 0;
+    }
+
+    // Run the analysis: ICU reports every candidate and how many there are.
+    int found = 0;
+    const UCharsetMatch **candidates = ucsdet_detectAll(csd, &found, status);
+    if (*status != U_ZERO_ERROR) {
+        return 0;
+    }
+
+    // Never write more entries than the caller's buffer can hold.
+    int limit = found;
+    if (limit > matchBufferSize) {
+        limit = matchBufferSize;
+    }
+
+    int idx;
+    for (idx = 0; idx < limit; idx++) {
+        const UCharsetMatch *candidate = candidates[idx];
+
+        // Guessed encoding name.
+        const char *name = ucsdet_getName(candidate, status);
+        if (*status != U_ZERO_ERROR) {
+            return 0;
+        }
+
+        // Guessed language.
+        const char *language = ucsdet_getLanguage(candidate, status);
+        if (*status != U_ZERO_ERROR) {
+            return 0;
+        }
+
+        // Confidence rating of this guess.
+        int32_t confidence = ucsdet_getConfidence(candidate, status);
+        if (*status != U_ZERO_ERROR) {
+            return 0;
+        }
+
+        // The slot is only written once all three lookups have succeeded.
+        MatchData *slot = &matchBuffer[idx];
+        slot->confidence = confidence;
+        slot->charset = name;
+        slot->language = language;
+    }
+
+    // Number of guesses put into matchBuffer.
+    return limit;
+}
+
+// See description in c_bridge.h
+//
+// Returns the length reported by ucnv_toUChars, or 0 when *status is anything
+// other than U_ZERO_ERROR after opening the converter or after converting.
+// The converter opened here is released on every path, so a failed conversion
+// no longer leaks an ICU converter.
+int convertToUtf16(const char   *srcEncoding,
+                   UChar        *dest,
+                   int32_t      destCapacity,
+                   const char   *src,
+                   int32_t      srcLength,
+                   int          *status){
+    UConverter *conv;
+
+    conv = ucnv_open(srcEncoding, status);
+    if (*status != U_ZERO_ERROR) {
+        // A non-zero status may be a warning that still comes with a usable
+        // converter; ucnv_close ignores NULL, so releasing here is always safe.
+        ucnv_close(conv);
+        return 0;
+    }
+
+    /* Convert from original encoding to UTF-16 */
+    int len = ucnv_toUChars(conv, dest, destCapacity, src, srcLength, status);
+
+    // Release the converter before inspecting the status so that the failure
+    // return below cannot skip the close.
+    ucnv_close(conv);
+
+    if (*status != U_ZERO_ERROR) {
+        return 0;
+    }
+
+    return len;
+}
+
+// See description in c_bridge.h
+//
+// Returns the length reported by ucnv_fromUChars, or 0 when *status is
+// anything other than U_ZERO_ERROR after opening the converter or after
+// converting. The converter opened here is released on every path, so a
+// failed conversion no longer leaks an ICU converter.
+int convertFromUtf16(const char   *destEncoding,
+                     char         *dest,
+                     int32_t      destCapacity,
+                     const UChar  *src,
+                     int32_t      srcLength,
+                     int          *status){
+    UConverter *conv;
+
+    conv = ucnv_open(destEncoding, status);
+    if (*status != U_ZERO_ERROR) {
+        // A non-zero status may be a warning that still comes with a usable
+        // converter; ucnv_close ignores NULL, so releasing here is always safe.
+        ucnv_close(conv);
+        return 0;
+    }
+
+    /* Convert from UTF-16 to destination encoding */
+    int len = ucnv_fromUChars(conv, dest, destCapacity, src, srcLength, status);
+
+    // Release the converter before inspecting the status so that the failure
+    // return below cannot skip the close.
+    ucnv_close(conv);
+
+    if (*status != U_ZERO_ERROR) {
+        return 0;
+    }
+
+    return len;
+}
\ No newline at end of file
--- a/vendor/github.com/goodsign/icu/c_bridge.h
+++ b/vendor/github.com/goodsign/icu/c_bridge.h
+#ifndef __C_BRIDGE_H__
+#define __C_BRIDGE_H__
+
+// C_BRIDGE is a bridge between go and native pure c functions used to 
+// operate with ICU library code.
+
+#include <unicode/utypes.h>
+#include <unicode/ucsdet.h>
+
+// MatchData contains information about one 'guess' of the encoding detector:
+// the guessed charset and language (ICU string identifiers, see the ICU
+// documentation for them) and a confidence coefficient, which is a number
+// between 0 and 100 (100 is the best).
+typedef struct MatchData {
+  const char* charset;
+  const char* language;
+  short int confidence;
+} MatchData;
+
+// detectCharset performs the detection (guessing) operation using a given detector (ICU internals),
+// input data (bytes), input length and error status pointer (read the ICU docs about error codes).
+//
+// After the detection is performed, all possible matches are put into the matchBuffer. If there are
+// more results than matchBufferSize, then only matchBufferSize entries are put (So no overflow can
+// ever happen).
+//
+// The results of this function are put into the matchBuffer, so it MUST NOT be called asynchronously.
+// Caller should guarantee thread safety and perform locks while working with it.
+const int detectCharset(void       *detector, 
+                        void       *input, 
+                        int        input_len, 
+                        int        *status, 
+                        MatchData  *matchBuffer, 
+                        int        matchBufferSize);
+
+// convertToUtf16 performs conversion from any encoding to utf16. Utf16 is the ICU standard so
+// it is easier to convert to/from it.
+// 
+// The results of this function are put into the dest buffer, so it MUST NOT be called asynchronously.
+// Caller should guarantee thread safety and perform locks while working with it.
+int convertToUtf16(const char   *srcEncoding,
+                   UChar        *dest, 
+                   int32_t      destCapacity,
+                   const char   *src,
+                   int32_t      srcLength,
+                   int          *status);
+
+// convertFromUtf16 performs conversion from utf16 to any other encoding. Utf16 is the ICU standard so
+// it is easier to convert to/from it.
+// 
+// The results of this function are put into the dest buffer, so it MUST NOT be called asynchronously.
+// Caller should guarantee thread safety and perform locks while working with it.
+int convertFromUtf16(const char   *destEncoding,
+                     char         *dest, 
+                     int32_t      destCapacity,
+                     const UChar  *src,
+                     int32_t      srcLength,
+                     int          *status);
+
+
+#endif //__C_BRIDGE_H__
\ No newline at end of file
--- a/vendor/github.com/goodsign/icu/convert.go
+++ b/vendor/github.com/goodsign/icu/convert.go
+package icu
+
+// #cgo pkg-config: icu-i18n
+// #include "c_bridge.h"
+// #include "stdlib.h"
+import "C"
+import (
+    "fmt"
+    "sync"
+    "unsafe"
+)
+
+const (
+    DefaultMaxTextSize = 1024 * 1024    // Default value for the max text length in conversion operations
+    utf8MaxCharSize = 4     // Multiplier applied to maxTextSize when sizing the UTF-8 buffer
+    utf16MaxCharSize = 4    // Multiplier applied to maxTextSize when sizing the UTF-16 buffer
+) 
+
+var (
+    Utf8CString = C.CString("UTF-8")    // C copy of "UTF-8", created once and passed to convertFromUtf16 as the destination encoding
+)
+
+// CharsetConverter provides ICU charset conversion functionality.
+type CharsetConverter struct {
+    utf16Buffer   []byte     // Reused scratch buffer that receives the intermediate UTF-16 text
+    utf8Buffer    []byte     // Reused scratch buffer that receives the final UTF-8 text
+    maxTextSize   int
+    cMutex        sync.Mutex // Mutex used to guarantee thread safety for ICU calls
+}
+
+// NewCharsetConverter creates a new charset converter. It doesn't need to be closed as
+// it doesn't allocate any native resources.
+//
+// For better performance, conversion buffers are not allocated on each operation. Instead they
+// are created in memory once and then reused. 'maxTextSize' sets the size of these buffers.
+// ICU library would return error if any processed text is longer than this parameter.
+//
+// A non-positive 'maxTextSize' falls back to DefaultMaxTextSize: empty buffers cannot be
+// handed to ICU and a negative size cannot be allocated at all.
+//
+// NOTE:
+//
+// UTF8 uses 1 to 4 bytes for each symbol.
+// UTF16 uses 2 bytes to 4 bytes for each symbol.
+//
+// So, to guarantee successful conversion of text with size = 'maxTextSize' we need:
+//     maxTextSize * 8 bytes    (utf8 buffer + utf16 buffer).
+func NewCharsetConverter(maxTextSize int) *CharsetConverter {
+    if maxTextSize <= 0 {
+        maxTextSize = DefaultMaxTextSize
+    }
+
+    return &CharsetConverter{
+        utf16Buffer: make([]byte, utf16MaxCharSize*maxTextSize),
+        utf8Buffer:  make([]byte, utf8MaxCharSize*maxTextSize),
+        // Remember the effective limit the buffers were sized for
+        maxTextSize: maxTextSize,
+    }
+}
+
+// ConvertToUtf8 converts input bytes encoded with srcEncoding to UTF-8.
+//
+// The text is first converted to UTF-16 and then to UTF-8, using the converter's
+// internal buffers. The returned slice is a fresh copy of the result, so it stays
+// valid after later calls on the same converter.
+//
+// An error is returned when input is empty, when the converter has no buffers,
+// or when either ICU step reports a status other than U_ZERO_ERROR.
+func (conv *CharsetConverter) ConvertToUtf8(input []byte, srcEncoding string) ([]byte, error) {
+    // As described in c_bridge.h, conversion operations are not thread safe and
+    // must be called sequentially. So a mutex is used here.
+    conv.cMutex.Lock()
+    defer conv.cMutex.Unlock()
+
+    if len(input) == 0 {
+        return nil, fmt.Errorf("Nil length of input")
+    }
+
+    // Taking the address of element 0 below would panic on an empty buffer
+    if len(conv.utf16Buffer) == 0 || len(conv.utf8Buffer) == 0 {
+        return nil, fmt.Errorf("conversion buffers are empty")
+    }
+
+    // The bridge writes the status through a C int pointer, so the variable must
+    // be a C.int: a Go int may be wider and would only be partially written.
+    var status C.int
+
+    encCString := C.CString(srcEncoding)
+    defer C.free(unsafe.Pointer(encCString))
+
+    inputCString := C.CString(string(input))
+    defer C.free(unsafe.Pointer(inputCString))
+
+    // NOTE(review): the capacity is passed as a byte count while dest is a UChar
+    // pointer; confirm against c_bridge.c which unit convertToUtf16 expects.
+    convLen := C.convertToUtf16(
+        encCString,
+        (*C.UChar)(unsafe.Pointer(&conv.utf16Buffer[0])),
+        C.int32_t(len(conv.utf16Buffer)),
+        inputCString,
+        C.int32_t(len(input)),
+        &status)
+
+    if status != U_ZERO_ERROR {
+        return nil, fmt.Errorf("ICU Error code returned: %d", int(status))
+    }
+
+    nConvLen := C.convertFromUtf16(
+        Utf8CString,
+        (*C.char)(unsafe.Pointer(&conv.utf8Buffer[0])),
+        C.int32_t(len(conv.utf8Buffer)),
+        (*C.UChar)(unsafe.Pointer(&conv.utf16Buffer[0])),
+        C.int32_t(convLen),
+        &status)
+
+    if status != U_ZERO_ERROR {
+        return nil, fmt.Errorf("ICU Error code returned: %d", int(status))
+    }
+
+    // Copy the result out: utf8Buffer is overwritten by the next conversion, and
+    // the mutex no longer protects it once this method returns.
+    res := make([]byte, int(nConvLen))
+    copy(res, conv.utf8Buffer[:nConvLen])
+
+    return res, nil
+}
--- a/vendor/github.com/goodsign/icu/detect.go
+++ b/vendor/github.com/goodsign/icu/detect.go
+package icu
+
+// #cgo pkg-config: icu-i18n
+// #include "c_bridge.h"
+// #include "stdlib.h"
+import "C"
+import (
+    "fmt"
+    "sync"
+    "unsafe"
+)
+
+const (
+    U_ZERO_ERROR        = 0     // ICU common constant error code which means that no error occurred
+    MatchDataBufferSize = 25    // Size of the buffer for detection results (Max count of returned guesses per detect call)
+) 
+
+// CharsetDetector provides ICU charset detection functionality.
+type CharsetDetector struct {
+    ptr         *C.UCharsetDetector // ICU struct needed for detection
+    resBuffer   [MatchDataBufferSize]C.MatchData // Reused result buffer that detectCharset fills on each GuessCharset call
+    gMutex      sync.Mutex // Mutex used to guarantee thread safety for ICU calls
+}
+
+// Match is the Go equivalent of the MatchData C structure (see c_bridge.h).
+type Match struct {
+    Charset string     // Charset name, copied from MatchData.charset
+    Language string    // Language, copied from MatchData.language
+    Confidence int     // Confidence, converted from MatchData.confidence
+}
+
+// NewCharsetDetector creates a new charset detector. If it is successfully created, it
+// must be closed with Close, as it holds native ICU resources.
+//
+// An error is returned when ICU reports a status other than U_ZERO_ERROR or
+// hands back a nil detector; nothing is left allocated in that case.
+func NewCharsetDetector() (*CharsetDetector, error) {
+    // ICU writes the status through a UErrorCode pointer, so the variable must
+    // have that exact type: a Go int may be wider and would only be partially written.
+    var status C.UErrorCode
+
+    ptr := C.ucsdet_open(&status)
+
+    if status != U_ZERO_ERROR {
+        // Do not leak a detector that ICU may have allocated anyway
+        if ptr != nil {
+            C.ucsdet_close(ptr)
+        }
+        return nil, fmt.Errorf("ICU Error code returned: %d", int(status))
+    }
+
+    if ptr == nil {
+        return nil, fmt.Errorf("ICU returned a nil charset detector")
+    }
+
+    return &CharsetDetector{ptr: ptr}, nil
+}
+
+// GuessCharset runs ICU charset detection on input and returns the matches in
+// the order the C bridge reported them, at most MatchDataBufferSize of them.
+//
+// An error is returned when input is empty, when the detector is closed or was
+// not created with NewCharsetDetector, or when ICU reports a status other than
+// U_ZERO_ERROR.
+func (det *CharsetDetector) GuessCharset(input []byte) (matches []Match, err error) {
+
+    // As described in c_bridge.h, detection operations are not thread safe and
+    // must be called sequentially. So a mutex is used here.
+    det.gMutex.Lock()
+    defer det.gMutex.Unlock()
+
+    inputLen := len(input)
+    if inputLen == 0 {
+        return nil, fmt.Errorf("Input data len is 0")
+    }
+
+    // Never hand a nil detector to ICU
+    if det.ptr == nil {
+        return nil, fmt.Errorf("charset detector is closed or not initialized")
+    }
+
+    // The bridge writes the status through a C int pointer, so the variable must
+    // be a C.int: a Go int may be wider and would only be partially written.
+    var status C.int
+
+    // Perform detection. Guess count is the number of matches returned.
+    // The matches themselves are put in the result buffer
+    guessCount := C.detectCharset(
+        unsafe.Pointer(det.ptr),
+        unsafe.Pointer(&input[0]),
+        C.int(inputLen),
+        &status,
+        (*C.MatchData)(unsafe.Pointer(&det.resBuffer[0])),
+        C.int(MatchDataBufferSize))
+
+    if status != U_ZERO_ERROR {
+        return nil, fmt.Errorf("ICU Error code returned: %d", int(status))
+    }
+
+    // Keep the count inside the fixed-size result buffer, whatever the bridge returned
+    count := int(guessCount)
+    if count < 0 {
+        count = 0
+    }
+    if count > MatchDataBufferSize {
+        count = MatchDataBufferSize
+    }
+
+    // Copy the returned entries out of the shared result buffer into a slice
+    // that will be returned
+    mt := make([]Match, count)
+    for i := range mt {
+        mData := &det.resBuffer[i]
+        mt[i] = Match{
+            Charset:    C.GoString(mData.charset),
+            Language:   C.GoString(mData.language),
+            Confidence: int(mData.confidence),
+        }
+    }
+
+    return mt, nil
+}
+
+// Close frees native C resources. It is safe to call more than once: the
+// detector pointer is cleared after the first call, so ICU never receives
+// an already freed detector.
+func (det *CharsetDetector) Close() {
+    det.gMutex.Lock()
+    defer det.gMutex.Unlock()
+
+    if det.ptr != nil {
+        C.ucsdet_close(det.ptr)
+        // Prevent a double free on a repeated Close
+        det.ptr = nil
+    }
+}
--- a/vendor/github.com/saintfish/chardet/2022.go
+++ b/vendor/github.com/saintfish/chardet/2022.go
-package chardet
-
-import (
-	"bytes"
-)
-
-type recognizer2022 struct {
-	charset string
-	escapes [][]byte
-}
-
-func (r *recognizer2022) Match(input *recognizerInput) (output recognizerOutput) {
-	return recognizerOutput{
-		Charset:    r.charset,
-		Confidence: r.matchConfidence(input.input),
-	}
-}
-
-func (r *recognizer2022) matchConfidence(input []byte) int {
-	var hits, misses, shifts int
-input:
-	for i := 0; i < len(input); i++ {
-		c := input[i]
-		if c == 0x1B {
-			for _, esc := range r.escapes {
-				if bytes.HasPrefix(input[i+1:], esc) {
-					hits++
-					i += len(esc)
-					continue input
-				}
-			}
-			misses++
-		} else if c == 0x0E || c == 0x0F {
-			shifts++
-		}
-	}
-	if hits == 0 {
-		return 0
-	}
-	quality := (100*hits - 100*misses) / (hits + misses)
-	if hits+shifts < 5 {
-		quality -= (5 - (hits + shifts)) * 10
-	}
-	if quality < 0 {
-		quality = 0
-	}
-	return quality
-}
-
-var escapeSequences_2022JP = [][]byte{
-	{0x24, 0x28, 0x43}, // KS X 1001:1992
-	{0x24, 0x28, 0x44}, // JIS X 212-1990
-	{0x24, 0x40},       // JIS C 6226-1978
-	{0x24, 0x41},       // GB 2312-80
-	{0x24, 0x42},       // JIS X 208-1983
-	{0x26, 0x40},       // JIS X 208 1990, 1997
-	{0x28, 0x42},       // ASCII
-	{0x28, 0x48},       // JIS-Roman
-	{0x28, 0x49},       // Half-width katakana
-	{0x28, 0x4a},       // JIS-Roman
-	{0x2e, 0x41},       // ISO 8859-1
-	{0x2e, 0x46},       // ISO 8859-7
-}
-
-var escapeSequences_2022KR = [][]byte{
-	{0x24, 0x29, 0x43},
-}
-
-var escapeSequences_2022CN = [][]byte{
-	{0x24, 0x29, 0x41}, // GB 2312-80
-	{0x24, 0x29, 0x47}, // CNS 11643-1992 Plane 1
-	{0x24, 0x2A, 0x48}, // CNS 11643-1992 Plane 2
-	{0x24, 0x29, 0x45}, // ISO-IR-165
-	{0x24, 0x2B, 0x49}, // CNS 11643-1992 Plane 3
-	{0x24, 0x2B, 0x4A}, // CNS 11643-1992 Plane 4
-	{0x24, 0x2B, 0x4B}, // CNS 11643-1992 Plane 5
-	{0x24, 0x2B, 0x4C}, // CNS 11643-1992 Plane 6
-	{0x24, 0x2B, 0x4D}, // CNS 11643-1992 Plane 7
-	{0x4e},             // SS2
-	{0x4f},             // SS3
-}
-
-func newRecognizer_2022JP() *recognizer2022 {
-	return &recognizer2022{
-		"ISO-2022-JP",
-		escapeSequences_2022JP,
-	}
-}
-
-func newRecognizer_2022KR() *recognizer2022 {
-	return &recognizer2022{
-		"ISO-2022-KR",
-		escapeSequences_2022KR,
-	}
-}
-
-func newRecognizer_2022CN() *recognizer2022 {
-	return &recognizer2022{
-		"ISO-2022-CN",
-		escapeSequences_2022CN,
-	}
-}
--- a/vendor/github.com/saintfish/chardet/AUTHORS
+++ b/vendor/github.com/saintfish/chardet/AUTHORS
-Sheng Yu (yusheng dot sjtu at gmail dot com)
--- a/vendor/github.com/saintfish/chardet/LICENSE
+++ b/vendor/github.com/saintfish/chardet/LICENSE
-Copyright (c) 2012 chardet Authors
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the "Software"), to deal in
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-of the Software, and to permit persons to whom the Software is furnished to do
-so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
-Partial of the Software is derived from ICU project. See icu-license.html for
-license of the derivative portions.
--- a/vendor/github.com/saintfish/chardet/README.md
+++ b/vendor/github.com/saintfish/chardet/README.md
-# chardet
-
-chardet is library to automatically detect
-[charset](http://en.wikipedia.org/wiki/Character_encoding) of texts for [Go
-programming language](http://golang.org/). It's based on the algorithm and data
-in [ICU](http://icu-project.org/)'s implementation.
-
-## Documentation and Usage
-
-See [pkgdoc](http://go.pkgdoc.org/github.com/saintfish/chardet)
--- a/vendor/github.com/saintfish/chardet/detector.go
+++ b/vendor/github.com/saintfish/chardet/detector.go
-// Package chardet ports character set detection from ICU.
-package chardet
-
-import (
-	"errors"
-	"sort"
-)
-
-// Result contains all the information that charset detector gives.
-type Result struct {
-	// IANA name of the detected charset.
-	Charset string
-	// IANA name of the detected language. It may be empty for some charsets.
-	Language string
-	// Confidence of the Result. Scale from 1 to 100. The bigger, the more confident.
-	Confidence int
-}
-
-// Detector implements charset detection.
-type Detector struct {
-	recognizers []recognizer
-	stripTag    bool
-}
-
-// List of charset recognizers
-var recognizers = []recognizer{
-	newRecognizer_utf8(),
-	newRecognizer_utf16be(),
-	newRecognizer_utf16le(),
-	newRecognizer_utf32be(),
-	newRecognizer_utf32le(),
-	newRecognizer_8859_1_en(),
-	newRecognizer_8859_1_da(),
-	newRecognizer_8859_1_de(),
-	newRecognizer_8859_1_es(),
-	newRecognizer_8859_1_fr(),
-	newRecognizer_8859_1_it(),
-	newRecognizer_8859_1_nl(),
-	newRecognizer_8859_1_no(),
-	newRecognizer_8859_1_pt(),
-	newRecognizer_8859_1_sv(),
-	newRecognizer_8859_2_cs(),
-	newRecognizer_8859_2_hu(),
-	newRecognizer_8859_2_pl(),
-	newRecognizer_8859_2_ro(),
-	newRecognizer_8859_5_ru(),
-	newRecognizer_8859_6_ar(),
-	newRecognizer_8859_7_el(),
-	newRecognizer_8859_8_I_he(),
-	newRecognizer_8859_8_he(),
-	newRecognizer_windows_1251(),
-	newRecognizer_windows_1256(),
-	newRecognizer_KOI8_R(),
-	newRecognizer_8859_9_tr(),
-
-	newRecognizer_sjis(),
-	newRecognizer_gb_18030(),
-	newRecognizer_euc_jp(),
-	newRecognizer_euc_kr(),
-	newRecognizer_big5(),
-
-	newRecognizer_2022JP(),
-	newRecognizer_2022KR(),
-	newRecognizer_2022CN(),
-
-	newRecognizer_IBM424_he_rtl(),
-	newRecognizer_IBM424_he_ltr(),
-	newRecognizer_IBM420_ar_rtl(),
-	newRecognizer_IBM420_ar_ltr(),
-}
-
-// NewTextDetector creates a Detector for plain text.
-func NewTextDetector() *Detector {
-	return &Detector{recognizers, false}
-}
-
-// NewHtmlDetector creates a Detector for Html.
-func NewHtmlDetector() *Detector {
-	return &Detector{recognizers, true}
-}
-
-var (
-	NotDetectedError = errors.New("Charset not detected.")
-)
-
-// DetectBest returns the Result with highest Confidence.
-func (d *Detector) DetectBest(b []byte) (r *Result, err error) {
-	var all []Result
-	if all, err = d.DetectAll(b); err == nil {
-		r = &all[0]
-	}
-	return
-}
-
-// DetectAll returns all Results which have non-zero Confidence. The Results are sorted by Confidence in descending order.
-func (d *Detector) DetectAll(b []byte) ([]Result, error) {
-	input := newRecognizerInput(b, d.stripTag)
-	outputChan := make(chan recognizerOutput)
-	for _, r := range d.recognizers {
-		go matchHelper(r, input, outputChan)
-	}
-	outputs := make([]recognizerOutput, 0, len(d.recognizers))
-	for i := 0; i < len(d.recognizers); i++ {
-		o := <-outputChan
-		if o.Confidence > 0 {
-			outputs = append(outputs, o)
-		}
-	}
-	if len(outputs) == 0 {
-		return nil, NotDetectedError
-	}
-
-	sort.Sort(recognizerOutputs(outputs))
-	dedupOutputs := make([]Result, 0, len(outputs))
-	foundCharsets := make(map[string]struct{}, len(outputs))
-	for _, o := range outputs {
-		if _, found := foundCharsets[o.Charset]; !found {
-			dedupOutputs = append(dedupOutputs, Result(o))
-			foundCharsets[o.Charset] = struct{}{}
-		}
-	}
-	if len(dedupOutputs) == 0 {
-		return nil, NotDetectedError
-	}
-	return dedupOutputs, nil
-}
-
-func matchHelper(r recognizer, input *recognizerInput, outputChan chan<- recognizerOutput) {
-	outputChan <- r.Match(input)
-}
-
-type recognizerOutputs []recognizerOutput
-
-func (r recognizerOutputs) Len() int           { return len(r) }
-func (r recognizerOutputs) Less(i, j int) bool { return r[i].Confidence > r[j].Confidence }
-func (r recognizerOutputs) Swap(i, j int)      { r[i], r[j] = r[j], r[i] }
--- a/vendor/github.com/saintfish/chardet/icu-license.html
+++ b/vendor/github.com/saintfish/chardet/icu-license.html
-<html>
-
-<head>
-<meta http-equiv="Content-Type" content="text/html; charset=us-ascii"></meta>
-<title>ICU License - ICU 1.8.1 and later</title>
-</head>
-
-<body BGCOLOR="#ffffff">
-<h2>ICU License - ICU 1.8.1 and later</h2>
-
-<p>COPYRIGHT AND PERMISSION NOTICE</p>
-
-<p>
-Copyright (c) 1995-2012 International Business Machines Corporation and others
-</p>
-<p>
-All rights reserved.
-</p>
-<p>
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"),
-to deal in the Software without restriction, including without limitation
-the rights to use, copy, modify, merge, publish, distribute, and/or sell
-copies of the Software, and to permit persons
-to whom the Software is furnished to do so, provided that the above
-copyright notice(s) and this permission notice appear in all copies
-of the Software and that both the above copyright notice(s) and this
-permission notice appear in supporting documentation.
-</p>
-<p>
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
-INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
-PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL
-THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM,
-OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER
-RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
-NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
-USE OR PERFORMANCE OF THIS SOFTWARE.
-</p>
-<p>
-Except as contained in this notice, the name of a copyright holder shall not be
-used in advertising or otherwise to promote the sale, use or other dealings in
-this Software without prior written authorization of the copyright holder.
-</p>
-
-<hr>
-<p><small>
-All trademarks and registered trademarks mentioned herein are the property of their respective owners.
-</small></p>
-</body>
-</html>
--- a/vendor/github.com/saintfish/chardet/multi_byte.go
+++ b/vendor/github.com/saintfish/chardet/multi_byte.go
-package chardet
-
-import (
-	"errors"
-	"math"
-)
-
-type recognizerMultiByte struct {
-	charset     string
-	language    string
-	decoder     charDecoder
-	commonChars []uint16
-}
-
-type charDecoder interface {
-	DecodeOneChar([]byte) (c uint16, remain []byte, err error)
-}
-
-func (r *recognizerMultiByte) Match(input *recognizerInput) (output recognizerOutput) {
-	return recognizerOutput{
-		Charset:    r.charset,
-		Language:   r.language,
-		Confidence: r.matchConfidence(input),
-	}
-}
-
-func (r *recognizerMultiByte) matchConfidence(input *recognizerInput) int {
-	raw := input.raw
-	var c uint16
-	var err error
-	var totalCharCount, badCharCount, singleByteCharCount, doubleByteCharCount, commonCharCount int
-	for c, raw, err = r.decoder.DecodeOneChar(raw); len(raw) > 0; c, raw, err = r.decoder.DecodeOneChar(raw) {
-		totalCharCount++
-		if err != nil {
-			badCharCount++
-		} else if c <= 0xFF {
-			singleByteCharCount++
-		} else {
-			doubleByteCharCount++
-			if r.commonChars != nil && binarySearch(r.commonChars, c) {
-				commonCharCount++
-			}
-		}
-		if badCharCount >= 2 && badCharCount*5 >= doubleByteCharCount {
-			return 0
-		}
-	}
-
-	if doubleByteCharCount <= 10 && badCharCount == 0 {
-		if doubleByteCharCount == 0 && totalCharCount < 10 {
-			return 0
-		} else {
-			return 10
-		}
-	}
-
-	if doubleByteCharCount < 20*badCharCount {
-		return 0
-	}
-	if r.commonChars == nil {
-		confidence := 30 + doubleByteCharCount - 20*badCharCount
-		if confidence > 100 {
-			confidence = 100
-		}
-		return confidence
-	}
-	maxVal := math.Log(float64(doubleByteCharCount) / 4)
-	scaleFactor := 90 / maxVal
-	confidence := int(math.Log(float64(commonCharCount)+1)*scaleFactor + 10)
-	if confidence > 100 {
-		confidence = 100
-	}
-	if confidence < 0 {
-		confidence = 0
-	}
-	return confidence
-}
-
-func binarySearch(l []uint16, c uint16) bool {
-	start := 0
-	end := len(l) - 1
-	for start <= end {
-		mid := (start + end) / 2
-		if c == l[mid] {
-			return true
-		} else if c < l[mid] {
-			end = mid - 1
-		} else {
-			start = mid + 1
-		}
-	}
-	return false
-}
-
-var eobError = errors.New("End of input buffer")
-var badCharError = errors.New("Decode a bad char")
-
-type charDecoder_sjis struct {
-}
-
-func (charDecoder_sjis) DecodeOneChar(input []byte) (c uint16, remain []byte, err error) {
-	if len(input) == 0 {
-		return 0, nil, eobError
-	}
-	first := input[0]
-	c = uint16(first)
-	remain = input[1:]
-	if first <= 0x7F || (first > 0xA0 && first <= 0xDF) {
-		return
-	}
-	if len(remain) == 0 {
-		return c, remain, badCharError
-	}
-	second := remain[0]
-	remain = remain[1:]
-	c = c<<8 | uint16(second)
-	if (second >= 0x40 && second <= 0x7F) || (second >= 0x80 && second <= 0xFE) {
-	} else {
-		err = badCharError
-	}
-	return
-}
-
-var commonChars_sjis = []uint16{
-	0x8140, 0x8141, 0x8142, 0x8145, 0x815b, 0x8169, 0x816a, 0x8175, 0x8176, 0x82a0,
-	0x82a2, 0x82a4, 0x82a9, 0x82aa, 0x82ab, 0x82ad, 0x82af, 0x82b1, 0x82b3, 0x82b5,
-	0x82b7, 0x82bd, 0x82be, 0x82c1, 0x82c4, 0x82c5, 0x82c6, 0x82c8, 0x82c9, 0x82cc,
-	0x82cd, 0x82dc, 0x82e0, 0x82e7, 0x82e8, 0x82e9, 0x82ea, 0x82f0, 0x82f1, 0x8341,
-	0x8343, 0x834e, 0x834f, 0x8358, 0x835e, 0x8362, 0x8367, 0x8375, 0x8376, 0x8389,
-	0x838a, 0x838b, 0x838d, 0x8393, 0x8e96, 0x93fa, 0x95aa,
-}
-
-func newRecognizer_sjis() *recognizerMultiByte {
-	return &recognizerMultiByte{
-		"Shift_JIS",
-		"ja",
-		charDecoder_sjis{},
-		commonChars_sjis,
-	}
-}
-
-type charDecoder_euc struct {
-}
-
-func (charDecoder_euc) DecodeOneChar(input []byte) (c uint16, remain []byte, err error) {
-	if len(input) == 0 {
-		return 0, nil, eobError
-	}
-	first := input[0]
-	remain = input[1:]
-	c = uint16(first)
-	if first <= 0x8D {
-		return uint16(first), remain, nil
-	}
-	if len(remain) == 0 {
-		return 0, nil, eobError
-	}
-	second := remain[0]
-	remain = remain[1:]
-	c = c<<8 | uint16(second)
-	if first >= 0xA1 && first <= 0xFE {
-		if second < 0xA1 {
-			err = badCharError
-		}
-		return
-	}
-	if first == 0x8E {
-		if second < 0xA1 {
-			err = badCharError
-		}
-		return
-	}
-	if first == 0x8F {
-		if len(remain) == 0 {
-			return 0, nil, eobError
-		}
-		third := remain[0]
-		remain = remain[1:]
-		c = c<<0 | uint16(third)
-		if third < 0xa1 {
-			err = badCharError
-		}
-	}
-	return
-}
-
-var commonChars_euc_jp = []uint16{
-	0xa1a1, 0xa1a2, 0xa1a3, 0xa1a6, 0xa1bc, 0xa1ca, 0xa1cb, 0xa1d6, 0xa1d7, 0xa4a2,
-	0xa4a4, 0xa4a6, 0xa4a8, 0xa4aa, 0xa4ab, 0xa4ac, 0xa4ad, 0xa4af, 0xa4b1, 0xa4b3,
-	0xa4b5, 0xa4b7, 0xa4b9, 0xa4bb, 0xa4bd, 0xa4bf, 0xa4c0, 0xa4c1, 0xa4c3, 0xa4c4,
-	0xa4c6, 0xa4c7, 0xa4c8, 0xa4c9, 0xa4ca, 0xa4cb, 0xa4ce, 0xa4cf, 0xa4d0, 0xa4de,
-	0xa4df, 0xa4e1, 0xa4e2, 0xa4e4, 0xa4e8, 0xa4e9, 0xa4ea, 0xa4eb, 0xa4ec, 0xa4ef,
-	0xa4f2, 0xa4f3, 0xa5a2, 0xa5a3, 0xa5a4, 0xa5a6, 0xa5a7, 0xa5aa, 0xa5ad, 0xa5af,
-	0xa5b0, 0xa5b3, 0xa5b5, 0xa5b7, 0xa5b8, 0xa5b9, 0xa5bf, 0xa5c3, 0xa5c6, 0xa5c7,
-	0xa5c8, 0xa5c9, 0xa5cb, 0xa5d0, 0xa5d5, 0xa5d6, 0xa5d7, 0xa5de, 0xa5e0, 0xa5e1,
-	0xa5e5, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5f3, 0xb8a9, 0xb9d4, 0xbaee,
-	0xbbc8, 0xbef0, 0xbfb7, 0xc4ea, 0xc6fc, 0xc7bd, 0xcab8, 0xcaf3, 0xcbdc, 0xcdd1,
-}
-
-var commonChars_euc_kr = []uint16{
-	0xb0a1, 0xb0b3, 0xb0c5, 0xb0cd, 0xb0d4, 0xb0e6, 0xb0ed, 0xb0f8, 0xb0fa, 0xb0fc,
-	0xb1b8, 0xb1b9, 0xb1c7, 0xb1d7, 0xb1e2, 0xb3aa, 0xb3bb, 0xb4c2, 0xb4cf, 0xb4d9,
-	0xb4eb, 0xb5a5, 0xb5b5, 0xb5bf, 0xb5c7, 0xb5e9, 0xb6f3, 0xb7af, 0xb7c2, 0xb7ce,
-	0xb8a6, 0xb8ae, 0xb8b6, 0xb8b8, 0xb8bb, 0xb8e9, 0xb9ab, 0xb9ae, 0xb9cc, 0xb9ce,
-	0xb9fd, 0xbab8, 0xbace, 0xbad0, 0xbaf1, 0xbbe7, 0xbbf3, 0xbbfd, 0xbcad, 0xbcba,
-	0xbcd2, 0xbcf6, 0xbdba, 0xbdc0, 0xbdc3, 0xbdc5, 0xbec6, 0xbec8, 0xbedf, 0xbeee,
-	0xbef8, 0xbefa, 0xbfa1, 0xbfa9, 0xbfc0, 0xbfe4, 0xbfeb, 0xbfec, 0xbff8, 0xc0a7,
-	0xc0af, 0xc0b8, 0xc0ba, 0xc0bb, 0xc0bd, 0xc0c7, 0xc0cc, 0xc0ce, 0xc0cf, 0xc0d6,
-	0xc0da, 0xc0e5, 0xc0fb, 0xc0fc, 0xc1a4, 0xc1a6, 0xc1b6, 0xc1d6, 0xc1df, 0xc1f6,
-	0xc1f8, 0xc4a1, 0xc5cd, 0xc6ae, 0xc7cf, 0xc7d1, 0xc7d2, 0xc7d8, 0xc7e5, 0xc8ad,
-}
-
-func newRecognizer_euc_jp() *recognizerMultiByte {
-	return &recognizerMultiByte{
-		"EUC-JP",
-		"ja",
-		charDecoder_euc{},
-		commonChars_euc_jp,
-	}
-}
-
-func newRecognizer_euc_kr() *recognizerMultiByte {
-	return &recognizerMultiByte{
-		"EUC-KR",
-		"ko",
-		charDecoder_euc{},
-		commonChars_euc_kr,
-	}
-}
-
-type charDecoder_big5 struct {
-}
-
-func (charDecoder_big5) DecodeOneChar(input []byte) (c uint16, remain []byte, err error) {
-	if len(input) == 0 {
-		return 0, nil, eobError
-	}
-	first := input[0]
-	remain = input[1:]
-	c = uint16(first)
-	if first <= 0x7F || first == 0xFF {
-		return
-	}
-	if len(remain) == 0 {
-		return c, nil, eobError
-	}
-	second := remain[0]
-	remain = remain[1:]
-	c = c<<8 | uint16(second)
-	if second < 0x40 || second == 0x7F || second == 0xFF {
-		err = badCharError
-	}
-	return
-}
-
-var commonChars_big5 = []uint16{
-	0xa140, 0xa141, 0xa142, 0xa143, 0xa147, 0xa149, 0xa175, 0xa176, 0xa440, 0xa446,
-	0xa447, 0xa448, 0xa451, 0xa454, 0xa457, 0xa464, 0xa46a, 0xa46c, 0xa477, 0xa4a3,
-	0xa4a4, 0xa4a7, 0xa4c1, 0xa4ce, 0xa4d1, 0xa4df, 0xa4e8, 0xa4fd, 0xa540, 0xa548,
-	0xa558, 0xa569, 0xa5cd, 0xa5e7, 0xa657, 0xa661, 0xa662, 0xa668, 0xa670, 0xa6a8,
-	0xa6b3, 0xa6b9, 0xa6d3, 0xa6db, 0xa6e6, 0xa6f2, 0xa740, 0xa751, 0xa759, 0xa7da,
-	0xa8a3, 0xa8a5, 0xa8ad, 0xa8d1, 0xa8d3, 0xa8e4, 0xa8fc, 0xa9c0, 0xa9d2, 0xa9f3,
-	0xaa6b, 0xaaba, 0xaabe, 0xaacc, 0xaafc, 0xac47, 0xac4f, 0xacb0, 0xacd2, 0xad59,
-	0xaec9, 0xafe0, 0xb0ea, 0xb16f, 0xb2b3, 0xb2c4, 0xb36f, 0xb44c, 0xb44e, 0xb54c,
-	0xb5a5, 0xb5bd, 0xb5d0, 0xb5d8, 0xb671, 0xb7ed, 0xb867, 0xb944, 0xbad8, 0xbb44,
-	0xbba1, 0xbdd1, 0xc2c4, 0xc3b9, 0xc440, 0xc45f,
-}
-
-func newRecognizer_big5() *recognizerMultiByte {
-	return &recognizerMultiByte{
-		"Big5",
-		"zh",
-		charDecoder_big5{},
-		commonChars_big5,
-	}
-}
-
-type charDecoder_gb_18030 struct {
-}
-
-func (charDecoder_gb_18030) DecodeOneChar(input []byte) (c uint16, remain []byte, err error) {
-	if len(input) == 0 {
-		return 0, nil, eobError
-	}
-	first := input[0]
-	remain = input[1:]
-	c = uint16(first)
-	if first <= 0x80 {
-		return
-	}
-	if len(remain) == 0 {
-		return 0, nil, eobError
-	}
-	second := remain[0]
-	remain = remain[1:]
-	c = c<<8 | uint16(second)
-	if first >= 0x81 && first <= 0xFE {
-		if (second >= 0x40 && second <= 0x7E) || (second >= 0x80 && second <= 0xFE) {
-			return
-		}
-
-		if second >= 0x30 && second <= 0x39 {
-			if len(remain) == 0 {
-				return 0, nil, eobError
-			}
-			third := remain[0]
-			remain = remain[1:]
-			if third >= 0x81 && third <= 0xFE {
-				if len(remain) == 0 {
-					return 0, nil, eobError
-				}
-				fourth := remain[0]
-				remain = remain[1:]
-				if fourth >= 0x30 && fourth <= 0x39 {
-					c = c<<16 | uint16(third)<<8 | uint16(fourth)
-					return
-				}
-			}
-		}
-		err = badCharError
-	}
-	return
-}
-
-var commonChars_gb_18030 = []uint16{
-	0xa1a1, 0xa1a2, 0xa1a3, 0xa1a4, 0xa1b0, 0xa1b1, 0xa1f1, 0xa1f3, 0xa3a1, 0xa3ac,
-	0xa3ba, 0xb1a8, 0xb1b8, 0xb1be, 0xb2bb, 0xb3c9, 0xb3f6, 0xb4f3, 0xb5bd, 0xb5c4,
-	0xb5e3, 0xb6af, 0xb6d4, 0xb6e0, 0xb7a2, 0xb7a8, 0xb7bd, 0xb7d6, 0xb7dd, 0xb8b4,
-	0xb8df, 0xb8f6, 0xb9ab, 0xb9c9, 0xb9d8, 0xb9fa, 0xb9fd, 0xbacd, 0xbba7, 0xbbd6,
-	0xbbe1, 0xbbfa, 0xbcbc, 0xbcdb, 0xbcfe, 0xbdcc, 0xbecd, 0xbedd, 0xbfb4, 0xbfc6,
-	0xbfc9, 0xc0b4, 0xc0ed, 0xc1cb, 0xc2db, 0xc3c7, 0xc4dc, 0xc4ea, 0xc5cc, 0xc6f7,
-	0xc7f8, 0xc8ab, 0xc8cb, 0xc8d5, 0xc8e7, 0xc9cf, 0xc9fa, 0xcab1, 0xcab5, 0xcac7,
-	0xcad0, 0xcad6, 0xcaf5, 0xcafd, 0xccec, 0xcdf8, 0xceaa, 0xcec4, 0xced2, 0xcee5,
-	0xcfb5, 0xcfc2, 0xcfd6, 0xd0c2, 0xd0c5, 0xd0d0, 0xd0d4, 0xd1a7, 0xd2aa, 0xd2b2,
-	0xd2b5, 0xd2bb, 0xd2d4, 0xd3c3, 0xd3d0, 0xd3fd, 0xd4c2, 0xd4da, 0xd5e2, 0xd6d0,
-}
-
-func newRecognizer_gb_18030() *recognizerMultiByte {
-	return &recognizerMultiByte{
-		"GB-18030",
-		"zh",
-		charDecoder_gb_18030{},
-		commonChars_gb_18030,
-	}
-}
--- a/vendor/github.com/saintfish/chardet/recognizer.go
+++ b/vendor/github.com/saintfish/chardet/recognizer.go
-package chardet
-
-type recognizer interface {
-	Match(*recognizerInput) recognizerOutput
-}
-
-type recognizerOutput Result
-
-type recognizerInput struct {
-	raw         []byte
-	input       []byte
-	tagStripped bool
-	byteStats   []int
-	hasC1Bytes  bool
-}
-
-func newRecognizerInput(raw []byte, stripTag bool) *recognizerInput {
-	input, stripped := mayStripInput(raw, stripTag)
-	byteStats := computeByteStats(input)
-	return &recognizerInput{
-		raw:         raw,
-		input:       input,
-		tagStripped: stripped,
-		byteStats:   byteStats,
-		hasC1Bytes:  computeHasC1Bytes(byteStats),
-	}
-}
-
-func mayStripInput(raw []byte, stripTag bool) (out []byte, stripped bool) {
-	const inputBufferSize = 8192
-	out = make([]byte, 0, inputBufferSize)
-	var badTags, openTags int32
-	var inMarkup bool = false
-	stripped = false
-	if stripTag {
-		stripped = true
-		for _, c := range raw {
-			if c == '<' {
-				if inMarkup {
-					badTags += 1
-				}
-				inMarkup = true
-				openTags += 1
-			}
-			if !inMarkup {
-				out = append(out, c)
-				if len(out) >= inputBufferSize {
-					break
-				}
-			}
-			if c == '>' {
-				inMarkup = false
-			}
-		}
-	}
-	if openTags < 5 || openTags/5 < badTags || (len(out) < 100 && len(raw) > 600) {
-		limit := len(raw)
-		if limit > inputBufferSize {
-			limit = inputBufferSize
-		}
-		out = make([]byte, limit)
-		copy(out, raw[:limit])
-		stripped = false
-	}
-	return
-}
-
-func computeByteStats(input []byte) []int {
-	r := make([]int, 256)
-	for _, c := range input {
-		r[c] += 1
-	}
-	return r
-}
-
-func computeHasC1Bytes(byteStats []int) bool {
-	for _, count := range byteStats[0x80 : 0x9F+1] {
-		if count > 0 {
-			return true
-		}
-	}
-	return false
-}
--- a/vendor/github.com/saintfish/chardet/single_byte.go
+++ b/vendor/github.com/saintfish/chardet/single_byte.go
--- a/vendor/github.com/saintfish/chardet/unicode.go
+++ b/vendor/github.com/saintfish/chardet/unicode.go
-package chardet
-
-import (
-	"bytes"
-)
-
-var (
-	utf16beBom = []byte{0xFE, 0xFF}
-	utf16leBom = []byte{0xFF, 0xFE}
-	utf32beBom = []byte{0x00, 0x00, 0xFE, 0xFF}
-	utf32leBom = []byte{0xFF, 0xFE, 0x00, 0x00}
-)
-
-type recognizerUtf16be struct {
-}
-
-func newRecognizer_utf16be() *recognizerUtf16be {
-	return &recognizerUtf16be{}
-}
-
-func (*recognizerUtf16be) Match(input *recognizerInput) (output recognizerOutput) {
-	output = recognizerOutput{
-		Charset: "UTF-16BE",
-	}
-	if bytes.HasPrefix(input.raw, utf16beBom) {
-		output.Confidence = 100
-	}
-	return
-}
-
-type recognizerUtf16le struct {
-}
-
-func newRecognizer_utf16le() *recognizerUtf16le {
-	return &recognizerUtf16le{}
-}
-
-func (*recognizerUtf16le) Match(input *recognizerInput) (output recognizerOutput) {
-	output = recognizerOutput{
-		Charset: "UTF-16LE",
-	}
-	if bytes.HasPrefix(input.raw, utf16leBom) && !bytes.HasPrefix(input.raw, utf32leBom) {
-		output.Confidence = 100
-	}
-	return
-}
-
-type recognizerUtf32 struct {
-	name       string
-	bom        []byte
-	decodeChar func(input []byte) uint32
-}
-
-func decodeUtf32be(input []byte) uint32 {
-	return uint32(input[0])<<24 | uint32(input[1])<<16 | uint32(input[2])<<8 | uint32(input[3])
-}
-
-func decodeUtf32le(input []byte) uint32 {
-	return uint32(input[3])<<24 | uint32(input[2])<<16 | uint32(input[1])<<8 | uint32(input[0])
-}
-
-func newRecognizer_utf32be() *recognizerUtf32 {
-	return &recognizerUtf32{
-		"UTF-32BE",
-		utf32beBom,
-		decodeUtf32be,
-	}
-}
-
-func newRecognizer_utf32le() *recognizerUtf32 {
-	return &recognizerUtf32{
-		"UTF-32LE",
-		utf32leBom,
-		decodeUtf32le,
-	}
-}
-
-func (r *recognizerUtf32) Match(input *recognizerInput) (output recognizerOutput) {
-	output = recognizerOutput{
-		Charset: r.name,
-	}
-	hasBom := bytes.HasPrefix(input.raw, r.bom)
-	var numValid, numInvalid uint32
-	for b := input.raw; len(b) >= 4; b = b[4:] {
-		if c := r.decodeChar(b); c >= 0x10FFFF || (c >= 0xD800 && c <= 0xDFFF) {
-			numInvalid++
-		} else {
-			numValid++
-		}
-	}
-	if hasBom && numInvalid == 0 {
-		output.Confidence = 100
-	} else if hasBom && numValid > numInvalid*10 {
-		output.Confidence = 80
-	} else if numValid > 3 && numInvalid == 0 {
-		output.Confidence = 100
-	} else if numValid > 0 && numInvalid == 0 {
-		output.Confidence = 80
-	} else if numValid > numInvalid*10 {
-		output.Confidence = 25
-	}
-	return
-}
--- a/vendor/github.com/saintfish/chardet/utf8.go
+++ b/vendor/github.com/saintfish/chardet/utf8.go
-package chardet
-
-import (
-	"bytes"
-)
-
-var utf8Bom = []byte{0xEF, 0xBB, 0xBF}
-
-type recognizerUtf8 struct {
-}
-
-func newRecognizer_utf8() *recognizerUtf8 {
-	return &recognizerUtf8{}
-}
-
-func (*recognizerUtf8) Match(input *recognizerInput) (output recognizerOutput) {
-	output = recognizerOutput{
-		Charset: "UTF-8",
-	}
-	hasBom := bytes.HasPrefix(input.raw, utf8Bom)
-	inputLen := len(input.raw)
-	var numValid, numInvalid uint32
-	var trailBytes uint8
-	for i := 0; i < inputLen; i++ {
-		c := input.raw[i]
-		if c&0x80 == 0 {
-			continue
-		}
-		if c&0xE0 == 0xC0 {
-			trailBytes = 1
-		} else if c&0xF0 == 0xE0 {
-			trailBytes = 2
-		} else if c&0xF8 == 0xF0 {
-			trailBytes = 3
-		} else {
-			numInvalid++
-			if numInvalid > 5 {
-				break
-			}
-			trailBytes = 0
-		}
-
-		for i++; i < inputLen; i++ {
-			c = input.raw[i]
-			if c&0xC0 != 0x80 {
-				numInvalid++
-				break
-			}
-			if trailBytes--; trailBytes == 0 {
-				numValid++
-				break
-			}
-		}
-	}
-
-	if hasBom && numInvalid == 0 {
-		output.Confidence = 100
-	} else if hasBom && numValid > numInvalid*10 {
-		output.Confidence = 80
-	} else if numValid > 3 && numInvalid == 0 {
-		output.Confidence = 100
-	} else if numValid > 0 && numInvalid == 0 {
-		output.Confidence = 80
-	} else if numValid == 0 && numInvalid == 0 {
-		// Plain ASCII
-		output.Confidence = 10
-	} else if numValid > numInvalid*10 {
-		output.Confidence = 25
-	}
-	return
-}