Initial encode-to-UTF8 support

f04f4f3e · Nick Thomas · 9a572119 · f04f4f3e · f04f4f3e · f04f4f3e
Commit f04f4f3e authored 7 years ago by Nick Thomas
--- a/indexer/blob.go
+++ b/indexer/blob.go
@@ -84,7 +84,8 @@ func (i *Indexer) BuildBlob(file *object.File, commitSHA string) (*Blob, error)
 		return nil, skipBinaryBlob
 	}
  
-	content := string(b)
+	content := tryEncodeBytes(b)
+	filename := tryEncodeString(file.Name)
  
 	return &Blob{
 		Type:      "blob",
@@ -93,9 +94,9 @@ func (i *Indexer) BuildBlob(file *object.File, commitSHA string) (*Blob, error)
 		RepoID:    parentID,
 		CommitSHA: commitSHA,
 		Content:   content,
-		Path:      file.Name,
-		Filename:  file.Name,
-		Language:  DetectLanguage(file.Name, b),
+		Path:      filename,
+		Filename:  filename,
+		Language:  DetectLanguage(filename, b),
 	}, nil
 }
  

--- a/indexer/commit.go
+++ b/indexer/commit.go
@@ -30,7 +30,7 @@ func (i *Indexer) BuildCommit(c *object.Commit) *Commit {
 		Committer: BuildPerson(c.Committer),
 		ID:        GenerateCommitID(parentID, sha),
 		RepoID:    parentID,
-		Message:   c.Message,
+		Message:   tryEncodeString(c.Message),
 		SHA:       sha,
 	}
 }
--- a/indexer/encoding.go
+++ b/indexer/encoding.go
+package indexer
+
+import (
+	"fmt"
+
+	"github.com/saintfish/chardet"
+	"golang.org/x/text/encoding/ianaindex"
+)
+
+var detector = chardet.NewTextDetector()
+
+// tryEncodeString returns s converted to UTF-8 on a best-effort basis.
+//
+// The conversion itself is done by encodeString. When that fails, s is
+// returned with every invalid UTF-8 byte replaced by U+FFFD, so the fallback
+// result is valid UTF-8.
+func tryEncodeString(s string) string {
+	encoded, err := encodeString(s)
+	if err != nil {
+		// A string -> []rune -> string round trip turns each invalid byte
+		// into U+FFFD and leaves valid UTF-8 unchanged.
+		return string([]rune(s))
+	}
+
+	return encoded
+}
+
+// tryEncodeBytes returns b converted to a UTF-8 string on a best-effort
+// basis.
+//
+// The conversion itself is done by encodeBytes. When that fails, the bytes
+// are returned as a string with every invalid UTF-8 byte replaced by U+FFFD,
+// so the fallback result is valid UTF-8.
+func tryEncodeBytes(b []byte) string {
+	encoded, err := encodeBytes(b)
+	if err != nil {
+		// A string -> []rune -> string round trip turns each invalid byte
+		// into U+FFFD and leaves valid UTF-8 unchanged.
+		return string([]rune(string(b)))
+	}
+
+	return encoded
+}
+
+// encodeString is the string counterpart of encodeBytes: it passes the bytes
+// of s to encodeBytes and returns that result unchanged.
+func encodeString(s string) (string, error) {
+	raw := []byte(s)
+	return encodeBytes(raw)
+}
+
+// encodeBytes converts b from its detected character encoding to a UTF-8
+// string.
+//
+// The charset is guessed by the package-level detector and the matching
+// decoder is looked up in the IANA index. An error is returned when no
+// charset is detected, when the index rejects the detected name, when the
+// index has no decoder for it, or when decoding fails.
+func encodeBytes(b []byte) (string, error) {
+	best, err := detector.DetectBest(b)
+	if err != nil {
+		return "", err
+	}
+
+	charset := best.Charset
+
+	// chardet has some incompatibilities with ianaindex
+	switch charset {
+	case "UTF-8":
+		// Already in the target encoding; nothing to convert.
+		return string(b), nil
+	case "GB-18030":
+		charset = "GB18030"
+	case "IBM420_ltr", "IBM420_rtl":
+		charset = "cp420"
+	case "IBM424_ltr", "IBM424_rtl":
+		charset = "IBM424"
+	}
+
+	encoding, err := ianaindex.IANA.Encoding(charset)
+	if err != nil {
+		return "", fmt.Errorf("Encoding %q: %v", charset, err)
+	}
+
+	// A nil encoding with a nil error means the index knows the name but has
+	// no implementation for it. The bytes cannot be converted, so report that
+	// rather than passing them off as UTF-8.
+	if encoding == nil {
+		return "", fmt.Errorf("Encoding %q: unsupported", charset)
+	}
+
+	decoded, err := encoding.NewDecoder().Bytes(b)
+	if err != nil {
+		return "", err
+	}
+
+	return string(decoded), nil
+}
--- a/indexer/person.go
+++ b/indexer/person.go
@@ -22,8 +22,8 @@ func GenerateDate(t time.Time) string {
  
 func BuildPerson(p object.Signature) *Person {
 	return &Person{
-		Name:  p.Name,
-		Email: p.Email,
+		Name:  tryEncodeString(p.Name),
+		Email: tryEncodeString(p.Email),
 		Time:  GenerateDate(p.When),
 	}
 }
--- a/vendor/github.com/saintfish/chardet/2022.go
+++ b/vendor/github.com/saintfish/chardet/2022.go
+package chardet
+
+import (
+	"bytes"
+)
+
+// recognizer2022 recognizes one ISO-2022 charset by the escape sequences
+// found in the input.
+type recognizer2022 struct {
+	// charset is the name reported in the match output.
+	charset string
+	// escapes lists the byte sequences that, when they follow an ESC (0x1B)
+	// byte, count as a hit for this charset.
+	escapes [][]byte
+}
+
+// Match scores the preprocessed input buffer (input.input) against this
+// recognizer's escape sequences and reports the score under its charset name.
+func (r *recognizer2022) Match(input *recognizerInput) (output recognizerOutput) {
+	output.Charset = r.charset
+	output.Confidence = r.matchConfidence(input.input)
+	return output
+}
+
+// matchConfidence returns a score from 0 to 100 for how well input matches
+// this recognizer's escape sequences.
+//
+// Every ESC (0x1B) byte followed by one of r.escapes is a hit; any other ESC
+// byte is a miss. The bytes 0x0E and 0x0F are counted as shifts, which only
+// reduce the penalty applied to inputs with few hits.
+func (r *recognizer2022) matchConfidence(input []byte) int {
+	var hits, misses, shifts int
+input:
+	for i := 0; i < len(input); i++ {
+		c := input[i]
+		if c == 0x1B {
+			for _, esc := range r.escapes {
+				if bytes.HasPrefix(input[i+1:], esc) {
+					hits++
+					// Jump to the last byte of the matched sequence; the
+					// loop's own i++ then moves past it.
+					i += len(esc)
+					continue input
+				}
+			}
+			misses++
+		} else if c == 0x0E || c == 0x0F {
+			shifts++
+		}
+	}
+	// Without a single recognized escape sequence there is nothing to score.
+	if hits == 0 {
+		return 0
+	}
+	// 100 when every ESC was a hit; drops, possibly below zero, as misses
+	// grow.
+	quality := (100*hits - 100*misses) / (hits + misses)
+	// Fewer than five hits and shifts combined: subtract 10 for each one
+	// short of five.
+	if hits+shifts < 5 {
+		quality -= (5 - (hits + shifts)) * 10
+	}
+	// Clamp negative scores to zero.
+	if quality < 0 {
+		quality = 0
+	}
+	return quality
+}
+
+var escapeSequences_2022JP = [][]byte{
+	{0x24, 0x28, 0x43}, // KS X 1001:1992
+	{0x24, 0x28, 0x44}, // JIS X 212-1990
+	{0x24, 0x40},       // JIS C 6226-1978
+	{0x24, 0x41},       // GB 2312-80
+	{0x24, 0x42},       // JIS X 208-1983
+	{0x26, 0x40},       // JIS X 208 1990, 1997
+	{0x28, 0x42},       // ASCII
+	{0x28, 0x48},       // JIS-Roman
+	{0x28, 0x49},       // Half-width katakana
+	{0x28, 0x4a},       // JIS-Roman
+	{0x2e, 0x41},       // ISO 8859-1
+	{0x2e, 0x46},       // ISO 8859-7
+}
+
+var escapeSequences_2022KR = [][]byte{
+	{0x24, 0x29, 0x43},
+}
+
+var escapeSequences_2022CN = [][]byte{
+	{0x24, 0x29, 0x41}, // GB 2312-80
+	{0x24, 0x29, 0x47}, // CNS 11643-1992 Plane 1
+	{0x24, 0x2A, 0x48}, // CNS 11643-1992 Plane 2
+	{0x24, 0x29, 0x45}, // ISO-IR-165
+	{0x24, 0x2B, 0x49}, // CNS 11643-1992 Plane 3
+	{0x24, 0x2B, 0x4A}, // CNS 11643-1992 Plane 4
+	{0x24, 0x2B, 0x4B}, // CNS 11643-1992 Plane 5
+	{0x24, 0x2B, 0x4C}, // CNS 11643-1992 Plane 6
+	{0x24, 0x2B, 0x4D}, // CNS 11643-1992 Plane 7
+	{0x4e},             // SS2
+	{0x4f},             // SS3
+}
+
+// newRecognizer_2022JP builds the recognizer reporting "ISO-2022-JP", driven
+// by escapeSequences_2022JP.
+func newRecognizer_2022JP() *recognizer2022 {
+	return &recognizer2022{
+		charset: "ISO-2022-JP",
+		escapes: escapeSequences_2022JP,
+	}
+}
+
+// newRecognizer_2022KR builds the recognizer reporting "ISO-2022-KR", driven
+// by escapeSequences_2022KR.
+func newRecognizer_2022KR() *recognizer2022 {
+	return &recognizer2022{
+		charset: "ISO-2022-KR",
+		escapes: escapeSequences_2022KR,
+	}
+}
+
+// newRecognizer_2022CN builds the recognizer reporting "ISO-2022-CN", driven
+// by escapeSequences_2022CN.
+func newRecognizer_2022CN() *recognizer2022 {
+	return &recognizer2022{
+		charset: "ISO-2022-CN",
+		escapes: escapeSequences_2022CN,
+	}
+}
--- a/vendor/github.com/saintfish/chardet/AUTHORS
+++ b/vendor/github.com/saintfish/chardet/AUTHORS
+Sheng Yu (yusheng dot sjtu at gmail dot com)
--- a/vendor/github.com/saintfish/chardet/LICENSE
+++ b/vendor/github.com/saintfish/chardet/LICENSE
+Copyright (c) 2012 chardet Authors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+Partial of the Software is derived from ICU project. See icu-license.html for
+license of the derivative portions.
--- a/vendor/github.com/saintfish/chardet/README.md
+++ b/vendor/github.com/saintfish/chardet/README.md
+# chardet
+
+chardet is a library to automatically detect
+[charset](http://en.wikipedia.org/wiki/Character_encoding) of texts for [Go
+programming language](http://golang.org/). It's based on the algorithm and data
+in [ICU](http://icu-project.org/)'s implementation.
+
+## Documentation and Usage
+
+See [pkgdoc](http://go.pkgdoc.org/github.com/saintfish/chardet)
--- a/vendor/github.com/saintfish/chardet/detector.go
+++ b/vendor/github.com/saintfish/chardet/detector.go
+// Package chardet ports character set detection from ICU.
+package chardet
+
+import (
+	"errors"
+	"sort"
+)
+
+// Result describes one charset guess produced by a Detector.
+type Result struct {
+	// Charset is the name of the detected charset, for example "UTF-8".
+	Charset string
+	// Language is a short code for the detected language, for example "ja".
+	// It may be empty for some charsets.
+	Language string
+	// Confidence of the Result. Scale from 1 to 100. The bigger, the more confident.
+	Confidence int
+}
+
+// Detector implements charset detection.
+type Detector struct {
+	// recognizers are the charset recognizers run on every detection.
+	recognizers []recognizer
+	// stripTag is passed to newRecognizerInput and asks for markup tags to be
+	// stripped from the input before matching.
+	stripTag    bool
+}
+
+// List of charset recognizers
+var recognizers = []recognizer{
+	newRecognizer_utf8(),
+	newRecognizer_utf16be(),
+	newRecognizer_utf16le(),
+	newRecognizer_utf32be(),
+	newRecognizer_utf32le(),
+	newRecognizer_8859_1_en(),
+	newRecognizer_8859_1_da(),
+	newRecognizer_8859_1_de(),
+	newRecognizer_8859_1_es(),
+	newRecognizer_8859_1_fr(),
+	newRecognizer_8859_1_it(),
+	newRecognizer_8859_1_nl(),
+	newRecognizer_8859_1_no(),
+	newRecognizer_8859_1_pt(),
+	newRecognizer_8859_1_sv(),
+	newRecognizer_8859_2_cs(),
+	newRecognizer_8859_2_hu(),
+	newRecognizer_8859_2_pl(),
+	newRecognizer_8859_2_ro(),
+	newRecognizer_8859_5_ru(),
+	newRecognizer_8859_6_ar(),
+	newRecognizer_8859_7_el(),
+	newRecognizer_8859_8_I_he(),
+	newRecognizer_8859_8_he(),
+	newRecognizer_windows_1251(),
+	newRecognizer_windows_1256(),
+	newRecognizer_KOI8_R(),
+	newRecognizer_8859_9_tr(),
+
+	newRecognizer_sjis(),
+	newRecognizer_gb_18030(),
+	newRecognizer_euc_jp(),
+	newRecognizer_euc_kr(),
+	newRecognizer_big5(),
+
+	newRecognizer_2022JP(),
+	newRecognizer_2022KR(),
+	newRecognizer_2022CN(),
+
+	newRecognizer_IBM424_he_rtl(),
+	newRecognizer_IBM424_he_ltr(),
+	newRecognizer_IBM420_ar_rtl(),
+	newRecognizer_IBM420_ar_ltr(),
+}
+
+// NewTextDetector returns a Detector for plain text: it uses the full
+// recognizer list and leaves tag stripping off.
+func NewTextDetector() *Detector {
+	return &Detector{
+		recognizers: recognizers,
+		stripTag:    false,
+	}
+}
+
+// NewHtmlDetector returns a Detector for HTML: it uses the full recognizer
+// list and turns tag stripping on.
+func NewHtmlDetector() *Detector {
+	return &Detector{
+		recognizers: recognizers,
+		stripTag:    true,
+	}
+}
+
+var (
+	NotDetectedError = errors.New("Charset not detected.")
+)
+
+// DetectBest returns the first Result of DetectAll, which is the one with
+// the highest Confidence. The error from DetectAll is returned unchanged, in
+// which case the Result is nil.
+func (d *Detector) DetectBest(b []byte) (r *Result, err error) {
+	all, err := d.DetectAll(b)
+	if err != nil {
+		return nil, err
+	}
+	return &all[0], nil
+}
+
+// DetectAll returns all Results which have non-zero Confidence. The Results
+// are sorted by Confidence in descending order; Results with equal
+// Confidence keep the order of their recognizers in d.recognizers, so the
+// outcome is the same on every run. Only the first Result for each Charset is
+// kept. NotDetectedError is returned when no recognizer reports a non-zero
+// Confidence.
+func (d *Detector) DetectAll(b []byte) ([]Result, error) {
+	input := newRecognizerInput(b, d.stripTag)
+
+	// Recognizers run concurrently and finish in arbitrary order, so each
+	// result carries the index of its recognizer.
+	type indexedOutput struct {
+		index  int
+		output recognizerOutput
+	}
+	outputChan := make(chan indexedOutput)
+	for i, r := range d.recognizers {
+		go func(i int, r recognizer) {
+			outputChan <- indexedOutput{i, r.Match(input)}
+		}(i, r)
+	}
+
+	// Receive exactly one result per recognizer and put it back in
+	// recognizer order.
+	ordered := make([]recognizerOutput, len(d.recognizers))
+	for range d.recognizers {
+		o := <-outputChan
+		ordered[o.index] = o.output
+	}
+
+	outputs := make([]recognizerOutput, 0, len(ordered))
+	for _, o := range ordered {
+		if o.Confidence > 0 {
+			outputs = append(outputs, o)
+		}
+	}
+	if len(outputs) == 0 {
+		return nil, NotDetectedError
+	}
+
+	// A stable sort keeps recognizer order among equal Confidence values.
+	sort.Stable(recognizerOutputs(outputs))
+	dedupOutputs := make([]Result, 0, len(outputs))
+	foundCharsets := make(map[string]struct{}, len(outputs))
+	for _, o := range outputs {
+		if _, found := foundCharsets[o.Charset]; !found {
+			dedupOutputs = append(dedupOutputs, Result(o))
+			foundCharsets[o.Charset] = struct{}{}
+		}
+	}
+	if len(dedupOutputs) == 0 {
+		return nil, NotDetectedError
+	}
+	return dedupOutputs, nil
+}
+
+// matchHelper runs r against input and sends the outcome on outputChan.
+func matchHelper(r recognizer, input *recognizerInput, outputChan chan<- recognizerOutput) {
+	result := r.Match(input)
+	outputChan <- result
+}
+
+// recognizerOutputs implements sort.Interface, ordering outputs by
+// descending Confidence.
+type recognizerOutputs []recognizerOutput
+
+// Len reports the number of outputs.
+func (r recognizerOutputs) Len() int {
+	return len(r)
+}
+
+// Less places the output with the higher Confidence first.
+func (r recognizerOutputs) Less(i, j int) bool {
+	return r[j].Confidence < r[i].Confidence
+}
+
+// Swap exchanges the outputs at i and j.
+func (r recognizerOutputs) Swap(i, j int) {
+	r[j], r[i] = r[i], r[j]
+}
--- a/vendor/github.com/saintfish/chardet/icu-license.html
+++ b/vendor/github.com/saintfish/chardet/icu-license.html
+<html>
+
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=us-ascii"></meta>
+<title>ICU License - ICU 1.8.1 and later</title>
+</head>
+
+<body BGCOLOR="#ffffff">
+<h2>ICU License - ICU 1.8.1 and later</h2>
+
+<p>COPYRIGHT AND PERMISSION NOTICE</p>
+
+<p>
+Copyright (c) 1995-2012 International Business Machines Corporation and others
+</p>
+<p>
+All rights reserved.
+</p>
+<p>
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, and/or sell
+copies of the Software, and to permit persons
+to whom the Software is furnished to do so, provided that the above
+copyright notice(s) and this permission notice appear in all copies
+of the Software and that both the above copyright notice(s) and this
+permission notice appear in supporting documentation.
+</p>
+<p>
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL
+THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM,
+OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER
+RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
+USE OR PERFORMANCE OF THIS SOFTWARE.
+</p>
+<p>
+Except as contained in this notice, the name of a copyright holder shall not be
+used in advertising or otherwise to promote the sale, use or other dealings in
+this Software without prior written authorization of the copyright holder.
+</p>
+
+<hr>
+<p><small>
+All trademarks and registered trademarks mentioned herein are the property of their respective owners.
+</small></p>
+</body>
+</html>
--- a/vendor/github.com/saintfish/chardet/multi_byte.go
+++ b/vendor/github.com/saintfish/chardet/multi_byte.go
+package chardet
+
+import (
+	"errors"
+	"math"
+)
+
+// recognizerMultiByte recognizes a multi-byte charset by decoding the raw
+// input character by character and scoring the outcome.
+type recognizerMultiByte struct {
+	// charset and language are copied into the match output.
+	charset     string
+	language    string
+	// decoder splits the raw input into characters for this charset.
+	decoder     charDecoder
+	// commonChars is searched with binarySearch, so it has to be sorted in
+	// ascending order. It may be nil, which selects a simpler scoring rule.
+	commonChars []uint16
+}
+
+// charDecoder decodes a single character of a multi-byte charset.
+type charDecoder interface {
+	// DecodeOneChar reads one character from the front of the input. It
+	// returns the character value, the bytes left after it, and an error
+	// when the character could not be decoded.
+	DecodeOneChar([]byte) (c uint16, remain []byte, err error)
+}
+
+// Match scores input with matchConfidence and reports the score under this
+// recognizer's charset and language.
+func (r *recognizerMultiByte) Match(input *recognizerInput) (output recognizerOutput) {
+	output.Charset = r.charset
+	output.Language = r.language
+	output.Confidence = r.matchConfidence(input)
+	return output
+}
+
+// matchConfidence decodes input.raw with r.decoder and returns a score from
+// 0 to 100 for how plausible the input is in this recognizer's charset.
+//
+// It counts decoded characters, decoding errors, single-byte characters
+// (value <= 0xFF), multi-byte characters, and multi-byte characters that
+// appear in r.commonChars, then derives the score from those counts.
+func (r *recognizerMultiByte) matchConfidence(input *recognizerInput) int {
+	raw := input.raw
+	var c uint16
+	var err error
+	var totalCharCount, badCharCount, singleByteCharCount, doubleByteCharCount, commonCharCount int
+	// The condition is checked after each decode, so a character that uses up
+	// the last bytes of the input ends the loop without being counted.
+	for c, raw, err = r.decoder.DecodeOneChar(raw); len(raw) > 0; c, raw, err = r.decoder.DecodeOneChar(raw) {
+		totalCharCount++
+		if err != nil {
+			badCharCount++
+		} else if c <= 0xFF {
+			singleByteCharCount++
+		} else {
+			doubleByteCharCount++
+			if r.commonChars != nil && binarySearch(r.commonChars, c) {
+				commonCharCount++
+			}
+		}
+		// Give up early once there are at least two bad characters and they
+		// are not outnumbered five to one by multi-byte characters.
+		if badCharCount >= 2 && badCharCount*5 >= doubleByteCharCount {
+			return 0
+		}
+	}
+
+	// Few multi-byte characters and no errors: too little evidence. Short,
+	// purely single-byte input scores 0, anything else a low 10.
+	if doubleByteCharCount <= 10 && badCharCount == 0 {
+		if doubleByteCharCount == 0 && totalCharCount < 10 {
+			return 0
+		} else {
+			return 10
+		}
+	}
+
+	// Require at least 20 multi-byte characters per bad character.
+	if doubleByteCharCount < 20*badCharCount {
+		return 0
+	}
+	// No common-character table: score from the raw counts, capped at 100.
+	if r.commonChars == nil {
+		confidence := 30 + doubleByteCharCount - 20*badCharCount
+		if confidence > 100 {
+			confidence = 100
+		}
+		return confidence
+	}
+	// Logarithmic scale of common characters against a quarter of the
+	// multi-byte count, offset by 10 and clamped to 0..100.
+	maxVal := math.Log(float64(doubleByteCharCount) / 4)
+	scaleFactor := 90 / maxVal
+	confidence := int(math.Log(float64(commonCharCount)+1)*scaleFactor + 10)
+	if confidence > 100 {
+		confidence = 100
+	}
+	if confidence < 0 {
+		confidence = 0
+	}
+	return confidence
+}
+
+// binarySearch reports whether c occurs in l, which must be sorted in
+// ascending order.
+func binarySearch(l []uint16, c uint16) bool {
+	lo, hi := 0, len(l)-1
+	for lo <= hi {
+		mid := (lo + hi) / 2
+		switch v := l[mid]; {
+		case v == c:
+			return true
+		case v > c:
+			// c can only be in the lower half.
+			hi = mid - 1
+		default:
+			// c can only be in the upper half.
+			lo = mid + 1
+		}
+	}
+	return false
+}
+
+var eobError = errors.New("End of input buffer")
+var badCharError = errors.New("Decode a bad char")
+
+// charDecoder_sjis is the charDecoder used by the Shift_JIS recognizer.
+type charDecoder_sjis struct {
+}
+
+// DecodeOneChar reads one character from the front of input.
+//
+// Bytes up to 0x7F and bytes in 0xA1-0xDF are single-byte characters. Any
+// other lead byte takes a trail byte, which must lie in 0x40-0xFE; c then
+// holds the lead byte in its high half and the trail byte in its low half.
+// It returns eobError for empty input and badCharError for a missing or
+// out-of-range trail byte.
+func (charDecoder_sjis) DecodeOneChar(input []byte) (c uint16, remain []byte, err error) {
+	if len(input) == 0 {
+		return 0, nil, eobError
+	}
+	first := input[0]
+	c = uint16(first)
+	remain = input[1:]
+	// Single-byte character: nothing more to read.
+	if first <= 0x7F || (first > 0xA0 && first <= 0xDF) {
+		return
+	}
+	// A lead byte with no trail byte left in the input.
+	if len(remain) == 0 {
+		return c, remain, badCharError
+	}
+	second := remain[0]
+	remain = remain[1:]
+	c = c<<8 | uint16(second)
+	// The two ranges together accept every trail byte from 0x40 to 0xFE.
+	if (second >= 0x40 && second <= 0x7F) || (second >= 0x80 && second <= 0xFE) {
+	} else {
+		err = badCharError
+	}
+	return
+}
+
+var commonChars_sjis = []uint16{
+	0x8140, 0x8141, 0x8142, 0x8145, 0x815b, 0x8169, 0x816a, 0x8175, 0x8176, 0x82a0,
+	0x82a2, 0x82a4, 0x82a9, 0x82aa, 0x82ab, 0x82ad, 0x82af, 0x82b1, 0x82b3, 0x82b5,
+	0x82b7, 0x82bd, 0x82be, 0x82c1, 0x82c4, 0x82c5, 0x82c6, 0x82c8, 0x82c9, 0x82cc,
+	0x82cd, 0x82dc, 0x82e0, 0x82e7, 0x82e8, 0x82e9, 0x82ea, 0x82f0, 0x82f1, 0x8341,
+	0x8343, 0x834e, 0x834f, 0x8358, 0x835e, 0x8362, 0x8367, 0x8375, 0x8376, 0x8389,
+	0x838a, 0x838b, 0x838d, 0x8393, 0x8e96, 0x93fa, 0x95aa,
+}
+
+// newRecognizer_sjis builds the "Shift_JIS" recognizer for language "ja".
+func newRecognizer_sjis() *recognizerMultiByte {
+	return &recognizerMultiByte{
+		charset:     "Shift_JIS",
+		language:    "ja",
+		decoder:     charDecoder_sjis{},
+		commonChars: commonChars_sjis,
+	}
+}
+
+// charDecoder_euc is the charDecoder shared by the EUC-JP and EUC-KR
+// recognizers.
+type charDecoder_euc struct {
+}
+
+// DecodeOneChar reads one character from the front of input.
+//
+// Lead bytes up to 0x8D are single-byte characters. Every other lead byte
+// consumes a second byte, and lead byte 0x8F consumes a third. For lead
+// bytes 0xA1-0xFE and 0x8E the second byte must be at least 0xA1; for 0x8F
+// the third byte must be at least 0xA1; otherwise err is badCharError. It
+// returns eobError when input is empty or ends in the middle of a character.
+func (charDecoder_euc) DecodeOneChar(input []byte) (c uint16, remain []byte, err error) {
+	if len(input) == 0 {
+		return 0, nil, eobError
+	}
+	first := input[0]
+	remain = input[1:]
+	c = uint16(first)
+	// Single-byte character.
+	if first <= 0x8D {
+		return uint16(first), remain, nil
+	}
+	if len(remain) == 0 {
+		return 0, nil, eobError
+	}
+	second := remain[0]
+	remain = remain[1:]
+	c = c<<8 | uint16(second)
+	// Two-byte character with a lead byte in 0xA1-0xFE.
+	if first >= 0xA1 && first <= 0xFE {
+		if second < 0xA1 {
+			err = badCharError
+		}
+		return
+	}
+	// Two-byte character introduced by 0x8E.
+	if first == 0x8E {
+		if second < 0xA1 {
+			err = badCharError
+		}
+		return
+	}
+	// Three-byte character introduced by 0x8F.
+	if first == 0x8F {
+		if len(remain) == 0 {
+			return 0, nil, eobError
+		}
+		third := remain[0]
+		remain = remain[1:]
+		// NOTE(review): the shift by 0 is a no-op, so third is OR-ed into the
+		// low byte of the two-byte value already held in c.
+		c = c<<0 | uint16(third)
+		if third < 0xa1 {
+			err = badCharError
+		}
+	}
+	return
+}
+
+var commonChars_euc_jp = []uint16{
+	0xa1a1, 0xa1a2, 0xa1a3, 0xa1a6, 0xa1bc, 0xa1ca, 0xa1cb, 0xa1d6, 0xa1d7, 0xa4a2,
+	0xa4a4, 0xa4a6, 0xa4a8, 0xa4aa, 0xa4ab, 0xa4ac, 0xa4ad, 0xa4af, 0xa4b1, 0xa4b3,
+	0xa4b5, 0xa4b7, 0xa4b9, 0xa4bb, 0xa4bd, 0xa4bf, 0xa4c0, 0xa4c1, 0xa4c3, 0xa4c4,
+	0xa4c6, 0xa4c7, 0xa4c8, 0xa4c9, 0xa4ca, 0xa4cb, 0xa4ce, 0xa4cf, 0xa4d0, 0xa4de,
+	0xa4df, 0xa4e1, 0xa4e2, 0xa4e4, 0xa4e8, 0xa4e9, 0xa4ea, 0xa4eb, 0xa4ec, 0xa4ef,
+	0xa4f2, 0xa4f3, 0xa5a2, 0xa5a3, 0xa5a4, 0xa5a6, 0xa5a7, 0xa5aa, 0xa5ad, 0xa5af,
+	0xa5b0, 0xa5b3, 0xa5b5, 0xa5b7, 0xa5b8, 0xa5b9, 0xa5bf, 0xa5c3, 0xa5c6, 0xa5c7,
+	0xa5c8, 0xa5c9, 0xa5cb, 0xa5d0, 0xa5d5, 0xa5d6, 0xa5d7, 0xa5de, 0xa5e0, 0xa5e1,
+	0xa5e5, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5f3, 0xb8a9, 0xb9d4, 0xbaee,
+	0xbbc8, 0xbef0, 0xbfb7, 0xc4ea, 0xc6fc, 0xc7bd, 0xcab8, 0xcaf3, 0xcbdc, 0xcdd1,
+}
+
+var commonChars_euc_kr = []uint16{
+	0xb0a1, 0xb0b3, 0xb0c5, 0xb0cd, 0xb0d4, 0xb0e6, 0xb0ed, 0xb0f8, 0xb0fa, 0xb0fc,
+	0xb1b8, 0xb1b9, 0xb1c7, 0xb1d7, 0xb1e2, 0xb3aa, 0xb3bb, 0xb4c2, 0xb4cf, 0xb4d9,
+	0xb4eb, 0xb5a5, 0xb5b5, 0xb5bf, 0xb5c7, 0xb5e9, 0xb6f3, 0xb7af, 0xb7c2, 0xb7ce,
+	0xb8a6, 0xb8ae, 0xb8b6, 0xb8b8, 0xb8bb, 0xb8e9, 0xb9ab, 0xb9ae, 0xb9cc, 0xb9ce,
+	0xb9fd, 0xbab8, 0xbace, 0xbad0, 0xbaf1, 0xbbe7, 0xbbf3, 0xbbfd, 0xbcad, 0xbcba,
+	0xbcd2, 0xbcf6, 0xbdba, 0xbdc0, 0xbdc3, 0xbdc5, 0xbec6, 0xbec8, 0xbedf, 0xbeee,
+	0xbef8, 0xbefa, 0xbfa1, 0xbfa9, 0xbfc0, 0xbfe4, 0xbfeb, 0xbfec, 0xbff8, 0xc0a7,
+	0xc0af, 0xc0b8, 0xc0ba, 0xc0bb, 0xc0bd, 0xc0c7, 0xc0cc, 0xc0ce, 0xc0cf, 0xc0d6,
+	0xc0da, 0xc0e5, 0xc0fb, 0xc0fc, 0xc1a4, 0xc1a6, 0xc1b6, 0xc1d6, 0xc1df, 0xc1f6,
+	0xc1f8, 0xc4a1, 0xc5cd, 0xc6ae, 0xc7cf, 0xc7d1, 0xc7d2, 0xc7d8, 0xc7e5, 0xc8ad,
+}
+
+// newRecognizer_euc_jp builds the "EUC-JP" recognizer for language "ja".
+func newRecognizer_euc_jp() *recognizerMultiByte {
+	return &recognizerMultiByte{
+		charset:     "EUC-JP",
+		language:    "ja",
+		decoder:     charDecoder_euc{},
+		commonChars: commonChars_euc_jp,
+	}
+}
+
+// newRecognizer_euc_kr builds the "EUC-KR" recognizer for language "ko".
+func newRecognizer_euc_kr() *recognizerMultiByte {
+	return &recognizerMultiByte{
+		charset:     "EUC-KR",
+		language:    "ko",
+		decoder:     charDecoder_euc{},
+		commonChars: commonChars_euc_kr,
+	}
+}
+
+// charDecoder_big5 is the charDecoder used by the Big5 recognizer.
+type charDecoder_big5 struct {
+}
+
+// DecodeOneChar reads one character from the front of input.
+//
+// Bytes up to 0x7F and the byte 0xFF are single-byte characters. Any other
+// lead byte takes a trail byte; a trail byte below 0x40, or equal to 0x7F or
+// 0xFF, yields badCharError. It returns eobError when input is empty or
+// holds a lead byte with no trail byte.
+func (charDecoder_big5) DecodeOneChar(input []byte) (c uint16, remain []byte, err error) {
+	if len(input) == 0 {
+		return 0, nil, eobError
+	}
+	first := input[0]
+	remain = input[1:]
+	c = uint16(first)
+	// Single-byte character.
+	if first <= 0x7F || first == 0xFF {
+		return
+	}
+	if len(remain) == 0 {
+		return c, nil, eobError
+	}
+	second := remain[0]
+	remain = remain[1:]
+	// Lead byte in the high half, trail byte in the low half.
+	c = c<<8 | uint16(second)
+	if second < 0x40 || second == 0x7F || second == 0xFF {
+		err = badCharError
+	}
+	return
+}
+
+var commonChars_big5 = []uint16{
+	0xa140, 0xa141, 0xa142, 0xa143, 0xa147, 0xa149, 0xa175, 0xa176, 0xa440, 0xa446,
+	0xa447, 0xa448, 0xa451, 0xa454, 0xa457, 0xa464, 0xa46a, 0xa46c, 0xa477, 0xa4a3,
+	0xa4a4, 0xa4a7, 0xa4c1, 0xa4ce, 0xa4d1, 0xa4df, 0xa4e8, 0xa4fd, 0xa540, 0xa548,
+	0xa558, 0xa569, 0xa5cd, 0xa5e7, 0xa657, 0xa661, 0xa662, 0xa668, 0xa670, 0xa6a8,
+	0xa6b3, 0xa6b9, 0xa6d3, 0xa6db, 0xa6e6, 0xa6f2, 0xa740, 0xa751, 0xa759, 0xa7da,
+	0xa8a3, 0xa8a5, 0xa8ad, 0xa8d1, 0xa8d3, 0xa8e4, 0xa8fc, 0xa9c0, 0xa9d2, 0xa9f3,
+	0xaa6b, 0xaaba, 0xaabe, 0xaacc, 0xaafc, 0xac47, 0xac4f, 0xacb0, 0xacd2, 0xad59,
+	0xaec9, 0xafe0, 0xb0ea, 0xb16f, 0xb2b3, 0xb2c4, 0xb36f, 0xb44c, 0xb44e, 0xb54c,
+	0xb5a5, 0xb5bd, 0xb5d0, 0xb5d8, 0xb671, 0xb7ed, 0xb867, 0xb944, 0xbad8, 0xbb44,
+	0xbba1, 0xbdd1, 0xc2c4, 0xc3b9, 0xc440, 0xc45f,
+}
+
+// newRecognizer_big5 builds the "Big5" recognizer for language "zh".
+func newRecognizer_big5() *recognizerMultiByte {
+	return &recognizerMultiByte{
+		charset:     "Big5",
+		language:    "zh",
+		decoder:     charDecoder_big5{},
+		commonChars: commonChars_big5,
+	}
+}
+
+// charDecoder_gb_18030 is the charDecoder used by the GB-18030 recognizer.
+type charDecoder_gb_18030 struct {
+}
+
+// DecodeOneChar reads one character from the front of input.
+//
+// Bytes up to 0x80 are single-byte characters. For lead bytes 0x81-0xFE a
+// second byte in 0x40-0x7E or 0x80-0xFE completes a two-byte character; a
+// second byte in 0x30-0x39 starts a four-byte character whose third byte
+// must be in 0x81-0xFE and whose fourth byte must be in 0x30-0x39. Any other
+// continuation yields badCharError. It returns eobError when input is empty
+// or ends in the middle of a character.
+func (charDecoder_gb_18030) DecodeOneChar(input []byte) (c uint16, remain []byte, err error) {
+	if len(input) == 0 {
+		return 0, nil, eobError
+	}
+	first := input[0]
+	remain = input[1:]
+	c = uint16(first)
+	// Single-byte character.
+	if first <= 0x80 {
+		return
+	}
+	if len(remain) == 0 {
+		return 0, nil, eobError
+	}
+	second := remain[0]
+	remain = remain[1:]
+	c = c<<8 | uint16(second)
+	if first >= 0x81 && first <= 0xFE {
+		// Two-byte character.
+		if (second >= 0x40 && second <= 0x7E) || (second >= 0x80 && second <= 0xFE) {
+			return
+		}
+
+		// Possible four-byte character.
+		if second >= 0x30 && second <= 0x39 {
+			if len(remain) == 0 {
+				return 0, nil, eobError
+			}
+			third := remain[0]
+			remain = remain[1:]
+			if third >= 0x81 && third <= 0xFE {
+				if len(remain) == 0 {
+					return 0, nil, eobError
+				}
+				fourth := remain[0]
+				remain = remain[1:]
+				if fourth >= 0x30 && fourth <= 0x39 {
+					// NOTE(review): c is a uint16, so c<<16 is zero and only
+					// the third and fourth bytes remain in c.
+					c = c<<16 | uint16(third)<<8 | uint16(fourth)
+					return
+				}
+			}
+		}
+		// Reached when no valid two- or four-byte form matched.
+		err = badCharError
+	}
+	return
+}
+
+var commonChars_gb_18030 = []uint16{
+	0xa1a1, 0xa1a2, 0xa1a3, 0xa1a4, 0xa1b0, 0xa1b1, 0xa1f1, 0xa1f3, 0xa3a1, 0xa3ac,
+	0xa3ba, 0xb1a8, 0xb1b8, 0xb1be, 0xb2bb, 0xb3c9, 0xb3f6, 0xb4f3, 0xb5bd, 0xb5c4,
+	0xb5e3, 0xb6af, 0xb6d4, 0xb6e0, 0xb7a2, 0xb7a8, 0xb7bd, 0xb7d6, 0xb7dd, 0xb8b4,
+	0xb8df, 0xb8f6, 0xb9ab, 0xb9c9, 0xb9d8, 0xb9fa, 0xb9fd, 0xbacd, 0xbba7, 0xbbd6,
+	0xbbe1, 0xbbfa, 0xbcbc, 0xbcdb, 0xbcfe, 0xbdcc, 0xbecd, 0xbedd, 0xbfb4, 0xbfc6,
+	0xbfc9, 0xc0b4, 0xc0ed, 0xc1cb, 0xc2db, 0xc3c7, 0xc4dc, 0xc4ea, 0xc5cc, 0xc6f7,
+	0xc7f8, 0xc8ab, 0xc8cb, 0xc8d5, 0xc8e7, 0xc9cf, 0xc9fa, 0xcab1, 0xcab5, 0xcac7,
+	0xcad0, 0xcad6, 0xcaf5, 0xcafd, 0xccec, 0xcdf8, 0xceaa, 0xcec4, 0xced2, 0xcee5,
+	0xcfb5, 0xcfc2, 0xcfd6, 0xd0c2, 0xd0c5, 0xd0d0, 0xd0d4, 0xd1a7, 0xd2aa, 0xd2b2,
+	0xd2b5, 0xd2bb, 0xd2d4, 0xd3c3, 0xd3d0, 0xd3fd, 0xd4c2, 0xd4da, 0xd5e2, 0xd6d0,
+}
+
+// newRecognizer_gb_18030 builds the "GB-18030" recognizer for language "zh".
+func newRecognizer_gb_18030() *recognizerMultiByte {
+	return &recognizerMultiByte{
+		charset:     "GB-18030",
+		language:    "zh",
+		decoder:     charDecoder_gb_18030{},
+		commonChars: commonChars_gb_18030,
+	}
+}
--- a/vendor/github.com/saintfish/chardet/recognizer.go
+++ b/vendor/github.com/saintfish/chardet/recognizer.go
+package chardet
+
+// recognizer scores an input against one charset (and possibly language).
+type recognizer interface {
+	// Match returns the charset this recognizer stands for together with
+	// its confidence for the given input.
+	Match(*recognizerInput) recognizerOutput
+}
+
+// recognizerOutput is what a recognizer produces; it has the same fields as
+// Result and is converted to it by Detector.DetectAll.
+type recognizerOutput Result
+
+// recognizerInput bundles the data handed to every recognizer. It is built
+// by newRecognizerInput.
+type recognizerInput struct {
+	// raw is the input exactly as passed in by the caller.
+	raw         []byte
+	// input is the buffer returned by mayStripInput: at most 8192 bytes,
+	// with markup tags removed when tagStripped is true.
+	input       []byte
+	// tagStripped reports whether tags were actually removed from input.
+	tagStripped bool
+	// byteStats has 256 entries; entry b counts occurrences of byte b in
+	// input.
+	byteStats   []int
+	// hasC1Bytes reports whether input contains a byte in 0x80-0x9F.
+	hasC1Bytes  bool
+}
+
+// newRecognizerInput builds the recognizerInput for raw: it keeps raw as is,
+// derives the (optionally tag-stripped) input buffer, and precomputes the
+// byte statistics of that buffer.
+func newRecognizerInput(raw []byte, stripTag bool) *recognizerInput {
+	ri := &recognizerInput{raw: raw}
+	ri.input, ri.tagStripped = mayStripInput(raw, stripTag)
+	ri.byteStats = computeByteStats(ri.input)
+	ri.hasC1Bytes = computeHasC1Bytes(ri.byteStats)
+	return ri
+}
+
+// mayStripInput prepares raw for the recognizers and returns at most
+// inputBufferSize (8192) bytes.
+//
+// When stripTag is set, every byte from a '<' up to and including the next
+// '>' is dropped and stripped reports true. That result is discarded in
+// favour of a plain prefix copy of raw, with stripped false, when fewer than
+// five tags were opened, when more than a fifth of the tags were opened
+// inside another tag, or when stripping left under 100 bytes of an input
+// longer than 600 bytes. With stripTag unset the result is always the plain
+// prefix copy.
+func mayStripInput(raw []byte, stripTag bool) (out []byte, stripped bool) {
+	const inputBufferSize = 8192
+	var badTags, openTags int32
+	if stripTag {
+		// Allocate the scratch buffer only on the path that fills it.
+		out = make([]byte, 0, inputBufferSize)
+		stripped = true
+		inMarkup := false
+		for _, c := range raw {
+			if c == '<' {
+				// A '<' while already inside a tag marks the tag as bad.
+				if inMarkup {
+					badTags++
+				}
+				inMarkup = true
+				openTags++
+			}
+			if !inMarkup {
+				out = append(out, c)
+				if len(out) >= inputBufferSize {
+					break
+				}
+			}
+			// Checked after the append so the '>' itself is dropped too.
+			if c == '>' {
+				inMarkup = false
+			}
+		}
+	}
+	// Fall back to the unstripped prefix. With stripTag unset openTags is 0,
+	// so this branch is always taken.
+	if openTags < 5 || openTags/5 < badTags || (len(out) < 100 && len(raw) > 600) {
+		limit := len(raw)
+		if limit > inputBufferSize {
+			limit = inputBufferSize
+		}
+		out = make([]byte, limit)
+		copy(out, raw[:limit])
+		stripped = false
+	}
+	return
+}
+
+// computeByteStats returns a 256-entry histogram of input: entry b holds the
+// number of times byte value b occurs.
+func computeByteStats(input []byte) []int {
+	counts := make([]int, 256)
+	for i := 0; i < len(input); i++ {
+		counts[input[i]]++
+	}
+	return counts
+}
+
+// computeHasC1Bytes reports whether the histogram byteStats records at least
+// one byte in the range 0x80-0x9F.
+func computeHasC1Bytes(byteStats []int) bool {
+	c1 := byteStats[0x80 : 0x9F+1]
+	for i := 0; i < len(c1); i++ {
+		if c1[i] > 0 {
+			return true
+		}
+	}
+	return false
+}
--- a/vendor/github.com/saintfish/chardet/single_byte.go
+++ b/vendor/github.com/saintfish/chardet/single_byte.go
--- a/vendor/github.com/saintfish/chardet/unicode.go
+++ b/vendor/github.com/saintfish/chardet/unicode.go
+package chardet
+
+import (
+	"bytes"
+)
+
+var (
+	utf16beBom = []byte{0xFE, 0xFF}
+	utf16leBom = []byte{0xFF, 0xFE}
+	utf32beBom = []byte{0x00, 0x00, 0xFE, 0xFF}
+	utf32leBom = []byte{0xFF, 0xFE, 0x00, 0x00}
+)
+
+// recognizerUtf16be recognizes UTF-16BE input by its byte order mark.
+type recognizerUtf16be struct{}
+
+// newRecognizer_utf16be returns a new UTF-16BE recognizer.
+func newRecognizer_utf16be() *recognizerUtf16be {
+	return new(recognizerUtf16be)
+}
+
+// Match reports a Confidence of 100 when the raw input starts with the
+// UTF-16BE byte order mark (FE FF), and 0 otherwise.
+func (*recognizerUtf16be) Match(input *recognizerInput) (output recognizerOutput) {
+	output.Charset = "UTF-16BE"
+	if bytes.HasPrefix(input.raw, utf16beBom) {
+		output.Confidence = 100
+	}
+	return output
+}
+
+// recognizerUtf16le recognizes UTF-16LE input by its byte order mark.
+type recognizerUtf16le struct{}
+
+// newRecognizer_utf16le returns a new UTF-16LE recognizer.
+func newRecognizer_utf16le() *recognizerUtf16le {
+	return new(recognizerUtf16le)
+}
+
+// Match reports a Confidence of 100 when the raw input starts with the
+// UTF-16LE byte order mark (FF FE) but not with the UTF-32LE one
+// (FF FE 00 00), and 0 otherwise.
+func (*recognizerUtf16le) Match(input *recognizerInput) (output recognizerOutput) {
+	output.Charset = "UTF-16LE"
+	startsUtf16 := bytes.HasPrefix(input.raw, utf16leBom)
+	if startsUtf16 && !bytes.HasPrefix(input.raw, utf32leBom) {
+		output.Confidence = 100
+	}
+	return output
+}
+
+// recognizerUtf32 recognizes one byte order of UTF-32.
+type recognizerUtf32 struct {
+	// name is the charset name reported in the match output.
+	name       string
+	// bom is the byte order mark expected at the start of the input.
+	bom        []byte
+	// decodeChar combines the first four bytes of input into one code unit
+	// in this recognizer's byte order.
+	decodeChar func(input []byte) uint32
+}
+
+// decodeUtf32be combines the first four bytes of input into a uint32, most
+// significant byte first. input must hold at least four bytes.
+func decodeUtf32be(input []byte) uint32 {
+	b0, b1, b2, b3 := uint32(input[0]), uint32(input[1]), uint32(input[2]), uint32(input[3])
+	return b0<<24 | b1<<16 | b2<<8 | b3
+}
+
+// decodeUtf32le combines the first four bytes of input into a uint32, least
+// significant byte first. input must hold at least four bytes.
+func decodeUtf32le(input []byte) uint32 {
+	b3, b2, b1, b0 := uint32(input[3]), uint32(input[2]), uint32(input[1]), uint32(input[0])
+	return b3<<24 | b2<<16 | b1<<8 | b0
+}
+
+// newRecognizer_utf32be builds the "UTF-32BE" recognizer.
+func newRecognizer_utf32be() *recognizerUtf32 {
+	return &recognizerUtf32{
+		name:       "UTF-32BE",
+		bom:        utf32beBom,
+		decodeChar: decodeUtf32be,
+	}
+}
+
+// newRecognizer_utf32le builds the "UTF-32LE" recognizer.
+func newRecognizer_utf32le() *recognizerUtf32 {
+	return &recognizerUtf32{
+		name:       "UTF-32LE",
+		bom:        utf32leBom,
+		decodeChar: decodeUtf32le,
+	}
+}
+
+// Match walks the raw input four bytes at a time, counts the units that are
+// and are not acceptable code points, and derives the Confidence from those
+// counts and from whether the input starts with this recognizer's byte order
+// mark. A unit is rejected when it is 0x10FFFF or above, or falls in the
+// surrogate range 0xD800-0xDFFF.
+func (r *recognizerUtf32) Match(input *recognizerInput) (output recognizerOutput) {
+	output.Charset = r.name
+
+	var numValid, numInvalid uint32
+	for rest := input.raw; len(rest) >= 4; rest = rest[4:] {
+		c := r.decodeChar(rest)
+		if c >= 0x10FFFF || (c >= 0xD800 && c <= 0xDFFF) {
+			numInvalid++
+			continue
+		}
+		numValid++
+	}
+
+	hasBom := bytes.HasPrefix(input.raw, r.bom)
+	switch {
+	case hasBom && numInvalid == 0:
+		output.Confidence = 100
+	case hasBom && numValid > numInvalid*10:
+		output.Confidence = 80
+	case numValid > 3 && numInvalid == 0:
+		output.Confidence = 100
+	case numValid > 0 && numInvalid == 0:
+		output.Confidence = 80
+	case numValid > numInvalid*10:
+		output.Confidence = 25
+	}
+	return output
+}
--- a/vendor/github.com/saintfish/chardet/utf8.go
+++ b/vendor/github.com/saintfish/chardet/utf8.go
+package chardet
+
+import (
+	"bytes"
+)
+
+var utf8Bom = []byte{0xEF, 0xBB, 0xBF}
+
+// recognizerUtf8 recognizes UTF-8 input.
+type recognizerUtf8 struct{}
+
+// newRecognizer_utf8 returns a new UTF-8 recognizer.
+func newRecognizer_utf8() *recognizerUtf8 {
+	return new(recognizerUtf8)
+}
+
+// Match scans the raw input for multi-byte UTF-8 sequences, counts the
+// well-formed and malformed ones, and derives the Confidence from those
+// counts and from whether the input starts with the UTF-8 byte order mark.
+// Input consisting only of bytes below 0x80 scores 10.
+func (*recognizerUtf8) Match(input *recognizerInput) (output recognizerOutput) {
+	output = recognizerOutput{
+		Charset: "UTF-8",
+	}
+	hasBom := bytes.HasPrefix(input.raw, utf8Bom)
+	inputLen := len(input.raw)
+	var numValid, numInvalid uint32
+	var trailBytes uint8
+	for i := 0; i < inputLen; i++ {
+		c := input.raw[i]
+		// Bytes with the high bit clear need no further checks.
+		if c&0x80 == 0 {
+			continue
+		}
+		// The lead byte pattern gives the number of trail bytes to expect.
+		if c&0xE0 == 0xC0 {
+			trailBytes = 1
+		} else if c&0xF0 == 0xE0 {
+			trailBytes = 2
+		} else if c&0xF8 == 0xF0 {
+			trailBytes = 3
+		} else {
+			// Not a valid lead byte. Stop scanning after more than five
+			// invalid sequences; this break leaves the outer loop.
+			numInvalid++
+			if numInvalid > 5 {
+				break
+			}
+			trailBytes = 0
+		}
+
+		// Consume trail bytes, sharing i with the outer loop. Each must match
+		// 10xxxxxx; the first one that does not ends the sequence as invalid.
+		// NOTE(review): after an invalid lead byte trailBytes is 0, so the
+		// decrement below wraps around (uint8) instead of reaching zero.
+		for i++; i < inputLen; i++ {
+			c = input.raw[i]
+			if c&0xC0 != 0x80 {
+				numInvalid++
+				break
+			}
+			if trailBytes--; trailBytes == 0 {
+				numValid++
+				break
+			}
+		}
+	}
+
+	// Earlier branches take precedence; when none applies the Confidence
+	// stays 0.
+	if hasBom && numInvalid == 0 {
+		output.Confidence = 100
+	} else if hasBom && numValid > numInvalid*10 {
+		output.Confidence = 80
+	} else if numValid > 3 && numInvalid == 0 {
+		output.Confidence = 100
+	} else if numValid > 0 && numInvalid == 0 {
+		output.Confidence = 80
+	} else if numValid == 0 && numInvalid == 0 {
+		// Plain ASCII
+		output.Confidence = 10
+	} else if numValid > numInvalid*10 {
+		output.Confidence = 25
+	}
+	return
+}
--- a/vendor/golang.org/x/text/LICENSE
+++ b/vendor/golang.org/x/text/LICENSE
+Copyright (c) 2009 The Go Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+   * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/vendor/golang.org/x/text/PATENTS
+++ b/vendor/golang.org/x/text/PATENTS
+Additional IP Rights Grant (Patents)
+
+"This implementation" means the copyrightable works distributed by
+Google as part of the Go project.
+
+Google hereby grants to You a perpetual, worldwide, non-exclusive,
+no-charge, royalty-free, irrevocable (except as stated in this section)
+patent license to make, have made, use, offer to sell, sell, import,
+transfer and otherwise run, modify and propagate the contents of this
+implementation of Go, where such license applies only to those patent
+claims, both currently owned or controlled by Google and acquired in
+the future, licensable by Google that are necessarily infringed by this
+implementation of Go.  This grant does not include claims that would be
+infringed only as a consequence of further modification of this
+implementation.  If you or your agent or exclusive licensee institute or
+order or agree to the institution of patent litigation against any
+entity (including a cross-claim or counterclaim in a lawsuit) alleging
+that this implementation of Go or any code incorporated within this
+implementation of Go constitutes direct or contributory patent
+infringement, or inducement of patent infringement, then any patent
+rights granted to you under this License for this implementation of Go
+shall terminate as of the date such litigation is filed.
--- a/vendor/golang.org/x/text/encoding/charmap/charmap.go
+++ b/vendor/golang.org/x/text/encoding/charmap/charmap.go
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:generate go run maketables.go
+
+// Package charmap provides simple character encodings such as IBM Code Page 437
+// and Windows 1252.
+package charmap // import "golang.org/x/text/encoding/charmap"
+
+import (
+	"unicode/utf8"
+
+	"golang.org/x/text/encoding"
+	"golang.org/x/text/encoding/internal"
+	"golang.org/x/text/encoding/internal/identifier"
+	"golang.org/x/text/transform"
+)
+
+// These encodings vary only in the way clients should interpret them. Their
+// coded character set is identical and a single implementation can be shared.
+//
+// Each exported variable points at an unexported internal.Encoding value that
+// reuses the base charmap (ISO8859_6 or ISO8859_8) and pairs it with its own
+// name and MIB identifier.
+var (
+	// ISO8859_6E is the ISO 8859-6E encoding.
+	ISO8859_6E encoding.Encoding = &iso8859_6E
+
+	// ISO8859_6I is the ISO 8859-6I encoding.
+	ISO8859_6I encoding.Encoding = &iso8859_6I
+
+	// ISO8859_8E is the ISO 8859-8E encoding.
+	ISO8859_8E encoding.Encoding = &iso8859_8E
+
+	// ISO8859_8I is the ISO 8859-8I encoding.
+	ISO8859_8I encoding.Encoding = &iso8859_8I
+
+	// iso8859_6E reuses ISO8859_6 under the name "ISO-8859-6E".
+	iso8859_6E = internal.Encoding{
+		ISO8859_6,
+		"ISO-8859-6E",
+		identifier.ISO88596E,
+	}
+
+	// iso8859_6I reuses ISO8859_6 under the name "ISO-8859-6I".
+	iso8859_6I = internal.Encoding{
+		ISO8859_6,
+		"ISO-8859-6I",
+		identifier.ISO88596I,
+	}
+
+	// iso8859_8E reuses ISO8859_8 under the name "ISO-8859-8E".
+	iso8859_8E = internal.Encoding{
+		ISO8859_8,
+		"ISO-8859-8E",
+		identifier.ISO88598E,
+	}
+
+	// iso8859_8I reuses ISO8859_8 under the name "ISO-8859-8I".
+	iso8859_8I = internal.Encoding{
+		ISO8859_8,
+		"ISO-8859-8I",
+		identifier.ISO88598I,
+	}
+)
+
+// All is a list of all defined encodings in this package. It aliases listAll,
+// which maketables.go emits into the generated tables.go.
+var All = listAll
+
+// TODO: implement ASCII, which is rather common and close to Windows 1252.
+// ISO8859_1 (also close to Windows 1252) and ISO8859_9 (close to Windows 1254)
+// were previously listed here; maketables.go now generates both.
+
+// utf8Enc holds a rune's UTF-8 encoding in data[:len].
+//
+// maketables.go emits values of this type as positional literals of the form
+// {len, [3]byte{...}}, so the field order must not change.
+type utf8Enc struct {
+	// len is the number of valid bytes in data. The generator rejects any
+	// rune whose UTF-8 form is longer than 3 bytes.
+	len uint8
+	// data holds the UTF-8 bytes; the decoder reads only the first len bytes.
+	data [3]byte
+}
+
+// charmap describes an 8-bit character set encoding.
+//
+// Values of this type are emitted by maketables.go into tables.go; the decode
+// and encode tables are consumed by charmapDecoder and charmapEncoder.
+type charmap struct {
+	// name is the encoding's name.
+	name string
+	// mib is the encoding type of this encoder.
+	mib identifier.MIB
+	// asciiSuperset states whether the encoding is a superset of ASCII.
+	asciiSuperset bool
+	// low is the lower bound of the encoded byte for a non-ASCII rune. If
+	// charmap.asciiSuperset is true then this will be 0x80, otherwise 0x00.
+	// The encoder uses it as the starting index of its binary search in encode.
+	low uint8
+	// replacement is the encoded replacement character.
+	replacement byte
+	// decode is the map from encoded byte to UTF-8.
+	decode [256]utf8Enc
+	// encode is the map from runes to encoded bytes. Each entry is a
+	// uint32: the high 8 bits are the encoded byte and the low 24 bits are
+	// the rune. The table entries are sorted by ascending rune.
+	encode [256]uint32
+}
+
+// NewDecoder returns a decoder that converts bytes in this character set to
+// UTF-8.
+func (m *charmap) NewDecoder() *encoding.Decoder {
+	dec := charmapDecoder{charmap: m}
+	return &encoding.Decoder{Transformer: dec}
+}
+
+// NewEncoder returns an encoder that converts UTF-8 to bytes in this
+// character set.
+func (m *charmap) NewEncoder() *encoding.Encoder {
+	enc := charmapEncoder{charmap: m}
+	return &encoding.Encoder{Transformer: enc}
+}
+
+// String returns the encoding's name.
+func (m *charmap) String() string { return m.name }
+
+// ID returns the encoding's MIB identifier. The second result is always the
+// empty string.
+func (m *charmap) ID() (mib identifier.MIB, other string) {
+	mib = m.mib
+	return mib, other
+}
+
+// charmapDecoder is the transform.Transformer that turns bytes of an 8-bit
+// character set into UTF-8.
+type charmapDecoder struct {
+	transform.NopResetter
+	charmap *charmap
+}
+
+// Transform decodes src into dst one source byte at a time and reports how
+// many bytes were written (nDst) and consumed (nSrc). It stops with
+// transform.ErrShortDst as soon as the next decoded byte sequence would not
+// fit in dst; bytes consumed so far stay consumed. atEOF is not consulted,
+// since every source byte decodes on its own.
+func (m charmapDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+	table := m.charmap
+	for nSrc < len(src) {
+		c := src[nSrc]
+
+		// ASCII bytes pass through unchanged when the character set is an
+		// ASCII superset.
+		if table.asciiSuperset && c < utf8.RuneSelf {
+			if nDst >= len(dst) {
+				return nDst, nSrc, transform.ErrShortDst
+			}
+			dst[nDst] = c
+			nDst++
+			nSrc++
+			continue
+		}
+
+		entry := &table.decode[c]
+		width := int(entry.len)
+		if nDst+width > len(dst) {
+			return nDst, nSrc, transform.ErrShortDst
+		}
+		// A manual loop is used instead of copy: the original authors
+		// measured it as 15% faster for these tiny slices.
+		for k := 0; k < width; k++ {
+			dst[nDst] = entry.data[k]
+			nDst++
+		}
+		nSrc++
+	}
+	return nDst, nSrc, nil
+}
+
+// charmapEncoder implements transform.Transformer by encoding from UTF-8.
+type charmapEncoder struct {
+	transform.NopResetter
+	charmap *charmap
+}
+
+// Transform encodes UTF-8 text from src into single bytes in dst and reports
+// the number of bytes written (nDst) and consumed (nSrc).
+//
+// It stops early with:
+//   - transform.ErrShortDst when dst has no room for another byte;
+//   - transform.ErrShortSrc when src ends in an incomplete rune and atEOF is
+//     false;
+//   - internal.RepertoireError(replacement) when src holds invalid UTF-8 or a
+//     rune that the character set cannot represent.
+//
+// In every error case the offending rune is not counted in nSrc.
+func (m charmapEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+	r, size := rune(0), 0
+loop:
+	for nSrc < len(src) {
+		// Every rune encodes to exactly one byte, so one free slot is enough.
+		if nDst >= len(dst) {
+			err = transform.ErrShortDst
+			break
+		}
+		r = rune(src[nSrc])
+
+		// Decode a 1-byte rune.
+		if r < utf8.RuneSelf {
+			if m.charmap.asciiSuperset {
+				// ASCII maps to itself; skip the table lookup.
+				nSrc++
+				dst[nDst] = uint8(r)
+				nDst++
+				continue
+			}
+			// Not an ASCII superset: fall through to the table lookup.
+			size = 1
+
+		} else {
+			// Decode a multi-byte rune.
+			r, size = utf8.DecodeRune(src[nSrc:])
+			if size == 1 {
+				// All valid runes of size 1 (those below utf8.RuneSelf) were
+				// handled above. We have invalid UTF-8 or we haven't seen the
+				// full character yet.
+				if !atEOF && !utf8.FullRune(src[nSrc:]) {
+					err = transform.ErrShortSrc
+				} else {
+					err = internal.RepertoireError(m.charmap.replacement)
+				}
+				break
+			}
+		}
+
+		// Binary search in [low, high) for that rune in the m.charmap.encode table.
+		for low, high := int(m.charmap.low), 0x100; ; {
+			if low >= high {
+				// Rune is not in the table: leave it unconsumed and stop.
+				err = internal.RepertoireError(m.charmap.replacement)
+				break loop
+			}
+			mid := (low + high) / 2
+			got := m.charmap.encode[mid]
+			// Low 24 bits hold the rune, high 8 bits the encoded byte.
+			gotRune := rune(got & (1<<24 - 1))
+			if gotRune < r {
+				low = mid + 1
+			} else if gotRune > r {
+				high = mid
+			} else {
+				dst[nDst] = byte(got >> 24)
+				nDst++
+				// Leaves only the search loop; the rune is consumed below.
+				break
+			}
+		}
+		nSrc += size
+	}
+	return nDst, nSrc, err
+}
--- a/vendor/golang.org/x/text/encoding/charmap/maketables.go
+++ b/vendor/golang.org/x/text/encoding/charmap/maketables.go
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+import (
+	"bufio"
+	"fmt"
+	"log"
+	"net/http"
+	"sort"
+	"strings"
+	"unicode/utf8"
+
+	"golang.org/x/text/encoding"
+	"golang.org/x/text/internal/gen"
+)
+
+// ascii is the identity mapping for the 128 ASCII code points, U+0000 through
+// U+007F, in order. It forms the low half of the WHATWG-derived and
+// X-User-Defined mappings, and main uses it as the prefix that marks a mapping
+// as an ASCII superset.
+const ascii = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" +
+	"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
+	` !"#$%&'()*+,-./0123456789:;<=>?` +
+	`@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` +
+	"`abcdefghijklmnopqrstuvwxyz{|}~\u007f"
+
+// encodings lists every character set for which main generates a charmap
+// table. Entries are positional, in the field order declared below.
+var encodings = []struct {
+	// name is the human-readable name; it is written to the generated
+	// charmap's name field and to the variable's doc comment.
+	name string
+	// mib is the name of the constant in package identifier that is written
+	// to the generated charmap's mib field.
+	mib string
+	// comment, when non-empty, is appended to the generated doc comment.
+	comment string
+	// varName is a comma-separated list of exported variable names. Only the
+	// first is defined in tables.go; all of them are added to listAll.
+	varName string
+	// replacement is the encoded replacement byte for the charmap.
+	replacement byte
+	// mapping is either a URL that main resolves with getWHATWG or getUCM, or
+	// a literal string holding one rune per encoded byte value.
+	mapping string
+}{
+	{
+		"IBM Code Page 037",
+		"IBM037",
+		"",
+		"CodePage037",
+		0x3f,
+		"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM037-2.1.2.ucm",
+	},
+	{
+		"IBM Code Page 437",
+		"PC8CodePage437",
+		"",
+		"CodePage437",
+		encoding.ASCIISub,
+		"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM437-2.1.2.ucm",
+	},
+	{
+		"IBM Code Page 850",
+		"PC850Multilingual",
+		"",
+		"CodePage850",
+		encoding.ASCIISub,
+		"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM850-2.1.2.ucm",
+	},
+	{
+		"IBM Code Page 852",
+		"PCp852",
+		"",
+		"CodePage852",
+		encoding.ASCIISub,
+		"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM852-2.1.2.ucm",
+	},
+	{
+		"IBM Code Page 855",
+		"IBM855",
+		"",
+		"CodePage855",
+		encoding.ASCIISub,
+		"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM855-2.1.2.ucm",
+	},
+	{
+		"Windows Code Page 858", // PC latin1 with Euro
+		"IBM00858",
+		"",
+		"CodePage858",
+		encoding.ASCIISub,
+		"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/windows-858-2000.ucm",
+	},
+	{
+		"IBM Code Page 860",
+		"IBM860",
+		"",
+		"CodePage860",
+		encoding.ASCIISub,
+		"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM860-2.1.2.ucm",
+	},
+	{
+		"IBM Code Page 862",
+		"PC862LatinHebrew",
+		"",
+		"CodePage862",
+		encoding.ASCIISub,
+		"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM862-2.1.2.ucm",
+	},
+	{
+		"IBM Code Page 863",
+		"IBM863",
+		"",
+		"CodePage863",
+		encoding.ASCIISub,
+		"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM863-2.1.2.ucm",
+	},
+	{
+		"IBM Code Page 865",
+		"IBM865",
+		"",
+		"CodePage865",
+		encoding.ASCIISub,
+		"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM865-2.1.2.ucm",
+	},
+	{
+		"IBM Code Page 866",
+		"IBM866",
+		"",
+		"CodePage866",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-ibm866.txt",
+	},
+	{
+		"IBM Code Page 1047",
+		"IBM1047",
+		"",
+		"CodePage1047",
+		0x3f,
+		"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM1047-2.1.2.ucm",
+	},
+	{
+		"IBM Code Page 1140",
+		"IBM01140",
+		"",
+		"CodePage1140",
+		0x3f,
+		"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/ibm-1140_P100-1997.ucm",
+	},
+	{
+		"ISO 8859-1",
+		"ISOLatin1",
+		"",
+		"ISO8859_1",
+		encoding.ASCIISub,
+		"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_1-1998.ucm",
+	},
+	{
+		"ISO 8859-2",
+		"ISOLatin2",
+		"",
+		"ISO8859_2",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-iso-8859-2.txt",
+	},
+	{
+		"ISO 8859-3",
+		"ISOLatin3",
+		"",
+		"ISO8859_3",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-iso-8859-3.txt",
+	},
+	{
+		"ISO 8859-4",
+		"ISOLatin4",
+		"",
+		"ISO8859_4",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-iso-8859-4.txt",
+	},
+	{
+		"ISO 8859-5",
+		"ISOLatinCyrillic",
+		"",
+		"ISO8859_5",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-iso-8859-5.txt",
+	},
+	{
+		"ISO 8859-6",
+		"ISOLatinArabic",
+		"",
+		"ISO8859_6,ISO8859_6E,ISO8859_6I",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-iso-8859-6.txt",
+	},
+	{
+		"ISO 8859-7",
+		"ISOLatinGreek",
+		"",
+		"ISO8859_7",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-iso-8859-7.txt",
+	},
+	{
+		"ISO 8859-8",
+		"ISOLatinHebrew",
+		"",
+		"ISO8859_8,ISO8859_8E,ISO8859_8I",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-iso-8859-8.txt",
+	},
+	{
+		"ISO 8859-9",
+		"ISOLatin5",
+		"",
+		"ISO8859_9",
+		encoding.ASCIISub,
+		"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_9-1999.ucm",
+	},
+	{
+		"ISO 8859-10",
+		"ISOLatin6",
+		"",
+		"ISO8859_10",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-iso-8859-10.txt",
+	},
+	{
+		"ISO 8859-13",
+		"ISO885913",
+		"",
+		"ISO8859_13",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-iso-8859-13.txt",
+	},
+	{
+		"ISO 8859-14",
+		"ISO885914",
+		"",
+		"ISO8859_14",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-iso-8859-14.txt",
+	},
+	{
+		"ISO 8859-15",
+		"ISO885915",
+		"",
+		"ISO8859_15",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-iso-8859-15.txt",
+	},
+	{
+		"ISO 8859-16",
+		"ISO885916",
+		"",
+		"ISO8859_16",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-iso-8859-16.txt",
+	},
+	{
+		"KOI8-R",
+		"KOI8R",
+		"",
+		"KOI8R",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-koi8-r.txt",
+	},
+	{
+		"KOI8-U",
+		"KOI8U",
+		"",
+		"KOI8U",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-koi8-u.txt",
+	},
+	{
+		"Macintosh",
+		"Macintosh",
+		"",
+		"Macintosh",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-macintosh.txt",
+	},
+	{
+		"Macintosh Cyrillic",
+		"MacintoshCyrillic",
+		"",
+		"MacintoshCyrillic",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-x-mac-cyrillic.txt",
+	},
+	{
+		"Windows 874",
+		"Windows874",
+		"",
+		"Windows874",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-windows-874.txt",
+	},
+	{
+		"Windows 1250",
+		"Windows1250",
+		"",
+		"Windows1250",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-windows-1250.txt",
+	},
+	{
+		"Windows 1251",
+		"Windows1251",
+		"",
+		"Windows1251",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-windows-1251.txt",
+	},
+	{
+		"Windows 1252",
+		"Windows1252",
+		"",
+		"Windows1252",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-windows-1252.txt",
+	},
+	{
+		"Windows 1253",
+		"Windows1253",
+		"",
+		"Windows1253",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-windows-1253.txt",
+	},
+	{
+		"Windows 1254",
+		"Windows1254",
+		"",
+		"Windows1254",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-windows-1254.txt",
+	},
+	{
+		"Windows 1255",
+		"Windows1255",
+		"",
+		"Windows1255",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-windows-1255.txt",
+	},
+	{
+		"Windows 1256",
+		"Windows1256",
+		"",
+		"Windows1256",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-windows-1256.txt",
+	},
+	{
+		"Windows 1257",
+		"Windows1257",
+		"",
+		"Windows1257",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-windows-1257.txt",
+	},
+	{
+		"Windows 1258",
+		"Windows1258",
+		"",
+		"Windows1258",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-windows-1258.txt",
+	},
+	{
+		"X-User-Defined",
+		"XUserDefined",
+		"It is defined at http://encoding.spec.whatwg.org/#x-user-defined",
+		"XUserDefined",
+		encoding.ASCIISub,
+		ascii +
+			"\uf780\uf781\uf782\uf783\uf784\uf785\uf786\uf787" +
+			"\uf788\uf789\uf78a\uf78b\uf78c\uf78d\uf78e\uf78f" +
+			"\uf790\uf791\uf792\uf793\uf794\uf795\uf796\uf797" +
+			"\uf798\uf799\uf79a\uf79b\uf79c\uf79d\uf79e\uf79f" +
+			"\uf7a0\uf7a1\uf7a2\uf7a3\uf7a4\uf7a5\uf7a6\uf7a7" +
+			"\uf7a8\uf7a9\uf7aa\uf7ab\uf7ac\uf7ad\uf7ae\uf7af" +
+			"\uf7b0\uf7b1\uf7b2\uf7b3\uf7b4\uf7b5\uf7b6\uf7b7" +
+			"\uf7b8\uf7b9\uf7ba\uf7bb\uf7bc\uf7bd\uf7be\uf7bf" +
+			"\uf7c0\uf7c1\uf7c2\uf7c3\uf7c4\uf7c5\uf7c6\uf7c7" +
+			"\uf7c8\uf7c9\uf7ca\uf7cb\uf7cc\uf7cd\uf7ce\uf7cf" +
+			"\uf7d0\uf7d1\uf7d2\uf7d3\uf7d4\uf7d5\uf7d6\uf7d7" +
+			"\uf7d8\uf7d9\uf7da\uf7db\uf7dc\uf7dd\uf7de\uf7df" +
+			"\uf7e0\uf7e1\uf7e2\uf7e3\uf7e4\uf7e5\uf7e6\uf7e7" +
+			"\uf7e8\uf7e9\uf7ea\uf7eb\uf7ec\uf7ed\uf7ee\uf7ef" +
+			"\uf7f0\uf7f1\uf7f2\uf7f3\uf7f4\uf7f5\uf7f6\uf7f7" +
+			"\uf7f8\uf7f9\uf7fa\uf7fb\uf7fc\uf7fd\uf7fe\uf7ff",
+	},
+}
+
+// getWHATWG downloads the WHATWG single-byte index at url and returns the
+// complete mapping for that encoding: the ASCII identity mapping followed by
+// the 128 runes described by the index.
+//
+// Each non-blank line that does not start with '#' must hold a decimal
+// pointer in [0, 128) and a hexadecimal code point, separated by a tab.
+// Pointers missing from the index, and code points in [0x80, 0xa0), are left
+// as U+FFFD. Any download, HTTP status, parse or read error aborts the
+// generator via log.Fatalf.
+func getWHATWG(url string) string {
+	res, err := http.Get(url)
+	if err != nil {
+		log.Fatalf("%q: Get: %v", url, err)
+	}
+	defer res.Body.Close()
+	// An error page would otherwise be parsed as if it were an index.
+	if res.StatusCode != http.StatusOK {
+		log.Fatalf("%q: Get: unexpected status %s", url, res.Status)
+	}
+
+	mapping := make([]rune, 128)
+	for i := range mapping {
+		mapping[i] = '\ufffd'
+	}
+
+	scanner := bufio.NewScanner(res.Body)
+	for scanner.Scan() {
+		s := strings.TrimSpace(scanner.Text())
+		if s == "" || s[0] == '#' {
+			continue
+		}
+		x, y := 0, 0
+		if _, err := fmt.Sscanf(s, "%d\t0x%x", &x, &y); err != nil {
+			log.Fatalf("could not parse %q", s)
+		}
+		if x < 0 || 128 <= x {
+			log.Fatalf("code %d is out of range", x)
+		}
+		if 0x80 <= y && y < 0xa0 {
+			// We diverge from the WHATWG spec by mapping control characters
+			// in the range [0x80, 0xa0) to U+FFFD.
+			continue
+		}
+		mapping[x] = rune(y)
+	}
+	// A truncated download must not silently produce a partial table.
+	if err := scanner.Err(); err != nil {
+		log.Fatalf("%q: reading response: %v", url, err)
+	}
+	return ascii + string(mapping)
+}
+
+// getUCM downloads the UCM character-map file at url and returns a 256-rune
+// mapping in which the rune at index i is the code point for encoded byte i.
+//
+// Only lines of the form `<Uxxxx> \xNN |0` are used; every other line is
+// skipped. Bytes without such a line are left as U+FFFD. The generator aborts
+// via log.Fatalf on a download, HTTP status or read error, or when fewer than
+// 200 mappings are found, which suggests the page is not in the expected
+// format.
+func getUCM(url string) string {
+	res, err := http.Get(url)
+	if err != nil {
+		log.Fatalf("%q: Get: %v", url, err)
+	}
+	defer res.Body.Close()
+	// An error page would otherwise be scanned as if it were a UCM file.
+	if res.StatusCode != http.StatusOK {
+		log.Fatalf("%q: Get: unexpected status %s", url, res.Status)
+	}
+
+	mapping := make([]rune, 256)
+	for i := range mapping {
+		mapping[i] = '\ufffd'
+	}
+
+	charsFound := 0
+	scanner := bufio.NewScanner(res.Body)
+	for scanner.Scan() {
+		s := strings.TrimSpace(scanner.Text())
+		if s == "" || s[0] == '#' {
+			continue
+		}
+		var c byte
+		var r rune
+		// Header and trailer lines do not match the format and are skipped.
+		if _, err := fmt.Sscanf(s, `<U%x> \x%x |0`, &r, &c); err != nil {
+			continue
+		}
+		mapping[c] = r
+		charsFound++
+	}
+	// A truncated download must not silently produce a partial table.
+	if err := scanner.Err(); err != nil {
+		log.Fatalf("%q: reading response: %v", url, err)
+	}
+
+	if charsFound < 200 {
+		log.Fatalf("%q: only %d characters found (wrong page format?)", url, charsFound)
+	}
+
+	return string(mapping)
+}
+
+// main generates tables.go for package charmap. For every entry in encodings
+// it resolves the mapping (downloading it when the entry holds a URL) and
+// emits one charmap variable with its decode and encode tables. It finishes
+// by emitting listAll, the list of every exported variable name.
+func main() {
+	// mibs records the MIB names already emitted so duplicates are rejected.
+	mibs := map[string]bool{}
+	all := []string{}
+
+	w := gen.NewCodeWriter()
+	defer w.WriteGoFile("tables.go", "charmap")
+
+	printf := func(s string, a ...interface{}) { fmt.Fprintf(w, s, a...) }
+
+	printf("import (\n")
+	printf("\t\"golang.org/x/text/encoding\"\n")
+	printf("\t\"golang.org/x/text/encoding/internal/identifier\"\n")
+	printf(")\n\n")
+	for _, e := range encodings {
+		// Only the first name gets a generated variable; the remaining
+		// names are expected to be declared by hand elsewhere in the package.
+		varNames := strings.Split(e.varName, ",")
+		all = append(all, varNames...)
+		varName := varNames[0]
+		switch {
+		case strings.HasPrefix(e.mapping, "http://encoding.spec.whatwg.org/"):
+			e.mapping = getWHATWG(e.mapping)
+		case strings.HasPrefix(e.mapping, "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/"):
+			e.mapping = getUCM(e.mapping)
+		}
+
+		asciiSuperset, low := strings.HasPrefix(e.mapping, ascii), 0x00
+		if asciiSuperset {
+			low = 0x80
+		}
+		// The unexported variable name lower-cases the leading letter, or
+		// the whole "ISO"/"KOI" prefix (e.g. iso8859_1, koi8R, windows1252).
+		lvn := 1
+		if strings.HasPrefix(varName, "ISO") || strings.HasPrefix(varName, "KOI") {
+			lvn = 3
+		}
+		lowerVarName := strings.ToLower(varName[:lvn]) + varName[lvn:]
+		printf("// %s is the %s encoding.\n", varName, e.name)
+		if e.comment != "" {
+			printf("//\n// %s\n", e.comment)
+		}
+		printf("var %s encoding.Encoding = &%s\n\nvar %s = charmap{\nname: %q,\n",
+			varName, lowerVarName, lowerVarName, e.name)
+		if mibs[e.mib] {
+			log.Fatalf("MIB type %q declared multiple times.", e.mib)
+		}
+		// Record the MIB; without this the duplicate check above never fires.
+		mibs[e.mib] = true
+		printf("mib: identifier.%s,\n", e.mib)
+		printf("asciiSuperset: %t,\n", asciiSuperset)
+		printf("low: 0x%02x,\n", low)
+		printf("replacement: 0x%02x,\n", e.replacement)
+
+		printf("decode: [256]utf8Enc{\n")
+		// backMapping maps each rune to the first byte that decodes to it;
+		// U+FFFD marks unmapped bytes and is never encodable.
+		i, backMapping := 0, map[rune]byte{}
+		for _, c := range e.mapping {
+			if _, ok := backMapping[c]; !ok && c != utf8.RuneError {
+				backMapping[c] = byte(i)
+			}
+			var buf [8]byte
+			n := utf8.EncodeRune(buf[:], c)
+			if n > 3 {
+				panic(fmt.Sprintf("rune %q (%U) is too long", c, c))
+			}
+			printf("{%d,[3]byte{0x%02x,0x%02x,0x%02x}},", n, buf[0], buf[1], buf[2])
+			if i%2 == 1 {
+				printf("\n")
+			}
+			i++
+		}
+		printf("},\n")
+
+		printf("encode: [256]uint32{\n")
+		encode := make([]uint32, 0, 256)
+		for c, i := range backMapping {
+			encode = append(encode, uint32(i)<<24|uint32(c))
+		}
+		sort.Sort(byRune(encode))
+		// Pad to 256 entries by repeating the last one, which keeps the
+		// table sorted by rune.
+		for len(encode) < cap(encode) {
+			encode = append(encode, encode[len(encode)-1])
+		}
+		for i, enc := range encode {
+			printf("0x%08x,", enc)
+			if i%8 == 7 {
+				printf("\n")
+			}
+		}
+		printf("},\n}\n")
+
+		// Add an estimate of the size of a single charmap{} struct value, which
+		// includes two 256 elem arrays of 4 bytes and some extra fields, which
+		// align to 3 uint64s on 64-bit architectures.
+		w.Size += 2*4*256 + 3*8
+	}
+	// TODO: add proper line breaking.
+	printf("var listAll = []encoding.Encoding{\n%s,\n}\n\n", strings.Join(all, ",\n"))
+}
+
+// byRune implements sort.Interface over encode-table entries, ordering them
+// by the rune stored in the low 24 bits and ignoring the encoded byte in the
+// high 8 bits.
+type byRune []uint32
+
+// Len reports the number of entries.
+func (b byRune) Len() int {
+	return len(b)
+}
+
+// Less reports whether entry i holds a smaller rune than entry j.
+func (b byRune) Less(i, j int) bool {
+	const runeMask = 1<<24 - 1
+	return b[i]&runeMask < b[j]&runeMask
+}
+
+// Swap exchanges entries i and j.
+func (b byRune) Swap(i, j int) {
+	b[j], b[i] = b[i], b[j]
+}
--- a/vendor/golang.org/x/text/encoding/charmap/tables.go
+++ b/vendor/golang.org/x/text/encoding/charmap/tables.go