From 50c2c16a4d8ca52c4abcbef638f5105a9b0d1ee0 Mon Sep 17 00:00:00 2001
From: Gabriel Mazetto <brodock@gmail.com>
Date: Sat, 26 May 2012 20:15:06 -0300
Subject: [PATCH] Better algorithm to deal with encodings. Moved fallback
 rescue message from view to encode library.

This helps fix cases where UTF-8 is wrongly identified as ISO-8859-1. We will only try to convert strings if we are 100% sure about the charset; otherwise, we will fall back to UTF-8.
---
 app/views/commits/_commit.html.haml | 2 +-
 lib/gitlabhq/encode.rb              | 9 ++++++---
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/app/views/commits/_commit.html.haml b/app/views/commits/_commit.html.haml
index a579cca96bb..f52dbcfe723 100644
--- a/app/views/commits/_commit.html.haml
+++ b/app/views/commits/_commit.html.haml
@@ -8,7 +8,7 @@
       %strong.cgray= commit.author_name
       &ndash;
       = image_tag gravatar_icon(commit.author_email), :class => "avatar", :width => 16
-      %span.row_title= truncate(commit.safe_message, :length => 50) rescue "--broken encoding"
+      %span.row_title= truncate(commit.safe_message, :length => 50)
 
       %span.right.cgray
         = time_ago_in_words(commit.committed_date)
diff --git a/lib/gitlabhq/encode.rb b/lib/gitlabhq/encode.rb
index e0e52f0a2a7..780d839f420 100644
--- a/lib/gitlabhq/encode.rb
+++ b/lib/gitlabhq/encode.rb
@@ -8,16 +8,19 @@ module Gitlabhq
     def utf8 message
       return nil unless message
 
-      encoding = detect_encoding(message)
-      if encoding
+      detect = CharlockHolmes::EncodingDetector.detect(message) rescue {}
+      encoding = detect && detect[:encoding]
+      # Convert only on a 100%-confidence detection (UTF-8 is often misdetected); `encoding` is reused by the rescue below
+      if encoding && detect[:confidence] == 100
         CharlockHolmes::Converter.convert(message, encoding, 'UTF-8')
       else
         message
       end.force_encoding("utf-8")
+
     # Prevent app from crash cause of 
     # encoding errors
     rescue
-      ""
+      "--broken encoding: #{encoding}"
     end
 
     def detect_encoding message
-- 
GitLab