From a14ee68fe4815d2906ece670bcc333303fd3c816 Mon Sep 17 00:00:00 2001
From: Douwe Maan <douwe@gitlab.com>
Date: Mon, 7 Nov 2016 16:27:35 +0000
Subject: [PATCH] Merge branch 'markdown-xss-fix-option-2.1' into 'security'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix for HackerOne XSS vulnerability in markdown

This is an updated blacklist patch to fix https://dev.gitlab.org/gitlab/gitlabhq/merge_requests/2007. No text is removed. Dangerous schemes/protocols and invalid URIs are left intact but not linked.

Fixes https://gitlab.com/gitlab-org/gitlab-ce/issues/23153

See merge request !2015

Signed-off-by: Rémy Coutable <remy@rymai.me>
---
 lib/banzai/filter/autolink_filter.rb          | 38 ++++++++++++++-----
 .../lib/banzai/filter/autolink_filter_spec.rb | 22 +++++++++++
 2 files changed, 50 insertions(+), 10 deletions(-)

diff --git a/lib/banzai/filter/autolink_filter.rb b/lib/banzai/filter/autolink_filter.rb
index 799b83b1069..80c844baecd 100644
--- a/lib/banzai/filter/autolink_filter.rb
+++ b/lib/banzai/filter/autolink_filter.rb
@@ -71,6 +71,14 @@ module Banzai
         @doc = parse_html(rinku)
       end
 
+      # Return true if the stripped, downcased scheme contains any UNSAFE_PROTOCOLS string as a substring
+      def contains_unsafe?(scheme)
+        return false unless scheme # a missing (nil) scheme is never treated as unsafe
+
+        scheme = scheme.strip.downcase
+        Banzai::Filter::SanitizationFilter::UNSAFE_PROTOCOLS.any? { |protocol| scheme.include?(protocol) }
+      end
+
       # Autolinks any text matching LINK_PATTERN that Rinku didn't already
       # replace
       def text_parse
@@ -89,17 +97,27 @@ module Banzai
         doc
       end
 
-      def autolink_filter(text)
-        text.gsub(LINK_PATTERN) do |match|
-          # Remove any trailing HTML entities and store them for appending
-          # outside the link element. The entity must be marked HTML safe in
-          # order to be output literally rather than escaped.
-          match.gsub!(/((?:&[\w#]+;)+)\z/, '')
-          dropped = ($1 || '').html_safe
-
-          options = link_options.merge(href: match)
-          content_tag(:a, match, options) + dropped
+      def autolink_match(match)
+        # Leave matches with an unsafe scheme or an unparseable URI as plain text (returned unlinked)
+        begin
+          uri = Addressable::URI.parse(match)
+          return match if contains_unsafe?(uri.scheme)
+        rescue Addressable::URI::InvalidURIError
+          return match
         end
+
+        # Remove any trailing HTML entities and store them for appending
+        # outside the link element. The entity must be marked HTML safe in
+        # order to be output literally rather than escaped.
+        match.gsub!(/((?:&[\w#]+;)+)\z/, '')
+        dropped = ($1 || '').html_safe # $1 is nil when no trailing entity was removed
+
+        options = link_options.merge(href: match)
+        content_tag(:a, match, options) + dropped
+      end
+
+      def autolink_filter(text)
+        text.gsub(LINK_PATTERN) { |match| autolink_match(match) }
       end
 
       def link_options
diff --git a/spec/lib/banzai/filter/autolink_filter_spec.rb b/spec/lib/banzai/filter/autolink_filter_spec.rb
index dca7f997570..a6d2ea11fcc 100644
--- a/spec/lib/banzai/filter/autolink_filter_spec.rb
+++ b/spec/lib/banzai/filter/autolink_filter_spec.rb
@@ -99,6 +99,28 @@ describe Banzai::Filter::AutolinkFilter, lib: true do
       expect(doc.at_css('a')['href']).to eq link
     end
 
+    it 'autolinks rdar' do
+      link = 'rdar://localhost.com/blah'
+      doc = filter("See #{link}")
+      # An rdar:// link is expected to be linked verbatim: text and href equal the raw link
+      expect(doc.at_css('a').text).to eq link
+      expect(doc.at_css('a')['href']).to eq link
+    end
+
+    it 'does not autolink javascript' do
+      link = 'javascript://alert(document.cookie);'
+      doc = filter("See #{link}")
+      # The match must stay plain text, so the output may not contain any <a> element
+      expect(doc.at_css('a')).to be_nil
+    end
+
+    it 'does not autolink bad URLs' do
+      link = 'foo://23423:::asdf'
+      doc = filter("See #{link}")
+      # Presumably rejected by Addressable as an invalid URI (verify); output must be unchanged
+      expect(doc.to_s).to eq("See #{link}")
+    end
+
     it 'does not include trailing punctuation' do
       doc = filter("See #{link}.")
       expect(doc.at_css('a').text).to eq link
-- 
GitLab