From 7424d2fa5bd52b7f41ec4359bf80b1649b59706b Mon Sep 17 00:00:00 2001
From: Robert Speicher <rspeicher@gmail.com>
Date: Wed, 27 May 2015 15:39:08 -0400
Subject: [PATCH] Add ExternalLinkFilter to Markdown pipeline

Forces a `rel="nofollow"` attribute on all external links.
---
 lib/gitlab/markdown.rb                        |  2 ++
 lib/gitlab/markdown/external_link_filter.rb   | 33 +++++++++++++++++++
 spec/features/markdown_spec.rb                | 14 +++++++-
 spec/fixtures/markdown.md.erb                 |  9 ++++-
 .../markdown/external_link_filter_spec.rb     | 33 +++++++++++++++++++
 5 files changed, 89 insertions(+), 2 deletions(-)
 create mode 100644 lib/gitlab/markdown/external_link_filter.rb
 create mode 100644 spec/lib/gitlab/markdown/external_link_filter_spec.rb

diff --git a/lib/gitlab/markdown.rb b/lib/gitlab/markdown.rb
index c0fb22e7f36..5db1566f55d 100644
--- a/lib/gitlab/markdown.rb
+++ b/lib/gitlab/markdown.rb
@@ -11,6 +11,7 @@ module Gitlab
     autoload :CommitReferenceFilter,        'gitlab/markdown/commit_reference_filter'
     autoload :EmojiFilter,                  'gitlab/markdown/emoji_filter'
     autoload :ExternalIssueReferenceFilter, 'gitlab/markdown/external_issue_reference_filter'
+    autoload :ExternalLinkFilter,           'gitlab/markdown/external_link_filter'
     autoload :IssueReferenceFilter,         'gitlab/markdown/issue_reference_filter'
     autoload :LabelReferenceFilter,         'gitlab/markdown/label_reference_filter'
     autoload :MergeRequestReferenceFilter,  'gitlab/markdown/merge_request_reference_filter'
@@ -103,6 +104,7 @@ module Gitlab
         Gitlab::Markdown::EmojiFilter,
         Gitlab::Markdown::TableOfContentsFilter,
         Gitlab::Markdown::AutolinkFilter,
+        Gitlab::Markdown::ExternalLinkFilter,
 
         Gitlab::Markdown::UserReferenceFilter,
         Gitlab::Markdown::IssueReferenceFilter,
diff --git a/lib/gitlab/markdown/external_link_filter.rb b/lib/gitlab/markdown/external_link_filter.rb
new file mode 100644
index 00000000000..c539e0fb823
--- /dev/null
+++ b/lib/gitlab/markdown/external_link_filter.rb
@@ -0,0 +1,33 @@
+require 'html/pipeline/filter'
+
+module Gitlab
+  module Markdown
+    # HTML Filter to add a `rel="nofollow"` attribute to external links
+    #
+    class ExternalLinkFilter < HTML::Pipeline::Filter
+      def call
+        doc.search('a').each do |node|
+          next unless node.has_attribute?('href')
+
+          link = node.attribute('href').value
+
+          # Skip non-HTTP(S) links
+          next unless link.start_with?('http')
+
+          # Skip internal links
+          next if link.start_with?(internal_url)
+
+          node.set_attribute('rel', 'nofollow')
+        end
+
+        doc
+      end
+
+      private
+
+      def internal_url
+        @internal_url ||= Gitlab.config.gitlab.url
+      end
+    end
+  end
+end
diff --git a/spec/features/markdown_spec.rb b/spec/features/markdown_spec.rb
index d6954174660..ee1b3bf749d 100644
--- a/spec/features/markdown_spec.rb
+++ b/spec/features/markdown_spec.rb
@@ -149,7 +149,7 @@ describe 'GitLab Markdown' do
 
       it 'removes `rel` attribute from links' do
         body = get_section('sanitizationfilter')
-        expect(body).not_to have_selector('a[rel]')
+        expect(body).not_to have_selector('a[rel="bookmark"]')
       end
 
       it "removes `href` from `a` elements if it's fishy" do
@@ -237,6 +237,18 @@ describe 'GitLab Markdown' do
       end
     end
 
+    describe 'ExternalLinkFilter' do
+      let(:links) { get_section('externallinkfilter').next_element }
+
+      it 'adds nofollow to external link' do
+        expect(links.css('a').first.to_html).to match 'nofollow'
+      end
+
+      it 'ignores internal link' do
+        expect(links.css('a').last.to_html).not_to match 'nofollow'
+      end
+    end
+
     describe 'ReferenceFilter' do
       it 'handles references in headers' do
         header = @doc.at_css('#reference-filters-eg-1').parent
diff --git a/spec/fixtures/markdown.md.erb b/spec/fixtures/markdown.md.erb
index 26fc4e38e5a..02ab46c905a 100644
--- a/spec/fixtures/markdown.md.erb
+++ b/spec/fixtures/markdown.md.erb
@@ -79,7 +79,7 @@ As permissive as it is, we've allowed even more stuff:
 
 <span>span tag</span>
 
-<a href="#" rel="nofollow">This is a link with a defined rel attribute, which should be removed</a>
+<a href="#" rel="bookmark">This is a link with a defined rel attribute, which should be removed</a>
 
 <a href="javascript:alert('Hi')">This is a link trying to be sneaky. It gets its link removed entirely.</a>
 
@@ -127,6 +127,13 @@ But it shouldn't autolink text inside certain tags:
 - <a>http://about.gitlab.com/</a>
 - <kbd>http://about.gitlab.com/</kbd>
 
+### ExternalLinkFilter
+
+External links get a `rel="nofollow"` attribute:
+
+- [Google](https://google.com/)
+- [GitLab Root](<%= Gitlab.config.gitlab.url %>)
+
 ### Reference Filters (e.g., <%= issue.to_reference %>)
 
 References should be parseable even inside _<%= merge_request.to_reference %>_ emphasis.
diff --git a/spec/lib/gitlab/markdown/external_link_filter_spec.rb b/spec/lib/gitlab/markdown/external_link_filter_spec.rb
new file mode 100644
index 00000000000..c2ff4f80a42
--- /dev/null
+++ b/spec/lib/gitlab/markdown/external_link_filter_spec.rb
@@ -0,0 +1,33 @@
+require 'spec_helper'
+
+module Gitlab::Markdown
+  describe ExternalLinkFilter do
+    def filter(html, options = {})
+      described_class.call(html, options)
+    end
+
+    it 'ignores elements without an href attribute' do
+      exp = act = %q(<a id="ignored">Ignore Me</a>)
+      expect(filter(act).to_html).to eq exp
+    end
+
+    it 'ignores non-HTTP(S) links' do
+      exp = act = %q(<a href="irc://irc.freenode.net/gitlab">IRC</a>)
+      expect(filter(act).to_html).to eq exp
+    end
+
+    it 'skips internal links' do
+      internal = Gitlab.config.gitlab.url
+      exp = act = %Q(<a href="#{internal}/sign_in">Login</a>)
+      expect(filter(act).to_html).to eq exp
+    end
+
+    it 'adds rel="nofollow" to external links' do
+      act = %q(<a href="https://google.com/">Google</a>)
+      doc = filter(act)
+
+      expect(doc.at_css('a')).to have_attribute('rel')
+      expect(doc.at_css('a')['rel']).to eq 'nofollow'
+    end
+  end
+end
-- 
GitLab