From e82010da288ab11f5af0d8c6b05874f0d22e6669 Mon Sep 17 00:00:00 2001 From: Douwe Maan Date: Sun, 10 Jul 2016 15:12:45 -0500 Subject: [PATCH 1/5] Make AutolinkFilter smarter about trailing punctuation --- lib/banzai/filter/autolink_filter.rb | 2 +- spec/lib/banzai/filter/autolink_filter_spec.rb | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/lib/banzai/filter/autolink_filter.rb b/lib/banzai/filter/autolink_filter.rb index b8d2673c1a67..a8ca2757612f 100644 --- a/lib/banzai/filter/autolink_filter.rb +++ b/lib/banzai/filter/autolink_filter.rb @@ -26,7 +26,7 @@ module Banzai # in the generated link. # # Rubular: http://rubular.com/r/cxjPyZc7Sb - LINK_PATTERN = %r{([a-z][a-z0-9\+\.-]+://\S+)(?])} # Text matching LINK_PATTERN inside these elements will not be linked IGNORE_PARENTS = %w(a code kbd pre script style).to_set diff --git a/spec/lib/banzai/filter/autolink_filter_spec.rb b/spec/lib/banzai/filter/autolink_filter_spec.rb index a6d2ea11fcc6..7fdcd1148aa2 100644 --- a/spec/lib/banzai/filter/autolink_filter_spec.rb +++ b/spec/lib/banzai/filter/autolink_filter_spec.rb @@ -130,6 +130,15 @@ describe Banzai::Filter::AutolinkFilter, lib: true do doc = filter("See #{link}...") expect(doc.at_css('a').text).to eq link + + doc = filter("See #{link}\"") + expect(doc.at_css('a').text).to eq link + + doc = filter("See #{link}'") + expect(doc.at_css('a').text).to eq link + + doc = filter("See #{link})") + expect(doc.at_css('a').text).to eq link end it 'does not include trailing HTML entities' do -- GitLab From f678137bf95659ba0afd81e8a59b35c40ecdfdda Mon Sep 17 00:00:00 2001 From: Douwe Maan Date: Sun, 10 Jul 2016 15:38:46 -0500 Subject: [PATCH 2/5] Autolink URLs and emails in blobs and diffs --- lib/banzai/filter/autolink_filter.rb | 10 ++-- lib/banzai/pipeline/autolink_pipeline.rb | 12 +++++ lib/gitlab/highlight.rb | 20 +++++++- spec/lib/gitlab/highlight_spec.rb | 65 +++++++++++++++++++++++- 4 files changed, 98 insertions(+), 9 deletions(-) create 
mode 100644 lib/banzai/pipeline/autolink_pipeline.rb diff --git a/lib/banzai/filter/autolink_filter.rb b/lib/banzai/filter/autolink_filter.rb index a8ca2757612f..15f7da5d934a 100644 --- a/lib/banzai/filter/autolink_filter.rb +++ b/lib/banzai/filter/autolink_filter.rb @@ -54,15 +54,13 @@ module Banzai # # `@doc` will be re-parsed with the HTML String from Rinku. def rinku_parse - # Convert the options from a Hash to a String that Rinku expects - options = tag_options(link_options) - # NOTE: We don't parse email links because it will erroneously match # external Commit and CommitRange references. # # The final argument tells Rinku to link short URLs that don't include a # period (e.g., http://localhost:3000/) - rinku = Rinku.auto_link(html, :urls, options, IGNORE_PARENTS.to_a, 1) + mode = context[:autolink_emails] ? :all : :urls + rinku = Rinku.auto_link(html, mode, tag_options(link_options), IGNORE_PARENTS.to_a, 1) return if rinku == html @@ -111,9 +109,9 @@ module Banzai # order to be output literally rather than escaped. 
match.gsub!(/((?:&[\w#]+;)+)\z/, '') dropped = ($1 || '').html_safe + match = ERB::Util.html_escape_once(match) - options = link_options.merge(href: match) - content_tag(:a, match, options) + dropped + %{#{match}#{dropped}}.html_safe end def autolink_filter(text) diff --git a/lib/banzai/pipeline/autolink_pipeline.rb b/lib/banzai/pipeline/autolink_pipeline.rb new file mode 100644 index 000000000000..53f2da5c7b58 --- /dev/null +++ b/lib/banzai/pipeline/autolink_pipeline.rb @@ -0,0 +1,12 @@ +module Banzai + module Pipeline + class AutolinkPipeline < BasePipeline + def self.filters + @filters ||= FilterArray[ + Filter::AutolinkFilter, + Filter::ExternalLinkFilter + ] + end + end + end +end diff --git a/lib/gitlab/highlight.rb b/lib/gitlab/highlight.rb index 83bc230df3e1..57ae127254be 100644 --- a/lib/gitlab/highlight.rb +++ b/lib/gitlab/highlight.rb @@ -25,7 +25,7 @@ module Gitlab def highlight(text, continue: true, plain: false) highlighted_text = highlight_text(text, continue: continue, plain: plain) highlighted_text = link_dependencies(text, highlighted_text) if blob_name - highlighted_text + autolink_strings(highlighted_text) end def lexer @@ -67,5 +67,23 @@ module Gitlab def link_dependencies(text, highlighted_text) Gitlab::DependencyLinker.link(blob_name, text, highlighted_text) end + + def autolink_strings(highlighted_text) + doc = Nokogiri::HTML::DocumentFragment.parse(highlighted_text) + + # Files without highlighting have all text in `span.line`. + # Files with highlighting have strings and comments in `span`s with a + # `class` starting with `c` or `s`. 
+ doc.xpath('.//span[@class="line" or starts-with(@class, "c") or starts-with(@class, "s")]/text()').each do |node| + content = node.to_html + html = Banzai.render(content, pipeline: :autolink, autolink_emails: true) + + next if html == content + + node.replace(html) + end + + doc.to_html.html_safe + end end end diff --git a/spec/lib/gitlab/highlight_spec.rb b/spec/lib/gitlab/highlight_spec.rb index e57b3053871b..4a106fc675c0 100644 --- a/spec/lib/gitlab/highlight_spec.rb +++ b/spec/lib/gitlab/highlight_spec.rb @@ -9,7 +9,7 @@ describe Gitlab::Highlight, lib: true do describe '.highlight_lines' do let(:lines) do - Gitlab::Highlight.highlight_lines(project.repository, commit.id, 'files/ruby/popen.rb') + described_class.highlight_lines(project.repository, commit.id, 'files/ruby/popen.rb') end it 'highlights all the lines properly' do @@ -59,7 +59,7 @@ describe Gitlab::Highlight, lib: true do end describe '#highlight' do - subject { described_class.highlight(file_name, file_content, nowrap: false) } + subject { described_class.highlight(file_name, file_content) } it 'links dependencies via DependencyLinker' do expect(Gitlab::DependencyLinker).to receive(:link). 
@@ -67,5 +67,66 @@ describe Gitlab::Highlight, lib: true do described_class.highlight('file.name', 'Contents') end + + context "plain text file" do + let(:file_name) { "example.txt" } + let(:file_content) do + <<-CONTENT.strip_heredoc + URL: http://www.google.com + Email: hello@example.com + CONTENT + end + + it "links URLs" do + expect(subject).to include(%{http://www.google.com}) + end + + it "links emails" do + expect(subject).to include(%{hello@example.com}) + end + end + + context "file with highlighting" do + let(:file_name) { "example.rb" } + let(:file_content) do + <<-CONTENT.strip_heredoc + # URL in comment: http://www.google.com + # Email in comment: hello@example.com + + "URL in string: http://www.google.com" + "Email in string: hello@example.com" + + # + # http://www.google.com + CONTENT + end + + context "in a comment" do + it "links URLs" do + expect(subject).to include(%{URL in comment: http://www.google.com}) + end + + it "links emails" do + expect(subject).to include(%{Email in comment: hello@example.com}) + end + end + + context "in a string" do + it "links URLs" do + expect(subject).to include(%{URL in string: http://www.google.com}) + end + + it "links emails" do + expect(subject).to include(%{Email in string: hello@example.com}) + end + end + + context 'in HTML/XML tags' do + it "links URLs" do + expect(subject).to include(%{<http://www.google.com>}) + expect(subject).to include(%{<url>http://www.google.com</url>}) + end + end + end end end -- GitLab From 625ce63c7d5b7ff81a119ea237b244f0cb5beed7 Mon Sep 17 00:00:00 2001 From: Douwe Maan Date: Sun, 10 Jul 2016 15:39:20 -0500 Subject: [PATCH 3/5] Update specs --- spec/lib/gitlab/diff/inline_diff_marker_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/lib/gitlab/diff/inline_diff_marker_spec.rb b/spec/lib/gitlab/diff/inline_diff_marker_spec.rb index 95da344802dc..e24241e70598 100644 --- a/spec/lib/gitlab/diff/inline_diff_marker_spec.rb +++ 
b/spec/lib/gitlab/diff/inline_diff_marker_spec.rb @@ -14,7 +14,7 @@ describe Gitlab::Diff::InlineDiffMarker, lib: true do end end - context "when the text text is not html safe" do + context "when the rich text is not html safe" do let(:raw) { "abc 'def'" } let(:inline_diffs) { [2..5] } let(:subject) { described_class.new(raw).mark(inline_diffs) } -- GitLab From ed6cbde202a290e3d64e3e86c22b059d8aaab2fe Mon Sep 17 00:00:00 2001 From: Douwe Maan Date: Tue, 9 May 2017 16:00:17 -0500 Subject: [PATCH 4/5] Autolink all of the highlighted text at once --- lib/gitlab/highlight.rb | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/lib/gitlab/highlight.rb b/lib/gitlab/highlight.rb index 57ae127254be..da9ac7cc51ed 100644 --- a/lib/gitlab/highlight.rb +++ b/lib/gitlab/highlight.rb @@ -69,21 +69,8 @@ module Gitlab end def autolink_strings(highlighted_text) - doc = Nokogiri::HTML::DocumentFragment.parse(highlighted_text) - - # Files without highlighting have all text in `span.line`. - # Files with highlighting have strings and comments in `span`s with a - # `class` starting with `c` or `s`. 
- doc.xpath('.//span[@class="line" or starts-with(@class, "c") or starts-with(@class, "s")]/text()').each do |node| - content = node.to_html - html = Banzai.render(content, pipeline: :autolink, autolink_emails: true) - - next if html == content - - node.replace(html) - end - - doc.to_html.html_safe + # TODO: Don't run pre-processing pipeline, because this may break the highlighting + Banzai.render(highlighted_text, pipeline: :autolink, autolink_emails: true).html_safe end end end -- GitLab From 5bcdc435e6f1c749aaf03d6ecb33195c5fd62bae Mon Sep 17 00:00:00 2001 From: Douwe Maan Date: Tue, 9 May 2017 16:02:55 -0500 Subject: [PATCH 5/5] =?UTF-8?q?Autolink=20the=20raw=20text=20and=20?= =?UTF-8?q?=E2=80=9Ctransfer=E2=80=9D=20the=20found=20links=20to=20the=20h?= =?UTF-8?q?ighlighted=20text?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/gitlab/highlight.rb | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/lib/gitlab/highlight.rb b/lib/gitlab/highlight.rb index da9ac7cc51ed..e25d0f060200 100644 --- a/lib/gitlab/highlight.rb +++ b/lib/gitlab/highlight.rb @@ -25,7 +25,7 @@ module Gitlab def highlight(text, continue: true, plain: false) highlighted_text = highlight_text(text, continue: continue, plain: plain) highlighted_text = link_dependencies(text, highlighted_text) if blob_name - autolink_strings(highlighted_text) + autolink_strings(text, highlighted_text) end def lexer @@ -68,9 +68,42 @@ module Gitlab Gitlab::DependencyLinker.link(blob_name, text, highlighted_text) end - def autolink_strings(highlighted_text) + def autolink_strings(text, highlighted_text) + raw_lines = text.lines + # TODO: Don't run pre-processing pipeline, because this may break the highlighting - Banzai.render(highlighted_text, pipeline: :autolink, autolink_emails: true).html_safe + linked_text = Banzai.render( + ERB::Util.html_escape(text), + pipeline: :autolink, + autolink_emails: true + 
).html_safe + + linked_lines = linked_text.lines + + highlighted_lines = highlighted_text.lines + + highlighted_lines.map!.with_index do |rich_line, i| + matches = [] + linked_lines[i].scan(/(?<start><a[^>]+>)(?<content>[^<]+)(?<end><\/a>)/) { matches << Regexp.last_match } + next rich_line if matches.empty? + + raw_line = raw_lines[i] + marked_line = rich_line.html_safe + + matches.each do |match| + marker = StringRegexMarker.new(raw_line, marked_line) + + regex = /#{Regexp.escape(match[:content])}/ + + marked_line = marker.mark(regex) do |text, left:, right:| + "#{match[:start]}#{text}#{match[:end]}" + end + end + + marked_line + end + + highlighted_lines.join.html_safe end end end -- GitLab