Skip to content
Snippets Groups Projects
Commit 70bbf093 authored by Robert Speicher's avatar Robert Speicher
Browse files

Remove class and id attributes from SanitizationFilter whitelist

parent d9b6b920
No related branches found
No related tags found
No related merge requests found
Loading
Loading
@@ -423,7 +423,7 @@ Quote break.
 
You can also use raw HTML in your Markdown, and it'll mostly work pretty well.
 
See the documentation for HTML::Pipeline's [SanitizationFilter](http://www.rubydoc.info/gems/html-pipeline/HTML/Pipeline/SanitizationFilter#WHITELIST-constant) class for the list of allowed HTML tags and attributes. In addition to the default `SanitizationFilter` whitelist, GitLab allows `span` elements, as well as the `class`, and `id` attributes on all elements.
See the documentation for HTML::Pipeline's [SanitizationFilter](http://www.rubydoc.info/gems/html-pipeline/HTML/Pipeline/SanitizationFilter#WHITELIST-constant) class for the list of allowed HTML tags and attributes. In addition to the default `SanitizationFilter` whitelist, GitLab allows `span` elements.
 
```no-highlight
<dl>
Loading
Loading
Loading
Loading
@@ -10,8 +10,9 @@ module Gitlab
def whitelist
whitelist = HTML::Pipeline::SanitizationFilter::WHITELIST
 
# Allow `class` and `id` on all elements
whitelist[:attributes][:all].push('class', 'id')
# Allow code highlighting
whitelist[:attributes]['pre'] = %w(class)
whitelist[:attributes]['span'] = %w(class)
 
# Allow table alignment
whitelist[:attributes]['th'] = %w(style)
Loading
Loading
@@ -23,6 +24,9 @@ module Gitlab
# Remove `rel` attribute from `a` elements
whitelist[:transformers].push(remove_rel)
 
# Remove `class` attribute from non-highlight spans
whitelist[:transformers].push(clean_spans)
whitelist
end
 
Loading
Loading
@@ -33,6 +37,17 @@ module Gitlab
end
end
end
def clean_spans
lambda do |env|
return unless env[:node_name] == 'span'
return unless env[:node].has_attribute?('class')
unless has_ancestor?(env[:node], 'pre')
env[:node].remove_attribute('class')
end
end
end
end
end
end
Loading
Loading
@@ -60,8 +60,8 @@ describe 'GitLab Markdown' do
@feat.teardown
end
 
# Given a header ID, goes to that element's parent (the header), then to its
# second sibling (the body).
# Given a header ID, goes to that element's parent (the header itself), then
# its next sibling element (the body).
def get_section(id)
@doc.at_css("##{id}").parent.next_element
end
Loading
Loading
@@ -119,18 +119,18 @@ describe 'GitLab Markdown' do
describe 'HTML::Pipeline' do
describe 'SanitizationFilter' do
it 'uses a permissive whitelist' do
expect(@doc).to have_selector('b#manual-b')
expect(@doc).to have_selector('em#manual-em')
expect(@doc).to have_selector("code#manual-code")
expect(@doc).to have_selector('b:contains("b tag")')
expect(@doc).to have_selector('em:contains("em tag")')
expect(@doc).to have_selector('code:contains("code tag")')
expect(@doc).to have_selector('kbd:contains("s")')
expect(@doc).to have_selector('strike:contains(Emoji)')
expect(@doc).to have_selector('img#manual-img')
expect(@doc).to have_selector('br#manual-br')
expect(@doc).to have_selector('hr#manual-hr')
expect(@doc).to have_selector('img[src*="smile.png"]')
expect(@doc).to have_selector('br')
expect(@doc).to have_selector('hr')
end
 
it 'permits span elements' do
expect(@doc).to have_selector('span#span-class-light.light')
expect(@doc).to have_selector('span:contains("span tag")')
end
 
it 'permits table alignment' do
Loading
Loading
@@ -144,13 +144,12 @@ describe 'GitLab Markdown' do
end
 
it 'removes `rel` attribute from links' do
expect(@doc).to have_selector('a#a-rel-nofollow')
expect(@doc).not_to have_selector('a#a-rel-nofollow[rel]')
body = get_section('sanitizationfilter')
expect(body).not_to have_selector('a[rel]')
end
 
it "removes `href` from `a` elements if it's fishy" do
expect(@doc).to have_selector('a#a-href-javascript')
expect(@doc).not_to have_selector('a#a-href-javascript[href]')
expect(@doc).not_to have_selector('a[href*="javascript"]')
end
end
 
Loading
Loading
@@ -228,7 +227,8 @@ describe 'GitLab Markdown' do
 
%w(code a kbd).each do |elem|
it "ignores links inside '#{elem}' element" do
expect(@doc.at_css("#{elem}#autolink-#{elem}").child).to be_text
body = get_section('autolinkfilter')
expect(body).not_to have_selector("#{elem} a")
end
end
end
Loading
Loading
Loading
Loading
@@ -54,36 +54,34 @@ After the Markdown has been turned into HTML, it gets passed through...
 
### SanitizationFilter
 
GitLab uses <a href="http://git.io/vfW8a" class="sanitize" id="sanitize-link">HTML::Pipeline::SanitizationFilter</a>
GitLab uses <a href="http://git.io/vfW8a">HTML::Pipeline::SanitizationFilter</a>
to sanitize the generated HTML, stripping dangerous or unwanted tags.
 
Its default whitelist is pretty permissive. Check it:
 
<b id="manual-b">This text is bold</b> and <em id="manual-em">this text is emphasized</em>.
<b>b tag</b> and <em>em tag</em>.
 
<code id="manual-code">echo "Hello, world!"</code>
<code>code tag</code>
 
Press <kbd>s</kbd> to search.
 
<strike>Emoji</strike> Plain old images! <img
src="http://www.emoji-cheat-sheet.com/graphics/emojis/smile.png" width="20"
height="20" id="manual-img" />
<strike>Emoji</strike> Plain old images! <img src="http://www.emoji-cheat-sheet.com/graphics/emojis/smile.png" width="20" height="20" />
 
Here comes a line break:
 
<br id="manual-br" />
<br />
 
And a horizontal rule:
 
<hr id="manual-hr" />
<hr />
 
As permissive as it is, we've allowed even more stuff:
 
<span class="light" id="span-class-light">Span elements</span>
<span>span tag</span>
 
<a href="#" rel="nofollow" id="a-rel-nofollow">This is a link with a defined rel attribute, which should be removed</a>
<a href="#" rel="nofollow">This is a link with a defined rel attribute, which should be removed</a>
 
<a href="javascript:alert('Hi')" id="a-href-javascript">This is a link trying to be sneaky. It gets its link removed entirely.</a>
<a href="javascript:alert('Hi')">This is a link trying to be sneaky. It gets its link removed entirely.</a>
 
### Escaping
 
Loading
Loading
@@ -125,9 +123,9 @@ These are all plain text that should get turned into links:
 
But it shouldn't autolink text inside certain tags:
 
- <code id="autolink-code">http://about.gitlab.com/</code>
- <a id="autolink-a">http://about.gitlab.com/</a>
- <kbd id="autolink-kbd">http://about.gitlab.com/</kbd>
- <code>http://about.gitlab.com/</code>
- <a>http://about.gitlab.com/</a>
- <kbd>http://about.gitlab.com/</kbd>
 
### Reference Filters (e.g., #<%= issue.iid %>)
 
Loading
Loading
Loading
Loading
@@ -29,17 +29,27 @@ module Gitlab::Markdown
exp = act = "<dl>\n<dt>Term</dt>\n<dd>Definition</dd>\n</dl>"
expect(filter(act).to_html).to eq exp
end
it 'sanitizes `class` attribute on any element' do
act = %q{<strong class="foo">Strong</strong>}
expect(filter(act).to_html).to eq %q{<strong>Strong</strong>}
end
it 'sanitizes `id` attribute on any element' do
act = %q{<em id="foo">Emphasis</em>}
expect(filter(act).to_html).to eq %q{<em>Emphasis</em>}
end
end
 
describe 'custom whitelist' do
it 'allows `class` attribute on any element' do
exp = act = %q{<strong class="foo">Strong</strong>}
it 'allows syntax highlighting' do
exp = act = %q{<pre class="code highlight white c"><code><span class="k">def</span></code></pre>}
expect(filter(act).to_html).to eq exp
end
 
it 'allows `id` attribute on any element' do
exp = act = %q{<em id="foo">Emphasis</em>}
expect(filter(act).to_html).to eq exp
it 'sanitizes `class` attribute from non-highlight spans' do
act = %q{<span class="k">def</span>}
expect(filter(act).to_html).to eq %q{<span>def</span>}
end
 
it 'allows `style` attribute on table elements' do
Loading
Loading
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment