Skip to content
Snippets Groups Projects
Commit fc290a7d authored by Grzegorz Bizon's avatar Grzegorz Bizon
Browse files

Validate GFM AST lexer process, add some specs for it

parent 6640dbd9
No related branches found
No related tags found
No related merge requests found
Loading
Loading
@@ -2,6 +2,8 @@ module Gitlab
module Gfm
module Ast
class Lexer
class LexerError < StandardError; end
##
# GFM AST Lexer
#
Loading
Loading
@@ -18,31 +20,43 @@ module Gitlab
# We expect that all text is covered by lexemes.
#
def process!
  # Build the lexeme nodes for this lexer first.
  process_nodes!

  # Array#each hands back its receiver, so the nodes are processed one by
  # one and the very same array is then sorted in place and returned.
  @nodes.each(&:process!).sort!
end
private
##
# Processes lexeme nodes for each token in this lexer.
#
def process_nodes!
  # Without tokens there is nothing to match, so no validation happens.
  return if @tokens.empty?

  # ranges_available is evaluated again for every token, so each token is
  # matched against whatever ranges are still reported as available.
  @tokens.each do |token|
    ranges_available.each { |range| process_range!(range, token) }
  end

  # Processing and sorting the nodes is left to process!; this method only
  # builds them and then checks that no part of the text was left uncovered.
  # Raises LexerError when available ranges remain.
  raise LexerError, 'Unprocessed nodes detected!' unless ranges_available.empty?
end
 
private
##
# Processes a given range.
#
# If pattern is found in a range, but this range is already covered
# by an existing node, we omit this one (flat search).
#
def process_range!(range, token)
  # range - slice of @text to search; token - class exposing .pattern and
  # instantiated as token.new(text, range, match, parent).
  #
  # Match offsets are relative to the slice, so remember where the slice
  # starts in @text. The parameter itself is never reassigned, otherwise the
  # offset would drift after the first match.
  offset = range.begin

  # The block form of String#scan updates Regexp.last_match for every match.
  # Calling scan first and iterating its result afterwards would only ever
  # expose the final match.
  @text[range].scan(token.pattern) do
    match = Regexp.last_match
    node_range = (match.begin(0) + offset)...(match.end(0) + offset)

    # Flat search: skip a match whose first index is already covered.
    next if ranges_taken.any? { |taken| taken.include?(node_range.begin) }

    @nodes << token.new(match[0], node_range, match, @parent)
  end
end
Loading
Loading
@@ -59,8 +73,9 @@ module Gitlab
taken.concat(node.range.to_a)
end
 
text_indexes = (0..@text.length).to_a
text_indexes = (0..(@text.length - 1)).to_a
indexes_available = (text_indexes - indexes_taken).sort.uniq
indexes_available.inject([]) do |ranges, n|
if ranges.empty? || ranges.last.last != n - 1
ranges + [n..n]
Loading
Loading
require 'spec_helper'
 
describe Gitlab::Gfm::Ast::Lexer do
  # `text` and `tokens` are supplied by each context below. `let` is lazy,
  # so the lexer only runs when an example first references `nodes`.
  let(:lexer) { described_class.new(text, tokens) }
  let(:nodes) { lexer.process! }

  context 'order of tokens' do
    # Text is listed before CodeBlock here.
    let(:tokens) do
      [Gitlab::Gfm::Ast::Syntax::Text,
       Gitlab::Gfm::Ast::Syntax::Markdown::CodeBlock]
    end

    let(:text) { "text and ```ruby\nblock\n```" }

    it 'greedily matches tokens in order those are defined' do
      expect(nodes.count).to eq 1
      expect(nodes.first).to be_a Gitlab::Gfm::Ast::Syntax::Text
    end
  end

  context 'uncovered ranges' do
    # Only CodeBlock is available, while the text also has content outside
    # of the fenced block.
    let(:tokens) do
      [Gitlab::Gfm::Ast::Syntax::Markdown::CodeBlock]
    end

    let(:text) { "text and ```ruby\nblock\n```" }

    it 'raises error when uncovered ranges remain' do
      expect { nodes }.to raise_error(Gitlab::Gfm::Ast::Lexer::LexerError,
                                      /Unprocessed nodes detected/)
    end
  end

  context 'intersecting tokens' do
    # CodeBlock is listed before Text and the whole text is one fenced block.
    let(:tokens) do
      [Gitlab::Gfm::Ast::Syntax::Markdown::CodeBlock,
       Gitlab::Gfm::Ast::Syntax::Text]
    end

    let(:text) { "```ruby\nsome text\n```" }

    it 'does not match intersecting tokens' do
      expect(nodes.count).to eq 1
      expect(nodes.first.nodes.count).to eq 0
    end
  end
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment