Skip to content
Snippets Groups Projects
Commit 8ea9ee23 authored by Valery Sizov
Browse files

Merge branch 'camel' into 'master'

Analyzer for CamelCase

Related https://gitlab.com/gitlab-org/gitlab-ee/issues/832

See merge request !15
parents aa1485e3 7a55a551
No related branches found
No related tags found
1 merge request: !15 Analyzer for CamelCase
Pipeline #
0.0.16
- Analyzer for CamelCased terms
0.0.15
- Search through the filenames
 
Loading
Loading
Loading
Loading
@@ -24,11 +24,6 @@ module Elasticsearch
index: {
analysis: {
analyzer: {
human_analyzer: {
type: 'custom',
tokenizer: 'human_tokenizer',
filter: %w(lowercase asciifolding)
},
path_analyzer: {
type: 'custom',
tokenizer: 'path_tokenizer',
Loading
Loading
@@ -42,32 +37,30 @@ module Elasticsearch
code_analyzer: {
type: 'custom',
tokenizer: 'standard',
filter: %w(lowercase asciifolding code_stemmer),
filter: %w(code lowercase asciifolding),
char_filter: ["code_mapping"]
}
},
tokenizer: {
sha_tokenizer: {
type: "edgeNGram",
min_gram: 8,
min_gram: 5,
max_gram: 40,
token_chars: %w(letter digit)
},
human_tokenizer: {
type: "nGram",
min_gram: 1,
max_gram: 20,
token_chars: %w(letter digit)
},
path_tokenizer: {
type: 'path_hierarchy',
reverse: true
},
},
filter: {
code_stemmer: {
type: "stemmer",
name: "minimal_english"
code: {
type: "pattern_capture",
preserve_original: 1,
patterns: [
"(\\p{Ll}+|\\p{Lu}\\p{Ll}+|\\p{Lu}+)",
"(\\d+)"
]
}
},
char_filter: {
Loading
Loading
Loading
Loading
@@ -23,34 +23,51 @@ module Elasticsearch
 
mapping _timestamp: { enabled: true } do
indexes :blob do
indexes :id, type: :string, index_options: 'offsets', analyzer: :human_analyzer
indexes :rid, type: :string, index: :not_analyzed
indexes :oid, type: :string, index_options: 'offsets', analyzer: :code_analyzer
indexes :commit_sha, type: :string, index_options: 'offsets', analyzer: :sha_analyzer
indexes :path, type: :string, analyzer: :path_analyzer
indexes :file_name, type: :string, analyzer: :code_analyzer
indexes :content, type: :string, index_options: 'offsets', analyzer: :code_analyzer
indexes :language, type: :string, index: :not_analyzed
indexes :id, type: :string,
index_options: 'offsets',
analyzer: :sha_analyzer
indexes :rid, type: :string,
index: :not_analyzed
indexes :oid, type: :string,
index_options: 'offsets',
analyzer: :sha_analyzer
indexes :commit_sha, type: :string,
index_options: 'offsets',
analyzer: :sha_analyzer
indexes :path, type: :string,
analyzer: :path_analyzer
indexes :file_name, type: :string,
analyzer: :code_analyzer
indexes :content, type: :string,
index_options: 'offsets',
analyzer: :code_analyzer
indexes :language, type: :string,
index: :not_analyzed
end
 
indexes :commit do
indexes :id, type: :string, index_options: 'offsets', analyzer: :human_analyzer
indexes :rid, type: :string, index: :not_analyzed
indexes :sha, type: :string, index_options: 'offsets', analyzer: :sha_analyzer
indexes :id, type: :string,
index_options: 'offsets',
analyzer: :sha_analyzer
indexes :rid, type: :string,
index: :not_analyzed
indexes :sha, type: :string,
index_options: 'offsets',
analyzer: :sha_analyzer
 
indexes :author do
indexes :name, type: :string, index_options: 'offsets', analyzer: :code_analyzer
indexes :email, type: :string, index_options: 'offsets', analyzer: :code_analyzer
indexes :name, type: :string, index_options: 'offsets'
indexes :email, type: :string, index_options: 'offsets'
indexes :time, type: :date, format: :basic_date_time_no_millis
end
 
indexes :commiter do
indexes :name, type: :string, index_options: 'offsets', analyzer: :code_analyzer
indexes :email, type: :string, index_options: 'offsets', analyzer: :code_analyzer
indexes :name, type: :string, index_options: 'offsets'
indexes :email, type: :string, index_options: 'offsets'
indexes :time, type: :date, format: :basic_date_time_no_millis
end
 
indexes :message, type: :string, index_options: 'offsets', analyzer: :code_analyzer
indexes :message, type: :string, index_options: 'offsets'
end
end
 
Loading
Loading
Loading
Loading
@@ -3,18 +3,16 @@ require 'spec_helper'
describe TestRepository do
before do
remove_index(TestRepository.index_name)
TestRepository.__elasticsearch__.create_index!
end
 
it "creates an index" do
expect(index_exist?(TestRepository.index_name)).to be_falsey
TestRepository.__elasticsearch__.create_index!
let(:repo) { TestRepository.new }
 
it "creates an index" do
expect(index_exist?(TestRepository.index_name)).to be_truthy
end
 
it "indexes all blobs and searches" do
repo = TestRepository.new
repo.index_blobs
 
TestRepository.__elasticsearch__.refresh_index!
Loading
Loading
@@ -23,27 +21,24 @@ describe TestRepository do
end
 
it "indexes all commits and searches" do
repo = TestRepository.new
repo.index_commits
 
TestRepository.__elasticsearch__.refresh_index!
 
expect(repo.search('test', type: :commit)[:commits][:total_count]).to eq(2)
expect(repo.search('test', type: :commit)[:commits][:total_count]).to eq(3)
end
 
it "searches through all types" do
repo = TestRepository.new
repo.index_commits
repo.index_blobs
 
TestRepository.__elasticsearch__.refresh_index!
 
expect(repo.search('test')[:commits][:total_count]).to eq(2)
expect(repo.search('test')[:commits][:total_count]).to eq(3)
expect(repo.search('def')[:blobs][:total_count]).to eq(4)
end
 
it "searches through filename" do
repo = TestRepository.new
repo.index_blobs
 
TestRepository.__elasticsearch__.refresh_index!
Loading
Loading
@@ -55,8 +50,20 @@ describe TestRepository do
expect(found_version_file).to be_truthy
end
 
it "searches through camel cased words" do
TestRepository.__elasticsearch__.create_index!(force: true)
repo.index_blobs
TestRepository.__elasticsearch__.refresh_index!
found_version_file = repo.search('Hip')[:blobs][:results].any? do |result|
result["_source"]["blob"]["file_name"] == "camelCase.rb"
end
expect(found_version_file).to be_truthy
end
it "indexes specified commits" do
repo = TestRepository.new
repo.index_commits(
from_rev: '40f4a7a617393735a95a0bb67b08385bc1e7c66d',
to_rev: '732401c65e924df81435deb12891ef570167d2e2'
Loading
Loading
@@ -68,7 +75,6 @@ describe TestRepository do
end
 
it "indexes specified blobs" do
repo = TestRepository.new
repo.index_blobs(
from_rev: '40f4a7a617393735a95a0bb67b08385bc1e7c66d',
to_rev: '732401c65e924df81435deb12891ef570167d2e2'
Loading
Loading
@@ -80,7 +86,6 @@ describe TestRepository do
end
 
it "applies repository_id filter for blobs" do
repo = TestRepository.new
repo.index_blobs
 
TestRepository.new("repo_second").index_blobs
Loading
Loading
@@ -92,14 +97,13 @@ describe TestRepository do
end
 
it "applies repository_id filter for commits" do
repo = TestRepository.new
repo.index_commits
 
TestRepository.new("repo_second").index_commits
 
TestRepository.__elasticsearch__.refresh_index!
 
expect(TestRepository.__elasticsearch__.search('test').results.count).to eq(4)
expect(repo.search('test')[:commits][:total_count]).to eq(2)
expect(TestRepository.__elasticsearch__.search('test').results.count).to eq(6)
expect(repo.search('test')[:commits][:total_count]).to eq(3)
end
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment