Skip to content
Snippets Groups Projects
Commit a00578ce authored by Robert Speicher's avatar Robert Speicher
Browse files

Absorb gitlab_git

parent aec04a47
No related branches found
No related tags found
No related merge requests found
Showing
with 2937 additions and 12 deletions
Loading
Loading
@@ -16,6 +16,8 @@ gem 'default_value_for', '~> 3.0.0'
gem 'mysql2', '~> 0.3.16', group: :mysql
gem 'pg', '~> 0.18.2', group: :postgres
 
gem 'rugged', '~> 0.24.0'
# Authentication libraries
gem 'devise', '~> 4.2'
gem 'doorkeeper', '~> 4.2.0'
Loading
Loading
@@ -49,10 +51,6 @@ gem 'u2f', '~> 0.2.1'
# Browser detection
gem 'browser', '~> 2.2'
 
# Extracting information from a git repository
# Provide access to Gitlab::Git library
gem 'gitlab_git', '~> 10.7.0'
# LDAP Auth
# GitLab fork with several improvements to original library. For full list of changes
# see https://github.com/intridea/omniauth-ldap/compare/master...gitlabhq:master
Loading
Loading
Loading
Loading
@@ -255,11 +255,6 @@ GEM
mime-types (>= 1.16, < 3)
posix-spawn (~> 0.3)
gitlab-markup (1.5.0)
gitlab_git (10.7.0)
activesupport (~> 4.0)
charlock_holmes (~> 0.7.3)
github-linguist (~> 4.7.0)
rugged (~> 0.24.0)
gitlab_omniauth-ldap (1.2.1)
net-ldap (~> 0.9)
omniauth (~> 1.0)
Loading
Loading
@@ -857,7 +852,6 @@ DEPENDENCIES
github-linguist (~> 4.7.0)
gitlab-flowdock-git-hook (~> 1.0.1)
gitlab-markup (~> 1.5.0)
gitlab_git (~> 10.7.0)
gitlab_omniauth-ldap (~> 1.2.1)
gollum-lib (~> 4.2)
gollum-rugged_adapter (~> 0.4.2)
Loading
Loading
@@ -942,6 +936,7 @@ DEPENDENCIES
rubocop-rspec (~> 1.5.0)
ruby-fogbugz (~> 0.2.1)
ruby-prof (~> 0.16.2)
rugged (~> 0.24.0)
sanitize (~> 2.0)
sass-rails (~> 5.0.6)
scss_lint (~> 0.47.0)
Loading
Loading
@@ -988,4 +983,4 @@ DEPENDENCIES
wikicloth (= 0.8.1)
 
BUNDLED WITH
1.13.6
1.13.7
class MergeRequestDiff < ActiveRecord::Base
include Sortable
include Importable
include EncodingHelper
include Gitlab::Git::EncodingHelper
 
# Prevent storing the diff if the number of commits is more than COMMITS_SAFE_SIZE
COMMITS_SAFE_SIZE = 100
Loading
Loading
module Gitlab
module Git
# Class for parsing Git attribute files and extracting the attributes for
# file patterns.
#
# Unlike Rugged this parser only needs a single IO call (a call to `open`),
# vastly reducing the time spent in extracting attributes.
#
# This class _only_ supports parsing the attributes file located at
# `$GIT_DIR/info/attributes` as GitLab doesn't use any other files
# (`.gitattributes` is copied to this particular path).
#
# Basic usage:
#
# attributes = Gitlab::Git::Attributes.new(some_repo.path)
#
# attributes.attributes('README.md') # => { "eol" => "lf" }
class Attributes
  # path - The path to the Git repository.
  def initialize(path)
    @path = File.expand_path(path)
    @patterns = nil
  end

  # Returns all the Git attributes for the given path.
  #
  # path - A path to a file for which to get the attributes.
  #
  # Returns the attributes Hash of the first pattern (in precedence order)
  # matching the path, or an empty Hash when nothing matches.
  def attributes(path)
    full_path = File.join(@path, path)

    patterns.each do |pattern, attrs|
      return attrs if File.fnmatch?(pattern, full_path)
    end

    {}
  end

  # Returns a Hash containing the file patterns and their attributes. The
  # attributes file is parsed on first use and the result is memoized.
  def patterns
    @patterns ||= parse_file
  end

  # Parses an attribute string.
  #
  # These strings can be in the following formats:
  #
  #     text      # => { "text" => true }
  #     -text     # => { "text" => false }
  #     key=value # => { "key" => "value" }
  #
  # string - The string to parse.
  #
  # Returns a Hash containing the attributes and their values.
  def parse_attributes(string)
    values = {}
    dash = '-'
    equal = '='
    binary = 'binary'

    string.split(/\s+/).each do |chunk|
      # Data such as "foo = bar" should be treated as "foo" and "bar" being
      # separate boolean attributes.
      next if chunk == equal

      key = chunk

      # Input: "-foo"
      if chunk.start_with?(dash)
        # Drop the leading dash by character. Mixing `byteslice` with a
        # character count cut multi-byte attribute names short.
        key = chunk[1..-1]
        value = false

      # Input: "foo=bar"
      elsif chunk.include?(equal)
        key, value = chunk.split(equal, 2)

      # Input: "foo"
      else
        value = true
      end

      values[key] = value

      # When the "binary" option is set the "diff" option should be set to
      # the inverse. If "diff" is later set it should overwrite the
      # automatically set value.
      values['diff'] = false if key == binary && value
    end

    values
  end

  # Iterates over every line in the attributes file, yielding each line
  # with surrounding whitespace stripped. Does nothing when the file does
  # not exist, and stops at the first line with an invalid encoding.
  def each_line
    full_path = File.join(@path, 'info/attributes')

    return unless File.exist?(full_path)

    File.open(full_path, 'r') do |handle|
      handle.each_line do |line|
        break unless line.valid_encoding?

        yield line.strip
      end
    end
  end

  private

  # Parses the Git attributes file.
  #
  # Returns a Hash mapping absolute fnmatch patterns to attribute Hashes.
  def parse_file
    pairs = []
    comment = '#'

    each_line do |line|
      next if line.start_with?(comment) || line.empty?

      pattern, attrs = line.split(/\s+/, 2)

      parsed = attrs ? parse_attributes(attrs) : {}

      pairs << [File.join(@path, pattern), parsed]
    end

    # Newer entries take precedence over older entries.
    pairs.reverse.to_h
  end
end
end
end
require_relative 'encoding_helper'
module Gitlab
module Git
class Blame
include Gitlab::Git::EncodingHelper
attr_reader :lines, :blames
# Stores the blame target and immediately loads the blame.
#
# repository - object exposing `path` and `lookup` (used by the loaders).
# sha        - revision to blame.
# path       - file path to blame.
def initialize(repository, sha, path)
  @repo, @sha, @path = repository, sha, path
  @lines = []
  @blames = load_blame
end
# Yields a Gitlab::Git::Commit wrapper and the line text for every blame
# entry, in the order they were loaded.
def each
  @blames.each do |entry|
    wrapped = Gitlab::Git::Commit.new(entry.commit)
    yield(wrapped, entry.line)
  end
end
private
# Runs `git blame -p` for the configured revision and path, converts the
# raw output to UTF-8 and hands it to the parser.
def load_blame
  command = ['git', "--git-dir=#{@repo.path}", 'blame', '-p', "#{@sha}", '--', "#{@path}"]
  # Read in binary mode to ensure ASCII-8BIT
  raw_output = IO.popen(command, 'rb', &:read)
  process_raw_blame(encode_utf8(raw_output))
end
# Parses porcelain blame output into BlameLine objects.
#
# Lines starting with a tab are file content; lines starting with a
# 40-character id followed by two numbers are entry headers. Every
# distinct commit id is looked up once via @repo.lookup.
#
# Stores the resulting Array in @lines and returns it.
def process_raw_blame(output)
  contents = []
  line_info = {}
  commit_cache = {}

  output.split("\n").each do |row|
    if row[0, 1] == "\t"
      contents << row[1, row.size]
    elsif (header = /^(\w{40}) (\d+) (\d+)/.match(row))
      sha = header[1]
      commit_cache[sha] = nil unless commit_cache.key?(sha)
      line_info[header[3].to_i] = [sha, header[2].to_i]
    end
  end

  # Resolve every distinct commit exactly once
  commit_cache.keys.each do |sha|
    commit_cache[sha] = @repo.lookup(sha)
  end

  # Assemble the entries ordered by final line number
  @lines = line_info.sort.map do |lineno, (sha, old_lineno)|
    BlameLine.new(lineno, old_lineno, commit_cache[sha], contents[lineno - 1])
  end
end
end
# Plain value holder for one blamed line: its final line number, its line
# number in the originating commit, the commit, and the line text.
class BlameLine
  attr_accessor :lineno, :oldlineno, :commit, :line

  def initialize(lineno, oldlineno, commit, line)
    @lineno, @oldlineno, @commit, @line = lineno, oldlineno, commit, line
  end
end
end
end
require_relative 'encoding_helper'
require_relative 'path_helper'
module Gitlab
module Git
class Blob
include Linguist::BlobHelper
include Gitlab::Git::EncodingHelper
# This number is the maximum amount of data that we want to display to
# the user. We load as much as we can for encoding detection
# (Linguist) and LFS pointer parsing. All other cases where we need full
# blob data should use load_all_data!.
MAX_DATA_DISPLAY_SIZE = 10485760
attr_accessor :name, :path, :size, :data, :mode, :id, :commit_id, :loaded_size, :binary
class << self
# Looks up the blob stored at +path+ in the tree of commit +sha+.
#
# Returns nil when the path does not resolve to an entry or the blob
# cannot be looked up, a submodule placeholder for :commit entries, and a
# Blob carrying at most MAX_DATA_DISPLAY_SIZE bytes of data otherwise.
def find(repository, sha, path)
  root_oid = repository.lookup(sha).tree.oid
  entry = find_entry_by_path(repository, root_oid, path)
  return nil unless entry

  return submodule_blob(entry, path, sha) if entry[:type] == :commit

  rugged_blob = repository.lookup(entry[:oid])
  return nil unless rugged_blob

  Blob.new(
    id: rugged_blob.oid,
    name: entry[:name],
    size: rugged_blob.size,
    data: rugged_blob.content(MAX_DATA_DISPLAY_SIZE),
    mode: entry[:filemode].to_s(8),
    path: path,
    commit_id: sha,
    binary: rugged_blob.binary?
  )
end
# Builds a Blob straight from an object id, without any path or commit
# context. At most MAX_DATA_DISPLAY_SIZE bytes of content are loaded.
def raw(repository, sha)
  rugged_blob = repository.lookup(sha)
  attrs = {
    id: rugged_blob.oid,
    size: rugged_blob.size,
    data: rugged_blob.content(MAX_DATA_DISPLAY_SIZE),
    binary: rugged_blob.binary?
  }
  Blob.new(attrs)
end
# Recursive search of a tree entry by path
#
# Ex.
#   blog/            # oid: 1a
#     app/           # oid: 2a
#       models/      # oid: 3a
#         file.rb    # oid: 4a
#
#   Blob.find_entry_by_path(repo, '1a', 'app/models/file.rb')
#   # => the entry Hash for file.rb (its :oid is '4a')
#
# repository - object whose `lookup(oid)` returns an enumerable tree of
#              entry Hashes (:name, :type, :oid).
# root_id    - oid of the tree to start from.
# path       - slash-separated path; leading slashes are ignored. The
#              string is never modified, so frozen strings are accepted.
#
# Returns the entry Hash when the path resolves to a :blob or :commit
# entry, nil otherwise.
def find_entry_by_path(repository, root_id, path)
  root_tree = repository.lookup(root_id)

  # Strip leading slashes on a copy; the previous in-place edit changed the
  # caller's string and raised on frozen input.
  segments = path.sub(%r{\A/*}, '').split('/')

  entry = root_tree.find do |candidate|
    candidate[:name] == segments[0]
  end

  return nil unless entry

  if segments.size > 1
    # Only trees can be descended into
    return nil unless entry[:type] == :tree

    find_entry_by_path(repository, entry[:oid], segments.drop(1).join('/'))
  else
    [:blob, :commit].include?(entry[:type]) ? entry : nil
  end
end
# Builds the placeholder Blob used for submodule entries: it carries the
# entry's oid and name but always has empty data.
def submodule_blob(blob_entry, path, sha)
  attrs = {
    id: blob_entry[:oid],
    name: blob_entry[:name],
    data: '',
    path: path,
    commit_id: sha
  }
  Blob.new(attrs)
end
# Commit file in repository and return commit sha
#
# options should contain next structure:
# file: {
# content: 'Lorem ipsum...',
# path: 'documents/story.txt',
# update: true
# },
# author: {
# email: 'user@example.com',
# name: 'Test User',
# time: Time.now
# },
# committer: {
# email: 'user@example.com',
# name: 'Test User',
# time: Time.now
# },
# commit: {
# message: 'Wow such commit',
# branch: 'master',
# update_ref: false
# }
#
# rubocop:disable Metrics/AbcSize
# rubocop:disable Metrics/CyclomaticComplexity
# rubocop:disable Metrics/PerceivedComplexity
def commit(repository, options, action = :add)
# action is compared against :remove and :rename below; any other value
# (including the default :add) writes file[:content] at file[:path].
file = options[:file]
# A missing :update key means "updating an existing file is allowed".
update = file[:update].nil? ? true : file[:update]
author = options[:author]
committer = options[:committer]
commit = options[:commit]
repo = repository.rugged
ref = commit[:branch]
# A missing :update_ref key means "move the branch to the new commit".
update_ref = commit[:update_ref].nil? ? true : commit[:update_ref]
parents = []
# Default mode for newly created files.
mode = 0o100644
# Bare branch names are expanded to a full refs/heads/ reference.
unless ref.start_with?('refs/')
ref = 'refs/heads/' + ref
end
path_name = PathHelper.normalize_path(file[:path])
# Abort if any invalid characters remain (e.g. ../foo)
raise Repository::InvalidBlobName.new("Invalid path") if path_name.each_filename.to_a.include?('..')
filename = path_name.to_s
index = repo.index
# For a non-empty repository the index is seeded from the tree at the tip
# of the branch, and that tip becomes the only parent of the new commit.
unless repo.empty?
rugged_ref = repo.references[ref]
raise Repository::InvalidRef.new("Invalid branch name") unless rugged_ref
last_commit = rugged_ref.target
index.read_tree(last_commit.tree)
parents = [last_commit]
end
if action == :remove
index.remove(filename)
else
file_entry = index.get(filename)
# On rename the entry at the previous path replaces the lookup result
# and, when present, is removed from the index.
if action == :rename
old_path_name = PathHelper.normalize_path(file[:previous_path])
old_filename = old_path_name.to_s
file_entry = index.get(old_filename)
index.remove(old_filename) unless file_entry.blank?
end
if file_entry
raise Repository::InvalidBlobName.new("Filename already exists; update not allowed") unless update
# Preserve the current file mode if one is available
mode = file_entry[:mode] if file_entry[:mode]
end
content = file[:content]
detect = CharlockHolmes::EncodingDetector.new.detect(content) if content
# NOTE(review): when content is nil, detect is nil too, so this branch is
# entered and `content.gsub!` would be called on nil if autocrlf is set.
# gsub! also edits the caller's string in place.
unless detect && detect[:type] == :binary
# When writing to the repo directly as we are doing here,
# the `core.autocrlf` config isn't taken into account.
content.gsub!("\r\n", "\n") if repository.autocrlf
end
oid = repo.write(content, :blob)
index.add(path: filename, oid: oid, mode: mode)
end
# Assemble the commit options from the prepared index and the caller's
# author/committer/message data.
opts = {}
opts[:tree] = index.write_tree(repo)
opts[:author] = author
opts[:committer] = committer
opts[:message] = commit[:message]
opts[:parents] = parents
opts[:update_ref] = ref if update_ref
Rugged::Commit.create(repo, opts)
end
# rubocop:enable Metrics/AbcSize
# rubocop:enable Metrics/CyclomaticComplexity
# rubocop:enable Metrics/PerceivedComplexity
# Remove file from repository and return commit sha
#
# options should contain next structure:
# file: {
# path: 'documents/story.txt'
# },
# author: {
# email: 'user@example.com',
# name: 'Test User',
# time: Time.now
# },
# committer: {
# email: 'user@example.com',
# name: 'Test User',
# time: Time.now
# },
# commit: {
# message: 'Remove FILENAME',
# branch: 'master'
# }
#
def remove(repository, options)
# Same as .commit, with the action fixed to :remove.
commit(repository, options, :remove)
end
# Rename file from repository and return commit sha
#
# options should contain next structure:
# file: {
# previous_path: 'documents/old_story.txt'
# path: 'documents/story.txt'
# content: 'Lorem ipsum...',
# update: true
# },
# author: {
# email: 'user@example.com',
# name: 'Test User',
# time: Time.now
# },
# committer: {
# email: 'user@example.com',
# name: 'Test User',
# time: Time.now
# },
# commit: {
# message: 'Rename FILENAME',
# branch: 'master'
# }
#
def rename(repository, options)
# Same as .commit, with the action fixed to :rename.
commit(repository, options, :rename)
end
end
# Populates the blob from a Hash with the symbol keys :id, :name, :path,
# :size, :data, :mode, :commit_id and :binary. Missing keys become nil.
def initialize(options)
  [:id, :name, :path, :size, :data, :mode, :commit_id, :binary].each do |attribute|
    send("#{attribute}=", options[attribute])
  end

  @loaded_all_data = false
  # Retain the actual size before it is encoded
  @loaded_size = @data.bytesize if @data
end
# Uses the explicitly supplied binary flag when there is one and falls
# back to the inherited detection when the flag is nil.
def binary?
  return super if @binary.nil?

  @binary == true
end
# True when the blob has no data at all or its data is the empty string.
def empty?
  !(data && data != '')
end
# Returns the stored data after passing it through encode!.
def data
  encode!(@data)
end
# Replaces the (possibly truncated) data with the complete blob content
# fetched from +repository+.
#
# Returns nil for blobs whose data is the empty string (submodules), the
# already loaded data on repeated calls, and the new loaded size on the
# first successful load.
def load_all_data!(repository)
  if @data == '' # don't mess with submodule blobs
    nil
  elsif @loaded_all_data
    @data
  else
    @loaded_all_data = true
    @data = repository.lookup(id).content
    @loaded_size = @data.bytesize
  end
end
# Returns the stored name after passing it through encode!.
def name
  encode!(@name)
end
# A valid LFS object pointer is a text file consisting of
# version
# oid
# size
# see https://github.com/github/git-lfs/blob/v1.1.0/docs/spec.md#the-pointer
def lfs_pointer?
  versioned = has_lfs_version_key?
  versioned && lfs_oid.present? && lfs_size.present?
end
# Returns the 64-character hex digest following "sha256:" in the data, or
# nil when the blob lacks the LFS version key or no digest is present.
def lfs_oid
  return nil unless has_lfs_version_key?

  found = data.match(/(?<=sha256:)([0-9a-f]{64})/)
  found ? found[1] : nil
end
# Returns the digits following "size " in the data as a String, or nil
# when the blob lacks the LFS version key or no size is present.
def lfs_size
  return nil unless has_lfs_version_key?

  found = data.match(/(?<=size )([0-9]+)/)
  found ? found[1] : nil
end
# Truthy when the recorded blob size exceeds the number of bytes actually
# loaded. Returns the falsy size itself when no size is recorded.
def truncated?
  total = size
  total && total > loaded_size
end
private
# Checks whether a non-empty text blob starts with the LFS spec version line.
def has_lfs_version_key?
  return false if empty?

  text? && data.start_with?("version https://git-lfs.github.com/spec")
end
end
end
end
module Gitlab
module Git
# A fragment of a file: a list of lines plus the ref, file name and
# starting line number it was taken from. Exposes the data/name/size/mode
# methods used together with Linguist::BlobHelper.
class BlobSnippet
  include Linguist::BlobHelper

  attr_accessor :ref, :lines, :filename, :startline

  def initialize(ref, lines, startline, filename)
    @ref = ref
    @lines = lines
    @startline = startline
    @filename = filename
  end

  # The lines joined with newlines, or nil when there are no lines.
  def data
    return unless lines

    lines.join("\n")
  end

  # The snippet is named after the file it was taken from.
  def name
    filename
  end

  # Length of the joined data.
  # NOTE(review): raises NoMethodError when `lines` is nil because `data`
  # is nil in that case.
  def size
    data.length
  end

  # Snippets carry no file mode.
  def mode
    nil
  end
end
end
module Gitlab
module Git
# A branch reference. Adds no behaviour of its own; everything is inherited
# from Ref.
class Branch < Ref
end
end
end
# Gitlab::Git::Commit is a wrapper around native Rugged::Commit object
module Gitlab
module Git
class Commit
  include Gitlab::Git::EncodingHelper

  attr_accessor :raw_commit, :head, :refs

  # Attributes that are copied from/to plain Hashes.
  SERIALIZE_KEYS = [
    :id, :message, :parent_ids,
    :authored_date, :author_name, :author_email,
    :committed_date, :committer_name, :committer_email
  ].freeze

  attr_accessor *SERIALIZE_KEYS # rubocop:disable Lint/AmbiguousOperator

  # Two commits are equal when message, parents, dates and author/committer
  # identity all match. The id itself is not part of the comparison.
  def ==(other)
    return false unless other.is_a?(Gitlab::Git::Commit)

    compared = [:message, :parent_ids, :authored_date, :author_name,
                :author_email, :committed_date, :committer_name,
                :committer_email]

    compared.all? { |attribute| send(attribute) == other.send(attribute) }
  end

  class << self
    # Get commits collection
    #
    # Ex.
    #   Commit.where(
    #     repo: repo,
    #     ref: 'master',
    #     path: 'app/models',
    #     limit: 10,
    #     offset: 5,
    #   )
    #
    # Note: the :repo key is removed from the passed Hash.
    def where(options)
      repo = options.delete(:repo)
      unless repo.respond_to?(:log)
        raise 'Gitlab::Git::Repository is required'
      end

      repo.log(options).map { |raw| decorate(raw) }
    end

    # Get single commit
    #
    # Ex.
    #   Commit.find(repo, '29eda46b')
    #
    #   Commit.find(repo, 'master')
    #
    # Returns nil when the revision does not resolve to a commit or one of
    # the rescued lookup errors is raised.
    def find(repo, commit_id = "HEAD")
      return decorate(commit_id) if commit_id.is_a?(Rugged::Commit)

      target =
        if commit_id.is_a?(String)
          repo.rev_parse_target(commit_id)
        else
          Ref.dereference_object(commit_id)
        end

      target.is_a?(Rugged::Commit) ? decorate(target) : nil
    rescue Rugged::ReferenceError, Rugged::InvalidError, Rugged::ObjectError, Gitlab::Git::Repository::NoRepository
      nil
    end

    # Get last commit for HEAD
    #
    # Ex.
    #   Commit.last(repo)
    #
    def last(repo)
      find(repo)
    end

    # Get last commit for specified path and ref
    #
    # Ex.
    #   Commit.last_for_path(repo, '29eda46b', 'app/models')
    #
    #   Commit.last_for_path(repo, 'master', 'Gemfile')
    #
    def last_for_path(repo, ref, path = nil)
      query = { repo: repo, ref: ref, path: path, limit: 1 }
      where(query).first
    end

    # Get commits between two revspecs
    # See also #repository.commits_between
    #
    # Ex.
    #   Commit.between(repo, '29eda46b', 'master')
    #
    # Returns an empty Array when a reference cannot be resolved.
    def between(repo, base, head)
      repo.commits_between(base, head).map { |raw| decorate(raw) }
    rescue Rugged::ReferenceError
      []
    end

    # Delegate Repository#find_commits
    def find_all(repo, options = {})
      repo.find_commits(options)
    end

    # Wraps a raw commit (and optional head) in a Gitlab::Git::Commit.
    def decorate(commit, ref = nil)
      Gitlab::Git::Commit.new(commit, ref)
    end

    # Returns a diff object for the changes introduced by +rugged_commit+.
    # If +rugged_commit+ doesn't have a parent, then the diff is between
    # this commit and an empty repo. See Repository#diff for the keys
    # allowed in the +options+ hash.
    def diff_from_parent(rugged_commit, options = {})
      options ||= {}
      break_rewrites = options[:break_rewrites]
      actual_options = Diff.filter_diff_options(options)

      diff =
        if rugged_commit.parents.empty?
          rugged_commit.diff(actual_options.merge(reverse: true))
        else
          rugged_commit.parents[0].diff(rugged_commit, actual_options)
        end

      diff.find_similar!(break_rewrites: break_rewrites)
      diff
    end
  end

  # raw_commit - a Hash of SERIALIZE_KEYS values or a Rugged::Commit.
  # head       - optional value stored as #head.
  #
  # Raises RuntimeError for nil or any other input type.
  def initialize(raw_commit, head = nil)
    raise "Nil as raw commit passed" unless raw_commit

    case raw_commit
    when Hash
      init_from_hash(raw_commit)
    when Rugged::Commit
      init_from_rugged(raw_commit)
    else
      raise "Invalid raw commit type: #{raw_commit.class}"
    end

    @head = head
  end

  def sha
    id
  end

  # Abbreviated id. The slice uses an inclusive range, so the result holds
  # up to length + 1 characters (11 with the default).
  def short_id(length = 10)
    id.to_s[0..length]
  end

  def safe_message
    @safe_message ||= message
  end

  def created_at
    committed_date
  end

  # Was this commit committed by a different person than the original author?
  def different_committer?
    author_name != committer_name || author_email != committer_email
  end

  def parent_id
    parent_ids.first
  end

  # Shows the diff between the commit's parent and the commit.
  #
  # Cuts out the header and stats from #to_patch and returns only the diff.
  def to_diff(options = {})
    diff_from_parent(options).patch
  end

  # Returns a diff object for the changes from this commit's first parent.
  # If there is no parent, then the diff is between this commit and an
  # empty repo. See Repository#diff for keys allowed in the +options+
  # hash.
  def diff_from_parent(options = {})
    Commit.diff_from_parent(raw_commit, options)
  end

  # True when the commit changes nothing, and also whenever computing the
  # stats raises a StandardError.
  def has_zero_stats?
    stats.total.zero?
  rescue
    true
  end

  def no_commit_message
    "--no commit message"
  end

  # Returns a Hash with one entry per SERIALIZE_KEYS attribute.
  def to_hash
    serialize_keys.each_with_object({}) do |key, hash|
      hash[key] = send(key)
    end
  end

  def date
    committed_date
  end

  def diffs(options = {})
    DiffCollection.new(diff_from_parent(options), options)
  end

  def parents
    raw_commit.parents.map { |parent| Gitlab::Git::Commit.new(parent) }
  end

  def tree
    raw_commit.tree
  end

  def stats
    Gitlab::Git::CommitStats.new(self)
  end

  # Returns the commit in mbox format. For merge commits a fixed
  # explanatory message is returned instead; any other Rugged::InvalidError
  # results in nil.
  def to_patch(options = {})
    raw_commit.to_mbox(options)
  rescue Rugged::InvalidError => ex
    if ex.message =~ /Commit \w+ is a merge commit/
      'Patch format is not currently supported for merge commits.'
    end
  end

  # Get a collection of Rugged::Reference objects for this commit.
  #
  # Ex.
  #   commit.refs(repo)
  #
  # This one-argument method replaces the reader generated by
  # `attr_accessor :refs`.
  def refs(repo)
    repo.refs_hash[id]
  end

  # Get ref names collection
  #
  # Ex.
  #   commit.ref_names(repo)
  #
  def ref_names(repo)
    refs(repo).map { |ref| ref.name.sub(%r{^refs/(heads|remotes|tags)/}, "") }
  end

  # The readers below replace the generated ones so that the text
  # attributes are always passed through encode!.
  def message
    encode!(@message)
  end

  def author_name
    encode!(@author_name)
  end

  def author_email
    encode!(@author_email)
  end

  def committer_name
    encode!(@committer_name)
  end

  def committer_email
    encode!(@committer_email)
  end

  private

  # Copies every SERIALIZE_KEYS attribute from a Hash with string or
  # symbol keys. @raw_commit is left untouched.
  def init_from_hash(hash)
    attributes = hash.symbolize_keys

    serialize_keys.each { |key| send("#{key}=", attributes[key]) }
  end

  # Copies id, message, dates, identities and parent ids from a
  # Rugged::Commit and keeps the object itself as @raw_commit.
  def init_from_rugged(commit)
    author = commit.author
    committer = commit.committer

    @raw_commit = commit
    @id = commit.oid
    @message = commit.message
    @authored_date = author[:time]
    @committed_date = committer[:time]
    @author_name = author[:name]
    @author_email = author[:email]
    @committer_name = committer[:name]
    @committer_email = committer[:email]
    @parent_ids = commit.parents.map(&:oid)
  end

  def serialize_keys
    SERIALIZE_KEYS
  end
end
end
end
# Gitlab::Git::CommitStats counts the additions, deletions, and total changes
# in a commit.
module Gitlab
module Git
class CommitStats
  attr_reader :id, :additions, :deletions, :total

  # Instantiate a CommitStats object by summing the per-patch statistics
  # of the commit's diff against its parent.
  #
  # commit - object responding to `id` and `diff_from_parent`.
  def initialize(commit)
    @id = commit.id
    @additions = @deletions = @total = 0

    commit.diff_from_parent.each_patch do |patch|
      # TODO: Use the new Rugged convenience methods when they're released
      @additions += patch.stat[0]
      @deletions += patch.stat[1]
      @total += patch.changes
    end
  end
end
end
end
module Gitlab
module Git
# Compares two revisions of a repository, exposing the commits and diffs
# between them.
class Compare
  attr_reader :head, :base, :straight

  # repository - repository handed to the Commit/Diff lookups.
  # base, head - revision names; either may be nil.
  # straight   - passed on to Diff.between as the :straight option.
  def initialize(repository, base, head, straight = false)
    @repository = repository
    @straight = straight

    if base && head
      @base = Gitlab::Git::Commit.find(repository, base.try(:strip))
      @head = Gitlab::Git::Commit.find(repository, head.try(:strip))

      # Nothing to list when a side is missing or both sides are identical
      @commits = [] if !(@base && @head) || same
    else
      @commits = []
    end
  end

  # Truthy when both sides resolved to the same commit id.
  def same
    @base && @head && @base.id == @head.id
  end

  # Commits between base and head; computed lazily unless the constructor
  # already decided the list is empty.
  def commits
    unless defined?(@commits)
      @commits = Gitlab::Git::Commit.between(@repository, @base.id, @head.id)
    end

    @commits
  end

  # Diff between the two sides. The :paths option is removed from the
  # passed Hash and :straight is written into it.
  def diffs(options = {})
    return Gitlab::Git::DiffCollection.new([]) unless @head && @base

    paths = options.delete(:paths) || []
    options[:straight] = @straight
    Gitlab::Git::Diff.between(@repository, @head.id, @base.id, options, *paths)
  end
end
end
end
# Gitlab::Git::Diff is a wrapper around native Rugged::Diff object
module Gitlab
module Git
class Diff
# Error type presumably meant for diff operations that time out; nothing in
# the visible code raises it — NOTE(review): confirm where it is used.
class TimeoutError < StandardError; end
include Gitlab::Git::EncodingHelper
# Diff properties
attr_accessor :old_path, :new_path, :a_mode, :b_mode, :diff
# Stats properties
attr_accessor :new_file, :renamed_file, :deleted_file
attr_accessor :too_large
# The maximum size of a diff to display.
DIFF_SIZE_LIMIT = 102400 # 100 KB
# The maximum size before a diff is collapsed.
DIFF_COLLAPSE_LIMIT = 10240 # 10 KB
class << self
# Returns the diff of +head+ against +base+ (or against their merge base).
#
# repo    - object responding to `merge_base_commit` and `diff`.
# head    - revision whose changes are shown.
# base    - revision to compare against.
# options - Hash of diff options (see filter_diff_options); nil is treated
#           as an empty Hash. :straight selects a direct comparison. The
#           passed Hash is not modified.
# paths   - optional paths handed through to `repo.diff`.
def between(repo, head, base, options = {}, *paths)
  # Work on a copy: the nil guard used to come after the first use of
  # +options+, and the :straight key was deleted from the caller's Hash.
  options = options ? options.dup : {}
  straight = options.delete(:straight) || false

  common_commit = if straight
                    base
                  else
                    # Only show what is new in the source branch
                    # compared to the target branch, not the other way
                    # around. The line below with merge_base is
                    # equivalent to diff with three dots (git diff
                    # branch1...branch2) From the git documentation:
                    # "git diff A...B" is equivalent to "git diff
                    # $(git-merge-base A B) B"
                    repo.merge_base_commit(head, base)
                  end

  actual_options = filter_diff_options(options)
  repo.diff(common_commit, head, actual_options, *paths)
end
# Return a copy of the +options+ hash containing only keys that can be
# passed to Rugged. Allowed options are:
#
# :max_size ::
# An integer specifying the maximum byte size of a file before it
# will be treated as binary. The default value is 512MB.
#
# :context_lines ::
# The number of unchanged lines that define the boundary of a hunk
# (and to display before and after the actual changes). The default is
# 3.
#
# :interhunk_lines ::
# The maximum number of unchanged lines between hunk boundaries before
# the hunks will be merged into one. The default is 0.
#
# :old_prefix ::
# The virtual "directory" to prefix to old filenames in hunk headers.
# The default is "a".
#
# :new_prefix ::
# The virtual "directory" to prefix to new filenames in hunk headers.
# The default is "b".
#
# :reverse ::
# If true, the sides of the diff will be reversed.
#
# :force_text ::
# If true, all files will be treated as text, disabling binary
# attributes & detection.
#
# :ignore_whitespace ::
# If true, all whitespace will be ignored.
#
# :ignore_whitespace_change ::
# If true, changes in amount of whitespace will be ignored.
#
# :ignore_whitespace_eol ::
# If true, whitespace at end of line will be ignored.
#
# :ignore_submodules ::
# if true, submodules will be excluded from the diff completely.
#
# :patience ::
# If true, the "patience diff" algorithm will be used (currently
# unimplemented).
#
# :include_ignored ::
# If true, ignored files will be included in the diff.
#
# :include_untracked ::
# If true, untracked files will be included in the diff.
#
# :include_unmodified ::
# If true, unmodified files will be included in the diff.
#
# :recurse_untracked_dirs ::
# Even if +:include_untracked+ is true, untracked directories will
# only be marked with a single entry in the diff. If this flag is set
# to true, all files under ignored directories will be included in the
# diff, too.
#
# :disable_pathspec_match ::
# If true, the given +*paths+ will be applied as exact matches,
# instead of as fnmatch patterns.
#
# :deltas_are_icase ::
# If true, filename comparisons will be made with case-insensitivity.
#
# :include_untracked_content ::
# If true, untracked content will be contained in the diff patch
# text.
#
# :skip_binary_check ::
# If true, diff deltas will be generated without spending time on
# binary detection. This is useful to improve performance in cases
# where the actual file content difference is not needed.
#
# :include_typechange ::
# If true, type changes for files will not be interpreted as deletion
# of the "old file" and addition of the "new file", but will generate
# typechange records.
#
# :include_typechange_trees ::
# Even if +:include_typechange+ is true, blob -> tree changes will
# still usually be handled as a deletion of the blob. If this flag is
# set to true, blob -> tree changes will be marked as typechanges.
#
# :ignore_filemode ::
# If true, file mode changes will be ignored.
#
# :recurse_ignored_dirs ::
# Even if +:include_ignored+ is true, ignored directories will only be
# marked with a single entry in the diff. If this flag is set to true,
# all files under ignored directories will be included in the diff,
# too.
def filter_diff_options(options, default_options = {})
  # Keys that survive the filtering; everything else is dropped.
  permitted = [:max_size, :context_lines, :interhunk_lines,
               :old_prefix, :new_prefix, :reverse, :force_text,
               :ignore_whitespace, :ignore_whitespace_change,
               :ignore_whitespace_eol, :ignore_submodules,
               :patience, :include_ignored, :include_untracked,
               :include_unmodified, :recurse_untracked_dirs,
               :disable_pathspec_match, :deltas_are_icase,
               :include_untracked_content, :skip_binary_check,
               :include_typechange, :include_typechange_trees,
               :ignore_filemode, :recurse_ignored_dirs, :paths,
               :max_files, :max_lines, :all_diffs, :no_collapse]

  # Filters a copy, so neither input Hash is modified.
  keep_permitted = lambda do |hash|
    hash.dup.keep_if { |key| permitted.include?(key) }
  end

  defaults = default_options ? keep_permitted.call(default_options) : {}
  return defaults unless options

  # Explicit options win over defaults
  defaults.merge(keep_permitted.call(options))
end
end
# raw_diff - a Hash of serialized attributes, a Rugged::Patch or a
#            Rugged::Diff::Delta.
# collapse - handed through to the matching initializer.
#
# Raises RuntimeError for nil or any other input type.
def initialize(raw_diff, collapse: false)
  if raw_diff.is_a?(Hash)
    init_from_hash(raw_diff, collapse: collapse)
  elsif Rugged::Patch === raw_diff || Rugged::Diff::Delta === raw_diff
    init_from_rugged(raw_diff, collapse: collapse)
  elsif raw_diff.nil?
    raise "Nil as raw diff passed"
  else
    raise "Invalid raw diff type: #{raw_diff.class}"
  end
end
# Names of the attributes copied from/to plain Hashes (memoized).
def serialize_keys
  @serialize_keys ||= [
    :diff, :new_path, :old_path, :a_mode, :b_mode,
    :new_file, :renamed_file, :deleted_file, :too_large
  ]
end
# Returns a Hash with one entry per serialize_keys attribute.
def to_hash
  serialize_keys.each_with_object({}) do |key, result|
    result[key] = send(key)
  end
end
# True when either side of the diff has mode 160000.
def submodule?
  [a_mode, b_mode].include?('160000')
end
# Number of lines in the diff text as reported by Util.count_lines; the
# value is cached after the first call.
def line_count
  @line_count = Util.count_lines(@diff) unless @line_count
  @line_count
end
# Whether the diff reached DIFF_SIZE_LIMIT. The answer is computed from the
# diff's byte size the first time and stored for later calls.
def too_large?
  @too_large = @diff.bytesize >= DIFF_SIZE_LIMIT if @too_large.nil?
  @too_large
end
# Whether the diff reached DIFF_COLLAPSE_LIMIT bytes.
def collapsible?
  DIFF_COLLAPSE_LIMIT <= @diff.bytesize
end
# Drops the diff text and flags the diff as too large.
def prune_large_diff!
  @diff, @line_count = '', 0
  @too_large = true
end
# Whether the diff was collapsed; false until prune_collapsed_diff! ran.
def collapsed?
  defined?(@collapsed) ? @collapsed : false
end
# Drops the diff text and flags the diff as collapsed.
def prune_collapsed_diff!
  @diff, @line_count = '', 0
  @collapsed = true
end
private
# Fills paths, modes and status flags from a Rugged delta. When given a
# Rugged::Patch the diff text is loaded first and the patch's delta is
# used; any other object is treated as the delta itself.
def init_from_rugged(rugged, collapse: false)
  delta =
    if rugged.is_a?(Rugged::Patch)
      init_from_rugged_patch(rugged, collapse: collapse)
      rugged.delta
    else
      rugged
    end

  @new_path = encode!(delta.new_file[:path])
  @old_path = encode!(delta.old_file[:path])
  # Modes are stored as octal strings
  @a_mode = delta.old_file[:mode].to_s(8)
  @b_mode = delta.new_file[:mode].to_s(8)
  @new_file = delta.added?
  @renamed_file = delta.renamed?
  @deleted_file = delta.deleted?
end
# Loads the diff text from a Rugged patch unless it was pruned for size.
def init_from_rugged_patch(patch, collapse: false)
  # Don't bother initializing diffs that are too large. If a diff is
  # binary we're not going to display anything so we skip the size check.
  unless patch.delta.binary?
    return if prune_large_patch(patch, collapse)
  end

  @diff = encode!(strip_diff_headers(patch.to_s))
end
# Copies every serialize_keys attribute from a Hash with string or symbol
# keys, then prunes the diff when it is too large or should be collapsed.
def init_from_hash(hash, collapse: false)
  attributes = hash.symbolize_keys

  serialize_keys.each { |key| send(:"#{key}=", attributes[key.to_sym]) }

  prune_large_diff! if too_large?
  prune_collapsed_diff! if collapse && collapsible?
end
# If the patch surpasses any of the diff limits it calls the appropriate
# prune method and returns true. Otherwise returns false.
#
# The size is the sum of the byte sizes of all line contents; counting
# stops as soon as DIFF_SIZE_LIMIT is reached.
def prune_large_patch(patch, collapse)
  total_bytes = 0

  patch.each_hunk do |hunk|
    hunk.each_line do |line|
      total_bytes += line.content.bytesize
      next if total_bytes < DIFF_SIZE_LIMIT

      prune_large_diff!
      return true
    end
  end

  return false unless collapse && total_bytes >= DIFF_COLLAPSE_LIMIT

  prune_collapsed_diff!
  true
end
# Strip out the information at the beginning of the patch's text to match
# Grit's output. The passed string is modified in place.
def strip_diff_headers(diff_text)
  # Delete everything up to the first line that starts with '---' or
  # 'Binary'
  diff_text.sub!(/\A.*?^(---|Binary)/m, '\1')

  # If the diff_text did not contain a line starting with '---' or
  # 'Binary', return the empty string. No idea why; we are just
  # preserving behavior from before the refactor.
  diff_text.start_with?('---', 'Binary') ? diff_text : ''
end
end
end
end
module Gitlab
module Git
# Enumerable wrapper around a raw diff iterator that builds
# Gitlab::Git::Diff objects lazily, caches them, and stops once the
# configured file/line/byte limits are reached.
class DiffCollection
  include Enumerable

  DEFAULT_LIMITS = { max_files: 100, max_lines: 5000 }.freeze

  # iterator - enumerable of raw diffs (must also respond to `each_delta`
  #            when :deltas_only is set).
  # options  - :max_files, :max_lines, :all_diffs, :no_collapse (default
  #            true) and :deltas_only.
  def initialize(iterator, options = {})
    bytes_per_file = 5120 # Average 5 KB per file

    @iterator = iterator
    @max_files = options.fetch(:max_files, DEFAULT_LIMITS[:max_files])
    @max_lines = options.fetch(:max_lines, DEFAULT_LIMITS[:max_lines])
    @max_bytes = @max_files * bytes_per_file

    # The "safe" limits never exceed the defaults
    @safe_max_files = [@max_files, DEFAULT_LIMITS[:max_files]].min
    @safe_max_lines = [@max_lines, DEFAULT_LIMITS[:max_lines]].min
    @safe_max_bytes = @safe_max_files * bytes_per_file

    @all_diffs = !!options.fetch(:all_diffs, false)
    @no_collapse = !!options.fetch(:no_collapse, true)
    @deltas_only = !!options.fetch(:deltas_only, false)

    @line_count = 0
    @byte_count = 0
    @overflow = false
    @array = []
  end

  def each(&block)
    # @iterator.each is slower than just iterating the array in place
    return @array.each(&block) if @populated

    @deltas_only ? each_delta(&block) : each_patch(&block)
  end

  def empty?
    !@iterator.any?
  end

  # Whether iteration stopped early because a limit was reached.
  def overflow?
    populate!
    !!@overflow
  end

  def size
    @size ||= count # forces a loop using each method
  end

  # The size as a String, with a trailing "+" when diffs were cut off.
  def real_size
    populate!

    @overflow ? "#{size}+" : size.to_s
  end

  # Replaces every cached element with the block's result for it and marks
  # the collection as populated.
  def decorate!
    result = each_with_index do |element, index|
      @array[index] = yield(element)
    end
    @populated = true
    result
  end

  private

  def populate!
    return if @populated

    each { nil } # force a loop through all diffs
    @populated = true
    nil
  end

  def over_safe_limits?(files)
    files >= @safe_max_files || @line_count > @safe_max_lines || @byte_count >= @safe_max_bytes
  end

  # Builds diffs from deltas only; no limits are applied here.
  def each_delta
    @iterator.each_delta.with_index do |delta, index|
      diff = Gitlab::Git::Diff.new(delta)
      @array[index] = diff
      yield diff
    end
  end

  def each_patch
    @iterator.each_with_index do |raw, index|
      # First yield cached Diff instances from @array
      cached = @array[index]
      if cached
        yield cached
        next
      end

      # We have exhausted @array, time to create new Diff instances or stop.
      break if @overflow

      unless @all_diffs || index < @max_files
        @overflow = true
        break
      end

      collapse = !(@all_diffs || @no_collapse)
      diff = Gitlab::Git::Diff.new(raw, collapse: collapse)
      diff.prune_collapsed_diff! if collapse && over_safe_limits?(index)

      @line_count += diff.line_count
      @byte_count += diff.diff.bytesize

      if !@all_diffs && (@line_count >= @max_lines || @byte_count >= @max_bytes)
        # This last Diff instance pushes us over the lines limit. We stop and
        # discard it.
        @overflow = true
        break
      end

      @array[index] = diff
      yield diff
    end
  end
end
end
end
module Gitlab
module Git
module EncodingHelper
  extend self

  # This threshold is carefully tweaked to prevent usage of encodings detected
  # by CharlockHolmes with low confidence. If CharlockHolmes confidence is low,
  # we're better off sticking with utf8 encoding.
  # Reason: git diff can return strings with invalid utf8 byte sequences if it
  # truncates a diff in the middle of a multibyte character. In this case
  # CharlockHolmes will try to guess the encoding and will likely suggest an
  # obscure encoding with low confidence.
  # There is a lot more info with this merge request:
  # https://gitlab.com/gitlab-org/gitlab_git/merge_requests/77#note_4754193
  ENCODING_CONFIDENCE_THRESHOLD = 40

  # Re-tags and cleans +message+ in place so that it is valid UTF-8 (or
  # tagged BINARY when the detector reports binary content).
  #
  # Returns nil for objects that do not respond to `force_encoding`, the
  # message itself on success, and a "--broken encoding: ..." String when
  # anything raises along the way.
  def encode!(message)
    return nil unless message.respond_to? :force_encoding

    # if message is utf-8 encoding, just return it
    message.force_encoding("UTF-8")
    return message if message.valid_encoding?

    # return message if message type is binary
    detect = CharlockHolmes::EncodingDetector.detect(message)
    return message.force_encoding("BINARY") if detect && detect[:type] == :binary

    # force detected encoding if we have sufficient confidence.
    if detect && detect[:encoding] && detect[:confidence] > ENCODING_CONFIDENCE_THRESHOLD
      message.force_encoding(detect[:encoding])
    end

    # encode and clean the bad chars
    message.replace clean(message)
  rescue
    encoding = detect ? detect[:encoding] : "unknown"
    "--broken encoding: #{encoding}"
  end

  # Returns a UTF-8 version of +message+ without modifying it.
  #
  # The message is converted from the detected encoding when there is one.
  # Without a detection result, or when the result carries no :encoding,
  # the message is cleaned instead of handing a nil encoding to the
  # converter.
  def encode_utf8(message)
    detect = CharlockHolmes::EncodingDetector.detect(message)

    if detect && detect[:encoding]
      CharlockHolmes::Converter.convert(message, detect[:encoding], 'UTF-8')
    else
      clean(message)
    end
  end

  private

  # Round-trips through UTF-16BE to drop invalid/undefined characters and
  # removes NUL characters from the result.
  def clean(message)
    message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "")
      .encode("UTF-8")
      .gsub("\0".encode("UTF-8"), "")
  end
end
end
end
module Gitlab
  module Git
    class PathHelper
      class << self
        # Normalizes a repository-relative file name.
        #
        # Leading slashes are removed and relative segments are collapsed
        # (e.g. "//foo/../bar" becomes "bar").
        #
        # filename - String path; it is NOT modified (frozen strings are fine).
        #
        # Returns a Pathname.
        def normalize_path(filename)
          # Strip all leading slashes so that //foo -> foo.
          # Work on a copy: assigning into the argument would mutate the
          # caller's string and raise on frozen strings.
          stripped = filename.sub(/\A\/*/, '')

          # Expand relative paths (e.g. foo/../bar)
          Pathname.new(stripped).relative_path_from(Pathname.new(''))
        end
      end
    end
  end
end
require 'open3'
module Gitlab
  module Git
    module Popen
      # Runs a system command inside +path+ and collects its output.
      #
      # cmd  - Array of Strings: the program followed by its arguments.
      # path - directory used both as the child's working directory (chdir)
      #        and as its PWD environment variable.
      #
      # Returns [output, exit_status] where output is the child's complete
      # stdout followed by its complete stderr.
      #
      # Raises RuntimeError when +cmd+ is not an Array.
      #
      # The last output/status are also kept in @cmd_output / @cmd_status.
      def popen(cmd, path)
        unless cmd.is_a?(Array)
          raise "System commands must be given as an array of strings"
        end

        vars = { "PWD" => path }
        options = { chdir: path }

        @cmd_output = ""
        @cmd_status = 0

        # capture3 closes the child's stdin (we never feed it any input, and a
        # command waiting on stdin would otherwise block forever) and drains
        # stdout and stderr concurrently, so a child that fills one pipe while
        # we are reading the other cannot deadlock us.
        stdout_str, stderr_str, status = Open3.capture3(vars, *cmd, options)

        @cmd_output << stdout_str
        @cmd_output << stderr_str
        @cmd_status = status.exitstatus

        [@cmd_output, @cmd_status]
      end
    end
  end
end
module Gitlab
  module Git
    # A git reference (branch or tag): its short name, the raw target it
    # points at and the result of looking that target up as a commit.
    class Ref
      include Gitlab::Git::EncodingHelper

      # Branch or tag name, with the "refs/tags/" or "refs/heads/" prefix removed.
      attr_reader :name

      # Target sha.
      # Usually a commit sha, but when a tag references another tag it can
      # be a tag sha.
      attr_reader :target

      # Dereferenced target: whatever Commit.find returned for the target.
      attr_reader :dereferenced_target

      class << self
        # Extract branch name from full ref path
        #
        # Ex.
        #   Ref.extract_branch_name('refs/heads/master') #=> 'master'
        def extract_branch_name(str)
          str.gsub(/\Arefs\/heads\//, '')
        end

        # Follows annotated tags until something that is not a
        # Rugged::Tag::Annotation is reached, and returns that object.
        def dereference_object(object)
          while object.is_a?(Rugged::Tag::Annotation)
            object = object.target
          end

          object
        end
      end

      # repository - passed through to Commit.find
      # name       - ref name; it is passed to encode! and then stripped of
      #              its refs/tags/ or refs/heads/ prefix
      # target     - an object responding to #oid or #name, or a String sha;
      #              anything else leaves @target as nil
      def initialize(repository, name, target)
        encode!(name)

        @name = name.gsub(/\Arefs\/(tags|heads)\//, '')
        @dereferenced_target = Commit.find(repository, target)
        @target =
          if target.respond_to?(:oid)
            target.oid
          elsif target.respond_to?(:name)
            target.name
          elsif target.is_a?(String)
            target
          end
      end
    end
  end
end
This diff is collapsed.
module Gitlab
  module Git
    # A Ref that additionally carries a tag message.
    class Tag < Ref
      # NOTE(review): nothing in this class assigns @object_sha.
      attr_reader :object_sha

      # Same arguments as Ref#initialize, plus an optional tag message.
      def initialize(repository, name, target, message = nil)
        super(repository, name, target)

        @message = message
      end

      # The tag message after being passed through encode!.
      def message
        encode!(@message)
      end
    end
  end
end
module Gitlab
  module Git
    # One entry (sub-tree, blob or submodule) of a git tree at a given commit.
    class Tree
      include Gitlab::Git::EncodingHelper

      attr_accessor :id, :root_id, :name, :path, :type,
        :mode, :commit_id, :submodule_url

      class << self
        # Get list of tree objects
        # for repository based on commit sha and path
        # Uses rugged for raw objects
        #
        # repository - object responding to #lookup
        # sha        - commit sha whose tree is listed
        # path       - optional directory inside the tree; nil, '' and '/'
        #              all mean the root tree
        #
        # Returns an Array of Tree instances; empty when +path+ cannot be
        # resolved to a tree.
        def where(repository, sha, path = nil)
          path = nil if path == '' || path == '/'

          commit = repository.lookup(sha)
          root_tree = commit.tree

          tree = if path
                   id = Tree.find_id_by_path(repository, root_tree.oid, path)
                   if id
                     repository.lookup(id)
                   else
                     []
                   end
                 else
                   root_tree
                 end

          tree.map do |entry|
            Tree.new(
              id: entry[:oid],
              root_id: root_tree.oid,
              name: entry[:name],
              type: entry[:type],
              mode: entry[:filemode],
              path: path ? File.join(path, entry[:name]) : entry[:name],
              commit_id: sha,
            )
          end
        end

        # Recursive search of tree id for path
        #
        # Ex.
        #   blog/            # oid: 1a
        #     app/           # oid: 2a
        #       models/      # oid: 3a
        #       views/       # oid: 4a
        #
        #
        # Tree.find_id_by_path(repo, '1a', 'app/models') # => '3a'
        #
        # Returns nil when any path segment does not name a sub-tree.
        def find_id_by_path(repository, root_id, path)
          root_tree = repository.lookup(root_id)
          path_arr = path.split('/')

          # Only sub-trees can match: a blob with the same name is ignored.
          entry = root_tree.find do |candidate|
            candidate[:name] == path_arr[0] && candidate[:type] == :tree
          end

          return nil unless entry

          if path_arr.size > 1
            # Descend into the matched sub-tree with the remaining segments.
            path_arr.shift
            find_id_by_path(repository, entry[:oid], path_arr.join('/'))
          else
            entry[:oid]
          end
        end
      end

      # options - Hash with the symbol keys :id, :root_id, :name, :path,
      #           :type, :mode and :commit_id. Other keys (including
      #           :submodule_url) are not read here.
      def initialize(options)
        %w(id root_id name path type mode commit_id).each do |key|
          public_send("#{key}=", options[key.to_sym])
        end
      end

      # The entry name after being passed through encode!.
      def name
        encode! @name
      end

      def dir?
        type == :tree
      end

      def file?
        type == :blob
      end

      def submodule?
        type == :commit
      end

      # Truthy (0) when the name starts with "readme", case-insensitively;
      # nil otherwise. Anchored with \A so only the very start of the name
      # counts, not the start of a later line inside a name containing "\n".
      def readme?
        name =~ /\Areadme/i
      end

      # Truthy (0) when the name starts with "contributing",
      # case-insensitively; nil otherwise. Anchored with \A for the same
      # reason as readme?.
      def contributing?
        name =~ /\Acontributing/i
      end
    end
  end
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment