From 918397797178f667d3d08f36893b7c17bc11f263 Mon Sep 17 00:00:00 2001 From: Stan Hu <stanhu@gmail.com> Date: Sat, 4 Jun 2016 16:37:48 -0700 Subject: [PATCH 1/3] Increase the max amount of blob data to load from 1K to 10 MB Before we were only loading 1024 bytes, which could lead to false negative detection of binary data since binary characters could come after the 1024-byte mark. Instead, we should load as much data as we are willing to display to ensure that this does not happen. Relates to gitlab-org/gitlab-ce#13826 --- CHANGELOG | 3 +++ lib/gitlab_git/blob.rb | 15 ++++++++------- spec/blob_spec.rb | 8 ++++---- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 771e4ce..704f986 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,6 @@ +v 10.1.3 + - Increase the max amount of blob data to load from 1K to 10 MB + v 10.1.2 - DiffCollections now enforce a total byte limit of 5 KB * max_files diff --git a/lib/gitlab_git/blob.rb b/lib/gitlab_git/blob.rb index 275d88d..9ab33b9 100644 --- a/lib/gitlab_git/blob.rb +++ b/lib/gitlab_git/blob.rb @@ -7,10 +7,11 @@ module Gitlab include Linguist::BlobHelper include EncodingHelper - # This number needs to be large enough to allow reliable content / - # encoding detection (Linguist) and LFS pointer parsing. All other cases - # where we need full blob data should use load_all_data!. - DATA_FRAGMENT_SIZE = 1024 + # This number is the maximum amount of data that we want to display to + # the user. We load as much as we can for encoding detection + # (Linguist) and LFS pointer parsing. All other cases where we need full + # blob data should use load_all_data!. + MAX_DATA_DISPLAY_SIZE = 10485760 attr_accessor :name, :path, :size, :data, :mode, :id, :commit_id @@ -33,7 +34,7 @@ module Gitlab id: blob.oid, name: blob_entry[:name], size: blob.size, - data: blob.content(DATA_FRAGMENT_SIZE), + data: blob.content(MAX_DATA_DISPLAY_SIZE), mode: blob_entry[:filemode].to_s(8), path: path, commit_id: sha, @@ -48,7 +49,7 @@ module Gitlab Blob.new( id: blob.oid, size: blob.size, - data: blob.content(DATA_FRAGMENT_SIZE), + data: blob.content(MAX_DATA_DISPLAY_SIZE), ) end @@ -224,7 +225,7 @@ module Gitlab encode! @data end - # Load all blob data (not just the first DATA_FRAGMENT_SIZE bytes) into + # Load all blob data (not just the first MAX_DATA_DISPLAY_SIZE bytes) into # memory as a Ruby string. def load_all_data!(repository) return if @data == '' # don't mess with submodule blobs diff --git a/spec/blob_spec.rb b/spec/blob_spec.rb index 100f826..2ef7d0e 100644 --- a/spec/blob_spec.rb +++ b/spec/blob_spec.rb @@ -65,10 +65,10 @@ describe Gitlab::Git::Blob do let(:blob_size) { 111803 } it { expect(blob.size).to eq(blob_size) } - it { expect(blob.data.length).to eq(Gitlab::Git::Blob::DATA_FRAGMENT_SIZE) } + it { expect(blob.data.length).to eq(Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE) } it 'check that this test is sane' do - expect(blob.size).to be > Gitlab::Git::Blob::DATA_FRAGMENT_SIZE + expect(blob.size).to be > Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE end it 'can load all data' do @@ -89,10 +89,10 @@ describe Gitlab::Git::Blob do let(:blob_size) { 111803 } it { expect(blob.size).to eq(blob_size) } - it { expect(blob.data.length).to eq(Gitlab::Git::Blob::DATA_FRAGMENT_SIZE) } + it { expect(blob.data.length).to eq(Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE) } it 'check that this test is sane' do - expect(blob.size).to be > Gitlab::Git::Blob::DATA_FRAGMENT_SIZE + expect(blob.size).to be > Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE end end end -- GitLab From 264be9ae9b4a5f8a30f26b79fcdce24ca697d68a Mon Sep 17 00:00:00 2001 From: Stan Hu <stanhu@gmail.com> Date: Sat, 4 Jun 2016 21:34:48 -0700 Subject: [PATCH 2/3] Make specs pass for whatever data size limits used by Blob --- spec/blob_spec.rb | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/spec/blob_spec.rb b/spec/blob_spec.rb index 2ef7d0e..0abb69f 100644 --- a/spec/blob_spec.rb +++ b/spec/blob_spec.rb @@ -65,10 +65,10 @@ describe Gitlab::Git::Blob do let(:blob_size) { 111803 } it { expect(blob.size).to eq(blob_size) } - it { expect(blob.data.length).to eq(Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE) } + it { expect(blob.data.length).to eq(blob_size) } it 'check that this test is sane' do - expect(blob.size).to be > Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE + expect(blob.size).to be <= Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE end it 'can load all data' do @@ -85,14 +85,14 @@ describe Gitlab::Git::Blob do it { expect(raw_blob.size).to eq(669) } context 'large file' do - let(:blob) { Gitlab::Git::Blob.raw(repository, '08cf843fd8fe1c50757df0a13fcc44661996b4df') } - let(:blob_size) { 111803 } - - it { expect(blob.size).to eq(blob_size) } - it { expect(blob.data.length).to eq(Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE) } - - it 'check that this test is sane' do - expect(blob.size).to be > Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE + it 'limits the size of a large file' do + blob_size = Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE + 1 + buffer = Array.new(blob_size, 0) + rugged_blob = Rugged::Blob.from_buffer(repository.rugged, buffer.join('')) + blob = Gitlab::Git::Blob.raw(repository, rugged_blob) + + expect(blob.size).to eq(blob_size) + expect(blob.data.length).to eq(Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE) end end end -- GitLab From a5b4fc84116005ba575fba239e394b8785db05a8 Mon Sep 17 00:00:00 2001 From: Stan Hu <stanhu@gmail.com> Date: Wed, 8 Jun 2016 21:24:29 -0700 Subject: [PATCH 3/3] Add method to indicate blob was truncated --- lib/gitlab_git/blob.rb | 4 ++++ spec/blob_spec.rb | 2 ++ 2 files changed, 6 insertions(+) diff --git a/lib/gitlab_git/blob.rb b/lib/gitlab_git/blob.rb index 9ab33b9..b95a97a 100644 --- a/lib/gitlab_git/blob.rb +++ b/lib/gitlab_git/blob.rb @@ -266,6 +266,10 @@ module Gitlab nil end + def truncated? + size > data.size + end + private def has_lfs_version_key? diff --git a/spec/blob_spec.rb b/spec/blob_spec.rb index 0abb69f..415f346 100644 --- a/spec/blob_spec.rb +++ b/spec/blob_spec.rb @@ -83,6 +83,7 @@ describe Gitlab::Git::Blob do it { expect(raw_blob.id).to eq(SeedRepo::RubyBlob::ID) } it { expect(raw_blob.data[0..10]).to eq("require \'fi") } it { expect(raw_blob.size).to eq(669) } + it { expect(raw_blob.truncated?).to be_falsey } context 'large file' do it 'limits the size of a large file' do @@ -93,6 +94,7 @@ describe Gitlab::Git::Blob do expect(blob.size).to eq(blob_size) expect(blob.data.length).to eq(Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE) + expect(blob.truncated?).to be_truthy end end end -- GitLab