diff --git a/CHANGELOG b/CHANGELOG
index 771e4ceec2516e250f35336cb7f623304975c1a2..704f98667360a3774f2d0484e8ebde84379113c7 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,6 @@
+v 10.1.3
+  - Increase the max amount of blob data to load from 1K to 10 MB
+
 v 10.1.2
   - DiffCollections now enforce a total byte limit of 5 KB * max_files
 
diff --git a/lib/gitlab_git/blob.rb b/lib/gitlab_git/blob.rb
index 275d88d9aa4a7c16015bb2cb82f20b1060a897e7..b95a97acc1fff68320f904f68b5558fc53de1505 100644
--- a/lib/gitlab_git/blob.rb
+++ b/lib/gitlab_git/blob.rb
@@ -7,10 +7,11 @@ module Gitlab
       include Linguist::BlobHelper
       include EncodingHelper
 
-      # This number needs to be large enough to allow reliable content /
-      # encoding detection (Linguist) and LFS pointer parsing. All other cases
-      # where we need full blob data should use load_all_data!.
-      DATA_FRAGMENT_SIZE = 1024
+      # This number is the maximum amount of data that we want to display to
+      # the user. We load as much as we can for encoding detection
+      # (Linguist) and LFS pointer parsing. All other cases where we need full
+      # blob data should use load_all_data!.
+      MAX_DATA_DISPLAY_SIZE = 10485760
 
       attr_accessor :name, :path, :size, :data, :mode, :id, :commit_id
 
@@ -33,7 +34,7 @@ module Gitlab
                 id: blob.oid,
                 name: blob_entry[:name],
                 size: blob.size,
-                data: blob.content(DATA_FRAGMENT_SIZE),
+                data: blob.content(MAX_DATA_DISPLAY_SIZE),
                 mode: blob_entry[:filemode].to_s(8),
                 path: path,
                 commit_id: sha,
@@ -48,7 +49,7 @@ module Gitlab
           Blob.new(
             id: blob.oid,
             size: blob.size,
-            data: blob.content(DATA_FRAGMENT_SIZE),
+            data: blob.content(MAX_DATA_DISPLAY_SIZE),
           )
         end
 
@@ -224,7 +225,7 @@ module Gitlab
         encode! @data
       end
 
-      # Load all blob data (not just the first DATA_FRAGMENT_SIZE bytes) into
+      # Load all blob data (not just the first MAX_DATA_DISPLAY_SIZE bytes) into
       # memory as a Ruby string.
       def load_all_data!(repository)
         return if @data == '' # don't mess with submodule blobs
@@ -265,6 +266,21 @@ module Gitlab
         nil
       end
 
+      # Whether +data+ holds fewer bytes than the blob itself, i.e. the content
+      # was cut off at MAX_DATA_DISPLAY_SIZE when the blob was loaded.
+      #
+      # The comparison is done in bytes: String#size counts characters, so it
+      # would flag every fully loaded blob containing multi-byte characters.
+      # Returns false when either the size or the data has not been set.
+      #
+      # NOTE(review): assumes encode! does not change the byte length of @data;
+      # recording the loaded byte count at construction would be exact - confirm.
+      def truncated?
+        return false unless size && @data
+
+        size > @data.bytesize
+      end
+
       private
 
       def has_lfs_version_key?
diff --git a/spec/blob_spec.rb b/spec/blob_spec.rb
index 100f8261200c697a9b242e774927a18de8a242ed..415f346435bf375c29e2afea2a1061e8c8d7102a 100644
--- a/spec/blob_spec.rb
+++ b/spec/blob_spec.rb
@@ -65,10 +65,10 @@ describe Gitlab::Git::Blob do
       let(:blob_size) { 111803 }
 
       it { expect(blob.size).to eq(blob_size) }
-      it { expect(blob.data.length).to eq(Gitlab::Git::Blob::DATA_FRAGMENT_SIZE) }
+      it { expect(blob.data.length).to eq(blob_size) }
 
       it 'check that this test is sane' do
-        expect(blob.size).to be > Gitlab::Git::Blob::DATA_FRAGMENT_SIZE
+        expect(blob.size).to be <= Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE
       end
 
       it 'can load all data' do
@@ -83,16 +83,25 @@ describe Gitlab::Git::Blob do
     it { expect(raw_blob.id).to eq(SeedRepo::RubyBlob::ID) }
     it { expect(raw_blob.data[0..10]).to eq("require \'fi") }
     it { expect(raw_blob.size).to eq(669) }
+    it { expect(raw_blob.truncated?).to be_falsey }
+
+    it 'is not truncated when fully loaded content has multi-byte characters' do
+      oid = Rugged::Blob.from_buffer(repository.rugged, "\u00e9" * 10)
+      blob = Gitlab::Git::Blob.raw(repository, oid)
+
+      expect(blob.size).to eq(20)
+      expect(blob.truncated?).to be_falsey
+    end
 
     context 'large file' do
-      let(:blob) { Gitlab::Git::Blob.raw(repository, '08cf843fd8fe1c50757df0a13fcc44661996b4df') }
-      let(:blob_size) { 111803 }
-
-      it { expect(blob.size).to eq(blob_size) }
-      it { expect(blob.data.length).to eq(Gitlab::Git::Blob::DATA_FRAGMENT_SIZE) }
-
-      it 'check that this test is sane' do
-        expect(blob.size).to be > Gitlab::Git::Blob::DATA_FRAGMENT_SIZE
+      it 'limits the size of a large file' do
+        blob_size = Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE + 1
+        oid = Rugged::Blob.from_buffer(repository.rugged, '0' * blob_size)
+        blob = Gitlab::Git::Blob.raw(repository, oid)
+
+        expect(blob.size).to eq(blob_size)
+        expect(blob.data.length).to eq(Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE)
+        expect(blob.truncated?).to be_truthy
       end
     end
   end