diff --git a/app/models/concerns/sha_attribute.rb b/app/models/concerns/sha_attribute.rb new file mode 100644 index 0000000000000000000000000000000000000000..c28974a3cdfed941bb940f23743ad45e67e02dd1 --- /dev/null +++ b/app/models/concerns/sha_attribute.rb @@ -0,0 +1,18 @@ +module ShaAttribute + extend ActiveSupport::Concern + + module ClassMethods + def sha_attribute(name) + column = columns.find { |c| c.name == name.to_s } + + # In case the table doesn't exist we won't be able to find the column, + # thus we will only check the type if the column is present. + if column && column.type != :binary + raise ArgumentError, + "sha_attribute #{name.inspect} is invalid since the column type is not :binary" + end + + attribute(name, Gitlab::Database::ShaAttribute.new) + end + end +end diff --git a/doc/development/README.md b/doc/development/README.md index 9496a87d84d3e610da46da10560cbd51ede152c8..a2a07c37ced5047a75a0774e04c11bf262554a41 100644 --- a/doc/development/README.md +++ b/doc/development/README.md @@ -54,6 +54,7 @@ - [Polymorphic Associations](polymorphic_associations.md) - [Single Table Inheritance](single_table_inheritance.md) - [Background Migrations](background_migrations.md) +- [Storing SHA1 Hashes As Binary](sha1_as_binary.md) ## i18n diff --git a/doc/development/sha1_as_binary.md b/doc/development/sha1_as_binary.md new file mode 100644 index 0000000000000000000000000000000000000000..3151cc29bbc67decfb378268ce5f99a36c089151 --- /dev/null +++ b/doc/development/sha1_as_binary.md @@ -0,0 +1,36 @@ +# Storing SHA1 Hashes As Binary + +Storing SHA1 hashes as strings is not very space efficient. A SHA1 as a string +requires at least 40 bytes, an additional byte to store the encoding, and +perhaps more space depending on the internals of PostgreSQL and MySQL. + +On the other hand, if one were to store a SHA1 as binary one would only need 20 +bytes for the actual SHA1, and 1 or 4 bytes of additional space (again depending +on database internals). This means that in the best case scenario we can reduce +the space usage by 50%. + +To make this easier to work with you can include the concern `ShaAttribute` into +a model and define a SHA attribute using the `sha_attribute` class method. For +example: + +```ruby +class Commit < ActiveRecord::Base + include ShaAttribute + + sha_attribute :sha +end +``` + +This allows you to use the value of the `sha` attribute as if it were a string, +while storing it as binary. This means that you can do something like this, +without having to worry about converting data to the right binary format: + +```ruby +commit = Commit.find_by(sha: '88c60307bd1f215095834f09a1a5cb18701ac8ad') +commit.sha = '971604de4cfa324d91c41650fabc129420c8d1cc' +commit.save +``` + +There is however one requirement: the column used to store the SHA has _must_ be +a binary type. For Rails this means you need to use the `:binary` type instead +of `:text` or `:string`. diff --git a/lib/gitlab/database/sha_attribute.rb b/lib/gitlab/database/sha_attribute.rb new file mode 100644 index 0000000000000000000000000000000000000000..d9400e04b83997e0e3ecd9fa289848ba47375942 --- /dev/null +++ b/lib/gitlab/database/sha_attribute.rb @@ -0,0 +1,34 @@ +module Gitlab + module Database + BINARY_TYPE = if Gitlab::Database.postgresql? + # PostgreSQL defines its own class with slightly different + # behaviour from the default Binary type. + ActiveRecord::ConnectionAdapters::PostgreSQL::OID::Bytea + else + ActiveRecord::Type::Binary + end + + # Class for casting binary data to hexadecimal SHA1 hashes (and vice-versa). + # + # Using ShaAttribute allows you to store SHA1 values as binary while still + # using them as if they were stored as string values. This gives you the + # ease of use of string values, but without the storage overhead. + class ShaAttribute < BINARY_TYPE + PACK_FORMAT = 'H*'.freeze + + # Casts binary data to a SHA1 in hexadecimal. + def type_cast_from_database(value) + value = super + + value ? value.unpack(PACK_FORMAT)[0] : nil + end + + # Casts a SHA1 in hexadecimal to the proper binary format. + def type_cast_for_database(value) + arg = value ? [value].pack(PACK_FORMAT) : nil + + super(arg) + end + end + end +end diff --git a/spec/lib/gitlab/database/sha_attribute_spec.rb b/spec/lib/gitlab/database/sha_attribute_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..62c1d37ea1c8d422e2065343367e4f330d99147a --- /dev/null +++ b/spec/lib/gitlab/database/sha_attribute_spec.rb @@ -0,0 +1,33 @@ +require 'spec_helper' + +describe Gitlab::Database::ShaAttribute do + let(:sha) do + '9a573a369a5bfbb9a4a36e98852c21af8a44ea8b' + end + + let(:binary_sha) do + [sha].pack('H*') + end + + let(:binary_from_db) do + if Gitlab::Database.postgresql? + "\\x#{sha}" + else + binary_sha + end + end + + let(:attribute) { described_class.new } + + describe '#type_cast_from_database' do + it 'converts the binary SHA to a String' do + expect(attribute.type_cast_from_database(binary_from_db)).to eq(sha) + end + end + + describe '#type_cast_for_database' do + it 'converts a SHA String to binary data' do + expect(attribute.type_cast_for_database(sha).to_s).to eq(binary_sha) + end + end +end diff --git a/spec/models/concerns/sha_attribute_spec.rb b/spec/models/concerns/sha_attribute_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..9e37c2b20c4aa50d850728c3dd626f67a38c785c --- /dev/null +++ b/spec/models/concerns/sha_attribute_spec.rb @@ -0,0 +1,27 @@ +require 'spec_helper' + +describe ShaAttribute do + let(:model) { Class.new { include ShaAttribute } } + + before do + columns = [ + double(:column, name: 'name', type: :text), + double(:column, name: 'sha1', type: :binary) + ] + + allow(model).to receive(:columns).and_return(columns) + end + + describe '#sha_attribute' do + it 'defines a SHA attribute for a binary column' do + expect(model).to receive(:attribute) + .with(:sha1, an_instance_of(Gitlab::Database::ShaAttribute)) + + model.sha_attribute(:sha1) + end + + it 'raises ArgumentError when the column type is not :binary' do + expect { model.sha_attribute(:name) }.to raise_error(ArgumentError) + end + end +end