From 4c622b71fd284058deee483bf0009f8179b792bc Mon Sep 17 00:00:00 2001
From: Robert Speicher <rspeicher@gmail.com>
Date: Wed, 22 Feb 2017 14:25:06 -0500
Subject: [PATCH] Add Upload model and UploadChecksumWorker worker

---
 app/models/upload.rb                        |  63 ++++++++
 app/workers/upload_checksum_worker.rb       |  12 ++
 config/sidekiq_queues.yml                   |   1 +
 db/migrate/20170130221926_create_uploads.rb |  20 +++
 db/schema.rb                                |  14 ++
 spec/models/upload_spec.rb                  | 151 ++++++++++++++++++++
 spec/support/carrierwave.rb                 |   4 +-
 spec/workers/upload_checksum_worker_spec.rb |  19 +++
 8 files changed, 282 insertions(+), 2 deletions(-)
 create mode 100644 app/models/upload.rb
 create mode 100644 app/workers/upload_checksum_worker.rb
 create mode 100644 db/migrate/20170130221926_create_uploads.rb
 create mode 100644 spec/models/upload_spec.rb
 create mode 100644 spec/workers/upload_checksum_worker_spec.rb

diff --git a/app/models/upload.rb b/app/models/upload.rb
new file mode 100644
index 00000000000..13987931b05
--- /dev/null
+++ b/app/models/upload.rb
@@ -0,0 +1,63 @@
+class Upload < ActiveRecord::Base
+  # Upper limit for foreground checksum processing
+  CHECKSUM_THRESHOLD = 100.megabytes
+
+  belongs_to :model, polymorphic: true
+
+  validates :size, presence: true
+  validates :path, presence: true
+  validates :model, presence: true
+  validates :uploader, presence: true
+
+  before_save  :calculate_checksum, if:     :foreground_checksum?
+  after_commit :schedule_checksum,  unless: :foreground_checksum?
+
+  def self.remove_path(path)
+    where(path: path).destroy_all
+  end
+
+  def self.record(uploader)
+    remove_path(uploader.relative_path)
+
+    create(
+      size: uploader.file.size,
+      path: uploader.relative_path,
+      model: uploader.model,
+      uploader: uploader.class.to_s
+    )
+  end
+
+  def absolute_path
+    return path unless relative_path?
+
+    uploader_class.absolute_path(self)
+  end
+
+  def calculate_checksum
+    return unless exist?
+
+    self.checksum = Digest::SHA256.file(absolute_path).hexdigest
+  end
+
+  def exist?
+    File.exist?(absolute_path)
+  end
+
+  private
+
+  def foreground_checksum?
+    size <= CHECKSUM_THRESHOLD
+  end
+
+  def schedule_checksum
+    UploadChecksumWorker.perform_async(id)
+  end
+
+  def relative_path?
+    !path.start_with?('/')
+  end
+
+  def uploader_class
+    Object.const_get(uploader)
+  end
+end
diff --git a/app/workers/upload_checksum_worker.rb b/app/workers/upload_checksum_worker.rb
new file mode 100644
index 00000000000..78931f1258f
--- /dev/null
+++ b/app/workers/upload_checksum_worker.rb
@@ -0,0 +1,12 @@
+class UploadChecksumWorker
+  include Sidekiq::Worker
+  include DedicatedSidekiqQueue
+
+  def perform(upload_id)
+    upload = Upload.find(upload_id)
+    upload.calculate_checksum
+    upload.save!
+  rescue ActiveRecord::RecordNotFound
+    Rails.logger.error("UploadChecksumWorker: couldn't find upload #{upload_id}, skipping")
+  end
+end
diff --git a/config/sidekiq_queues.yml b/config/sidekiq_queues.yml
index 97620cc9c7f..824f99e687e 100644
--- a/config/sidekiq_queues.yml
+++ b/config/sidekiq_queues.yml
@@ -29,6 +29,7 @@
   - [email_receiver, 2]
   - [emails_on_push, 2]
   - [mailers, 2]
+  - [upload_checksum, 1]
   - [use_key, 1]
   - [repository_fork, 1]
   - [repository_import, 1]
diff --git a/db/migrate/20170130221926_create_uploads.rb b/db/migrate/20170130221926_create_uploads.rb
new file mode 100644
index 00000000000..6f06c5dd840
--- /dev/null
+++ b/db/migrate/20170130221926_create_uploads.rb
@@ -0,0 +1,20 @@
+class CreateUploads < ActiveRecord::Migration
+  include Gitlab::Database::MigrationHelpers
+
+  DOWNTIME = false
+
+  def change
+    create_table :uploads do |t|
+      t.integer :size, limit: 8, null: false
+      t.string :path, null: false
+      t.string :checksum, limit: 64
+      t.references :model, polymorphic: true
+      t.string :uploader, null: false
+      t.datetime :created_at, null: false
+    end
+
+    add_index :uploads, :path
+    add_index :uploads, :checksum
+    add_index :uploads, [:model_id, :model_type]
+  end
+end
diff --git a/db/schema.rb b/db/schema.rb
index 6f7dd3e4768..9f31ee8e6c5 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -1214,6 +1214,20 @@ ActiveRecord::Schema.define(version: 20170306170512) do
   add_index "u2f_registrations", ["key_handle"], name: "index_u2f_registrations_on_key_handle", using: :btree
   add_index "u2f_registrations", ["user_id"], name: "index_u2f_registrations_on_user_id", using: :btree
 
+  create_table "uploads", force: :cascade do |t|
+    t.integer "size", limit: 8, null: false
+    t.string "path", null: false
+    t.string "checksum", limit: 64
+    t.integer "model_id"
+    t.string "model_type"
+    t.string "uploader", null: false
+    t.datetime "created_at", null: false
+  end
+
+  add_index "uploads", ["checksum"], name: "index_uploads_on_checksum", using: :btree
+  add_index "uploads", ["model_id", "model_type"], name: "index_uploads_on_model_id_and_model_type", using: :btree
+  add_index "uploads", ["path"], name: "index_uploads_on_path", using: :btree
+
   create_table "user_agent_details", force: :cascade do |t|
     t.string "user_agent", null: false
     t.string "ip_address", null: false
diff --git a/spec/models/upload_spec.rb b/spec/models/upload_spec.rb
new file mode 100644
index 00000000000..4c832c87d6a
--- /dev/null
+++ b/spec/models/upload_spec.rb
@@ -0,0 +1,151 @@
+require 'rails_helper'
+
+describe Upload, type: :model do
+  describe 'assocations' do
+    it { is_expected.to belong_to(:model) }
+  end
+
+  describe 'validations' do
+    it { is_expected.to validate_presence_of(:size) }
+    it { is_expected.to validate_presence_of(:path) }
+    it { is_expected.to validate_presence_of(:model) }
+    it { is_expected.to validate_presence_of(:uploader) }
+  end
+
+  describe 'callbacks' do
+    context 'for a file above the checksum threshold' do
+      it 'schedules checksum calculation' do
+        stub_const('UploadChecksumWorker', spy)
+
+        upload = described_class.create(
+          path: __FILE__,
+          size: described_class::CHECKSUM_THRESHOLD + 1.kilobyte,
+          model: build_stubbed(:user),
+          uploader: double('ExampleUploader')
+        )
+
+        expect(UploadChecksumWorker)
+          .to have_received(:perform_async).with(upload.id)
+      end
+    end
+
+    context 'for a file at or below the checksum threshold' do
+      it 'calculates checksum immediately before save' do
+        upload = described_class.new(
+          path: __FILE__,
+          size: described_class::CHECKSUM_THRESHOLD,
+          model: build_stubbed(:user),
+          uploader: double('ExampleUploader')
+        )
+
+        expect { upload.save }
+          .to change { upload.checksum }.from(nil)
+          .to(a_string_matching(/\A\h{64}\z/))
+      end
+    end
+  end
+
+  describe '.remove_path' do
+    it 'removes all records at the given path' do
+      described_class.create!(
+        size: File.size(__FILE__),
+        path: __FILE__,
+        model: build_stubbed(:user),
+        uploader: 'AvatarUploader'
+      )
+
+      expect { described_class.remove_path(__FILE__) }.
+        to change { described_class.count }.from(1).to(0)
+    end
+  end
+
+  describe '.record' do
+    let(:fake_uploader) do
+      double(
+        file: double(size: 12_345),
+        relative_path: 'foo/bar.jpg',
+        model: build_stubbed(:user),
+        class: 'AvatarUploader'
+      )
+    end
+
+    it 'removes existing paths before creation' do
+      expect(described_class).to receive(:remove_path)
+        .with(fake_uploader.relative_path)
+
+      described_class.record(fake_uploader)
+    end
+
+    it 'creates a new record and assigns size, path, model, and uploader' do
+      upload = described_class.record(fake_uploader)
+
+      aggregate_failures do
+        expect(upload).to be_persisted
+        expect(upload.size).to eq fake_uploader.file.size
+        expect(upload.path).to eq fake_uploader.relative_path
+        expect(upload.model_id).to eq fake_uploader.model.id
+        expect(upload.model_type).to eq fake_uploader.model.class.to_s
+        expect(upload.uploader).to eq fake_uploader.class
+      end
+    end
+  end
+
+  describe '#absolute_path' do
+    it 'returns the path directly when already absolute' do
+      path = '/path/to/namespace/project/secret/file.jpg'
+      upload = described_class.new(path: path)
+
+      expect(upload).not_to receive(:uploader_class)
+
+      expect(upload.absolute_path).to eq path
+    end
+
+    it "delegates to the uploader's absolute_path method" do
+      uploader = spy('FakeUploader')
+      upload = described_class.new(path: 'secret/file.jpg')
+      expect(upload).to receive(:uploader_class).and_return(uploader)
+
+      upload.absolute_path
+
+      expect(uploader).to have_received(:absolute_path).with(upload)
+    end
+  end
+
+  describe '#calculate_checksum' do
+    it 'calculates the SHA256 sum' do
+      upload = described_class.new(
+        path: __FILE__,
+        size: described_class::CHECKSUM_THRESHOLD - 1.megabyte
+      )
+      expected = Digest::SHA256.file(__FILE__).hexdigest
+
+      expect { upload.calculate_checksum }
+        .to change { upload.checksum }.from(nil).to(expected)
+    end
+
+    it 'returns nil for a non-existant file' do
+      upload = described_class.new(
+        path: __FILE__,
+        size: described_class::CHECKSUM_THRESHOLD - 1.megabyte
+      )
+
+      expect(upload).to receive(:exist?).and_return(false)
+
+      expect(upload.calculate_checksum).to be_nil
+    end
+  end
+
+  describe '#exist?' do
+    it 'returns true when the file exists' do
+      upload = described_class.new(path: __FILE__)
+
+      expect(upload).to exist
+    end
+
+    it 'returns false when the file does not exist' do
+      upload = described_class.new(path: "#{__FILE__}-nope")
+
+      expect(upload).not_to exist
+    end
+  end
+end
diff --git a/spec/support/carrierwave.rb b/spec/support/carrierwave.rb
index 72af2c70324..fa9a316b6a2 100644
--- a/spec/support/carrierwave.rb
+++ b/spec/support/carrierwave.rb
@@ -1,7 +1,7 @@
-CarrierWave.root = 'tmp/tests/uploads'
+CarrierWave.root = File.expand_path('tmp/tests/uploads', Rails.root)
 
 RSpec.configure do |config|
   config.after(:each) do
-    FileUtils.rm_rf('tmp/tests/uploads')
+    FileUtils.rm_rf(CarrierWave.root)
   end
 end
diff --git a/spec/workers/upload_checksum_worker_spec.rb b/spec/workers/upload_checksum_worker_spec.rb
new file mode 100644
index 00000000000..911360da66c
--- /dev/null
+++ b/spec/workers/upload_checksum_worker_spec.rb
@@ -0,0 +1,19 @@
+require 'rails_helper'
+
+describe UploadChecksumWorker do
+  describe '#perform' do
+    it 'rescues ActiveRecord::RecordNotFound' do
+      expect { described_class.new.perform(999_999) }.not_to raise_error
+    end
+
+    it 'calls calculate_checksum_without_delay and save!' do
+      upload = spy
+      expect(Upload).to receive(:find).with(999_999).and_return(upload)
+
+      described_class.new.perform(999_999)
+
+      expect(upload).to have_received(:calculate_checksum)
+      expect(upload).to have_received(:save!)
+    end
+  end
+end
-- 
GitLab