Commit 7bc80026 authored by Marius Bobin's avatar Marius Bobin Committed by Kamil Trzciński
Browse files

Create CI cache keys based on commit ids

Allow sharing CI files cache across branches to speed
up pipeline execution time for many users.

Adds `key:files: []` to the CI config file.
Works by selecting the latest commit that changed any
of given files and uses it as the key.
parent 73a0886c
---
title: Build CI cache key from commit SHAs that changed given files
merge_request: 19392
author:
type: added
......@@ -1535,6 +1535,50 @@ cache:
- binaries/
```
 
##### `cache:key:files`
> [Introduced](https://gitlab.com/gitlab-org/gitlab/issues/18986) in GitLab v12.5.
If `cache:key:files` is added, the cache `key` will use a SHA checksum computed from the most recent
commits that changed each of the given files. If none of the files was changed in any commit, the key will be `default`.
A maximum of two files is allowed.
```yaml
cache:
key:
files:
- Gemfile.lock
- package.json
paths:
- vendor/ruby
- node_modules
```
##### `cache:key:prefix`
> [Introduced](https://gitlab.com/gitlab-org/gitlab/issues/18986) in GitLab v12.5.
The `prefix` parameter adds extra functionality to `key:files` by allowing the key to
be composed of the given `prefix` combined with the SHA checksum computed for
`cache:key:files`. For example, adding a `prefix` of `rspec` will
cause keys to look like: `rspec-feef9576d21ee9b6a32e30c5c79d0a0ceb68d1e5`. If none of the
files was changed in any commit, the prefix is added to `default`, so the key in the
example would be `rspec-default`.
`prefix` follows the same restrictions as `key`, so it can use any of the
[predefined variables](../variables/README.md). Similarly, the `/` character or the
equivalent URI-encoded `%2F`, or a value made only of `.` or `%2E`, is not allowed.
```yaml
cache:
key:
files:
- Gemfile.lock
prefix: ${CI_JOB_NAME}
paths:
- vendor/ruby
```
#### `cache:untracked`
 
Set `untracked: true` to cache all files that are untracked in your Git
......
# frozen_string_literal: true
module Gitlab
module Ci
class Config
module Entry
##
# Config entry holding the list of file paths used to build a cache key.
# The value must be an array of strings with one or two items.
#
class Files < ::Gitlab::Config::Entry::Node
  include ::Gitlab::Config::Entry::Validatable

  validations do
    # Bounds on the number of listed files, with the messages reported
    # when the list is too short or too long.
    length_rules = {
      minimum: 1,
      maximum: 2,
      too_short: 'requires at least %{count} item',
      too_long: 'has too many items (maximum is %{count})'
    }

    validates :config, array_of_strings: true
    validates :config, length: length_rules
  end
end
end
end
end
end
......@@ -7,11 +7,48 @@ module Gitlab
##
# Entry that represents a key.
#
class Key < ::Gitlab::Config::Entry::Node
include ::Gitlab::Config::Entry::Validatable
class Key < ::Gitlab::Config::Entry::Simplifiable
strategy :SimpleKey, if: -> (config) { config.is_a?(String) || config.is_a?(Symbol) }
strategy :ComplexKey, if: -> (config) { config.is_a?(Hash) }
 
validations do
validates :config, key: true
# Strategy for a key given as a plain String or Symbol.
# The config is checked with the `key` validator.
class SimpleKey < ::Gitlab::Config::Entry::Node
include ::Gitlab::Config::Entry::Validatable
validations do
validates :config, key: true
end
# Default value reported by this entry class.
def self.default
'default'
end
# Returns the parent entry's value converted with `to_s`, so a Symbol
# config is exposed as a String.
def value
super.to_s
end
end
# Strategy for a key given as a Hash. `files` is required, `prefix` is
# optional, and any other key is rejected.
class ComplexKey < ::Gitlab::Config::Entry::Node
include ::Gitlab::Config::Entry::Attributable
include ::Gitlab::Config::Entry::Configurable
# Every key that may appear in the hash.
ALLOWED_KEYS = %i[files prefix].freeze
# Keys that must be present in the hash.
REQUIRED_KEYS = %i[files].freeze
validations do
validates :config, allowed_keys: ALLOWED_KEYS
validates :config, required_keys: REQUIRED_KEYS
end
# Child entries: each sub-key is handled by its own entry class.
entry :files, Entry::Files,
description: 'Files that should be used to build the key'
entry :prefix, Entry::Prefix,
description: 'Prefix that is added to the final cache key'
end
# Entry whose only job is to report that the key config has an unsupported
# type. NOTE(review): presumably selected by Simplifiable when no declared
# strategy matches -- confirm against Simplifiable.
class UnknownStrategy < ::Gitlab::Config::Entry::Node
  # Returns a single-element list with the type error for this location.
  def errors
    message = "#{location} should be a hash, a string or a symbol"

    [message]
  end
end
 
def self.default
......
# frozen_string_literal: true
module Gitlab
module Ci
class Config
module Entry
##
# Entry that represents a key prefix.
#
# The config is checked with the same `key` validator that is applied to
# a plain string or symbol cache key.
#
class Prefix < ::Gitlab::Config::Entry::Node
include ::Gitlab::Config::Entry::Validatable
validations do
validates :config, key: true
end
end
end
end
end
end
......@@ -29,6 +29,8 @@ module Gitlab
.fabricate(attributes.delete(:except))
@rules = Gitlab::Ci::Build::Rules
.new(attributes.delete(:rules))
@cache = Seed::Build::Cache
.new(pipeline, attributes.delete(:cache))
end
 
def name
......@@ -59,6 +61,7 @@ module Gitlab
@seed_attributes
.deep_merge(pipeline_attributes)
.deep_merge(rules_attributes)
.deep_merge(cache_attributes)
end
 
def bridge?
......@@ -150,6 +153,12 @@ module Gitlab
@using_rules ? @rules.evaluate(@pipeline, self).build_attributes : {}
end
end
# Build attributes produced by the cache seed, computed once and memoized
# under the :cache_attributes key.
def cache_attributes
  strong_memoize(:cache_attributes) { @cache.build_attributes }
end
end
end
end
......
# frozen_string_literal: true
module Gitlab
module Ci
module Pipeline
module Seed
class Build
# Turns the `cache` section of a job definition into build options.
#
# The cache key may be a String/Symbol, which is used as given, or a Hash
# with `:files` and an optional `:prefix`. For a Hash, the key is derived
# from the last commits that touched the listed files.
class Cache
  # pipeline - object that responds to `project` and `sha`.
  # cache    - Hash-like cache config (or nil). Only :key, :paths, :policy
  #            and :untracked are accepted.
  #
  # Raises ArgumentError when the config carries any other key.
  def initialize(pipeline, cache)
    @pipeline = pipeline

    # Work on a deep copy so the caller's config is never mutated.
    settings = cache.to_h.deep_dup
    @key, @paths, @policy, @untracked =
      %i[key paths policy untracked].map { |name| settings.delete(name) }

    return if settings.empty?

    raise ArgumentError, "unknown cache keys: #{settings.keys}"
  end

  # Returns `{ options: { cache: {...} } }` with nil settings dropped, or
  # `{ options: {} }` when no cache setting is present at all.
  def build_attributes
    cache_options = {
      key: key_string,
      paths: @paths,
      policy: @policy,
      untracked: @untracked
    }.compact.presence

    { options: { cache: cache_options }.compact }
  end

  private

  # Final cache key as a String, or nil when no usable key was configured.
  def key_string
    case @key
    when String, Symbol then @key.to_s
    when Hash then key_from_files
    end
  end

  # Joins the optional prefix and the files digest with a dash.
  def key_from_files
    return unless @key.is_a?(Hash)

    parts = [@key[:prefix], files_digest]
    parts.select { |part| part.present? }.join('-')
  end

  # Digest of the latest changes, falling back to the literal 'default'.
  def files_digest
    digest = hash_of_the_latest_changes
    digest || 'default'
  end

  # SHA1 over the sorted, de-duplicated ids of the last commit that changed
  # each file. Returns nil when the :ci_file_based_cache feature is disabled
  # or when no commit id could be found.
  def hash_of_the_latest_changes
    return unless Feature.enabled?(:ci_file_based_cache, @pipeline.project, default_enabled: true)

    commit_ids = files
      .map { |file_path| last_commit_id_for_path(file_path) }
      .compact
      .sort
      .uniq

    return unless commit_ids.any?

    Digest::SHA1.hexdigest(commit_ids.join('-'))
  end

  # Configured file paths without blank entries or duplicates.
  def files
    listed = @key[:files].to_a
    listed.select { |file_path| file_path.present? }.uniq
  end

  # Id of the last commit that changed `path`, looked up at the pipeline SHA.
  def last_commit_id_for_path(path)
    repository = @pipeline.project.repository
    repository.last_commit_id_for_path(@pipeline.sha, path)
  end
end
end
end
end
end
end
......@@ -43,11 +43,11 @@ module Gitlab
needs_attributes: job.dig(:needs, :job),
interruptible: job[:interruptible],
rules: job[:rules],
cache: job[:cache],
options: {
image: job[:image],
services: job[:services],
artifacts: job[:artifacts],
cache: job[:cache],
dependencies: job[:dependencies],
job_timeout: job[:timeout],
before_script: job[:before_script],
......
......@@ -12,22 +12,53 @@ describe Gitlab::Ci::Config::Entry::Cache do
 
context 'when entry config value is correct' do
let(:policy) { nil }
let(:key) { 'some key' }
 
let(:config) do
{ key: 'some key',
{ key: key,
untracked: true,
paths: ['some/path/'],
policy: policy }
end
 
describe '#value' do
it 'returns hash value' do
expect(entry.value).to eq(key: 'some key', untracked: true, paths: ['some/path/'], policy: 'pull-push')
shared_examples 'hash key value' do
it 'returns hash value' do
expect(entry.value).to eq(key: key, untracked: true, paths: ['some/path/'], policy: 'pull-push')
end
end
it_behaves_like 'hash key value'
context 'with files' do
let(:key) { { files: ['a-file', 'other-file'] } }
it_behaves_like 'hash key value'
end
context 'with files and prefix' do
let(:key) { { files: ['a-file', 'other-file'], prefix: 'prefix-value' } }
it_behaves_like 'hash key value'
end
context 'with prefix' do
let(:key) { { prefix: 'prefix-value' } }
it 'key is nil' do
expect(entry.value).to match(a_hash_including(key: nil))
end
end
end
 
describe '#valid?' do
it { is_expected.to be_valid }
context 'with files' do
let(:key) { { files: ['a-file', 'other-file'] } }
it { is_expected.to be_valid }
end
end
 
context 'policy is pull-push' do
......@@ -87,10 +118,44 @@ describe Gitlab::Ci::Config::Entry::Cache do
end
 
context 'when descendants are invalid' do
let(:config) { { key: 1 } }
context 'with invalid keys' do
let(:config) { { key: 1 } }
 
it 'reports error with descendants' do
is_expected.to include 'key config should be a string or symbol'
it 'reports error with descendants' do
is_expected.to include 'key should be a hash, a string or a symbol'
end
end
context 'with empty key' do
let(:config) { { key: {} } }
it 'reports error with descendants' do
is_expected.to include 'key config missing required keys: files'
end
end
context 'with invalid files' do
let(:config) { { key: { files: 'a-file' } } }
it 'reports error with descendants' do
is_expected.to include 'key:files config should be an array of strings'
end
end
context 'with prefix without files' do
let(:config) { { key: { prefix: 'a-prefix' } } }
it 'reports error with descendants' do
is_expected.to include 'key config missing required keys: files'
end
end
context 'when there is an unknown key present' do
let(:config) { { key: { unknown: 'a-file' } } }
it 'reports error with descendants' do
is_expected.to include 'key config contains unknown keys: unknown'
end
end
end
 
......
# frozen_string_literal: true
require 'spec_helper'
# Specs for the config entry that holds the list of files used to build a
# cache key.
describe Gitlab::Ci::Config::Entry::Files do
let(:entry) { described_class.new(config) }
describe 'validations' do
# A list of two path strings is accepted and returned unchanged.
context 'when entry config value is valid' do
let(:config) { ['some/file', 'some/path/'] }
describe '#value' do
it 'returns key value' do
expect(entry.value).to eq config
end
end
describe '#valid?' do
it 'is valid' do
expect(entry).to be_valid
end
end
end
# Each context below feeds one kind of invalid config and checks the
# reported error message.
describe '#errors' do
# A bare string is rejected: the value must be an array.
context 'when entry value is not an array' do
let(:config) { 'string' }
it 'saves errors' do
expect(entry.errors)
.to include 'files config should be an array of strings'
end
end
# An array with a non-string element is rejected with the same message.
context 'when entry value is not an array of strings' do
let(:config) { [1] }
it 'saves errors' do
expect(entry.errors)
.to include 'files config should be an array of strings'
end
end
# Three items exceed the maximum of two.
context 'when entry value contains more than two values' do
let(:config) { %w[file1 file2 file3] }
it 'saves errors' do
expect(entry.errors)
.to include 'files config has too many items (maximum is 2)'
end
end
end
end
end
......@@ -6,38 +6,38 @@ describe Gitlab::Ci::Config::Entry::Key do
let(:entry) { described_class.new(config) }
 
describe 'validations' do
shared_examples 'key with slash' do
it 'is invalid' do
expect(entry).not_to be_valid
end
it_behaves_like 'key entry validations', 'simple key'
 
it 'reports errors with config value' do
expect(entry.errors).to include 'key config cannot contain the "/" character'
end
end
context 'when entry config value is correct' do
context 'when key is a hash' do
let(:config) { { files: ['test'], prefix: 'something' } }
 
shared_examples 'key with only dots' do
it 'is invalid' do
expect(entry).not_to be_valid
end
describe '#value' do
it 'returns key value' do
expect(entry.value).to match(config)
end
end
 
it 'reports errors with config value' do
expect(entry.errors).to include 'key config cannot be "." or ".."'
describe '#valid?' do
it 'is valid' do
expect(entry).to be_valid
end
end
end
end
 
context 'when entry config value is correct' do
let(:config) { 'test' }
context 'when key is a symbol' do
let(:config) { :key }
 
describe '#value' do
it 'returns key value' do
expect(entry.value).to eq 'test'
describe '#value' do
it 'returns key value' do
expect(entry.value).to eq(config.to_s)
end
end
end
 
describe '#valid?' do
it 'is valid' do
expect(entry).to be_valid
describe '#valid?' do
it 'is valid' do
expect(entry).to be_valid
end
end
end
end
......@@ -47,53 +47,11 @@ describe Gitlab::Ci::Config::Entry::Key do
 
describe '#errors' do
it 'saves errors' do
expect(entry.errors)
.to include 'key config should be a string or symbol'
expect(entry.errors.first)
.to match /should be a hash, a string or a symbol/
end
end
end
context 'when entry value contains slash' do
let(:config) { 'key/with/some/slashes' }
it_behaves_like 'key with slash'
end
context 'when entry value contains URI encoded slash (%2F)' do
let(:config) { 'key%2Fwith%2Fsome%2Fslashes' }
it_behaves_like 'key with slash'
end
context 'when entry value is a dot' do
let(:config) { '.' }
it_behaves_like 'key with only dots'
end
context 'when entry value is two dots' do
let(:config) { '..' }
it_behaves_like 'key with only dots'
end
context 'when entry value is a URI encoded dot (%2E)' do
let(:config) { '%2e' }
it_behaves_like 'key with only dots'
end
context 'when entry value is two URI encoded dots (%2E)' do
let(:config) { '%2E%2e' }
it_behaves_like 'key with only dots'
end
context 'when entry value is one dot and one URI encoded dot' do
let(:config) { '.%2e' }
it_behaves_like 'key with only dots'
end
end
 
describe '.default' do
......
# frozen_string_literal: true
require 'spec_helper'
# Specs for the config entry that holds the cache key prefix.
describe Gitlab::Ci::Config::Entry::Prefix do
let(:entry) { described_class.new(config) }
describe 'validations' do
# Shared examples defined outside this file.
# NOTE(review): presumably the slash/dot checks that key_spec used to
# define inline -- confirm against the shared example definition.
it_behaves_like 'key entry validations', :prefix
# An array is neither a string nor a symbol, so it must be rejected.
context 'when entry value is not correct' do
let(:config) { ['incorrect'] }
describe '#errors' do
it 'saves errors' do
expect(entry.errors)
.to include 'prefix config should be a string or symbol'
end
end
end
end
# Prefix has no default value of its own.
describe '.default' do
it 'returns default key' do
expect(described_class.default).to be_nil
end
end
end
# frozen_string_literal: true
require 'spec_helper'
describe Gitlab::Ci::Pipeline::Seed::Build::Cache do
let_it_be(:project) { create(:project, :repository) }
let_it_be(:head_sha) { project.repository.head_commit.id }
let_it_be(:pipeline) { create(:ci_pipeline, project: project, sha: head_sha) }
let(:processor) { described_class.new(pipeline, config) }
describe '#build_attributes' do
subject { processor.build_attributes }
context 'with cache:key' do
let(:config) do
{
key: 'a-key',
paths: ['vendor/ruby']
}