Unverified Commit 207b308c authored by Kamil Trzcinski

Improve flag options

parent fa06ef9c
@@ -10,8 +10,8 @@ It uses optimised file accesses and API calls to create walk DAG.
 
 ### Installation
 
-```
-go get -u gitlab.com/gitlab-org/docker-distribution-pruner
+```bash
+$ go get -u gitlab.com/gitlab-org/docker-distribution-pruner
 ```
 
 ### Preface
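(With the pre-modules Go toolchain this project targets, `go get -u` fetches the latest sources and installs the `docker-distribution-pruner` binary into `$GOPATH/bin`.)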
@@ -29,13 +29,13 @@ If you run `-delete -soft-delete=false` you will remove data forever.
 Dry run:
 
 ```bash
-docker-distribution-pruner -config=/path/to/registry/configuration
+$ docker-distribution-pruner -config=/path/to/registry/configuration
 ```
 
 Reclaim disk space:
 
 ```bash
-docker-distribution-pruner -config=/path/to/registry/configuration -delete
+$ docker-distribution-pruner -config=/path/to/registry/configuration -delete
 ```
 
 ### GitLab Omnibus
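(As the hunk header above notes, running `docker-distribution-pruner -config=/path/to/registry/configuration -delete -soft-delete=false` removes data forever; do a dry run first.)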
@@ -43,7 +43,7 @@ docker-distribution-pruner -config=/path/to/registry/configuration -delete
 Run:
 
 ```bash
-docker-distribution-pruner -config=/var/opt/gitlab/registry/config.yml
+$ docker-distribution-pruner -config=/var/opt/gitlab/registry/config.yml
 ```
 
 ### S3 effectiveness
@@ -62,7 +62,14 @@ For large repositories it allows to save hundreds of thousands requests and also
 This tool can effectively run, in reasonable time, on registries that consist of millions of objects and terabytes of data.
 For a smooth run, have at least 4 GB of memory per 5 million objects stored in the registry.
 
+To speed up the processing of large repositories, enable parallel blob and repository processing:
+```bash
+$ docker-distribution-pruner -config=/path/to/registry/configuration -parallel-repository-walk -parallel-blob-walk
+```
+You can also tune the concurrency settings up or down:
+```bash
+-jobs=100 -parallel-walk-jobs=100
+```
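As a rough worked example of that sizing rule: at 4 GB per 5 million objects, a registry holding 20 million objects would need on the order of 16 GB of memory for the pruner's walk data.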
 package main
 
 import (
-	"flag"
 	"fmt"
 	"path/filepath"
 	"strings"
@@ -11,8 +10,6 @@ import (
 	"github.com/dustin/go-humanize"
 )
 
-var parallelBlobWalk = flag.Bool("parallel-blob-walk", true, "Allow to use parallel blob walker")
-
 type blobsData map[digest]*blobData
 
 var blobsLock sync.Mutex
@@ -116,10 +113,10 @@ func (b blobsData) walkPath(walkPath string) error {
 	})
 }
 
-func (b blobsData) walk() error {
+func (b blobsData) walk(parallel bool) error {
 	logrus.Infoln("Walking BLOBS...")
 
-	if *parallelBlobWalk {
+	if parallel {
 		listRootPath := filepath.Join("blobs", "sha256")
 		return parallelWalk(listRootPath, b.walkPath)
 	} else {
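The refactor in this and the following files follows one pattern: flag definitions move into main.go, and functions such as `blobsData.walk` receive plain values (`parallel bool`) instead of dereferencing package-level flag pointers. A minimal, self-contained sketch of the pattern (hypothetical names, not the project's actual code):

```go
package main

import (
	"flag"
	"fmt"
)

// With this commit's approach, every option is declared once, next to main.
var parallelFlag = flag.Bool("parallel", false, "walk in parallel")

// walk receives a plain value instead of reading a package-level flag,
// so it carries no hidden dependency on global flag state and is easy to test.
func walk(parallel bool) error {
	if parallel {
		fmt.Println("parallel walk")
	} else {
		fmt.Println("sequential walk")
	}
	return nil
}

func main() {
	flag.Parse()
	// The flag pointer is dereferenced exactly once, at the call site.
	if err := walk(*parallelFlag); err != nil {
		fmt.Println("error:", err)
	}
}
```

Keeping the single dereference at the call site in `main` is what lets the commit delete the scattered `"flag"` imports from the other files.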
 package main
 
 import (
-	"flag"
 	"path/filepath"
 	"sync/atomic"
 
@@ -16,11 +15,6 @@ var (
 	deletedBlobSize int64
 )
 
-var (
-	delete     = flag.Bool("delete", false, "Delete data, instead of dry run")
-	softDelete = flag.Bool("soft-delete", true, "When deleting, do not remove, but move to backup/ folder")
-)
-
 func deleteFile(path string, size int64) error {
 	logrus.Infoln("DELETE", path, size)
 	name := filepath.Base(path)
@@ -10,13 +10,25 @@ import (
 )
 
 var (
-	config = flag.String("config", "", "Path to registry config file")
-	debug = flag.Bool("debug", false, "Print debug messages")
-	verbose = flag.Bool("verbose", true, "Print verbose messages")
+	config = flag.String("config", "", "Path to registry config file")
+	ignoreBlobs = flag.Bool("ignore-blobs", false, "Ignore blobs processing and recycling")
 	jobs = flag.Int("jobs", 10, "Number of concurrent jobs to execute")
 	parallelWalkJobs = flag.Int("parallel-walk-jobs", 10, "Number of concurrent parallel walk jobs to execute")
-	ignoreBlobs = flag.Bool("ignore-blobs", false, "Ignore blobs processing and recycling")
-	softErrors = flag.Bool("soft-errors", false, "Print errors, but do not fail")
+	debug = flag.Bool("debug", false, "Print debug messages")
+	verbose = flag.Bool("verbose", true, "Print verbose messages")
+	softErrors = flag.Bool("soft-errors", false, "Print errors, but do not fail")
+	parallelRepositoryWalk = flag.Bool("parallel-repository-walk", false, "Allow to use parallel repository walker")
+	parallelBlobWalk = flag.Bool("parallel-blob-walk", false, "Allow to use parallel blob walker")
+	repositoryCsvOutput = flag.String("repository-csv-output", "repositories.csv", "File to which CSV will be written with all metrics")
+	deleteOldTagVersions = flag.Bool("delete-old-tag-versions", true, "Delete old tag versions")
+	delete = flag.Bool("delete", false, "Delete data, instead of dry run")
+	softDelete = flag.Bool("soft-delete", true, "When deleting, do not remove, but move to backup/ folder")
 )
 
 var (
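Two details are worth noting in the consolidated block above: every option now lives in a single `var` block next to `main`, and the parallel walkers changed their defaults. `parallel-repository-walk` and `parallel-blob-walk` previously defaulted to `true` (see the removed declarations in the other files) and now default to `false`, so parallel walking becomes opt-in via the flags documented in the README.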
@@ -78,7 +90,7 @@ func main() {
 	go func() {
 		defer wg.Done()
 
-		err = repositories.walk()
+		err = repositories.walk(*parallelRepositoryWalk)
 		if err != nil {
 			logErrorln(err)
 		}
@@ -91,7 +103,7 @@ func main() {
 		return
 	}
 
-	err = blobs.walk()
+	err = blobs.walk(*parallelBlobWalk)
 	if err != nil {
 		logErrorln(err)
 	}
@@ -118,7 +130,7 @@ func main() {
 	}
 
 	logrus.Infoln("Summary...")
-	repositories.info(blobs)
+	repositories.info(blobs, *repositoryCsvOutput)
 	blobs.info()
 	deletesInfo()
 	currentStorage.Info()
 package main
 
 import (
-	"flag"
 	"fmt"
 	"io"
 	"os"
@@ -11,9 +10,6 @@ import (
 	"github.com/Sirupsen/logrus"
 )
 
-var parallelRepositoryWalk = flag.Bool("parallel-repository-walk", true, "Allow to use parallel repository walker")
-var repositoryCsvOutput = flag.String("repository-csv-output", "repositories.csv", "File to which CSV will be written with all metrics")
-
 type repositoriesData map[string]*repositoryData
 
 var repositoriesLock sync.Mutex
@@ -75,12 +71,12 @@ func (r repositoriesData) walkPath(walkPath string, jg *jobGroup) error {
 	})
 }
 
-func (r repositoriesData) walk() error {
+func (r repositoriesData) walk(parallel bool) error {
 	logrus.Infoln("Walking REPOSITORIES...")
 
 	jg := jobsRunner.group()
 
-	if *parallelRepositoryWalk {
+	if parallel {
 		err := parallelWalk("repositories", func(listPath string) error {
 			return r.walkPath(listPath, jg)
 		})
@@ -131,12 +127,12 @@ func (r repositoriesData) sweep() error {
 	return nil
 }
 
-func (r repositoriesData) info(blobs blobsData) {
+func (r repositoriesData) info(blobs blobsData, csvOutput string) {
 	var stream io.WriteCloser
 
-	if *repositoryCsvOutput != "" {
+	if csvOutput != "" {
 		var err error
-		stream, err = os.Create(*repositoryCsvOutput)
+		stream, err = os.Create(csvOutput)
 		if err == nil {
 			defer stream.Close()
 
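The `info` change is the same dependency-injection move: the CSV path arrives as a parameter, and the empty string disables the report. A self-contained sketch of that guard (hypothetical `writeReport`, not the project's code):

```go
package main

import (
	"encoding/csv"
	"fmt"
	"os"
)

// writeReport writes rows as CSV to the given path.
// An empty path disables the report entirely, mirroring the
// `csvOutput != ""` check in repositoriesData.info.
func writeReport(path string, rows [][]string) error {
	if path == "" {
		return nil
	}

	f, err := os.Create(path)
	if err != nil {
		return err
	}
	defer f.Close()

	// WriteAll flushes the writer and reports any write error.
	return csv.NewWriter(f).WriteAll(rows)
}

func main() {
	rows := [][]string{{"repository", "tags"}, {"group/app", "12"}}
	if err := writeReport("repositories.csv", rows); err != nil {
		fmt.Println("error:", err)
	}
}
```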
@@ -22,6 +22,7 @@ type s3Storage struct {
 	S3 *s3.S3
 	apiCalls int64
 	expensiveApiCalls int64
+	freeApiCalls int64
 	cacheHits int64
 	cacheError int64
 	cacheMiss int64
@@ -233,7 +234,7 @@ func (f *s3Storage) Read(path string, etag string) ([]byte, error) {
 }
 
 func (f *s3Storage) Delete(path string) error {
-	atomic.AddInt64(&f.expensiveApiCalls, 1)
+	atomic.AddInt64(&f.freeApiCalls, 1)
 	_, err := f.S3.DeleteObject(&s3.DeleteObjectInput{
 		Bucket: aws.String(f.Bucket),
 		Key:    aws.String(f.fullPath(path)),
@@ -255,7 +256,7 @@ func (f *s3Storage) Move(path, newPath string) error {
 }
 
 func (f *s3Storage) Info() {
-	logrus.Infoln("S3 INFO: API calls/expensive:", f.apiCalls, f.expensiveApiCalls,
+	logrus.Infoln("S3 INFO: API calls/expensive/free:", f.apiCalls, f.expensiveApiCalls, f.freeApiCalls,
 		"Cache (hit/miss/error):", f.cacheHits, f.cacheMiss, f.cacheError)
 }
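The new `freeApiCalls` counter splits deletes out of the expensive bucket; Amazon S3 does not charge for DELETE requests, so counting them as expensive overstated the cost of a pruning run. A minimal sketch of this style of lock-free accounting with `sync/atomic` (hypothetical names):

```go
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// apiStats tallies calls by pricing class; atomic increments make it
// safe to update from many walker goroutines without a mutex.
type apiStats struct {
	expensive int64 // e.g. LIST requests
	free      int64 // e.g. DELETE requests
}

func (s *apiStats) countExpensive() { atomic.AddInt64(&s.expensive, 1) }
func (s *apiStats) countFree()      { atomic.AddInt64(&s.free, 1) }

func main() {
	var stats apiStats
	var wg sync.WaitGroup

	// Simulate concurrent storage operations.
	for i := 0; i < 100; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			stats.countFree()
			stats.countExpensive()
		}()
	}
	wg.Wait()

	fmt.Println("expensive:", atomic.LoadInt64(&stats.expensive),
		"free:", atomic.LoadInt64(&stats.free))
}
```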
 package main
 
 import (
-	"flag"
 	"path/filepath"
 	"sync"
 
 	"github.com/Sirupsen/logrus"
 )
 
-var deleteOldTagVersions = flag.Bool("delete-old-tag-versions", true, "Delete old tag versions")
-
 type tagData struct {
 	repository *repositoryData
 	name string