Skip to content
Snippets Groups Projects
Commit 3a2127a4 authored by Nick Thomas's avatar Nick Thomas
Browse files

Skip too-large blobs

parent 2f35c7f8
No related branches found
No related tags found
1 merge request: !1 Initial implementation of an elasticsearch indexer in Go
Loading
Loading
@@ -13,12 +13,15 @@ import (
)
 
var (
	// timeoutError is the sentinel error returned when a bulk
	// operation exceeds its deadline.
	timeoutError = fmt.Errorf("Timeout")
)

const (
	// MaxBulkSize is the maximum payload size, in bytes, for a single
	// bulk submission to Elasticsearch.
	//
	// TODO: make this configurable / detectable.
	// Limiting to 10MiB lets us work on small AWS clusters, but unnecessarily
	// increases round trips in larger or non-AWS clusters
	MaxBulkSize = 10 * 1024 * 1024

	// BulkWorkers is the number of concurrent workers draining the
	// bulk submission queue.
	BulkWorkers = 2
)
 
type Client struct {
Loading
Loading
@@ -101,6 +104,11 @@ func (c *Client) ParentID() string {
return c.ProjectID
}
 
// SubmissionLimit reports the largest blob size, in bytes, that the
// client will accept for a single bulk submission.
//
// FIXME(nick): this should reserve some space for encoding
func (c *Client) SubmissionLimit() int64 {
	const limit int64 = MaxBulkSize
	return limit
}
// Flush forces any buffered bulk operations to be submitted
// immediately, delegating to the underlying bulk processor and
// returning any error it reports.
func (c *Client) Flush() error {
	return c.bulk.Flush()
}
Loading
Loading
Loading
Loading
@@ -10,6 +10,7 @@ import (
 
type Submitter interface {
ParentID() string
SubmissionLimit() int64
 
Index(id string, thing interface{}) error
Remove(id string) error
Loading
Loading
@@ -30,9 +31,13 @@ func (i *Indexer) SubmitCommit(c *object.Commit) error {
}
 
func (i *Indexer) SubmitBlob(f *object.File, _, toCommit *object.Commit) error {
// TODO(nick): Existing code doesn't index blobs > 10MiB in size
// FIXME(nick): Not sure commitSHA is right, or how it works at all
 
if f.Blob.Size > i.Submitter.SubmissionLimit() {
log.Printf("Skipping %s: too large: %dMiB", f.Name, f.Blob.Size/(1024*1024))
return nil
}
blob, err := BuildBlob(f, toCommit.Hash.String(), i.Submitter.ParentID())
if err != nil {
return err
Loading
Loading
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment