Skip to content
Snippets Groups Projects
Commit 60ca8cb8 authored by Nick Thomas
Browse files

Add types and interfaces for blobs

parent ae645523
No related branches found
No related tags found
1 merge request: !1 Initial implementation of an Elasticsearch indexer in Go
Loading
Loading
@@ -88,3 +88,11 @@ func (c *Client) Flush() error {
// IndexCommit is not implemented yet: it ignores indexName and commit and
// always returns a "TODO" error.
func (c *Client) IndexCommit(indexName string, commit interface{}) error {
return fmt.Errorf("TODO")
}
// IndexBlob is not implemented yet: it ignores indexName and blob and always
// returns a "TODO" error.
func (c *Client) IndexBlob(indexName string, blob interface{}) error {
return fmt.Errorf("TODO")
}
// RemoveBlob is not implemented yet: it ignores indexName and blob and always
// returns a "TODO" error.
func (c *Client) RemoveBlob(indexName string, blob interface{}) error {
return fmt.Errorf("TODO")
}
Loading
Loading
@@ -86,7 +86,7 @@ func (r *Repo) Diff() (difftree.Changes, error) {
return difftree.DiffTree(fromTree, toTree)
}
 
type FileFunc func(file *object.File) error
type FileFunc func(file *object.File, fromCommit, toCommit *object.Commit) error
 
func (r *Repo) EachFileChange(ins, mod, del FileFunc) error {
changes, err := r.Diff()
Loading
Loading
@@ -103,13 +103,13 @@ func (r *Repo) EachFileChange(ins, mod, del FileFunc) error {
switch change.Action {
case difftree.Insert:
toF.Name = change.To.Name
err = ins(toF)
err = ins(toF, r.FromCommit, r.ToCommit)
case difftree.Modify:
toF.Name = change.To.Name
err = mod(toF)
err = mod(toF, r.FromCommit, r.ToCommit)
case difftree.Delete:
fromF.Name = change.From.Name
err = del(fromF)
err = del(fromF, r.FromCommit, r.ToCommit)
default:
err = fmt.Errorf("Unrecognised change calculating diff: %+v", change)
}
Loading
Loading
package indexer
import (
"fmt"
"io/ioutil"
"srcd.works/go-git.v4/plumbing/object"
)
// Blob holds the fields recorded for one file at one commit. The struct tags
// name the JSON key used for each field.
type Blob struct {
	OID       string `json:"oid"`
	RepoID    string `json:"rid"`
	CommitSHA string `json:"commit_sha"`
	Content   string `json:"content"`
	Path      string `json:"path"`

	// Filename intentionally repeats the file name. Rationale copied from
	// gitlab-elasticsearch-git:
	//
	//   We're duplicating file_name parameter here because we need another
	//   analyzer for it.
	//
	//   Ideally this should be done with copy_to: 'blob.file_name' option
	//   but it does not work in ES v2.3.*. We're doing it so to not make
	//   users install newest versions
	//
	//   https://github.com/elastic/elasticsearch-mapper-attachments/issues/124
	Filename string `json:"file_name"`
	Language string `json:"language"`
}

// ID returns the blob's identifier: CommitSHA and Path joined by an
// underscore.
func (b *Blob) ID() string {
	const layout = "%s_%s"
	return fmt.Sprintf(layout, b.CommitSHA, b.Path)
}
// BuildBlob reads the entire contents of file's blob into memory and returns a
// Blob describing it, tagged with commitSHA and repoID. Path and Filename are
// both set to file.Name, and Language comes from DetectLanguage. If the blob
// cannot be opened or read, the error is returned with a nil Blob.
func BuildBlob(file *object.File, commitSHA, repoID string) (*Blob, error) {
	src, err := file.Blob.Reader()
	if err != nil {
		return nil, err
	}
	defer src.Close()

	// FIXME(nick): This doesn't look cheap. Check the RAM & CPU pressure, esp.
	// for large blobs
	raw, err := ioutil.ReadAll(src)
	if err != nil {
		return nil, err
	}

	blob := &Blob{
		OID:       file.Blob.Hash.String(),
		RepoID:    repoID,
		CommitSHA: commitSHA,
		Content:   string(raw),
		Path:      file.Name,
		Filename:  file.Name,
		Language:  DetectLanguage(raw),
	}
	return blob, nil
}
// DetectLanguage reports the language of data.
//
// FIXME: implement this. For now data is not inspected and every input,
// including nil, yields "Text".
func DetectLanguage(data []byte) string {
	const placeholder = "Text"
	return placeholder
}
Loading
Loading
@@ -11,6 +11,9 @@ import (
 
// Submitter is the backend an Indexer hands its documents to. Indexer embeds
// it and passes its own IndexName as indexName on every call.
type Submitter interface {
// IndexCommit is called by Indexer.SubmitCommit with the built commit.
IndexCommit(indexName string, commit interface{}) error
// IndexBlob is called by Indexer.SubmitBlob with the built *Blob.
IndexBlob(indexName string, blob interface{}) error
// RemoveBlob is called by Indexer.RemoveBlob with a *Blob describing the
// file being deleted.
RemoveBlob(indexName string, blob interface{}) error
// Flush presumably sends out anything still buffered — confirm against the
// implementation.
Flush() error
}
 
Loading
Loading
@@ -21,29 +24,32 @@ type Indexer struct {
Submitter
}
 
// FIXME: none of the indexers worry about encoding right now
func (i *Indexer) SubmitCommit(c *object.Commit) error {
commit := BuildCommit(c, i.ProjectID)
 
return i.Submitter.IndexCommit(i.IndexName, commit)
}
 
func (i *Indexer) SubmitBlob(file *object.File) error {
log.Print("Write: ", file.Name)
// TODO: touch Submitter
// Read & json-encode the file so we do the same work as the Ruby version
//reader, err := file.Blob.Reader()
//if err != nil {
// os.Exit(1)
//}
//json.NewEncoder(ioutil.Discard).Encode(&Req{&ReaderAsJSONString{reader}})
//reader.Close()
return nil
// SubmitBlob logs the file name, builds a Blob for f as of toCommit, and
// passes it to the Submitter for indexing under IndexName. The from-commit
// argument is unused. Any error from BuildBlob or the Submitter is returned
// unchanged.
func (i *Indexer) SubmitBlob(f *object.File, _, toCommit *object.Commit) error {
	log.Print("Write: ", f.Name)

	// TODO(nick): Existing code doesn't index blobs > 10MiB in size

	// BuildBlob is declared as (file, commitSHA, repoID): the commit hash goes
	// second and the project ID third. Passing them the other way round stores
	// the project ID as the commit SHA and vice versa.
	blob, err := BuildBlob(f, toCommit.Hash.String(), i.ProjectID)
	if err != nil {
		return err
	}

	return i.Submitter.IndexBlob(i.IndexName, blob)
}
 
func (i *Indexer) RemoveBlob(file *object.File) error {
// RemoveBlob logs the file name and asks the Submitter to remove file's blob,
// as of toCommit, from IndexName. The from-commit argument is unused. The
// Submitter's error, if any, is returned unchanged.
func (i *Indexer) RemoveBlob(file *object.File, _, toCommit *object.Commit) error {
	log.Print("Delete: ", file.Name)

	// Blob.ID() is derived from CommitSHA and Path, so both must be set for
	// the Submitter to be able to tell which document to remove.
	blob := &Blob{
		RepoID:    i.ProjectID,
		CommitSHA: toCommit.Hash.String(),
		Path:      file.Name,
	}

	// TODO: this should probably pass the blob id rather than the Blob itself.
	return i.Submitter.RemoveBlob(i.IndexName, blob)
}
 
func (i *Indexer) IndexCommits() error {
Loading
Loading
Loading
Loading
@@ -14,36 +14,37 @@ func main() {
log.Fatalf("Usage: %s <project-id> <project-path>", os.Args[0])
}
 
esClient, err := elastic.FromEnv()
if err != nil {
log.Fatalln(err)
}
projectID := os.Args[1]
projectPath := os.Args[2]
fromSHA := os.Getenv("FROM_SHA")
toSHA := os.Getenv("TO_SHA")
railsEnv := os.Getenv("RAILS_ENV")
 
indexName := "gitlab"
if railsEnv != "" {
indexName = indexName + "-" + railsEnv
}
repo, err := git.NewRepo(projectPath, fromSHA, toSHA)
if err != nil {
log.Fatalf("Failed to open %s: %s", projectPath, err)
}
 
log.Printf("Indexing from %s to %s", repo.FromHash, repo.ToHash)
log.Printf("Project ID: %s, Rails env: %s", projectID, railsEnv)
log.Printf("Project ID: %s, Rails env: %s, index: %s", projectID, railsEnv, indexName)
esClient, err := elastic.FromEnv()
if err != nil {
log.Fatalln(err)
}
 
idx := &indexer.Indexer{
IndexName: "gitlab",
IndexName: indexName,
ProjectID: projectID,
Submitter: esClient,
Repo: repo,
}
 
if railsEnv != "" {
idx.IndexName = idx.IndexName + "-" + railsEnv
}
if err := idx.Index(); err != nil {
log.Fatalln("Indexing error: ", err)
 
Loading
Loading
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment