diff options
-rw-r--r-- | .travis.yml | 4 | ||||
-rw-r--r-- | README.md | 99 | ||||
-rw-r--r-- | blame.go | 54 | ||||
-rw-r--r-- | clients/common.go | 2 | ||||
-rw-r--r-- | clients/common/common.go | 1 | ||||
-rw-r--r-- | clients/http/common.go | 1 | ||||
-rw-r--r-- | core/object.go | 1 | ||||
-rw-r--r-- | doc.go | 35 | ||||
-rw-r--r-- | examples/basic/main.go | 41 | ||||
-rw-r--r-- | references.go | 17 | ||||
-rw-r--r-- | references_test.go | 2 | ||||
-rw-r--r-- | repository.go | 3 |
12 files changed, 209 insertions, 51 deletions
diff --git a/.travis.yml b/.travis.yml index 9b7ee12..4c3a723 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,10 @@ go: - 1.4 - 1.5 - tip + +matrix: + allow_failures: + - go: tip install: - rm -rf $GOPATH/src/gopkg.in/src-d @@ -1 +1,98 @@ -# go-git [![GoDoc](https://godoc.org/gopkg.in/src-d/go-git.v2?status.svg)](https://godoc.org/gopkg.in/src-d/go-git.v2) [![Build Status](https://travis-ci.org/src-d/go-git.svg)](https://travis-ci.org/src-d/go-git) [![codecov.io](https://codecov.io/github/src-d/go-git/coverage.svg)](https://codecov.io/github/src-d/go-git) +# go-git [![GoDoc](https://godoc.org/gopkg.in/src-d/go-git.v2?status.svg)](https://godoc.org/gopkg.in/src-d/go-git.v2) [![Build Status](https://travis-ci.org/src-d/go-git.svg)](https://travis-ci.org/src-d/go-git) [![codecov.io](https://codecov.io/github/src-d/go-git/coverage.svg)](https://codecov.io/github/src-d/go-git) [![codebeat badge](https://codebeat.co/badges/b6cb2f73-9e54-483d-89f9-4b95a911f40c)](https://codebeat.co/projects/github-com-src-d-go-git) + +A low level and highly extensible git client library for **reading** repositories from git servers. It is written in Go from scratch, without any C dependencies. + +We have been following the open/closed principle in its design to facilitate extensions. + +*go-git* does not claim to be a replacement for [git2go](https://github.com/libgit2/git2go) as its approach and functionality are quite different. + +### ok, but why? ... + +At [source{d}](http://sourced.tech) we analyze almost **all** the public open source contributions made to git repositories in the world. + +We want to extract detailed information from each GitHub repository, which requires downloading repository packfiles and analyzing them: extracting their code, authors, dates and the languages and ecosystems they use. We are also interested in knowing who contributes to what, so we can tell top contributors from the more casual ones. 
+ +You can obtain all this information using the standard `git` command running over a local clone of a repository, but this simple solution does not scale well over millions of repositories: we want to avoid having local copies of the unpacked repositories in a regular file system; *go-git* allows us to work with an in-memory representation of repositories instead. + +### I see... but is this production ready? + +*Yes!!!*, we have been using *go-git* at [source{d}](http://sourced.tech) since August 2015 to analyze all GitHub public repositories (i.e. 16M repositories). + +### Coming Soon + +Blame support: right now we are using a forward version of a line-tracking +algorithm and we are having some problems handling merges. The plan is to get +merges right and change to a backward line-tracking algorithm soon. + +Installation +------------ + +The recommended way to install *go-git* is: + +``` +go get -u gopkg.in/src-d/go-git.v2/... +``` + + +Examples +-------- + +Basic example: retrieving the commits for a given repository: + +```go +r, err := git.NewRepository("https://github.com/src-d/go-git", nil) +if err != nil { + panic(err) +} + +if err := r.Pull("origin", "refs/heads/master"); err != nil { + panic(err) +} + +iter := r.Commits() +defer iter.Close() + +for { + commit, err := iter.Next() + if err != nil { + if err == io.EOF { + break + } + + panic(err) + } + + fmt.Println(commit) +} +``` + +Outputs: +``` +commit 2275fa7d0c75d20103f90b0e1616937d5a9fc5e6 +Author: Máximo Cuadros <mcuadros@gmail.com> +Date: 2015-10-23 00:44:33 +0200 +0200 + +commit 35b585759cbf29f8ec428ef89da20705d59f99ec +Author: Carlos Cobo <toqueteos@gmail.com> +Date: 2015-05-20 15:21:37 +0200 +0200 + +commit 7e3259c191a9de23d88b6077dcb1cd427e925432 +Author: Alberto Cortés <alberto@sourced.tech> +Date: 2016-01-21 03:29:57 +0100 +0100 + +commit 24b8ae50db91f3909b11304014564bffc6fdee79 +Author: Alberto Cortés <alberto@sourced.tech> +Date: 2015-12-11 17:57:10 +0100 +0100 +... 
+``` + +Acknowledgements +---------------- + +The earlier versions of the [packfile reader](https://godoc.org/gopkg.in/src-d/go-git.v2/formats/packfile) are based on [git-chain](https://github.com/gitchain/gitchain/blob/master/git/pack.go), a project by [@yrashk](https://github.com/yrashk). + + +License +------- + +MIT, see [LICENSE](LICENSE) @@ -1,11 +1,3 @@ -// Package blame contains blaming functionality for files in the repo. -// -// Blaming a file is finding what commit was the last to modify each of -// the lines in the file, therefore the output of a blaming operation is -// usualy a slice of commits, one commit per line in the file. -// -// This package also provides a pretty print function to output the -// results of a blame in a similar format to the git-blame command. package git import ( @@ -35,41 +27,41 @@ type Blame struct { // Blaming a file is a two step process: // // 1. Create a linear history of the commits affecting a file. We use -// revlist.New for that. +// revlist.New for that. // // 2. Then build a graph with a node for every line in every file in -// the history of the file. +// the history of the file. // -// Each node (line) holds the commit where it was introduced or -// last modified. To achieve that we use the FORWARD algorithm -// described in Zimmermann, et al. "Mining Version Archives for -// Co-changed Lines", in proceedings of the Mining Software -// Repositories workshop, Shanghai, May 22-23, 2006. +// Each node (line) holds the commit where it was introduced or +// last modified. To achieve that we use the FORWARD algorithm +// described in Zimmermann, et al. "Mining Version Archives for +// Co-changed Lines", in proceedings of the Mining Software +// Repositories workshop, Shanghai, May 22-23, 2006. // -// Each node is asigned a commit: Start by the nodes in the first -// commit. Assign that commit as the creator of all its lines. +// Each node is assigned a commit: Start by the nodes in the first +// commit. 
Assign that commit as the creator of all its lines. // -// Then jump to the nodes in the next commit, and calculate the diff -// between the two files. Newly created lines get -// assigned the new commit as its origin. Modified lines also get -// this new commit. Untouched lines retain the old commit. +// Then jump to the nodes in the next commit, and calculate the diff +// between the two files. Newly created lines get +// assigned the new commit as its origin. Modified lines also get +// this new commit. Untouched lines retain the old commit. // -// All this work is done in the assignOrigin function which holds all -// the internal relevant data in a "blame" struct, that is not -// exported. +// All this work is done in the assignOrigin function which holds all +// the internal relevant data in a "blame" struct, that is not +// exported. // -// TODO: ways to improve the efficiency of this function: +// TODO: ways to improve the efficiency of this function: // -// 1. Improve revlist +// 1. Improve revlist // -// 2. Improve how to traverse the history (example a backward -// traversal will be much more efficient) +// 2. Improve how to traverse the history (example a backward +// traversal will be much more efficient) // -// TODO: ways to improve the function in general: +// TODO: ways to improve the function in general: // -// 1. Add memoization between revlist and assign. +// 1. Add memoization between revlist and assign. // -// 2. It is using much more memory than needed, see the TODOs below. +// 2. It is using much more memory than needed, see the TODOs below. 
func (c *Commit) Blame(path string) (*Blame, error) { b := new(blame) b.fRev = c diff --git a/clients/common.go b/clients/common.go index 039404d..f7868cf 100644 --- a/clients/common.go +++ b/clients/common.go @@ -47,7 +47,7 @@ func InstallProtocol(scheme string, service common.GitUploadPackService) { KnownProtocols[scheme] = service } -// NewGitUploadPackService returns the appropiate upload pack service +// NewGitUploadPackService returns the appropriate upload pack service // among of the set of known protocols: HTTP, SSH. See `InstallProtocol` // to add or modify protocols. func NewGitUploadPackService(repoURL string) (common.GitUploadPackService, error) { diff --git a/clients/common/common.go b/clients/common/common.go index a6f6166..ec04dda 100644 --- a/clients/common/common.go +++ b/clients/common/common.go @@ -1,3 +1,4 @@ +// Package common contains utilities used by the clients. package common import ( diff --git a/clients/http/common.go b/clients/http/common.go index f0f2e6b..aa6e7a1 100644 --- a/clients/http/common.go +++ b/clients/http/common.go @@ -1,3 +1,4 @@ +// Package http implements an HTTP client for go-git. package http import ( diff --git a/core/object.go b/core/object.go index 2f3b262..857c6df 100644 --- a/core/object.go +++ b/core/object.go @@ -1,3 +1,4 @@ +// Package core implements the core interfaces and structs used by go-git. package core import ( @@ -0,0 +1,35 @@ +// Package git is a low level and highly extensible git client library for +// reading repositories from git servers. It is written in Go from scratch, +// without any C dependencies. +// +// We have been following the open/closed principle in its design to facilitate +// extensions. 
+// +// Small example extracting the commits from a repository: +// func ExampleBasic_printCommits() { +// r, err := git.NewRepository("https://github.com/src-d/go-git", nil) +// if err != nil { +// panic(err) +// } +// +// if err := r.Pull("origin", "refs/heads/master"); err != nil { +// panic(err) +// } +// +// iter := r.Commits() +// defer iter.Close() +// +// for { +// commit, err := iter.Next() +// if err != nil { +// if err == io.EOF { +// break +// } +// +// panic(err) +// } +// +// fmt.Println(commit) +// } +// } +package git diff --git a/examples/basic/main.go b/examples/basic/main.go new file mode 100644 index 0000000..c922d35 --- /dev/null +++ b/examples/basic/main.go @@ -0,0 +1,41 @@ +package main + +import ( + "fmt" + "io" + "os" + + "gopkg.in/src-d/go-git.v2" +) + +func main() { + fmt.Printf("Retrieving %q ...\n", os.Args[2]) + r, err := git.NewRepository(os.Args[2], nil) + if err != nil { + panic(err) + } + + if err := r.Pull("origin", "refs/heads/master"); err != nil { + panic(err) + } + + dumpCommits(r) +} + +func dumpCommits(r *git.Repository) { + iter := r.Commits() + defer iter.Close() + + for { + commit, err := iter.Next() + if err != nil { + if err == io.EOF { + break + } + + panic(err) + } + + fmt.Println(commit) + } +} diff --git a/references.go b/references.go index 0c57df9..c69917d 100644 --- a/references.go +++ b/references.go @@ -1,20 +1,3 @@ -// Package revlist allows to create the revision history of a file, this -// is, the list of commits in the past that affect the file. -// -// The general idea is to traverse the git commit graph backward, -// flattening the graph into a linear history, and skipping commits that -// are irrelevant for the particular file. -// -// There is no single answer for this operation. The git command -// "git-revlist" returns different histories depending on its arguments -// and some internal heuristics. 
-// -// The current implementation tries to get something similar to what you -// whould get using git-revlist. See the failing tests for some -// insight about how the current implementation and git-revlist differs. -// -// Another way to get the revision history for a file is: -// git log --follow -p -- file package git import ( diff --git a/references_test.go b/references_test.go index ddc1e05..4fa4f76 100644 --- a/references_test.go +++ b/references_test.go @@ -358,7 +358,7 @@ func compareSideBySide(a []string, b []*Commit) string { var cherryPicks = [...][]string{ // repo, path, commit a, commit b - []string{"https://github.com/jamesob/desk.git", "desk", "094d0e7d5d69141c98a606910ba64786c5565da0", "3f34438d54f4a1ca86db8c0f03ed8eb38f20e22c"}, + {"https://github.com/jamesob/desk.git", "desk", "094d0e7d5d69141c98a606910ba64786c5565da0", "3f34438d54f4a1ca86db8c0f03ed8eb38f20e22c"}, } // should detect cherry picks diff --git a/repository.go b/repository.go index b8920c0..2532c8d 100644 --- a/repository.go +++ b/repository.go @@ -53,6 +53,9 @@ func NewPlainRepository() *Repository { } } +// Pull connects to the given remote and fetches the given branch. The branch +// should be provided with its full path, not only the abbreviation, e.g.: +// "refs/heads/master" func (r *Repository) Pull(remoteName, branch string) (err error) { remote, ok := r.Remotes[remoteName] if !ok { |