aboutsummaryrefslogtreecommitdiffstats
path: root/utils
diff options
context:
space:
mode:
authorRoman Bataev <roman@bataev.me>2023-08-06 16:40:23 -0400
committerRoman Bataev <roman@bataev.me>2023-08-21 22:30:00 -0400
commitc5b2e50b8907555538f52356fc04b2a5820b5c6f (patch)
tree41e9e9b9a0c231f2c86819804b0b6d9bc16823fb /utils
parentcd6170c6f808453f58dcfd15c6c59345e3df402b (diff)
downloadgo-git-c5b2e50b8907555538f52356fc04b2a5820b5c6f.tar.gz
utils: filesystem, Calculate filesystem node's hash lazily.
The main motivation behind this change is to speed up status operation. Currently it's very slow, especially for repositories with lots of ignored files (e.g. node.js repository with node_modules directory). One of the reasons for this slowness is the fact that traversing filesystem involves calculating file hashes for all files, even if those hashes are not needed in the end because the files are in gitignore. On my machine, for a random repository with sizable (ignored) node_modules directory this changes bring the execution time for Worktree.Status from ~3.5s to ~1.4s. This is still very slow, but a significant improvement. A better fix (instead of or in addition to this one) would be to avoid traversing ignored files in the first place. However, such change seem to be more intrusive and will require much deeper understanding of the codebase.
Diffstat (limited to 'utils')
-rw-r--r--utils/merkletrie/filesystem/node.go76
1 files changed, 37 insertions, 39 deletions
diff --git a/utils/merkletrie/filesystem/node.go b/utils/merkletrie/filesystem/node.go
index ad169ff..f9a54d7 100644
--- a/utils/merkletrie/filesystem/node.go
+++ b/utils/merkletrie/filesystem/node.go
@@ -29,6 +29,8 @@ type node struct {
hash []byte
children []noder.Noder
isDir bool
+ mode os.FileMode
+ size int64
}
// NewRootNode returns the root node based on a given billy.Filesystem.
@@ -50,6 +52,9 @@ func NewRootNode(
//
// The hash of a directory is always a 24-bytes slice of zero values
func (n *node) Hash() []byte {
+ if n.hash == nil {
+ n.calculateHash()
+ }
return n.hash
}
@@ -117,81 +122,74 @@ func (n *node) calculateChildren() error {
func (n *node) newChildNode(file os.FileInfo) (*node, error) {
path := path.Join(n.path, file.Name())
- hash, err := n.calculateHash(path, file)
- if err != nil {
- return nil, err
- }
-
node := &node{
fs: n.fs,
submodules: n.submodules,
path: path,
- hash: hash,
isDir: file.IsDir(),
+ size: file.Size(),
+ mode: file.Mode(),
}
- if hash, isSubmodule := n.submodules[path]; isSubmodule {
- node.hash = append(hash[:], filemode.Submodule.Bytes()...)
+ if _, isSubmodule := n.submodules[path]; isSubmodule {
node.isDir = false
}
return node, nil
}
-func (n *node) calculateHash(path string, file os.FileInfo) ([]byte, error) {
- if file.IsDir() {
- return make([]byte, 24), nil
- }
-
- var hash plumbing.Hash
- var err error
- if file.Mode()&os.ModeSymlink != 0 {
- hash, err = n.doCalculateHashForSymlink(path, file)
- } else {
- hash, err = n.doCalculateHashForRegular(path, file)
+func (n *node) calculateHash() {
+ if n.isDir {
+ n.hash = make([]byte, 24)
+ return
}
-
+ mode, err := filemode.NewFromOSFileMode(n.mode)
if err != nil {
- return nil, err
+ n.hash = plumbing.ZeroHash[:]
+ return
}
-
- mode, err := filemode.NewFromOSFileMode(file.Mode())
- if err != nil {
- return nil, err
+ if submoduleHash, isSubmodule := n.submodules[n.path]; isSubmodule {
+ n.hash = append(submoduleHash[:], filemode.Submodule.Bytes()...)
+ return
}
-
- return append(hash[:], mode.Bytes()...), nil
+ var hash plumbing.Hash
+ if n.mode&os.ModeSymlink != 0 {
+ hash = n.doCalculateHashForSymlink()
+ } else {
+ hash = n.doCalculateHashForRegular()
+ }
+ n.hash = append(hash[:], mode.Bytes()...)
}
-func (n *node) doCalculateHashForRegular(path string, file os.FileInfo) (plumbing.Hash, error) {
- f, err := n.fs.Open(path)
+func (n *node) doCalculateHashForRegular() plumbing.Hash {
+ f, err := n.fs.Open(n.path)
if err != nil {
- return plumbing.ZeroHash, err
+ return plumbing.ZeroHash
}
defer f.Close()
- h := plumbing.NewHasher(plumbing.BlobObject, file.Size())
+ h := plumbing.NewHasher(plumbing.BlobObject, n.size)
if _, err := io.Copy(h, f); err != nil {
- return plumbing.ZeroHash, err
+ return plumbing.ZeroHash
}
- return h.Sum(), nil
+ return h.Sum()
}
-func (n *node) doCalculateHashForSymlink(path string, file os.FileInfo) (plumbing.Hash, error) {
- target, err := n.fs.Readlink(path)
+func (n *node) doCalculateHashForSymlink() plumbing.Hash {
+ target, err := n.fs.Readlink(n.path)
if err != nil {
- return plumbing.ZeroHash, err
+ return plumbing.ZeroHash
}
- h := plumbing.NewHasher(plumbing.BlobObject, file.Size())
+ h := plumbing.NewHasher(plumbing.BlobObject, n.size)
if _, err := h.Write([]byte(target)); err != nil {
- return plumbing.ZeroHash, err
+ return plumbing.ZeroHash
}
- return h.Sum(), nil
+ return h.Sum()
}
func (n *node) String() string {