diff options
author | Roman Bataev <roman@bataev.me> | 2023-08-06 16:40:23 -0400 |
---|---|---|
committer | Roman Bataev <roman@bataev.me> | 2023-08-21 22:30:00 -0400 |
commit | c5b2e50b8907555538f52356fc04b2a5820b5c6f (patch) | |
tree | 41e9e9b9a0c231f2c86819804b0b6d9bc16823fb /utils/merkletrie | |
parent | cd6170c6f808453f58dcfd15c6c59345e3df402b (diff) | |
download | go-git-c5b2e50b8907555538f52356fc04b2a5820b5c6f.tar.gz |
utils: filesystem, Calculate filesystem node's hash lazily.
The main motivation behind this change is to speed up status operation.
Currently it's very slow, especially for repositories with lots of ignored
files (e.g. node.js repository with node_modules directory).
One of the reasons for this slowness is the fact that traversing filesystem
involves calculating file hashes for all files, even if those hashes are
not needed in the end because the files are in gitignore.
On my machine, for a random repository with sizable (ignored) node_modules
directory this changes bring the execution time for Worktree.Status
from ~3.5s to ~1.4s. This is still very slow, but a significant improvement.
A better fix (instead of or in addition to this one) would be to avoid traversing
ignored files in the first place. However, such change seem to be more
intrusive and will require much deeper understanding of the codebase.
Diffstat (limited to 'utils/merkletrie')
-rw-r--r-- | utils/merkletrie/filesystem/node.go | 76 |
1 files changed, 37 insertions, 39 deletions
diff --git a/utils/merkletrie/filesystem/node.go b/utils/merkletrie/filesystem/node.go index ad169ff..f9a54d7 100644 --- a/utils/merkletrie/filesystem/node.go +++ b/utils/merkletrie/filesystem/node.go @@ -29,6 +29,8 @@ type node struct { hash []byte children []noder.Noder isDir bool + mode os.FileMode + size int64 } // NewRootNode returns the root node based on a given billy.Filesystem. @@ -50,6 +52,9 @@ func NewRootNode( // // The hash of a directory is always a 24-bytes slice of zero values func (n *node) Hash() []byte { + if n.hash == nil { + n.calculateHash() + } return n.hash } @@ -117,81 +122,74 @@ func (n *node) calculateChildren() error { func (n *node) newChildNode(file os.FileInfo) (*node, error) { path := path.Join(n.path, file.Name()) - hash, err := n.calculateHash(path, file) - if err != nil { - return nil, err - } - node := &node{ fs: n.fs, submodules: n.submodules, path: path, - hash: hash, isDir: file.IsDir(), + size: file.Size(), + mode: file.Mode(), } - if hash, isSubmodule := n.submodules[path]; isSubmodule { - node.hash = append(hash[:], filemode.Submodule.Bytes()...) + if _, isSubmodule := n.submodules[path]; isSubmodule { node.isDir = false } return node, nil } -func (n *node) calculateHash(path string, file os.FileInfo) ([]byte, error) { - if file.IsDir() { - return make([]byte, 24), nil - } - - var hash plumbing.Hash - var err error - if file.Mode()&os.ModeSymlink != 0 { - hash, err = n.doCalculateHashForSymlink(path, file) - } else { - hash, err = n.doCalculateHashForRegular(path, file) +func (n *node) calculateHash() { + if n.isDir { + n.hash = make([]byte, 24) + return } - + mode, err := filemode.NewFromOSFileMode(n.mode) if err != nil { - return nil, err + n.hash = plumbing.ZeroHash[:] + return } - - mode, err := filemode.NewFromOSFileMode(file.Mode()) - if err != nil { - return nil, err + if submoduleHash, isSubmodule := n.submodules[n.path]; isSubmodule { + n.hash = append(submoduleHash[:], filemode.Submodule.Bytes()...) + return } - - return append(hash[:], mode.Bytes()...), nil + var hash plumbing.Hash + if n.mode&os.ModeSymlink != 0 { + hash = n.doCalculateHashForSymlink() + } else { + hash = n.doCalculateHashForRegular() + } + n.hash = append(hash[:], mode.Bytes()...) } -func (n *node) doCalculateHashForRegular(path string, file os.FileInfo) (plumbing.Hash, error) { - f, err := n.fs.Open(path) +func (n *node) doCalculateHashForRegular() plumbing.Hash { + f, err := n.fs.Open(n.path) if err != nil { - return plumbing.ZeroHash, err + return plumbing.ZeroHash } defer f.Close() - h := plumbing.NewHasher(plumbing.BlobObject, file.Size()) + h := plumbing.NewHasher(plumbing.BlobObject, n.size) if _, err := io.Copy(h, f); err != nil { - return plumbing.ZeroHash, err + return plumbing.ZeroHash } - return h.Sum(), nil + return h.Sum() } -func (n *node) doCalculateHashForSymlink(path string, file os.FileInfo) (plumbing.Hash, error) { - target, err := n.fs.Readlink(path) +func (n *node) doCalculateHashForSymlink() plumbing.Hash { + target, err := n.fs.Readlink(n.path) if err != nil { - return plumbing.ZeroHash, err + return plumbing.ZeroHash } - h := plumbing.NewHasher(plumbing.BlobObject, file.Size()) + h := plumbing.NewHasher(plumbing.BlobObject, n.size) if _, err := h.Write([]byte(target)); err != nil { - return plumbing.ZeroHash, err + return plumbing.ZeroHash } - return h.Sum(), nil + return h.Sum() } func (n *node) String() string { |