package dag import ( "encoding/json" "fmt" "strconv" "strings" "github.com/ProtonMail/go-crypto/openpgp" "github.com/pkg/errors" "github.com/git-bug/git-bug/entities/identity" "github.com/git-bug/git-bug/entity" "github.com/git-bug/git-bug/repository" "github.com/git-bug/git-bug/util/lamport" ) const opsEntryName = "ops" const extraEntryName = "extra" const versionEntryPrefix = "version-" const createClockEntryPrefix = "create-clock-" const editClockEntryPrefix = "edit-clock-" // operationPack is a wrapper structure to store multiple operations in a single git blob. // Additionally, it holds and stores the metadata for those operations. type operationPack struct { // An identifier, taken from a hash of the serialized Operations. id entity.Id // The author of the Operations. Must be the same author for all the Operations. Author identity.Interface // The list of Operation stored in the operationPack Operations []Operation // Encode the entity's logical time of creation across all entities of the same type. // Only exist on the root operationPack CreateTime lamport.Time // Encode the entity's logical time of last edition across all entities of the same type. // Exist on all operationPack EditTime lamport.Time } func (opp *operationPack) Id() entity.Id { if opp.id == "" || opp.id == entity.UnsetId { // This means we are trying to get the opp's Id *before* it has been stored. // As the Id is computed based on the actual bytes written on the disk, we are going to predict // those and then get the Id. This is safe as it will be the exact same code writing on disk later. data, err := json.Marshal(opp) if err != nil { panic(err) } opp.id = entity.DeriveId(data) } return opp.id } func (opp *operationPack) MarshalJSON() ([]byte, error) { return json.Marshal(struct { Author identity.Interface `json:"author"` Operations []Operation `json:"ops"` }{ Author: opp.Author, Operations: opp.Operations, }) } func (opp *operationPack) Validate() error { if opp.Author == nil { return fmt.Errorf("missing author") } for _, op := range opp.Operations { if op.Author().Id() != opp.Author.Id() { return fmt.Errorf("operation has different author than the operationPack's") } } if opp.EditTime == 0 { return fmt.Errorf("lamport edit time is zero") } return nil } // Write writes the OperationPack in git, with zero, one or more parent commits. // If the repository has a key pair able to sign (that is, with a private key), the resulting commit is signed with that key. // Return the hash of the created commit. func (opp *operationPack) Write(def Definition, repo repository.Repo, parentCommit ...repository.Hash) (repository.Hash, error) { if err := opp.Validate(); err != nil { return "", err } // For different reason, we store the clocks and format version directly in the git tree. // Version has to be accessible before any attempt to decode to return early with a unique error. // Clocks could possibly be stored in the git blob but it's nice to separate data and metadata, and // we are storing something directly in the tree already so why not. // // To have a valid Tree, we point the "fake" entries to always the same value, the empty blob. emptyBlobHash, err := repo.StoreData([]byte{}) if err != nil { return "", err } // Write the Ops as a Git blob containing the serialized array of operations data, err := json.Marshal(opp) if err != nil { return "", err } // compute the Id while we have the serialized data opp.id = entity.DeriveId(data) hash, err := repo.StoreData(data) if err != nil { return "", err } // Make a Git tree referencing this blob and encoding the other values: // - format version // - clocks // - extra data tree := []repository.TreeEntry{ {ObjectType: repository.Blob, Hash: emptyBlobHash, Name: fmt.Sprintf(versionEntryPrefix+"%d", def.FormatVersion)}, {ObjectType: repository.Blob, Hash: hash, Name: opsEntryName}, {ObjectType: repository.Blob, Hash: emptyBlobHash, Name: fmt.Sprintf(editClockEntryPrefix+"%d", opp.EditTime)}, } if opp.CreateTime > 0 { tree = append(tree, repository.TreeEntry{ ObjectType: repository.Blob, Hash: emptyBlobHash, Name: fmt.Sprintf(createClockEntryPrefix+"%d", opp.CreateTime), }) } if extraTree := opp.makeExtraTree(); len(extraTree) > 0 { extraTreeHash, err := repo.StoreTree(extraTree) if err != nil { return "", err } tree = append(tree, repository.TreeEntry{ ObjectType: repository.Tree, Hash: extraTreeHash, Name: extraEntryName, }) } // Store the tree treeHash, err := repo.StoreTree(tree) if err != nil { return "", err } // Write a Git commit referencing the tree, with the previous commit as parent // If we have keys, sign. var commitHash repository.Hash // Sign the commit if we have a key signingKey, err := opp.Author.SigningKey(repo) if err != nil { return "", err } if signingKey != nil { commitHash, err = repo.StoreSignedCommit(treeHash, signingKey.PGPEntity(), parentCommit...) } else { commitHash, err = repo.StoreCommit(treeHash, parentCommit...) } if err != nil { return "", err } return commitHash, nil } func (opp *operationPack) makeExtraTree() []repository.TreeEntry { var tree []repository.TreeEntry counter := 0 added := make(map[repository.Hash]interface{}) for _, ops := range opp.Operations { ops, ok := ops.(OperationWithFiles) if !ok { continue } for _, file := range ops.GetFiles() { if _, has := added[file]; !has { tree = append(tree, repository.TreeEntry{ ObjectType: repository.Blob, Hash: file, // The name is not important here, we only need to // reference the blob. Name: fmt.Sprintf("file%d", counter), }) counter++ added[file] = struct{}{} } } } return tree } // readOperationPack read the operationPack encoded in git at the given Tree hash. // // Validity of the Lamport clocks is left for the caller to decide. func readOperationPack(def Definition, repo repository.RepoData, resolvers entity.Resolvers, commit repository.Commit) (*operationPack, error) { entries, err := repo.ReadTree(commit.TreeHash) if err != nil { return nil, err } // check the format version first, fail early instead of trying to read something var version uint for _, entry := range entries { if strings.HasPrefix(entry.Name, versionEntryPrefix) { v, err := strconv.ParseUint(strings.TrimPrefix(entry.Name, versionEntryPrefix), 10, 64) if err != nil { return nil, errors.Wrap(err, "can't read format version") } if v > 1<<12 { return nil, fmt.Errorf("format version too big") } version = uint(v) break } } if version == 0 { return nil, entity.NewErrUnknownFormat(def.FormatVersion) } if version != def.FormatVersion { return nil, entity.NewErrInvalidFormat(version, def.FormatVersion) } var id entity.Id var author identity.Interface var ops []Operation var createTime lamport.Time var editTime lamport.Time for _, entry := range entries { switch { case entry.Name == opsEntryName: data, err := repo.ReadData(entry.Hash) if err != nil { return nil, errors.Wrap(err, "failed to read git blob data") } ops, author, err = unmarshallPack(def, resolvers, data) if err != nil { return nil, err } id = entity.DeriveId(data) case strings.HasPrefix(entry.Name, createClockEntryPrefix): v, err := strconv.ParseUint(strings.TrimPrefix(entry.Name, createClockEntryPrefix), 10, 64) if err != nil { return nil, errors.Wrap(err, "can't read creation lamport time") } createTime = lamport.Time(v) case strings.HasPrefix(entry.Name, editClockEntryPrefix): v, err := strconv.ParseUint(strings.TrimPrefix(entry.Name, editClockEntryPrefix), 10, 64) if err != nil { return nil, errors.Wrap(err, "can't read edit lamport time") } editTime = lamport.Time(v) } } // Verify signature if we expect one keys := author.ValidKeysAtTime(fmt.Sprintf(editClockPattern, def.Namespace), editTime) if len(keys) > 0 { // this is a *very* convoluted and inefficient way to make OpenPGP accept to check a signature, but anything // else goes against the grain and make it very unhappy. keyring := openpgp.EntityList{} for _, key := range keys { keyring = append(keyring, key.PGPEntity()) } _, err = openpgp.CheckDetachedSignature(keyring, commit.SignedData, commit.Signature, nil) if err != nil { return nil, fmt.Errorf("signature failure: %v", err) } } return &operationPack{ id: id, Author: author, Operations: ops, CreateTime: createTime, EditTime: editTime, }, nil } // readOperationPackClock is similar to readOperationPack but only read and decode the Lamport clocks. // Validity of those is left for the caller to decide. func readOperationPackClock(repo repository.RepoData, commit repository.Commit) (lamport.Time, lamport.Time, error) { entries, err := repo.ReadTree(commit.TreeHash) if err != nil { return 0, 0, err } var createTime lamport.Time var editTime lamport.Time for _, entry := range entries { switch { case strings.HasPrefix(entry.Name, createClockEntryPrefix): v, err := strconv.ParseUint(strings.TrimPrefix(entry.Name, createClockEntryPrefix), 10, 64) if err != nil { return 0, 0, errors.Wrap(err, "can't read creation lamport time") } createTime = lamport.Time(v) case strings.HasPrefix(entry.Name, editClockEntryPrefix): v, err := strconv.ParseUint(strings.TrimPrefix(entry.Name, editClockEntryPrefix), 10, 64) if err != nil { return 0, 0, errors.Wrap(err, "can't read edit lamport time") } editTime = lamport.Time(v) } } return createTime, editTime, nil } // unmarshallPack delegate the unmarshalling of the Operation's JSON to the decoding // function provided by the concrete entity. This gives access to the concrete type of each // Operation. func unmarshallPack(def Definition, resolvers entity.Resolvers, data []byte) ([]Operation, identity.Interface, error) { aux := struct { Author identity.IdentityStub `json:"author"` Operations []json.RawMessage `json:"ops"` }{} if err := json.Unmarshal(data, &aux); err != nil { return nil, nil, err } if aux.Author.Id() == "" || aux.Author.Id() == entity.UnsetId { return nil, nil, fmt.Errorf("missing author") } author, err := entity.Resolve[identity.Interface](resolvers, aux.Author.Id()) if err != nil { return nil, nil, err } ops := make([]Operation, 0, len(aux.Operations)) for _, raw := range aux.Operations { // delegate to specialized unmarshal function op, err := def.OperationUnmarshaler(raw, resolvers) if err != nil { return nil, nil, err } // Set the id from the serialized data op.setId(entity.DeriveId(raw)) // Set the author, taken from the OperationPack op.setAuthor(author) ops = append(ops, op) } return ops, author, nil }