mirror of
https://github.com/restic/restic.git
synced 2026-06-01 22:59:44 +00:00
Merge pull request #21828 from MichaelEischer/fix-split-pack-index
repository: repair index: correctly handle split index entries
This commit is contained in:
@@ -0,0 +1,16 @@
|
||||
Bugfix: Correct handling of duplicate index entries
|
||||
|
||||
Before restic 0.10.0, a bug could in very rare cases split information
|
||||
about a pack file across multiple index files. Since restic 0.17.0, any
|
||||
operation that rewrites the index (like `prune` or `repair packs`)
|
||||
could lose part of that information, resulting in errors in later
|
||||
`check` or `prune` runs. Those can be fixed by running `repair packs`.
|
||||
Note that only repositories using repository format version 1 might be affected.
|
||||
|
||||
Split pack index entries are no longer lost during index rewrites. The
|
||||
`check` command now reports these cases as errors that can instead be fixed using
|
||||
the `repair packs` command. On older restic versions, running `repair index`
|
||||
twice also fixes the problem.
|
||||
|
||||
https://github.com/restic/restic/issues/21820
|
||||
https://github.com/restic/restic/pull/21828
|
||||
@@ -261,8 +261,15 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args
|
||||
}
|
||||
|
||||
errorsFound := false
|
||||
salvagePacks := restic.NewIDSet()
|
||||
|
||||
for _, hint := range hints {
|
||||
switch hint.(type) {
|
||||
switch hint := hint.(type) {
|
||||
case *repository.ErrIncompletePackEntry:
|
||||
printer.E("%s", hint.Error())
|
||||
salvagePacks.Insert(hint.PackID)
|
||||
errorsFound = true
|
||||
summary.NumErrors++
|
||||
case *repository.ErrDuplicatePacks:
|
||||
printer.S("%s", hint.Error())
|
||||
summary.HintRepairIndex = true
|
||||
@@ -295,7 +302,6 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args
|
||||
|
||||
orphanedPacks := 0
|
||||
errChan := make(chan error)
|
||||
salvagePacks := restic.NewIDSet()
|
||||
|
||||
printer.P("check all packs\n")
|
||||
go chkr.Packs(ctx, errChan)
|
||||
|
||||
@@ -17,6 +17,16 @@ import (
|
||||
|
||||
const maxStreamBufferSize = 4 * 1024 * 1024
|
||||
|
||||
// ErrIncompletePackEntry is returned when indexes contain different data for a pack.
|
||||
type ErrIncompletePackEntry struct {
|
||||
PackID restic.ID
|
||||
Indexes restic.IDSet
|
||||
}
|
||||
|
||||
func (e *ErrIncompletePackEntry) Error() string {
|
||||
return fmt.Sprintf("pack %v has different data in indexes: %v", e.PackID, e.Indexes)
|
||||
}
|
||||
|
||||
// ErrDuplicatePacks is returned when a pack is found in more than one index.
|
||||
type ErrDuplicatePacks struct {
|
||||
PackID restic.ID
|
||||
@@ -79,6 +89,9 @@ func computePackTypes(ctx context.Context, idx restic.ListBlobser) (map[restic.I
|
||||
func (c *Checker) LoadIndex(ctx context.Context, p restic.TerminalCounterFactory) (hints []error, errs []error) {
|
||||
debug.Log("Start")
|
||||
packToIndex := make(map[restic.ID]restic.IDSet)
|
||||
// in restic < 0.10.0, the blobs of a pack could be split over multiple indexes.
|
||||
// by now this is considered as repository damage.
|
||||
packToPackBlobHash := make(map[restic.ID]restic.IDSet)
|
||||
|
||||
// Use the repository's internal loadIndexWithCallback to handle per-index errors
|
||||
err := c.repo.loadIndexWithCallback(ctx, p, func(id restic.ID, idx *index.Index, err error) error {
|
||||
@@ -104,6 +117,14 @@ func (c *Checker) LoadIndex(ctx context.Context, p restic.TerminalCounterFactory
|
||||
packToIndex[blob.PackID].Insert(id)
|
||||
}
|
||||
|
||||
for pbs := range idx.EachByPack(ctx, restic.NewIDSet()) {
|
||||
packBlobHash := index.PackBlobsHash(pbs)
|
||||
if _, ok := packToPackBlobHash[pbs.PackID]; !ok {
|
||||
packToPackBlobHash[pbs.PackID] = restic.NewIDSet()
|
||||
}
|
||||
packToPackBlobHash[pbs.PackID].Insert(packBlobHash)
|
||||
}
|
||||
|
||||
debug.Log("%d blobs processed", cnt)
|
||||
return nil
|
||||
})
|
||||
@@ -120,7 +141,12 @@ func (c *Checker) LoadIndex(ctx context.Context, p restic.TerminalCounterFactory
|
||||
debug.Log("checking for duplicate packs")
|
||||
for packID := range packTypes {
|
||||
debug.Log(" check pack %v: contained in %d indexes", packID, len(packToIndex[packID]))
|
||||
if len(packToIndex[packID]) > 1 {
|
||||
if len(packToPackBlobHash[packID]) > 1 {
|
||||
hints = append(hints, &ErrIncompletePackEntry{
|
||||
PackID: packID,
|
||||
Indexes: packToIndex[packID],
|
||||
})
|
||||
} else if len(packToIndex[packID]) > 1 {
|
||||
hints = append(hints, &ErrDuplicatePacks{
|
||||
PackID: packID,
|
||||
Indexes: packToIndex[packID],
|
||||
|
||||
@@ -3,10 +3,13 @@ package index
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"iter"
|
||||
"math"
|
||||
"slices"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -546,3 +549,21 @@ func (idx *Index) Len(t restic.BlobType) uint {
|
||||
|
||||
return idx.byType[t].len()
|
||||
}
|
||||
|
||||
func PackBlobsHash(pbs restic.PackBlobs) restic.ID {
|
||||
h := sha256.New()
|
||||
h.Write(pbs.PackID[:])
|
||||
|
||||
sortedBlobs := slices.Clone(pbs.Blobs)
|
||||
sortedBlobs.Sort()
|
||||
for _, blob := range sortedBlobs {
|
||||
h.Write(blob.ID[:])
|
||||
buf := make([]byte, 0, 16)
|
||||
buf = binary.LittleEndian.AppendUint32(buf, uint32(blob.Type))
|
||||
buf = binary.LittleEndian.AppendUint32(buf, uint32(blob.Offset))
|
||||
buf = binary.LittleEndian.AppendUint32(buf, uint32(blob.Length))
|
||||
buf = binary.LittleEndian.AppendUint32(buf, uint32(blob.UncompressedLength))
|
||||
h.Write(buf)
|
||||
}
|
||||
return restic.ID(h.Sum(nil))
|
||||
}
|
||||
|
||||
@@ -467,15 +467,27 @@ func (mi *MasterIndex) Rewrite(ctx context.Context, repo restic.Unpacked[restic.
|
||||
|
||||
wg.Go(func() error {
|
||||
defer close(saveCh)
|
||||
// duplicate packs must be tracked separately to allow the `EachByPack` loop to check
|
||||
// for duplicate index entries with different blobs.
|
||||
// this is necessary to work around a bug in restic < 0.10.0 where the blobs of
|
||||
// a pack file could be split over multiple indexes.
|
||||
packBlobsIDSet := restic.NewIDSet()
|
||||
newIndex := NewIndex()
|
||||
for task := range rewriteCh {
|
||||
// always rewrite indexes that include a pack that must be removed or that are not full
|
||||
// always rewrite indexes that include a pack that must be removed or is a duplicate or that are not full
|
||||
if len(task.idx.Packs().Intersect(excludePacks)) == 0 && Full(task.idx) && !Oversized(task.idx) {
|
||||
// make sure that each pack is only stored exactly once in the index
|
||||
excludePacks.Merge(task.idx.Packs())
|
||||
// index is already up to date
|
||||
p.Add(1)
|
||||
continue
|
||||
// check that no pack index entry is a duplicate of an already processed one
|
||||
idxPackBlobsIDSet := restic.NewIDSet()
|
||||
for pbs := range task.idx.EachByPack(wgCtx, excludePacks) {
|
||||
idxPackBlobsIDSet.Insert(PackBlobsHash(pbs))
|
||||
}
|
||||
if len(idxPackBlobsIDSet.Intersect(packBlobsIDSet)) == 0 {
|
||||
// index is already up to date
|
||||
// make sure that each pack is only stored exactly once in the index
|
||||
packBlobsIDSet.Merge(idxPackBlobsIDSet)
|
||||
p.Add(1)
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
ids, err := task.idx.IDs()
|
||||
@@ -485,6 +497,13 @@ func (mi *MasterIndex) Rewrite(ctx context.Context, repo restic.Unpacked[restic.
|
||||
obsolete.Merge(restic.NewIDSet(ids...))
|
||||
|
||||
for pbs := range task.idx.EachByPack(wgCtx, excludePacks) {
|
||||
// only filter pack blobs with matching packID and blobs
|
||||
packBlobsID := PackBlobsHash(pbs)
|
||||
if packBlobsIDSet.Has(packBlobsID) {
|
||||
continue
|
||||
}
|
||||
packBlobsIDSet.Insert(packBlobsID)
|
||||
|
||||
newIndex.StorePack(pbs.PackID, pbs.Blobs)
|
||||
if Full(newIndex) {
|
||||
select {
|
||||
@@ -498,8 +517,6 @@ func (mi *MasterIndex) Rewrite(ctx context.Context, repo restic.Unpacked[restic.
|
||||
if wgCtx.Err() != nil {
|
||||
return wgCtx.Err()
|
||||
}
|
||||
// make sure that each pack is only stored exactly once in the index
|
||||
excludePacks.Merge(task.idx.Packs())
|
||||
p.Add(1)
|
||||
}
|
||||
|
||||
|
||||
@@ -637,3 +637,60 @@ func TestRewriteOversizedIndex(t *testing.T) {
|
||||
ids := mi2.IDs()
|
||||
rtest.Assert(t, len(ids) > 1, "oversized index was not split into multiple indexes")
|
||||
}
|
||||
|
||||
func TestRewriteSplitPacks(t *testing.T) {
|
||||
repo, unpacked, _ := repository.TestRepositoryWithVersion(t, restic.StableRepoVersion)
|
||||
|
||||
bh1 := restic.NewRandomBlobHandle()
|
||||
bh2 := restic.NewRandomBlobHandle()
|
||||
bhOther := restic.NewRandomBlobHandle()
|
||||
|
||||
blob1 := restic.PackedBlob{
|
||||
PackID: restic.NewRandomID(),
|
||||
Blob: restic.Blob{
|
||||
BlobHandle: bh1,
|
||||
Length: uint(crypto.CiphertextLength(10)),
|
||||
Offset: 0,
|
||||
},
|
||||
}
|
||||
blob2 := restic.PackedBlob{
|
||||
PackID: blob1.PackID,
|
||||
Blob: restic.Blob{
|
||||
BlobHandle: bh2,
|
||||
Length: uint(crypto.CiphertextLength(100)),
|
||||
Offset: 10,
|
||||
UncompressedLength: 200,
|
||||
},
|
||||
}
|
||||
// used to force index repacking
|
||||
blobOther := restic.PackedBlob{
|
||||
PackID: restic.NewRandomID(),
|
||||
Blob: restic.Blob{
|
||||
BlobHandle: bhOther,
|
||||
Length: uint(crypto.CiphertextLength(100)),
|
||||
Offset: 10,
|
||||
},
|
||||
}
|
||||
|
||||
mi := index.NewMasterIndex()
|
||||
rtest.OK(t, mi.StorePack(context.TODO(), blob1.PackID, restic.Blobs{blob1.Blob}, unpacked))
|
||||
rtest.OK(t, mi.StorePack(context.TODO(), blobOther.PackID, restic.Blobs{blobOther.Blob}, unpacked))
|
||||
rtest.OK(t, mi.Flush(context.TODO(), unpacked))
|
||||
rtest.OK(t, mi.StorePack(context.TODO(), blob2.PackID, restic.Blobs{blob2.Blob}, unpacked))
|
||||
rtest.OK(t, mi.StorePack(context.TODO(), blobOther.PackID, restic.Blobs{blobOther.Blob}, unpacked))
|
||||
rtest.OK(t, mi.Flush(context.TODO(), unpacked))
|
||||
|
||||
rtest.OK(t, mi.Rewrite(context.TODO(), unpacked, restic.NewIDSet(blobOther.PackID), nil, nil, index.MasterIndexRewriteOpts{}))
|
||||
|
||||
mi = index.NewMasterIndex()
|
||||
rtest.OK(t, mi.Load(context.TODO(), repo, nil, nil))
|
||||
|
||||
// test that all blobs are still in the index
|
||||
for _, blob := range []restic.PackedBlob{blob1, blob2} {
|
||||
blobs := mi.Lookup(blob.BlobHandle)
|
||||
rtest.Equals(t, []restic.PackedBlob{blob}, blobs)
|
||||
}
|
||||
|
||||
blobs := mi.Lookup(blobOther.BlobHandle)
|
||||
rtest.Equals(t, nil, blobs)
|
||||
}
|
||||
|
||||
@@ -53,3 +53,8 @@ func TestBlobsSort(t *testing.T) {
|
||||
rtest.Equals(t, uint(50), blobs[1].Offset)
|
||||
rtest.Equals(t, uint(100), blobs[2].Offset)
|
||||
}
|
||||
|
||||
func TestBlobsSortNilSlice(t *testing.T) {
|
||||
var blobs Blobs
|
||||
blobs.Sort()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user