Merge pull request #21828 from MichaelEischer/fix-split-pack-index

repository: repair index: correctly handle split index entries
This commit is contained in:
Michael Eischer
2026-05-31 16:30:49 +02:00
committed by GitHub
7 changed files with 159 additions and 11 deletions
+16
View File
@@ -0,0 +1,16 @@
Bugfix: Correct handling of duplicate index entries
Before restic 0.10.0, a bug could in very rare cases split information
about a pack file across multiple index files. Since restic 0.17.0, any
operation that rewrites the index (like `prune` or `repair packs`)
could lose part of that information, resulting in errors in later
`check` or `prune` runs. Those can be fixed by running `repair packs`.
Note that only repositories using repository format version 1 might be affected.
Split pack index entries are no longer lost during index rewrites. The
`check` command now reports these cases as errors that can instead be fixed using
the `repair packs` command. On older restic versions, running `repair index`
twice also fixes the problem.
https://github.com/restic/restic/issues/21820
https://github.com/restic/restic/pull/21828
+8 -2
View File
@@ -261,8 +261,15 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args
}
errorsFound := false
salvagePacks := restic.NewIDSet()
for _, hint := range hints {
switch hint.(type) {
switch hint := hint.(type) {
case *repository.ErrIncompletePackEntry:
printer.E("%s", hint.Error())
salvagePacks.Insert(hint.PackID)
errorsFound = true
summary.NumErrors++
case *repository.ErrDuplicatePacks:
printer.S("%s", hint.Error())
summary.HintRepairIndex = true
@@ -295,7 +302,6 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args
orphanedPacks := 0
errChan := make(chan error)
salvagePacks := restic.NewIDSet()
printer.P("check all packs\n")
go chkr.Packs(ctx, errChan)
+27 -1
View File
@@ -17,6 +17,16 @@ import (
const maxStreamBufferSize = 4 * 1024 * 1024
// ErrIncompletePackEntry is returned when indexes contain different data for a pack.
type ErrIncompletePackEntry struct {
PackID restic.ID
Indexes restic.IDSet
}
func (e *ErrIncompletePackEntry) Error() string {
return fmt.Sprintf("pack %v has different data in indexes: %v", e.PackID, e.Indexes)
}
// ErrDuplicatePacks is returned when a pack is found in more than one index.
type ErrDuplicatePacks struct {
PackID restic.ID
@@ -79,6 +89,9 @@ func computePackTypes(ctx context.Context, idx restic.ListBlobser) (map[restic.I
func (c *Checker) LoadIndex(ctx context.Context, p restic.TerminalCounterFactory) (hints []error, errs []error) {
debug.Log("Start")
packToIndex := make(map[restic.ID]restic.IDSet)
// in restic < 0.10.0, the blobs of a pack could be split over multiple indexes.
// by now this is considered as repository damage.
packToPackBlobHash := make(map[restic.ID]restic.IDSet)
// Use the repository's internal loadIndexWithCallback to handle per-index errors
err := c.repo.loadIndexWithCallback(ctx, p, func(id restic.ID, idx *index.Index, err error) error {
@@ -104,6 +117,14 @@ func (c *Checker) LoadIndex(ctx context.Context, p restic.TerminalCounterFactory
packToIndex[blob.PackID].Insert(id)
}
for pbs := range idx.EachByPack(ctx, restic.NewIDSet()) {
packBlobHash := index.PackBlobsHash(pbs)
if _, ok := packToPackBlobHash[pbs.PackID]; !ok {
packToPackBlobHash[pbs.PackID] = restic.NewIDSet()
}
packToPackBlobHash[pbs.PackID].Insert(packBlobHash)
}
debug.Log("%d blobs processed", cnt)
return nil
})
@@ -120,7 +141,12 @@ func (c *Checker) LoadIndex(ctx context.Context, p restic.TerminalCounterFactory
debug.Log("checking for duplicate packs")
for packID := range packTypes {
debug.Log(" check pack %v: contained in %d indexes", packID, len(packToIndex[packID]))
if len(packToIndex[packID]) > 1 {
if len(packToPackBlobHash[packID]) > 1 {
hints = append(hints, &ErrIncompletePackEntry{
PackID: packID,
Indexes: packToIndex[packID],
})
} else if len(packToIndex[packID]) > 1 {
hints = append(hints, &ErrDuplicatePacks{
PackID: packID,
Indexes: packToIndex[packID],
+21
View File
@@ -3,10 +3,13 @@ package index
import (
"bytes"
"context"
"crypto/sha256"
"encoding/binary"
"encoding/json"
"io"
"iter"
"math"
"slices"
"sync"
"time"
@@ -546,3 +549,21 @@ func (idx *Index) Len(t restic.BlobType) uint {
return idx.byType[t].len()
}
func PackBlobsHash(pbs restic.PackBlobs) restic.ID {
h := sha256.New()
h.Write(pbs.PackID[:])
sortedBlobs := slices.Clone(pbs.Blobs)
sortedBlobs.Sort()
for _, blob := range sortedBlobs {
h.Write(blob.ID[:])
buf := make([]byte, 0, 16)
buf = binary.LittleEndian.AppendUint32(buf, uint32(blob.Type))
buf = binary.LittleEndian.AppendUint32(buf, uint32(blob.Offset))
buf = binary.LittleEndian.AppendUint32(buf, uint32(blob.Length))
buf = binary.LittleEndian.AppendUint32(buf, uint32(blob.UncompressedLength))
h.Write(buf)
}
return restic.ID(h.Sum(nil))
}
+25 -8
View File
@@ -467,15 +467,27 @@ func (mi *MasterIndex) Rewrite(ctx context.Context, repo restic.Unpacked[restic.
wg.Go(func() error {
defer close(saveCh)
// duplicate packs must be tracked separately to allow the `EachByPack` loop to check
// for duplicate index entries with different blobs.
// this is necessary to work around a bug in restic < 0.10.0 where the blobs of
// a pack file could be split over multiple indexes.
packBlobsIDSet := restic.NewIDSet()
newIndex := NewIndex()
for task := range rewriteCh {
// always rewrite indexes that include a pack that must be removed or that are not full
// always rewrite indexes that include a pack that must be removed or is a duplicate or that are not full
if len(task.idx.Packs().Intersect(excludePacks)) == 0 && Full(task.idx) && !Oversized(task.idx) {
// make sure that each pack is only stored exactly once in the index
excludePacks.Merge(task.idx.Packs())
// index is already up to date
p.Add(1)
continue
// check that no pack index entry is a duplicate of an already processed one
idxPackBlobsIDSet := restic.NewIDSet()
for pbs := range task.idx.EachByPack(wgCtx, excludePacks) {
idxPackBlobsIDSet.Insert(PackBlobsHash(pbs))
}
if len(idxPackBlobsIDSet.Intersect(packBlobsIDSet)) == 0 {
// index is already up to date
// make sure that each pack is only stored exactly once in the index
packBlobsIDSet.Merge(idxPackBlobsIDSet)
p.Add(1)
continue
}
}
ids, err := task.idx.IDs()
@@ -485,6 +497,13 @@ func (mi *MasterIndex) Rewrite(ctx context.Context, repo restic.Unpacked[restic.
obsolete.Merge(restic.NewIDSet(ids...))
for pbs := range task.idx.EachByPack(wgCtx, excludePacks) {
// only filter pack blobs with matching packID and blobs
packBlobsID := PackBlobsHash(pbs)
if packBlobsIDSet.Has(packBlobsID) {
continue
}
packBlobsIDSet.Insert(packBlobsID)
newIndex.StorePack(pbs.PackID, pbs.Blobs)
if Full(newIndex) {
select {
@@ -498,8 +517,6 @@ func (mi *MasterIndex) Rewrite(ctx context.Context, repo restic.Unpacked[restic.
if wgCtx.Err() != nil {
return wgCtx.Err()
}
// make sure that each pack is only stored exactly once in the index
excludePacks.Merge(task.idx.Packs())
p.Add(1)
}
@@ -637,3 +637,60 @@ func TestRewriteOversizedIndex(t *testing.T) {
ids := mi2.IDs()
rtest.Assert(t, len(ids) > 1, "oversized index was not split into multiple indexes")
}
func TestRewriteSplitPacks(t *testing.T) {
repo, unpacked, _ := repository.TestRepositoryWithVersion(t, restic.StableRepoVersion)
bh1 := restic.NewRandomBlobHandle()
bh2 := restic.NewRandomBlobHandle()
bhOther := restic.NewRandomBlobHandle()
blob1 := restic.PackedBlob{
PackID: restic.NewRandomID(),
Blob: restic.Blob{
BlobHandle: bh1,
Length: uint(crypto.CiphertextLength(10)),
Offset: 0,
},
}
blob2 := restic.PackedBlob{
PackID: blob1.PackID,
Blob: restic.Blob{
BlobHandle: bh2,
Length: uint(crypto.CiphertextLength(100)),
Offset: 10,
UncompressedLength: 200,
},
}
// used to force index repacking
blobOther := restic.PackedBlob{
PackID: restic.NewRandomID(),
Blob: restic.Blob{
BlobHandle: bhOther,
Length: uint(crypto.CiphertextLength(100)),
Offset: 10,
},
}
mi := index.NewMasterIndex()
rtest.OK(t, mi.StorePack(context.TODO(), blob1.PackID, restic.Blobs{blob1.Blob}, unpacked))
rtest.OK(t, mi.StorePack(context.TODO(), blobOther.PackID, restic.Blobs{blobOther.Blob}, unpacked))
rtest.OK(t, mi.Flush(context.TODO(), unpacked))
rtest.OK(t, mi.StorePack(context.TODO(), blob2.PackID, restic.Blobs{blob2.Blob}, unpacked))
rtest.OK(t, mi.StorePack(context.TODO(), blobOther.PackID, restic.Blobs{blobOther.Blob}, unpacked))
rtest.OK(t, mi.Flush(context.TODO(), unpacked))
rtest.OK(t, mi.Rewrite(context.TODO(), unpacked, restic.NewIDSet(blobOther.PackID), nil, nil, index.MasterIndexRewriteOpts{}))
mi = index.NewMasterIndex()
rtest.OK(t, mi.Load(context.TODO(), repo, nil, nil))
// test that all blobs are still in the index
for _, blob := range []restic.PackedBlob{blob1, blob2} {
blobs := mi.Lookup(blob.BlobHandle)
rtest.Equals(t, []restic.PackedBlob{blob}, blobs)
}
blobs := mi.Lookup(blobOther.BlobHandle)
rtest.Equals(t, nil, blobs)
}
+5
View File
@@ -53,3 +53,8 @@ func TestBlobsSort(t *testing.T) {
rtest.Equals(t, uint(50), blobs[1].Offset)
rtest.Equals(t, uint(100), blobs[2].Offset)
}
func TestBlobsSortNilSlice(t *testing.T) {
var blobs Blobs
blobs.Sort()
}