Merge pull request #21828 from MichaelEischer/fix-split-pack-index

repository: repair index: correctly handle split index entries
2026-07-17 04:34:53 +00:00 · 2026-05-31 16:30:49 +02:00
parent f86307d223 a54ac48f60
commit 53de77ea1a
7 changed files with 159 additions and 11 deletions
@@ -0,0 +1,16 @@
+Bugfix: Correct handling of duplicate index entries
+
+Before restic 0.10.0, a bug could in very rare cases split information
+about a pack file across multiple index files. Since restic 0.17.0, any
+operation that rewrites the index (like `prune` or `repair packs`)
+could lose part of that information, resulting in errors in later
+`check` or `prune` runs. Those can be fixed by running `repair packs`.
+Note that only repositories using repository format version 1 might be affected.
+
+Split pack index entries are no longer lost during index rewrites. The
+`check` command now reports these cases as errors that can instead be fixed using
+the `repair packs` command. On older restic versions, running `repair index`
+twice also fixes the problem.
+
+https://github.com/restic/restic/issues/21820
+https://github.com/restic/restic/pull/21828
@@ -261,8 +261,15 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args
 	}

 	errorsFound := false
+	salvagePacks := restic.NewIDSet()
+
 	for _, hint := range hints {
-		switch hint.(type) {
+		switch hint := hint.(type) {
+		case *repository.ErrIncompletePackEntry:
+			printer.E("%s", hint.Error())
+			salvagePacks.Insert(hint.PackID)
+			errorsFound = true
+			summary.NumErrors++
 		case *repository.ErrDuplicatePacks:
 			printer.S("%s", hint.Error())
 			summary.HintRepairIndex = true
@@ -295,7 +302,6 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args

 	orphanedPacks := 0
 	errChan := make(chan error)
-	salvagePacks := restic.NewIDSet()

 	printer.P("check all packs\n")
 	go chkr.Packs(ctx, errChan)
@@ -17,6 +17,16 @@ import (

 const maxStreamBufferSize = 4 * 1024 * 1024

+// ErrIncompletePackEntry is returned when indexes contain different data for a pack.
+type ErrIncompletePackEntry struct {
+	PackID  restic.ID
+	Indexes restic.IDSet
+}
+
+func (e *ErrIncompletePackEntry) Error() string {
+	return fmt.Sprintf("pack %v has different data in indexes: %v", e.PackID, e.Indexes)
+}
+
 // ErrDuplicatePacks is returned when a pack is found in more than one index.
 type ErrDuplicatePacks struct {
 	PackID  restic.ID
@@ -79,6 +89,9 @@ func computePackTypes(ctx context.Context, idx restic.ListBlobser) (map[restic.I
 func (c *Checker) LoadIndex(ctx context.Context, p restic.TerminalCounterFactory) (hints []error, errs []error) {
 	debug.Log("Start")
 	packToIndex := make(map[restic.ID]restic.IDSet)
+	// in restic < 0.10.0, the blobs of a pack could be split over multiple indexes.
+	// by now this is considered as repository damage.
+	packToPackBlobHash := make(map[restic.ID]restic.IDSet)

 	// Use the repository's internal loadIndexWithCallback to handle per-index errors
 	err := c.repo.loadIndexWithCallback(ctx, p, func(id restic.ID, idx *index.Index, err error) error {
@@ -104,6 +117,14 @@ func (c *Checker) LoadIndex(ctx context.Context, p restic.TerminalCounterFactory
 			packToIndex[blob.PackID].Insert(id)
 		}

+		for pbs := range idx.EachByPack(ctx, restic.NewIDSet()) {
+			packBlobHash := index.PackBlobsHash(pbs)
+			if _, ok := packToPackBlobHash[pbs.PackID]; !ok {
+				packToPackBlobHash[pbs.PackID] = restic.NewIDSet()
+			}
+			packToPackBlobHash[pbs.PackID].Insert(packBlobHash)
+		}
+
 		debug.Log("%d blobs processed", cnt)
 		return nil
 	})
@@ -120,7 +141,12 @@ func (c *Checker) LoadIndex(ctx context.Context, p restic.TerminalCounterFactory
 	debug.Log("checking for duplicate packs")
 	for packID := range packTypes {
 		debug.Log("  check pack %v: contained in %d indexes", packID, len(packToIndex[packID]))
-		if len(packToIndex[packID]) > 1 {
+		if len(packToPackBlobHash[packID]) > 1 {
+			hints = append(hints, &ErrIncompletePackEntry{
+				PackID:  packID,
+				Indexes: packToIndex[packID],
+			})
+		} else if len(packToIndex[packID]) > 1 {
 			hints = append(hints, &ErrDuplicatePacks{
 				PackID:  packID,
 				Indexes: packToIndex[packID],
@@ -3,10 +3,13 @@ package index
 import (
 	"bytes"
 	"context"
+	"crypto/sha256"
+	"encoding/binary"
 	"encoding/json"
 	"io"
 	"iter"
 	"math"
+	"slices"
 	"sync"
 	"time"

@@ -546,3 +549,21 @@ func (idx *Index) Len(t restic.BlobType) uint {

 	return idx.byType[t].len()
 }
+
+func PackBlobsHash(pbs restic.PackBlobs) restic.ID {
+	h := sha256.New()
+	h.Write(pbs.PackID[:])
+
+	sortedBlobs := slices.Clone(pbs.Blobs)
+	sortedBlobs.Sort()
+	for _, blob := range sortedBlobs {
+		h.Write(blob.ID[:])
+		buf := make([]byte, 0, 16)
+		buf = binary.LittleEndian.AppendUint32(buf, uint32(blob.Type))
+		buf = binary.LittleEndian.AppendUint32(buf, uint32(blob.Offset))
+		buf = binary.LittleEndian.AppendUint32(buf, uint32(blob.Length))
+		buf = binary.LittleEndian.AppendUint32(buf, uint32(blob.UncompressedLength))
+		h.Write(buf)
+	}
+	return restic.ID(h.Sum(nil))
+}
@@ -467,15 +467,27 @@ func (mi *MasterIndex) Rewrite(ctx context.Context, repo restic.Unpacked[restic.

 	wg.Go(func() error {
 		defer close(saveCh)
+		// duplicate packs must be tracked separately to allow the `EachByPack` loop to check
+		// for duplicate index entries with different blobs.
+		// this is necessary to work around a bug in restic < 0.10.0 where the blobs of
+		// a pack file could be split over multiple indexes.
+		packBlobsIDSet := restic.NewIDSet()
 		newIndex := NewIndex()
 		for task := range rewriteCh {
-			// always rewrite indexes that include a pack that must be removed or that are not full
+			// always rewrite indexes that include a pack that must be removed or is a duplicate or that are not full
 			if len(task.idx.Packs().Intersect(excludePacks)) == 0 && Full(task.idx) && !Oversized(task.idx) {
-				// make sure that each pack is only stored exactly once in the index
-				excludePacks.Merge(task.idx.Packs())
-				// index is already up to date
-				p.Add(1)
-				continue
+				// check that no pack index entry is a duplicate of an already processed one
+				idxPackBlobsIDSet := restic.NewIDSet()
+				for pbs := range task.idx.EachByPack(wgCtx, excludePacks) {
+					idxPackBlobsIDSet.Insert(PackBlobsHash(pbs))
+				}
+				if len(idxPackBlobsIDSet.Intersect(packBlobsIDSet)) == 0 {
+					// index is already up to date
+					// make sure that each pack is only stored exactly once in the index
+					packBlobsIDSet.Merge(idxPackBlobsIDSet)
+					p.Add(1)
+					continue
+				}
 			}

 			ids, err := task.idx.IDs()
@@ -485,6 +497,13 @@ func (mi *MasterIndex) Rewrite(ctx context.Context, repo restic.Unpacked[restic.
 			obsolete.Merge(restic.NewIDSet(ids...))

 			for pbs := range task.idx.EachByPack(wgCtx, excludePacks) {
+				// only filter pack blobs with matching packID and blobs
+				packBlobsID := PackBlobsHash(pbs)
+				if packBlobsIDSet.Has(packBlobsID) {
+					continue
+				}
+				packBlobsIDSet.Insert(packBlobsID)
+
 				newIndex.StorePack(pbs.PackID, pbs.Blobs)
 				if Full(newIndex) {
 					select {
@@ -498,8 +517,6 @@ func (mi *MasterIndex) Rewrite(ctx context.Context, repo restic.Unpacked[restic.
 			if wgCtx.Err() != nil {
 				return wgCtx.Err()
 			}
-			// make sure that each pack is only stored exactly once in the index
-			excludePacks.Merge(task.idx.Packs())
 			p.Add(1)
 		}

@@ -637,3 +637,60 @@ func TestRewriteOversizedIndex(t *testing.T) {
 	ids := mi2.IDs()
 	rtest.Assert(t, len(ids) > 1, "oversized index was not split into multiple indexes")
 }
+
+func TestRewriteSplitPacks(t *testing.T) {
+	repo, unpacked, _ := repository.TestRepositoryWithVersion(t, restic.StableRepoVersion)
+
+	bh1 := restic.NewRandomBlobHandle()
+	bh2 := restic.NewRandomBlobHandle()
+	bhOther := restic.NewRandomBlobHandle()
+
+	blob1 := restic.PackedBlob{
+		PackID: restic.NewRandomID(),
+		Blob: restic.Blob{
+			BlobHandle: bh1,
+			Length:     uint(crypto.CiphertextLength(10)),
+			Offset:     0,
+		},
+	}
+	blob2 := restic.PackedBlob{
+		PackID: blob1.PackID,
+		Blob: restic.Blob{
+			BlobHandle:         bh2,
+			Length:             uint(crypto.CiphertextLength(100)),
+			Offset:             10,
+			UncompressedLength: 200,
+		},
+	}
+	// used to force index repacking
+	blobOther := restic.PackedBlob{
+		PackID: restic.NewRandomID(),
+		Blob: restic.Blob{
+			BlobHandle: bhOther,
+			Length:     uint(crypto.CiphertextLength(100)),
+			Offset:     10,
+		},
+	}
+
+	mi := index.NewMasterIndex()
+	rtest.OK(t, mi.StorePack(context.TODO(), blob1.PackID, restic.Blobs{blob1.Blob}, unpacked))
+	rtest.OK(t, mi.StorePack(context.TODO(), blobOther.PackID, restic.Blobs{blobOther.Blob}, unpacked))
+	rtest.OK(t, mi.Flush(context.TODO(), unpacked))
+	rtest.OK(t, mi.StorePack(context.TODO(), blob2.PackID, restic.Blobs{blob2.Blob}, unpacked))
+	rtest.OK(t, mi.StorePack(context.TODO(), blobOther.PackID, restic.Blobs{blobOther.Blob}, unpacked))
+	rtest.OK(t, mi.Flush(context.TODO(), unpacked))
+
+	rtest.OK(t, mi.Rewrite(context.TODO(), unpacked, restic.NewIDSet(blobOther.PackID), nil, nil, index.MasterIndexRewriteOpts{}))
+
+	mi = index.NewMasterIndex()
+	rtest.OK(t, mi.Load(context.TODO(), repo, nil, nil))
+
+	// test that all blobs are still in the index
+	for _, blob := range []restic.PackedBlob{blob1, blob2} {
+		blobs := mi.Lookup(blob.BlobHandle)
+		rtest.Equals(t, []restic.PackedBlob{blob}, blobs)
+	}
+
+	blobs := mi.Lookup(blobOther.BlobHandle)
+	rtest.Equals(t, nil, blobs)
+}
@@ -53,3 +53,8 @@ func TestBlobsSort(t *testing.T) {
 	rtest.Equals(t, uint(50), blobs[1].Offset)
 	rtest.Equals(t, uint(100), blobs[2].Offset)
 }
+
+func TestBlobsSortNilSlice(t *testing.T) {
+	var blobs Blobs
+	blobs.Sort()
+}