repository: repair index: correctly handle split index entries

In restic <0.10.0, it was possible that the blobs of a pack file were
split across multiple indexes. `MasterIndex.Rewrite` however assumed
that each index always contains the full description of a pack file.
Therefore, further index entries for a pack were filtered out as
duplicates. Now, the code also checks the blobs contained in the index
entry while filtering out duplicates.
This commit is contained in:
Michael Eischer
2026-05-30 20:08:18 +02:00
parent f86307d223
commit 5b39ad861e
2 changed files with 46 additions and 8 deletions
+21
View File
@@ -3,10 +3,13 @@ package index
import (
"bytes"
"context"
"crypto/sha256"
"encoding/binary"
"encoding/json"
"io"
"iter"
"math"
"slices"
"sync"
"time"
@@ -546,3 +549,21 @@ func (idx *Index) Len(t restic.BlobType) uint {
return idx.byType[t].len()
}
// PackBlobsHash returns a SHA-256 based ID that fingerprints a pack index
// entry, i.e. the pack ID together with the blobs listed for that pack.
//
// The hash covers pbs.PackID followed by, for every blob, its ID, Type,
// Offset, Length and UncompressedLength (the latter four each converted to
// uint32 and encoded as little-endian). The blobs are hashed from a sorted
// copy, so pbs.Blobs itself is left untouched and the result is intended to
// be independent of the order in which the blobs are listed.
func PackBlobsHash(pbs restic.PackBlobs) restic.ID {
	h := sha256.New()
	// hash.Hash.Write is documented to never return an error.
	h.Write(pbs.PackID[:])

	// sort a copy to avoid reordering the caller's slice
	sortedBlobs := slices.Clone(pbs.Blobs)
	sortedBlobs.Sort()

	// A single scratch buffer is reused for all blobs instead of allocating
	// a new one per iteration. It holds the blob ID plus four uint32 fields.
	buf := make([]byte, 0, sha256.Size+16)
	for _, blob := range sortedBlobs {
		buf = append(buf[:0], blob.ID[:]...)
		buf = binary.LittleEndian.AppendUint32(buf, uint32(blob.Type))
		buf = binary.LittleEndian.AppendUint32(buf, uint32(blob.Offset))
		buf = binary.LittleEndian.AppendUint32(buf, uint32(blob.Length))
		buf = binary.LittleEndian.AppendUint32(buf, uint32(blob.UncompressedLength))
		h.Write(buf)
	}

	return restic.ID(h.Sum(nil))
}
+25 -8
View File
@@ -467,15 +467,27 @@ func (mi *MasterIndex) Rewrite(ctx context.Context, repo restic.Unpacked[restic.
wg.Go(func() error {
defer close(saveCh)
// duplicate packs must be tracked separately to allow the `EachByPack` loop to check
// for duplicate index entries with different blobs.
// this is necessary to work around a bug in restic < 0.10.0 where the blobs of
// a pack file could be split over multiple indexes.
packBlobsIDSet := restic.NewIDSet()
newIndex := NewIndex()
for task := range rewriteCh {
// always rewrite indexes that include a pack that must be removed or that are not full
// always rewrite indexes that include a pack that must be removed or is a duplicate or that are not full
if len(task.idx.Packs().Intersect(excludePacks)) == 0 && Full(task.idx) && !Oversized(task.idx) {
// make sure that each pack is only stored exactly once in the index
excludePacks.Merge(task.idx.Packs())
// index is already up to date
p.Add(1)
continue
// check that no pack index entry is a duplicate of an already processed one
idxPackBlobsIDSet := restic.NewIDSet()
for pbs := range task.idx.EachByPack(wgCtx, excludePacks) {
idxPackBlobsIDSet.Insert(PackBlobsHash(pbs))
}
if len(idxPackBlobsIDSet.Intersect(packBlobsIDSet)) == 0 {
// index is already up to date
// make sure that each pack is only stored exactly once in the index
packBlobsIDSet.Merge(idxPackBlobsIDSet)
p.Add(1)
continue
}
}
ids, err := task.idx.IDs()
@@ -485,6 +497,13 @@ func (mi *MasterIndex) Rewrite(ctx context.Context, repo restic.Unpacked[restic.
obsolete.Merge(restic.NewIDSet(ids...))
for pbs := range task.idx.EachByPack(wgCtx, excludePacks) {
// only filter pack blobs with matching packID and blobs
packBlobsID := PackBlobsHash(pbs)
if packBlobsIDSet.Has(packBlobsID) {
continue
}
packBlobsIDSet.Insert(packBlobsID)
newIndex.StorePack(pbs.PackID, pbs.Blobs)
if Full(newIndex) {
select {
@@ -498,8 +517,6 @@ func (mi *MasterIndex) Rewrite(ctx context.Context, repo restic.Unpacked[restic.
if wgCtx.Err() != nil {
return wgCtx.Err()
}
// make sure that each pack is only stored exactly once in the index
excludePacks.Merge(task.idx.Packs())
p.Add(1)
}