From e33ed5d0c1863a5bb3ce6bd52d781d69cee297cf Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 14 Feb 2026 21:07:30 +0100 Subject: [PATCH 1/6] index: make tests more representative --- internal/repository/index/index_test.go | 34 +++++++++++++++++-------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/internal/repository/index/index_test.go b/internal/repository/index/index_test.go index 30a662a37..bdc666d3f 100644 --- a/internal/repository/index/index_test.go +++ b/internal/repository/index/index_test.go @@ -463,22 +463,34 @@ func createRandomIndex(rng *rand.Rand, packfiles int) (idx *index.Index, lookupB func BenchmarkIndexHasUnknown(b *testing.B) { idx, _ := createRandomIndex(rand.New(rand.NewSource(0)), 200000) - lookupBh := restic.NewRandomBlobHandle() + handles := make([]restic.BlobHandle, 0, 100000) + for i := 0; i < cap(handles); i++ { + handles = append(handles, restic.NewRandomBlobHandle()) + } - b.ResetTimer() - - for i := 0; i < b.N; i++ { - idx.Has(lookupBh) + for b.Loop() { + // use multiple handles to reduce cache effects + for _, handle := range handles { + idx.Has(handle) + } } } func BenchmarkIndexHasKnown(b *testing.B) { - idx, lookupBh := createRandomIndex(rand.New(rand.NewSource(0)), 200000) + idx, _ := createRandomIndex(rand.New(rand.NewSource(0)), 200000) + handles := make([]restic.BlobHandle, 0, 100000) + for handle := range idx.Values() { + handles = append(handles, handle.BlobHandle) + if len(handles) == cap(handles) { + break + } + } - b.ResetTimer() - - for i := 0; i < b.N; i++ { - idx.Has(lookupBh) + for b.Loop() { + // use multiple handles to reduce cache effects + for _, handle := range handles { + idx.Has(handle) + } } } @@ -486,7 +498,7 @@ func BenchmarkIndexAlloc(b *testing.B) { rng := rand.New(rand.NewSource(0)) b.ReportAllocs() - for i := 0; i < b.N; i++ { + for b.Loop() { createRandomIndex(rng, 200000) } } From 320f709fbc98b9919b40193e43cd35723dbf043f Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: 
Sun, 15 Feb 2026 11:46:03 +0100 Subject: [PATCH 2/6] index: modernize masterindex tests `b.Loop()` drastically shortens benchmark execution times for tests with an expensive initialization phase as it only has to happen once now. --- .../repository/index/master_index_test.go | 40 ++++++++----------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/internal/repository/index/master_index_test.go b/internal/repository/index/master_index_test.go index 5cae25873..964ee8814 100644 --- a/internal/repository/index/master_index_test.go +++ b/internal/repository/index/master_index_test.go @@ -298,17 +298,24 @@ func BenchmarkMasterIndexAlloc(b *testing.B) { rng := rand.New(rand.NewSource(0)) b.ReportAllocs() - for i := 0; i < b.N; i++ { + for b.Loop() { createRandomMasterIndex(b, rng, 10000, 5) } } +func BenchmarkMasterIndexMerge(b *testing.B) { + rng := rand.New(rand.NewSource(0)) + b.ReportAllocs() + + for b.Loop() { + createRandomMasterIndex(b, rng, 1000, 1000) + } +} + func BenchmarkMasterIndexLookupSingleIndex(b *testing.B) { mIdx, lookupBh := createRandomMasterIndex(b, rand.New(rand.NewSource(0)), 1, 200000) - b.ResetTimer() - - for i := 0; i < b.N; i++ { + for b.Loop() { mIdx.Lookup(lookupBh) } } @@ -316,21 +323,16 @@ func BenchmarkMasterIndexLookupSingleIndex(b *testing.B) { func BenchmarkMasterIndexLookupMultipleIndex(b *testing.B) { mIdx, lookupBh := createRandomMasterIndex(b, rand.New(rand.NewSource(0)), 100, 10000) - b.ResetTimer() - - for i := 0; i < b.N; i++ { + for b.Loop() { mIdx.Lookup(lookupBh) } } func BenchmarkMasterIndexLookupSingleIndexUnknown(b *testing.B) { - lookupBh := restic.NewRandomBlobHandle() mIdx, _ := createRandomMasterIndex(b, rand.New(rand.NewSource(0)), 1, 200000) - b.ResetTimer() - - for i := 0; i < b.N; i++ { + for b.Loop() { mIdx.Lookup(lookupBh) } } @@ -339,9 +341,7 @@ func BenchmarkMasterIndexLookupMultipleIndexUnknown(b *testing.B) { lookupBh := restic.NewRandomBlobHandle() mIdx, _ := createRandomMasterIndex(b, 
rand.New(rand.NewSource(0)), 100, 10000) - b.ResetTimer() - - for i := 0; i < b.N; i++ { + for b.Loop() { mIdx.Lookup(lookupBh) } } @@ -380,9 +380,7 @@ func BenchmarkMasterIndexLookupBlobSize(b *testing.B) { rng := rand.New(rand.NewSource(0)) mIdx, lookupBh := createRandomMasterIndex(b, rand.New(rng), 5, 200000) - b.ResetTimer() - - for i := 0; i < b.N; i++ { + for b.Loop() { mIdx.LookupSize(lookupBh) } } @@ -391,9 +389,7 @@ func BenchmarkMasterIndexEach(b *testing.B) { rng := rand.New(rand.NewSource(0)) mIdx, _ := createRandomMasterIndex(b, rand.New(rng), 5, 200000) - b.ResetTimer() - - for i := 0; i < b.N; i++ { + for b.Loop() { entries := 0 for range mIdx.Values() { entries++ } @@ -404,9 +400,7 @@ func BenchmarkMasterIndexGC(b *testing.B) { mIdx, _ := createRandomMasterIndex(b, rand.New(rand.NewSource(0)), 100, 10000) - b.ResetTimer() - - for i := 0; i < b.N; i++ { + for b.Loop() { runtime.GC() } runtime.KeepAlive(mIdx) From ba638b660237ca6a9942dc4ebf2bc9b370f7cbe1 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 14 Feb 2026 21:49:17 +0100 Subject: [PATCH 3/6] indexmap: use bloom filter to drastically speed up check for unknown blobs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only in use on 64-bit systems. Use the upper 28 bits of the id of an index entry as bloom filter. This allows skipping the index entry traversal most of the time if an id is not stored in the hashmap. The bloom filter embedded in the index entry id is checked each time before following a reference to an index entry. This further reduces the risk of false positives. The bloom filter itself is basically for free on modern CPUs. The main performance cost of checking for unknown blobs in the index is the essentially random RAM accesses for the initial bucket lookup as well as following the next pointer in the index entries. 
With the bloom filter most of the time only the initial bucket lookup is necessary. This speeds up checking for unknown blobs by a factor of 5 (!), while having no effect on the lookup of known blobs: $ benchstat no-bloom with-bloom name old time/op new time/op delta IndexHasUnknown-16 49.0ms ± 2% 9.9ms ± 7% -79.70% (p=0.000 n=10+10) IndexHasKnown-16 48.0ms ± 3% 47.9ms ± 3% ~ (p=0.968 n=10+9) These bloom filter parameters m=28 k=1 were derived empirically, while also leaving sufficient room for very large repositories. Before this commit, the final merge index step took roughly 1 second per million index entries. With the chosen bloom filter parameters, it would currently take 19 hours to just merge such an index. It is safe to assume that such large repositories don't exist. Comparison with other parameter sets: $ m=28 k=1 versus m=32 k=1 name old time/op new time/op delta IndexHasUnknown-16 49.0ms ± 2% 9.7ms ±16% -80.17% (p=0.000 n=10+10) IndexHasKnown-16 48.0ms ± 3% 48.4ms ± 3% ~ (p=0.436 n=10+10) $ m=28 k=1 versus m=24 k=1 name old time/op new time/op delta IndexHasUnknown-16 49.0ms ± 2% 10.8ms ±13% -77.90% (p=0.000 n=10+10) IndexHasKnown-16 48.0ms ± 3% 47.9ms ± 3% ~ (p=0.684 n=10+10) $ m=28 k=1 versus m=28 k=2 name old time/op new time/op delta IndexHasUnknown-16 49.0ms ± 2% 24.9ms ± 5% -49.27% (p=0.000 n=10+10) IndexHasKnown-16 48.0ms ± 3% 48.0ms ± 4% ~ (p=1.000 n=10+10) `k=2` outright wrecks the performance. This is most likely the case as it performs worse on longer index entry chains, which also happen to be the expensive ones to process. `m=32` yields diminishing returns, while getting within an order of magnitude of the largest known restic repositories. Design alternatives: In principle it would be possible to add a single large bloom filter instead of embedding them in the index entry ids. However, this bloom filter would necessarily incur additional random memory accesses and thus slow things down overall. 
--- internal/repository/index/indexmap.go | 70 ++++++++++++++++++++++++--- 1 file changed, 62 insertions(+), 8 deletions(-) diff --git a/internal/repository/index/indexmap.go b/internal/repository/index/indexmap.go index 16f27d614..115cbd2d2 100644 --- a/internal/repository/index/indexmap.go +++ b/internal/repository/index/indexmap.go @@ -3,6 +3,7 @@ package index import ( "hash/maphash" "iter" + "math" "github.com/restic/restic/internal/restic" ) @@ -16,6 +17,15 @@ import ( // The buckets in this hash table contain only pointers, rather than inlined // key-value pairs like the standard Go map. This way, only a pointer array // needs to be resized when the table grows, preventing memory usage spikes. +// +// On 64-bit systems, the id of an indexEntry is a uint64 containing the index +// of the entry in the `buckets` slice. This index is also stored in the +// `next` field of an indexEntry. However, the actual number of entries +// is far lower. Thus, the upper 28 bits are used to store a bloom filter, +// leaving the lower 36 bits for the index in the block list. The bloom filter +// is used to quickly check if an entry might be present in the map before +// traversing the block list. This significantly reduces the number of cache +// misses when following the `next` field chain for unknown ids. type indexMap struct { // The number of buckets is always a power of two and never zero. buckets []uint @@ -50,7 +60,7 @@ func (m *indexMap) add(id restic.ID, packIdx int, offset, length uint32, uncompr e.length = length e.uncompressedLength = uncompressedLength - m.buckets[h] = idx + m.buckets[h] = bloomInsertID(idx, e.next, id) m.numentries++ } @@ -75,7 +85,9 @@ func (m *indexMap) valuesWithID(id restic.ID) iter.Seq[*indexEntry] { h := m.hash(id) ei := m.buckets[h] - for ei != 0 { + // checking before resolving each entry is significantly faster than + // checking only once at the start. 
+ for bloomHasID(ei, id) { e := m.resolve(ei) ei = e.next if e.id != id { @@ -96,7 +108,7 @@ func (m *indexMap) get(id restic.ID) *indexEntry { h := m.hash(id) ei := m.buckets[h] - for ei != 0 { + for bloomHasID(ei, id) { e := m.resolve(ei) if e.id == id { return e @@ -116,9 +128,9 @@ func (m *indexMap) firstIndex(id restic.ID) int { idx := -1 h := m.hash(id) ei := m.buckets[h] - for ei != 0 { + for bloomHasID(ei, id) { e := m.resolve(ei) - cur := ei + cur := bloomCleanID(ei) ei = e.next if e.id != id { continue @@ -141,7 +153,7 @@ func (m *indexMap) grow() { h := m.hash(e.id) e.next = m.buckets[h] - m.buckets[h] = i + m.buckets[h] = bloomInsertID(i, e.next, e.id) } } @@ -169,11 +181,53 @@ func (m *indexMap) init() { func (m *indexMap) len() uint { return m.numentries } func (m *indexMap) newEntry() (*indexEntry, uint) { - return m.blockList.Alloc() + entry, idx := m.blockList.Alloc() + if idx != bloomCleanID(idx) { + panic("repository index size overflow") + } + return entry, idx } func (m *indexMap) resolve(idx uint) *indexEntry { - return m.blockList.Ref(idx) + return m.blockList.Ref(bloomCleanID(idx)) +} + +// On 32-bit systems, the bloom filter compiles away into a no-op. 
+const bloomShift = 36 +const bloomMask = 1<> bloomShift + return bloom&bloomForID(id) != 0 +} + +func bloomInsertID(idx uint, nextIdx uint, id restic.ID) uint { + // extra variable to compile on 32bit systems + bloomMask := uint64(bloomMask) + oldBloom := (nextIdx & ^uint(bloomMask)) + newBloom := bloomForID(id) << bloomShift + return idx | oldBloom | newBloom } type indexEntry struct { From 934c615e51c03b7a5300e2e94d03bbc702b39b2a Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 14 Feb 2026 22:36:33 +0100 Subject: [PATCH 4/6] index: support index preallocation --- internal/repository/index/index.go | 9 +++++++ internal/repository/index/index_test.go | 2 ++ internal/repository/index/indexmap.go | 31 +++++++++++++++-------- internal/repository/index/master_index.go | 14 ++++++++++ 4 files changed, 46 insertions(+), 10 deletions(-) diff --git a/internal/repository/index/index.go b/internal/repository/index/index.go index 85e751e29..10a4275fe 100644 --- a/internal/repository/index/index.go +++ b/internal/repository/index/index.go @@ -132,6 +132,15 @@ var Oversized = func(idx *Index) bool { return blobs >= indexMaxBlobs+pack.MaxHeaderEntries } +// Preallocate preallocates space for the given blob type. +// This is used to avoid reallocations when adding a large number of blobs to the index. 
+func (idx *Index) Preallocate(t restic.BlobType, numEntries int) { + idx.m.Lock() + defer idx.m.Unlock() + + idx.byType[t].preallocate(numEntries) +} + // StorePack remembers the ids of all blobs of a given pack // in the index func (idx *Index) StorePack(id restic.ID, blobs []restic.Blob) { diff --git a/internal/repository/index/index_test.go b/internal/repository/index/index_test.go index bdc666d3f..f47b27496 100644 --- a/internal/repository/index/index_test.go +++ b/internal/repository/index/index_test.go @@ -427,6 +427,8 @@ func NewRandomTestID(rng *rand.Rand) restic.ID { func createRandomIndex(rng *rand.Rand, packfiles int) (idx *index.Index, lookupBh restic.BlobHandle) { idx = index.NewIndex() + // the expectation is slightly above 8 blobs per pack, so preallocate 9 to be safe + idx.Preallocate(restic.DataBlob, packfiles*9) // create index with given number of pack files for i := 0; i < packfiles; i++ { diff --git a/internal/repository/index/indexmap.go b/internal/repository/index/indexmap.go index 115cbd2d2..af836a97d 100644 --- a/internal/repository/index/indexmap.go +++ b/internal/repository/index/indexmap.go @@ -37,19 +37,14 @@ type indexMap struct { } const ( - growthFactor = 2 // Must be a power of 2. - maxLoad = 4 // Max. number of entries per bucket. + maxLoad = 4 // Max. number of entries per bucket. ) // add inserts an indexEntry for the given arguments into the map, // using id as the key. func (m *indexMap) add(id restic.ID, packIdx int, offset, length uint32, uncompressedLength uint32) { - switch { - case m.numentries == 0: // Lazy initialization. - m.init() - case m.numentries >= maxLoad*uint(len(m.buckets)): - m.grow() - } + // Make sure there is enough space for the new entry. 
+ m.preallocate(int(m.numentries) + 1) h := m.hash(id) e, idx := m.newEntry() @@ -144,8 +139,24 @@ func (m *indexMap) firstIndex(id restic.ID) int { return idx } -func (m *indexMap) grow() { - m.buckets = make([]uint, growthFactor*len(m.buckets)) +func (m *indexMap) preallocate(numEntries int) { + if numEntries == 0 { + return + } + if len(m.buckets) == 0 { + m.init() // Perform lazy initialization. + } + + // new size must be a power of two + newSize := len(m.buckets) + for newSize < (numEntries+maxLoad-1)/maxLoad { + newSize *= 2 + } + if newSize == len(m.buckets) { + return + } + + m.buckets = make([]uint, newSize) blockCount := m.blockList.Size() for i := uint(1); i < blockCount; i++ { diff --git a/internal/repository/index/master_index.go b/internal/repository/index/master_index.go index bc0198882..e37614fc5 100644 --- a/internal/repository/index/master_index.go +++ b/internal/repository/index/master_index.go @@ -243,6 +243,20 @@ func (mi *MasterIndex) MergeFinalIndexes() error { mi.idxMutex.Lock() defer mi.idxMutex.Unlock() + if len(mi.idx) == 0 { + return nil + } + + // preallocate space for all blob types + for typ := range restic.NumBlobTypes { + size := 0 + for _, idx := range mi.idx { + size += int(idx.Len(typ)) + } + + mi.idx[0].Preallocate(typ, size) + } + // The first index is always final and the one to merge into newIdx := mi.idx[:1] for i := 1; i < len(mi.idx); i++ { From 5c935e71fa3a00b8e3a3f9a2dde522f4aa714f7b Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 14 Feb 2026 23:39:17 +0100 Subject: [PATCH 5/6] index: also preallocate hashed array tree --- internal/repository/index/indexmap.go | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/internal/repository/index/indexmap.go b/internal/repository/index/indexmap.go index af836a97d..159abe7fe 100644 --- a/internal/repository/index/indexmap.go +++ b/internal/repository/index/indexmap.go @@ -166,6 +166,8 @@ func (m *indexMap) preallocate(numEntries int) 
{ e.next = m.buckets[h] m.buckets[h] = bloomInsertID(i, e.next, e.id) } + + m.blockList.preallocate(uint(numEntries)) } func (m *indexMap) hash(id restic.ID) uint { @@ -300,9 +302,9 @@ func (h *hashedArrayTree) Size() uint { return h.size } -func (h *hashedArrayTree) grow() { - idx, subIdx := h.index(h.size) - if int(idx) == len(h.blockList) { +func (h *hashedArrayTree) preallocate(numEntries uint) { + idx, _ := h.index(numEntries - 1) + for int(idx) >= len(h.blockList) { // blockList is too short -> double list and block size h.blockSize *= 2 h.mask = h.mask*2 + 1 @@ -314,15 +316,26 @@ func (h *hashedArrayTree) grow() { // pairwise merging of blocks for i := 0; i < len(oldBlocks); i += 2 { + if oldBlocks[i] == nil && oldBlocks[i+1] == nil { + // merged all blocks with data. Grow will allocate the block later on + break + } block := make([]indexEntry, 0, h.blockSize) block = append(block, oldBlocks[i]...) block = append(block, oldBlocks[i+1]...) - h.blockList[i/2] = block + // make sure to set the correct length as not all old blocks may contain entries yet + h.blockList[i/2] = block[0:h.blockSize] // allow GC oldBlocks[i] = nil oldBlocks[i+1] = nil } } +} + +func (h *hashedArrayTree) grow() { + h.preallocate(h.size + 1) + + idx, subIdx := h.index(h.size) if subIdx == 0 { // new index entry batch h.blockList[idx] = make([]indexEntry, h.blockSize) From adce279d0482d1b6aac1c6dbb51cf14adb4bcdcf Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 10 May 2026 00:38:01 +0200 Subject: [PATCH 6/6] add changelog --- changelog/unreleased/pull-5720 | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/changelog/unreleased/pull-5720 b/changelog/unreleased/pull-5720 index 13600af83..ea70bfbe3 100644 --- a/changelog/unreleased/pull-5720 +++ b/changelog/unreleased/pull-5720 @@ -1,7 +1,9 @@ -Enhancement: speed up index loading in `restic mount` +Enhancement: speed up index loading -`restic mount` now loads the index once on startup and incrementally loads 
only +Loading the index for a large repository is now significantly faster. `restic mount` +now also loads the index once on startup and incrementally loads only new index files afterwards. In addition, `restic mount` now loads snapshots before printing that the repository is being served. https://github.com/restic/restic/pull/5720 +https://github.com/restic/restic/pull/5713