restic check with snapshot filters (#5469)

---------

Co-authored-by: Michael Eischer <michael.eischer@fau.de>
This commit is contained in:
Winfried Plappert
2025-11-28 19:12:38 +00:00
committed by GitHub
parent 8fdbdc57a0
commit ce57961f14
7 changed files with 170 additions and 18 deletions

View File

@@ -0,0 +1,8 @@
Enhancement: `restic check` for specified snapshot(s) via snapshot filtering
Snapshots can now be specified on the command line for the command `restic check`
via the standard snapshot filters (`--tag`, `--host`, `--path`, or by specifying
snapshot IDs directly). The packfiles used by these snapshots will then be checked.
https://github.com/restic/restic/issues/3326
https://github.com/restic/restic/pull/5213

View File

@@ -15,6 +15,7 @@ import (
"github.com/restic/restic/internal/backend/cache" "github.com/restic/restic/internal/backend/cache"
"github.com/restic/restic/internal/checker" "github.com/restic/restic/internal/checker"
"github.com/restic/restic/internal/data"
"github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/global" "github.com/restic/restic/internal/global"
"github.com/restic/restic/internal/repository" "github.com/restic/restic/internal/repository"
@@ -71,6 +72,7 @@ type CheckOptions struct {
ReadDataSubset string ReadDataSubset string
CheckUnused bool CheckUnused bool
WithCache bool WithCache bool
data.SnapshotFilter
} }
func (opts *CheckOptions) AddFlags(f *pflag.FlagSet) { func (opts *CheckOptions) AddFlags(f *pflag.FlagSet) {
@@ -84,6 +86,7 @@ func (opts *CheckOptions) AddFlags(f *pflag.FlagSet) {
panic(err) panic(err)
} }
f.BoolVar(&opts.WithCache, "with-cache", false, "use existing cache, only read uncached data from repository") f.BoolVar(&opts.WithCache, "with-cache", false, "use existing cache, only read uncached data from repository")
initMultiSnapshotFilter(f, &opts.SnapshotFilter, true)
} }
func checkFlags(opts CheckOptions) error { func checkFlags(opts CheckOptions) error {
@@ -220,9 +223,6 @@ func prepareCheckCache(opts CheckOptions, gopts *global.Options, printer progres
func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args []string, term ui.Terminal) (checkSummary, error) { func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args []string, term ui.Terminal) (checkSummary, error) {
summary := checkSummary{MessageType: "summary"} summary := checkSummary{MessageType: "summary"}
if len(args) != 0 {
return summary, errors.Fatal("the check command expects no arguments, only options - please see `restic help check` for usage and flags")
}
var printer progress.Printer var printer progress.Printer
if !gopts.JSON { if !gopts.JSON {
@@ -231,11 +231,6 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args
printer = newJSONErrorPrinter(term) printer = newJSONErrorPrinter(term)
} }
readDataFilter, err := buildPacksFilter(opts, printer)
if err != nil {
return summary, err
}
cleanup := prepareCheckCache(opts, &gopts, printer) cleanup := prepareCheckCache(opts, &gopts, printer)
defer cleanup() defer cleanup()
@@ -249,7 +244,7 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args
defer unlock() defer unlock()
chkr := checker.New(repo, opts.CheckUnused) chkr := checker.New(repo, opts.CheckUnused)
err = chkr.LoadSnapshots(ctx) err = chkr.LoadSnapshots(ctx, &opts.SnapshotFilter, args)
if err != nil { if err != nil {
return summary, err return summary, err
} }
@@ -365,6 +360,7 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args
return summary, ctx.Err() return summary, ctx.Err()
} }
// the following block is only used for tests
if opts.CheckUnused { if opts.CheckUnused {
unused, err := chkr.UnusedBlobs(ctx) unused, err := chkr.UnusedBlobs(ctx)
if err != nil { if err != nil {
@@ -376,6 +372,11 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args
} }
} }
readDataFilter, err := buildPacksFilter(opts, printer, chkr.IsFiltered())
if err != nil {
return summary, err
}
if readDataFilter != nil { if readDataFilter != nil {
p := printer.NewCounter("packs") p := printer.NewCounter("packs")
errChan := make(chan error) errChan := make(chan error)
@@ -416,11 +417,16 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args
return summary, nil return summary, nil
} }
func buildPacksFilter(opts CheckOptions, printer progress.Printer) (func(packs map[restic.ID]int64) map[restic.ID]int64, error) { func buildPacksFilter(opts CheckOptions, printer progress.Printer,
filteredStatus bool) (func(packs map[restic.ID]int64) map[restic.ID]int64, error) {
typeData := ""
if filteredStatus {
typeData = "filtered "
}
switch { switch {
case opts.ReadData: case opts.ReadData:
return func(packs map[restic.ID]int64) map[restic.ID]int64 { return func(packs map[restic.ID]int64) map[restic.ID]int64 {
printer.P("read all data\n") printer.P("read all %sdata", typeData)
return packs return packs
}, nil }, nil
case opts.ReadDataSubset != "": case opts.ReadDataSubset != "":
@@ -431,7 +437,7 @@ func buildPacksFilter(opts CheckOptions, printer progress.Printer) (func(packs m
return func(packs map[restic.ID]int64) map[restic.ID]int64 { return func(packs map[restic.ID]int64) map[restic.ID]int64 {
packCount := uint64(len(packs)) packCount := uint64(len(packs))
packs = selectPacksByBucket(packs, bucket, totalBuckets) packs = selectPacksByBucket(packs, bucket, totalBuckets)
printer.P("read group #%d of %d data packs (out of total %d packs in %d groups)\n", bucket, len(packs), packCount, totalBuckets) printer.P("read group #%d of %d %sdata packs (out of total %d packs in %d groups", bucket, len(packs), typeData, packCount, totalBuckets)
return packs return packs
}, nil }, nil
} else if strings.HasSuffix(opts.ReadDataSubset, "%") { } else if strings.HasSuffix(opts.ReadDataSubset, "%") {
@@ -440,7 +446,7 @@ func buildPacksFilter(opts CheckOptions, printer progress.Printer) (func(packs m
return nil, err return nil, err
} }
return func(packs map[restic.ID]int64) map[restic.ID]int64 { return func(packs map[restic.ID]int64) map[restic.ID]int64 {
printer.P("read %.1f%% of data packs\n", percentage) printer.P("read %.1f%% of %spackfiles", percentage, typeData)
return selectRandomPacksByPercentage(packs, percentage) return selectRandomPacksByPercentage(packs, percentage)
}, nil }, nil
} }
@@ -461,7 +467,7 @@ func buildPacksFilter(opts CheckOptions, printer progress.Printer) (func(packs m
if repoSize == 0 { if repoSize == 0 {
percentage = 100 percentage = 100
} }
printer.P("read %d bytes (%.1f%%) of data packs\n", subsetSize, percentage) printer.P("read %d bytes (%.1f%%) of %sdata packs\n", subsetSize, percentage, typeData)
return packs return packs
}, nil }, nil
} }

View File

@@ -2,6 +2,7 @@ package main
import ( import (
"context" "context"
"strings"
"testing" "testing"
"github.com/restic/restic/internal/global" "github.com/restic/restic/internal/global"
@@ -34,3 +35,67 @@ func testRunCheckOutput(t testing.TB, gopts global.Options, checkUnused bool) (s
}) })
return buf.String(), err return buf.String(), err
} }
// testRunCheckOutputWithOpts runs the check command with the given options
// and positional arguments at verbosity 2 and returns everything that was
// captured from stdout together with the error returned by runCheck.
func testRunCheckOutputWithOpts(t testing.TB, gopts global.Options, opts CheckOptions, args []string) (string, error) {
	// The callback receives its own copy of the global options, so raising
	// the verbosity here does not affect the caller's gopts.
	run := func(ctx context.Context, gopts global.Options) error {
		gopts.Verbosity = 2
		_, runErr := runCheck(context.TODO(), opts, gopts, args, gopts.Term)
		return runErr
	}

	captured, err := withCaptureStdout(t, gopts, run)
	return captured.String(), err
}
// TestCheckWithSnaphotFilter creates a repository with two backups and runs
// the check command with different read-data options and snapshot arguments.
// For every case it asserts that the captured output contains the expected
// substring.
func TestCheckWithSnaphotFilter(t *testing.T) {
	type checkCase struct {
		opts           CheckOptions
		args           []string
		expectedOutput string
	}

	env, cleanup := withTestEnvironment(t)
	defer cleanup()
	testSetupBackupData(t, env)

	// two backups of different subdirectories, i.e. two snapshots
	backupOpts := BackupOptions{}
	testRunBackup(t, env.testdata+"/0", []string{"for_cmd_ls"}, backupOpts, env.gopts)
	testRunBackup(t, env.testdata+"/0", []string{"0/9"}, backupOpts, env.gopts)

	testCases := []checkCase{
		// full --read-data, all snapshots
		{opts: CheckOptions{ReadData: true}, args: nil, expectedOutput: "4 / 4 packs"},
		// full --read-data, all snapshots
		{opts: CheckOptions{ReadData: true}, args: nil, expectedOutput: "2 / 2 snapshots"},
		// full --read-data, latest snapshot
		{opts: CheckOptions{ReadData: true}, args: []string{"latest"}, expectedOutput: "2 / 2 packs"},
		// full --read-data, latest snapshot
		{opts: CheckOptions{ReadData: true}, args: []string{"latest"}, expectedOutput: "1 / 1 snapshots"},
		// --read-data-subset, latest snapshot
		{opts: CheckOptions{ReadDataSubset: "1%"}, args: []string{"latest"}, expectedOutput: "1 / 1 packs"},
		// --read-data-subset, latest snapshot
		{opts: CheckOptions{ReadDataSubset: "1%"}, args: []string{"latest"}, expectedOutput: "filtered"},
	}

	for i := range testCases {
		tc := testCases[i]
		output, err := testRunCheckOutputWithOpts(t, env.gopts, tc.opts, tc.args)
		rtest.OK(t, err)
		rtest.Assert(t, strings.Contains(output, tc.expectedOutput),
			`expected to find substring %q, but did not find it`, tc.expectedOutput)
	}
}

View File

@@ -82,6 +82,12 @@ If ``check`` detects damaged pack files, it will show instructions on how to rep
them using the ``repair pack`` command. Use that command instead of the "Repair the them using the ``repair pack`` command. Use that command instead of the "Repair the
index" section in this guide. index" section in this guide.
If you want to check only specific snapshots, you can use the standard
snapshot filters ``--host``, ``--path`` and ``--tag``, or alternatively
name snapshot ID(s) explicitly. The selected subset of packfiles
will then be checked for consistency and read when either ``--read-data`` or
``--read-data-subset`` is given.
2. Backup the repository 2. Backup the repository
************************ ************************

View File

@@ -31,6 +31,10 @@ type Checker struct {
snapshots restic.Lister snapshots restic.Lister
repo restic.Repository repo restic.Repository
// when snapshot filtering is being used
snapshotFilter *data.SnapshotFilter
args []string
} }
type checkerRepository interface { type checkerRepository interface {
@@ -51,12 +55,19 @@ func New(repo checkerRepository, trackUnused bool) *Checker {
return c return c
} }
func (c *Checker) LoadSnapshots(ctx context.Context) error { func (c *Checker) LoadSnapshots(ctx context.Context, snapshotFilter *data.SnapshotFilter, args []string) error {
var err error var err error
c.snapshots, err = restic.MemorizeList(ctx, c.repo, restic.SnapshotFile) c.snapshots, err = restic.MemorizeList(ctx, c.repo, restic.SnapshotFile)
c.args = args
c.snapshotFilter = snapshotFilter
return err return err
} }
// IsFiltered reports whether snapshot filtering is active, i.e. whether
// snapshot arguments were stored on the checker or the stored snapshot
// filter is not empty. Both values are stored by LoadSnapshots.
func (c *Checker) IsFiltered() bool {
	if len(c.args) != 0 {
		return true
	}
	// snapshotFilter is a pointer that is assigned by LoadSnapshots. Treat a
	// missing filter as "no filtering" instead of dereferencing a nil pointer
	// when IsFiltered is called before LoadSnapshots or after a nil filter
	// was passed in.
	return c.snapshotFilter != nil && !c.snapshotFilter.Empty()
}
// Error is an error that occurred while checking a repository. // Error is an error that occurred while checking a repository.
type Error struct { type Error struct {
TreeID restic.ID TreeID restic.ID
@@ -124,11 +135,39 @@ func loadSnapshotTreeIDs(ctx context.Context, lister restic.Lister, repo restic.
return ids, errs return ids, errs
} }
// loadActiveTrees returns the root tree IDs of the snapshots that have to be
// checked, together with the errors collected while determining them.
//
// When no snapshot filtering is active, the result of loadSnapshotTreeIDs for
// all snapshots is returned unchanged. Otherwise snapshotFilter.FindAll is
// used to select the snapshots matching the filter and args. If FindAll
// reports an error, no trees are returned. On success blob tracking is
// switched on as a side effect.
func (c *Checker) loadActiveTrees(ctx context.Context, snapshotFilter *data.SnapshotFilter, args []string) (trees restic.IDs, errs []error) {
	if !c.IsFiltered() {
		return loadSnapshotTreeIDs(ctx, c.snapshots, c.repo)
	}

	trees = restic.IDs{}
	errs = []error{}

	// collect records the tree of every snapshot found and aborts on the
	// first lookup error.
	collect := func(_ string, sn *data.Snapshot, findErr error) error {
		switch {
		case findErr != nil:
			errs = append(errs, findErr)
			return findErr
		case sn != nil:
			trees = append(trees, *sn.Tree)
		}
		return nil
	}

	// NOTE(review): if FindAll hands the callback's error back to us, the same
	// error ends up in errs twice - confirm against FindAll.
	if err := snapshotFilter.FindAll(ctx, c.snapshots, c.repo, args, collect); err != nil {
		errs = append(errs, err)
		return nil, errs
	}

	// track blobs to learn which packs need to be checked
	c.trackUnused = true
	return trees, errs
}
// Structure checks that for all snapshots all referenced data blobs and // Structure checks that for all snapshots all referenced data blobs and
// subtrees are available in the index. errChan is closed after all trees have // subtrees are available in the index. errChan is closed after all trees have
// been traversed. // been traversed.
func (c *Checker) Structure(ctx context.Context, p *progress.Counter, errChan chan<- error) { func (c *Checker) Structure(ctx context.Context, p *progress.Counter, errChan chan<- error) {
trees, errs := loadSnapshotTreeIDs(ctx, c.snapshots, c.repo) trees, errs := c.loadActiveTrees(ctx, c.snapshotFilter, c.args)
p.SetMax(uint64(len(trees))) p.SetMax(uint64(len(trees)))
debug.Log("need to check %d trees from snapshots, %d errs returned", len(trees), len(errs)) debug.Log("need to check %d trees from snapshots, %d errs returned", len(trees), len(errs))
@@ -259,3 +298,30 @@ func (c *Checker) UnusedBlobs(ctx context.Context) (blobs restic.BlobHandles, er
return blobs, err return blobs, err
} }
// ReadPacks wraps the ReadPacks method of the underlying c.Checker.
//
// Without snapshot filtering the call is passed through with the given
// filter unchanged. With snapshot filtering the set of all packs is first
// reduced to the packs that contain the blobs recorded in c.blobRefs, keeping
// their sizes from the full pack set, and only this reduced set is handed to
// the given filter.
func (c *Checker) ReadPacks(ctx context.Context, filter func(packs map[restic.ID]int64) map[restic.ID]int64, p *progress.Counter, errChan chan<- error) {
	selectPacks := filter

	if c.IsFiltered() {
		selectPacks = func(allPacks map[restic.ID]int64) map[restic.ID]int64 {
			referenced := make(map[restic.ID]int64)

			// map every used blob to the packfile(s) it is stored in
			for handle := range c.blobRefs.M.Keys() {
				locations := c.repo.LookupBlob(handle.Type, handle.ID)
				for _, loc := range locations {
					referenced[loc.PackID] = allPacks[loc.PackID]
				}
			}

			return filter(referenced)
		}
	}

	c.Checker.ReadPacks(ctx, selectPacks, p, errChan)
}

View File

@@ -46,7 +46,7 @@ func checkPacks(chkr *checker.Checker) []error {
} }
func checkStruct(chkr *checker.Checker) []error { func checkStruct(chkr *checker.Checker) []error {
err := chkr.LoadSnapshots(context.TODO()) err := chkr.LoadSnapshots(context.TODO(), &data.SnapshotFilter{}, nil)
if err != nil { if err != nil {
return []error{err} return []error{err}
} }

View File

@@ -4,6 +4,7 @@ import (
"context" "context"
"testing" "testing"
"github.com/restic/restic/internal/data"
"github.com/restic/restic/internal/restic" "github.com/restic/restic/internal/restic"
) )
@@ -20,7 +21,7 @@ func TestCheckRepo(t testing.TB, repo checkerRepository) {
t.Fatalf("errors loading index: %v", hints) t.Fatalf("errors loading index: %v", hints)
} }
err := chkr.LoadSnapshots(context.TODO()) err := chkr.LoadSnapshots(context.TODO(), &data.SnapshotFilter{}, nil)
if err != nil { if err != nil {
t.Error(err) t.Error(err)
} }