mirror of
https://github.com/restic/restic.git
synced 2026-06-28 11:34:18 +00:00
Fix broken snapshots via restic repair snapshots --forget (#21907)
Co-authored-by: Michael Eischer <michael.eischer@fau.de>
This commit is contained in:
committed by
GitHub
parent
aa94be2dbd
commit
9e1a526611
@@ -0,0 +1,11 @@
|
||||
Enhancement: `restic repair snapshots --forget` removes broken snapshots
|
||||
|
||||
In the past restic had no command to remove broken snapshot files.
|
||||
|
||||
`restic repair snapshots --forget` can now remove broken snapshot files,
|
||||
which were previously reported by `restic check` or by error messages from other restic commands.
|
||||
|
||||
For example, `restic repair snapshots --forget 1d204771` will remove a broken snapshot file with id `1d204771`.
|
||||
|
||||
https://github.com/restic/restic/issues/21892
|
||||
https://github.com/restic/restic/pull/21907
|
||||
+14
-3
@@ -339,6 +339,7 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args
|
||||
|
||||
printer.P("check snapshots, trees and blobs\n")
|
||||
errChan = make(chan error)
|
||||
var brokenSnapshots []string
|
||||
var wg sync.WaitGroup
|
||||
|
||||
wg.Add(1)
|
||||
@@ -351,13 +352,17 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args
|
||||
|
||||
for err := range errChan {
|
||||
errorsFound = true
|
||||
if e, ok := err.(*checker.TreeError); ok {
|
||||
switch e := err.(type) {
|
||||
case *checker.TreeError:
|
||||
printer.E("error for tree %v:\n", e.ID.Str())
|
||||
for _, treeErr := range e.Errors {
|
||||
summary.NumErrors++
|
||||
printer.E(" %v\n", treeErr)
|
||||
}
|
||||
} else {
|
||||
case *checker.SnapshotError:
|
||||
printer.E("snapshot error %v: %v", e.ID, e.Message)
|
||||
brokenSnapshots = append(brokenSnapshots, e.ID)
|
||||
default:
|
||||
summary.NumErrors++
|
||||
printer.E("error: %v\n", err)
|
||||
}
|
||||
@@ -412,12 +417,18 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args
|
||||
printer.E("Damaged pack files can be caused by backend problems, hardware problems or bugs in restic. Please open an issue at https://github.com/restic/restic/issues/new/choose for further troubleshooting!\n")
|
||||
}
|
||||
|
||||
if len(brokenSnapshots) > 0 {
|
||||
printer.E("\nThe repository contains damaged snapshot files. These damaged files must be removed to repair the repository. This can be done using the following commands. Please read the troubleshooting guide at https://restic.readthedocs.io/en/stable/077_troubleshooting.html first.\n\n")
|
||||
printer.E("restic repair snapshots --forget %s\n\n", strings.Join(brokenSnapshots, " "))
|
||||
printer.E("Damaged snapshot files can be caused by backend problems, hardware problems or bugs in restic. Please open an issue at https://github.com/restic/restic/issues/new/choose for further troubleshooting!\n")
|
||||
}
|
||||
|
||||
if ctx.Err() != nil {
|
||||
return summary, ctx.Err()
|
||||
}
|
||||
|
||||
if errorsFound {
|
||||
if len(salvagePacks) == 0 {
|
||||
if len(salvagePacks) == 0 && len(brokenSnapshots) == 0 {
|
||||
printer.E("\nThe repository is damaged and must be repaired. Please follow the troubleshooting guide at https://restic.readthedocs.io/en/stable/077_troubleshooting.html .\n\n")
|
||||
}
|
||||
return summary, errors.Fatal("repository contains errors")
|
||||
|
||||
@@ -78,6 +78,37 @@ func (opts *RepairOptions) AddFlags(f *pflag.FlagSet) {
|
||||
initMultiSnapshotFilter(f, &opts.SnapshotFilter, true)
|
||||
}
|
||||
|
||||
// handleUnreadableSnapshotFile is called when FindAll returns an error for ID 'id'
|
||||
func handleUnreadableSnapshotFile(
|
||||
ctx context.Context,
|
||||
be restic.Lister,
|
||||
repo restic.Repository,
|
||||
opts RepairOptions,
|
||||
id string,
|
||||
args []string,
|
||||
printer restic.Printer,
|
||||
) (bool, error) {
|
||||
brokenID, err := restic.Find(ctx, be, restic.SnapshotFile, id)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
if opts.Forget && slices.Index(args, id) >= 0 {
|
||||
if opts.DryRun {
|
||||
printer.P("would remove unreadable snapshot %v", brokenID)
|
||||
return true, nil
|
||||
}
|
||||
|
||||
if err := repo.RemoveUnpacked(ctx, restic.WriteableSnapshotFile, brokenID); err != nil {
|
||||
return false, errors.Wrapf(err, "unable to remove unreadable snapshot file %v", brokenID)
|
||||
}
|
||||
printer.P("removed unreadable snapshot %v", brokenID)
|
||||
return true, nil
|
||||
}
|
||||
|
||||
return false, errors.Fatalf("snapshot file %[1]s is unreadable, use `restic repair snapshots --forget %[1]s` to remove it. Original error: %v", brokenID, err)
|
||||
}
|
||||
|
||||
func runRepairSnapshots(ctx context.Context, gopts global.Options, opts RepairOptions, args []string, term ui.Terminal) error {
|
||||
printer := progress.NewTerminalPrinter(false, gopts.Verbosity, term)
|
||||
|
||||
@@ -96,10 +127,11 @@ func runRepairSnapshots(ctx context.Context, gopts global.Options, opts RepairOp
|
||||
return err
|
||||
}
|
||||
|
||||
// Three error cases are checked:
|
||||
// Four error cases are checked:
|
||||
// - tree is a nil tree (-> will be replaced by an empty tree)
|
||||
// - trees which cannot be loaded (-> the tree contents will be removed)
|
||||
// - files whose contents are not fully available (-> file will be modified)
|
||||
// - *checker.SnapshotError
|
||||
rewriter := walker.NewTreeRewriter(walker.RewriteOpts{
|
||||
RewriteNode: func(node *data.Node, path string) *data.Node {
|
||||
if node.Type == data.NodeTypeIrregular || node.Type == data.NodeTypeInvalid {
|
||||
@@ -146,7 +178,15 @@ func runRepairSnapshots(ctx context.Context, gopts global.Options, opts RepairOp
|
||||
})
|
||||
|
||||
changedCount := 0
|
||||
for sn := range FindFilteredSnapshots(ctx, snapshotLister, repo, &opts.SnapshotFilter, args, printer) {
|
||||
errOuter := opts.SnapshotFilter.FindAll(ctx, snapshotLister, repo, args, func(id string, sn *data.Snapshot, err error) error {
|
||||
if err != nil {
|
||||
changed, err := handleUnreadableSnapshotFile(ctx, snapshotLister, repo, opts, id, args, printer)
|
||||
if changed {
|
||||
changedCount++
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
printer.P("\n%v", sn)
|
||||
changed, err := filterAndReplaceSnapshot(ctx, repo, sn,
|
||||
func(ctx context.Context, sn *data.Snapshot, uploader restic.BlobSaver) (restic.ID, *data.SnapshotSummary, error) {
|
||||
@@ -159,9 +199,11 @@ func runRepairSnapshots(ctx context.Context, gopts global.Options, opts RepairOp
|
||||
if changed {
|
||||
changedCount++
|
||||
}
|
||||
}
|
||||
if ctx.Err() != nil {
|
||||
return ctx.Err()
|
||||
return nil
|
||||
})
|
||||
|
||||
if errOuter != nil {
|
||||
return errOuter
|
||||
}
|
||||
|
||||
printer.P("")
|
||||
|
||||
@@ -2,6 +2,8 @@ package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"hash/fnv"
|
||||
"io"
|
||||
"math/rand"
|
||||
@@ -136,3 +138,38 @@ func TestRepairSnapshotsIntact(t *testing.T) {
|
||||
rtest.Assert(t, reflect.DeepEqual(oldSnapshotIDs, snapshotIDs), "unexpected snapshot id mismatch %v vs. %v", oldSnapshotIDs, snapshotIDs)
|
||||
testRunCheck(t, env.gopts)
|
||||
}
|
||||
|
||||
func TestRepairSnapshotsBrokenSnapshots(t *testing.T) {
|
||||
env, cleanup := withTestEnvironment(t)
|
||||
defer cleanup()
|
||||
|
||||
testRunInit(t, env.gopts)
|
||||
|
||||
// create backup
|
||||
testRunBackup(t, filepath.Dir(env.testdata), []string{"testdata"}, BackupOptions{}, env.gopts)
|
||||
|
||||
// create zero length file in snapshots/
|
||||
// will fail with
|
||||
// failed to load snapshot 1d204771: LoadRaw(<snapshot/1d20477115>): invalid data returned
|
||||
handle, err := os.Create(filepath.Join(env.repo, "snapshots", "1d20477115fb872069a28a80ffb95a82cb8b1b1920de046a68c0195da63f30cf"))
|
||||
rtest.OK(t, err)
|
||||
rtest.OK(t, handle.Close())
|
||||
|
||||
// create some file with a correct sha256 name in snapshots/, will fail with
|
||||
// failed to load snapshot abcd1234: ciphertext verification failed
|
||||
contents := rtest.Random(1234567890123, 42)
|
||||
sha256Contents := sha256.Sum256(contents)
|
||||
target := hex.EncodeToString(sha256Contents[:])
|
||||
rtest.OK(t, os.WriteFile(filepath.Join(env.repo, "snapshots", target), contents, 0o600))
|
||||
|
||||
// run repair snapshots
|
||||
repairOpts := RepairOptions{Forget: true}
|
||||
env.gopts.BackendTestHook = nil
|
||||
_, err = withCaptureStdout(t, env.gopts, func(ctx context.Context, gopts global.Options) error {
|
||||
return runRepairSnapshots(ctx, gopts, repairOpts, []string{"1d204771", target[:8]}, gopts.Term)
|
||||
})
|
||||
rtest.OK(t, err)
|
||||
|
||||
// verify that there are no snapshot errors
|
||||
testRunCheck(t, env.gopts)
|
||||
}
|
||||
|
||||
@@ -84,6 +84,10 @@ If ``check`` detects damaged pack files, it will show instructions on how to rep
|
||||
them using the ``repair packs`` command. Use that command instead of the "Repairing the
|
||||
index" section in this guide.
|
||||
|
||||
If ``check`` detects unreadable snapshot files, it will show instructions on how to repair
|
||||
them using the ``repair snapshots`` command. Follow those instructions as part of the
|
||||
"Removing broken snapshots" section in this guide.
|
||||
|
||||
If you are interested in checking only specific snapshots, you can now
|
||||
use the standard snapshot filter method specifying ``--host``, ``--path``, ``--tag`` or
|
||||
alternatively naming snapshot ID(s) explicitly. The selected subset of packfiles
|
||||
@@ -148,8 +152,39 @@ repair the index first!
|
||||
Please note that it is not recommended to repair the index unless the repository
|
||||
is actually damaged.
|
||||
|
||||
4. Removing broken snapshots
|
||||
****************************
|
||||
|
||||
4. Running all backups (optional)
|
||||
.. note::
|
||||
|
||||
This step is only necessary if the ``check`` command tells you to run ``restic repair snapshots``.
|
||||
|
||||
In case of damage to a snapshot file, ``check`` will show an error message like the following:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ restic check
|
||||
using temporary cache in /tmp/restic-check-cache-2150939789
|
||||
create exclusive lock for repository
|
||||
repository cfabc5ed opened (version 1)
|
||||
created new cache in /tmp/restic-check-cache-2150939789
|
||||
load indexes
|
||||
[0:00] 100.00% 1 / 1 index files loaded
|
||||
check all packs
|
||||
check snapshots, trees and blobs
|
||||
error: failed to load snapshot 1d204771: LoadRaw(<snapshot/1d20477115>): invalid data returned
|
||||
[0:00] 100.00% 1 / 1 snapshots
|
||||
|
||||
The repository contains damaged snapshot files. These damaged files must be removed to repair the repository. This can be done using the following commands. Please read the troubleshooting guide at https://restic.readthedocs.io/en/stable/077_troubleshooting.html first.
|
||||
|
||||
restic repair snapshots --forget 1d2047711588c657efea246369c499bb2133240b1e03477d503386ceaa92fa2f
|
||||
|
||||
Damaged snapshot files can be caused by backend problems, hardware problems or bugs in restic. Please open an issue at https://github.com/restic/restic/issues/new/choose for further troubleshooting!
|
||||
Fatal: repository contains errors
|
||||
|
||||
As explained in the command output, you have to run ``restic repair snapshots --forget 1d2047711588c657efea246369c499bb2133240b1e03477d503386ceaa92fa2f`` to remove the broken snapshot file.
|
||||
|
||||
5. Running all backups (optional)
|
||||
*********************************
|
||||
|
||||
With a correct index, the ``backup`` command guarantees that newly created
|
||||
@@ -165,7 +200,7 @@ To check if the repository is fully repaired, you can run ``restic check``
|
||||
To get a list of still damaged files, you can run ``restic repair snapshots --dry-run``.
|
||||
Look for ``would save new snapshot`` messages to find affected snapshots.
|
||||
|
||||
5. Removing missing data from snapshots
|
||||
6. Removing missing data from snapshots
|
||||
***************************************
|
||||
|
||||
If your repository is still missing data, then you can use the ``repair snapshots``
|
||||
@@ -190,8 +225,7 @@ If you did not add the ``--forget`` option, then you have to manually delete all
|
||||
modified snapshots using the ``forget`` command. In the example above, you'd have
|
||||
to run ``restic forget 6979421e``.
|
||||
|
||||
|
||||
6. Checking the repository again
|
||||
7. Checking the repository again
|
||||
********************************
|
||||
|
||||
As a final step, run ``check`` again to make sure that the repository has been successfully
|
||||
|
||||
@@ -89,10 +89,19 @@ func (e *TreeError) Error() string {
|
||||
return fmt.Sprintf("tree %v: %v", e.ID, e.Errors)
|
||||
}
|
||||
|
||||
// SnapshotError is the error reported for a snapshot file that could not be
// loaded.
type SnapshotError struct {
	// ID is the ID of the affected snapshot file, in string form.
	ID string
	// Message is the underlying error returned while loading the snapshot.
	Message error
}

// Error implements the error interface.
func (e *SnapshotError) Error() string {
	return fmt.Sprintf("snapshot %v: %v", e.ID, e.Message)
}

// Unwrap returns the underlying error so that errors.Is and errors.As can
// inspect the cause of a SnapshotError.
func (e *SnapshotError) Unwrap() error {
	return e.Message
}
|
||||
|
||||
func loadSnapshotTreeIDs(ctx context.Context, lister restic.Lister, repo restic.LoaderUnpacked) (ids restic.IDs, errs []error) {
|
||||
err := data.ForAllSnapshots(ctx, lister, repo, nil, func(id restic.ID, sn *data.Snapshot, err error) error {
|
||||
if err != nil {
|
||||
errs = append(errs, err)
|
||||
errs = append(errs, &SnapshotError{ID: id.String(), Message: err})
|
||||
return nil
|
||||
}
|
||||
treeID := *sn.Tree
|
||||
@@ -115,10 +124,10 @@ func (c *Checker) loadActiveTrees(ctx context.Context, snapshotFilter *data.Snap
|
||||
return loadSnapshotTreeIDs(ctx, c.snapshots, c.repo)
|
||||
}
|
||||
|
||||
err := snapshotFilter.FindAll(ctx, c.snapshots, c.repo, args, func(_ string, sn *data.Snapshot, err error) error {
|
||||
err := snapshotFilter.FindAll(ctx, c.snapshots, c.repo, args, func(id string, sn *data.Snapshot, err error) error {
|
||||
if err != nil {
|
||||
errs = append(errs, err)
|
||||
return err
|
||||
errs = append(errs, &SnapshotError{ID: id, Message: err})
|
||||
return nil
|
||||
} else if sn != nil {
|
||||
trees = append(trees, *sn.Tree)
|
||||
}
|
||||
|
||||
@@ -132,6 +132,11 @@ func (f *SnapshotFilter) FindAll(ctx context.Context, be restic.Lister, loader r
|
||||
var err error
|
||||
usedFilter := false
|
||||
|
||||
be, err := restic.MemorizeList(ctx, be, restic.SnapshotFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
ids := restic.NewIDSet()
|
||||
// Process all snapshot IDs given as arguments.
|
||||
for _, s := range snapshotIDs {
|
||||
@@ -181,14 +186,18 @@ func (f *SnapshotFilter) FindAll(ctx context.Context, be restic.Lister, loader r
|
||||
if !usedFilter && !f.Empty() {
|
||||
return fn("filters", nil, errors.Errorf("explicit snapshot ids are given"))
|
||||
}
|
||||
return nil
|
||||
return ctx.Err()
|
||||
}
|
||||
|
||||
return ForAllSnapshots(ctx, be, loader, nil, func(id restic.ID, sn *Snapshot, err error) error {
|
||||
err := ForAllSnapshots(ctx, be, loader, nil, func(id restic.ID, sn *Snapshot, err error) error {
|
||||
if err == nil && !f.matches(sn) {
|
||||
return nil
|
||||
}
|
||||
|
||||
return fn(id.String(), sn, err)
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return ctx.Err()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user