Files
restic/cmd/restic/cmd_restore.go
T
Gilbert Gilb's 536ebefff4 feat(backends/s3): add warmup support before repacks and restores (#5173)
* feat(backends/s3): add warmup support before repacks and restores

This commit introduces basic support for transitioning pack files stored
in cold storage to hot storage on S3 and S3-compatible providers.

To prevent unexpected behavior for existing users, the feature is gated
behind new flags:

- `s3.enable-restore`: opt-in flag (defaults to false)
- `s3.restore-days`: number of days for the restored objects to remain
  in hot storage (defaults to `7`)
- `s3.restore-timeout`: maximum time to wait for a single restoration
  (defaults to `1 day`)
- `s3.restore-tier`: retrieval tier at which the restore will be
  processed (defaults to `Standard`)

As restoration times can be lengthy, this implementation preemptively
restores selected packs to prevent incessant restore-delays during
downloads. This is slightly sub-optimal as we could process packs
out-of-order (as soon as they're transitioned), but this would really
add too much complexity for a marginal gain in speed.

To maintain simplicity and prevent resource exhaustion with lots of
packs, no new concurrency mechanisms or goroutines were added. This just
hooks gracefully into the existing routines.

**Limitations:**

- Tests against the backend were not written due to the lack of cold
  storage class support in MinIO. Testing was done manually on
  Scaleway's S3-compatible object storage. If necessary, we could
  explore testing with LocalStack or mocks, though this requires further
  discussion.
- Currently, this feature only warms up before restores and repacks
  (prune/copy), as those are the two main use-cases I came across.
  Support for other commands may be added in future iterations, as long
  as affected packs can be calculated in advance.
- The feature is gated behind a new alpha `s3-restore` feature flag to
  make it explicit that the feature is still wet behind the ears.
- There is no explicit user notification for ongoing pack restorations.
  While I think it is not necessary because of the opt-in flag, showing
  some notice may improve usability (but would probably require major
  refactoring in the progress bar which I didn't want to start). Another
  possibility would be to add a flag to send restore requests and fail
  early.

See https://github.com/restic/restic/issues/3202

* ui: warn user when files are warming up from cold storage

* refactor: remove the PacksWarmer struct

It's easier to handle multiple handles in the backend directly, and it
may open the door to reducing the number of requests made to the backend
in the future.
2025-02-01 18:26:27 +00:00

312 lines
9.1 KiB
Go

package main
import (
"context"
"path/filepath"
"time"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/filter"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/restorer"
"github.com/restic/restic/internal/ui"
restoreui "github.com/restic/restic/internal/ui/restore"
"github.com/restic/restic/internal/ui/termstatus"
"github.com/spf13/cobra"
)
// cmdRestore is the cobra command definition for "restore". It expects a
// single snapshot ID argument (the argument count is checked in runRestore)
// and delegates all work to runRestore, passing the package-level
// restoreOptions and globalOptions.
var cmdRestore = &cobra.Command{
Use: "restore [flags] snapshotID",
Short: "Extract the data from a snapshot",
Long: `
The "restore" command extracts the data from a snapshot from the repository to
a directory.
The special snapshotID "latest" can be used to restore the latest snapshot in the
repository.
To only restore a specific subfolder, you can use the "snapshotID:subfolder"
syntax, where "subfolder" is a path within the snapshot.
EXIT STATUS
===========
Exit status is 0 if the command was successful.
Exit status is 1 if there was any error.
Exit status is 10 if the repository does not exist.
Exit status is 11 if the repository is already locked.
Exit status is 12 if the password is incorrect.
`,
GroupID: cmdGroupDefault,
DisableAutoGenTag: true,
RunE: func(cmd *cobra.Command, args []string) error {
// obtain the terminal used for status output; cancel is deferred so it
// runs once runRestore has returned
term, cancel := setupTermstatus()
defer cancel()
return runRestore(cmd.Context(), restoreOptions, globalOptions, term, args)
},
}
// RestoreOptions collects all options for the restore command.
// The fields are bound to command line flags in init and consumed by
// runRestore and getXattrSelectFilter.
type RestoreOptions struct {
// exclude patterns for files; registered on the flag set via Add and turned
// into matcher functions via CollectPatterns
filter.ExcludePatternOptions
// include patterns for files; runRestore rejects combining them with
// exclude patterns
filter.IncludePatternOptions
// Target is the directory to extract data to (--target, -t). runRestore
// fails if it is empty.
Target string
// SnapshotFilter provides the Hosts, Paths and Tags that runRestore uses
// when resolving the snapshot ID
restic.SnapshotFilter
// DryRun: do not write any data, just show what would be done (--dry-run).
// Cannot be combined with Verify.
DryRun bool
// Sparse: restore files as sparse (--sparse)
Sparse bool
// Verify: verify restored files content after the restore (--verify)
Verify bool
// Overwrite is the overwrite behavior (--overwrite), one of
// always|if-changed|if-newer|never
Overwrite restorer.OverwriteBehavior
// Delete: delete files from the target directory if they do not exist in
// the snapshot (--delete)
Delete bool
// ExcludeXattrPattern lists patterns of xattrs to exclude (--exclude-xattr);
// mutually exclusive with IncludeXattrPattern
ExcludeXattrPattern []string
// IncludeXattrPattern lists patterns of xattrs to include (--include-xattr)
IncludeXattrPattern []string
}
// restoreOptions holds the flag values of the restore command. init binds the
// flags to it and cmdRestore.RunE hands it to runRestore.
var restoreOptions RestoreOptions
// init attaches the restore command to the root command and wires every
// restore flag to the corresponding field of the package-level restoreOptions.
func init() {
	cmdRoot.AddCommand(cmdRestore)

	o := &restoreOptions
	fs := cmdRestore.Flags()

	// destination directory
	fs.StringVarP(&o.Target, "target", "t", "", "directory to extract data to")

	// file and xattr selection
	o.ExcludePatternOptions.Add(fs)
	o.IncludePatternOptions.Add(fs)
	fs.StringArrayVar(&o.ExcludeXattrPattern, "exclude-xattr", nil, "exclude xattr by `pattern` (can be specified multiple times)")
	fs.StringArrayVar(&o.IncludeXattrPattern, "include-xattr", nil, "include xattr by `pattern` (can be specified multiple times)")

	// snapshot selection
	initSingleSnapshotFilter(fs, &o.SnapshotFilter)

	// restore behavior
	fs.BoolVar(&o.DryRun, "dry-run", false, "do not write any data, just show what would be done")
	fs.BoolVar(&o.Sparse, "sparse", false, "restore files as sparse")
	fs.BoolVar(&o.Verify, "verify", false, "verify restored files content")
	fs.Var(&o.Overwrite, "overwrite", "overwrite behavior, one of (always|if-changed|if-newer|never) (default: always)")
	fs.BoolVar(&o.Delete, "delete", false, "delete files from target directory if they do not exist in snapshot. Use '--dry-run -vv' to check what would be deleted")
}
// runRestore restores the snapshot named by args[0] into opts.Target.
//
// It proceeds in these steps:
//  1. collect the include/exclude pattern functions and validate the
//     option combination (the checks run in a fixed order, so the first
//     violated rule determines the reported error),
//  2. open the repository via openWithReadLock, resolve the snapshot and
//     optional subfolder, and load the index,
//  3. configure a restorer with progress reporting, error/warning/info
//     callbacks and the file and xattr selection filters,
//  4. run the restore and, if opts.Verify is set, verify the restored files.
//
// A non-nil error is returned if validation fails, if any step reports an
// error, or if the restorer's Error callback was invoked at least once.
func runRestore(ctx context.Context, opts RestoreOptions, gopts GlobalOptions,
	term *termstatus.Terminal, args []string) error {

	rejectFns, err := opts.ExcludePatternOptions.CollectPatterns(Warnf)
	if err != nil {
		return err
	}
	includeFns, err := opts.IncludePatternOptions.CollectPatterns(Warnf)
	if err != nil {
		return err
	}
	useExcludes, useIncludes := len(rejectFns) > 0, len(includeFns) > 0

	// argument and option validation
	if len(args) == 0 {
		return errors.Fatal("no snapshot ID specified")
	}
	if len(args) > 1 {
		return errors.Fatalf("more than one snapshot ID specified: %v", args)
	}
	if opts.Target == "" {
		return errors.Fatal("please specify a directory to restore to (--target)")
	}
	if useExcludes && useIncludes {
		return errors.Fatal("exclude and include patterns are mutually exclusive")
	}
	if opts.DryRun && opts.Verify {
		return errors.Fatal("--dry-run and --verify are mutually exclusive")
	}
	// deleting below "/" without any filter is refused
	if opts.Delete && !(useExcludes || useIncludes) && filepath.Clean(opts.Target) == "/" {
		return errors.Fatal("'--target / --delete' must be combined with an include or exclude filter")
	}

	snapshotID := args[0]
	debug.Log("restore %v to %v", snapshotID, opts.Target)

	ctx, repo, unlock, err := openWithReadLock(ctx, gopts, gopts.NoLock)
	if err != nil {
		return err
	}
	defer unlock()

	// resolve the snapshot (and the optional ":subfolder" part) using only
	// the host/path/tag criteria from the options
	snFilter := &restic.SnapshotFilter{
		Hosts: opts.Hosts,
		Paths: opts.Paths,
		Tags:  opts.Tags,
	}
	sn, subfolder, err := snFilter.FindLatest(ctx, repo, repo, snapshotID)
	if err != nil {
		return errors.Fatalf("failed to find snapshot: %v", err)
	}

	if err = repo.LoadIndex(ctx, newIndexTerminalProgress(gopts.Quiet, gopts.JSON, term)); err != nil {
		return err
	}

	// replace the snapshot's root tree by the tree of the requested subfolder
	if sn.Tree, err = restic.FindTreeDirectory(ctx, repo, sn.Tree, subfolder); err != nil {
		return err
	}

	msg := ui.NewMessage(term, gopts.verbosity)

	var printer restoreui.ProgressPrinter
	if gopts.JSON {
		printer = restoreui.NewJSONProgress(term, gopts.verbosity)
	} else {
		printer = restoreui.NewTextProgress(term, gopts.verbosity)
	}
	progress := restoreui.NewProgress(printer, calculateProgressInterval(!gopts.Quiet, gopts.JSON))

	restoreOpts := restorer.Options{
		DryRun:    opts.DryRun,
		Sparse:    opts.Sparse,
		Progress:  progress,
		Overwrite: opts.Overwrite,
		Delete:    opts.Delete,
	}
	res := restorer.NewRestorer(repo, sn, restoreOpts)

	// every error reported by the restorer is counted and forwarded to the
	// progress reporter; the count decides the final result further down
	var errorCount int
	res.Error = func(location string, err error) error {
		errorCount++
		return progress.Error(location, err)
	}
	res.Warn = func(message string) {
		msg.E("Warning: %s\n", message)
	}
	res.Info = func(message string) {
		// informational messages are suppressed in JSON mode
		if !gopts.JSON {
			msg.P("Info: %s\n", message)
		}
	}

	// at most one of the two filters is installed; having both has been
	// rejected above
	switch {
	case useExcludes:
		res.SelectFilter = func(item string, isDir bool) (selectedForRestore bool, childMayBeSelected bool) {
			// stop at the first matching exclude pattern
			for _, reject := range rejectFns {
				if reject(item) {
					// an excluded item is not restored and not descended into
					return false, false
				}
			}
			// not excluded: restore it and descend if it is a directory
			return true, isDir
		}
	case useIncludes:
		res.SelectFilter = func(item string, isDir bool) (selectedForRestore bool, childMayBeSelected bool) {
			var selected, descend bool
			for _, include := range includeFns {
				hit, childHit := include(item)
				if hit {
					selected = true
				}
				if childHit {
					descend = true
				}
				// nothing more to learn once both answers are positive
				if selected && descend {
					break
				}
			}
			// only directories can have children
			return selected, descend && isDir
		}
	}

	if res.XattrSelectFilter, err = getXattrSelectFilter(opts); err != nil {
		return err
	}

	if !gopts.JSON {
		msg.P("restoring %s to %s\n", res.Snapshot(), opts.Target)
	}

	restoredFiles, err := res.RestoreTo(ctx, opts.Target)
	if err != nil {
		return err
	}
	progress.Finish()

	if errorCount > 0 {
		return errors.Fatalf("There were %d errors\n", errorCount)
	}

	if !opts.Verify {
		return nil
	}

	if !gopts.JSON {
		msg.P("verifying files in %s\n", opts.Target)
	}
	started := time.Now()
	verifyBar := newTerminalProgressMax(!gopts.Quiet && !gopts.JSON && stdoutIsTerminal(), 0, "files verified", term)
	verified, err := res.VerifyFiles(ctx, opts.Target, restoredFiles, verifyBar)
	if err != nil {
		return err
	}
	// the error counter is checked again after the verification pass
	if errorCount > 0 {
		return errors.Fatalf("There were %d errors\n", errorCount)
	}
	if !gopts.JSON {
		msg.P("finished verifying %d files in %s (took %s)\n", verified, opts.Target,
			time.Since(started).Round(time.Millisecond))
	}
	return nil
}
// getXattrSelectFilter builds the predicate that decides, by name, whether an
// extended attribute is selected.
//
// The result depends on which xattr pattern lists are set in opts:
//   - both ExcludeXattrPattern and IncludeXattrPattern: a fatal error, the
//     two are mutually exclusive;
//   - only ExcludeXattrPattern: the predicate returns true for every name
//     that is not rejected by the exclude patterns;
//   - only IncludeXattrPattern: the predicate returns true for every name
//     matched by the include patterns;
//   - neither: the predicate returns true for every name.
//
// Patterns are validated up front; an invalid pattern yields a fatal error
// prefixed with the name of the offending flag.
func getXattrSelectFilter(opts RestoreOptions) (func(xattrName string) bool, error) {
	hasXattrExcludes := len(opts.ExcludeXattrPattern) > 0
	hasXattrIncludes := len(opts.IncludeXattrPattern) > 0

	if hasXattrExcludes && hasXattrIncludes {
		return nil, errors.Fatal("exclude and include xattr patterns are mutually exclusive")
	}

	if hasXattrExcludes {
		if err := filter.ValidatePatterns(opts.ExcludeXattrPattern); err != nil {
			return nil, errors.Fatalf("--exclude-xattr: %s", err)
		}

		// build the matcher once instead of once per xattr name; the pattern
		// list does not change after this point
		reject := filter.RejectByPattern(opts.ExcludeXattrPattern, Warnf)
		return func(xattrName string) bool {
			return !reject(xattrName)
		}, nil
	}

	if hasXattrIncludes {
		// the user supplied at least one include xattr pattern
		if err := filter.ValidatePatterns(opts.IncludeXattrPattern); err != nil {
			return nil, errors.Fatalf("--include-xattr: %s", err)
		}

		// build the matcher once instead of once per xattr name
		include := filter.IncludeByPattern(opts.IncludeXattrPattern, Warnf)
		return func(xattrName string) bool {
			// the second result (whether a child may match) is irrelevant
			// for xattr names and deliberately ignored
			shouldInclude, _ := include(xattrName)
			return shouldInclude
		}, nil
	}

	// default to including all xattrs
	return func(_ string) bool { return true }, nil
}