mirror of
https://github.com/restic/restic.git
synced 2026-05-27 12:25:23 +00:00
Reject files excluded by name before calling lstat to improve scan speed
Adds a SelectByName method to the archive and scanner which only require the filename as input, and can thus be run before calling lstat on the file. Can speed up scanning significantly if a lot of filename excludes are used.
This commit is contained in:
committed by
Alexander Neumann
parent
9b513312e2
commit
b07bb3d8c3
+36
-13
@@ -186,18 +186,9 @@ func (opts BackupOptions) Check(gopts GlobalOptions, args []string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// collectRejectFuncs returns a list of all functions which may reject data
|
||||
// from being saved in a snapshot
|
||||
func collectRejectFuncs(opts BackupOptions, repo *repository.Repository, targets []string) (fs []RejectFunc, err error) {
|
||||
// allowed devices
|
||||
if opts.ExcludeOtherFS && !opts.Stdin {
|
||||
f, err := rejectByDevice(targets)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fs = append(fs, f)
|
||||
}
|
||||
|
||||
// collectRejectByNameFuncs returns a list of all functions which may reject data
|
||||
// from being saved in a snapshot based on path only
|
||||
func collectRejectByNameFuncs(opts BackupOptions, repo *repository.Repository, targets []string) (fs []RejectByNameFunc, err error) {
|
||||
// exclude restic cache
|
||||
if repo.Cache != nil {
|
||||
f, err := rejectResticCache(repo)
|
||||
@@ -237,6 +228,21 @@ func collectRejectFuncs(opts BackupOptions, repo *repository.Repository, targets
|
||||
return fs, nil
|
||||
}
|
||||
|
||||
// collectRejectFuncs returns a list of all functions which may reject data
|
||||
// from being saved in a snapshot based on path and file info
|
||||
func collectRejectFuncs(opts BackupOptions, repo *repository.Repository, targets []string) (fs []RejectFunc, err error) {
|
||||
// allowed devices
|
||||
if opts.ExcludeOtherFS && !opts.Stdin {
|
||||
f, err := rejectByDevice(targets)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fs = append(fs, f)
|
||||
}
|
||||
|
||||
return fs, nil
|
||||
}
|
||||
|
||||
// readExcludePatternsFromFiles reads all exclude files and returns the list of
|
||||
// exclude patterns. For each line, leading and trailing white space is removed
|
||||
// and comment lines are ignored. For each remaining pattern, environment
|
||||
@@ -393,7 +399,13 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, term *termstatus.Termina
|
||||
return err
|
||||
}
|
||||
|
||||
// rejectFuncs collect functions that can reject items from the backup
|
||||
// rejectByNameFuncs collect functions that can reject items from the backup based on path only
|
||||
rejectByNameFuncs, err := collectRejectByNameFuncs(opts, repo, targets)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// rejectFuncs collect functions that can reject items from the backup based on path and file info
|
||||
rejectFuncs, err := collectRejectFuncs(opts, repo, targets)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -414,6 +426,15 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, term *termstatus.Termina
|
||||
p.V("using parent snapshot %v\n", parentSnapshotID.Str())
|
||||
}
|
||||
|
||||
selectByNameFilter := func(item string) bool {
|
||||
for _, reject := range rejectByNameFuncs {
|
||||
if reject(item) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
selectFilter := func(item string, fi os.FileInfo) bool {
|
||||
for _, reject := range rejectFuncs {
|
||||
if reject(item, fi) {
|
||||
@@ -436,6 +457,7 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, term *termstatus.Termina
|
||||
}
|
||||
|
||||
sc := archiver.NewScanner(targetFS)
|
||||
sc.SelectByName = selectByNameFilter
|
||||
sc.Select = selectFilter
|
||||
sc.Error = p.ScannerError
|
||||
sc.Result = p.ReportTotal
|
||||
@@ -444,6 +466,7 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, term *termstatus.Termina
|
||||
t.Go(func() error { return sc.Scan(t.Context(gopts.ctx), targets) })
|
||||
|
||||
arch := archiver.New(repo, targetFS, archiver.Options{})
|
||||
arch.SelectByName = selectByNameFilter
|
||||
arch.Select = selectFilter
|
||||
arch.WithAtime = opts.WithAtime
|
||||
arch.Error = p.Error
|
||||
|
||||
+17
-12
@@ -60,15 +60,20 @@ func (rc *rejectionCache) Store(dir string, rejected bool) {
|
||||
rc.m[dir] = rejected
|
||||
}
|
||||
|
||||
// RejectByNameFunc is a function that takes a filename of a
|
||||
// file that would be included in the backup. The function returns true if it
|
||||
// should be excluded (rejected) from the backup.
|
||||
type RejectByNameFunc func(path string) bool
|
||||
|
||||
// RejectFunc is a function that takes a filename and os.FileInfo of a
|
||||
// file that would be included in the backup. The function returns true if it
|
||||
// should be excluded (rejected) from the backup.
|
||||
type RejectFunc func(path string, fi os.FileInfo) bool
|
||||
|
||||
// rejectByPattern returns a RejectFunc which rejects files that match
|
||||
// rejectByPattern returns a RejectByNameFunc which rejects files that match
|
||||
// one of the patterns.
|
||||
func rejectByPattern(patterns []string) RejectFunc {
|
||||
return func(item string, fi os.FileInfo) bool {
|
||||
func rejectByPattern(patterns []string) RejectByNameFunc {
|
||||
return func(item string) bool {
|
||||
matched, _, err := filter.List(patterns, item)
|
||||
if err != nil {
|
||||
Warnf("error for exclude pattern: %v", err)
|
||||
@@ -83,14 +88,14 @@ func rejectByPattern(patterns []string) RejectFunc {
|
||||
}
|
||||
}
|
||||
|
||||
// rejectIfPresent returns a RejectFunc which itself returns whether a path
|
||||
// should be excluded. The RejectFunc considers a file to be excluded when
|
||||
// rejectIfPresent returns a RejectByNameFunc which itself returns whether a path
|
||||
// should be excluded. The RejectByNameFunc considers a file to be excluded when
|
||||
// it resides in a directory with an exclusion file, that is specified by
|
||||
// excludeFileSpec in the form "filename[:content]". The returned error is
|
||||
// non-nil if the filename component of excludeFileSpec is empty. If rc is
|
||||
// non-nil, it is going to be used in the RejectFunc to expedite the evaluation
|
||||
// non-nil, it is going to be used in the RejectByNameFunc to expedite the evaluation
|
||||
// of a directory based on previous visits.
|
||||
func rejectIfPresent(excludeFileSpec string) (RejectFunc, error) {
|
||||
func rejectIfPresent(excludeFileSpec string) (RejectByNameFunc, error) {
|
||||
if excludeFileSpec == "" {
|
||||
return nil, errors.New("name for exclusion tagfile is empty")
|
||||
}
|
||||
@@ -107,7 +112,7 @@ func rejectIfPresent(excludeFileSpec string) (RejectFunc, error) {
|
||||
}
|
||||
debug.Log("using %q as exclusion tagfile", tf)
|
||||
rc := &rejectionCache{}
|
||||
fn := func(filename string, _ os.FileInfo) bool {
|
||||
fn := func(filename string) bool {
|
||||
return isExcludedByFile(filename, tf, tc, rc)
|
||||
}
|
||||
return fn, nil
|
||||
@@ -252,11 +257,11 @@ func rejectByDevice(samples []string) (RejectFunc, error) {
|
||||
}, nil
|
||||
}
|
||||
|
||||
// rejectResticCache returns a RejectFunc that rejects the restic cache
|
||||
// rejectResticCache returns a RejectByNameFunc that rejects the restic cache
|
||||
// directory (if set).
|
||||
func rejectResticCache(repo *repository.Repository) (RejectFunc, error) {
|
||||
func rejectResticCache(repo *repository.Repository) (RejectByNameFunc, error) {
|
||||
if repo.Cache == nil {
|
||||
return func(string, os.FileInfo) bool {
|
||||
return func(string) bool {
|
||||
return false
|
||||
}, nil
|
||||
}
|
||||
@@ -266,7 +271,7 @@ func rejectResticCache(repo *repository.Repository) (RejectFunc, error) {
|
||||
return nil, errors.New("cacheBase is empty string")
|
||||
}
|
||||
|
||||
return func(item string, _ os.FileInfo) bool {
|
||||
return func(item string) bool {
|
||||
if fs.HasPathPrefix(cacheBase, item) {
|
||||
debug.Log("rejecting restic cache directory %v", item)
|
||||
return true
|
||||
|
||||
@@ -27,7 +27,7 @@ func TestRejectByPattern(t *testing.T) {
|
||||
for _, tc := range tests {
|
||||
t.Run("", func(t *testing.T) {
|
||||
reject := rejectByPattern(patterns)
|
||||
res := reject(tc.filename, nil)
|
||||
res := reject(tc.filename)
|
||||
if res != tc.reject {
|
||||
t.Fatalf("wrong result for filename %v: want %v, got %v",
|
||||
tc.filename, tc.reject, res)
|
||||
@@ -140,8 +140,8 @@ func TestMultipleIsExcludedByFile(t *testing.T) {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
excludedByFoo := fooExclude(p, fi)
|
||||
excludedByBar := barExclude(p, fi)
|
||||
excludedByFoo := fooExclude(p)
|
||||
excludedByBar := barExclude(p)
|
||||
excluded := excludedByFoo || excludedByBar
|
||||
// the log message helps debugging in case the test fails
|
||||
t.Logf("%q: %v || %v = %v", p, excludedByFoo, excludedByBar, excluded)
|
||||
|
||||
Reference in New Issue
Block a user