Merge pull request #3548 from gum3ng/issue_3490

Support for specifying amount of data in read-data-subset
This commit is contained in:
MichaelEischer
2021-11-05 23:28:11 +01:00
committed by GitHub
4 changed files with 98 additions and 9 deletions

View File

@@ -65,7 +65,7 @@ func checkFlags(opts CheckOptions) error {
}
if opts.ReadDataSubset != "" {
dataSubset, err := stringToIntSlice(opts.ReadDataSubset)
argumentError := errors.Fatal("check flag --read-data-subset must have two positive integer values or a percentage, e.g. --read-data-subset=1/2 or --read-data-subset=2.5%%")
argumentError := errors.Fatal("check flag --read-data-subset must have two positive integer values or a percentage or a file size, e.g. --read-data-subset=1/2 or --read-data-subset=2.5%% or --read-data-subset=10G")
if err == nil {
if len(dataSubset) != 2 {
return argumentError
@@ -76,7 +76,7 @@ func checkFlags(opts CheckOptions) error {
if dataSubset[1] > totalBucketsMax {
return errors.Fatalf("check flag --read-data-subset=n/t t must be at most %d", totalBucketsMax)
}
} else {
} else if strings.HasSuffix(opts.ReadDataSubset, "%") {
percentage, err := parsePercentage(opts.ReadDataSubset)
if err != nil {
return argumentError
@@ -86,6 +86,17 @@ func checkFlags(opts CheckOptions) error {
return errors.Fatal(
"check flag --read-data-subset=n% n must be above 0.0% and at most 100.0%")
}
} else {
fileSize, err := parseSizeStr(opts.ReadDataSubset)
if err != nil {
return argumentError
}
if fileSize <= 0.0 {
return errors.Fatal(
"check flag --read-data-subset=n n must be above 0.0")
}
}
}
@@ -294,10 +305,27 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error {
packs = selectPacksByBucket(chkr.GetPacks(), bucket, totalBuckets)
packCount := uint64(len(packs))
Verbosef("read group #%d of %d data packs (out of total %d packs in %d groups)\n", bucket, packCount, chkr.CountPacks(), totalBuckets)
} else if strings.HasSuffix(opts.ReadDataSubset, "%") {
percentage, err := parsePercentage(opts.ReadDataSubset)
if err == nil {
packs = selectRandomPacksByPercentage(chkr.GetPacks(), percentage)
Verbosef("read %.1f%% of data packs\n", percentage)
}
} else {
percentage, _ := parsePercentage(opts.ReadDataSubset)
packs = selectRandomPacksByPercentage(chkr.GetPacks(), percentage)
Verbosef("read %.1f%% of data packs\n", percentage)
repoSize := int64(0)
allPacks := chkr.GetPacks()
for _, size := range allPacks {
repoSize += size
}
if repoSize == 0 {
return errors.Fatal("Cannot read from a repository having size 0")
}
subsetSize, _ := parseSizeStr(opts.ReadDataSubset)
if subsetSize > repoSize {
subsetSize = repoSize
}
packs = selectRandomPacksByFileSize(chkr.GetPacks(), subsetSize, repoSize)
Verbosef("read %d bytes of data packs\n", subsetSize)
}
if packs == nil {
return errors.Fatal("internal error: failed to select packs to check")
@@ -349,6 +377,11 @@ func selectRandomPacksByPercentage(allPacks map[restic.ID]int64, percentage floa
id := keys[idx[i]]
packs[id] = allPacks[id]
}
return packs
}
func selectRandomPacksByFileSize(allPacks map[restic.ID]int64, subsetSize int64, repoSize int64) map[restic.ID]int64 {
subsetPercentage := (float64(subsetSize) / float64(repoSize)) * 100.0
packs := selectRandomPacksByPercentage(allPacks, subsetPercentage)
return packs
}

View File

@@ -129,3 +129,37 @@ func TestSelectNoRandomPacksByPercentage(t *testing.T) {
selectedPacks := selectRandomPacksByPercentage(testPacks, 10.0)
rtest.Assert(t, len(selectedPacks) == 0, "Expected 0 selected packs")
}
func TestSelectRandomPacksByFileSize(t *testing.T) {
var testPacks = make(map[restic.ID]int64)
for i := 1; i <= 10; i++ {
id := restic.NewRandomID()
// ensure unique ids
id[0] = byte(i)
testPacks[id] = 0
}
selectedPacks := selectRandomPacksByFileSize(testPacks, 10, 500)
rtest.Assert(t, len(selectedPacks) == 1, "Expected 1 selected packs")
selectedPacks = selectRandomPacksByFileSize(testPacks, 10240, 51200)
rtest.Assert(t, len(selectedPacks) == 2, "Expected 2 selected packs")
for pack := range selectedPacks {
_, ok := testPacks[pack]
rtest.Assert(t, ok, "Unexpected selection")
}
selectedPacks = selectRandomPacksByFileSize(testPacks, 500, 500)
rtest.Assert(t, len(selectedPacks) == 10, "Expected 10 selected packs")
for pack := range selectedPacks {
_, ok := testPacks[pack]
rtest.Assert(t, ok, "Unexpected item in selection")
}
}
func TestSelectNoRandomPacksByFileSize(t *testing.T) {
// that the a repository without pack files works
var testPacks = make(map[restic.ID]int64)
selectedPacks := selectRandomPacksByFileSize(testPacks, 10, 500)
rtest.Assert(t, len(selectedPacks) == 0, "Expected 0 selected packs")
}