pack: move to repository package

This commit is contained in:
Michael Eischer
2024-05-24 23:09:58 +02:00
parent 50ec408302
commit 5e0ea8fcfa
11 changed files with 8 additions and 8 deletions

View File

@@ -0,0 +1,2 @@
// Package pack provides functions for combining and parsing pack files.
package pack

View File

@@ -0,0 +1,407 @@
package pack
import (
"bytes"
"context"
"encoding/binary"
"fmt"
"io"
"sync"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/crypto"
)
// Packer is used to create a new Pack. Blob data is written to wr as blobs
// are added; Finalize appends the encrypted header describing them.
type Packer struct {
// blobs holds one entry per blob added so far, in the order they were added.
blobs []restic.Blob
// bytes is the number of bytes written to wr so far.
bytes uint
// k is the key used to encrypt the pack header.
k *crypto.Key
// wr receives the blob data and the encrypted header.
wr io.Writer
// m is held by the methods that read or modify blobs and bytes.
m sync.Mutex
}
// NewPacker creates an empty Packer that writes its blobs to wr and uses k to
// encrypt the pack header.
func NewPacker(k *crypto.Key, wr io.Writer) *Packer {
	p := new(Packer)
	p.k = k
	p.wr = wr
	return p
}
// Add writes data to the pack as a new blob of type t with the given id and
// records it for the pack header. uncompressedLength is stored in the blob's
// entry unchanged. Returned is the number of bytes written to the pack plus
// the pack header entry size, together with any error of the underlying
// writer.
func (p *Packer) Add(t restic.BlobType, id restic.ID, data []byte, uncompressedLength int) (int, error) {
	p.m.Lock()
	defer p.m.Unlock()

	written, err := p.wr.Write(data)

	// The blob is recorded with the number of bytes that were actually
	// written, even if the write returned an error.
	blob := restic.Blob{
		BlobHandle:         restic.BlobHandle{Type: t, ID: id},
		Length:             uint(written),
		Offset:             p.bytes,
		UncompressedLength: uint(uncompressedLength),
	}
	p.blobs = append(p.blobs, blob)
	p.bytes += uint(written)

	return written + CalculateEntrySize(blob), errors.Wrap(err, "Write")
}
// entrySize is the size in bytes of a header entry for a compressed blob:
// blob type, length, uncompressed length and ID.
var entrySize = uint(binary.Size(restic.BlobType(0)) + 2*headerLengthSize + len(restic.ID{}))
// plainEntrySize is the size in bytes of a header entry for an uncompressed
// blob: blob type, length and ID.
var plainEntrySize = uint(binary.Size(restic.BlobType(0)) + headerLengthSize + len(restic.ID{}))
// headerEntry describes the format of header entries for uncompressed blobs.
// It serves only as documentation; the field order is the order in which the
// values appear in the serialized entry.
type headerEntry struct {
// Type is 0 for a data blob and 1 for a tree blob.
Type uint8
// Length is the length of the blob as stored in the pack.
Length uint32
// ID is the ID of the blob.
ID restic.ID
}
// compressedHeaderEntry describes the format of header entries for compressed blobs.
// It serves only as documentation; the field order is the order in which the
// values appear in the serialized entry.
type compressedHeaderEntry struct {
// Type is 2 for a compressed data blob and 3 for a compressed tree blob.
Type uint8
// Length is the length of the blob as stored in the pack.
Length uint32
// UncompressedLength is the uncompressed length recorded for the blob.
UncompressedLength uint32
// ID is the ID of the blob.
ID restic.ID
}
// Finalize builds the header for all added blobs, encrypts it, checks that
// the encrypted header decodes back to the recorded blobs and appends it to
// the pack, followed by its length.
func (p *Packer) Finalize() error {
	p.m.Lock()
	defer p.m.Unlock()

	plain, err := makeHeader(p.blobs)
	if err != nil {
		return err
	}

	// The buffer starts with the nonce, Seal appends its output to it, and
	// the length of everything so far is appended as a little-endian uint32.
	nonce := crypto.NewRandomNonce()
	sealed := make([]byte, 0, crypto.CiphertextLength(len(plain)))
	sealed = append(sealed, nonce...)
	sealed = p.k.Seal(sealed, nonce, plain, nil)
	sealedLen := uint32(len(sealed))
	sealed = binary.LittleEndian.AppendUint32(sealed, sealedLen)

	if verifyErr := verifyHeader(p.k, sealed, p.blobs); verifyErr != nil {
		//nolint:revive // ignore linter warnings about error message spelling
		return fmt.Errorf("Detected data corruption while writing pack-file header: %w\nCorrupted data is either caused by hardware issues or software bugs. Please open an issue at https://github.com/restic/restic/issues/new/choose for further troubleshooting.", verifyErr)
	}

	// append the header
	written, err := p.wr.Write(sealed)
	switch {
	case err != nil:
		return errors.Wrap(err, "Write")
	case written != len(sealed):
		return errors.New("wrong number of bytes written")
	}

	p.bytes += uint(len(sealed))
	return nil
}
// verifyHeader decodes the encrypted header (including its trailing length
// field) with k and returns an error unless the decoded size equals
// len(header) and the decoded entries equal expected.
func verifyHeader(k *crypto.Key, header []byte, expected []restic.Blob) error {
	// do not offer a way to skip the pack header verification, as pack headers are usually small enough
	// to not result in a significant performance impact
	entries, size, err := List(k, bytes.NewReader(header), int64(len(header)))
	if err != nil {
		return fmt.Errorf("header decoding failed: %w", err)
	}

	if want := uint32(len(header)); size != want {
		return fmt.Errorf("unexpected header size %v instead of %v", size, len(header))
	}

	if len(entries) != len(expected) {
		return fmt.Errorf("pack header size mismatch")
	}

	for i, got := range entries {
		if want := expected[i]; got != want {
			return fmt.Errorf("pack header entry mismatch got %v instead of %v", got, want)
		}
	}

	return nil
}
// HeaderOverhead returns an estimate of the number of bytes written by a call
// to Finalize. The estimate is the ciphertext length of an empty header plus
// the size of the uint32 header-length field; it does not depend on the
// number of blobs in the packer.
func (p *Packer) HeaderOverhead() int {
	lengthField := binary.Size(uint32(0))
	return lengthField + crypto.CiphertextLength(0)
}
// makeHeader serializes one header entry per blob, in order. Each entry
// consists of a type byte (0 data, 1 tree, 2 compressed data, 3 compressed
// tree), the little-endian uint32 length, for compressed blobs the
// little-endian uint32 uncompressed length, and the blob ID. A blob counts as
// compressed when its UncompressedLength is non-zero. An error is returned
// for any blob type other than data or tree.
func makeHeader(blobs []restic.Blob) ([]byte, error) {
	hdr := make([]byte, 0, len(blobs)*int(entrySize))

	for _, blob := range blobs {
		var tpe byte
		switch blob.Type {
		case restic.DataBlob:
			tpe = 0
		case restic.TreeBlob:
			tpe = 1
		default:
			return nil, errors.Errorf("invalid blob type %v", blob.Type)
		}

		compressed := blob.UncompressedLength != 0
		if compressed {
			// the compressed variants are offset by two from the plain ones
			tpe += 2
		}

		hdr = append(hdr, tpe)
		hdr = binary.LittleEndian.AppendUint32(hdr, uint32(blob.Length))
		if compressed {
			hdr = binary.LittleEndian.AppendUint32(hdr, uint32(blob.UncompressedLength))
		}
		hdr = append(hdr, blob.ID[:]...)
	}

	return hdr, nil
}
// Size reports how many bytes have been written to the pack so far.
func (p *Packer) Size() uint {
	p.m.Lock()
	written := p.bytes
	p.m.Unlock()

	return written
}
// Count reports how many blobs have been added to this packer.
func (p *Packer) Count() int {
	p.m.Lock()
	count := len(p.blobs)
	p.m.Unlock()

	return count
}
// HeaderFull returns true if the pack header is full, that is if a header
// with one more entry of size entrySize would exceed MaxHeaderSize.
func (p *Packer) HeaderFull() bool {
	p.m.Lock()
	entries := uint(len(p.blobs) + 1)
	p.m.Unlock()

	return headerSize+entries*entrySize > MaxHeaderSize
}
// Blobs returns the slice of blobs that have been written. The returned
// slice is the packer's own slice, not a copy.
func (p *Packer) Blobs() []restic.Blob {
	p.m.Lock()
	blobs := p.blobs
	p.m.Unlock()

	return blobs
}
// String returns a short human-readable summary of the packer: the number of
// blobs added and the number of bytes written so far.
func (p *Packer) String() string {
	// Take the lock like the other accessors do, so that formatting a packer
	// does not race with a concurrent Add or Finalize.
	p.m.Lock()
	defer p.m.Unlock()

	return fmt.Sprintf("<Packer %d blobs, %d bytes>", len(p.blobs), p.bytes)
}
var (
// minFileSize is the smallest file size readHeader accepts: one plain header
// entry plus the crypto overhead plus the header-length field.
// we require at least one entry in the header, and one blob for a pack file
minFileSize = plainEntrySize + crypto.Extension + uint(headerLengthSize)
)
// Sizes and limits used when writing and reading the pack header.
const (
// size of the header-length field at the end of the file; it is a uint32
headerLengthSize = 4
// headerSize is the header's constant overhead (independent of #entries)
headerSize = headerLengthSize + crypto.Extension
// MaxHeaderSize is the max size of header including header-length field
MaxHeaderSize = 16*1024*1024 + headerLengthSize
// number of header entries to download as part of header-length request
eagerEntries = 15
)
// readRecords reads the last bufsize bytes (or the last size bytes, if that
// is less) from rd, where size is the total length of the data in rd. It
// returns the raw header without the trailing header-length field, the total
// number of bytes in the header including that field, and any error.
// If the header is shorter than what was read, the returned slice is
// truncated so that it starts at the beginning of the header. If the header
// is longer, only its tail is returned; the caller can tell from the returned
// total.
func readRecords(rd io.ReaderAt, size int64, bufsize int) ([]byte, int, error) {
	if int(size) < bufsize {
		bufsize = int(size)
	}

	tail := make([]byte, bufsize)
	if _, err := rd.ReadAt(tail, size-int64(bufsize)); err != nil {
		return nil, 0, err
	}

	// the last headerLengthSize bytes hold the header length
	lenPos := len(tail) - headerLengthSize
	hlen := binary.LittleEndian.Uint32(tail[lenPos:])
	tail = tail[:lenPos]
	debug.Log("header length: %v", hlen)

	var problem string
	switch {
	case hlen == 0:
		problem = "header length is zero"
	case hlen < crypto.Extension:
		problem = "header length is too short"
	case int64(hlen) > size-int64(headerLengthSize):
		problem = "header is larger than file"
	case int64(hlen) > MaxHeaderSize-int64(headerLengthSize):
		problem = "header is larger than maxHeaderSize"
	}
	if problem != "" {
		return nil, 0, errors.Wrap(InvalidFileError{Message: problem}, "readHeader")
	}

	total := int(hlen + headerLengthSize)
	if total < bufsize {
		// truncate to the beginning of the pack header
		tail = tail[len(tail)-int(hlen):]
	}

	return tail, total, nil
}
// readHeader reads the header at the end of rd and returns it without the
// trailing header-length field. size is the length of the whole data
// accessible in rd; files shorter than minFileSize are rejected.
func readHeader(rd io.ReaderAt, size int64) ([]byte, error) {
	debug.Log("size: %v", size)
	if size < int64(minFileSize) {
		return nil, errors.Wrap(InvalidFileError{Message: "file is too short"}, "readHeader")
	}

	// assuming extra request is significantly slower than extra bytes download,
	// eagerly download eagerEntries header entries as part of header-length request.
	// only make second request if actual number of entries is greater than eagerEntries
	eagerSize := eagerEntries*int(entrySize) + headerSize
	hdr, total, err := readRecords(rd, size, eagerSize)
	if err != nil {
		return nil, err
	}

	if total > eagerSize {
		// the eager read did not cover the whole header, fetch it completely
		hdr, _, err = readRecords(rd, size, total)
		if err != nil {
			return nil, err
		}
	}

	return hdr, nil
}
// InvalidFileError is returned when a file is found that is not a pack file.
type InvalidFileError struct {
// Message describes why the file was rejected.
Message string
}
// Error returns the message of the error.
func (e InvalidFileError) Error() string {
return e.Message
}
// List returns the list of entries found in a pack file and the length of the
// header (including header size and crypto overhead). rd provides access to
// the pack file, size is its total length and k is used to decrypt the
// header. The offset of each entry is the sum of the lengths of the entries
// preceding it in the header.
func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, hdrSize uint32, err error) {
	raw, err := readHeader(rd, size)
	if err != nil {
		return nil, 0, err
	}

	if len(raw) < crypto.CiphertextLength(0) {
		return nil, 0, errors.New("invalid header, too short")
	}

	hdrSize = headerLengthSize + uint32(len(raw))

	// the header starts with the nonce; decrypt the remainder in place
	nonceLen := k.NonceSize()
	nonce, sealed := raw[:nonceLen], raw[nonceLen:]
	plain, err := k.Open(sealed[:0], nonce, sealed, nil)
	if err != nil {
		return nil, 0, err
	}

	// might over allocate a bit if all blobs have EntrySize but only by a few percent
	entries = make([]restic.Blob, 0, uint(len(plain))/plainEntrySize)

	var offset uint
	for rest := plain; len(rest) > 0; {
		entry, consumed, err := parseHeaderEntry(rest)
		if err != nil {
			return nil, 0, err
		}

		entry.Offset = offset
		offset += entry.Length
		entries = append(entries, entry)
		rest = rest[consumed:]
	}

	return entries, hdrSize, nil
}
// parseHeaderEntry decodes the header entry at the start of p. It returns the
// decoded blob (without an offset) and the number of bytes the entry
// occupies: plainEntrySize for types 0 and 1, entrySize for the compressed
// types 2 and 3. An error is returned if p is too short for the entry or if
// the type byte is unknown.
func parseHeaderEntry(p []byte) (b restic.Blob, size uint, err error) {
	l := uint(len(p))
	size = plainEntrySize
	if l < plainEntrySize {
		return b, size, errors.Errorf("parseHeaderEntry: buffer of size %d too short", l)
	}

	tpe := p[0]
	switch tpe {
	case 0, 2:
		b.Type = restic.DataBlob
	case 1, 3:
		b.Type = restic.TreeBlob
	default:
		return b, size, errors.Errorf("invalid type %d", tpe)
	}

	b.Length = uint(binary.LittleEndian.Uint32(p[1:5]))
	p = p[5:]

	if tpe == 2 || tpe == 3 {
		size = entrySize
		if l < entrySize {
			// Report the size of the buffer as it was passed in; p has
			// already been advanced past the type and length fields.
			return b, size, errors.Errorf("parseHeaderEntry: buffer of size %d too short", l)
		}
		b.UncompressedLength = uint(binary.LittleEndian.Uint32(p[0:4]))
		p = p[4:]
	}

	// p may extend past this entry; copy stops once the ID is filled
	copy(b.ID[:], p)
	return b, size, nil
}
// CalculateEntrySize returns the size in bytes of the pack header entry for
// blob: entrySize if its UncompressedLength is non-zero, plainEntrySize
// otherwise.
func CalculateEntrySize(blob restic.Blob) int {
	size := plainEntrySize
	if blob.UncompressedLength != 0 {
		size = entrySize
	}
	return int(size)
}
// CalculateHeaderSize returns the size in bytes of a pack header containing
// one entry for each of blobs, including the constant header overhead.
func CalculateHeaderSize(blobs []restic.Blob) int {
	total := headerSize
	for i := range blobs {
		total += CalculateEntrySize(blobs[i])
	}
	return total
}
// Size returns the size of all packs computed by index information, keyed by
// pack ID. If onlyHdr is set to true, only the size of the header is
// returned. Note that this function only gives correct sizes if there are no
// duplicates in the index. The error is the one returned by mi.ListBlobs.
func Size(ctx context.Context, mi restic.ListBlobser, onlyHdr bool) (map[restic.ID]int64, error) {
	sizes := make(map[restic.ID]int64)

	err := mi.ListBlobs(ctx, func(pb restic.PackedBlob) {
		total, seen := sizes[pb.PackID]
		if !seen {
			// first blob of this pack: account for the constant header overhead
			total = headerSize
		}

		total += int64(CalculateEntrySize(pb.Blob))
		if !onlyHdr {
			total += int64(pb.Length)
		}

		sizes[pb.PackID] = total
	})

	return sizes, err
}

View File

@@ -0,0 +1,237 @@
package pack
import (
"bytes"
"encoding/binary"
"io"
"strings"
"testing"
"github.com/restic/restic/internal/crypto"
"github.com/restic/restic/internal/restic"
rtest "github.com/restic/restic/internal/test"
)
// TestParseHeaderEntry serializes a plain and a compressed header entry and
// checks that parseHeaderEntry decodes type, size, ID and lengths correctly.
func TestParseHeaderEntry(t *testing.T) {
	h := headerEntry{
		Type:   0, // Blob
		Length: 100,
	}

	for i := range h.ID {
		h.ID[i] = byte(i)
	}

	buf := new(bytes.Buffer)
	// a failed serialization would make the assertions below meaningless
	rtest.OK(t, binary.Write(buf, binary.LittleEndian, &h))

	b, size, err := parseHeaderEntry(buf.Bytes())
	rtest.OK(t, err)
	rtest.Equals(t, restic.DataBlob, b.Type)
	rtest.Equals(t, plainEntrySize, size)
	t.Logf("%v %v", h.ID, b.ID)
	rtest.Equals(t, h.ID[:], b.ID[:])
	rtest.Equals(t, uint(h.Length), b.Length)
	rtest.Equals(t, uint(0), b.UncompressedLength)

	c := compressedHeaderEntry{
		Type:               2, // compressed Blob
		Length:             100,
		UncompressedLength: 200,
	}

	for i := range c.ID {
		c.ID[i] = byte(i)
	}

	buf = new(bytes.Buffer)
	rtest.OK(t, binary.Write(buf, binary.LittleEndian, &c))

	b, size, err = parseHeaderEntry(buf.Bytes())
	rtest.OK(t, err)
	rtest.Equals(t, restic.DataBlob, b.Type)
	rtest.Equals(t, entrySize, size)
	t.Logf("%v %v", c.ID, b.ID)
	rtest.Equals(t, c.ID[:], b.ID[:])
	rtest.Equals(t, uint(c.Length), b.Length)
	rtest.Equals(t, uint(c.UncompressedLength), b.UncompressedLength)
}
// TestParseHeaderEntryErrors checks that parseHeaderEntry rejects an entry
// with an unknown type byte and an entry that is one byte too short.
func TestParseHeaderEntryErrors(t *testing.T) {
	h := headerEntry{
		Type:   0, // Blob
		Length: 100,
	}
	for i := range h.ID {
		h.ID[i] = byte(i)
	}

	h.Type = 0xae
	buf := new(bytes.Buffer)
	// a failed serialization would make the assertions below meaningless
	rtest.OK(t, binary.Write(buf, binary.LittleEndian, &h))

	_, _, err := parseHeaderEntry(buf.Bytes())
	rtest.Assert(t, err != nil, "no error for invalid type")

	h.Type = 0
	buf.Reset()
	rtest.OK(t, binary.Write(buf, binary.LittleEndian, &h))

	_, _, err = parseHeaderEntry(buf.Bytes()[:plainEntrySize-1])
	rtest.Assert(t, err != nil, "no error for short input")
}
// countingReaderAt wraps an io.ReaderAt and counts how often ReadAt is called.
type countingReaderAt struct {
	delegate        io.ReaderAt
	invocationCount int
}

// ReadAt records the invocation and forwards the read to the delegate.
func (rd *countingReaderAt) ReadAt(p []byte, off int64) (n int, err error) {
	rd.invocationCount++
	n, err = rd.delegate.ReadAt(p, off)
	return n, err
}
// TestReadHeaderEagerLoad checks that readHeader returns the complete header
// and needs a second read only if the header does not fit into the eager
// read.
func TestReadHeaderEagerLoad(t *testing.T) {
	check := func(dataSize, entryCount, wantReads int) {
		wantHeader := rtest.Random(0, entryCount*int(entrySize)+crypto.Extension)

		var pack bytes.Buffer
		pack.Write(rtest.Random(0, dataSize))                                           // pack blobs data
		pack.Write(wantHeader)                                                          // pack header
		rtest.OK(t, binary.Write(&pack, binary.LittleEndian, uint32(len(wantHeader)))) // pack header length

		rd := &countingReaderAt{delegate: bytes.NewReader(pack.Bytes())}

		got, err := readHeader(rd, int64(pack.Len()))
		rtest.OK(t, err)

		rtest.Equals(t, wantHeader, got)
		rtest.Equals(t, wantReads, rd.invocationCount)
	}

	// basic
	check(100, 1, 1)

	// header entries == eager entries
	for _, tc := range []struct{ entries, reads int }{
		{eagerEntries - 1, 1},
		{eagerEntries, 1},
		{eagerEntries + 1, 2},
	} {
		check(100, tc.entries, tc.reads)
	}

	// file size == eager header load size
	eagerLoadSize := int((eagerEntries * entrySize) + crypto.Extension)
	singleEntryHeader := int(1*entrySize) + crypto.Extension
	dataSize := eagerLoadSize - singleEntryHeader - binary.Size(uint32(0))
	for delta := -1; delta <= 4; delta++ {
		check(dataSize+delta, 1, 1)
	}
}
// TestReadRecords checks that readRecords reports the total header size and
// returns the part of the header covered by the requested buffer size.
func TestReadRecords(t *testing.T) {
	check := func(dataSize, entryCount, totalRecords int) {
		fullHeader := rtest.Random(0, totalRecords*int(entrySize)+crypto.Extension)
		bufSize := entryCount*int(entrySize) + crypto.Extension

		// only the tail of the header that fits into the buffer is expected back
		start := len(fullHeader) - bufSize
		if start < 0 {
			start = 0
		}
		wantHeader := fullHeader[start:]

		var pack bytes.Buffer
		pack.Write(rtest.Random(0, dataSize))                                           // pack blobs data
		pack.Write(fullHeader)                                                          // pack header
		rtest.OK(t, binary.Write(&pack, binary.LittleEndian, uint32(len(fullHeader)))) // pack header length

		rd := bytes.NewReader(pack.Bytes())

		got, total, err := readRecords(rd, int64(rd.Len()), bufSize+4)
		rtest.OK(t, err)
		rtest.Equals(t, len(fullHeader)+4, total)
		rtest.Equals(t, wantHeader, got)
	}

	// basic
	check(100, 1, 1)
	check(100, 0, 1)
	check(100, 1, 0)

	// header entries ~ eager entries
	for _, records := range []int{eagerEntries - 1, eagerEntries, eagerEntries + 1} {
		check(100, eagerEntries, records)
	}

	// file size == eager header load size
	eagerLoadSize := int((eagerEntries * entrySize) + crypto.Extension)
	singleEntryHeader := int(1*entrySize) + crypto.Extension
	dataSize := eagerLoadSize - singleEntryHeader - binary.Size(uint32(0))
	for delta := -1; delta <= 4; delta++ {
		check(dataSize+delta, 1, 1)
	}

	for entries := 0; entries < 2; entries++ {
		for records := 0; records < 2; records++ {
			check(dataSize, entries, records)
		}
	}
}
// TestUnpackedVerification builds an encrypted pack header, optionally
// damages the plaintext, the ciphertext or the length field, and checks that
// verifyHeader accepts the intact header and reports the expected error for
// each kind of damage.
func TestUnpackedVerification(t *testing.T) {
	// create random keys
	k := crypto.NewRandomKey()
	blobs := []restic.Blob{
		{
			BlobHandle:         restic.NewRandomBlobHandle(),
			Length:             42,
			Offset:             0,
			UncompressedLength: 2 * 42,
		},
	}

	type DamageType string
	const (
		damageData       DamageType = "data"
		damageCiphertext DamageType = "ciphertext"
		damageLength     DamageType = "length"
	)

	for _, test := range []struct {
		damage DamageType
		msg    string
	}{
		{"", ""},
		{damageData, "pack header entry mismatch"},
		{damageCiphertext, "ciphertext verification failed"},
		{damageLength, "header decoding failed"},
	} {
		header, err := makeHeader(blobs)
		rtest.OK(t, err)

		if test.damage == damageData {
			header[8] ^= 0x42
		}

		encryptedHeader := make([]byte, 0, crypto.CiphertextLength(len(header)))
		nonce := crypto.NewRandomNonce()
		encryptedHeader = append(encryptedHeader, nonce...)
		encryptedHeader = k.Seal(encryptedHeader, nonce, header, nil)
		encryptedHeader = binary.LittleEndian.AppendUint32(encryptedHeader, uint32(len(encryptedHeader)))

		if test.damage == damageCiphertext {
			encryptedHeader[8] ^= 0x42
		}
		if test.damage == damageLength {
			encryptedHeader[len(encryptedHeader)-1] ^= 0x42
		}

		err = verifyHeader(k, encryptedHeader, blobs)
		if test.msg == "" {
			rtest.Assert(t, err == nil, "expected no error, got %v", err)
			continue
		}

		// Check for nil first: err.Error() below is evaluated before the
		// assertion runs and would panic if the damage went undetected.
		if err == nil {
			t.Fatalf("expected error to contain %q for damage %q, got no error", test.msg, test.damage)
		}
		rtest.Assert(t, strings.Contains(err.Error(), test.msg), "expected error to contain %q, got %q", test.msg, err)
	}
}

View File

@@ -0,0 +1,146 @@
package pack_test
import (
"bytes"
"context"
"crypto/rand"
"crypto/sha256"
"encoding/json"
"io"
"testing"
"github.com/restic/restic/internal/backend"
"github.com/restic/restic/internal/backend/mem"
"github.com/restic/restic/internal/crypto"
"github.com/restic/restic/internal/repository/pack"
"github.com/restic/restic/internal/restic"
rtest "github.com/restic/restic/internal/test"
)
// testLens lists the lengths in bytes of the random blobs packed by the tests.
var testLens = []int{23, 31650, 25860, 10928, 13769, 19862, 5211, 127, 13690, 30231}
// Buf is a test blob: its content together with its ID.
type Buf struct {
// data is the content of the blob.
data []byte
// id is the SHA-256 hash of data.
id restic.ID
}
// newPack creates one random blob for each entry of lengths, packs all of
// them as tree blobs using k and finalizes the pack. It returns the blobs,
// the bytes of the pack and the size reported by the packer.
func newPack(t testing.TB, k *crypto.Key, lengths []int) ([]Buf, []byte, uint) {
	bufs := make([]Buf, 0, len(lengths))

	for _, size := range lengths {
		data := make([]byte, size)
		_, err := io.ReadFull(rand.Reader, data)
		rtest.OK(t, err)
		bufs = append(bufs, Buf{data: data, id: sha256.Sum256(data)})
	}

	// pack blobs
	var out bytes.Buffer
	packer := pack.NewPacker(k, &out)
	for i := range bufs {
		_, err := packer.Add(restic.TreeBlob, bufs[i].id, bufs[i].data, 2*len(bufs[i].data))
		rtest.OK(t, err)
	}

	rtest.OK(t, packer.Finalize())

	return bufs, out.Bytes(), packer.Size()
}
// verifyBlobs lists the pack readable through rd and checks it against bufs:
// the number of entries, the overall pack size, the header size and, for
// every blob, its ID and the data stored at its offset.
func verifyBlobs(t testing.TB, bufs []Buf, k *crypto.Key, rd io.ReaderAt, packSize uint) {
	// read and parse it again
	entries, hdrSize, err := pack.List(k, rd, int64(packSize))
	rtest.OK(t, err)
	rtest.Equals(t, len(entries), len(bufs))

	// check the head size calculation for consistency
	headerSize := pack.CalculateHeaderSize(entries)
	expectedSize := headerSize
	for i := range bufs {
		expectedSize += len(bufs[i].data)
	}

	// check length
	rtest.Equals(t, uint(expectedSize), packSize)
	rtest.Equals(t, headerSize, int(hdrSize))

	var scratch []byte
	for i, want := range bufs {
		entry := entries[i]
		rtest.Equals(t, want.id, entry.ID)

		length := int(entry.Length)
		if len(scratch) < length {
			scratch = make([]byte, length)
		}
		scratch = scratch[:length]

		n, err := rd.ReadAt(scratch, int64(entry.Offset))
		rtest.OK(t, err)
		scratch = scratch[:n]

		rtest.Assert(t, bytes.Equal(want.data, scratch),
			"data for blob %v doesn't match", i)
	}
}
// TestCreatePack packs random blobs and verifies the resulting pack directly
// from memory.
func TestCreatePack(t *testing.T) {
	// create random keys
	key := crypto.NewRandomKey()

	bufs, data, size := newPack(t, key, testLens)
	rtest.Equals(t, uint(len(data)), size)

	verifyBlobs(t, bufs, key, bytes.NewReader(data), size)
}
// blobTypeJSON pairs each blob type with its expected JSON representation.
var blobTypeJSON = []struct {
t restic.BlobType
res string
}{
{restic.DataBlob, `"data"`},
{restic.TreeBlob, `"tree"`},
}
// TestBlobTypeJSON checks that blob types are marshalled to and unmarshalled
// from the JSON strings listed in blobTypeJSON.
func TestBlobTypeJSON(t *testing.T) {
	for _, tc := range blobTypeJSON {
		// test serialize
		encoded, err := json.Marshal(tc.t)
		rtest.OK(t, err)
		rtest.Equals(t, tc.res, string(encoded))

		// test unserialize
		var decoded restic.BlobType
		rtest.OK(t, json.Unmarshal([]byte(tc.res), &decoded))
		rtest.Equals(t, tc.t, decoded)
	}
}
// TestUnpackReadSeeker saves a pack of random blobs to an in-memory backend
// and verifies it when read back through the backend.
func TestUnpackReadSeeker(t *testing.T) {
	// create random keys
	key := crypto.NewRandomKey()
	bufs, data, size := newPack(t, key, testLens)

	be := mem.New()
	id := restic.Hash(data)
	h := backend.Handle{Type: backend.PackFile, Name: id.String()}

	ctx := context.TODO()
	rtest.OK(t, be.Save(ctx, h, backend.NewByteReader(data, be.Hasher())))

	verifyBlobs(t, bufs, key, backend.ReaderAt(ctx, be, h), size)
}
// TestShortPack saves a pack holding a single 23 byte blob to an in-memory
// backend and verifies it when read back through the backend.
func TestShortPack(t *testing.T) {
	key := crypto.NewRandomKey()
	bufs, data, size := newPack(t, key, []int{23})

	be := mem.New()
	id := restic.Hash(data)
	h := backend.Handle{Type: backend.PackFile, Name: id.String()}

	ctx := context.TODO()
	rtest.OK(t, be.Save(ctx, h, backend.NewByteReader(data, be.Hasher())))

	verifyBlobs(t, bufs, key, backend.ReaderAt(ctx, be, h), size)
}