refactor: extract DocumentBase abstract model shared by Document and DocumentVersion

Moves shared file-level fields (checksum, archive_checksum, content,
mime_type, added) and shared properties (has_archive_version, archive_path,
archive_file, file_type, thumbnail_file, get_public_filename) into a new
abstract base class DocumentBase. DocumentVersion overrides
_public_display_name() to use the parent document title. Produces no
migration changes.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Trenton H
2026-04-13 14:38:15 -07:00
parent 1139e7f59b
commit 8283642bfb
+92 -153
View File
@@ -155,7 +155,93 @@ class StoragePath(MatchingModel):
verbose_name_plural = _("storage paths")
class DocumentVersion(models.Model):
class DocumentBase(models.Model):
    """Abstract base shared by Document and DocumentVersion.

    Holds the file-level fields (checksums, content, mime_type, added) and
    the file-serving helpers that both concrete models share.

    Subclass contract -- the helpers below read the following attributes
    from ``self``, but this class does not define them:

    * ``archive_filename``: read by ``has_archive_version`` and
      ``archive_path``; ``None`` means "no archive file".
    * ``thumbnail_path``: read by ``thumbnail_file``; must offer
      ``.open("rb")``.
    * ``source_path``: not used by this class itself, but expected on every
      concrete subclass.
    """

    # NOTE(review): none of the shared fields declare ``help_text``. If a
    # concrete model used to declare ``help_text`` on its own copy of one of
    # these fields, Django's migration autodetector reports that as a field
    # change -- confirm with ``makemigrations --check`` before assuming this
    # class is migration-neutral.
    checksum = models.CharField(
        _("checksum"),
        max_length=64,
        editable=False,
    )

    archive_checksum = models.CharField(
        _("archive checksum"),
        max_length=64,
        blank=True,
        null=True,
        editable=False,
    )

    content = models.TextField(
        _("content"),
        blank=True,
    )

    mime_type = models.CharField(_("mime type"), max_length=256, editable=False)

    added = models.DateTimeField(
        _("added"),
        default=timezone.now,
        editable=False,
        db_index=True,
    )

    class Meta:
        abstract = True

    def _public_display_name(self) -> str:
        """Return the human-readable stem used by ``get_public_filename``.

        Defaults to ``str(self)``; subclasses may override it.
        """
        return str(self)

    @property
    def has_archive_version(self) -> bool:
        """Whether ``archive_filename`` is set (i.e. is not ``None``)."""
        return self.archive_filename is not None

    @property
    def archive_path(self) -> Path | None:
        """Resolved archive path below ``settings.ARCHIVE_DIR``.

        Returns ``None`` when ``archive_filename`` is ``None``.
        """
        if self.archive_filename is None:
            return None
        return (settings.ARCHIVE_DIR / Path(str(self.archive_filename))).resolve()

    @property
    def archive_file(self):
        """Open the archive file for binary reading.

        The handle is returned open; this property does not close it.

        Raises:
            ValueError: if ``archive_path`` is ``None``.
        """
        path = self.archive_path
        if path is None:
            raise ValueError(f"{self!r} has no archive file")
        return path.open("rb")

    @property
    def file_type(self) -> str:
        """Result of ``get_default_file_extension`` for ``mime_type``.

        ``get_public_filename`` appends this value verbatim, so it is
        presumably a dotted extension such as ``.pdf`` -- TODO confirm.
        """
        return get_default_file_extension(self.mime_type)

    @property
    def thumbnail_file(self):
        """Open ``thumbnail_path`` for binary reading.

        The handle is returned open; this property does not close it.
        """
        return self.thumbnail_path.open("rb")

    def get_public_filename(
        self,
        *,
        archive: bool = False,
        counter: int = 0,
        suffix: str | None = None,
    ) -> str:
        """Return a sanitized filename for download.

        Args:
            archive: when true the name ends in ``.pdf``; otherwise it ends
                in ``file_type``.
            counter: when non-zero, appended as ``_NN`` (zero-padded to at
                least two digits).
            suffix: when non-empty, appended verbatim after the counter.

        Returns:
            The assembled name after ``pathvalidate.sanitize_filename`` with
            ``"-"`` as the replacement text.
        """
        result = self._public_display_name()

        if counter:
            result += f"_{counter:02}"

        if suffix:
            result += suffix

        # file_type is only evaluated when the original file is requested.
        if archive:
            result += ".pdf"
        else:
            result += self.file_type

        return pathvalidate.sanitize_filename(result, replacement_text="-")
class DocumentVersion(DocumentBase):
"""
Stores per-version file data for a document.
Version 1 is created on initial consume; subsequent uploads add higher numbers.
@@ -186,34 +272,6 @@ class DocumentVersion(models.Model):
help_text=_("Optional short label for this version."),
)
added = models.DateTimeField(
_("added"),
default=timezone.now,
editable=False,
db_index=True,
)
checksum = models.CharField(
_("checksum"),
max_length=64,
editable=False,
help_text=_("SHA-256 checksum of the original file for this version."),
)
archive_checksum = models.CharField(
_("archive checksum"),
max_length=64,
blank=True,
null=True,
editable=False,
)
content = models.TextField(
_("content"),
blank=True,
help_text=_("OCR text content of this version."),
)
page_count = models.PositiveIntegerField(
_("page count"),
blank=True,
@@ -221,8 +279,6 @@ class DocumentVersion(models.Model):
validators=[MinValueValidator(1)],
)
mime_type = models.CharField(_("mime type"), max_length=256, editable=False)
original_filename = models.CharField(
_("original filename"),
max_length=MAX_STORED_FILENAME_LENGTH,
@@ -262,22 +318,15 @@ class DocumentVersion(models.Model):
def __str__(self) -> str:
    """Describe this row by its version number and the owning document's id."""
    # Uses document_id rather than self.document, so only the stored id is read.
    return f"DocumentVersion {self.version_number} of document {self.document_id}"
def _public_display_name(self) -> str:
    """Return ``str()`` of the related ``document`` as the display name.

    The exact text is whatever the related object's ``__str__`` produces.
    """
    return str(self.document)
@property
def source_path(self) -> Path:
    """Resolved path of this version's original file.

    The stored ``filename`` is joined onto ``settings.ORIGINALS_DIR`` and
    the result is resolved.

    Raises:
        ValueError: if ``filename`` is ``None``.
    """
    stored_name = self.filename
    if stored_name is None:
        raise ValueError(f"DocumentVersion {self.pk} has no filename set")

    originals_root = settings.ORIGINALS_DIR
    relative_part = Path(str(stored_name))
    return (originals_root / relative_part).resolve()
@property
def has_archive_version(self) -> bool:
return self.archive_filename is not None
@property
def archive_path(self) -> Path | None:
if self.archive_filename is not None:
return (settings.ARCHIVE_DIR / Path(str(self.archive_filename))).resolve()
return None
@property
def thumbnail_path(self) -> Path:
# Prefix "v" avoids collision with Document thumbnails ({pk:07}.webp)
@@ -287,42 +336,8 @@ class DocumentVersion(models.Model):
def source_file(self):
return self.source_path.open("rb")
@property
def archive_file(self):
return Path(self.archive_path).open("rb")
@property
def thumbnail_file(self):
return self.thumbnail_path.open("rb")
@property
def file_type(self) -> str:
return get_default_file_extension(self.mime_type)
def get_public_filename(self, *, archive=False, counter=0, suffix=None) -> str:
"""
Returns a sanitized filename for download, mirroring Document.get_public_filename().
Uses the parent document's title and correspondent for the human-readable name,
and this version's own mime_type for the file extension.
"""
doc = self.document # cached FK access -- no extra query if already in memory
result = str(doc) # "YYYY-MM-DD [Correspondent] Title" from Document.__str__
if counter:
result += f"_{counter:02}"
if suffix:
result += suffix
if archive:
result += ".pdf"
else:
result += self.file_type
return pathvalidate.sanitize_filename(result, replacement_text="-")
class Document(SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-missing]
class Document(DocumentBase, SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-missing]
MAX_STORED_FILENAME_LENGTH: Final[int] = 1024
correspondent = models.ForeignKey(
@@ -354,15 +369,6 @@ class Document(SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-
verbose_name=_("document type"),
)
content = models.TextField(
_("content"),
blank=True,
help_text=_(
"The raw, text-only data of the document. This field is "
"primarily used for searching.",
),
)
content_length = models.GeneratedField(
expression=Length("content"),
output_field=PositiveIntegerField(default=0),
@@ -372,8 +378,6 @@ class Document(SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-
help_text="Length of the content field in characters. Automatically maintained by the database for faster statistics computation.",
)
mime_type = models.CharField(_("mime type"), max_length=256, editable=False)
tags = models.ManyToManyField(
Tag,
related_name="documents",
@@ -381,22 +385,6 @@ class Document(SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-
verbose_name=_("tags"),
)
checksum = models.CharField(
_("checksum"),
max_length=64,
editable=False,
help_text=_("The checksum of the original document."),
)
archive_checksum = models.CharField(
_("archive checksum"),
max_length=64,
editable=False,
blank=True,
null=True,
help_text=_("The checksum of the archived document."),
)
page_count = models.PositiveIntegerField(
_("page count"),
blank=False,
@@ -422,13 +410,6 @@ class Document(SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-
db_index=True,
)
added = models.DateTimeField(
_("added"),
default=timezone.now,
editable=False,
db_index=True,
)
filename = models.FilePathField(
_("filename"),
max_length=MAX_STORED_FILENAME_LENGTH,
@@ -520,45 +501,7 @@ class Document(SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-
@property
def source_file(self):
return Path(self.source_path).open("rb")
@property
def has_archive_version(self) -> bool:
return self.archive_filename is not None
@property
def archive_path(self) -> Path | None:
if self.has_archive_version:
return (settings.ARCHIVE_DIR / Path(str(self.archive_filename))).resolve()
else:
return None
@property
def archive_file(self):
return Path(self.archive_path).open("rb")
def get_public_filename(self, *, archive=False, counter=0, suffix=None) -> str:
"""
Returns a sanitized filename for the document, not including any paths.
"""
result = str(self)
if counter:
result += f"_{counter:02}"
if suffix:
result += suffix
if archive:
result += ".pdf"
else:
result += self.file_type
return pathvalidate.sanitize_filename(result, replacement_text="-")
@property
def file_type(self):
return get_default_file_extension(self.mime_type)
return self.source_path.open("rb")
@property
def thumbnail_path(self) -> Path:
@@ -568,10 +511,6 @@ class Document(SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-
return webp_file_path.resolve()
@property
def thumbnail_file(self):
return Path(self.thumbnail_path).open("rb")
@property
def created_date(self):
return self.created