Compare commits

..

4 Commits

Author SHA1 Message Date
shamoon
f85a360c21 Add to diagram 2026-03-09 10:58:07 -07:00
shamoon
add2c68b7c Move migration 2026-03-09 10:58:06 -07:00
shamoon
3f0770a9a6 make it a separate one 2026-03-09 10:58:06 -07:00
shamoon
c0c09bd0da Save this 2026-03-09 10:51:45 -07:00
18 changed files with 152 additions and 146 deletions

View File

@@ -41,7 +41,7 @@ jobs:
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
echo "base=${{ github.event.pull_request.base.sha }}" >> "$GITHUB_OUTPUT"
elif [[ "${{ github.event.created }}" == "true" ]]; then
echo "base=${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
echo "base=origin/${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
else
echo "base=${{ github.event.before }}" >> "$GITHUB_OUTPUT"
fi

View File

@@ -43,7 +43,7 @@ jobs:
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
echo "base=${{ github.event.pull_request.base.sha }}" >> "$GITHUB_OUTPUT"
elif [[ "${{ github.event.created }}" == "true" ]]; then
echo "base=${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
echo "base=origin/${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
else
echo "base=${{ github.event.before }}" >> "$GITHUB_OUTPUT"
fi

View File

@@ -38,7 +38,7 @@ jobs:
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
echo "base=${{ github.event.pull_request.base.sha }}" >> "$GITHUB_OUTPUT"
elif [[ "${{ github.event.created }}" == "true" ]]; then
echo "base=${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
echo "base=origin/${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
else
echo "base=${{ github.event.before }}" >> "$GITHUB_OUTPUT"
fi

View File

@@ -458,7 +458,7 @@ fields and permissions, which will be merged.
#### Types {#workflow-trigger-types}
Currently, there are four events that correspond to workflow trigger 'types':
Currently, there are five events that correspond to workflow trigger 'types':
1. **Consumption Started**: _before_ a document is consumed, so events can include filters by source (mail, consumption
folder or API), file path, file name, mail rule
@@ -470,8 +470,10 @@ Currently, there are four events that correspond to workflow trigger 'types':
4. **Scheduled**: a scheduled trigger that can be used to run workflows at a specific time. The date used can be either the document
added, created, updated date or you can specify a (date) custom field. You can also specify a day offset from the date (positive
offsets will trigger after the date, negative offsets will trigger before).
5. **Version Added**: when a new version is added to an existing document. Filters are evaluated against the root document, and
actions are applied to the root document rather than to the newly added version.
The following flow diagram illustrates the four document trigger types:
The following flow diagram illustrates the document trigger types:
```mermaid
flowchart TD
@@ -487,6 +489,10 @@ flowchart TD
'Updated'
trigger(s)"}
version{"Matching
'Version Added'
trigger(s)"}
scheduled{"Documents
matching
trigger(s)"}
@@ -503,11 +509,15 @@ flowchart TD
updated --> |Yes| J[Workflow Actions Run]
updated --> |No| K
J --> K[Document Saved]
L[Scheduled Task Check<br/>hourly at :05] --> M[Get All Scheduled Triggers]
M --> scheduled
scheduled --> |Yes| N[Workflow Actions Run]
scheduled --> |No| O[Document Saved]
N --> O
L[New Document Version Added] --> version
version --> |Yes| V[Workflow Actions Run]
version --> |No| W
V --> W[Document Saved]
X[Scheduled Task Check<br/>hourly at :05] --> Y[Get All Scheduled Triggers]
Y --> scheduled
scheduled --> |Yes| Z[Workflow Actions Run]
scheduled --> |No| AA[Document Saved]
Z --> AA
```
#### Filters {#workflow-trigger-filters}

View File

@@ -164,7 +164,7 @@
<pngx-input-text i18n-title title="Filter path" formControlName="filter_path" horizontal="true" i18n-hint hint="Apply to documents that match this path. Wildcards specified as * are allowed. Case-normalized." [error]="error?.filter_path"></pngx-input-text>
<pngx-input-select i18n-title title="Filter mail rule" [items]="mailRules" horizontal="true" [allowNull]="true" formControlName="filter_mailrule" i18n-hint hint="Apply to documents consumed via this mail rule." [error]="error?.filter_mailrule"></pngx-input-select>
}
@if (formGroup.get('type').value === WorkflowTriggerType.DocumentAdded || formGroup.get('type').value === WorkflowTriggerType.DocumentUpdated || formGroup.get('type').value === WorkflowTriggerType.Scheduled) {
@if (formGroup.get('type').value === WorkflowTriggerType.DocumentAdded || formGroup.get('type').value === WorkflowTriggerType.DocumentUpdated || formGroup.get('type').value === WorkflowTriggerType.Scheduled || formGroup.get('type').value === WorkflowTriggerType.VersionAdded) {
<pngx-input-select i18n-title title="Content matching algorithm" horizontal="true" [items]="getMatchingAlgorithms()" formControlName="matching_algorithm"></pngx-input-select>
@if (matchingPatternRequired(formGroup)) {
<pngx-input-text i18n-title title="Content matching pattern" horizontal="true" formControlName="match" [error]="error?.match"></pngx-input-text>
@@ -175,7 +175,7 @@
}
</div>
</div>
@if (formGroup.get('type').value === WorkflowTriggerType.DocumentAdded || formGroup.get('type').value === WorkflowTriggerType.DocumentUpdated || formGroup.get('type').value === WorkflowTriggerType.Scheduled) {
@if (formGroup.get('type').value === WorkflowTriggerType.DocumentAdded || formGroup.get('type').value === WorkflowTriggerType.DocumentUpdated || formGroup.get('type').value === WorkflowTriggerType.Scheduled || formGroup.get('type').value === WorkflowTriggerType.VersionAdded) {
<div class="row mt-3">
<div class="col">
<div class="trigger-filters mb-3">

View File

@@ -120,6 +120,10 @@ export const WORKFLOW_TYPE_OPTIONS = [
id: WorkflowTriggerType.Scheduled,
name: $localize`Scheduled`,
},
{
id: WorkflowTriggerType.VersionAdded,
name: $localize`Version Added`,
},
]
export const WORKFLOW_ACTION_OPTIONS = [

View File

@@ -12,6 +12,7 @@ export enum WorkflowTriggerType {
DocumentAdded = 2,
DocumentUpdated = 3,
Scheduled = 4,
VersionAdded = 5,
}
export enum ScheduleDateField {

View File

@@ -10,11 +10,13 @@ class DocumentsConfig(AppConfig):
def ready(self) -> None:
from documents.signals import document_consumption_finished
from documents.signals import document_updated
from documents.signals import document_version_added
from documents.signals.handlers import add_inbox_tags
from documents.signals.handlers import add_or_update_document_in_llm_index
from documents.signals.handlers import add_to_index
from documents.signals.handlers import run_workflows_added
from documents.signals.handlers import run_workflows_updated
from documents.signals.handlers import run_workflows_version_added
from documents.signals.handlers import send_websocket_document_updated
from documents.signals.handlers import set_correspondent
from documents.signals.handlers import set_document_type
@@ -28,6 +30,7 @@ class DocumentsConfig(AppConfig):
document_consumption_finished.connect(set_storage_path)
document_consumption_finished.connect(add_to_index)
document_consumption_finished.connect(run_workflows_added)
document_version_added.connect(run_workflows_version_added)
document_consumption_finished.connect(add_or_update_document_in_llm_index)
document_updated.connect(run_workflows_updated)
document_updated.connect(send_websocket_document_updated)

View File

@@ -46,6 +46,7 @@ from documents.plugins.helpers import ProgressStatusOptions
from documents.signals import document_consumption_finished
from documents.signals import document_consumption_started
from documents.signals import document_updated
from documents.signals import document_version_added
from documents.signals.handlers import run_workflows
from documents.templating.workflows import parse_w_workflow_placeholders
from documents.utils import copy_basic_file_stats
@@ -601,6 +602,12 @@ class ConsumerPlugin(
if self.unmodified_original
else self.working_copy,
)
if document.root_document_id:
document_version_added.send(
sender=self.__class__,
document=document,
logging_group=self.logging_group,
)
# After everything is in the database, copy the files into
# place. If this fails, we'll also rollback the transaction.

View File

@@ -169,7 +169,7 @@ def match_storage_paths(document: Document, classifier: DocumentClassifier, user
def matches(matching_model: MatchingModel, document: Document):
search_flags = 0
document_content = document.get_effective_content() or ""
document_content = document.content
# Check that match is not empty
if not matching_model.match.strip():
@@ -689,6 +689,7 @@ def document_matches_workflow(
trigger_type == WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED
or trigger_type == WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED
or trigger_type == WorkflowTrigger.WorkflowTriggerType.SCHEDULED
or trigger_type == WorkflowTrigger.WorkflowTriggerType.VERSION_ADDED
):
trigger_matched, reason = existing_document_matches_workflow(
document,

View File

@@ -0,0 +1,28 @@
# Generated by Django 5.2.7 on 2026-03-02 00:00
from django.db import migrations
from django.db import models
# Migration for the "documents" app that re-declares the ``type`` field of the
# ``workflowtrigger`` model so that its choices include "Version Added" (5).
class Migration(migrations.Migration):
# This migration is ordered after migration 0016 of the same app.
dependencies = [
("documents", "0016_document_version_index_and_more"),
]
operations = [
# Only the list of choices gains a new entry; the field class, default and
# verbose name are restated unchanged.
migrations.AlterField(
model_name="workflowtrigger",
name="type",
field=models.PositiveSmallIntegerField(
choices=[
(1, "Consumption Started"),
(2, "Document Added"),
(3, "Document Updated"),
(4, "Scheduled"),
# New trigger type introduced by this migration.
(5, "Version Added"),
],
default=1,
verbose_name="Workflow Trigger Type",
),
),
]

View File

@@ -361,42 +361,6 @@ class Document(SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-
res += f" {self.title}"
return res
def get_effective_content(self) -> str | None:
"""
Returns the effective content for the document.
For root documents, this is the latest version's content when available.
For version documents, this is always the document's own content.
If the queryset already annotated ``effective_content``, that value is used.
"""
if hasattr(self, "effective_content"):
return getattr(self, "effective_content")
if self.root_document_id is not None or self.pk is None:
return self.content
prefetched_cache = getattr(self, "_prefetched_objects_cache", None)
prefetched_versions = (
prefetched_cache.get("versions")
if isinstance(prefetched_cache, dict)
else None
)
if prefetched_versions:
latest_prefetched = max(prefetched_versions, key=lambda doc: doc.id)
return latest_prefetched.content
latest_version_content = (
Document.objects.filter(root_document=self)
.order_by("-id")
.values_list("content", flat=True)
.first()
)
return (
latest_version_content
if latest_version_content is not None
else self.content
)
@property
def suggestion_content(self):
"""
@@ -409,21 +373,15 @@ class Document(SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-
This improves processing speed for large documents while keeping
enough context for accurate suggestions.
"""
effective_content = self.get_effective_content()
if not effective_content or len(effective_content) <= 1200000:
return effective_content
if not self.content or len(self.content) <= 1200000:
return self.content
else:
# Use 80% from the start and 20% from the end
# to preserve both opening and closing context.
head_len = 800000
tail_len = 200000
return " ".join(
(
effective_content[:head_len],
effective_content[-tail_len:],
),
)
return " ".join((self.content[:head_len], self.content[-tail_len:]))
@property
def source_path(self) -> Path:
@@ -1174,6 +1132,7 @@ class WorkflowTrigger(models.Model):
DOCUMENT_ADDED = 2, _("Document Added")
DOCUMENT_UPDATED = 3, _("Document Updated")
SCHEDULED = 4, _("Scheduled")
VERSION_ADDED = 5, _("Version Added")
class DocumentSourceChoices(models.IntegerChoices):
CONSUME_FOLDER = DocumentSource.ConsumeFolder.value, _("Consume Folder")

View File

@@ -2,5 +2,6 @@ from django.dispatch import Signal
# Signals of the documents app. Receivers are connected in
# DocumentsConfig.ready().
document_consumption_started = Signal()
# Receivers include storage path assignment, indexing and the
# "Document Added" workflows.
document_consumption_finished = Signal()
# Sent by the consumer for a document whose root_document_id is set, with
# ``document`` and ``logging_group`` keyword arguments; received by
# run_workflows_version_added.
document_version_added = Signal()
document_consumer_declaration = Signal()
# Receivers include the "Document Updated" workflows and the websocket
# notification handler.
document_updated = Signal()

View File

@@ -783,6 +783,19 @@ def run_workflows_added(
)
def run_workflows_version_added(
    sender,
    document: Document,
    logging_group: uuid.UUID | None = None,
    **kwargs,
) -> None:
    """
    Signal receiver that runs "Version Added" workflows.

    Workflows are run against the root document rather than against the
    version that was just added.

    Args:
        sender: The signal sender; unused.
        document: The document the signal was sent for, normally a version
            whose ``root_document`` is set.
        logging_group: Optional identifier forwarded to ``run_workflows``.
        **kwargs: Additional signal arguments; ignored.
    """
    target = document.root_document
    if target is None:
        # A document without a root is not a version; treat it as its own
        # root instead of handing ``None`` to run_workflows.
        target = document

    run_workflows(
        trigger_type=WorkflowTrigger.WorkflowTriggerType.VERSION_ADDED,
        document=target,
        logging_group=logging_group,
    )
def run_workflows_updated(
sender,
document: Document,

View File

@@ -715,9 +715,16 @@ class TestConsumer(
self._assert_first_last_send_progress()
@override_settings(AUDIT_LOG_ENABLED=True)
@mock.patch("documents.consumer.document_updated.send")
@mock.patch("documents.consumer.document_version_added.send")
@mock.patch("documents.consumer.load_classifier")
def test_consume_version_creates_new_version(self, m) -> None:
m.return_value = MagicMock()
def test_consume_version_creates_new_version(
self,
mock_load_classifier: mock.Mock,
mock_document_version_added_send: mock.Mock,
mock_document_updated_send: mock.Mock,
) -> None:
mock_load_classifier.return_value = MagicMock()
with self.get_consumer(self.get_test_file()) as consumer:
consumer.run()
@@ -785,6 +792,16 @@ class TestConsumer(
self.assertIsNone(version.archive_serial_number)
self.assertEqual(version.original_filename, version_file.name)
self.assertTrue(bool(version.content))
mock_document_version_added_send.assert_called_once()
self.assertEqual(
mock_document_version_added_send.call_args.kwargs["document"].id,
version.id,
)
mock_document_updated_send.assert_called_once()
self.assertEqual(
mock_document_updated_send.call_args.kwargs["document"].id,
root_doc.id,
)
@override_settings(AUDIT_LOG_ENABLED=True)
@mock.patch("documents.consumer.load_classifier")

View File

@@ -156,46 +156,6 @@ class TestDocument(TestCase):
)
self.assertEqual(doc.get_public_filename(), "2020-12-25 test")
def test_suggestion_content_uses_latest_version_content_for_root_documents(
    self,
) -> None:
    """
    A root document's suggestion content must equal the content of its
    version, not the root's own stale content.
    """
    pdf_mime = "application/pdf"
    root_doc = Document.objects.create(
        content="outdated root content",
        mime_type=pdf_mime,
        checksum="root",
        title="root",
    )
    newest = Document.objects.create(
        content="latest version content",
        root_document=root_doc,
        mime_type=pdf_mime,
        checksum="v1",
        title="v1",
    )

    observed = root_doc.suggestion_content

    self.assertEqual(observed, newest.content)
def test_content_length_is_per_document_row_for_versions(self) -> None:
    """
    ``content_length`` is tracked per row: the root and its version each
    report the length of their own content.
    """
    pdf_mime = "application/pdf"
    root_doc = Document.objects.create(
        content="abc",
        mime_type=pdf_mime,
        checksum="root",
        title="root",
    )
    version_doc = Document.objects.create(
        content="abcdefgh",
        root_document=root_doc,
        mime_type=pdf_mime,
        checksum="v1",
        title="v1",
    )

    # Reload both rows before reading content_length.
    for doc in (root_doc, version_doc):
        doc.refresh_from_db()

    self.assertEqual(root_doc.content_length, 3)
    self.assertEqual(version_doc.content_length, 8)
def test_suggestion_content() -> None:
"""

View File

@@ -48,52 +48,6 @@ class _TestMatchingBase(TestCase):
class TestMatching(_TestMatchingBase):
def test_matches_uses_latest_version_content_for_root_documents(self) -> None:
    """
    A keyword found only in the version's content must still produce a
    match when the root document is tested.
    """
    pdf_mime = "application/pdf"
    root_doc = Document.objects.create(
        content="root content without token",
        mime_type=pdf_mime,
        checksum="root",
        title="root",
    )
    # The version carries the keyword; the root does not.
    Document.objects.create(
        content="latest version contains keyword",
        root_document=root_doc,
        mime_type=pdf_mime,
        checksum="v1",
        title="v1",
    )
    keyword_tag = Tag.objects.create(
        matching_algorithm=Tag.MATCH_ANY,
        match="keyword",
        name="tag",
    )

    matched = matching.matches(keyword_tag, root_doc)

    self.assertTrue(matched)
def test_matches_does_not_fall_back_to_root_content_when_version_exists(
    self,
) -> None:
    """
    A keyword found only in the root's own content must not produce a
    match once a version without that keyword exists.
    """
    pdf_mime = "application/pdf"
    root_doc = Document.objects.create(
        content="root contains keyword",
        mime_type=pdf_mime,
        checksum="root",
        title="root",
    )
    # The version lacks the keyword that the root contains.
    Document.objects.create(
        content="latest version without token",
        root_document=root_doc,
        mime_type=pdf_mime,
        checksum="v1",
        title="v1",
    )
    keyword_tag = Tag.objects.create(
        matching_algorithm=Tag.MATCH_ANY,
        match="keyword",
        name="tag",
    )

    matched = matching.matches(keyword_tag, root_doc)

    self.assertFalse(matched)
def test_match_none(self) -> None:
self._test_matching(
"",

View File

@@ -60,6 +60,7 @@ from documents.models import WorkflowTrigger
from documents.plugins.base import StopConsumeTaskError
from documents.serialisers import WorkflowTriggerSerializer
from documents.signals import document_consumption_finished
from documents.signals import document_version_added
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import DummyProgressManager
from documents.tests.utils import FileSystemAssertsMixin
@@ -1786,6 +1787,53 @@ class TestWorkflows(
).exists(),
)
def test_version_added_workflow_runs_on_root_document(self) -> None:
    """
    Sending ``document_version_added`` for a version runs the matching
    workflow on the root document and leaves the version itself untouched.
    """
    version_added = WorkflowTrigger.WorkflowTriggerType.VERSION_ADDED

    # Workflow with a single "Version Added" trigger and one assigning action.
    wf_trigger = WorkflowTrigger.objects.create(type=version_added)
    wf_action = WorkflowAction.objects.create(
        assign_title="Updated by version",
        assign_owner=self.user2,
    )
    wf = Workflow.objects.create(name="Version workflow", order=0)
    wf.triggers.add(wf_trigger)
    wf.actions.add(wf_action)

    # A root document and one version attached to it.
    root = Document.objects.create(
        title="root",
        correspondent=self.c,
        original_filename="root.pdf",
    )
    new_version = Document.objects.create(
        title="version",
        correspondent=self.c,
        original_filename="version.pdf",
        root_document=root,
    )

    document_version_added.send(sender=self.__class__, document=new_version)

    root.refresh_from_db()
    new_version.refresh_from_db()

    # The action landed on the root only.
    self.assertEqual(root.title, "Updated by version")
    self.assertEqual(root.owner, self.user2)
    self.assertIsNone(new_version.owner)

    # Exactly one run was recorded, against the root document.
    recorded_runs = WorkflowRun.objects.filter(
        workflow=wf,
        type=version_added,
        document=root,
    ).count()
    self.assertEqual(recorded_runs, 1)
def test_document_updated_workflow(self) -> None:
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,