mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-06-29 16:54:27 +00:00
Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 665a724221 | |||
| 7a4cddebbe | |||
| 1b7c0af22e | |||
| 20a855444b | |||
| c3a3939387 | |||
| a1fad8309f | |||
| 613b528b7e | |||
| bf73b5b1d1 |
@@ -63,7 +63,6 @@ The following are not generally considered vulnerabilities unless accompanied by
|
||||
- optional webhook, mail, AI, OCR, or integration behavior described without a product-level vulnerability
|
||||
- missing limits or hardening settings presented without concrete impact
|
||||
- generic AI or static-analysis output that is not confirmed against the current codebase and a real deployment scenario
|
||||
- the ability to attach objects that a user cannot access to a document by ID is an intentional design choice, and not considered a vulnerability
|
||||
|
||||
## Transparency
|
||||
|
||||
|
||||
+88
-131
@@ -768,7 +768,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
|
||||
<context context-type="linenumber">19</context>
|
||||
<context context-type="linenumber">16</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3894950702316166331" datatype="html">
|
||||
@@ -783,7 +783,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
|
||||
<context context-type="linenumber">26</context>
|
||||
<context context-type="linenumber">23</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/trash/trash.component.html</context>
|
||||
@@ -1700,7 +1700,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/input/tags/tags.component.ts</context>
|
||||
<context context-type="linenumber">81</context>
|
||||
<context context-type="linenumber">80</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/suggestions-dropdown/suggestions-dropdown.component.html</context>
|
||||
@@ -1830,18 +1830,11 @@
|
||||
<context context-type="linenumber">147</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8829078752502782653" datatype="html">
|
||||
<source>Dismiss all</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
|
||||
<context context-type="linenumber">15</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="1616102757855967475" datatype="html">
|
||||
<source>All</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
|
||||
<context context-type="linenumber">39</context>
|
||||
<context context-type="linenumber">36</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.ts</context>
|
||||
@@ -1849,7 +1842,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/custom-fields-query-dropdown/custom-fields-query-dropdown.component.html</context>
|
||||
<context context-type="linenumber">154</context>
|
||||
<context context-type="linenumber">151</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/filterable-dropdown/filterable-dropdown.component.html</context>
|
||||
@@ -1880,36 +1873,36 @@
|
||||
<source>Filter by</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
|
||||
<context context-type="linenumber">59</context>
|
||||
<context context-type="linenumber">56</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="424356320420294719" datatype="html">
|
||||
<source>All types</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
|
||||
<context context-type="linenumber">64</context>
|
||||
<context context-type="linenumber">61</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.ts</context>
|
||||
<context context-type="linenumber">215</context>
|
||||
<context context-type="linenumber">209</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="131016739441837046" datatype="html">
|
||||
<source>All sources</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
|
||||
<context context-type="linenumber">73</context>
|
||||
<context context-type="linenumber">70</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.ts</context>
|
||||
<context context-type="linenumber">227</context>
|
||||
<context context-type="linenumber">221</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6849725902312323996" datatype="html">
|
||||
<source>Reset filters</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
|
||||
<context context-type="linenumber">104</context>
|
||||
<context context-type="linenumber">101</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/document-list.component.html</context>
|
||||
@@ -1924,14 +1917,14 @@
|
||||
<source>{VAR_PLURAL, plural, =1 {1 task} other {<x id="INTERPOLATION"/> tasks}}</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
|
||||
<context context-type="linenumber">125</context>
|
||||
<context context-type="linenumber">122</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8953033926734869941" datatype="html">
|
||||
<source>Name</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
|
||||
<context context-type="linenumber">147</context>
|
||||
<context context-type="linenumber">144</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.ts</context>
|
||||
@@ -2042,7 +2035,7 @@
|
||||
<source>Created</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
|
||||
<context context-type="linenumber">148</context>
|
||||
<context context-type="linenumber">145</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/dates-dropdown/dates-dropdown.component.html</context>
|
||||
@@ -2073,21 +2066,21 @@
|
||||
<source>Results</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
|
||||
<context context-type="linenumber">150</context>
|
||||
<context context-type="linenumber">147</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="314315645942131479" datatype="html">
|
||||
<source>Info</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
|
||||
<context context-type="linenumber">152</context>
|
||||
<context context-type="linenumber">149</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3193976279273491157" datatype="html">
|
||||
<source>Actions</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
|
||||
<context context-type="linenumber">153</context>
|
||||
<context context-type="linenumber">150</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/trash/trash.component.html</context>
|
||||
@@ -2158,22 +2151,18 @@
|
||||
<source>click for full output</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
|
||||
<context context-type="linenumber">204</context>
|
||||
<context context-type="linenumber">201</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="1536087519743707362" datatype="html">
|
||||
<source>Dismiss</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
|
||||
<context context-type="linenumber">217</context>
|
||||
<context context-type="linenumber">214</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.ts</context>
|
||||
<context context-type="linenumber">317</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.ts</context>
|
||||
<context context-type="linenumber">351</context>
|
||||
<context context-type="linenumber">310</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
@@ -2188,28 +2177,28 @@
|
||||
<source>Open Document</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
|
||||
<context context-type="linenumber">222</context>
|
||||
<context context-type="linenumber">219</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5404759957685833020" datatype="html">
|
||||
<source>Result message</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
|
||||
<context context-type="linenumber">235</context>
|
||||
<context context-type="linenumber">232</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6621329748219109148" datatype="html">
|
||||
<source>Duplicate</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
|
||||
<context context-type="linenumber">242</context>
|
||||
<context context-type="linenumber">239</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7593555694782789615" datatype="html">
|
||||
<source>Open</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
|
||||
<context context-type="linenumber">250</context>
|
||||
<context context-type="linenumber">247</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/app-frame/global-search/global-search.component.html</context>
|
||||
@@ -2240,21 +2229,21 @@
|
||||
<source>Input data</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
|
||||
<context context-type="linenumber">260</context>
|
||||
<context context-type="linenumber">257</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="1585185618099050920" datatype="html">
|
||||
<source>Result data</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
|
||||
<context context-type="linenumber">266</context>
|
||||
<context context-type="linenumber">263</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7976920528153858271" datatype="html">
|
||||
<source>No tasks match the current filters.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
|
||||
<context context-type="linenumber">288</context>
|
||||
<context context-type="linenumber">285</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2525230676386818985" datatype="html">
|
||||
@@ -2431,78 +2420,60 @@
|
||||
<source>Dismiss selected</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.ts</context>
|
||||
<context context-type="linenumber">239</context>
|
||||
<context context-type="linenumber">233</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="9169677036332103838" datatype="html">
|
||||
<source>Dismiss visible</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.ts</context>
|
||||
<context context-type="linenumber">240</context>
|
||||
<context context-type="linenumber">234</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3169751690815214293" datatype="html">
|
||||
<source>Confirm Dismiss</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.ts</context>
|
||||
<context context-type="linenumber">314</context>
|
||||
<context context-type="linenumber">307</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5029621907742319073" datatype="html">
|
||||
<source>Dismiss <x id="PH" equiv-text="tasks.size"/> tasks?</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.ts</context>
|
||||
<context context-type="linenumber">315</context>
|
||||
<context context-type="linenumber">308</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3597309129998924778" datatype="html">
|
||||
<source>Error dismissing tasks</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.ts</context>
|
||||
<context context-type="linenumber">326</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.ts</context>
|
||||
<context context-type="linenumber">360</context>
|
||||
<context context-type="linenumber">319</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2132179171926568807" datatype="html">
|
||||
<source>Error dismissing task</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.ts</context>
|
||||
<context context-type="linenumber">338</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="1323591410517879795" datatype="html">
|
||||
<source>Confirm Dismiss All</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.ts</context>
|
||||
<context context-type="linenumber">348</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="4157200209636243740" datatype="html">
|
||||
<source>Dismiss all <x id="PH" equiv-text="this.totalTasks"/> tasks?</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.ts</context>
|
||||
<context context-type="linenumber">349</context>
|
||||
<context context-type="linenumber">331</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8149502458056418229" datatype="html">
|
||||
<source>Success. New document id <x id="PH" equiv-text="documentId"/> created</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.ts</context>
|
||||
<context context-type="linenumber">408</context>
|
||||
<context context-type="linenumber">377</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8760066891202884337" datatype="html">
|
||||
<source>Duplicate of document #<x id="PH" equiv-text="duplicateOf"/></source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.ts</context>
|
||||
<context context-type="linenumber">418</context>
|
||||
<context context-type="linenumber">387</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.ts</context>
|
||||
<context context-type="linenumber">452</context>
|
||||
<context context-type="linenumber">421</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3418677553313974490" datatype="html">
|
||||
@@ -3693,42 +3664,42 @@
|
||||
<source>{VAR_PLURAL, plural, =1 {One page} other {<x id="INTERPOLATION"/> pages}}</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/confirm-dialog/merge-confirm-dialog/merge-confirm-dialog.component.html</context>
|
||||
<context context-type="linenumber">28</context>
|
||||
<context context-type="linenumber">25</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7508164375697837821" datatype="html">
|
||||
<source>Use metadata from:</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/confirm-dialog/merge-confirm-dialog/merge-confirm-dialog.component.html</context>
|
||||
<context context-type="linenumber">38</context>
|
||||
<context context-type="linenumber">34</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2020403212524346652" datatype="html">
|
||||
<source>Regenerate all metadata</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/confirm-dialog/merge-confirm-dialog/merge-confirm-dialog.component.html</context>
|
||||
<context context-type="linenumber">40</context>
|
||||
<context context-type="linenumber">36</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2710430925353472741" datatype="html">
|
||||
<source>Try to include archive version in merge for non-PDF files</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/confirm-dialog/merge-confirm-dialog/merge-confirm-dialog.component.html</context>
|
||||
<context context-type="linenumber">48</context>
|
||||
<context context-type="linenumber">44</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5612366187076076264" datatype="html">
|
||||
<source>Delete original documents after successful merge</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/confirm-dialog/merge-confirm-dialog/merge-confirm-dialog.component.html</context>
|
||||
<context context-type="linenumber">52</context>
|
||||
<context context-type="linenumber">48</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5138283234724909648" datatype="html">
|
||||
<source>Note that only PDFs will be included.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/confirm-dialog/merge-confirm-dialog/merge-confirm-dialog.component.html</context>
|
||||
<context context-type="linenumber">55</context>
|
||||
<context context-type="linenumber">51</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="1309641780471803652" datatype="html">
|
||||
@@ -3843,7 +3814,7 @@
|
||||
<source>Saved field "<x id="PH" equiv-text="newField.name"/>".</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/custom-fields-dropdown/custom-fields-dropdown.component.ts</context>
|
||||
<context context-type="linenumber">129</context>
|
||||
<context context-type="linenumber">130</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/manage/document-attributes/custom-fields/custom-fields.component.ts</context>
|
||||
@@ -3854,7 +3825,7 @@
|
||||
<source>Error saving field.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/custom-fields-dropdown/custom-fields-dropdown.component.ts</context>
|
||||
<context context-type="linenumber">138</context>
|
||||
<context context-type="linenumber">139</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/manage/document-attributes/custom-fields/custom-fields.component.ts</context>
|
||||
@@ -3939,11 +3910,11 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/custom-fields-query-dropdown/custom-fields-query-dropdown.component.html</context>
|
||||
<context context-type="linenumber">96</context>
|
||||
<context context-type="linenumber">94</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/custom-fields-query-dropdown/custom-fields-query-dropdown.component.html</context>
|
||||
<context context-type="linenumber">102</context>
|
||||
<context context-type="linenumber">100</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3800326155195149498" datatype="html">
|
||||
@@ -3954,29 +3925,29 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/custom-fields-query-dropdown/custom-fields-query-dropdown.component.html</context>
|
||||
<context context-type="linenumber">97</context>
|
||||
<context context-type="linenumber">95</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/custom-fields-query-dropdown/custom-fields-query-dropdown.component.html</context>
|
||||
<context context-type="linenumber">103</context>
|
||||
<context context-type="linenumber">101</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7551700625201096185" datatype="html">
|
||||
<source>Search docs...</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/custom-fields-query-dropdown/custom-fields-query-dropdown.component.html</context>
|
||||
<context context-type="linenumber">70</context>
|
||||
<context context-type="linenumber">69</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/custom-fields-query-dropdown/custom-fields-query-dropdown.component.html</context>
|
||||
<context context-type="linenumber">119</context>
|
||||
<context context-type="linenumber">117</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3184700926171002527" datatype="html">
|
||||
<source>Any</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/custom-fields-query-dropdown/custom-fields-query-dropdown.component.html</context>
|
||||
<context context-type="linenumber">152</context>
|
||||
<context context-type="linenumber">149</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/filterable-dropdown/filterable-dropdown.component.html</context>
|
||||
@@ -3987,21 +3958,21 @@
|
||||
<source>Not</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/custom-fields-query-dropdown/custom-fields-query-dropdown.component.html</context>
|
||||
<context context-type="linenumber">157</context>
|
||||
<context context-type="linenumber">154</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6548676277933116532" datatype="html">
|
||||
<source>Add query</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/custom-fields-query-dropdown/custom-fields-query-dropdown.component.html</context>
|
||||
<context context-type="linenumber">176</context>
|
||||
<context context-type="linenumber">173</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5599577087865387184" datatype="html">
|
||||
<source>Add expression</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/custom-fields-query-dropdown/custom-fields-query-dropdown.component.html</context>
|
||||
<context context-type="linenumber">179</context>
|
||||
<context context-type="linenumber">176</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6312759212949884929" datatype="html">
|
||||
@@ -4670,23 +4641,23 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
|
||||
<context context-type="linenumber">199</context>
|
||||
<context context-type="linenumber">197</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
|
||||
<context context-type="linenumber">233</context>
|
||||
<context context-type="linenumber">231</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
|
||||
<context context-type="linenumber">267</context>
|
||||
<context context-type="linenumber">265</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
|
||||
<context context-type="linenumber">277</context>
|
||||
<context context-type="linenumber">275</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
|
||||
<context context-type="linenumber">315</context>
|
||||
<context context-type="linenumber">313</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/toast/toast.component.html</context>
|
||||
@@ -6008,11 +5979,11 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/input/select/select.component.html</context>
|
||||
<context context-type="linenumber">62</context>
|
||||
<context context-type="linenumber">61</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/input/tags/tags.component.html</context>
|
||||
<context context-type="linenumber">66</context>
|
||||
<context context-type="linenumber">65</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6344437738844463465" datatype="html">
|
||||
@@ -6023,7 +5994,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/input/select/select.component.ts</context>
|
||||
<context context-type="linenumber">176</context>
|
||||
<context context-type="linenumber">172</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="1880237574877817137" datatype="html">
|
||||
@@ -6127,7 +6098,7 @@
|
||||
<source>Private</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/input/select/select.component.ts</context>
|
||||
<context context-type="linenumber">72</context>
|
||||
<context context-type="linenumber">71</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/tag/tag.component.html</context>
|
||||
@@ -6150,7 +6121,7 @@
|
||||
<source>No items found</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/input/select/select.component.ts</context>
|
||||
<context context-type="linenumber">110</context>
|
||||
<context context-type="linenumber">106</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6541407358060244620" datatype="html">
|
||||
@@ -6164,21 +6135,21 @@
|
||||
<source>Add tag</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/input/tags/tags.component.html</context>
|
||||
<context context-type="linenumber">18</context>
|
||||
<context context-type="linenumber">17</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3392754525167799121" datatype="html">
|
||||
<source>Remove tag</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/input/tags/tags.component.html</context>
|
||||
<context context-type="linenumber">24</context>
|
||||
<context context-type="linenumber">23</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2561408369057364131" datatype="html">
|
||||
<source>Filter documents with these Tags</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/input/tags/tags.component.html</context>
|
||||
<context context-type="linenumber">56</context>
|
||||
<context context-type="linenumber">55</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="1400555558847223243" datatype="html">
|
||||
@@ -6470,7 +6441,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
|
||||
<context context-type="linenumber">334</context>
|
||||
<context context-type="linenumber">332</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/manage/mail/mail.component.html</context>
|
||||
@@ -7179,28 +7150,28 @@
|
||||
<source>Recent Task Activity <x id="START_TAG_SPAN" ctype="x-span" equiv-text="<span class="small text-muted fw-light">"/>(<x id="INTERPOLATION" equiv-text="{{status.tasks.summary.days}}"/> days)<x id="CLOSE_TAG_SPAN" ctype="x-span" equiv-text="</span>"/></source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
|
||||
<context context-type="linenumber">147</context>
|
||||
<context context-type="linenumber">145</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3448462145758383019" datatype="html">
|
||||
<source>Total</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
|
||||
<context context-type="linenumber">152</context>
|
||||
<context context-type="linenumber">150</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3521084103654700903" datatype="html">
|
||||
<source>Successful</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
|
||||
<context context-type="linenumber">156</context>
|
||||
<context context-type="linenumber">154</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7256395947475975935" datatype="html">
|
||||
<source>Failed</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
|
||||
<context context-type="linenumber">160</context>
|
||||
<context context-type="linenumber">158</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/data/share-link-bundle.ts</context>
|
||||
@@ -7211,7 +7182,7 @@
|
||||
<source>Pending</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
|
||||
<context context-type="linenumber">164</context>
|
||||
<context context-type="linenumber">162</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/data/share-link-bundle.ts</context>
|
||||
@@ -7222,96 +7193,96 @@
|
||||
<source>No recent tasks</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
|
||||
<context context-type="linenumber">169</context>
|
||||
<context context-type="linenumber">167</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2041675390931385838" datatype="html">
|
||||
<source>Health</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
|
||||
<context context-type="linenumber">180</context>
|
||||
<context context-type="linenumber">178</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="31377277941774469" datatype="html">
|
||||
<source>Search Index</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
|
||||
<context context-type="linenumber">184</context>
|
||||
<context context-type="linenumber">182</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="4089509911694721896" datatype="html">
|
||||
<source>Last Updated</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
|
||||
<context context-type="linenumber">197</context>
|
||||
<context context-type="linenumber">195</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="46628344485199198" datatype="html">
|
||||
<source>Classifier</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
|
||||
<context context-type="linenumber">202</context>
|
||||
<context context-type="linenumber">200</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="9127131074422113272" datatype="html">
|
||||
<source>Run Task</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
|
||||
<context context-type="linenumber">224</context>
|
||||
<context context-type="linenumber">222</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
|
||||
<context context-type="linenumber">258</context>
|
||||
<context context-type="linenumber">256</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
|
||||
<context context-type="linenumber">306</context>
|
||||
<context context-type="linenumber">304</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6096684179126491743" datatype="html">
|
||||
<source>Last Trained</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
|
||||
<context context-type="linenumber">231</context>
|
||||
<context context-type="linenumber">229</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6427836860962380759" datatype="html">
|
||||
<source>Sanity Checker</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
|
||||
<context context-type="linenumber">236</context>
|
||||
<context context-type="linenumber">234</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6578747070254776938" datatype="html">
|
||||
<source>Last Run</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
|
||||
<context context-type="linenumber">265</context>
|
||||
<context context-type="linenumber">263</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
|
||||
<context context-type="linenumber">313</context>
|
||||
<context context-type="linenumber">311</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5921685253729220446" datatype="html">
|
||||
<source>WebSocket Connection</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
|
||||
<context context-type="linenumber">270</context>
|
||||
<context context-type="linenumber">268</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8998179362936748717" datatype="html">
|
||||
<source>OK</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
|
||||
<context context-type="linenumber">274</context>
|
||||
<context context-type="linenumber">272</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3804349597565969872" datatype="html">
|
||||
<source>AI Index</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
|
||||
<context context-type="linenumber">283</context>
|
||||
<context context-type="linenumber">281</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6732151329960766506" datatype="html">
|
||||
@@ -10939,20 +10910,6 @@
|
||||
<context context-type="linenumber">361</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="4493921125434706859" datatype="html">
|
||||
<source>LLM Request Timeout</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/data/paperless-config.ts</context>
|
||||
<context context-type="linenumber">365</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="483994032066441287" datatype="html">
|
||||
<source>Timeout in seconds for LLM requests.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/data/paperless-config.ts</context>
|
||||
<context context-type="linenumber">369</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="9155387182259025015" datatype="html">
|
||||
<source>Processing</source>
|
||||
<context-group purpose="location">
|
||||
|
||||
@@ -13,6 +13,8 @@ import { DocumentDetailComponent } from './components/document-detail/document-d
|
||||
import { DocumentListComponent } from './components/document-list/document-list.component'
|
||||
import { DocumentAttributesComponent } from './components/manage/document-attributes/document-attributes.component'
|
||||
import { MailComponent } from './components/manage/mail/mail.component'
|
||||
import { OcrTemplateEditorComponent } from './components/manage/ocr-templates/ocr-template-editor/ocr-template-editor.component'
|
||||
import { OcrTemplatesComponent } from './components/manage/ocr-templates/ocr-templates.component'
|
||||
import { SavedViewsComponent } from './components/manage/saved-views/saved-views.component'
|
||||
import { WorkflowsComponent } from './components/manage/workflows/workflows.component'
|
||||
import { NotFoundComponent } from './components/not-found/not-found.component'
|
||||
@@ -274,6 +276,42 @@ export const routes: Routes = [
|
||||
componentName: 'WorkflowsComponent',
|
||||
},
|
||||
},
|
||||
{
|
||||
path: 'ocr-templates',
|
||||
component: OcrTemplatesComponent,
|
||||
canActivate: [PermissionsGuard],
|
||||
data: {
|
||||
requiredPermission: {
|
||||
action: PermissionAction.View,
|
||||
type: PermissionType.OcrTemplate,
|
||||
},
|
||||
componentName: 'OcrTemplatesComponent',
|
||||
},
|
||||
},
|
||||
{
|
||||
path: 'ocr-templates/new',
|
||||
component: OcrTemplateEditorComponent,
|
||||
canActivate: [PermissionsGuard],
|
||||
data: {
|
||||
requiredPermission: {
|
||||
action: PermissionAction.Add,
|
||||
type: PermissionType.OcrTemplate,
|
||||
},
|
||||
componentName: 'OcrTemplateEditorComponent',
|
||||
},
|
||||
},
|
||||
{
|
||||
path: 'ocr-templates/:id',
|
||||
component: OcrTemplateEditorComponent,
|
||||
canActivate: [PermissionsGuard],
|
||||
data: {
|
||||
requiredPermission: {
|
||||
action: PermissionAction.Change,
|
||||
type: PermissionType.OcrTemplate,
|
||||
},
|
||||
componentName: 'OcrTemplateEditorComponent',
|
||||
},
|
||||
},
|
||||
{
|
||||
path: 'mail',
|
||||
component: MailComponent,
|
||||
|
||||
@@ -243,6 +243,14 @@
|
||||
<i-bs class="me-2" name="boxes"></i-bs><span><ng-container i18n>Workflows</ng-container></span>
|
||||
</a>
|
||||
</li>
|
||||
<li class="nav-item app-link"
|
||||
*pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.OcrTemplate }">
|
||||
<a class="nav-link" routerLink="ocr-templates" routerLinkActive="active" (click)="closeMenu()"
|
||||
ngbPopover="OCR Templates" i18n-ngbPopover [disablePopover]="!slimSidebarEnabled" placement="end"
|
||||
container="body" triggers="mouseenter:mouseleave" popoverClass="popover-slim">
|
||||
<i-bs class="me-2" name="file-earmark-break"></i-bs><span><ng-container i18n>OCR Templates</ng-container></span>
|
||||
</a>
|
||||
</li>
|
||||
<li class="nav-item app-link" *pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.MailAccount }"
|
||||
tourAnchor="tour.mail">
|
||||
<a class="nav-link" routerLink="mail" routerLinkActive="active" (click)="closeMenu()" ngbPopover="Mail"
|
||||
|
||||
@@ -82,6 +82,23 @@
|
||||
<i-bs name="pencil" class="me-1"></i-bs><ng-container i18n>PDF Editor</ng-container>
|
||||
</button>
|
||||
|
||||
<button
|
||||
ngbDropdownItem
|
||||
(click)="runZoneOcr()"
|
||||
[disabled]="!userCanEdit || !document?.document_type"
|
||||
*pngxIfPermissions="{ action: PermissionAction.Change, type: PermissionType.OcrTemplate }"
|
||||
>
|
||||
<i-bs width="1em" height="1em" name="file-earmark-ruled" class="me-1"></i-bs><span i18n>Run Zone OCR</span>
|
||||
</button>
|
||||
|
||||
<button
|
||||
ngbDropdownItem
|
||||
(click)="createOcrTemplate()"
|
||||
*pngxIfPermissions="{ action: PermissionAction.Add, type: PermissionType.OcrTemplate }"
|
||||
>
|
||||
<i-bs width="1em" height="1em" name="file-earmark-medical" class="me-1"></i-bs><span i18n>Create OCR Template</span>
|
||||
</button>
|
||||
|
||||
@if (userIsOwner && (requiresPassword || password)) {
|
||||
<button ngbDropdownItem (click)="removePassword()" [disabled]="!password">
|
||||
<i-bs name="unlock" class="me-1"></i-bs><ng-container i18n>Remove Password</ng-container>
|
||||
|
||||
@@ -1405,6 +1405,48 @@ export class DocumentDetailComponent
|
||||
})
|
||||
}
|
||||
|
||||
/**
 * Runs zone OCR for the currently loaded document, shows a toast that
 * summarises the outcome and then re-fetches the document so the view
 * reflects whatever the server returned.
 */
runZoneOcr() {
  this.documentsService.runZoneOcr(this.document.id).subscribe({
    next: (res) => {
      const results = res.results ?? []
      let msg: string

      if (results.length === 0) {
        msg = $localize`Zone OCR ran but no results extracted.`
      } else {
        // A zone counts as failed when its value is null, undefined or
        // only whitespace once converted to a string.
        const failed = results.filter((r) => {
          if (r.value === null || r.value === undefined) {
            return true
          }
          return `${r.value}`.trim() === ''
        })
        const filled = results.length - failed.length

        msg = $localize`Filled ${filled} of ${results.length} fields`
        if (failed.length > 0) {
          const names = failed.map((r) => r.zone).join(', ')
          msg = `${msg}. ${$localize`Failed to match zones: ${names}`}`
        }
      }

      this.toastService.showInfo(msg)

      // Reload the document regardless of how many zones were filled.
      this.documentsService
        .get(this.documentId)
        .subscribe((doc) => this.updateComponent(doc))
    },
    error: (error) => {
      this.toastService.showError($localize`Zone OCR failed`, error)
    },
  })
}
|
||||
|
||||
/**
 * Navigates to the "new OCR template" route, passing the current
 * document's type and id along as query parameters.
 */
createOcrTemplate() {
  const queryParams = {
    document_type: this.document.document_type,
    sample_document: this.document.id,
  }
  const target = ['/ocr-templates', 'new']

  this.router.navigate(target, { queryParams })
}
|
||||
|
||||
private getSelectedNonLatestVersionId(): number | null {
|
||||
const versions = this.document?.versions ?? []
|
||||
if (!versions.length || !this.selectedVersionId) {
|
||||
|
||||
@@ -95,6 +95,9 @@
|
||||
<button ngbDropdownItem (click)="mergeSelected()" [disabled]="!userCanAdd || list.allSelected || list.selectedCount < 2">
|
||||
<i-bs name="journals" class="me-1"></i-bs><ng-container i18n>Merge</ng-container>
|
||||
</button>
|
||||
<button ngbDropdownItem (click)="runZoneOcrSelected()" [disabled]="!userCanEditAll || list.allSelected">
|
||||
<i-bs name="file-earmark-ruled" class="me-1"></i-bs><ng-container i18n>Run Zone OCR</ng-container>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -12,7 +12,15 @@ import {
|
||||
} from '@ng-bootstrap/ng-bootstrap'
|
||||
import { saveAs } from 'file-saver'
|
||||
import { NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
|
||||
import { first, map, Observable, Subject, switchMap, takeUntil } from 'rxjs'
|
||||
import {
|
||||
first,
|
||||
forkJoin,
|
||||
map,
|
||||
Observable,
|
||||
Subject,
|
||||
switchMap,
|
||||
takeUntil,
|
||||
} from 'rxjs'
|
||||
import { ConfirmDialogComponent } from 'src/app/components/common/confirm-dialog/confirm-dialog.component'
|
||||
import { CustomField } from 'src/app/data/custom-field'
|
||||
import { MatchingModel } from 'src/app/data/matching-model'
|
||||
@@ -908,6 +916,27 @@ export class BulkEditorComponent
|
||||
})
|
||||
}
|
||||
|
||||
/**
 * Asks for confirmation and, once confirmed, issues one zone OCR request
 * per selected document and hands the combined observable to
 * executeDocumentAction together with the open modal.
 */
runZoneOcrSelected() {
  const ids = Array.from(this.list.selected)
  if (ids.length === 0) {
    return
  }

  const modal = this.modalService.open(ConfirmDialogComponent, {
    backdrop: 'static',
  })
  const dialog = modal.componentInstance

  dialog.title = $localize`Run Zone OCR`
  dialog.messageBold = $localize`Run zone OCR on ${this.getSelectionSize()} selected document(s)?`
  dialog.message = $localize`Each document's type template (if it has one) is applied, overwriting the mapped fields.`
  dialog.btnCaption = $localize`Proceed`

  dialog.confirmClicked
    .pipe(takeUntil(this.unsubscribeNotifier))
    .subscribe(() => {
      // Disable the dialog buttons while the requests are in flight.
      dialog.buttonsEnabled = false

      const requests = ids.map((id) => this.documentService.runZoneOcr(id))
      this.executeDocumentAction(modal, forkJoin(requests))
    })
}
|
||||
|
||||
setPermissions() {
|
||||
let modal = this.modalService.open(PermissionsDialogComponent, {
|
||||
backdrop: 'static',
|
||||
|
||||
+34
@@ -0,0 +1,34 @@
|
||||
@if (zones.length === 0) {
|
||||
<p class="text-muted" i18n>
|
||||
No zones defined. Load a document preview and draw rectangles to add zones.
|
||||
</p>
|
||||
}
|
||||
|
||||
<div class="list-group">
|
||||
@for (zone of zones; track $index; let i = $index) {
|
||||
<div
|
||||
class="list-group-item list-group-item-action d-flex justify-content-between align-items-center"
|
||||
[style.box-shadow]="selectedZoneIndex === i ? 'inset 3px 0 0 0 var(--bs-primary)' : null"
|
||||
>
|
||||
<div class="flex-grow-1" role="button" style="cursor: pointer;" (click)="zoneSelected.emit(i)">
|
||||
<div>
|
||||
<strong [class.text-primary]="selectedZoneIndex === i">
|
||||
{{ zone.name }}
|
||||
</strong>
|
||||
</div>
|
||||
<div class="small text-muted">
|
||||
{{ getZoneTargetName(zone) }} - {{ zone.width }}x{{ zone.height }}px
|
||||
<ng-container i18n>p.</ng-container>{{ zonePage(zone) }}
|
||||
</div>
|
||||
</div>
|
||||
<div class="btn-group">
|
||||
<button class="btn btn-sm btn-outline-secondary" type="button" (click)="zoneSelected.emit(i)" title="Edit" i18n-title>
|
||||
<i-bs name="pencil"></i-bs>
|
||||
</button>
|
||||
<button class="btn btn-sm btn-outline-danger" type="button" (click)="zoneRemoved.emit(i)" title="Delete" i18n-title>
|
||||
<i-bs name="trash"></i-bs>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
}
|
||||
</div>
|
||||
+72
@@ -0,0 +1,72 @@
|
||||
import { ComponentFixture, TestBed } from '@angular/core/testing'
|
||||
import { NgxBootstrapIconsModule, allIcons } from 'ngx-bootstrap-icons'
|
||||
import { CustomField } from 'src/app/data/custom-field'
|
||||
import { OcrTemplateZone } from 'src/app/data/ocr-template'
|
||||
import { OcrTemplateEditorZoneListComponent } from './ocr-template-editor-zone-list.component'
|
||||
|
||||
/**
 * Test fixture factory: returns a fully populated zone, with any fields in
 * `overrides` taking precedence over the defaults.
 */
function zone(overrides: Partial<OcrTemplateZone> = {}): OcrTemplateZone {
  const defaults: OcrTemplateZone = {
    name: 'Zone 1',
    target: 'custom_field',
    custom_field: 7,
    x: 10,
    y: 20,
    width: 30,
    height: 40,
    page: 1,
    ocr_language: 'eng',
    transform: 'strip',
    validation_regex: '',
    order: 0,
  }

  return { ...defaults, ...overrides }
}
|
||||
|
||||
// Rendering and event tests for the OCR template zone list component.
describe('OcrTemplateEditorZoneListComponent', () => {
  let component: OcrTemplateEditorZoneListComponent
  let fixture: ComponentFixture<OcrTemplateEditorZoneListComponent>

  beforeEach(async () => {
    const testingModule = TestBed.configureTestingModule({
      imports: [
        OcrTemplateEditorZoneListComponent,
        NgxBootstrapIconsModule.pick(allIcons),
      ],
    })
    await testingModule.compileComponents()

    fixture = TestBed.createComponent(OcrTemplateEditorZoneListComponent)
    component = fixture.componentInstance
  })

  it('shows empty state when no zones are defined', () => {
    fixture.detectChanges()

    const rendered = fixture.nativeElement.textContent
    expect(rendered).toContain('No zones defined')
  })

  it('renders zone target, size, and page', () => {
    component.customFields = [{ id: 7, name: 'Invoice Number' } as CustomField]
    component.zones = [zone()]
    fixture.detectChanges()

    const rendered = fixture.nativeElement.textContent
    for (const expected of ['Zone 1', 'Invoice Number', '30x40px', 'p.1']) {
      expect(rendered).toContain(expected)
    }
  })

  it('emits select and remove events', () => {
    component.zones = [zone()]
    const selectSpy = jest.spyOn(component.zoneSelected, 'emit')
    const removeSpy = jest.spyOn(component.zoneRemoved, 'emit')
    fixture.detectChanges()

    // The first two buttons in the rendered row are clicked in order.
    const buttons = fixture.nativeElement.querySelectorAll('button')
    const firstButton = buttons[0]
    const secondButton = buttons[1]
    firstButton.click()
    secondButton.click()

    expect(selectSpy).toHaveBeenCalledWith(0)
    expect(removeSpy).toHaveBeenCalledWith(0)
  })
})
|
||||
+41
@@ -0,0 +1,41 @@
|
||||
import { Component, EventEmitter, Input, Output } from '@angular/core'
|
||||
import { NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
|
||||
import { CustomField } from 'src/app/data/custom-field'
|
||||
import { OCR_BUILTIN_TARGETS, OcrTemplateZone } from 'src/app/data/ocr-template'
|
||||
import { getZonePage } from '../zone-geometry'
|
||||
|
||||
/**
 * Presentational list of the zones in an OCR template. It takes the zones
 * and preview state as inputs and emits the index of a zone when it is
 * selected or removed.
 */
@Component({
  selector: 'pngx-ocr-template-zone-list',
  imports: [NgxBootstrapIconsModule],
  templateUrl: './ocr-template-editor-zone-list.component.html',
})
export class OcrTemplateEditorZoneListComponent {
  @Input() zones: OcrTemplateZone[] = []
  @Input() selectedZoneIndex: number | null = null
  @Input() previewPage = 0
  @Input() previewPageCount: number | null = null
  @Input() customFields: CustomField[] = []

  @Output() zoneSelected = new EventEmitter<number>()
  @Output() zoneRemoved = new EventEmitter<number>()

  /** Page number for `zone`, as computed by `getZonePage` from the preview state. */
  zonePage(zone: OcrTemplateZone): number {
    const { previewPage, previewPageCount } = this
    return getZonePage(zone, previewPage, previewPageCount)
  }

  /**
   * Display name of the field a zone writes to. A missing target is treated
   * as 'custom_field'; an unknown built-in target falls back to its raw id.
   */
  getZoneTargetName(zone: OcrTemplateZone): string {
    const target = zone.target || 'custom_field'

    if (target !== 'custom_field') {
      const builtin = OCR_BUILTIN_TARGETS.find((t) => t.id === target)
      return builtin?.name ?? target
    }

    if (!zone.custom_field) {
      return $localize`(no field)`
    }
    return this.getCustomFieldName(zone.custom_field)
  }

  /** Name of the custom field with the given id, or a placeholder label if it is not in `customFields`. */
  private getCustomFieldName(id: number): string {
    const match = this.customFields.find((field) => field.id === id)
    return match?.name ?? `Field #${id}`
  }
}
|
||||
+395
@@ -0,0 +1,395 @@
|
||||
<pngx-page-header [title]="pageTitle" [id]="template.id">
|
||||
<div class="input-group input-group-sm me-5 align-items-center">
|
||||
<div class="input-group-text">
|
||||
<i-bs name="file-text"></i-bs>
|
||||
</div>
|
||||
<input
|
||||
type="text"
|
||||
class="form-control"
|
||||
[(ngModel)]="previewDocModel"
|
||||
[ngbTypeahead]="searchDocuments"
|
||||
[inputFormatter]="documentFormatter"
|
||||
[resultFormatter]="documentFormatter"
|
||||
(selectItem)="onPreviewDocSelected($event)"
|
||||
[editable]="false"
|
||||
placeholder="Search documents by title..."
|
||||
i18n-placeholder
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div class="d-flex align-items-center flex-wrap gap-2">
|
||||
<div class="input-group input-group-sm ms-2 d-none d-md-flex">
|
||||
<div class="input-group-text" i18n>Page</div>
|
||||
<input class="form-control flex-grow-0 w-auto" type="number" min="1" [max]="previewPageCount" [(ngModel)]="previewPageDisplay" />
|
||||
<div class="input-group-text" i18n>of {{previewPageCount}}</div>
|
||||
</div>
|
||||
<button type="button" class="btn btn-sm btn-outline-secondary" i18n-title title="Previous" (click)="prevPage()" [disabled]="!pageImageUrl || previewPage <= 0">
|
||||
<i-bs width="1.2em" height="1.2em" name="arrow-left"></i-bs>
|
||||
</button>
|
||||
<button type="button" class="btn btn-sm btn-outline-secondary" i18n-title title="Next" (click)="nextPage()" [disabled]="!pageImageUrl || previewPage >= (previewPageCount ?? 1) - 1">
|
||||
<i-bs width="1.2em" height="1.2em" name="arrow-right"></i-bs>
|
||||
</button>
|
||||
|
||||
<div class="input-group input-group-sm">
|
||||
<button class="btn btn-outline-secondary" (click)="zoomOut()" i18n>-</button>
|
||||
<span class="input-group-text">{{ zoom * 100 | number: '1.0-0' }}%</span>
|
||||
<button class="btn btn-outline-secondary" (click)="zoomIn()" i18n>+</button>
|
||||
</div>
|
||||
</div>
|
||||
</pngx-page-header>
|
||||
|
||||
<div class="row">
|
||||
<div class="col-md-4">
|
||||
<div class="btn-toolbar mb-1 border-bottom">
|
||||
<div class="btn-group pb-3">
|
||||
<a routerLink="/ocr-templates" class="btn btn-sm btn-outline-secondary">
|
||||
<i-bs width="1.2em" height="1.2em" name="x"></i-bs>
|
||||
<span class="ms-1" i18n>Close</span>
|
||||
</a>
|
||||
</div>
|
||||
<div class="btn-group ms-auto pb-3">
|
||||
<button class="btn btn-sm btn-primary" (click)="save()" [disabled]="saving">
|
||||
@if (saving) {
|
||||
<span class="spinner-border spinner-border-sm me-1"></span>
|
||||
}
|
||||
<span i18n>Save</span>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<ul ngbNav #nav="ngbNav" [(activeId)]="activeTab" class="nav-underline flex-nowrap flex-md-wrap overflow-auto">
|
||||
<li ngbNavItem="settings">
|
||||
<a ngbNavLink i18n>Settings</a>
|
||||
<ng-template ngbNavContent>
|
||||
<div class="row mb-3">
|
||||
<div class="col-9">
|
||||
<pngx-input-text [(ngModel)]="template.name" title="Template name" i18n-title></pngx-input-text>
|
||||
</div>
|
||||
<div class="col-3">
|
||||
<pngx-input-switch [(ngModel)]="template.enabled" title="Enabled" i18n-title></pngx-input-switch>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<pngx-input-select [(ngModel)]="template.document_type" [items]="documentTypes" bindLabel="name" bindValue="id" title="Document type" i18n-title></pngx-input-select>
|
||||
|
||||
<small class="text-muted" i18n>
|
||||
Draw rectangles on the preview to define extraction zones. Use the
|
||||
page controls above the preview to add zones on different pages.
|
||||
</small>
|
||||
</ng-template>
|
||||
</li>
|
||||
|
||||
<li ngbNavItem="zones">
|
||||
<a ngbNavLink><ng-container i18n>Zones</ng-container> <span class="badge bg-primary ms-2">{{ template.zones.length }}</span></a>
|
||||
<ng-template ngbNavContent>
|
||||
<pngx-ocr-template-zone-list
|
||||
[zones]="template.zones"
|
||||
[selectedZoneIndex]="selectedZoneIndex"
|
||||
[previewPage]="previewPage"
|
||||
[previewPageCount]="previewPageCount"
|
||||
[customFields]="customFields"
|
||||
(zoneSelected)="selectZone($event)"
|
||||
(zoneRemoved)="removeZone($event)"
|
||||
></pngx-ocr-template-zone-list>
|
||||
</ng-template>
|
||||
</li>
|
||||
|
||||
<li ngbNavItem="zone">
|
||||
<a ngbNavLink i18n>Zone</a>
|
||||
<ng-template ngbNavContent>
|
||||
@if (selectedZone; as zone) {
|
||||
<div class="d-flex justify-content-between align-items-center mb-3">
|
||||
<strong>{{ zone.name }}</strong>
|
||||
<div class="d-flex gap-2">
|
||||
<button class="btn btn-sm btn-primary" (click)="save()" [disabled]="saving">
|
||||
@if (saving) {
|
||||
<span class="spinner-border spinner-border-sm me-1"></span>
|
||||
}
|
||||
<span i18n>Save</span>
|
||||
</button>
|
||||
<button class="btn btn-sm btn-outline-danger" (click)="deleteSelectedZone()">
|
||||
<i-bs name="trash" class="me-1"></i-bs><ng-container i18n>Delete zone</ng-container>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="mb-3">
|
||||
<label class="form-label" i18n>Zone Name</label>
|
||||
<input
|
||||
type="text"
|
||||
class="form-control"
|
||||
[(ngModel)]="zone.name"
|
||||
(ngModelChange)="redrawCanvas()"
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div class="mb-3">
|
||||
<label class="form-label" i18n>Page</label>
|
||||
<input
|
||||
type="number"
|
||||
class="form-control"
|
||||
[(ngModel)]="zone.page"
|
||||
min="-1"
|
||||
(ngModelChange)="redrawCanvas()"
|
||||
/>
|
||||
<small class="text-muted" i18n>Page this zone is on. Use -1 for the last page. Set automatically when you draw it.</small>
|
||||
</div>
|
||||
|
||||
<div class="mb-3">
|
||||
<label class="form-label" i18n>Field</label>
|
||||
<div class="input-group">
|
||||
<select class="form-select" [ngModel]="zoneFieldValue(zone)" (ngModelChange)="setZoneField(zone, $event)">
|
||||
<optgroup label="Built-in fields" i18n-label>
|
||||
@for (t of builtinTargets; track t.id) {
|
||||
<option [ngValue]="t.id">{{ t.name }}</option>
|
||||
}
|
||||
</optgroup>
|
||||
<optgroup label="Custom fields" i18n-label>
|
||||
@for (cf of customFields; track cf.id) {
|
||||
<option [ngValue]="cf.id">{{ cf.name }} ({{ cf.data_type }})</option>
|
||||
}
|
||||
</optgroup>
|
||||
</select>
|
||||
<button
|
||||
class="btn btn-outline-secondary"
|
||||
type="button"
|
||||
(click)="openQuickCreate(selectedZoneIndex)"
|
||||
title="Create new custom field"
|
||||
i18n-title
|
||||
>
|
||||
<i-bs name="plus"></i-bs>
|
||||
</button>
|
||||
</div>
|
||||
<small class="text-muted" i18n>Write the extracted value to a custom field, or to a built-in field (Title, ASN, Date created).</small>
|
||||
</div>
|
||||
|
||||
@if (isFieldShared(zone)) {
|
||||
<div class="card mb-3 border-info">
|
||||
<div class="card-body">
|
||||
<h6 class="card-title d-flex align-items-center gap-2">
|
||||
<i-bs name="braces"></i-bs>
|
||||
<span i18n>Combine zones into this field</span>
|
||||
</h6>
|
||||
<p class="small text-muted mb-2" i18n>
|
||||
More than one zone writes to this field. Build the combined
|
||||
value below: click a zone to insert its token, and type any
|
||||
separators or literal text between tokens.
|
||||
</p>
|
||||
<div class="d-flex flex-wrap gap-1 mb-2">
|
||||
@for (z of zonesForField(zone); track $index) {
|
||||
<button
|
||||
type="button"
|
||||
class="btn btn-sm btn-outline-info"
|
||||
(click)="insertCombineToken(zone, z)"
|
||||
title="Insert token"
|
||||
i18n-title
|
||||
>
|
||||
+ {{ z.name || 'Zone' }}
|
||||
</button>
|
||||
}
|
||||
</div>
|
||||
<input
|
||||
type="text"
|
||||
class="form-control font-monospace"
|
||||
[ngModel]="getCombineFormat(zone)"
|
||||
(ngModelChange)="setCombineFormat(zone, $event)"
|
||||
placeholder="{Zone 1} - {Zone 2}"
|
||||
/>
|
||||
<small class="text-muted" i18n>
|
||||
Tokens are matched by zone name. An empty zone leaves its
|
||||
token blank and the stray separator is trimmed. Leave empty
|
||||
to just join the zones in order with a space.
|
||||
</small>
|
||||
</div>
|
||||
</div>
|
||||
}
|
||||
|
||||
@if (showQuickCreate) {
|
||||
<div class="card mb-3 border-primary">
|
||||
<div class="card-body">
|
||||
<h6 class="card-title" i18n>Create Custom Field</h6>
|
||||
<div class="mb-2">
|
||||
<label class="form-label small" i18n>Field Name</label>
|
||||
<input type="text" class="form-control form-control-sm"
|
||||
[(ngModel)]="quickCreateName" placeholder="e.g. Invoice Number" />
|
||||
</div>
|
||||
<div class="mb-2">
|
||||
<label class="form-label small" i18n>Field Type</label>
|
||||
<select class="form-select form-select-sm" [(ngModel)]="quickCreateType">
|
||||
@for (t of quickCreateTypes; track t.id) {
|
||||
<option [ngValue]="t.id">{{ t.name }}</option>
|
||||
}
|
||||
</select>
|
||||
</div>
|
||||
<div class="d-flex gap-2">
|
||||
<button class="btn btn-primary btn-sm" (click)="submitQuickCreate()"
|
||||
[disabled]="!quickCreateName.trim()" i18n>
|
||||
Create & Assign
|
||||
</button>
|
||||
<button class="btn btn-outline-secondary btn-sm" (click)="cancelQuickCreate()" i18n>
|
||||
Cancel
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
}
|
||||
|
||||
<div class="mb-3">
|
||||
<label class="form-label" i18n>OCR Language</label>
|
||||
<ng-select
|
||||
[items]="ocrLanguageOptions"
|
||||
bindLabel="name"
|
||||
bindValue="id"
|
||||
[multiple]="true"
|
||||
[closeOnSelect]="false"
|
||||
[ngModel]="ocrLanguageArray(zone)"
|
||||
(ngModelChange)="setOcrLanguages(zone, $event)"
|
||||
placeholder="Select languages"
|
||||
i18n-placeholder
|
||||
></ng-select>
|
||||
</div>
|
||||
|
||||
<div class="mb-3">
|
||||
<label class="form-label" i18n>Transform</label>
|
||||
<select class="form-select" [(ngModel)]="zone.transform">
|
||||
@for (opt of transformOptions; track opt.id) {
|
||||
<option [ngValue]="opt.id">{{ opt.name }}</option>
|
||||
}
|
||||
</select>
|
||||
</div>
|
||||
|
||||
@if (zone.transform === dateTransform) {
|
||||
<div class="mb-3">
|
||||
<label class="form-label" i18n>Date format</label>
|
||||
<select class="form-select" [ngModel]="dateFormatChoice(zone)" (ngModelChange)="setDateFormatChoice(zone, $event)">
|
||||
@for (opt of dateFormatOptions; track opt.id) {
|
||||
<option [ngValue]="opt.id">{{ opt.name }}</option>
|
||||
}
|
||||
<option [ngValue]="customDateFormatChoice" i18n>Custom...</option>
|
||||
</select>
|
||||
@if (usesCustomDateFormat(zone)) {
|
||||
<div class="input-group mt-2">
|
||||
<input type="text" class="form-control font-monospace" [(ngModel)]="zone.date_format" placeholder="%d.%m.%Y" />
|
||||
<button class="btn btn-outline-secondary" type="button" [ngbPopover]="dateFmtHelp" [autoClose]="true" title="Date format help" i18n-title>
|
||||
<i-bs name="question-circle"></i-bs>
|
||||
</button>
|
||||
</div>
|
||||
<ng-template #dateFmtHelp>
|
||||
<p class="mb-1" i18n>Python date codes:</p>
|
||||
<ul class="mb-1 ps-3">
|
||||
<li><code>%d</code> <ng-container i18n>day (01-31)</ng-container></li>
|
||||
<li><code>%m</code> <ng-container i18n>month (01-12)</ng-container></li>
|
||||
<li><code>%Y</code> <ng-container i18n>year, 4-digit</ng-container></li>
|
||||
<li><code>%y</code> <ng-container i18n>year, 2-digit</ng-container></li>
|
||||
<li><code>%b</code> <ng-container i18n>month name (Jan)</ng-container></li>
|
||||
</ul>
|
||||
<span i18n>Example:</span> <code>%d.%m.%Y</code> -> 03.03.2026
|
||||
</ng-template>
|
||||
}
|
||||
</div>
|
||||
}
|
||||
|
||||
<div class="mb-3">
|
||||
<label class="form-label" i18n>Validation Regex</label>
|
||||
<input
|
||||
type="text"
|
||||
class="form-control font-monospace"
|
||||
[(ngModel)]="zone.validation_regex"
|
||||
placeholder="e.g. \d{2}\.\d{2}\.\d{4}"
|
||||
>
|
||||
</div>
|
||||
|
||||
<div class="text-muted small">
|
||||
{{ zone.x }}, {{ zone.y }} - {{ zone.width }}x{{ zone.height }}px
|
||||
</div>
|
||||
|
||||
<hr class="my-3" />
|
||||
<h6 i18n>Test</h6>
|
||||
@if (!previewDocId) {
|
||||
<p class="text-muted small mb-0" i18n>
|
||||
Load a document in the Settings tab to test this zone.
|
||||
</p>
|
||||
} @else {
|
||||
<button class="btn btn-sm btn-outline-secondary" (click)="testZone()" [disabled]="zoneTesting">
|
||||
@if (zoneTesting) {
|
||||
<span class="spinner-border spinner-border-sm me-1"></span>
|
||||
}
|
||||
<span i18n>Test this zone</span>
|
||||
</button>
|
||||
@if (zoneTestResult) {
|
||||
@if (zoneTestResult.error) {
|
||||
<div class="alert alert-warning py-2 mt-2 mb-0 small">{{ zoneTestResult.error }}</div>
|
||||
} @else {
|
||||
<dl class="row small mt-2 mb-0">
|
||||
<dt class="col-sm-4" i18n>OCR text</dt>
|
||||
<dd class="col-sm-8"><code>{{ zoneTestResult.raw_text || '(nothing detected)' }}</code></dd>
|
||||
<dt class="col-sm-4" i18n>Value</dt>
|
||||
<dd class="col-sm-8"><code>{{ zoneTestResult.value || '(empty)' }}</code></dd>
|
||||
@if (zoneTestResult.regex) {
|
||||
<dt class="col-sm-4" i18n>Validation</dt>
|
||||
<dd class="col-sm-8">
|
||||
@if (zoneTestResult.regex_match) {
|
||||
<span class="badge bg-success" i18n>Regex matches</span>
|
||||
} @else {
|
||||
<span class="badge bg-danger" i18n>Regex does not match</span>
|
||||
}
|
||||
</dd>
|
||||
}
|
||||
</dl>
|
||||
}
|
||||
}
|
||||
}
|
||||
} @else {
|
||||
<p class="text-muted" i18n>
|
||||
Select a zone from the Zones tab, or draw a rectangle on the document to create one.
|
||||
</p>
|
||||
}
|
||||
</ng-template>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<div [ngbNavOutlet]="nav" class="mt-3"></div>
|
||||
</div>
|
||||
|
||||
<!-- Right column: Document preview with zone overlay -->
|
||||
<div class="col-md-8">
|
||||
@if (pageImageUrl) {
|
||||
<div class="border" style="overflow: auto; max-height: 78vh;">
|
||||
<div class="position-relative d-inline-block" [style.width.%]="zoom * 100">
|
||||
<img
|
||||
#pageImage
|
||||
[src]="pageImageUrl"
|
||||
(load)="onImageLoad()"
|
||||
style="width: 100%; display: block;"
|
||||
[style.visibility]="imageLoaded ? 'visible' : 'hidden'"
|
||||
crossorigin="use-credentials"
|
||||
/>
|
||||
@if (imageLoaded) {
|
||||
<canvas
|
||||
#zoneCanvas
|
||||
class="position-absolute top-0 start-0"
|
||||
style="width: 100%; height: 100%; cursor: crosshair;"
|
||||
(mousedown)="onCanvasMouseDown($event)"
|
||||
(mousemove)="onCanvasMouseMove($event)"
|
||||
(mouseup)="onCanvasMouseUp($event)"
|
||||
></canvas>
|
||||
}
|
||||
@if (!imageLoaded) {
|
||||
<div class="d-flex justify-content-center p-5">
|
||||
<div class="spinner-border" role="status">
|
||||
<span class="visually-hidden" i18n>Loading page...</span>
|
||||
</div>
|
||||
</div>
|
||||
}
|
||||
</div>
|
||||
</div>
|
||||
} @else {
|
||||
<div class="border rounded p-5 text-center text-muted">
|
||||
<i-bs name="file-earmark-image" width="48" height="48"></i-bs>
|
||||
<p class="mt-3" i18n>
|
||||
Enter a document ID and click "Load" to preview a page and draw extraction zones.
|
||||
</p>
|
||||
</div>
|
||||
}
|
||||
</div>
|
||||
</div>
|
||||
+3
@@ -0,0 +1,3 @@
|
||||
// Lay the component's host element out as a block-level box.
:host {
  display: block;
}
|
||||
+990
@@ -0,0 +1,990 @@
|
||||
import { CommonModule } from '@angular/common'
|
||||
import {
|
||||
Component,
|
||||
ElementRef,
|
||||
HostListener,
|
||||
inject,
|
||||
OnDestroy,
|
||||
OnInit,
|
||||
ViewChild,
|
||||
} from '@angular/core'
|
||||
import { FormsModule } from '@angular/forms'
|
||||
import { ActivatedRoute, Router, RouterModule } from '@angular/router'
|
||||
import {
|
||||
NgbNavModule,
|
||||
NgbPopoverModule,
|
||||
NgbTypeaheadModule,
|
||||
NgbTypeaheadSelectItemEvent,
|
||||
} from '@ng-bootstrap/ng-bootstrap'
|
||||
import { NgSelectModule } from '@ng-select/ng-select'
|
||||
import { NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
|
||||
import {
|
||||
catchError,
|
||||
debounceTime,
|
||||
distinctUntilChanged,
|
||||
map,
|
||||
Observable,
|
||||
of,
|
||||
Subject,
|
||||
switchMap,
|
||||
takeUntil,
|
||||
} from 'rxjs'
|
||||
import { SelectComponent } from 'src/app/components/common/input/select/select.component'
|
||||
import { SwitchComponent } from 'src/app/components/common/input/switch/switch.component'
|
||||
import { TextComponent } from 'src/app/components/common/input/text/text.component'
|
||||
import { PageHeaderComponent } from 'src/app/components/common/page-header/page-header.component'
|
||||
import { CustomField, CustomFieldDataType } from 'src/app/data/custom-field'
|
||||
import { Document } from 'src/app/data/document'
|
||||
import { DocumentType } from 'src/app/data/document-type'
|
||||
import {
|
||||
DATE_FORMAT_OPTIONS,
|
||||
DEFAULT_OCR_ZONE_LANGUAGE,
|
||||
DEFAULT_OCR_ZONE_TARGET,
|
||||
DEFAULT_OCR_ZONE_TRANSFORM,
|
||||
isOcrBuiltinTarget,
|
||||
OCR_BUILTIN_TARGETS,
|
||||
OCR_LANGUAGE_OPTIONS,
|
||||
OCR_ZONE_TARGET,
|
||||
OCR_ZONE_TRANSFORM,
|
||||
OcrBuiltinTarget,
|
||||
OcrTemplate,
|
||||
OcrTemplateZone,
|
||||
OcrZoneTestResult,
|
||||
TRANSFORM_OPTIONS,
|
||||
ZoneTestRequest,
|
||||
} from 'src/app/data/ocr-template'
|
||||
import { CorrespondentService } from 'src/app/services/rest/correspondent.service'
|
||||
import { CustomFieldsService } from 'src/app/services/rest/custom-fields.service'
|
||||
import { DocumentTypeService } from 'src/app/services/rest/document-type.service'
|
||||
import { DocumentService } from 'src/app/services/rest/document.service'
|
||||
import { OcrTemplateService } from 'src/app/services/rest/ocr-template.service'
|
||||
import { ToastService } from 'src/app/services/toast.service'
|
||||
import { OcrTemplateEditorZoneListComponent } from './ocr-template-editor-zone-list/ocr-template-editor-zone-list.component'
|
||||
import {
|
||||
DisplayRect,
|
||||
DrawingRect,
|
||||
findHandleAt,
|
||||
findZoneAt,
|
||||
getZoneDisplayRect,
|
||||
getZonePage,
|
||||
HANDLE_SIZE,
|
||||
isZoneOnPage,
|
||||
MoveStart,
|
||||
moveZone,
|
||||
ResizeHandle,
|
||||
resizeZone,
|
||||
sourceRectFromDrawing,
|
||||
} from './zone-geometry'
|
||||
|
||||
/** Identifier of the editor tab that is currently active. */
type ActiveTab = 'settings' | 'zones' | 'zone'

/** Value of a zone's field select: a built-in target, a numeric custom-field id, or no selection. */
type ZoneFieldSelection = OcrBuiltinTarget | number | null

/**
 * State of the pointer gesture in progress on the zone canvas.
 * `moving` and `resizing` carry the index of the affected zone in `template.zones`.
 */
type CanvasInteraction =
  | { kind: 'idle' }
  | { kind: 'drawing'; rect: DrawingRect }
  | { kind: 'moving'; zoneIndex: number; start: MoveStart }
  | { kind: 'resizing'; zoneIndex: number; handle: ResizeHandle }

/** Option value of the "Custom..." entry in the date-format select. */
const CUSTOM_DATE_FORMAT_CHOICE = 'custom'

/** Freshly drawn rectangles narrower or shorter than this are discarded as accidental clicks. */
const MIN_DRAWN_ZONE_SIZE = 10

/** Shared "no gesture in progress" state. */
const NO_CANVAS_INTERACTION: CanvasInteraction = { kind: 'idle' }
|
||||
|
||||
@Component({
|
||||
selector: 'pngx-ocr-template-editor',
|
||||
standalone: true,
|
||||
imports: [
|
||||
PageHeaderComponent,
|
||||
TextComponent,
|
||||
SelectComponent,
|
||||
SwitchComponent,
|
||||
CommonModule,
|
||||
FormsModule,
|
||||
RouterModule,
|
||||
NgbNavModule,
|
||||
NgbPopoverModule,
|
||||
NgbTypeaheadModule,
|
||||
NgSelectModule,
|
||||
NgxBootstrapIconsModule,
|
||||
OcrTemplateEditorZoneListComponent,
|
||||
],
|
||||
templateUrl: './ocr-template-editor.component.html',
|
||||
styleUrls: ['./ocr-template-editor.component.scss'],
|
||||
})
|
||||
export class OcrTemplateEditorComponent implements OnInit, OnDestroy {
|
||||
private readonly route = inject(ActivatedRoute)
|
||||
private readonly router = inject(Router)
|
||||
private readonly templateService = inject(OcrTemplateService)
|
||||
private readonly customFieldsService = inject(CustomFieldsService)
|
||||
private readonly documentTypeService = inject(DocumentTypeService)
|
||||
private readonly correspondentService = inject(CorrespondentService)
|
||||
private readonly documentService = inject(DocumentService)
|
||||
private readonly toastService = inject(ToastService)
|
||||
private readonly destroy$ = new Subject<void>()
|
||||
private readonly customDateFormatZones = new WeakSet<OcrTemplateZone>()
|
||||
|
||||
@ViewChild('zoneCanvas') canvasRef: ElementRef<HTMLCanvasElement>
|
||||
@ViewChild('pageImage') imageRef: ElementRef<HTMLImageElement>
|
||||
|
||||
template: OcrTemplate = {
|
||||
id: null,
|
||||
name: '',
|
||||
document_type: null,
|
||||
sample_document: null,
|
||||
source_width: 0,
|
||||
source_height: 0,
|
||||
enabled: true,
|
||||
combine_formats: {},
|
||||
zones: [],
|
||||
}
|
||||
|
||||
customFields: CustomField[] = []
|
||||
documentTypes: DocumentType[] = []
|
||||
transformOptions = TRANSFORM_OPTIONS
|
||||
builtinTargets = OCR_BUILTIN_TARGETS
|
||||
dateFormatOptions = DATE_FORMAT_OPTIONS
|
||||
ocrLanguageOptions = OCR_LANGUAGE_OPTIONS
|
||||
dateTransform = OCR_ZONE_TRANSFORM.Date
|
||||
customDateFormatChoice = CUSTOM_DATE_FORMAT_CHOICE
|
||||
isNew = true
|
||||
saving = false
|
||||
|
||||
previewDocId: number | null = null
|
||||
previewPage = 0
|
||||
previewPageCount: number | null = null
|
||||
private pageCountForDoc: number | null = null
|
||||
pageImageUrl: string | null = null
|
||||
imageLoaded = false
|
||||
zoom = 1
|
||||
previewDocModel: Document | string = ''
|
||||
private correspondentNames = new Map<number, string>()
|
||||
|
||||
/** One-based page number derived from the zero-based `previewPage`. */
public get previewPageDisplay(): number {
  const zeroBased = this.previewPage
  return zeroBased + 1
}

/** Accepts a one-based page number and navigates to the matching zero-based page. */
public set previewPageDisplay(value: number) {
  const zeroBased = value - 1
  this.goToPage(zeroBased)
}
|
||||
|
||||
activeTab: ActiveTab = 'settings'
|
||||
|
||||
selectedZoneIndex: number | null = null
|
||||
private canvasInteraction: CanvasInteraction = NO_CANVAS_INTERACTION
|
||||
|
||||
zoneTestResult: OcrZoneTestResult | null = null
|
||||
zoneTesting = false
|
||||
|
||||
showQuickCreate = false
|
||||
quickCreateName = ''
|
||||
quickCreateType = CustomFieldDataType.String
|
||||
quickCreateForZoneIndex: number | null = null
|
||||
quickCreateTypes = [
|
||||
{ id: CustomFieldDataType.String, name: $localize`String` },
|
||||
{ id: CustomFieldDataType.Integer, name: $localize`Integer` },
|
||||
{ id: CustomFieldDataType.Float, name: $localize`Float` },
|
||||
{ id: CustomFieldDataType.Date, name: $localize`Date` },
|
||||
{ id: CustomFieldDataType.Monetary, name: $localize`Monetary` },
|
||||
{ id: CustomFieldDataType.Boolean, name: $localize`Boolean` },
|
||||
{ id: CustomFieldDataType.Url, name: $localize`URL` },
|
||||
{ id: CustomFieldDataType.LongText, name: $localize`Long Text` },
|
||||
]
|
||||
|
||||
/** The zone at `selectedZoneIndex`, or null when nothing is selected or the index has no zone. */
get selectedZone(): OcrTemplateZone | null {
  if (this.selectedZoneIndex === null) {
    return null
  }
  return this.template.zones[this.selectedZoneIndex] ?? null
}
|
||||
|
||||
/** Localized page heading, depending on whether the template is new or being edited. */
get pageTitle(): string {
  if (this.isNew) {
    return $localize`New OCR Template`
  }
  return $localize`Edit OCR Template`
}
|
||||
|
||||
/**
 * Starts loading the lookup lists (custom fields, document types, correspondent
 * names) and then either fetches the template named by the `id` route parameter
 * or, for a new template, pre-fills it from the `document_type` and
 * `sample_document` query parameters.
 */
ngOnInit() {
  this.customFieldsService
    .listAll()
    .pipe(takeUntil(this.destroy$))
    .subscribe((r) => (this.customFields = r.results))

  this.documentTypeService
    .listAll()
    .pipe(takeUntil(this.destroy$))
    .subscribe((r) => (this.documentTypes = r.results))

  this.correspondentService
    .listAll()
    .pipe(takeUntil(this.destroy$))
    .subscribe((r) => {
      this.correspondentNames = new Map(r.results.map((c) => [c.id, c.name]))
    })

  // Parse a decimal id. Input that does not start with digits yields null
  // rather than NaN, so a malformed query parameter is never stored on the
  // template or used as the preview document id.
  const toId = (raw: string): number | null => {
    const parsed = Number.parseInt(raw, 10)
    return Number.isNaN(parsed) ? null : parsed
  }

  const id = this.route.snapshot.paramMap.get('id')
  if (id && id !== 'new') {
    this.isNew = false
    this.templateService
      .get(Number.parseInt(id, 10))
      .pipe(takeUntil(this.destroy$))
      .subscribe((t) => {
        this.template = t
        this.template.combine_formats ??= {}
        if (t.sample_document) {
          this.previewDocId = t.sample_document
          this.loadPreview()
        }
      })
  } else {
    const qp = this.route.snapshot.queryParams

    const documentType = qp['document_type']
      ? toId(qp['document_type'])
      : null
    if (documentType !== null) {
      this.template.document_type = documentType
    }

    const sampleDocument = qp['sample_document']
      ? toId(qp['sample_document'])
      : null
    if (sampleDocument !== null) {
      this.template.sample_document = sampleDocument
      this.previewDocId = sampleDocument
      this.loadPreview()
    }
  }
}
|
||||
|
||||
/**
 * Search source for the preview-document typeahead. Debounces the typed text,
 * ignores terms shorter than two characters after trimming, and queries
 * documents by title, restricted to the template's document type when one is
 * set. A failed request yields an empty result list.
 */
searchDocuments = (text$: Observable<string>): Observable<Document[]> => {
  const runQuery = (term: string): Observable<Document[]> => {
    if (!term || term.trim().length < 2) {
      return of([])
    }

    const filters: { title__icontains: string; document_type__id?: number } = {
      title__icontains: term.trim(),
    }
    const documentType = this.template.document_type
    if (documentType) {
      filters.document_type__id = documentType
    }

    return this.documentService.list(1, 10, 'created', true, filters).pipe(
      map((page) => page.results),
      catchError(() => of([]))
    )
  }

  return text$.pipe(
    debounceTime(250),
    distinctUntilChanged(),
    switchMap((term) => runQuery(term))
  )
}
|
||||
|
||||
/**
 * Formats a typeahead entry. Plain strings pass through unchanged; documents
 * render as "#id title", followed by the correspondent name in parentheses
 * when the document has a correspondent whose name is known.
 */
documentFormatter = (doc: Document | string): string => {
  if (typeof doc === 'string') {
    return doc
  }

  const correspondentName = doc.correspondent
    ? this.correspondentNames.get(doc.correspondent)
    : null
  const base = `#${doc.id} ${doc.title}`

  if (correspondentName) {
    return `${base} (${correspondentName})`
  }
  return base
}
|
||||
|
||||
/**
 * Handles a typeahead selection: stores the picked document as the preview
 * document, adopts its document type when the template has none yet, and loads
 * the first page.
 */
onPreviewDocSelected(event: NgbTypeaheadSelectItemEvent<Document>) {
  // The model is assigned explicitly below, so the typeahead's default handling is cancelled.
  event.preventDefault()

  const picked: Document = event.item
  this.previewDocModel = picked
  this.previewDocId = picked.id

  const templateHasType = Boolean(this.template.document_type)
  if (!templateHasType && picked.document_type) {
    this.template.document_type = picked.document_type
  }

  this.previewPage = 0
  this.loadPreview()
}
|
||||
|
||||
/** Forgets the preview document and resets all page and image state tied to it. */
clearPreviewDoc() {
  // Document identity
  this.previewDocId = null
  this.previewDocModel = ''

  // Page bookkeeping
  this.pageCountForDoc = null
  this.previewPageCount = null
  this.previewPage = 0

  // Rendered image
  this.imageLoaded = false
  this.pageImageUrl = null
}
|
||||
|
||||
/**
 * Points the preview image at the current document and page. The first time a
 * given document is previewed, its metadata is also fetched to learn the page
 * count and, if the typeahead model is still empty, to populate it.
 *
 * The metadata response is ignored when the preview document has changed or
 * been cleared in the meantime, so a slow response can neither overwrite the
 * page count of a newer document nor refill an input the user has cleared.
 */
loadPreview() {
  const docId = this.previewDocId
  if (!docId) return

  if (this.pageCountForDoc !== docId) {
    this.pageCountForDoc = docId
    this.previewPageCount = null
    // True while this request still belongs to the document being previewed.
    const isCurrent = () => this.pageCountForDoc === docId
    this.documentService
      .get(docId)
      .pipe(takeUntil(this.destroy$))
      .subscribe({
        next: (doc) => {
          if (!isCurrent()) return
          this.previewPageCount = doc?.page_count ?? null
          if (doc && !this.previewDocModel) this.previewDocModel = doc
        },
        error: () => {
          if (!isCurrent()) return
          this.previewPageCount = null
        },
      })
  }

  this.pageImageUrl = this.templateService.getPageImageUrl(
    docId,
    this.previewPage
  )
  // Hide the image and canvas until the new page has finished loading.
  this.imageLoaded = false
}
|
||||
|
||||
/**
 * Navigates the preview to a zero-based page index.
 *
 * Non-finite input is ignored. Fractional input (for example typed into the
 * page number field) is truncated to a whole page so a non-integer index is
 * never stored or requested. The index is clamped to 0 at the low end and to
 * the last page when the page count is known; an unchanged page does nothing.
 */
goToPage(page: number) {
  if (!Number.isFinite(page)) return
  const target = Math.trunc(page)
  // Without a known page count there is no upper bound to clamp against.
  const max = this.previewPageCount ? this.previewPageCount - 1 : target
  const clamped = Math.max(0, Math.min(target, max))
  if (clamped === this.previewPage) return
  this.previewPage = clamped
  this.loadPreview()
}
|
||||
|
||||
/** Steps the preview back by one page; clamping is left to `goToPage`. */
prevPage() {
  const previous = this.previewPage - 1
  this.goToPage(previous)
}
|
||||
|
||||
/** Steps the preview forward by one page; clamping is left to `goToPage`. */
nextPage() {
  const following = this.previewPage + 1
  this.goToPage(following)
}
|
||||
|
||||
/** Increases the zoom factor by 0.25 (rounded to two decimals), capped at 4. */
zoomIn() {
  const stepped = Math.round((this.zoom + 0.25) * 100) / 100
  this.zoom = stepped > 4 ? 4 : stepped
  this.afterZoom()
}
|
||||
|
||||
/** Decreases the zoom factor by 0.25 (rounded to two decimals), floored at 0.5. */
zoomOut() {
  const stepped = Math.round((this.zoom - 0.25) * 100) / 100
  this.zoom = stepped < 0.5 ? 0.5 : stepped
  this.afterZoom()
}
|
||||
|
||||
/** Restores the zoom factor to 1 and schedules a canvas redraw. */
resetZoom() {
  const defaultZoom = 1
  this.zoom = defaultZoom
  this.afterZoom()
}
|
||||
|
||||
/** Schedules a canvas redraw after a zoom change. */
private afterZoom() {
  // Redraw on a later task so the wrapper has reflowed to its new width first.
  setTimeout(() => {
    this.redrawCanvas()
  })
}
|
||||
|
||||
/** Page number for a zone, as computed by `getZonePage` from the current preview page and page count. */
zonePage(zone: OcrTemplateZone): number {
  const { previewPage, previewPageCount } = this
  return getZonePage(zone, previewPage, previewPageCount)
}
|
||||
|
||||
/** Whether `isZoneOnPage` places the zone on the page currently previewed. */
private isOnCurrentPage(zone: OcrTemplateZone): boolean {
  const { previewPage, previewPageCount } = this
  return isZoneOnPage(zone, previewPage, previewPageCount)
}
|
||||
|
||||
/**
 * Runs when the page image has loaded: marks it as loaded, records its natural
 * dimensions as the template's source size and schedules the first canvas draw.
 */
onImageLoad() {
  this.imageLoaded = true

  const { naturalWidth, naturalHeight } = this.imageRef.nativeElement
  this.template.source_width = naturalWidth
  this.template.source_height = naturalHeight

  // The canvas is only rendered once imageLoaded is true, so drawing has to wait a tick.
  setTimeout(() => {
    this.redrawCanvas()
  })
}
|
||||
|
||||
/**
 * Starts a canvas gesture. In priority order: grabbing a resize handle of the
 * selected zone, selecting and moving the zone under the pointer (unless Shift
 * is held), or starting to draw a new rectangle.
 */
onCanvasMouseDown(event: MouseEvent) {
  const bounds = this.canvasRef.nativeElement.getBoundingClientRect()
  const px = event.clientX - bounds.left
  const py = event.clientY - bounds.top

  // 1. Resize handle of the currently selected zone.
  const selected = this.selectedZoneIndex
  if (selected !== null) {
    const grabbed = this.findHandleAt({ x: px, y: py }, selected)
    if (grabbed) {
      this.canvasInteraction = {
        kind: 'resizing',
        zoneIndex: selected,
        handle: grabbed,
      }
      return
    }
  }

  // 2. Existing zone under the pointer: select it and begin moving it.
  const hitIndex = this.findZoneAt({ x: px, y: py })
  const wantsMove = hitIndex !== null && !event.shiftKey
  if (wantsMove) {
    this.selectZone(hitIndex)
    const hitZone = this.template.zones[hitIndex]
    const start = { mouseX: px, mouseY: py, zoneX: hitZone.x, zoneY: hitZone.y }
    this.canvasInteraction = { kind: 'moving', zoneIndex: hitIndex, start }
    return
  }

  // 3. Shift+click, or a click on empty space, begins a new rectangle.
  const rect = { startX: px, startY: py, endX: px, endY: py }
  this.canvasInteraction = { kind: 'drawing', rect }
  this.selectedZoneIndex = null
}
|
||||
|
||||
/**
 * Continues the gesture in progress (resize, move or draw) and redraws. When
 * no gesture is active, only the cursor is updated to hint at what a click
 * would do.
 */
onCanvasMouseMove(event: MouseEvent) {
  const bounds = this.canvasRef.nativeElement.getBoundingClientRect()
  const px = event.clientX - bounds.left
  const py = event.clientY - bounds.top
  const gesture = this.canvasInteraction

  switch (gesture.kind) {
    case 'resizing':
      this.applyResize(gesture.zoneIndex, gesture.handle, px, py)
      this.redrawCanvas()
      return
    case 'moving':
      moveZone(
        this.template.zones[gesture.zoneIndex],
        { x: px, y: py },
        gesture.start,
        this.canvasSize(),
        this.imageNaturalSize()
      )
      this.redrawCanvas()
      return
    case 'drawing':
      gesture.rect.endX = px
      gesture.rect.endY = py
      this.redrawCanvas()
      return
  }

  // Idle: cursor precedence is resize handle, then move (over a zone), then crosshair.
  const canvasEl = this.canvasRef.nativeElement
  if (this.selectedZoneIndex !== null) {
    const hovered = this.findHandleAt({ x: px, y: py }, this.selectedZoneIndex)
    if (hovered) {
      const cursorByHandle: Record<ResizeHandle, string> = {
        nw: 'nw-resize',
        ne: 'ne-resize',
        sw: 'sw-resize',
        se: 'se-resize',
        n: 'n-resize',
        s: 's-resize',
        w: 'w-resize',
        e: 'e-resize',
      }
      canvasEl.style.cursor = cursorByHandle[hovered] || 'crosshair'
      return
    }
  }

  const overZone = this.findZoneAt({ x: px, y: py }) !== null
  canvasEl.style.cursor = overZone ? 'move' : 'crosshair'
}
|
||||
|
||||
/**
 * Ends the gesture in progress. Moving and resizing simply stop; a finished
 * drawing is converted to source coordinates and, unless it is too small,
 * appended as a new zone which then becomes the selection.
 */
onCanvasMouseUp(_event: MouseEvent) {
  const gesture = this.canvasInteraction
  if (gesture.kind === 'idle') return

  this.stopCanvasInteraction()
  if (gesture.kind !== 'drawing') return

  const sourceRect = sourceRectFromDrawing(
    gesture.rect,
    this.canvasSize(),
    this.imageNaturalSize()
  )

  // Rectangles below the minimum size are treated as accidental clicks.
  const tooSmall =
    sourceRect.w < MIN_DRAWN_ZONE_SIZE || sourceRect.h < MIN_DRAWN_ZONE_SIZE
  if (tooSmall) {
    this.redrawCanvas()
    return
  }

  const zones = this.template.zones
  zones.push(this.createZoneFromRect(sourceRect))
  this.selectZone(zones.length - 1)
}
|
||||
|
||||
/**
 * Builds a new zone from a rectangle, filling every other property with its
 * default. The zone is placed on the page currently previewed and records the
 * image's natural size as its source dimensions.
 */
private createZoneFromRect(rect: DisplayRect): OcrTemplateZone {
  const { width: sourceWidth, height: sourceHeight } = this.imageNaturalSize()
  const existingCount = this.template.zones.length
  const { x, y, w, h } = rect

  return {
    name: `Zone ${existingCount + 1}`,
    target: DEFAULT_OCR_ZONE_TARGET,
    custom_field: this.defaultCustomFieldId(),
    x,
    y,
    width: w,
    height: h,
    page: this.previewPageDisplay,
    ocr_language: DEFAULT_OCR_ZONE_LANGUAGE,
    transform: DEFAULT_OCR_ZONE_TRANSFORM,
    date_format: '',
    validation_regex: '',
    order: existingCount,
    zone_source_width: sourceWidth,
    zone_source_height: sourceHeight,
  }
}
|
||||
|
||||
/** Id of the first loaded custom field, or null when there is none. */
private defaultCustomFieldId(): number | null {
  const [firstField] = this.customFields
  return firstField?.id ?? null
}
|
||||
|
||||
/** Aborts any gesture still in progress when the mouse button is released anywhere in the document. */
@HostListener('document:mouseup')
onDocumentMouseUp() {
  const gestureActive = this.canvasInteraction.kind !== 'idle'
  if (gestureActive) {
    this.stopCanvasInteraction()
    this.redrawCanvas()
  }
}
|
||||
|
||||
/** Returns the canvas to the shared idle interaction state. */
private stopCanvasInteraction() {
  const idle = NO_CANVAS_INTERACTION
  this.canvasInteraction = idle
}
|
||||
|
||||
/** The rectangle being drawn, or null when the current gesture is not a drawing. */
private drawingRect(): DrawingRect | null {
  const gesture = this.canvasInteraction
  if (gesture.kind !== 'drawing') {
    return null
  }
  return gesture.rect
}
|
||||
|
||||
/**
 * Display rectangle of the zone at `zoneIdx`, or null when the canvas or image
 * is not available yet, the zone does not exist, or it is not on the current page.
 */
private getZoneDisplayRect(zoneIdx: number): DisplayRect | null {
  const canvasEl = this.canvasRef?.nativeElement
  const imageEl = this.imageRef?.nativeElement
  const viewReady = Boolean(canvasEl && imageEl && imageEl.naturalWidth)
  if (!viewReady) return null

  const zone = this.template.zones[zoneIdx]
  if (!zone || !this.isOnCurrentPage(zone)) return null

  return getZoneDisplayRect(zone, this.canvasSize(), this.imageNaturalSize())
}
|
||||
|
||||
/** Resize handle of the given zone found at `point`, or null when the zone has no display rectangle or no handle matches. */
private findHandleAt(
  point: { x: number; y: number },
  zoneIdx: number
): ResizeHandle | null {
  const displayRect = this.getZoneDisplayRect(zoneIdx)
  return displayRect ? findHandleAt(point, displayRect) : null
}
|
||||
|
||||
/** Resizes the zone at `zoneIndex` by dragging `handle` to the pointer position; does nothing when the zone does not exist. */
private applyResize(
  zoneIndex: number,
  handle: ResizeHandle,
  mx: number,
  my: number
) {
  const target = this.template.zones[zoneIndex]
  if (target) {
    const pointer = { x: mx, y: my }
    resizeZone(
      target,
      handle,
      pointer,
      this.canvasSize(),
      this.imageNaturalSize()
    )
  }
}
|
||||
|
||||
/** Index of the zone found at `point` on the current page, or null when the image has no natural width yet or nothing matches. */
private findZoneAt(point: { x: number; y: number }): number | null {
  const imageReady = Boolean(this.imageRef.nativeElement.naturalWidth)
  if (!imageReady) {
    return null
  }

  const { zones } = this.template
  return findZoneAt(
    point,
    zones,
    this.previewPage,
    this.previewPageCount,
    this.canvasSize(),
    this.imageNaturalSize()
  )
}
|
||||
|
||||
/**
 * Repaints the zone overlay: every zone on the current page with its outline,
 * translucent fill and name label, resize handles for the selected zone, and
 * the dashed rectangle of a drawing in progress.
 *
 * Does nothing while the canvas or image is not rendered, or when no 2D
 * context can be obtained.
 */
redrawCanvas() {
  if (!this.canvasRef || !this.imageRef) return
  const canvas = this.canvasRef.nativeElement
  const img = this.imageRef.nativeElement

  // Size the drawing buffer to the image's rendered size.
  canvas.width = img.clientWidth
  canvas.height = img.clientHeight

  // getContext() may return null; bail out instead of throwing below.
  const ctx = canvas.getContext('2d')
  if (!ctx) return

  ctx.clearRect(0, 0, canvas.width, canvas.height)

  const colors = [
    '#4f8ff7',
    '#ff6b6b',
    '#51cf66',
    '#ffd43b',
    '#cc5de8',
    '#ff922b',
    '#20c997',
    '#e599f7',
  ]

  // The label font is the same for every zone, so set it once.
  ctx.font = '12px sans-serif'

  this.template.zones.forEach((zone, idx) => {
    if (!this.isOnCurrentPage(zone)) return
    const color = colors[idx % colors.length]

    // Scale from the size the zone was drawn against, falling back to the
    // current image's natural size when the zone does not record one.
    const srcW = zone.zone_source_width || img.naturalWidth
    const srcH = zone.zone_source_height || img.naturalHeight
    const scaleX = canvas.width / srcW
    const scaleY = canvas.height / srcH
    const x = zone.x * scaleX
    const y = zone.y * scaleY
    const w = zone.width * scaleX
    const h = zone.height * scaleY

    ctx.strokeStyle = color
    ctx.lineWidth = idx === this.selectedZoneIndex ? 3 : 2
    ctx.strokeRect(x, y, w, h)

    // '20' is appended as the alpha byte of an 8-digit hex colour.
    ctx.fillStyle = color + '20'
    ctx.fillRect(x, y, w, h)

    // Name label: a rounded pill above the zone, kept inside the canvas top edge.
    const label = zone.name || `Zone ${idx + 1}`
    ctx.textBaseline = 'middle'
    const padX = 6
    const pillH = 17
    const pillW = ctx.measureText(label).width + padX * 2
    const pillX = x
    const pillY = Math.max(0, y - pillH - 2)
    const r = 4
    ctx.fillStyle = color
    ctx.beginPath()
    ctx.moveTo(pillX + r, pillY)
    ctx.arcTo(pillX + pillW, pillY, pillX + pillW, pillY + pillH, r)
    ctx.arcTo(pillX + pillW, pillY + pillH, pillX, pillY + pillH, r)
    ctx.arcTo(pillX, pillY + pillH, pillX, pillY, r)
    ctx.arcTo(pillX, pillY, pillX + pillW, pillY, r)
    ctx.closePath()
    ctx.fill()
    ctx.fillStyle = '#ffffff'
    ctx.fillText(label, pillX + padX, pillY + pillH / 2 + 0.5)
    ctx.textBaseline = 'alphabetic'

    // Resize handles: four corners and four edge midpoints of the selected zone.
    if (idx === this.selectedZoneIndex) {
      ctx.fillStyle = color
      const handles = [
        [x, y],
        [x + w / 2, y],
        [x + w, y],
        [x, y + h / 2],
        [x + w, y + h / 2],
        [x, y + h],
        [x + w / 2, y + h],
        [x + w, y + h],
      ]
      for (const [hx, hy] of handles) {
        ctx.fillRect(
          hx - HANDLE_SIZE / 2,
          hy - HANDLE_SIZE / 2,
          HANDLE_SIZE,
          HANDLE_SIZE
        )
      }
    }
  })

  // Rectangle currently being drawn, if any.
  const drawingRect = this.drawingRect()
  if (drawingRect) {
    const cw = drawingRect.endX - drawingRect.startX
    const ch = drawingRect.endY - drawingRect.startY
    ctx.fillStyle = 'rgba(105, 219, 124, 0.25)'
    ctx.fillRect(drawingRect.startX, drawingRect.startY, cw, ch)
    ctx.strokeStyle = '#69db7c'
    ctx.lineWidth = 2
    ctx.setLineDash([5, 5])
    ctx.strokeRect(drawingRect.startX, drawingRect.startY, cw, ch)
    ctx.setLineDash([])
  }
}
|
||||
|
||||
/** Current drawing-buffer dimensions of the zone canvas. */
private canvasSize() {
  const { width, height } = this.canvasRef.nativeElement
  return { width, height }
}
|
||||
|
||||
/** Natural (intrinsic) dimensions of the loaded page image. */
private imageNaturalSize() {
  const { naturalWidth, naturalHeight } = this.imageRef.nativeElement
  return { width: naturalWidth, height: naturalHeight }
}
|
||||
|
||||
/**
 * Removes the zone at `index` and keeps the selection pointing at the same
 * zone: the selection is cleared when the selected zone itself is removed and
 * shifted down by one when a zone before it is removed.
 *
 * An index outside the zone list is ignored. Without that check a negative
 * index would make `splice` remove a zone counted from the end of the list,
 * and an out-of-range index could still shift the selection.
 */
removeZone(index: number) {
  const zones = this.template.zones
  if (!Number.isInteger(index) || index < 0 || index >= zones.length) return

  zones.splice(index, 1)

  const selected = this.selectedZoneIndex
  if (selected === index) {
    this.selectedZoneIndex = null
  } else if (selected !== null && selected > index) {
    this.selectedZoneIndex = selected - 1
  }
  this.redrawCanvas()
}
|
||||
|
||||
/**
 * Selects the zone at `index`: switches to the zone tab, discards the previous
 * test result, seeds the zone's combine default, navigates the preview to the
 * zone's page and redraws the overlay.
 */
selectZone(index: number) {
  this.selectedZoneIndex = index
  this.activeTab = 'zone'
  this.zoneTestResult = null

  const picked = this.template.zones[index]
  if (picked) {
    this.seedCombineDefault(picked)
    // zonePage is one-based, goToPage expects a zero-based index.
    const zeroBasedPage = this.zonePage(picked) - 1
    this.goToPage(zeroBasedPage)
  }

  this.redrawCanvas()
}
|
||||
|
||||
/**
 * Runs the selected zone against the preview document on the server and
 * stores the outcome in `zoneTestResult`.
 *
 * Selecting a zone clears `zoneTestResult`, so a response that arrives after
 * the selection has moved to a different zone is dropped rather than shown
 * under the wrong zone. The busy flag is reset in either case.
 */
testZone() {
  const zone = this.selectedZone
  if (!zone || !this.previewDocId) return

  const testedIndex = this.selectedZoneIndex
  const selectionChanged = () => this.selectedZoneIndex !== testedIndex

  this.zoneTesting = true
  this.zoneTestResult = null
  this.templateService
    .testZone(this.previewDocId, this.zoneTestRequest(zone))
    .pipe(takeUntil(this.destroy$))
    .subscribe({
      next: (res) => {
        this.zoneTesting = false
        if (selectionChanged()) return
        this.zoneTestResult = res
      },
      error: (err) => {
        this.zoneTesting = false
        if (selectionChanged()) return
        this.zoneTestResult = {
          error: err.error?.error || $localize`Test failed`,
        }
      },
    })
}
|
||||
|
||||
/** Builds the zone-test request payload from a zone; a missing page defaults to 1. */
private zoneTestRequest(zone: OcrTemplateZone): ZoneTestRequest {
  const {
    name,
    x,
    y,
    width,
    height,
    ocr_language,
    transform,
    date_format,
    validation_regex,
    zone_source_width,
    zone_source_height,
  } = zone

  return {
    name,
    x,
    y,
    width,
    height,
    page: zone.page ?? 1,
    ocr_language,
    transform,
    date_format,
    validation_regex,
    zone_source_width,
    zone_source_height,
  }
}
|
||||
|
||||
/** Removes the selected zone, if any, and switches back to the zones tab. */
deleteSelectedZone() {
  const selected = this.selectedZoneIndex
  if (selected !== null) {
    this.removeZone(selected)
    this.activeTab = 'zones'
  }
}
|
||||
|
||||
/**
 * Persists the template, creating it when new and updating it otherwise. The
 * current preview document is stored as the template's sample document.
 *
 * A call made while a save is already in flight is ignored, so a double
 * submit cannot issue two create requests. After a successful save the
 * returned template replaces the local one, with `combine_formats` normalised
 * to an object in the same way as when a template is loaded.
 */
save() {
  if (this.saving) return
  this.saving = true

  this.pruneCombineFormats()
  this.template.sample_document = this.previewDocId

  const request$ = this.isNew
    ? this.templateService.create(this.template)
    : this.templateService.update(this.template)

  request$.pipe(takeUntil(this.destroy$)).subscribe({
    next: (saved) => {
      const idx = this.selectedZoneIndex
      this.template = saved
      this.template.combine_formats ??= {}
      this.isNew = false
      this.selectedZoneIndex = idx
      this.saving = false
      this.toastService.showInfo($localize`OCR template saved.`)
      this.redrawCanvas()
    },
    error: (e) => {
      this.saving = false
      this.toastService.showError($localize`Error saving OCR template.`, e)
    },
  })
}
|
||||
|
||||
/**
 * Per-zone memo of the last split language string, so that repeated calls
 * with an unchanged `ocr_language` return the same array instance.
 */
private ocrLangCache = new WeakMap<
  OcrTemplateZone,
  { src: string; arr: string[] }
>()

/**
 * Splits a zone's `+`-separated `ocr_language` into its non-empty parts.
 * The result is cached per zone and reused while the source string is unchanged.
 */
ocrLanguageArray(zone: OcrTemplateZone): string[] {
  const src = zone.ocr_language || ''
  const hit = this.ocrLangCache.get(zone)
  if (hit?.src === src) {
    return hit.arr
  }

  const arr = src === '' ? [] : src.split('+').filter((part) => part.length > 0)
  this.ocrLangCache.set(zone, { src, arr })
  return arr
}
|
||||
|
||||
/**
 * Stores the selected languages on the zone as a `+`-joined string and
 * primes the language cache with a copy of the selection.
 */
setOcrLanguages(zone: OcrTemplateZone, langs: string[]) {
  const selected = langs || []
  zone.ocr_language = selected.join('+')

  const entry = { src: zone.ocr_language, arr: [...selected] }
  this.ocrLangCache.set(zone, entry)
}
|
||||
|
||||
/** Returns the name of the loaded custom field with this id, or `Field #<id>` when it is not loaded. */
getCustomFieldName(id: number): string {
  const match = this.customFields.find((field) => field.id === id)
  if (match) {
    return match.name
  }
  return `Field #${id}`
}
|
||||
|
||||
/**
 * Value bound to the field select: the custom-field id when the zone targets
 * a custom field, otherwise the built-in target id. A zone without a target
 * is treated as having the default target.
 */
zoneFieldValue(zone: OcrTemplateZone): ZoneFieldSelection {
  const target = zone.target || DEFAULT_OCR_ZONE_TARGET
  if (target === OCR_ZONE_TARGET.CustomField) {
    return zone.custom_field
  }
  return target
}
|
||||
|
||||
/**
 * Applies a field-select value to the zone. A built-in target id becomes the
 * zone target and clears the custom field; any other value selects the
 * custom-field target, storing the value only when it is a number. The
 * combine default for the resulting field is seeded afterwards.
 */
setZoneField(zone: OcrTemplateZone, value: ZoneFieldSelection) {
  if (!isOcrBuiltinTarget(value)) {
    zone.target = OCR_ZONE_TARGET.CustomField
    zone.custom_field = typeof value === 'number' ? value : null
  } else {
    zone.target = value
    zone.custom_field = null
  }

  this.seedCombineDefault(zone)
}
|
||||
|
||||
/** String key of the field a zone writes to, or null when the zone has no field selected. */
fieldKeyFor(zone: OcrTemplateZone): string | null {
  const selection = this.zoneFieldValue(zone)
  if (selection === null || selection === undefined) {
    return null
  }
  return String(selection)
}
|
||||
|
||||
/**
 * Lists every zone of the template (the given one included) that shares the
 * given zone's field key. Returns an empty list when the zone has no key.
 */
zonesForField(zone: OcrTemplateZone): OcrTemplateZone[] {
  const key = this.fieldKeyFor(zone)
  if (!key) {
    return []
  }

  const matches: OcrTemplateZone[] = []
  for (const candidate of this.template.zones) {
    if (this.fieldKeyFor(candidate) === key) {
      matches.push(candidate)
    }
  }
  return matches
}
|
||||
|
||||
/** True when more than one zone of the template uses this zone's field. */
isFieldShared(zone: OcrTemplateZone): boolean {
  const sharing = this.zonesForField(zone)
  return sharing.length > 1
}
|
||||
|
||||
/** Combine format stored for the zone's field, or '' when there is no key or no format. */
getCombineFormat(zone: OcrTemplateZone): string {
  const key = this.fieldKeyFor(zone)
  if (!key) {
    return ''
  }
  return this.template.combine_formats?.[key] || ''
}
|
||||
|
||||
/**
 * Stores the combine format for the zone's field, creating the
 * `combine_formats` map on first use. Does nothing when the zone has no field key.
 */
setCombineFormat(zone: OcrTemplateZone, value: string) {
  const key = this.fieldKeyFor(zone)
  if (key) {
    const formats = (this.template.combine_formats ??= {})
    formats[key] = value
  }
}
|
||||
|
||||
/**
 * Appends a `{zoneName}` token for `tokenZone` to the combine format of
 * `zone`'s field, inserting a single space first unless the current format
 * is empty or already ends with one.
 */
insertCombineToken(zone: OcrTemplateZone, tokenZone: OcrTemplateZone) {
  const existing = this.getCombineFormat(zone)
  const needsSpace = existing !== '' && !existing.endsWith(' ')
  const prefix = needsSpace ? `${existing} ` : existing
  this.setCombineFormat(zone, `${prefix}{${tokenZone.name}}`)
}
|
||||
|
||||
/**
 * When the zone's field is used by two or more zones and has no combine
 * format yet, seeds one made of every sharing zone's `{name}` token joined
 * by spaces. An existing non-empty format is left untouched.
 */
private seedCombineDefault(zone: OcrTemplateZone) {
  const key = this.fieldKeyFor(zone)
  const siblings = key ? this.zonesForField(zone) : []
  if (!key || siblings.length < 2) {
    return
  }

  const formats = (this.template.combine_formats ??= {})
  if (formats[key]) {
    return
  }
  formats[key] = siblings.map((sibling) => `{${sibling.name}}`).join(' ')
}
|
||||
|
||||
/**
 * Deletes combine formats whose field key is used by at most one zone of the
 * template. Does nothing when the template has no `combine_formats`.
 */
private pruneCombineFormats() {
  const formats = this.template.combine_formats
  if (!formats) {
    return
  }

  // Count how many zones write to each field key.
  const usage = new Map<string, number>()
  this.template.zones.forEach((z) => {
    const key = this.fieldKeyFor(z)
    if (key) {
      usage.set(key, (usage.get(key) ?? 0) + 1)
    }
  })

  Object.keys(formats)
    .filter((key) => (usage.get(key) ?? 0) <= 1)
    .forEach((key) => delete formats[key])
}
|
||||
|
||||
/**
 * Value bound to the date-format select: the custom choice when the zone
 * uses a custom format, otherwise the zone's format or '' (auto).
 */
dateFormatChoice(zone: OcrTemplateZone): string {
  if (this.usesCustomDateFormat(zone)) {
    return CUSTOM_DATE_FORMAT_CHOICE
  }
  return zone.date_format || ''
}
|
||||
|
||||
/**
 * Applies a date-format select value. Choosing the custom entry marks the
 * zone as custom and keeps its current format (or '' when it has none); any
 * other value clears the custom mark and becomes the zone's format.
 */
setDateFormatChoice(zone: OcrTemplateZone, value: string) {
  if (value !== CUSTOM_DATE_FORMAT_CHOICE) {
    this.customDateFormatZones.delete(zone)
    zone.date_format = value
    return
  }

  this.customDateFormatZones.add(zone)
  zone.date_format ||= ''
}
|
||||
|
||||
/**
 * True when the zone is explicitly marked as custom, or when it has a
 * non-empty date format that matches none of the preset options.
 */
usesCustomDateFormat(zone: OcrTemplateZone): boolean {
  if (this.customDateFormatZones.has(zone)) {
    return true
  }
  if (!zone.date_format) {
    return false
  }
  const isPreset = this.dateFormatOptions.some(
    (option) => option.id === zone.date_format
  )
  return !isPreset
}
|
||||
|
||||
/**
 * Display name of what a zone writes to: the built-in target's name (or its
 * raw id when unknown), the custom field's name, or a "(no field)"
 * placeholder when a custom-field zone has no field set.
 */
getZoneTargetName(zone: OcrTemplateZone): string {
  const target = zone.target || DEFAULT_OCR_ZONE_TARGET

  if (target !== OCR_ZONE_TARGET.CustomField) {
    const builtin = this.builtinTargets.find((t) => t.id === target)
    return builtin?.name ?? target
  }

  if (zone.custom_field) {
    return this.getCustomFieldName(zone.custom_field)
  }
  return $localize`(no field)`
}
|
||||
|
||||
/** Returns the name of the loaded document type with this id, or `Type #<id>` when it is not loaded. */
getDocumentTypeName(id: number): string {
  const match = this.documentTypes.find((type) => type.id === id)
  if (match) {
    return match.name
  }
  return `Type #${id}`
}
|
||||
|
||||
/**
 * Opens the quick-create form for the zone at `zoneIndex`, pre-filling the
 * name with that zone's name and the type with String. A null index is ignored.
 */
openQuickCreate(zoneIndex: number | null) {
  if (zoneIndex !== null) {
    const zoneName = this.template.zones[zoneIndex]?.name
    this.quickCreateForZoneIndex = zoneIndex
    this.quickCreateName = zoneName || ''
    this.quickCreateType = CustomFieldDataType.String
    this.showQuickCreate = true
  }
}
|
||||
|
||||
/** Hides the quick-create form and forgets which zone it was opened for. */
cancelQuickCreate() {
  this.showQuickCreate = false
  this.quickCreateForZoneIndex = null
}
|
||||
|
||||
/**
 * Submits the quick-create form: asks the template service to create a
 * custom field with the trimmed name and selected type, refreshes the list
 * of custom fields and assigns the new field to the zone the form was opened
 * for. A blank name is ignored.
 */
submitQuickCreate() {
  const name = this.quickCreateName.trim()
  if (!name) return

  this.templateService
    .quickCreateField(name, this.quickCreateType)
    .pipe(takeUntil(this.destroy$))
    .subscribe({
      next: (result) => {
        this.customFieldsService.clearCache()
        this.customFieldsService
          .listAll()
          .pipe(takeUntil(this.destroy$))
          .subscribe({
            next: (r) => {
              this.customFields = r.results
              this.finishQuickCreate(result.id)
            },
            error: (err) => {
              // The field was already created; still assign it and close the
              // form instead of leaving the dialog stuck open.
              this.finishQuickCreate(result.id)
              this.toastService.showError(
                $localize`Failed to reload custom fields.`,
                err
              )
            },
          })
      },
      error: (err) => {
        this.toastService.showError(
          $localize`Failed to create custom field.`,
          err
        )
      },
    })
}

/**
 * Assigns a quick-created field to the zone the form was opened for and
 * closes the form. The zone is looked up again because it may have been
 * removed while the requests were in flight; the assignment goes through
 * `setZoneField` so it behaves like picking the field in the select.
 */
private finishQuickCreate(fieldId: number) {
  const index = this.quickCreateForZoneIndex
  const zone = index !== null ? this.template.zones[index] : undefined
  if (zone) {
    this.setZoneField(zone, fieldId)
  }
  this.showQuickCreate = false
  this.quickCreateForZoneIndex = null
}
|
||||
|
||||
/** Emits on and completes `destroy$`, ending every subscription piped through `takeUntil(this.destroy$)`. */
ngOnDestroy() {
  const notifier = this.destroy$
  notifier.next()
  notifier.complete()
}
|
||||
}
|
||||
+140
@@ -0,0 +1,140 @@
|
||||
import { OcrTemplateZone } from 'src/app/data/ocr-template'
|
||||
import {
|
||||
findHandleAt,
|
||||
findZoneAt,
|
||||
getZoneDisplayRect,
|
||||
getZonePage,
|
||||
isZoneOnPage,
|
||||
moveZone,
|
||||
resizeZone,
|
||||
sourceRectFromDrawing,
|
||||
} from './zone-geometry'
|
||||
|
||||
/** Test factory: returns a fully populated zone with the given properties overridden. */
function zone(overrides: Partial<OcrTemplateZone> = {}): OcrTemplateZone {
  const defaults: OcrTemplateZone = {
    name: 'Zone',
    target: 'custom_field',
    custom_field: 1,
    x: 100,
    y: 200,
    width: 300,
    height: 400,
    page: 1,
    ocr_language: 'eng',
    transform: 'strip',
    validation_regex: '',
    order: 0,
  }
  return { ...defaults, ...overrides }
}
|
||||
|
||||
describe('OCR template editor geometry', () => {
|
||||
it('normalizes zone pages', () => {
|
||||
expect(getZonePage(zone({ page: 2 }), 0, 5)).toBe(2)
|
||||
expect(getZonePage(zone({ page: -1 }), 0, 5)).toBe(5)
|
||||
expect(getZonePage(zone({ page: -1 }), 2, null)).toBe(3)
|
||||
expect(getZonePage(zone({ page: 0 }), 0, 5)).toBe(1)
|
||||
expect(getZonePage(zone({ page: undefined }), 0, 5)).toBe(1)
|
||||
})
|
||||
|
||||
it('checks whether a zone is on the current preview page', () => {
|
||||
expect(isZoneOnPage(zone({ page: 2 }), 1, 5)).toBe(true)
|
||||
expect(isZoneOnPage(zone({ page: 2 }), 0, 5)).toBe(false)
|
||||
expect(isZoneOnPage(zone({ page: -1 }), 4, 5)).toBe(true)
|
||||
})
|
||||
|
||||
it('scales source coordinates to canvas display coordinates', () => {
|
||||
expect(
|
||||
getZoneDisplayRect(
|
||||
zone({ x: 100, y: 200, width: 300, height: 400 }),
|
||||
{ width: 500, height: 1000 },
|
||||
{ width: 1000, height: 2000 }
|
||||
)
|
||||
).toEqual({ x: 50, y: 100, w: 150, h: 200 })
|
||||
})
|
||||
|
||||
it('uses per-zone source dimensions when present', () => {
|
||||
expect(
|
||||
getZoneDisplayRect(
|
||||
zone({
|
||||
x: 100,
|
||||
y: 100,
|
||||
width: 100,
|
||||
height: 100,
|
||||
zone_source_width: 1000,
|
||||
zone_source_height: 1000,
|
||||
}),
|
||||
{ width: 500, height: 500 },
|
||||
{ width: 2000, height: 2000 }
|
||||
)
|
||||
).toEqual({ x: 50, y: 50, w: 50, h: 50 })
|
||||
})
|
||||
|
||||
it('finds zones from topmost to bottommost on the current page', () => {
|
||||
const zones = [
|
||||
zone({ name: 'first', x: 0, y: 0, width: 100, height: 100, page: 1 }),
|
||||
zone({ name: 'second', x: 0, y: 0, width: 50, height: 50, page: 1 }),
|
||||
zone({ name: 'third', x: 0, y: 0, width: 50, height: 50, page: 2 }),
|
||||
]
|
||||
|
||||
expect(
|
||||
findZoneAt(
|
||||
{ x: 25, y: 25 },
|
||||
zones,
|
||||
0,
|
||||
2,
|
||||
{ width: 100, height: 100 },
|
||||
{ width: 100, height: 100 }
|
||||
)
|
||||
).toBe(1)
|
||||
})
|
||||
|
||||
it('finds resize handles around a display rect', () => {
|
||||
const rect = { x: 10, y: 20, w: 100, h: 200 }
|
||||
|
||||
expect(findHandleAt({ x: 10, y: 20 }, rect)).toBe('nw')
|
||||
expect(findHandleAt({ x: 110, y: 220 }, rect)).toBe('se')
|
||||
expect(findHandleAt({ x: 60, y: 20 }, rect)).toBe('n')
|
||||
expect(findHandleAt({ x: 90, y: 160 }, rect)).toBeNull()
|
||||
})
|
||||
|
||||
it('moves zones without leaving source image bounds', () => {
|
||||
const z = zone({ x: 50, y: 50, width: 100, height: 100 })
|
||||
|
||||
moveZone(
|
||||
z,
|
||||
{ x: 500, y: 500 },
|
||||
{ mouseX: 50, mouseY: 50, zoneX: 50, zoneY: 50 },
|
||||
{ width: 500, height: 500 },
|
||||
{ width: 500, height: 500 }
|
||||
)
|
||||
|
||||
expect(z.x).toBe(400)
|
||||
expect(z.y).toBe(400)
|
||||
})
|
||||
|
||||
it('resizes zones without leaving source image bounds', () => {
|
||||
const z = zone({ x: 50, y: 50, width: 100, height: 100 })
|
||||
|
||||
resizeZone(
|
||||
z,
|
||||
'se',
|
||||
{ x: 500, y: 500 },
|
||||
{ width: 500, height: 500 },
|
||||
{ width: 200, height: 200 }
|
||||
)
|
||||
|
||||
expect(z.width).toBe(150)
|
||||
expect(z.height).toBe(150)
|
||||
})
|
||||
|
||||
it('converts drawn canvas rectangles to source rectangles', () => {
|
||||
expect(
|
||||
sourceRectFromDrawing(
|
||||
{ startX: 100, startY: 200, endX: 50, endY: 100 },
|
||||
{ width: 500, height: 1000 },
|
||||
{ width: 1000, height: 2000 }
|
||||
)
|
||||
).toEqual({ x: 100, y: 200, w: 100, h: 200 })
|
||||
})
|
||||
})
|
||||
@@ -0,0 +1,201 @@
|
||||
import { OcrTemplateZone } from 'src/app/data/ocr-template'
|
||||
|
||||
export interface DrawingRect {
|
||||
startX: number
|
||||
startY: number
|
||||
endX: number
|
||||
endY: number
|
||||
}
|
||||
|
||||
export interface Dimensions {
|
||||
width: number
|
||||
height: number
|
||||
}
|
||||
|
||||
export interface Point {
|
||||
x: number
|
||||
y: number
|
||||
}
|
||||
|
||||
export interface DisplayRect {
|
||||
x: number
|
||||
y: number
|
||||
w: number
|
||||
h: number
|
||||
}
|
||||
|
||||
export interface MoveStart {
|
||||
mouseX: number
|
||||
mouseY: number
|
||||
zoneX: number
|
||||
zoneY: number
|
||||
}
|
||||
|
||||
export type ResizeHandle = 'n' | 's' | 'e' | 'w' | 'ne' | 'nw' | 'se' | 'sw'
|
||||
|
||||
export const HANDLE_SIZE = 8
|
||||
export const MIN_ZONE_SIZE = 10
|
||||
|
||||
/**
 * Resolves the page number a zone applies to.
 *
 * - A missing page counts as 1.
 * - `-1` resolves to `previewPageCount`, or to `previewPage + 1` when the
 *   page count is null/undefined.
 * - Any other value below 1 is normalized to 1.
 */
export function getZonePage(
  zone: OcrTemplateZone,
  previewPage: number,
  previewPageCount: number | null
): number {
  const requested = zone.page ?? 1

  if (requested === -1) {
    if (previewPageCount !== null && previewPageCount !== undefined) {
      return previewPageCount
    }
    return previewPage + 1
  }

  if (requested >= 1) {
    return requested
  }
  return 1
}
|
||||
|
||||
/**
 * True when the zone's resolved page equals `previewPage + 1`, i.e. the
 * page number corresponding to the given preview page index.
 */
export function isZoneOnPage(
  zone: OcrTemplateZone,
  previewPage: number,
  previewPageCount: number | null
): boolean {
  const currentPage = previewPage + 1
  const zonePage = getZonePage(zone, previewPage, previewPageCount)
  return zonePage === currentPage
}
|
||||
|
||||
/**
 * Dimensions the zone's coordinates are expressed in: the zone's own
 * `zone_source_width` / `zone_source_height` when set (truthy), otherwise
 * the corresponding dimension of `imageSize`.
 */
export function getZoneSourceSize(
  zone: OcrTemplateZone,
  imageSize: Dimensions
): Dimensions {
  const width = zone.zone_source_width || imageSize.width
  const height = zone.zone_source_height || imageSize.height
  return { width, height }
}
|
||||
|
||||
/**
 * Scales a zone's source-space rectangle to canvas coordinates, using the
 * ratio between the canvas size and the zone's source size on each axis.
 */
export function getZoneDisplayRect(
  zone: OcrTemplateZone,
  canvasSize: Dimensions,
  imageSize: Dimensions
): DisplayRect {
  const { width: sourceWidth, height: sourceHeight } = getZoneSourceSize(
    zone,
    imageSize
  )
  const horizontal = canvasSize.width / sourceWidth
  const vertical = canvasSize.height / sourceHeight

  const x = zone.x * horizontal
  const y = zone.y * vertical
  const w = zone.width * horizontal
  const h = zone.height * vertical
  return { x, y, w, h }
}
|
||||
|
||||
/**
 * Returns the resize handle of `rect` under `point`, or null when none is hit.
 * A handle is hit when the point lies within `handleSize` of the handle
 * position on both axes. Handles are checked in the order nw, n, ne, w, e,
 * sw, s, se and the first hit wins.
 */
export function findHandleAt(
  point: Point,
  rect: DisplayRect,
  handleSize = HANDLE_SIZE
): ResizeHandle | null {
  const left = rect.x
  const centerX = rect.x + rect.w / 2
  const right = rect.x + rect.w
  const top = rect.y
  const centerY = rect.y + rect.h / 2
  const bottom = rect.y + rect.h

  const candidates: [ResizeHandle, number, number][] = [
    ['nw', left, top],
    ['n', centerX, top],
    ['ne', right, top],
    ['w', left, centerY],
    ['e', right, centerY],
    ['sw', left, bottom],
    ['s', centerX, bottom],
    ['se', right, bottom],
  ]

  for (const [handle, handleX, handleY] of candidates) {
    const withinX = Math.abs(point.x - handleX) <= handleSize
    const withinY = Math.abs(point.y - handleY) <= handleSize
    if (withinX && withinY) {
      return handle
    }
  }
  return null
}
|
||||
|
||||
/**
 * Returns the index of the zone under `point` on the current preview page,
 * or null when no zone contains it. Zones are scanned from the end of the
 * list backwards, so a later zone wins over an earlier one that overlaps it.
 * Rectangle edges count as inside.
 */
export function findZoneAt(
  point: Point,
  zones: OcrTemplateZone[],
  previewPage: number,
  previewPageCount: number | null,
  canvasSize: Dimensions,
  imageSize: Dimensions
): number | null {
  let index = zones.length
  while (index-- > 0) {
    const candidate = zones[index]
    if (!isZoneOnPage(candidate, previewPage, previewPageCount)) {
      continue
    }

    const { x, y, w, h } = getZoneDisplayRect(candidate, canvasSize, imageSize)
    const insideX = point.x >= x && point.x <= x + w
    const insideY = point.y >= y && point.y <= y + h
    if (insideX && insideY) {
      return index
    }
  }
  return null
}
|
||||
|
||||
/**
 * Moves a zone in place while dragging. The pointer's canvas-space offset
 * from the drag start is converted to source coordinates (rounded) and
 * added to the zone's starting position; the result is clamped so the zone
 * stays between 0 and the source size minus its own width/height.
 */
export function moveZone(
  zone: OcrTemplateZone,
  point: Point,
  moveStart: MoveStart,
  canvasSize: Dimensions,
  imageSize: Dimensions
) {
  const { width: sourceWidth, height: sourceHeight } = getZoneSourceSize(
    zone,
    imageSize
  )
  const toSourceX = sourceWidth / canvasSize.width
  const toSourceY = sourceHeight / canvasSize.height

  const deltaX = Math.round((point.x - moveStart.mouseX) * toSourceX)
  const deltaY = Math.round((point.y - moveStart.mouseY) * toSourceY)

  const maxX = sourceWidth - zone.width
  const maxY = sourceHeight - zone.height
  zone.x = clamp(moveStart.zoneX + deltaX, 0, maxX)
  zone.y = clamp(moveStart.zoneY + deltaY, 0, maxY)
}
|
||||
|
||||
/**
 * Resizes a zone in place by dragging one of its handles to `point`.
 * The pointer is converted to source coordinates and clamped to the source
 * bounds. West/north handles move the corresponding edge while keeping the
 * opposite edge fixed; east/south handles only change width/height. The
 * size is kept at `MIN_ZONE_SIZE` or more by the east/south branches and by
 * the upper clamp bound of the west/north branches.
 */
export function resizeZone(
  zone: OcrTemplateZone,
  handle: ResizeHandle,
  point: Point,
  canvasSize: Dimensions,
  imageSize: Dimensions
) {
  const source = getZoneSourceSize(zone, imageSize)
  const toSourceX = source.width / canvasSize.width
  const toSourceY = source.height / canvasSize.height
  const pointerX = clamp(Math.round(point.x * toSourceX), 0, source.width)
  const pointerY = clamp(Math.round(point.y * toSourceY), 0, source.height)
  const touches = (edge: string) => handle.indexOf(edge) !== -1

  // Horizontal edges.
  if (touches('w')) {
    const fixedRight = Math.min(zone.x + zone.width, source.width)
    zone.x = clamp(pointerX, 0, fixedRight - MIN_ZONE_SIZE)
    zone.width = fixedRight - zone.x
  }
  if (touches('e')) {
    zone.width = Math.max(MIN_ZONE_SIZE, pointerX - zone.x)
  }

  // Vertical edges.
  if (touches('n')) {
    const fixedBottom = Math.min(zone.y + zone.height, source.height)
    zone.y = clamp(pointerY, 0, fixedBottom - MIN_ZONE_SIZE)
    zone.height = fixedBottom - zone.y
  }
  if (touches('s')) {
    zone.height = Math.max(MIN_ZONE_SIZE, pointerY - zone.y)
  }
}
|
||||
|
||||
/**
 * Converts a rectangle drawn on the canvas (start/end points in any order)
 * to a rounded rectangle in image coordinates, scaling each axis by the
 * ratio between image size and canvas size.
 */
export function sourceRectFromDrawing(
  rect: DrawingRect,
  canvasSize: Dimensions,
  imageSize: Dimensions
): DisplayRect {
  const toImageX = imageSize.width / canvasSize.width
  const toImageY = imageSize.height / canvasSize.height

  const left = Math.min(rect.startX, rect.endX)
  const top = Math.min(rect.startY, rect.endY)
  const drawnWidth = Math.abs(rect.endX - rect.startX)
  const drawnHeight = Math.abs(rect.endY - rect.startY)

  return {
    x: Math.round(left * toImageX),
    y: Math.round(top * toImageY),
    w: Math.round(drawnWidth * toImageX),
    h: Math.round(drawnHeight * toImageY),
  }
}
|
||||
|
||||
/**
 * Limits `value` to the range [min, max]. The lower bound is applied last,
 * so `min` is returned when `max` is smaller than `min`.
 */
function clamp(value: number, min: number, max: number): number {
  const upperBounded = Math.min(value, max)
  return Math.max(min, upperBounded)
}
|
||||
@@ -0,0 +1,75 @@
|
||||
<pngx-page-header
|
||||
title="OCR Templates"
|
||||
i18n-title
|
||||
info="Define extraction zones on document types to automatically populate custom fields via OCR."
|
||||
i18n-info
|
||||
>
|
||||
<button type="button" class="btn btn-sm btn-outline-primary" (click)="createTemplate()" *pngxIfPermissions="{ action: PermissionAction.Add, type: PermissionType.OcrTemplate }">
|
||||
<i-bs name="plus-circle" class="me-1"></i-bs><ng-container i18n>Create Template</ng-container>
|
||||
</button>
|
||||
</pngx-page-header>
|
||||
|
||||
<ul class="list-group">
|
||||
|
||||
<li class="list-group-item">
|
||||
<div class="row">
|
||||
<div class="col" i18n>Name</div>
|
||||
<div class="col d-none d-sm-flex" i18n>Document Type</div>
|
||||
<div class="col d-none d-sm-flex" i18n>Zones</div>
|
||||
<div class="col" i18n>Status</div>
|
||||
<div class="col" i18n>Actions</div>
|
||||
</div>
|
||||
</li>
|
||||
|
||||
@if (loading && templates.length === 0) {
|
||||
<li class="list-group-item">
|
||||
<div class="spinner-border spinner-border-sm me-2" role="status"></div>
|
||||
<ng-container i18n>Loading...</ng-container>
|
||||
</li>
|
||||
}
|
||||
|
||||
@for (t of templates; track t.id) {
|
||||
<li class="list-group-item">
|
||||
<div class="row fade" [class.show]="show">
|
||||
<div class="col d-flex align-items-center"><button class="btn btn-link p-0 text-start" type="button" (click)="editTemplate(t)" [disabled]="!permissionsService.currentUserCan(PermissionAction.Change, PermissionType.OcrTemplate)">{{t.name}}</button></div>
|
||||
<div class="col d-flex align-items-center d-none d-sm-flex">{{getDocumentTypeName(t)}}</div>
|
||||
<div class="col d-flex align-items-center d-none d-sm-flex"><code>{{t.zones?.length || 0}}</code></div>
|
||||
<div class="col d-flex align-items-center">
|
||||
<div class="form-check form-switch mb-0">
|
||||
<input type="checkbox" class="form-check-input cursor-pointer" [id]="t.id+'_enable'" [(ngModel)]="t.enabled" (change)="toggleTemplate(t)" *pngxIfPermissions="{ action: PermissionAction.Change, type: PermissionType.OcrTemplate }">
|
||||
<label class="form-check-label cursor-pointer" [for]="t.id+'_enable'">
|
||||
<code> @if(t.enabled) { <ng-container i18n>Enabled</ng-container> } @else { <span i18n class="text-muted">Disabled</span> }</code>
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col">
|
||||
|
||||
<div class="btn-group d-block d-sm-none">
|
||||
<div ngbDropdown container="body" class="d-inline-block">
|
||||
<button type="button" class="btn btn-link" id="actionsMenuMobile{{t.id}}" (click)="$event.stopPropagation()" ngbDropdownToggle>
|
||||
<i-bs name="three-dots-vertical"></i-bs>
|
||||
</button>
|
||||
<div ngbDropdownMenu aria-labelledby="actionsMenuMobile{{t.id}}">
|
||||
<button (click)="editTemplate(t)" *pngxIfPermissions="{ action: PermissionAction.Change, type: PermissionType.OcrTemplate }" ngbDropdownItem i18n>Edit</button>
|
||||
<button (click)="deleteTemplate(t)" *pngxIfPermissions="{ action: PermissionAction.Delete, type: PermissionType.OcrTemplate }" ngbDropdownItem i18n>Delete</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="btn-toolbar d-none d-sm-flex gap-2" role="toolbar">
|
||||
<div class="btn-group">
|
||||
<button *pngxIfPermissions="{ action: PermissionAction.Change, type: PermissionType.OcrTemplate }" class="btn btn-sm btn-outline-secondary" type="button" (click)="editTemplate(t)">
|
||||
<i-bs width="1em" height="1em" name="pencil" class="me-1"></i-bs><ng-container i18n>Edit</ng-container>
|
||||
</button>
|
||||
<button *pngxIfPermissions="{ action: PermissionAction.Delete, type: PermissionType.OcrTemplate }" class="btn btn-sm btn-outline-danger" type="button" (click)="deleteTemplate(t)">
|
||||
<i-bs width="1em" height="1em" name="trash" class="me-1"></i-bs><ng-container i18n>Delete</ng-container>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</li>
|
||||
}
|
||||
@if (!loading && templates.length === 0) {
|
||||
<li class="list-group-item" [class.show]="show" i18n>No OCR templates defined.</li>
|
||||
}
|
||||
</ul>
|
||||
@@ -0,0 +1,109 @@
|
||||
import { Component, OnInit, inject } from '@angular/core'
|
||||
import { FormsModule } from '@angular/forms'
|
||||
import { Router } from '@angular/router'
|
||||
import { NgbDropdownModule, NgbModal } from '@ng-bootstrap/ng-bootstrap'
|
||||
import { NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
|
||||
import { delay, takeUntil, tap } from 'rxjs'
|
||||
import { OcrTemplate } from 'src/app/data/ocr-template'
|
||||
import { IfPermissionsDirective } from 'src/app/directives/if-permissions.directive'
|
||||
import { PermissionsService } from 'src/app/services/permissions.service'
|
||||
import { DocumentTypeService } from 'src/app/services/rest/document-type.service'
|
||||
import { OcrTemplateService } from 'src/app/services/rest/ocr-template.service'
|
||||
import { ToastService } from 'src/app/services/toast.service'
|
||||
import { ConfirmDialogComponent } from '../../common/confirm-dialog/confirm-dialog.component'
|
||||
import { PageHeaderComponent } from '../../common/page-header/page-header.component'
|
||||
import { LoadingComponentWithPermissions } from '../../loading-component/loading.component'
|
||||
|
||||
/**
 * List view of OCR templates: shows every template with its document type,
 * and lets the user create, edit, enable/disable and delete templates.
 */
@Component({
  selector: 'pngx-ocr-templates',
  templateUrl: './ocr-templates.component.html',
  imports: [
    PageHeaderComponent,
    IfPermissionsDirective,
    FormsModule,
    NgbDropdownModule,
    NgxBootstrapIconsModule,
  ],
})
export class OcrTemplatesComponent
  extends LoadingComponentWithPermissions
  implements OnInit
{
  private readonly service = inject(OcrTemplateService)
  private readonly documentTypeService = inject(DocumentTypeService)
  private readonly router = inject(Router)
  private readonly modalService = inject(NgbModal)
  private readonly toastService = inject(ToastService)
  permissionsService = inject(PermissionsService)

  public templates: OcrTemplate[] = []
  /** Document type id -> name, filled once the document types are loaded. */
  private documentTypeNames: Map<number, string> = new Map()

  /** Loads the document type names and the template list. */
  ngOnInit() {
    this.documentTypeService
      .listAll()
      .pipe(takeUntil(this.unsubscribeNotifier))
      .subscribe((r) => {
        this.documentTypeNames = new Map(
          r.results.map((dt) => [dt.id, dt.name])
        )
      })
    this.reload()
  }

  /**
   * (Re)loads the template list. The list is replaced as soon as the
   * response arrives; `show` / `loading` are updated 100 ms later.
   */
  reload() {
    this.loading = true
    this.service
      .listAll()
      .pipe(
        tap((r) => (this.templates = r.results)),
        delay(100),
        // Keep takeUntil last so a pending delayed emission is also
        // cancelled once the notifier fires.
        takeUntil(this.unsubscribeNotifier)
      )
      .subscribe({
        next: () => {
          this.show = true
          this.loading = false
        },
        error: (error) => {
          // Without this the list would stay in its loading state forever.
          this.show = true
          this.loading = false
          this.toastService.showError(
            $localize`Error loading OCR templates.`,
            error
          )
        },
      })
  }

  /**
   * Name of the template's document type. Falls back to the raw id as a
   * string (or '' when the template has none) while the name is unknown.
   */
  getDocumentTypeName(t: OcrTemplate): string {
    return (
      this.documentTypeNames.get(t.document_type) ?? `${t.document_type ?? ''}`
    )
  }

  /** Navigates to the editor for a new template. */
  createTemplate() {
    this.router.navigate(['/ocr-templates', 'new'])
  }

  /** Navigates to the editor for an existing template. */
  editTemplate(t: OcrTemplate) {
    this.router.navigate(['/ocr-templates', t.id])
  }

  /** Persists the enabled switch of a template. */
  toggleTemplate(t: OcrTemplate) {
    // ngModel has already flipped t.enabled; restore it if persistence fails.
    const enabled = t.enabled
    this.service.patch(t).subscribe({
      error: (error) => {
        t.enabled = !enabled
        this.toastService.showError(
          $localize`Error updating OCR template.`,
          error
        )
      },
    })
  }

  /** Asks for confirmation, deletes the template and reloads the list. */
  deleteTemplate(t: OcrTemplate) {
    const modal = this.modalService.open(ConfirmDialogComponent)
    modal.componentInstance.title = $localize`Delete OCR Template`
    modal.componentInstance.messageBoldPart = t.name
    modal.componentInstance.message = $localize`Do you really want to delete this OCR template?`
    modal.componentInstance.btnClass = 'btn-danger'
    modal.componentInstance.btnCaption = $localize`Delete`
    modal.componentInstance.confirmClicked.subscribe(() => {
      modal.close()
      this.service.delete(t).subscribe({
        next: () => this.reload(),
        error: (error) => {
          // Surface the failure instead of silently leaving the row in place.
          this.toastService.showError(
            $localize`Error deleting OCR template.`,
            error
          )
        },
      })
    })
  }
}
|
||||
@@ -0,0 +1,142 @@
|
||||
import { ObjectWithId } from './object-with-id'
|
||||
|
||||
export type OcrZoneTarget = 'custom_field' | 'title' | 'asn' | 'created'
|
||||
export type OcrBuiltinTarget = Exclude<OcrZoneTarget, 'custom_field'>
|
||||
export type OcrZoneTransform =
|
||||
| 'none'
|
||||
| 'strip'
|
||||
| 'uppercase'
|
||||
| 'lowercase'
|
||||
| 'numeric'
|
||||
| 'strip_punctuation'
|
||||
| 'date'
|
||||
| 'qr_code'
|
||||
|
||||
export const OCR_ZONE_TARGET = {
|
||||
CustomField: 'custom_field',
|
||||
Title: 'title',
|
||||
Asn: 'asn',
|
||||
Created: 'created',
|
||||
} as const satisfies Record<string, OcrZoneTarget>
|
||||
|
||||
export const OCR_ZONE_TRANSFORM = {
|
||||
None: 'none',
|
||||
Strip: 'strip',
|
||||
Uppercase: 'uppercase',
|
||||
Lowercase: 'lowercase',
|
||||
Numeric: 'numeric',
|
||||
StripPunctuation: 'strip_punctuation',
|
||||
Date: 'date',
|
||||
QrCode: 'qr_code',
|
||||
} as const satisfies Record<string, OcrZoneTransform>
|
||||
|
||||
export const DEFAULT_OCR_ZONE_TARGET = OCR_ZONE_TARGET.CustomField
|
||||
export const DEFAULT_OCR_ZONE_TRANSFORM = OCR_ZONE_TRANSFORM.Strip
|
||||
export const DEFAULT_OCR_ZONE_LANGUAGE = 'deu+eng'
|
||||
|
||||
/**
 * Type guard: true when `value` is one of the built-in zone targets
 * (title, ASN or created), i.e. any target other than a custom field.
 */
export function isOcrBuiltinTarget(value: unknown): value is OcrBuiltinTarget {
  const builtins: unknown[] = [
    OCR_ZONE_TARGET.Title,
    OCR_ZONE_TARGET.Asn,
    OCR_ZONE_TARGET.Created,
  ]
  return builtins.includes(value)
}
|
||||
|
||||
export const OCR_BUILTIN_TARGETS = [
|
||||
{ id: OCR_ZONE_TARGET.Title, name: $localize`Title` },
|
||||
{ id: OCR_ZONE_TARGET.Asn, name: $localize`Archive serial number` },
|
||||
{ id: OCR_ZONE_TARGET.Created, name: $localize`Date created` },
|
||||
]
|
||||
|
||||
export interface OcrTemplateZone {
|
||||
id?: number
|
||||
name: string
|
||||
target?: OcrZoneTarget
|
||||
custom_field: number | null
|
||||
page?: number
|
||||
x: number
|
||||
y: number
|
||||
width: number
|
||||
height: number
|
||||
ocr_language: string
|
||||
transform: OcrZoneTransform
|
||||
date_format?: string
|
||||
validation_regex: string
|
||||
order: number
|
||||
zone_source_width?: number
|
||||
zone_source_height?: number
|
||||
}
|
||||
|
||||
export const TRANSFORM_OPTIONS = [
|
||||
{ id: OCR_ZONE_TRANSFORM.None, name: $localize`None` },
|
||||
{ id: OCR_ZONE_TRANSFORM.Strip, name: $localize`Strip whitespace` },
|
||||
{ id: OCR_ZONE_TRANSFORM.Uppercase, name: $localize`Uppercase` },
|
||||
{ id: OCR_ZONE_TRANSFORM.Lowercase, name: $localize`Lowercase` },
|
||||
{ id: OCR_ZONE_TRANSFORM.Numeric, name: $localize`Numeric only` },
|
||||
{
|
||||
id: OCR_ZONE_TRANSFORM.StripPunctuation,
|
||||
name: $localize`Remove leading/trailing punctuation`,
|
||||
},
|
||||
{ id: OCR_ZONE_TRANSFORM.Date, name: $localize`Parse date` },
|
||||
{ id: OCR_ZONE_TRANSFORM.QrCode, name: $localize`Read QR/barcode` },
|
||||
]
|
||||
|
||||
export const OCR_LANGUAGE_OPTIONS = [
|
||||
{ id: 'eng', name: $localize`English` },
|
||||
{ id: 'deu', name: $localize`German` },
|
||||
{ id: 'fra', name: $localize`French` },
|
||||
{ id: 'ita', name: $localize`Italian` },
|
||||
{ id: 'spa', name: $localize`Spanish` },
|
||||
{ id: 'por', name: $localize`Portuguese` },
|
||||
{ id: 'nld', name: $localize`Dutch` },
|
||||
]
|
||||
|
||||
export const DATE_FORMAT_OPTIONS = [
|
||||
{ id: '', name: $localize`Auto-detect` },
|
||||
{ id: '%d.%m.%Y', name: 'DD.MM.YYYY' },
|
||||
{ id: '%Y/%m/%d', name: 'YYYY/MM/DD' },
|
||||
{ id: '%d/%m/%Y', name: 'DD/MM/YYYY' },
|
||||
]
|
||||
|
||||
export interface OcrTemplate extends ObjectWithId {
|
||||
name: string
|
||||
document_type: number
|
||||
sample_document: number | null
|
||||
source_width: number
|
||||
source_height: number
|
||||
enabled: boolean
|
||||
combine_formats?: Record<string, string>
|
||||
created?: string
|
||||
updated?: string
|
||||
zones: OcrTemplateZone[]
|
||||
}
|
||||
|
||||
export interface ZoneTestRequest {
|
||||
name: string
|
||||
x: number
|
||||
y: number
|
||||
width: number
|
||||
height: number
|
||||
page: number
|
||||
ocr_language: string
|
||||
transform: OcrZoneTransform
|
||||
date_format?: string
|
||||
validation_regex: string
|
||||
zone_source_width?: number
|
||||
zone_source_height?: number
|
||||
}
|
||||
|
||||
export interface OcrZoneTestResult {
|
||||
raw_text?: string | null
|
||||
value?: string | null
|
||||
regex?: string
|
||||
regex_match?: boolean | null
|
||||
error?: string
|
||||
}
|
||||
|
||||
export interface OcrZoneRunResult {
|
||||
template: string
|
||||
zone: string
|
||||
custom_field: string
|
||||
value: string | number | null
|
||||
}
|
||||
@@ -28,6 +28,7 @@ export enum PermissionType {
|
||||
ShareLink = '%s_sharelink',
|
||||
CustomField = '%s_customfield',
|
||||
Workflow = '%s_workflow',
|
||||
OcrTemplate = '%s_ocrtemplate',
|
||||
ProcessedMail = '%s_processedmail',
|
||||
GlobalStatistics = '%s_global_statistics',
|
||||
SystemMonitoring = '%s_system_monitoring',
|
||||
|
||||
@@ -12,6 +12,7 @@ import {
|
||||
import { DocumentMetadata } from 'src/app/data/document-metadata'
|
||||
import { DocumentSuggestions } from 'src/app/data/document-suggestions'
|
||||
import { FilterRule } from 'src/app/data/filter-rule'
|
||||
import { OcrZoneRunResult } from 'src/app/data/ocr-template'
|
||||
import { Results, SelectionData } from 'src/app/data/results'
|
||||
import { SETTINGS_KEYS } from 'src/app/data/ui-settings'
|
||||
import { queryParamsFromFilterRules } from '../../utils/query-params'
|
||||
@@ -355,6 +356,13 @@ export class DocumentService extends AbstractPaperlessService<Document> {
|
||||
})
|
||||
}
|
||||
|
||||
/**
 * POSTs an empty body to the document's `run-zone-ocr` resource URL and
 * returns the response, typed as a list of zone run results.
 */
runZoneOcr(id: number): Observable<{ results: OcrZoneRunResult[] }> {
  const url = this.getResourceUrl(id, 'run-zone-ocr')
  return this.http.post<{ results: OcrZoneRunResult[] }>(url, {})
}
|
||||
|
||||
rotateDocuments(
|
||||
selection: DocumentSelectionQuery,
|
||||
degrees: number,
|
||||
|
||||
@@ -0,0 +1,47 @@
|
||||
import { Injectable } from '@angular/core'
|
||||
import { Observable } from 'rxjs'
|
||||
import {
|
||||
OcrTemplate,
|
||||
OcrZoneTestResult,
|
||||
ZoneTestRequest,
|
||||
} from '../../data/ocr-template'
|
||||
import { AbstractPaperlessService } from './abstract-paperless-service'
|
||||
|
||||
export interface QuickCreateFieldResult {
|
||||
id: number
|
||||
name: string
|
||||
data_type: string
|
||||
created: boolean
|
||||
}
|
||||
|
||||
/** REST service for OCR templates, backed by the `ocr_templates` resource. */
@Injectable({ providedIn: 'root' })
export class OcrTemplateService extends AbstractPaperlessService<OcrTemplate> {
  constructor() {
    super()
    this.resourceName = 'ocr_templates'
  }

  /** URL of the rendered image for the given page of a document. */
  getPageImageUrl(docId: number, page: number): string {
    const resourceRoot = `${this.baseUrl}${this.resourceName}`
    return `${resourceRoot}/document-page-image/${docId}/${page}/`
  }

  /** POSTs a zone definition together with a document id to the `test-zone` endpoint. */
  testZone(
    docId: number,
    zone: ZoneTestRequest
  ): Observable<OcrZoneTestResult> {
    const url = `${this.baseUrl}${this.resourceName}/test-zone/`
    const payload = { document: docId, zone }
    return this.http.post<OcrZoneTestResult>(url, payload)
  }

  /** POSTs a field name and data type to the `quick-create-field` endpoint. */
  quickCreateField(
    name: string,
    dataType: string
  ): Observable<QuickCreateFieldResult> {
    const url = `${this.baseUrl}${this.resourceName}/quick-create-field/`
    const payload = { name, data_type: dataType }
    return this.http.post<QuickCreateFieldResult>(url, payload)
  }
}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -79,13 +79,16 @@ import {
|
||||
exclamationTriangleFill,
|
||||
eye,
|
||||
fileEarmark,
|
||||
fileEarmarkBreak,
|
||||
fileEarmarkCheck,
|
||||
fileEarmarkDiff,
|
||||
fileEarmarkFill,
|
||||
fileEarmarkLock,
|
||||
fileEarmarkMedical,
|
||||
fileEarmarkMinus,
|
||||
fileEarmarkPlus,
|
||||
fileEarmarkRichtext,
|
||||
fileEarmarkRuled,
|
||||
fileText,
|
||||
files,
|
||||
filter,
|
||||
@@ -302,13 +305,16 @@ const icons = {
|
||||
exclamationTriangleFill,
|
||||
eye,
|
||||
fileEarmark,
|
||||
fileEarmarkBreak,
|
||||
fileEarmarkCheck,
|
||||
fileEarmarkDiff,
|
||||
fileEarmarkFill,
|
||||
fileEarmarkLock,
|
||||
fileEarmarkMedical,
|
||||
fileEarmarkMinus,
|
||||
fileEarmarkPlus,
|
||||
fileEarmarkRichtext,
|
||||
fileEarmarkRuled,
|
||||
files,
|
||||
fileText,
|
||||
filter,
|
||||
|
||||
@@ -13,8 +13,11 @@ class DocumentsConfig(AppConfig):
|
||||
from documents.signals.handlers import add_inbox_tags
|
||||
from documents.signals.handlers import add_or_update_document_in_llm_index
|
||||
from documents.signals.handlers import add_to_index
|
||||
from documents.signals.handlers import capture_old_document_type
|
||||
from documents.signals.handlers import run_workflows_added
|
||||
from documents.signals.handlers import run_workflows_updated
|
||||
from documents.signals.handlers import run_zone_ocr_extraction
|
||||
from documents.signals.handlers import run_zone_ocr_on_type_change
|
||||
from documents.signals.handlers import send_websocket_document_updated
|
||||
from documents.signals.handlers import set_correspondent
|
||||
from documents.signals.handlers import set_document_type
|
||||
@@ -29,6 +32,16 @@ class DocumentsConfig(AppConfig):
|
||||
document_consumption_finished.connect(add_to_index)
|
||||
document_consumption_finished.connect(run_workflows_added)
|
||||
document_consumption_finished.connect(add_or_update_document_in_llm_index)
|
||||
document_consumption_finished.connect(run_zone_ocr_extraction)
|
||||
|
||||
from django.db.models.signals import post_save
|
||||
from django.db.models.signals import pre_save
|
||||
|
||||
from documents.models import Document
|
||||
|
||||
pre_save.connect(capture_old_document_type, sender=Document)
|
||||
post_save.connect(run_zone_ocr_on_type_change, sender=Document)
|
||||
|
||||
document_updated.connect(run_workflows_updated)
|
||||
document_updated.connect(send_websocket_document_updated)
|
||||
document_updated.connect(add_or_update_document_in_llm_index)
|
||||
|
||||
@@ -0,0 +1,267 @@
|
||||
# Generated by Django 5.2.14 on 2026-06-16 17:36
|
||||
|
||||
import django.core.validators
|
||||
import django.db.models.deletion
|
||||
import django.utils.timezone
|
||||
from django.db import migrations
|
||||
from django.db import models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
("documents", "0021_widen_workflow_integer_fields"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.CreateModel(
|
||||
name="OcrTemplate",
|
||||
fields=[
|
||||
(
|
||||
"id",
|
||||
models.AutoField(
|
||||
auto_created=True,
|
||||
primary_key=True,
|
||||
serialize=False,
|
||||
verbose_name="ID",
|
||||
),
|
||||
),
|
||||
("name", models.CharField(max_length=128, verbose_name="name")),
|
||||
(
|
||||
"source_width",
|
||||
models.PositiveIntegerField(
|
||||
help_text="Width of the image the zones were drawn on (px)",
|
||||
validators=[django.core.validators.MinValueValidator(1)],
|
||||
verbose_name="source width",
|
||||
),
|
||||
),
|
||||
(
|
||||
"source_height",
|
||||
models.PositiveIntegerField(
|
||||
help_text="Height of the image the zones were drawn on (px)",
|
||||
validators=[django.core.validators.MinValueValidator(1)],
|
||||
verbose_name="source height",
|
||||
),
|
||||
),
|
||||
("enabled", models.BooleanField(default=True, verbose_name="enabled")),
|
||||
(
|
||||
"combine_formats",
|
||||
models.JSONField(
|
||||
blank=True,
|
||||
default=dict,
|
||||
help_text="Per-target format strings for combining several zones into one field, keyed by target (custom field id, or 'title'/'asn'/'created'). Tokens like {Zone Name} are replaced with that zone's value.",
|
||||
verbose_name="combine formats",
|
||||
),
|
||||
),
|
||||
(
|
||||
"created",
|
||||
models.DateTimeField(
|
||||
db_index=True,
|
||||
default=django.utils.timezone.now,
|
||||
editable=False,
|
||||
verbose_name="created",
|
||||
),
|
||||
),
|
||||
(
|
||||
"updated",
|
||||
models.DateTimeField(auto_now=True, verbose_name="updated"),
|
||||
),
|
||||
(
|
||||
"document_type",
|
||||
models.ForeignKey(
|
||||
on_delete=django.db.models.deletion.CASCADE,
|
||||
related_name="ocr_templates",
|
||||
to="documents.documenttype",
|
||||
verbose_name="document type",
|
||||
),
|
||||
),
|
||||
(
|
||||
"sample_document",
|
||||
models.ForeignKey(
|
||||
blank=True,
|
||||
help_text="Document used for previewing zones in the editor",
|
||||
null=True,
|
||||
on_delete=django.db.models.deletion.SET_NULL,
|
||||
related_name="+",
|
||||
to="documents.document",
|
||||
verbose_name="sample document",
|
||||
),
|
||||
),
|
||||
],
|
||||
options={
|
||||
"verbose_name": "OCR template",
|
||||
"verbose_name_plural": "OCR templates",
|
||||
"ordering": ("name",),
|
||||
},
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name="OcrTemplateZone",
|
||||
fields=[
|
||||
(
|
||||
"id",
|
||||
models.AutoField(
|
||||
auto_created=True,
|
||||
primary_key=True,
|
||||
serialize=False,
|
||||
verbose_name="ID",
|
||||
),
|
||||
),
|
||||
(
|
||||
"name",
|
||||
models.CharField(
|
||||
help_text="Descriptive name for this zone (e.g. 'Invoice Number')",
|
||||
max_length=128,
|
||||
verbose_name="zone name",
|
||||
),
|
||||
),
|
||||
(
|
||||
"target",
|
||||
models.CharField(
|
||||
choices=[
|
||||
("custom_field", "Custom field"),
|
||||
("title", "Title"),
|
||||
("asn", "Archive serial number"),
|
||||
("created", "Date created"),
|
||||
],
|
||||
default="custom_field",
|
||||
help_text="Where the extracted value is written: a custom field, or a built-in document field (title, ASN, created date)",
|
||||
max_length=20,
|
||||
verbose_name="target",
|
||||
),
|
||||
),
|
||||
(
|
||||
"page",
|
||||
models.IntegerField(
|
||||
blank=True,
|
||||
help_text="Page (1 = first, -1 = last; blank uses the template default)",
|
||||
null=True,
|
||||
verbose_name="page",
|
||||
),
|
||||
),
|
||||
(
|
||||
"x",
|
||||
models.PositiveIntegerField(
|
||||
help_text="Left edge (px)",
|
||||
verbose_name="x",
|
||||
),
|
||||
),
|
||||
(
|
||||
"y",
|
||||
models.PositiveIntegerField(
|
||||
help_text="Top edge (px)",
|
||||
verbose_name="y",
|
||||
),
|
||||
),
|
||||
(
|
||||
"width",
|
||||
models.PositiveIntegerField(
|
||||
help_text="Zone width (px)",
|
||||
validators=[django.core.validators.MinValueValidator(1)],
|
||||
verbose_name="width",
|
||||
),
|
||||
),
|
||||
(
|
||||
"height",
|
||||
models.PositiveIntegerField(
|
||||
help_text="Zone height (px)",
|
||||
validators=[django.core.validators.MinValueValidator(1)],
|
||||
verbose_name="height",
|
||||
),
|
||||
),
|
||||
(
|
||||
"zone_source_width",
|
||||
models.PositiveIntegerField(
|
||||
blank=True,
|
||||
help_text="Width of the page image this zone was drawn on (px). Falls back to template source_width if unset.",
|
||||
null=True,
|
||||
verbose_name="zone source width",
|
||||
),
|
||||
),
|
||||
(
|
||||
"zone_source_height",
|
||||
models.PositiveIntegerField(
|
||||
blank=True,
|
||||
help_text="Height of the page image this zone was drawn on (px). Falls back to template source_height if unset.",
|
||||
null=True,
|
||||
verbose_name="zone source height",
|
||||
),
|
||||
),
|
||||
(
|
||||
"ocr_language",
|
||||
models.CharField(
|
||||
default="deu+eng",
|
||||
help_text="Tesseract language code(s), e.g. 'deu+eng'",
|
||||
max_length=20,
|
||||
verbose_name="OCR language",
|
||||
),
|
||||
),
|
||||
(
|
||||
"transform",
|
||||
models.CharField(
|
||||
choices=[
|
||||
("none", "None"),
|
||||
("strip", "Strip whitespace"),
|
||||
("uppercase", "Uppercase"),
|
||||
("lowercase", "Lowercase"),
|
||||
("numeric", "Numeric only"),
|
||||
(
|
||||
"strip_punctuation",
|
||||
"Remove leading/trailing punctuation",
|
||||
),
|
||||
("date", "Parse date"),
|
||||
("qr_code", "Read QR/barcode"),
|
||||
],
|
||||
default="strip",
|
||||
max_length=20,
|
||||
verbose_name="transform",
|
||||
),
|
||||
),
|
||||
(
|
||||
"date_format",
|
||||
models.CharField(
|
||||
blank=True,
|
||||
default="",
|
||||
help_text="Python strptime format for the 'Parse date' transform (e.g. %d.%m.%Y). Blank = auto-detect.",
|
||||
max_length=64,
|
||||
verbose_name="date format",
|
||||
),
|
||||
),
|
||||
(
|
||||
"validation_regex",
|
||||
models.CharField(
|
||||
blank=True,
|
||||
default="",
|
||||
help_text="Optional regex pattern — extracted text is only accepted if it matches",
|
||||
max_length=256,
|
||||
verbose_name="validation regex",
|
||||
),
|
||||
),
|
||||
("order", models.PositiveIntegerField(default=0, verbose_name="order")),
|
||||
(
|
||||
"custom_field",
|
||||
models.ForeignKey(
|
||||
blank=True,
|
||||
help_text="Target custom field (only used when target is 'custom_field')",
|
||||
null=True,
|
||||
on_delete=django.db.models.deletion.CASCADE,
|
||||
related_name="ocr_zones",
|
||||
to="documents.customfield",
|
||||
verbose_name="custom field",
|
||||
),
|
||||
),
|
||||
(
|
||||
"template",
|
||||
models.ForeignKey(
|
||||
on_delete=django.db.models.deletion.CASCADE,
|
||||
related_name="zones",
|
||||
to="documents.ocrtemplate",
|
||||
verbose_name="template",
|
||||
),
|
||||
),
|
||||
],
|
||||
options={
|
||||
"verbose_name": "OCR template zone",
|
||||
"verbose_name_plural": "OCR template zones",
|
||||
"ordering": ("template", "order"),
|
||||
},
|
||||
),
|
||||
]
|
||||
@@ -1894,3 +1894,248 @@ class WorkflowRun(SoftDeleteModel):
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f"WorkflowRun of {self.workflow} at {self.run_at} on {self.document}"
|
||||
|
||||
|
||||
class OcrTemplate(models.Model):
|
||||
"""
|
||||
Defines a set of OCR extraction zones for a specific document type.
|
||||
|
||||
When a document of that type is consumed, each zone in the template is
|
||||
cropped from the document image and OCR'd separately. The extracted text
|
||||
is written to the configured custom field or built-in document field.
|
||||
"""
|
||||
|
||||
name = models.CharField(
|
||||
_("name"),
|
||||
max_length=128,
|
||||
)
|
||||
|
||||
document_type = models.ForeignKey(
|
||||
"documents.DocumentType",
|
||||
on_delete=models.CASCADE,
|
||||
related_name="ocr_templates",
|
||||
verbose_name=_("document type"),
|
||||
db_index=True,
|
||||
)
|
||||
|
||||
source_width = models.PositiveIntegerField(
|
||||
_("source width"),
|
||||
validators=[MinValueValidator(1)],
|
||||
help_text=_("Width of the image the zones were drawn on (px)"),
|
||||
)
|
||||
|
||||
source_height = models.PositiveIntegerField(
|
||||
_("source height"),
|
||||
validators=[MinValueValidator(1)],
|
||||
help_text=_("Height of the image the zones were drawn on (px)"),
|
||||
)
|
||||
|
||||
sample_document = models.ForeignKey(
|
||||
"documents.Document",
|
||||
on_delete=models.SET_NULL,
|
||||
null=True,
|
||||
blank=True,
|
||||
related_name="+",
|
||||
verbose_name=_("sample document"),
|
||||
help_text=_("Document used for previewing zones in the editor"),
|
||||
)
|
||||
|
||||
enabled = models.BooleanField(_("enabled"), default=True)
|
||||
|
||||
combine_formats = models.JSONField(
|
||||
_("combine formats"),
|
||||
default=dict,
|
||||
blank=True,
|
||||
help_text=_(
|
||||
"Per-target format strings for combining several zones into one "
|
||||
"field, keyed by target (custom field id, or 'title'/'asn'/'created'). "
|
||||
"Tokens like {Zone Name} are replaced with that zone's value.",
|
||||
),
|
||||
)
|
||||
|
||||
created = models.DateTimeField(
|
||||
_("created"),
|
||||
default=timezone.now,
|
||||
db_index=True,
|
||||
editable=False,
|
||||
)
|
||||
|
||||
updated = models.DateTimeField(
|
||||
_("updated"),
|
||||
auto_now=True,
|
||||
)
|
||||
|
||||
class Meta:
|
||||
ordering = ("name",)
|
||||
verbose_name = _("OCR template")
|
||||
verbose_name_plural = _("OCR templates")
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f"{self.name} ({self.document_type})"
|
||||
|
||||
|
||||
class OcrTemplateZone(models.Model):
|
||||
"""
|
||||
A rectangular region within a document page to OCR and extract into a custom
|
||||
field or built-in document field. Coordinates are relative to the source
|
||||
image dimensions stored on the template.
|
||||
"""
|
||||
|
||||
template = models.ForeignKey(
|
||||
OcrTemplate,
|
||||
on_delete=models.CASCADE,
|
||||
related_name="zones",
|
||||
verbose_name=_("template"),
|
||||
)
|
||||
|
||||
name = models.CharField(
|
||||
_("zone name"),
|
||||
max_length=128,
|
||||
help_text=_("Descriptive name for this zone (e.g. 'Invoice Number')"),
|
||||
)
|
||||
|
||||
class TargetType(models.TextChoices):
|
||||
CUSTOM_FIELD = ("custom_field", _("Custom field"))
|
||||
TITLE = ("title", _("Title"))
|
||||
ASN = ("asn", _("Archive serial number"))
|
||||
CREATED = ("created", _("Date created"))
|
||||
|
||||
target = models.CharField(
|
||||
_("target"),
|
||||
max_length=20,
|
||||
choices=TargetType.choices,
|
||||
default=TargetType.CUSTOM_FIELD,
|
||||
help_text=_(
|
||||
"Where the extracted value is written: a custom field, or a "
|
||||
"built-in document field (title, ASN, created date)",
|
||||
),
|
||||
)
|
||||
|
||||
custom_field = models.ForeignKey(
|
||||
"documents.CustomField",
|
||||
on_delete=models.CASCADE,
|
||||
related_name="ocr_zones",
|
||||
verbose_name=_("custom field"),
|
||||
null=True,
|
||||
blank=True,
|
||||
help_text=_("Target custom field (only used when target is 'custom_field')"),
|
||||
)
|
||||
|
||||
page = models.IntegerField(
|
||||
_("page"),
|
||||
null=True,
|
||||
blank=True,
|
||||
help_text=_("Page (1 = first, -1 = last; blank uses the template default)"),
|
||||
)
|
||||
|
||||
x = models.PositiveIntegerField(_("x"), help_text=_("Left edge (px)"))
|
||||
y = models.PositiveIntegerField(_("y"), help_text=_("Top edge (px)"))
|
||||
width = models.PositiveIntegerField(
|
||||
_("width"),
|
||||
validators=[MinValueValidator(1)],
|
||||
help_text=_("Zone width (px)"),
|
||||
)
|
||||
height = models.PositiveIntegerField(
|
||||
_("height"),
|
||||
validators=[MinValueValidator(1)],
|
||||
help_text=_("Zone height (px)"),
|
||||
)
|
||||
|
||||
# Per-zone source dimensions for coordinate scaling.
|
||||
# Stored from the page image the zone was drawn on.
|
||||
# If null, falls back to the template's source_width/source_height.
|
||||
# This handles PDFs with mixed page sizes (e.g. landscape + portrait,
|
||||
# or different paper formats across pages).
|
||||
zone_source_width = models.PositiveIntegerField(
|
||||
_("zone source width"),
|
||||
null=True,
|
||||
blank=True,
|
||||
help_text=_(
|
||||
"Width of the page image this zone was drawn on (px). "
|
||||
"Falls back to template source_width if unset.",
|
||||
),
|
||||
)
|
||||
zone_source_height = models.PositiveIntegerField(
|
||||
_("zone source height"),
|
||||
null=True,
|
||||
blank=True,
|
||||
help_text=_(
|
||||
"Height of the page image this zone was drawn on (px). "
|
||||
"Falls back to template source_height if unset.",
|
||||
),
|
||||
)
|
||||
|
||||
ocr_language = models.CharField(
|
||||
_("OCR language"),
|
||||
max_length=20,
|
||||
default="deu+eng",
|
||||
help_text=_("Tesseract language code(s), e.g. 'deu+eng'"),
|
||||
)
|
||||
|
||||
class TransformType(models.TextChoices):
|
||||
NONE = ("none", _("None"))
|
||||
STRIP = ("strip", _("Strip whitespace"))
|
||||
UPPERCASE = ("uppercase", _("Uppercase"))
|
||||
LOWERCASE = ("lowercase", _("Lowercase"))
|
||||
NUMERIC = ("numeric", _("Numeric only"))
|
||||
STRIP_PUNCTUATION = (
|
||||
"strip_punctuation",
|
||||
_("Remove leading/trailing punctuation"),
|
||||
)
|
||||
DATE = ("date", _("Parse date"))
|
||||
QR_CODE = ("qr_code", _("Read QR/barcode"))
|
||||
|
||||
transform = models.CharField(
|
||||
_("transform"),
|
||||
max_length=20,
|
||||
choices=TransformType.choices,
|
||||
default=TransformType.STRIP,
|
||||
)
|
||||
|
||||
date_format = models.CharField(
|
||||
_("date format"),
|
||||
max_length=64,
|
||||
blank=True,
|
||||
default="",
|
||||
help_text=_(
|
||||
"Python strptime format for the 'Parse date' transform "
|
||||
"(e.g. %d.%m.%Y). Blank = auto-detect.",
|
||||
),
|
||||
)
|
||||
|
||||
validation_regex = models.CharField(
|
||||
_("validation regex"),
|
||||
max_length=256,
|
||||
blank=True,
|
||||
default="",
|
||||
help_text=_(
|
||||
"Optional regex pattern — extracted text is only accepted if it matches",
|
||||
),
|
||||
)
|
||||
|
||||
order = models.PositiveIntegerField(_("order"), default=0)
|
||||
|
||||
class Meta:
|
||||
ordering = ("template", "order")
|
||||
verbose_name = _("OCR template zone")
|
||||
verbose_name_plural = _("OCR template zones")
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f"{self.template.name} -> {self.name}"
|
||||
|
||||
|
||||
# Custom field data types that zone OCR can extract into. DOCUMENTLINK and
|
||||
# SELECT are excluded (they reference other objects, not free text). Single
|
||||
# source of truth for the serializer, the quick-create endpoint and the engine.
|
||||
OCR_SUPPORTED_FIELD_TYPES = frozenset(
|
||||
{
|
||||
CustomField.FieldDataType.STRING,
|
||||
CustomField.FieldDataType.URL,
|
||||
CustomField.FieldDataType.DATE,
|
||||
CustomField.FieldDataType.INT,
|
||||
CustomField.FieldDataType.FLOAT,
|
||||
CustomField.FieldDataType.MONETARY,
|
||||
CustomField.FieldDataType.LONG_TEXT,
|
||||
CustomField.FieldDataType.BOOL,
|
||||
},
|
||||
)
|
||||
|
||||
@@ -45,12 +45,6 @@ class SanityCheckMessages:
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._messages: dict[int | None, list[MessageEntry]] = defaultdict(list)
|
||||
self._document_pks: set[int] = set()
|
||||
self._document_error_pks: set[int] = set()
|
||||
self._document_warning_pks: set[int] = set()
|
||||
self._document_info_pks: set[int] = set()
|
||||
self._document_error_issue_count: int = 0
|
||||
self._document_warning_issue_count: int = 0
|
||||
self.has_error: bool = False
|
||||
self.has_warning: bool = False
|
||||
self.has_info: bool = False
|
||||
@@ -62,33 +56,20 @@ class SanityCheckMessages:
|
||||
|
||||
# -- Recording ----------------------------------------------------------
|
||||
|
||||
def _add_document_issue(self, doc_pk: int, document_pks: set[int]) -> bool:
|
||||
if doc_pk not in self._document_pks:
|
||||
self._document_pks.add(doc_pk)
|
||||
self.document_count += 1
|
||||
|
||||
if doc_pk in document_pks:
|
||||
return False
|
||||
|
||||
document_pks.add(doc_pk)
|
||||
return True
|
||||
|
||||
def error(self, doc_pk: int | None, message: str) -> None:
|
||||
self._messages[doc_pk].append({"level": logging.ERROR, "message": message})
|
||||
self.has_error = True
|
||||
if doc_pk is not None:
|
||||
self._document_error_issue_count += 1
|
||||
if self._add_document_issue(doc_pk, self._document_error_pks):
|
||||
self.document_error_count += 1
|
||||
self.document_count += 1
|
||||
self.document_error_count += 1
|
||||
|
||||
def warning(self, doc_pk: int | None, message: str) -> None:
|
||||
self._messages[doc_pk].append({"level": logging.WARNING, "message": message})
|
||||
self.has_warning = True
|
||||
|
||||
if doc_pk is not None:
|
||||
self._document_warning_issue_count += 1
|
||||
if self._add_document_issue(doc_pk, self._document_warning_pks):
|
||||
self.document_warning_count += 1
|
||||
self.document_count += 1
|
||||
self.document_warning_count += 1
|
||||
else:
|
||||
# This is the only type of global message we do right now
|
||||
self.global_warning_count += 1
|
||||
@@ -97,10 +78,8 @@ class SanityCheckMessages:
|
||||
self._messages[doc_pk].append({"level": logging.INFO, "message": message})
|
||||
self.has_info = True
|
||||
|
||||
if doc_pk is not None and self._add_document_issue(
|
||||
doc_pk,
|
||||
self._document_info_pks,
|
||||
):
|
||||
if doc_pk is not None:
|
||||
self.document_count += 1
|
||||
self.document_info_count += 1
|
||||
|
||||
# -- Iteration / query --------------------------------------------------
|
||||
@@ -126,8 +105,8 @@ class SanityCheckMessages:
|
||||
def total_issue_count(self) -> int:
|
||||
"""Total number of error and warning messages across all documents and global."""
|
||||
return (
|
||||
self._document_error_issue_count
|
||||
+ self._document_warning_issue_count
|
||||
self.document_error_count
|
||||
+ self.document_warning_count
|
||||
+ self.global_warning_count
|
||||
)
|
||||
|
||||
|
||||
@@ -57,6 +57,7 @@ if settings.AUDIT_LOG_ENABLED:
|
||||
from documents import bulk_edit
|
||||
from documents.data_models import DocumentSource
|
||||
from documents.filters import CustomFieldQueryParser
|
||||
from documents.models import OCR_SUPPORTED_FIELD_TYPES
|
||||
from documents.models import Correspondent
|
||||
from documents.models import CustomField
|
||||
from documents.models import CustomFieldInstance
|
||||
@@ -64,6 +65,8 @@ from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import MatchingModel
|
||||
from documents.models import Note
|
||||
from documents.models import OcrTemplate
|
||||
from documents.models import OcrTemplateZone
|
||||
from documents.models import PaperlessTask
|
||||
from documents.models import SavedView
|
||||
from documents.models import SavedViewFilterRule
|
||||
@@ -3501,3 +3504,129 @@ class StoragePathTestSerializer(SerializerWithPerms):
|
||||
"documents.view_document",
|
||||
Document,
|
||||
)
|
||||
|
||||
|
||||
class OcrTemplateZoneSerializer(serializers.ModelSerializer):
|
||||
class Meta:
|
||||
model = OcrTemplateZone
|
||||
fields = [
|
||||
"id",
|
||||
"name",
|
||||
"target",
|
||||
"custom_field",
|
||||
"page",
|
||||
"x",
|
||||
"y",
|
||||
"width",
|
||||
"height",
|
||||
"ocr_language",
|
||||
"transform",
|
||||
"date_format",
|
||||
"order",
|
||||
"zone_source_width",
|
||||
"zone_source_height",
|
||||
"validation_regex",
|
||||
]
|
||||
|
||||
def validate_width(self, value):
    """Return the zone width unchanged, rejecting anything below 1."""
    if value >= 1:
        return value
    raise serializers.ValidationError("Width must be at least 1.")
|
||||
|
||||
def validate_height(self, value):
    """Return the zone height unchanged, rejecting anything below 1."""
    if value >= 1:
        return value
    raise serializers.ValidationError("Height must be at least 1.")
|
||||
|
||||
def validate_custom_field(self, value):
    """
    Accept the zone's custom field only if zone OCR can write to its data type.

    ``None`` is passed through untouched: zones targeting a built-in field
    (title/asn/created) do not need a custom field. Any other value must have
    a ``data_type`` listed in ``OCR_SUPPORTED_FIELD_TYPES``.
    """
    if value is not None and value.data_type not in OCR_SUPPORTED_FIELD_TYPES:
        raise serializers.ValidationError(
            f"Custom field type '{value.data_type}' is not supported for OCR extraction. "
            f"Use string, integer, float, date, monetary, boolean, URL, or long text.",
        )
    return value
|
||||
|
||||
|
||||
class OcrTemplateSerializer(serializers.ModelSerializer):
|
||||
zones = OcrTemplateZoneSerializer(many=True, required=False)
|
||||
|
||||
class Meta:
|
||||
model = OcrTemplate
|
||||
fields = [
|
||||
"id",
|
||||
"name",
|
||||
"document_type",
|
||||
"source_width",
|
||||
"source_height",
|
||||
"sample_document",
|
||||
"enabled",
|
||||
"combine_formats",
|
||||
"created",
|
||||
"updated",
|
||||
"zones",
|
||||
]
|
||||
read_only_fields = ["created", "updated"]
|
||||
|
||||
def validate_source_width(self, value):
    """Return the template source width unchanged, rejecting anything below 1."""
    if value >= 1:
        return value
    raise serializers.ValidationError("Source width must be at least 1.")
|
||||
|
||||
def validate_source_height(self, value):
    """Return the template source height unchanged, rejecting anything below 1."""
    if value >= 1:
        return value
    raise serializers.ValidationError("Source height must be at least 1.")
|
||||
|
||||
def validate_zones(self, zones_data):
    """
    Validate that every zone rectangle fits inside the image it was drawn on.

    A zone is checked against its own ``zone_source_width`` /
    ``zone_source_height`` when they are set, and against the template's
    ``source_width`` / ``source_height`` otherwise, mirroring the fallback
    described on the zone model. A dimension whose limit is unknown is not
    checked.

    Args:
        zones_data: list of already field-validated zone dicts.

    Returns:
        ``zones_data`` unchanged.

    Raises:
        serializers.ValidationError: for the first zone that extends past
            its width or height limit.
    """

    def _to_int(raw):
        # initial_data holds the raw request values, so this may be a
        # non-numeric string. The field's own validation reports that case;
        # here it must not escape as an uncaught ValueError/TypeError.
        try:
            return int(raw)
        except (TypeError, ValueError):
            return None

    # source_width/height may not be in initial_data during partial updates
    template_width = _to_int(
        self.initial_data.get("source_width")
        or (self.instance.source_width if self.instance else None),
    )
    template_height = _to_int(
        self.initial_data.get("source_height")
        or (self.instance.source_height if self.instance else None),
    )

    for zone in zones_data:
        # Per-zone dimensions win; they exist for documents whose pages
        # differ in size from the template's source image.
        width_limit = zone.get("zone_source_width") or template_width
        height_limit = zone.get("zone_source_height") or template_height

        right = zone.get("x", 0) + zone.get("width", 0)
        bottom = zone.get("y", 0) + zone.get("height", 0)

        if width_limit and right > width_limit:
            raise serializers.ValidationError(
                f"Zone '{zone.get('name', '?')}' extends beyond source width "
                f"({right} > {width_limit}).",
            )
        if height_limit and bottom > height_limit:
            raise serializers.ValidationError(
                f"Zone '{zone.get('name', '?')}' extends beyond source height "
                f"({bottom} > {height_limit}).",
            )

    return zones_data
|
||||
|
||||
def create(self, validated_data):
    """
    Create a template together with its nested zones.

    The template row and all zone rows are written inside one transaction,
    so a failing zone insert rolls the template back instead of leaving it
    stored with only part of its zones.

    Args:
        validated_data: validated serializer data; the optional ``zones``
            list is removed from it and used for the zone rows.

    Returns:
        The newly created ``OcrTemplate``.
    """
    from django.db import transaction

    zones_data = validated_data.pop("zones", [])
    with transaction.atomic():
        template = OcrTemplate.objects.create(**validated_data)
        for zone_data in zones_data:
            OcrTemplateZone.objects.create(template=template, **zone_data)
    return template
|
||||
|
||||
def update(self, instance, validated_data):
    """
    Update a template and, when ``zones`` is supplied, replace its zones.

    If ``zones`` is absent from the data the existing zones are left alone.
    Otherwise all existing zones are deleted and recreated from the new
    list. The whole update runs inside one transaction, so a failure while
    recreating zones does not leave the template with its old zones deleted
    and the new ones only partly written.

    Args:
        instance: the ``OcrTemplate`` being updated.
        validated_data: validated serializer data.

    Returns:
        The updated ``instance``.
    """
    from django.db import transaction

    zones_data = validated_data.pop("zones", None)

    with transaction.atomic():
        for attr, value in validated_data.items():
            setattr(instance, attr, value)
        instance.save()

        if zones_data is not None:
            # Replace all zones with the new set
            instance.zones.all().delete()
            for zone_data in zones_data:
                OcrTemplateZone.objects.create(template=instance, **zone_data)

    return instance
|
||||
|
||||
@@ -1340,6 +1340,75 @@ def close_connection_pool_on_worker_init(**kwargs) -> None:
|
||||
conn.close_pool()
|
||||
|
||||
|
||||
def run_zone_ocr_extraction(sender, document, original_file=None, **kwargs):
    """
    Signal receiver that hands a document to the zone OCR engine.

    ``original_file`` is converted to a ``Path`` when given, otherwise
    ``None`` is passed on. Any exception raised by the import or by the
    extraction is logged with its traceback and swallowed, so a zone OCR
    failure never propagates to the signal sender.
    """
    try:
        from documents.zone_ocr import run_zone_extraction

        source_file = None
        if original_file:
            source_file = Path(original_file)
        run_zone_extraction(document, source_file)
    except Exception:
        logger.exception(
            "Zone OCR extraction failed for document %s",
            document.pk,
        )
|
||||
|
||||
|
||||
def capture_old_document_type(sender, instance, **kwargs):
    """
    pre_save receiver: stash the stored document type id on the instance.

    The value is written to ``instance._old_document_type_id`` so a later
    post_save receiver can compare it with the new type. Instances without a
    primary key get ``None``; otherwise the id is read from the database
    (``None`` if no row exists or the stored type is unset).
    """
    previous_type_id = None
    if instance.pk:
        stored = Document.objects.filter(pk=instance.pk)
        previous_type_id = stored.values_list("document_type_id", flat=True).first()
    instance._old_document_type_id = previous_type_id
|
||||
|
||||
|
||||
def run_zone_ocr_on_type_change(sender, instance, *, created=False, **kwargs):
    """
    post_save receiver: run zone OCR only when a document's TYPE changes.

    Zone OCR overwrites fields, so re-running it on every save would clobber
    the user's edits. The handler therefore returns early when:

    * the save is a raw save (fixture loading) — the database may not be in
      a consistent state, so nothing is queried or modified;
    * the document was just created (the consumption signal handles that);
    * the document has no primary key or no document type;
    * the type equals the one remembered in ``_old_document_type_id``;
    * no enabled ``OcrTemplate`` exists for the new type.

    Extraction only runs if the archive or source file exists on disk.
    Errors during extraction are logged and swallowed.
    """
    if kwargs.get("raw"):
        # Fixture load: do not query other tables or rewrite the document.
        return

    if created or not instance.pk or not instance.document_type_id:
        return

    # Only proceed if the type changed compared to what was in the DB before.
    old_type = getattr(instance, "_old_document_type_id", None)
    if old_type == instance.document_type_id:
        return

    from documents.models import OcrTemplate

    if not OcrTemplate.objects.filter(
        document_type_id=instance.document_type_id,
        enabled=True,
    ).exists():
        return

    try:
        from documents.zone_ocr import run_zone_extraction

        doc_path = instance.archive_path or instance.source_path
        if doc_path and Path(doc_path).is_file():
            logger.info(
                "Zone OCR: running extraction for document %d (type %d)",
                instance.pk,
                instance.document_type_id,
            )
            run_zone_extraction(instance, None)
    except Exception:
        logger.exception(
            "Zone OCR extraction failed for document %s",
            instance.pk,
        )
|
||||
|
||||
@worker_process_shutdown.connect
|
||||
def close_connection_pool_on_worker_shutdown(**kwargs) -> None: # pragma: no cover
|
||||
"""
|
||||
|
||||
@@ -0,0 +1,449 @@
|
||||
"""Tests for the OCR Template API."""
|
||||
|
||||
import json
|
||||
|
||||
from django.contrib.auth.models import User
|
||||
from rest_framework import status
|
||||
from rest_framework.test import APITestCase
|
||||
|
||||
from documents.models import CustomField
|
||||
from documents.models import DocumentType
|
||||
from documents.models import OcrTemplate
|
||||
from documents.models import OcrTemplateZone
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
|
||||
class TestOcrTemplatesAPI(DirectoriesMixin, APITestCase):
    """API tests for the OCR template endpoint.

    Covers create / list / update / patch / delete, payload validation,
    anonymous access and the ``quick-create-field`` action.
    """

    ENDPOINT = "/api/ocr_templates/"

    def setUp(self) -> None:
        # Requests are issued as a superuser unless a test logs out.
        self.user = User.objects.create_superuser(username="temp_admin")
        self.client.force_authenticate(user=self.user)

        self.doc_type = DocumentType.objects.create(name="Invoice")

        kinds = CustomField.FieldDataType
        self.custom_field_text = CustomField.objects.create(
            name="Invoice Number",
            data_type=kinds.STRING,
        )
        self.custom_field_date = CustomField.objects.create(
            name="Invoice Date",
            data_type=kinds.DATE,
        )
        self.custom_field_int = CustomField.objects.create(
            name="Amount",
            data_type=kinds.INT,
        )
        self.custom_field_doclink = CustomField.objects.create(
            name="Related Docs",
            data_type=kinds.DOCUMENTLINK,
        )

        return super().setUp()

    # --- Helpers ---

    def _make_template_data(self, **overrides):
        """Return a valid template payload, with ``overrides`` applied on top."""
        return {
            "name": "Invoice Template",
            "document_type": self.doc_type.pk,
            "default_page": 0,
            "source_width": 2480,
            "source_height": 3508,
            "enabled": True,
            "zones": [],
            **overrides,
        }

    def _make_zone_data(self, **overrides):
        """Return a valid zone payload, with ``overrides`` applied on top."""
        return {
            "name": "Zone 1",
            "custom_field": self.custom_field_text.pk,
            "x": 100,
            "y": 100,
            "width": 200,
            "height": 50,
            "ocr_language": "deu+eng",
            "transform": "strip",
            "order": 0,
            **overrides,
        }

    def _send_json(self, verb, url, payload):
        """Issue ``verb`` (post/put/patch) against ``url`` with a JSON body."""
        send = getattr(self.client, verb)
        return send(
            url,
            data=json.dumps(payload),
            content_type="application/json",
        )

    def _detail_url(self, pk):
        """URL of a single template."""
        return f"{self.ENDPOINT}{pk}/"

    def _quick_create(self, payload):
        """POST ``payload`` to the quick-create-field action."""
        return self._send_json(
            "post",
            f"{self.ENDPOINT}quick-create-field/",
            payload,
        )

    def _stored_template(self, name, **extra):
        """Create a template row directly in the database."""
        return OcrTemplate.objects.create(
            name=name,
            document_type=self.doc_type,
            source_width=2480,
            source_height=3508,
            **extra,
        )

    def _stored_zone(self, template, name, *, x, y, width, height):
        """Create a zone row for ``template``, bound to the text custom field."""
        return OcrTemplateZone.objects.create(
            template=template,
            name=name,
            custom_field=self.custom_field_text,
            x=x,
            y=y,
            width=width,
            height=height,
        )

    def _assert_create_rejected(self, payload):
        """POST ``payload`` to the list endpoint and expect a 400."""
        response = self._send_json("post", self.ENDPOINT, payload)
        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)

    # --- Create ---

    def test_create_template(self):
        """
        GIVEN:
            - A document type and custom fields exist
        WHEN:
            - API request to create an OCR template with one zone
        THEN:
            - The template and zone are created
        """
        zone = self._make_zone_data(
            name="Invoice Number",
            x=1500,
            y=200,
            width=800,
            height=100,
        )
        response = self._send_json(
            "post",
            self.ENDPOINT,
            self._make_template_data(zones=[zone]),
        )
        self.assertEqual(response.status_code, status.HTTP_201_CREATED)

        body = response.json()
        self.assertEqual(body["name"], "Invoice Template")
        self.assertEqual(body["document_type"], self.doc_type.pk)
        self.assertEqual(len(body["zones"]), 1)
        self.assertEqual(body["zones"][0]["name"], "Invoice Number")
        self.assertEqual(OcrTemplate.objects.count(), 1)
        self.assertEqual(OcrTemplateZone.objects.count(), 1)

    def test_create_template_multiple_zones(self):
        """
        GIVEN:
            - Multiple custom fields exist
        WHEN:
            - A template with multiple zones is created
        THEN:
            - All zones are created
        """
        number_zone = self._make_zone_data(
            name="Invoice Number",
            custom_field=self.custom_field_text.pk,
        )
        date_zone = self._make_zone_data(
            name="Invoice Date",
            custom_field=self.custom_field_date.pk,
            order=1,
        )
        response = self._send_json(
            "post",
            self.ENDPOINT,
            self._make_template_data(zones=[number_zone, date_zone]),
        )
        self.assertEqual(response.status_code, status.HTTP_201_CREATED)
        self.assertEqual(len(response.json()["zones"]), 2)
        self.assertEqual(OcrTemplateZone.objects.count(), 2)

    def test_create_template_no_zones(self):
        """
        GIVEN:
            - Valid template data without zones
        WHEN:
            - Template is created
        THEN:
            - Template is created with no zones
        """
        response = self._send_json(
            "post",
            self.ENDPOINT,
            self._make_template_data(),
        )
        self.assertEqual(response.status_code, status.HTTP_201_CREATED)
        self.assertEqual(len(response.json()["zones"]), 0)

    # --- Validation ---

    def test_create_template_zero_source_width_rejected(self):
        """
        GIVEN:
            - Template data with source_width=0
        WHEN:
            - Create is attempted
        THEN:
            - 400 error is returned
        """
        self._assert_create_rejected(self._make_template_data(source_width=0))

    def test_create_template_zero_source_height_rejected(self):
        self._assert_create_rejected(self._make_template_data(source_height=0))

    def test_create_zone_zero_width_rejected(self):
        payload = self._make_template_data(
            zones=[self._make_zone_data(width=0)],
        )
        self._assert_create_rejected(payload)

    def test_create_zone_zero_height_rejected(self):
        payload = self._make_template_data(
            zones=[self._make_zone_data(height=0)],
        )
        self._assert_create_rejected(payload)

    def test_create_zone_exceeds_source_width_rejected(self):
        """Zone that extends beyond the source image width should be rejected."""
        overflowing = self._make_zone_data(x=800, width=300)  # 800+300 > 1000
        payload = self._make_template_data(
            source_width=1000,
            zones=[overflowing],
        )
        self._assert_create_rejected(payload)

    def test_create_zone_exceeds_source_height_rejected(self):
        overflowing = self._make_zone_data(y=900, height=200)  # 900+200 > 1000
        payload = self._make_template_data(
            source_height=1000,
            zones=[overflowing],
        )
        self._assert_create_rejected(payload)

    def test_create_zone_unsupported_custom_field_type_rejected(self):
        """DOCUMENTLINK and SELECT fields can't be populated via OCR."""
        payload = self._make_template_data(
            zones=[self._make_zone_data(custom_field=self.custom_field_doclink.pk)],
        )
        self._assert_create_rejected(payload)

    # --- List ---

    def test_list_templates(self):
        template = self._stored_template("Test Template")
        self._stored_zone(template, "Zone 1", x=100, y=100, width=200, height=50)

        response = self.client.get(self.ENDPOINT)
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        listing = response.json()
        self.assertEqual(listing["count"], 1)
        self.assertEqual(len(listing["results"][0]["zones"]), 1)

    def test_list_empty(self):
        response = self.client.get(self.ENDPOINT)
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        self.assertEqual(response.json()["count"], 0)

    # --- Update ---

    def test_update_template_replaces_zones(self):
        """PUT should replace all zones with the new set."""
        template = self._stored_template("Old Name")
        self._stored_zone(template, "Old Zone", x=0, y=0, width=100, height=100)

        replacement_zone = self._make_zone_data(
            name="New Zone",
            custom_field=self.custom_field_date.pk,
        )
        response = self._send_json(
            "put",
            self._detail_url(template.pk),
            self._make_template_data(name="New Name", zones=[replacement_zone]),
        )
        self.assertEqual(response.status_code, status.HTTP_200_OK)

        template.refresh_from_db()
        self.assertEqual(template.name, "New Name")
        self.assertEqual(OcrTemplateZone.objects.count(), 1)
        self.assertEqual(OcrTemplateZone.objects.first().name, "New Zone")

    # --- Delete ---

    def test_delete_template_cascades_zones(self):
        template = self._stored_template("To Delete")
        self._stored_zone(template, "Zone", x=0, y=0, width=100, height=100)

        response = self.client.delete(self._detail_url(template.pk))
        self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT)
        self.assertEqual(OcrTemplate.objects.count(), 0)
        self.assertEqual(OcrTemplateZone.objects.count(), 0)

    def test_delete_nonexistent_returns_404(self):
        response = self.client.delete(f"{self.ENDPOINT}99999/")
        self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)

    # --- Patch ---

    def test_patch_toggle_enabled(self):
        template = self._stored_template("Toggle Test", enabled=True)

        response = self._send_json(
            "patch",
            self._detail_url(template.pk),
            {"enabled": False},
        )
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        template.refresh_from_db()
        self.assertFalse(template.enabled)

    def test_patch_preserves_zones(self):
        """PATCH without zones field should not delete existing zones."""
        template = self._stored_template("Patch Test")
        self._stored_zone(template, "Existing Zone", x=0, y=0, width=100, height=100)

        response = self._send_json(
            "patch",
            self._detail_url(template.pk),
            {"name": "Updated Name"},
        )
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        self.assertEqual(OcrTemplateZone.objects.count(), 1)

    # --- Auth ---

    def test_unauthenticated_rejected(self):
        self.client.logout()
        response = self.client.get(self.ENDPOINT)
        denied = (status.HTTP_401_UNAUTHORIZED, status.HTTP_403_FORBIDDEN)
        self.assertIn(response.status_code, denied)

    # --- Quick create field ---

    def test_quick_create_field(self):
        """Creating a custom field inline from the template editor."""
        response = self._quick_create({"name": "New Field", "data_type": "string"})
        self.assertEqual(response.status_code, status.HTTP_201_CREATED)
        body = response.json()
        self.assertEqual(body["name"], "New Field")
        self.assertEqual(body["data_type"], "string")
        self.assertTrue(body["created"])
        self.assertTrue(CustomField.objects.filter(name="New Field").exists())

    def test_quick_create_field_existing(self):
        """If a field with the same name exists, return it without creating."""
        response = self._quick_create(
            {"name": "Invoice Number", "data_type": "string"},
        )
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        body = response.json()
        self.assertEqual(body["id"], self.custom_field_text.pk)
        self.assertFalse(body["created"])

    def test_quick_create_field_empty_name_rejected(self):
        response = self._quick_create({"name": "", "data_type": "string"})
        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)

    def test_quick_create_field_unsupported_type_rejected(self):
        response = self._quick_create(
            {"name": "Bad Field", "data_type": "documentlink"},
        )
        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)

    def test_quick_create_field_select_type_rejected(self):
        response = self._quick_create({"name": "Bad Field", "data_type": "select"})
        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
@@ -12,7 +12,6 @@ from typing import TYPE_CHECKING
|
||||
|
||||
import pytest
|
||||
|
||||
from documents.sanity_checker import SanityCheckMessages
|
||||
from documents.sanity_checker import check_sanity
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -22,26 +21,6 @@ if TYPE_CHECKING:
|
||||
from documents.tests.conftest import PaperlessDirs
|
||||
|
||||
|
||||
class TestSanityCheckMessages:
    """Counter behaviour of ``SanityCheckMessages``."""

    def test_document_counts_are_unique_per_severity(self) -> None:
        """Two messages of each severity for one document, plus one global warning."""
        messages = SanityCheckMessages()

        # (severity method, document pk or None for a global message, text),
        # recorded in this exact order.
        recorded = (
            ("error", 1, "first error"),
            ("error", 1, "second error"),
            ("warning", 1, "first warning"),
            ("warning", 1, "second warning"),
            ("info", 1, "first info"),
            ("info", 1, "second info"),
            ("warning", None, "global warning"),
        )
        for severity, doc_pk, text in recorded:
            getattr(messages, severity)(doc_pk, text)

        assert messages.document_count == 1
        assert messages.document_error_count == 1
        assert messages.document_warning_count == 1
        assert messages.document_info_count == 1
        assert messages.global_warning_count == 1
        assert messages.total_issue_count == 5
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestCheckSanityNoDocuments:
|
||||
"""Sanity checks against an empty archive."""
|
||||
|
||||
@@ -0,0 +1,454 @@
|
||||
"""Tests for the zone-based OCR extraction engine."""
|
||||
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
from django.test import TestCase
|
||||
|
||||
from documents.models import CustomField
|
||||
from documents.models import CustomFieldInstance
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import OcrTemplate
|
||||
from documents.models import OcrTemplateZone
|
||||
from documents.zone_ocr import _apply_transform
|
||||
from documents.zone_ocr import _convert_value
|
||||
from documents.zone_ocr import _detect_mime
|
||||
from documents.zone_ocr import _resolve_doc_path
|
||||
from documents.zone_ocr import run_zone_extraction
|
||||
|
||||
|
||||
class TestApplyTransform(TestCase):
    """Tests for the _apply_transform function."""

    def _check(self, raw, transform, expected):
        """Assert that applying ``transform`` to ``raw`` yields ``expected``."""
        self.assertEqual(_apply_transform(raw, transform), expected)

    # --- Whitespace / case ---

    def test_strip(self):
        self._check(" hello ", "strip", "hello")

    def test_none_transform(self):
        self._check(" hello ", "none", "hello")

    def test_uppercase(self):
        self._check("hello world", "uppercase", "HELLO WORLD")

    def test_lowercase(self):
        self._check("HELLO WORLD", "lowercase", "hello world")

    # --- Numeric ---

    def test_numeric_basic(self):
        self._check("INV-2026-001", "numeric", "2026-001")

    def test_numeric_with_currency(self):
        self._check("€1,234.56", "numeric", "1,234.56")

    def test_numeric_empty_result_falls_back(self):
        self._check("abc", "numeric", "abc")

    # --- Day-month-year dates ---

    def test_date_dmy_dots(self):
        self._check("13.04.2026", "date_dmy", "2026-04-13")

    def test_date_dmy_slashes(self):
        self._check("01/12/2025", "date_dmy", "2025-12-01")

    def test_date_dmy_two_digit_year(self):
        self._check("13.04.26", "date_dmy", "2026-04-13")

    def test_date_dmy_with_prefix(self):
        self._check("Date: 01/12/2025", "date_dmy", "2025-12-01")

    def test_date_dmy_invalid_falls_back(self):
        self._check("32.13.2026", "date_dmy", "32.13.2026")

    def test_date_dmy_no_match_falls_back(self):
        self._check("not a date", "date_dmy", "not a date")

    # --- Year-month-day dates ---

    def test_date_ymd_dashes(self):
        self._check("2026-04-13", "date_ymd", "2026-04-13")

    def test_date_ymd_slashes(self):
        self._check("2026/04/13", "date_ymd", "2026-04-13")

    def test_date_ymd_invalid_falls_back(self):
        self._check("2026-13-32", "date_ymd", "2026-13-32")

    # --- Edge cases ---

    def test_empty_string(self):
        self._check("", "strip", "")

    def test_whitespace_only(self):
        self._check(" ", "strip", "")

    def test_unknown_transform_strips(self):
        self._check(" hello ", "unknown", "hello")
|
||||
|
||||
|
||||
class TestConvertValue(TestCase):
    """Tests for the _convert_value function."""

    # Short alias for the custom field data type enum used throughout.
    Kind = CustomField.FieldDataType

    # --- Text-like types ---

    def test_string(self):
        converted = _convert_value("Hello", self.Kind.STRING)
        self.assertEqual(converted, "Hello")

    def test_string_truncation(self):
        converted = _convert_value("x" * 200, self.Kind.STRING)
        self.assertEqual(len(converted), 128)

    def test_url(self):
        converted = _convert_value("https://example.com", self.Kind.URL)
        self.assertEqual(converted, "https://example.com")

    def test_long_text(self):
        long = "x" * 500
        converted = _convert_value(long, self.Kind.LONG_TEXT)
        self.assertEqual(converted, long)

    # --- Integers ---

    def test_int_simple(self):
        self.assertEqual(_convert_value("42", self.Kind.INT), 42)

    def test_int_with_noise(self):
        self.assertEqual(_convert_value("INV-123", self.Kind.INT), 123)

    def test_int_negative(self):
        self.assertEqual(_convert_value("-42", self.Kind.INT), -42)

    def test_int_empty_returns_none(self):
        self.assertIsNone(_convert_value("abc", self.Kind.INT))

    def test_int_only_dash_returns_none(self):
        self.assertIsNone(_convert_value("-", self.Kind.INT))

    # --- Floats ---

    def test_float_simple(self):
        converted = _convert_value("1234.56", self.Kind.FLOAT)
        self.assertAlmostEqual(converted, 1234.56)

    def test_float_european_format(self):
        converted = _convert_value("1.234,56", self.Kind.FLOAT)
        self.assertAlmostEqual(converted, 1234.56)

    def test_float_us_format(self):
        converted = _convert_value("1,234.56", self.Kind.FLOAT)
        self.assertAlmostEqual(converted, 1234.56)

    def test_float_comma_only(self):
        converted = _convert_value("1234,56", self.Kind.FLOAT)
        self.assertAlmostEqual(converted, 1234.56)

    def test_float_empty_returns_none(self):
        self.assertIsNone(_convert_value("abc", self.Kind.FLOAT))

    def test_float_only_separator_returns_none(self):
        self.assertIsNone(_convert_value(",", self.Kind.FLOAT))

    # --- Dates ---

    def test_date_iso(self):
        converted = _convert_value("2026-04-13", self.Kind.DATE)
        self.assertEqual(converted, "2026-04-13")

    def test_date_invalid_returns_none(self):
        self.assertIsNone(_convert_value("not a date", self.Kind.DATE))

    def test_date_invalid_values_returns_none(self):
        self.assertIsNone(_convert_value("2026-13-32", self.Kind.DATE))

    # --- Monetary ---

    def test_monetary_simple(self):
        converted = _convert_value("123.45", self.Kind.MONETARY)
        self.assertEqual(converted, "123.45")

    def test_monetary_european(self):
        converted = _convert_value("1.234,56", self.Kind.MONETARY)
        self.assertEqual(converted, "1234.56")

    def test_monetary_with_currency_symbol(self):
        converted = _convert_value("€1,234.56", self.Kind.MONETARY)
        self.assertEqual(converted, "1234.56")

    def test_monetary_empty_returns_none(self):
        self.assertIsNone(_convert_value("CHF", self.Kind.MONETARY))

    # --- Booleans ---

    def test_bool_true(self):
        truthy_inputs = ("true", "True", "yes", "1", "ja", "x", "X")
        for val in truthy_inputs:
            converted = _convert_value(val, self.Kind.BOOL)
            self.assertTrue(converted, f"Expected True for {val!r}")

    def test_bool_false(self):
        falsy_inputs = ("false", "False", "no", "0", "nein")
        for val in falsy_inputs:
            converted = _convert_value(val, self.Kind.BOOL)
            self.assertFalse(converted, f"Expected False for {val!r}")

    def test_bool_unknown_returns_none(self):
        self.assertIsNone(_convert_value("maybe", self.Kind.BOOL))

    # --- Unsupported / empty ---

    def test_unsupported_type_returns_none(self):
        self.assertIsNone(_convert_value("test", self.Kind.DOCUMENTLINK))
        self.assertIsNone(_convert_value("test", self.Kind.SELECT))

    def test_empty_string_returns_none(self):
        self.assertIsNone(_convert_value("", self.Kind.STRING))
|
||||
|
||||
|
||||
class TestDetectMime(TestCase):
    """Tests for _detect_mime."""

    def _mime_of(self, filename):
        """Run ``_detect_mime`` on a ``Path`` built from ``filename``."""
        return _detect_mime(Path(filename))

    def test_pdf_extension(self):
        detected = self._mime_of("test.pdf")
        self.assertEqual(detected, "application/pdf")

    def test_png_extension(self):
        detected = self._mime_of("test.png")
        self.assertEqual(detected, "image/png")

    def test_jpg_extension(self):
        detected = self._mime_of("test.jpg")
        self.assertEqual(detected, "image/jpeg")

    def test_unknown_extension(self):
        detected = self._mime_of("test.xyz")
        self.assertIsNone(detected)

    def test_webp_extension(self):
        detected = self._mime_of("test.webp")
        self.assertEqual(detected, "image/webp")
|
||||
|
||||
|
||||
class TestResolveDocPath(TestCase):
    """Tests for _resolve_doc_path."""

    @staticmethod
    def _doc_without_files():
        """Mock document with no archive version and a source path that does not exist."""
        stub = MagicMock()
        stub.has_archive_version = False
        stub.source_path = Path("/nonexistent/source.pdf")
        return stub

    def test_returns_none_when_no_files_exist(self):
        resolved = _resolve_doc_path(self._doc_without_files(), None)
        self.assertIsNone(resolved)

    def test_returns_original_file_as_fallback(self):
        stub = self._doc_without_files()

        # The temporary file must exist while the path is being resolved.
        with tempfile.NamedTemporaryFile(suffix=".pdf") as handle:
            original = Path(handle.name)
            resolved = _resolve_doc_path(stub, original)
            self.assertEqual(resolved, Path(handle.name))

    def test_returns_none_for_none_original_file(self):
        resolved = _resolve_doc_path(self._doc_without_files(), None)
        self.assertIsNone(resolved)
|
||||
|
||||
|
||||
class TestRunZoneExtraction(TestCase):
    """Tests for the full extraction pipeline."""

    def setUp(self):
        self.doc_type = DocumentType.objects.create(name="Invoice")
        self.custom_field = CustomField.objects.create(
            name="Invoice Number",
            data_type=CustomField.FieldDataType.STRING,
        )

    # --- Helpers ---

    def _add_template(self, name, *, enabled, zone_name=None):
        """Create a template for ``self.doc_type``, plus one zone if ``zone_name`` is given."""
        template = OcrTemplate.objects.create(
            name=name,
            document_type=self.doc_type,
            source_width=2480,
            source_height=3508,
            enabled=enabled,
        )
        if zone_name is not None:
            OcrTemplateZone.objects.create(
                template=template,
                name=zone_name,
                custom_field=self.custom_field,
                x=0,
                y=0,
                width=100,
                height=50,
            )
        return template

    def _add_document(self, title, **extra):
        """Create a PDF document row; ``extra`` carries e.g. ``document_type``."""
        return Document.objects.create(
            title=title,
            content="test",
            mime_type="application/pdf",
            **extra,
        )

    def _extract_from_fake_pdf(self, doc):
        """Run extraction against a temporary file holding a fake PDF header."""
        with tempfile.NamedTemporaryFile(suffix=".pdf") as handle:
            handle.write(b"%PDF-1.4 fake")
            handle.flush()
            run_zone_extraction(doc, Path(handle.name))

    def _assert_no_field_values(self):
        """No custom field instance may have been written."""
        self.assertEqual(CustomFieldInstance.objects.count(), 0)

    # --- Early exits ---

    def test_skips_document_without_type(self):
        doc = self._add_document("No Type")
        run_zone_extraction(doc, Path("/nonexistent"))
        self._assert_no_field_values()

    def test_skips_document_without_matching_template(self):
        other_type = DocumentType.objects.create(name="Other")
        doc = self._add_document("No Template", document_type=other_type)
        run_zone_extraction(doc, Path("/nonexistent"))
        self._assert_no_field_values()

    def test_skips_disabled_template(self):
        self._add_template("Disabled", enabled=False, zone_name="Zone")
        doc = self._add_document("Test", document_type=self.doc_type)

        run_zone_extraction(doc, Path("/nonexistent"))
        self._assert_no_field_values()

    def test_skips_template_with_no_zones(self):
        self._add_template("Empty", enabled=True)
        doc = self._add_document("Test", document_type=self.doc_type)

        self._extract_from_fake_pdf(doc)
        self._assert_no_field_values()

    # --- Template processing ---

    @patch("documents.zone_ocr._process_template")
    def test_calls_process_for_enabled_template(self, mock_process):
        self._add_template("Active", enabled=True, zone_name="Zone")
        doc = self._add_document("Test", document_type=self.doc_type)

        self._extract_from_fake_pdf(doc)

        self.assertTrue(mock_process.called)

    @patch("documents.zone_ocr._process_template")
    def test_handles_process_exception_gracefully(self, mock_process):
        """A failing template should not prevent other templates from running."""
        mock_process.side_effect = RuntimeError("test error")

        self._add_template("Failing", enabled=True, zone_name="Zone")
        doc = self._add_document("Test", document_type=self.doc_type)

        # Should not raise
        self._extract_from_fake_pdf(doc)

    def test_handles_none_original_file(self):
        """Should not crash when original_file is None."""
        doc = self._add_document("Test", document_type=self.doc_type)
        # No template, so it exits early — but shouldn't crash on None
        run_zone_extraction(doc, None)

    @patch("documents.zone_ocr._process_template")
    def test_multiple_templates_all_process(self, mock_process):
        """Multiple enabled templates for the same type should all run."""
        for i in range(3):
            self._add_template(f"Template {i}", enabled=True, zone_name=f"Zone {i}")

        doc = self._add_document("Test", document_type=self.doc_type)

        self._extract_from_fake_pdf(doc)

        self.assertEqual(mock_process.call_count, 3)
|
||||
@@ -3,6 +3,7 @@ import logging
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import subprocess
|
||||
import tempfile
|
||||
import zipfile
|
||||
from collections import defaultdict
|
||||
@@ -148,12 +149,14 @@ from documents.matching import match_correspondents
|
||||
from documents.matching import match_document_types
|
||||
from documents.matching import match_storage_paths
|
||||
from documents.matching import match_tags
|
||||
from documents.models import OCR_SUPPORTED_FIELD_TYPES
|
||||
from documents.models import Correspondent
|
||||
from documents.models import CustomField
|
||||
from documents.models import CustomFieldInstance
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import Note
|
||||
from documents.models import OcrTemplate
|
||||
from documents.models import PaperlessTask
|
||||
from documents.models import SavedView
|
||||
from documents.models import ShareLink
|
||||
@@ -195,6 +198,7 @@ from documents.serialisers import EditPdfDocumentsSerializer
|
||||
from documents.serialisers import EmailSerializer
|
||||
from documents.serialisers import MergeDocumentsSerializer
|
||||
from documents.serialisers import NotesSerializer
|
||||
from documents.serialisers import OcrTemplateSerializer
|
||||
from documents.serialisers import PostDocumentSerializer
|
||||
from documents.serialisers import RemovePasswordDocumentsSerializer
|
||||
from documents.serialisers import ReprocessDocumentsSerializer
|
||||
@@ -2029,6 +2033,73 @@ class DocumentViewSet(
|
||||
},
|
||||
),
|
||||
)
|
||||
@action(methods=["post"], detail=True, url_path="run-zone-ocr")
def run_zone_ocr(self, request, pk=None):
    """Run zone-based OCR extraction on this document and report the outcome.

    The extraction writes into the document (custom fields, title, ASN,
    created date), so the caller must hold change permission on it.

    Responses:
        200 -- ``{"results": [{"template", "zone", "custom_field", "value"}, ...]}``
               with one entry per zone of every enabled template.
        400 -- the document has no document type.
        403 -- the caller may not change this document.
        404 -- unknown document, no enabled template for its type, or the
               document file is missing on disk.
    """
    # Imported locally, like the zone OCR engine below, to keep this edit
    # self-contained within the action.
    from documents.permissions import has_perms_owner_aware
    from documents.zone_ocr import run_zone_extraction

    try:
        document = Document.objects.get(pk=pk)
    except Document.DoesNotExist:
        raise Http404

    # The raw manager lookup above bypasses the viewset's object filtering,
    # so ownership / object permissions have to be enforced explicitly.
    if not has_perms_owner_aware(request.user, "change_document", document):
        return Response(
            {"error": "Insufficient permissions"},
            status=status.HTTP_403_FORBIDDEN,
        )

    if not document.document_type_id:
        return Response(
            {"error": "Document has no type assigned"},
            status=status.HTTP_400_BAD_REQUEST,
        )

    templates = OcrTemplate.objects.filter(
        document_type_id=document.document_type_id,
        enabled=True,
    )
    if not templates.exists():
        return Response(
            {"error": "No OCR templates found for this document type"},
            status=status.HTTP_404_NOT_FOUND,
        )

    # Only used as an up-front existence check; the engine resolves the
    # file path again on its own.
    doc_path = document.archive_path or document.source_path
    if not doc_path or not Path(doc_path).is_file():
        return Response(
            {"error": "Document file not found"},
            status=status.HTTP_404_NOT_FOUND,
        )

    run_zone_extraction(document, None)

    # Built-in targets are read from the in-memory document, which the
    # extraction above mutates in place. Computed once instead of per zone.
    builtin_labels = {"title": "Title", "asn": "ASN", "created": "Created"}
    builtin_values = {
        "title": document.title,
        "asn": document.archive_serial_number,
        "created": document.created.isoformat() if document.created else None,
    }

    results = []
    for template in templates.prefetch_related("zones", "zones__custom_field"):
        for zone in template.zones.all():
            target = getattr(zone, "target", None) or "custom_field"
            if target == "custom_field" and zone.custom_field_id:
                cf_instance = document.custom_fields.filter(
                    field=zone.custom_field,
                ).first()
                field_name = zone.custom_field.name
                value = cf_instance.value if cf_instance else None
            else:
                field_name = builtin_labels.get(target, target)
                value = builtin_values.get(target)
            results.append(
                {
                    "template": template.name,
                    "zone": zone.name,
                    "custom_field": field_name,
                    "value": value,
                },
            )

    return Response({"results": results})
|
||||
|
||||
@action(
|
||||
methods=["delete"],
|
||||
detail=True,
|
||||
@@ -5269,3 +5340,224 @@ def serve_logo(request: HttpRequest, filename: str | None = None) -> FileRespons
|
||||
filename=app_logo.name,
|
||||
as_attachment=True,
|
||||
)
|
||||
|
||||
|
||||
class OcrTemplateViewSet(ModelViewSet):
    """CRUD for OCR templates with zone definitions.

    Besides the standard model endpoints this exposes three helpers for the
    frontend template editor: rendering a document page as an image,
    test-running a single ad-hoc zone, and creating a custom field inline.
    """

    queryset = (
        OcrTemplate.objects.all()
        .prefetch_related(
            "zones",
            "zones__custom_field",
        )
        .order_by("name")
    )
    serializer_class = OcrTemplateSerializer
    permission_classes = (IsAuthenticated, PaperlessObjectPermissions)
    pagination_class = StandardPagination

    @staticmethod
    def _get_viewable_document(request, doc_id):
        """Return the document with this id if the caller may view it, else None.

        The viewset's permission classes apply to OCR templates, not to
        documents, so document access is checked here. Missing and
        inaccessible documents are deliberately indistinguishable, so that
        the existence of other users' documents is not revealed.
        """
        from documents.permissions import has_perms_owner_aware

        try:
            document = Document.objects.get(pk=doc_id)
        except (Document.DoesNotExist, ValueError, TypeError):
            return None
        if not has_perms_owner_aware(request.user, "view_document", document):
            return None
        return document

    @action(
        detail=False,
        methods=["get"],
        url_path=r"document-page-image/(?P<doc_id>[0-9]+)/(?P<page>[0-9]+)",
    )
    def document_page_image(self, request, doc_id=None, page=None):
        """Render a specific page of a document as a PNG image.

        Used by the frontend template editor to display document pages
        as images that users can draw zones on. ``page`` is 0-indexed.
        Image documents are returned as-is with their own MIME type.

        Raises:
            Http404: document missing or not viewable by the caller, page out
                of range, file missing, or the page could not be rendered.
        """
        document = self._get_viewable_document(request, doc_id)
        if document is None:
            raise Http404("Document not found")

        page_num = int(page)

        # Validate page number (only possible when the page count is known)
        if document.page_count and page_num >= document.page_count:
            raise Http404(
                f"Page {page_num} out of range (document has {document.page_count} pages)",
            )

        doc_path = document.archive_path or document.source_path
        if not doc_path or not Path(doc_path).is_file():
            raise Http404("Document file not found")

        # Check if document is an image (single page, no PDF rendering needed)
        if document.mime_type and document.mime_type.startswith("image/"):
            content = Path(doc_path).read_bytes()
            return HttpResponse(content, content_type=document.mime_type)

        with tempfile.TemporaryDirectory(dir=settings.SCRATCH_DIR) as tmp_dir:
            output_prefix = Path(tmp_dir) / "page"
            try:
                subprocess.run(
                    [
                        "pdftoppm",
                        "-png",
                        "-r",
                        "150",  # Lower DPI for preview
                        "-f",
                        str(page_num + 1),  # pdftoppm pages are 1-indexed
                        "-l",
                        str(page_num + 1),
                        str(doc_path),
                        str(output_prefix),
                    ],
                    check=True,
                    capture_output=True,
                    timeout=30,
                )
            except subprocess.TimeoutExpired:
                # Previously uncaught, which surfaced as a server error.
                raise Http404("Failed to render page: pdftoppm timed out")
            except subprocess.CalledProcessError as e:
                raise Http404(
                    f"Failed to render page: {e.stderr.decode(errors='replace')[:200]}",
                )
            except FileNotFoundError:
                raise Http404("pdftoppm not available - is poppler-utils installed?")

            rendered = sorted(Path(tmp_dir).glob("page-*.png"))
            if not rendered:
                raise Http404("No rendered page found")

            # Read before the temporary directory is removed.
            content = rendered[0].read_bytes()

        return HttpResponse(content, content_type="image/png")

    @action(detail=False, methods=["post"], url_path="test-zone")
    def test_zone(self, request):
        """Run OCR on a single ad-hoc zone of a document and return what it
        yields: the raw OCR text, the transformed value, and whether the
        validation regex matches. Non-destructive - writes nothing. Used by the
        editor's per-zone test so a user can tune the zone/regex before saving.

        Accepts: {"document": <id>, "zone": {x, y, width, height, page,
        ocr_language, transform, validation_regex, zone_source_width,
        zone_source_height}}.

        Responses:
            200 -- {"raw_text", "value", "regex", "regex_match"}; regex_match
                   is None when no regex was given, nothing was extracted, or
                   the regex does not compile.
            400 -- the zone definition is malformed or too small.
            404 -- document missing, not viewable by the caller, or its file
                   is absent.
        """
        from documents.models import OcrTemplateZone
        from documents.zone_ocr import extract_zone_preview

        document = self._get_viewable_document(
            request,
            request.data.get("document"),
        )
        if document is None:
            return Response(
                {"error": "Document not found"},
                status=status.HTTP_404_NOT_FOUND,
            )

        doc_path = document.archive_path or document.source_path
        if not doc_path or not Path(doc_path).is_file():
            return Response(
                {"error": "Document file not found"},
                status=status.HTTP_404_NOT_FOUND,
            )

        zone_data = request.data.get("zone") or {}
        try:
            # Every numeric input is coerced inside this try so malformed
            # client data yields a 400 instead of an unhandled exception.
            # The page must be an int (or None) because the page resolver
            # compares it numerically.
            raw_page = zone_data.get("page")
            zone = OcrTemplateZone(
                name=zone_data.get("name") or "test",
                x=int(zone_data.get("x", 0)),
                y=int(zone_data.get("y", 0)),
                width=int(zone_data.get("width", 0)),
                height=int(zone_data.get("height", 0)),
                page=None if raw_page in (None, "") else int(raw_page),
                ocr_language=str(zone_data.get("ocr_language") or "eng"),
                transform=str(zone_data.get("transform") or "strip"),
                date_format=str(zone_data.get("date_format") or ""),
                validation_regex=str(zone_data.get("validation_regex") or ""),
            )
            source_width = int(zone_data.get("zone_source_width") or 0)
            source_height = int(zone_data.get("zone_source_height") or 0)
        except (AttributeError, ValueError, TypeError):
            # AttributeError covers a "zone" payload that is not an object.
            return Response(
                {"error": "Invalid zone definition"},
                status=status.HTTP_400_BAD_REQUEST,
            )

        if zone.width < 2 or zone.height < 2:
            return Response(
                {"error": "Zone is too small to test"},
                status=status.HTTP_400_BAD_REQUEST,
            )

        result = extract_zone_preview(
            Path(doc_path),
            zone,
            source_width,
            source_height,
            document.page_count,
        )

        regex_match = None
        if zone.validation_regex and result.get("value") is not None:
            # NOTE(security): the pattern is caller-supplied and evaluated
            # with the backtracking `re` engine; a pathological pattern can
            # be slow. Flagged for review, not changed here.
            try:
                regex_match = (
                    re.fullmatch(zone.validation_regex, result["value"]) is not None
                )
            except re.error:
                regex_match = None

        return Response(
            {
                "raw_text": result.get("raw_text"),
                "value": result.get("value"),
                "regex": zone.validation_regex,
                "regex_match": regex_match,
            },
        )

    @action(detail=False, methods=["post"], url_path="quick-create-field")
    def quick_create_field(self, request):
        """Create a custom field inline from the template editor.

        Accepts: {"name": "Invoice Number", "data_type": "string"}
        Returns the created field so the frontend can immediately use it.
        If a field with that name already exists it is returned instead
        (``"created": False``); this reuse path runs before the permission
        check, so only actual creation requires add permission.

        Responses:
            200 -- existing field returned.
            201 -- field created.
            400 -- missing name or unsupported data type.
            403 -- caller lacks ``documents.add_customfield``.
        """
        # Non-string JSON values (numbers, null, lists) are treated as
        # missing rather than crashing on .strip().
        raw_name = request.data.get("name", "")
        raw_type = request.data.get("data_type", "")
        name = raw_name.strip() if isinstance(raw_name, str) else ""
        data_type = raw_type.strip() if isinstance(raw_type, str) else ""

        if not name:
            return Response(
                {"error": "Field name is required"},
                status=status.HTTP_400_BAD_REQUEST,
            )

        if data_type not in OCR_SUPPORTED_FIELD_TYPES:
            return Response(
                {
                    "error": f"Unsupported data type '{data_type}'. "
                    f"Supported: {', '.join(sorted(OCR_SUPPORTED_FIELD_TYPES))}",
                },
                status=status.HTTP_400_BAD_REQUEST,
            )

        # Check if field already exists
        existing = CustomField.objects.filter(name=name).first()
        if existing:
            return Response(
                {
                    "id": existing.pk,
                    "name": existing.name,
                    "data_type": existing.data_type,
                    "created": False,
                },
            )

        # Check user has permission to create custom fields
        if not request.user.has_perm("documents.add_customfield"):
            return Response(
                {"error": "You don't have permission to create custom fields"},
                status=status.HTTP_403_FORBIDDEN,
            )

        field = CustomField.objects.create(name=name, data_type=data_type)
        return Response(
            {
                "id": field.pk,
                "name": field.name,
                "data_type": field.data_type,
                "created": True,
            },
            status=status.HTTP_201_CREATED,
        )
|
||||
|
||||
@@ -0,0 +1,757 @@
|
||||
"""
|
||||
Zone-based OCR extraction engine.
|
||||
|
||||
After a document is consumed, this module checks if the document's type has
|
||||
an active OCR template. If so, it renders the relevant pages as images,
|
||||
crops each zone, runs Tesseract OCR on the crop, applies transforms,
|
||||
and writes the results to the mapped custom fields.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import string
|
||||
import subprocess
|
||||
import tempfile
|
||||
from datetime import date
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from django.conf import settings
|
||||
from PIL import Image
|
||||
|
||||
from documents.models import CustomField
|
||||
from documents.models import CustomFieldInstance
|
||||
from documents.models import Document
|
||||
from documents.models import OcrTemplate
|
||||
from documents.models import OcrTemplateZone
|
||||
|
||||
logger = logging.getLogger("paperless.zone_ocr")
|
||||
|
||||
|
||||
def run_zone_extraction(
    document: Document,
    original_file: Path | None,
) -> None:
    """
    Apply every enabled OCR template of the document's type to the document.

    Does nothing when the document has no type, when no enabled template
    exists for that type, or when no file for the document can be found on
    disk. A failure inside one template is logged and does not stop the
    remaining templates.
    Called from the document_consumption_finished signal handler.
    """
    type_id = document.document_type_id
    if not type_id:
        return

    active_templates = OcrTemplate.objects.filter(
        document_type_id=type_id,
        enabled=True,
    ).prefetch_related("zones", "zones__custom_field")
    if not active_templates.exists():
        return

    # Candidate files are tried in the order archive, source, signal argument.
    source_file = _resolve_doc_path(document, original_file)
    if source_file is None:
        logger.warning(
            "Zone OCR: no accessible file for document %d",
            document.pk,
        )
        return

    for tpl in active_templates:
        tpl_zones = list(tpl.zones.all())
        if tpl_zones:
            logger.info(
                "Zone OCR: processing template '%s' for document %d (%d zones)",
                tpl.name,
                document.pk,
                len(tpl_zones),
            )
            try:
                _process_template(document, source_file, tpl, tpl_zones)
            except Exception:
                logger.exception(
                    "Zone OCR: error processing template '%s' for document %d",
                    tpl.name,
                    document.pk,
                )
|
||||
|
||||
|
||||
def _resolve_doc_path(
|
||||
document: Document,
|
||||
original_file: Path | None,
|
||||
) -> Path | None:
|
||||
"""Find an accessible file for the document."""
|
||||
candidates = []
|
||||
if document.has_archive_version:
|
||||
candidates.append(document.archive_path)
|
||||
candidates.append(document.source_path)
|
||||
if original_file is not None:
|
||||
candidates.append(original_file)
|
||||
|
||||
for path in candidates:
|
||||
if path is not None and Path(path).is_file():
|
||||
return Path(path)
|
||||
return None
|
||||
|
||||
|
||||
def _resolve_page_idx(page_value, page_count) -> int:
|
||||
"""Resolve a 1-indexed page (1 = first, -1 = last) to a 0-indexed image
|
||||
index. A blank page_value defaults to the first page."""
|
||||
if page_value is None:
|
||||
return 0
|
||||
if page_value == -1:
|
||||
return (page_count - 1) if page_count else 0
|
||||
if page_value >= 1:
|
||||
return page_value - 1
|
||||
return 0
|
||||
|
||||
|
||||
def _process_template(
    document: Document,
    doc_path: Path,
    template: OcrTemplate,
    zones: list[OcrTemplateZone],
) -> None:
    """Process all zones in a template against a document.

    Each zone is OCR'd independently, then zones are grouped by their target
    field and each field is written exactly once. When several zones share a
    field, their values are combined via the template's per-field format string
    (or joined in order if none is set) — this avoids the zones overwriting each
    other's value.

    A zone whose validation regex rejects the extracted value contributes
    nothing. A zone whose validation regex does not compile is treated the
    same way (its value is discarded and a warning is logged) instead of
    aborting every other zone of the template.
    """
    # Resolve each zone's page once and reuse it for rendering and extraction.
    zone_pages = [
        (zone, _resolve_page_idx(zone.page, document.page_count)) for zone in zones
    ]
    pages_needed: set[int] = {page_idx for _, page_idx in zone_pages}

    # Pass 1: OCR every zone into a value (or None if it failed/was rejected).
    # Keyed by id(zone): the zone objects stay alive in `zones` throughout.
    zone_values: dict[int, str | None] = {}
    with tempfile.TemporaryDirectory(dir=settings.SCRATCH_DIR) as tmp_dir:
        tmp_path = Path(tmp_dir)

        page_images = _render_pages(
            doc_path,
            pages_needed,
            tmp_path,
            document.page_count,
        )

        for zone, page_idx in zone_pages:
            if page_idx not in page_images:
                logger.warning(
                    "Zone OCR: page %d not available for zone '%s'",
                    page_idx,
                    zone.name,
                )
                continue

            # A zone may carry its own reference size; otherwise the
            # template-wide size applies.
            src_w = zone.zone_source_width or template.source_width
            src_h = zone.zone_source_height or template.source_height

            extracted = _extract_zone(
                page_images[page_idx],
                zone,
                src_w,
                src_h,
                tmp_path,
            )

            if extracted is not None and zone.validation_regex:
                try:
                    accepted = (
                        re.fullmatch(zone.validation_regex, extracted) is not None
                    )
                except re.error as exc:
                    # A broken pattern cannot validate anything: discard this
                    # zone's value but keep processing the other zones.
                    logger.warning(
                        "Zone OCR: zone '%s' has an invalid validation regex '%s' (%s)",
                        zone.name,
                        zone.validation_regex,
                        exc,
                    )
                    accepted = False
                else:
                    if not accepted:
                        logger.info(
                            "Zone OCR: '%s' value %r rejected by regex '%s'",
                            zone.name,
                            extracted[:100],
                            zone.validation_regex,
                        )
                if not accepted:
                    extracted = None

            zone_values[id(zone)] = extracted

    # Pass 2: group zones by target field and write each field once.
    grouped: dict[str, list[OcrTemplateZone]] = {}
    for zone in zones:
        grouped.setdefault(_field_key(zone), []).append(zone)

    combine_formats = template.combine_formats or {}
    for key, field_zones in grouped.items():
        value = _combine_field_value(
            combine_formats.get(key, ""),
            field_zones,
            zone_values,
        )
        if not value:
            continue

        # All zones in a group share one target, so the first one stands in
        # for the whole group when writing.
        target_zone = field_zones[0]
        _write_zone_value(document, target_zone, value)
        logger.info(
            "Zone OCR: %s = %r (from %d zone(s))",
            _zone_target_label(target_zone),
            value[:100],
            len(field_zones),
        )
|
||||
|
||||
|
||||
def _field_key(zone: OcrTemplateZone) -> str:
|
||||
"""Identify a zone's target field. Custom fields key by id, built-in targets
|
||||
by their name. Matches the key used in OcrTemplate.combine_formats and on the
|
||||
frontend field select."""
|
||||
target = getattr(zone, "target", None) or "custom_field"
|
||||
if target == "custom_field" and zone.custom_field_id:
|
||||
return str(zone.custom_field_id)
|
||||
return target
|
||||
|
||||
|
||||
def _combine_field_value(
|
||||
fmt: str,
|
||||
field_zones: list[OcrTemplateZone],
|
||||
zone_values: dict[int, str | None],
|
||||
) -> str:
|
||||
"""Combine the OCR values of all zones targeting one field.
|
||||
|
||||
With a format string, `{Zone Name}` tokens are replaced by that zone's value
|
||||
and literal text is kept; separators left dangling by an empty token are
|
||||
cleaned up. Without a format, the zone values are joined in order by a space.
|
||||
"""
|
||||
values = {z.name: (zone_values.get(id(z)) or "") for z in field_zones}
|
||||
|
||||
if not fmt:
|
||||
parts = [zone_values.get(id(z)) or "" for z in field_zones]
|
||||
return " ".join(p for p in parts if p).strip()
|
||||
|
||||
def _replace(match: re.Match) -> str:
|
||||
return values.get(match.group(1).strip(), "")
|
||||
|
||||
combined = re.sub(r"\{([^{}]+)\}", _replace, fmt)
|
||||
# Tidy up separators an empty token may have left behind.
|
||||
combined = re.sub(r"\s{2,}", " ", combined)
|
||||
combined = re.sub(r"([^\w\s])\s*\1+", r"\1", combined)
|
||||
return combined.strip().strip("-/.,;:| \t")
|
||||
|
||||
|
||||
def _render_pages(
    doc_path: Path,
    pages: set[int],
    tmp_dir: Path,
    page_count: int | None,
) -> dict[int, Path]:
    """Render the requested pages to PNG files with pdftoppm (poppler-utils).

    ``pages`` holds 0-indexed page numbers (see _resolve_page_idx). The result
    maps each page that rendered successfully to its PNG path inside
    ``tmp_dir``; pages that fail are logged and left out. An image document is
    not rendered at all: the file itself is returned as page 0.
    ``page_count`` is accepted but not used by this function.
    """
    if (detected := _detect_mime(doc_path)) and detected.startswith("image/"):
        # Single-image document: the file already is the page.
        return {0: doc_path}

    rendered_pages: dict[int, Path] = {}
    for page_no in pages:
        if page_no < 0:
            logger.warning("Zone OCR: invalid page index %d", page_no)
            continue

        one_based = str(page_no + 1)  # pdftoppm is 1-indexed
        prefix = tmp_dir / f"page_{page_no}"
        command = [
            "pdftoppm",
            "-png",
            "-r",
            "300",
            "-f",
            one_based,
            "-l",
            one_based,
            str(doc_path),
            str(prefix),
        ]
        try:
            subprocess.run(command, check=True, capture_output=True, timeout=60)
        except subprocess.TimeoutExpired:
            logger.error("Zone OCR: pdftoppm timed out for page %d", page_no)
            continue
        except subprocess.CalledProcessError as e:
            logger.error(
                "Zone OCR: pdftoppm failed for page %d: %s",
                page_no,
                e.stderr.decode(errors="replace") if e.stderr else str(e),
            )
            continue
        except FileNotFoundError:
            logger.error("Zone OCR: pdftoppm not found — is poppler-utils installed?")
            # The binary is missing, so no other page can succeed either.
            return rendered_pages

        # pdftoppm appends "-<page number>.png" to the prefix.
        outputs = sorted(tmp_dir.glob(f"page_{page_no}-*.png"))
        if outputs:
            rendered_pages[page_no] = outputs[0]

    return rendered_pages
|
||||
|
||||
|
||||
def _crop_zone(
    page_img: Path,
    zone: OcrTemplateZone,
    source_width: int,
    source_height: int,
    tmp_dir: Path,
) -> Image.Image | None:
    """Cut a zone's rectangle out of a rendered page.

    The zone is given in the coordinate space ``source_width`` x
    ``source_height`` and is scaled to the real pixel size of ``page_img``.
    Returns an independent copy of the cropped image, or None when the
    resulting crop is under 2 pixels in either direction or anything goes
    wrong (which is logged). ``tmp_dir`` is accepted but not used here.
    """
    try:
        with Image.open(page_img) as page:
            page_w, page_h = page.size
            factor_x = page_w / source_width
            factor_y = page_h / source_height

            left = int(zone.x * factor_x)
            top = int(zone.y * factor_y)
            right = int((zone.x + zone.width) * factor_x)
            bottom = int((zone.y + zone.height) * factor_y)

            # Keep the box inside the image so an oversized zone can't crop
            # out of bounds; right/bottom stay at least 1px past left/top.
            left = max(0, min(left, page_w))
            top = max(0, min(top, page_h))
            right = max(left + 1, min(right, page_w))
            bottom = max(top + 1, min(bottom, page_h))

            too_small = (right - left) < 2 or (bottom - top) < 2
            if too_small:
                logger.warning("Zone OCR: crop too small for zone '%s'", zone.name)
                return None

            box = (left, top, right, bottom)
            return page.crop(box).copy()
    except Exception:
        logger.exception("Zone OCR: crop failed for zone '%s'", zone.name)
        return None
|
||||
|
||||
|
||||
def _read_barcode(cropped: Image.Image, zone_name: str) -> str | None:
    """Decode a QR code or barcode in a cropped image with zxingcpp.

    Returns the text of the first code found, or None when there is none,
    when zxingcpp cannot be imported, or when decoding raises.
    """
    try:
        import zxingcpp

        found = zxingcpp.read_barcodes(cropped)
        if not found:
            logger.debug("Zone OCR: no barcode found in zone '%s'", zone_name)
            return None

        decoded = found[0].text
        logger.debug(
            "Zone OCR: barcode found in zone '%s': %s",
            zone_name,
            decoded[:100],
        )
        return decoded
    except ImportError:
        logger.error("Zone OCR: zxingcpp not available — install zxing-cpp")
        return None
    except Exception:
        logger.exception("Zone OCR: barcode read failed for zone '%s'", zone_name)
        return None
|
||||
|
||||
|
||||
def _ocr_text(cropped: Image.Image, zone: OcrTemplateZone, tmp_dir: Path) -> str | None:
    """Run Tesseract on a cropped zone image and return the recognised text.

    The crop is first saved into ``tmp_dir`` under a name derived from the
    zone's primary key. Returns the stripped text, or None when Tesseract
    outputs nothing, times out, exits with an error, or is not installed.
    """
    image_file = tmp_dir / f"zone_{zone.pk}.png"
    cropped.save(image_file)

    command = [
        "tesseract",
        str(image_file),
        "stdout",
        "-l",
        zone.ocr_language,
        "--psm",
        "6",  # Assume uniform block of text
    ]
    try:
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=30,
            check=True,
        )
    except subprocess.TimeoutExpired:
        logger.error("Zone OCR: Tesseract timed out for zone '%s'", zone.name)
        return None
    except subprocess.CalledProcessError as e:
        logger.error(
            "Zone OCR: Tesseract failed for zone '%s': %s",
            zone.name,
            e.stderr[:200] if e.stderr else str(e),
        )
        return None
    except FileNotFoundError:
        logger.error("Zone OCR: Tesseract not found — is tesseract-ocr installed?")
        return None

    recognised = completed.stdout.strip()
    return recognised if recognised else None
|
||||
|
||||
|
||||
def _extract_zone(
    page_img: Path,
    zone: OcrTemplateZone,
    source_width: int,
    source_height: int,
    tmp_dir: Path,
) -> str | None:
    """Crop one zone from a page image and turn it into a transformed value.

    Zones whose transform is ``qr_code`` are read with the barcode reader and
    skip Tesseract entirely; all other zones are OCR'd. Returns None when the
    crop fails or the reader yields no text.
    """
    crop = _crop_zone(page_img, zone, source_width, source_height, tmp_dir)
    if crop is None:
        return None

    if zone.transform == "qr_code":
        raw = _read_barcode(crop, zone.name)
    else:
        raw = _ocr_text(crop, zone, tmp_dir)

    if not raw:
        return None

    date_fmt = getattr(zone, "date_format", "") or ""
    return _apply_transform(raw, zone.transform, date_fmt)
|
||||
|
||||
|
||||
def extract_zone_preview(
    doc_path: Path,
    zone: OcrTemplateZone,
    source_width: int,
    source_height: int,
    page_count: int | None,
) -> dict:
    """Dry-run a single zone for the editor's per-zone test.

    Renders the zone's page, crops the zone, reads it (OCR, or the barcode
    reader for ``qr_code`` zones) and applies the transform. No custom field
    or document attribute is written. When either source dimension is
    missing, the rendered page's own pixel size is used as reference.

    Returns:
        ``{"raw_text": ..., "value": ...}``; both are None when the page
        could not be rendered or the crop failed, and ``value`` is None when
        nothing was read.
    """
    nothing = {"raw_text": None, "value": None}

    # zone.page is 1-indexed (1 = first, -1 = last); it is converted to a
    # 0-indexed image index the same way the production path does it.
    wanted_page = _resolve_page_idx(zone.page, page_count)

    with tempfile.TemporaryDirectory(dir=settings.SCRATCH_DIR) as scratch:
        scratch_dir = Path(scratch)
        rendered = _render_pages(doc_path, {wanted_page}, scratch_dir, page_count)
        if wanted_page not in rendered:
            return nothing
        page_file = rendered[wanted_page]

        if not source_width or not source_height:
            with Image.open(page_file) as im:
                source_width, source_height = im.size

        crop = _crop_zone(
            page_file,
            zone,
            source_width,
            source_height,
            scratch_dir,
        )
        if crop is None:
            return nothing

        is_barcode = zone.transform == "qr_code"
        raw_text = (
            _read_barcode(crop, zone.name)
            if is_barcode
            else _ocr_text(crop, zone, scratch_dir)
        )

    if raw_text:
        value = _apply_transform(
            raw_text,
            zone.transform,
            getattr(zone, "date_format", "") or "",
        )
    else:
        value = None
    return {"raw_text": raw_text, "value": value}
|
||||
|
||||
|
||||
def _parse_date(text: str, fmt: str) -> str:
|
||||
"""Parse a date from OCR text. With a Python strptime `fmt`, try that first;
|
||||
otherwise (or on failure) fall back to dateparser auto-detection. Returns an
|
||||
ISO date string, or the original text if nothing parses."""
|
||||
text = text.strip()
|
||||
if not text:
|
||||
return text
|
||||
if fmt:
|
||||
try:
|
||||
return datetime.strptime(text, fmt).date().isoformat()
|
||||
except ValueError:
|
||||
pass
|
||||
try:
|
||||
import dateparser
|
||||
|
||||
parsed = dateparser.parse(
|
||||
text,
|
||||
settings={
|
||||
"PREFER_DAY_OF_MONTH": "first",
|
||||
"RETURN_AS_TIMEZONE_AWARE": False,
|
||||
},
|
||||
)
|
||||
if parsed:
|
||||
return parsed.date().isoformat()
|
||||
except Exception:
|
||||
logger.debug("Zone OCR: dateparser failed for %r", text[:50])
|
||||
return text
|
||||
|
||||
|
||||
def _apply_transform(text: str, transform: str, date_format: str = "") -> str:
|
||||
"""Apply post-processing transform to extracted text."""
|
||||
text = text.strip()
|
||||
if not text:
|
||||
return text
|
||||
|
||||
if transform in ("strip", "none"):
|
||||
return text
|
||||
elif transform == "date":
|
||||
return _parse_date(text, date_format)
|
||||
elif transform == "uppercase":
|
||||
return text.upper()
|
||||
elif transform == "lowercase":
|
||||
return text.lower()
|
||||
elif transform == "numeric":
|
||||
result = re.sub(r"[^\d.,\-]", "", text)
|
||||
return result if result else text
|
||||
elif transform == "strip_punctuation":
|
||||
return text.strip(string.punctuation + " \t\r\n")
|
||||
elif transform == "qr_code":
|
||||
# Barcode/QR content as read by _read_barcode.
|
||||
return text
|
||||
return text
|
||||
|
||||
|
||||
def _zone_target_label(zone: OcrTemplateZone) -> str:
|
||||
"""Human label of a zone's write target (for logging)."""
|
||||
target = getattr(zone, "target", None) or "custom_field"
|
||||
if target == "custom_field":
|
||||
return zone.custom_field.name if zone.custom_field_id else "(no field)"
|
||||
return {"title": "Title", "asn": "ASN", "created": "Created"}.get(target, target)
|
||||
|
||||
|
||||
def _parse_created_datetime(value: str):
    """Convert an extracted value into a tz-aware datetime for document.created.

    An ISO ``YYYY-MM-DD`` date found anywhere in the value wins (the zone
    should use a date transform); if there is none, or it is not a valid
    calendar date, dateparser is tried. Returns None when no date can be
    parsed.
    """
    from django.utils import timezone as djtz

    def _aware(moment):
        # Naive datetimes are made aware via Django's timezone utilities.
        return djtz.make_aware(moment) if djtz.is_naive(moment) else moment

    iso_hit = re.search(r"(\d{4})-(\d{2})-(\d{2})", value)
    if iso_hit:
        year, month, day = (int(part) for part in iso_hit.groups())
        try:
            return _aware(datetime(year, month, day))
        except ValueError:
            pass

    try:
        import dateparser

        guess = dateparser.parse(
            value,
            settings={"RETURN_AS_TIMEZONE_AWARE": False},
        )
        if guess:
            return _aware(guess)
    except Exception:
        logger.debug("Zone OCR: dateparser failed for created value %r", value[:50])
    return None
|
||||
|
||||
|
||||
def _write_zone_value(
    document: Document,
    zone: OcrTemplateZone,
    value: str,
) -> None:
    """Write an extracted value to the zone's target — a custom field, or a
    built-in document field (title / archive_serial_number / created).

    Values that cannot be used are skipped with a log message rather than
    raised: an ASN without digits, an ASN outside the allowed range or
    already used by another document, and a created date that does not
    parse. This keeps one bad value from aborting the remaining fields of
    the template, and leaves the in-memory document untouched on a skip.
    """
    target = getattr(zone, "target", None) or "custom_field"

    if target == "custom_field":
        if zone.custom_field_id:
            _write_custom_field(document, zone.custom_field, value)
        else:
            logger.debug("Zone OCR: zone '%s' has no custom field set", zone.name)
        return

    if target == "title":
        # The title is truncated to 128 characters before saving.
        document.title = value[:128]
        document.save(update_fields=["title"])
    elif target == "asn":
        digits = re.sub(r"[^\d]", "", value)
        if not digits:
            logger.debug(
                "Zone OCR: ASN zone '%s' produced no digits (%r)",
                zone.name,
                value[:50],
            )
            return
        asn = int(digits)

        # NOTE(review): assumes the model exposes its upper ASN bound as
        # ARCHIVE_SERIAL_NUMBER_MAX; if it does not, this check is skipped.
        asn_max = getattr(Document, "ARCHIVE_SERIAL_NUMBER_MAX", None)
        if asn_max is not None and asn > asn_max:
            logger.warning(
                "Zone OCR: ASN zone '%s' produced out-of-range ASN %r; skipping",
                zone.name,
                digits[:50],
            )
            return

        # Saving an ASN that another document already holds would raise and
        # abort the rest of the template, so check first.
        taken = (
            Document.objects.filter(archive_serial_number=asn)
            .exclude(pk=document.pk)
            .exists()
        )
        if taken:
            logger.warning(
                "Zone OCR: ASN zone '%s' produced ASN %d which is already in use; skipping",
                zone.name,
                asn,
            )
            return

        document.archive_serial_number = asn
        document.save(update_fields=["archive_serial_number"])
    elif target == "created":
        parsed = _parse_created_datetime(value)
        if parsed is None:
            logger.debug(
                "Zone OCR: created zone '%s' could not parse a date (%r)",
                zone.name,
                value[:50],
            )
            return
        document.created = parsed
        document.save(update_fields=["created"])
|
||||
|
||||
|
||||
def _write_custom_field(
    document: Document,
    custom_field: CustomField,
    value: str,
) -> None:
    """Store an extracted value in the document's instance of a custom field.

    The string is first converted to the field's data type; when that
    conversion yields None nothing is written. Otherwise the document's
    instance of the field is created or updated with the converted value.
    """
    field_type = custom_field.data_type
    converted = _convert_value(value, field_type)
    if converted is None:
        logger.debug(
            "Zone OCR: skipping custom field '%s' — value conversion returned None",
            custom_field.name,
        )
        return

    # The attribute that holds the value depends on the field's data type.
    storage_attr = CustomFieldInstance.get_value_field_name(field_type)
    CustomFieldInstance.objects.update_or_create(
        document=document,
        field=custom_field,
        defaults={storage_attr: converted},
    )
|
||||
|
||||
|
||||
def _convert_value(value: str, data_type: str) -> object | None:
|
||||
"""Convert an extracted OCR string to the appropriate type for the custom field."""
|
||||
if not value:
|
||||
return None
|
||||
|
||||
try:
|
||||
if data_type in (
|
||||
CustomField.FieldDataType.STRING,
|
||||
CustomField.FieldDataType.URL,
|
||||
):
|
||||
return value[:128]
|
||||
|
||||
elif data_type == CustomField.FieldDataType.LONG_TEXT:
|
||||
return value
|
||||
|
||||
elif data_type == CustomField.FieldDataType.INT:
|
||||
digits = re.sub(r"[^\d\-]", "", value)
|
||||
# Handle edge case: only dashes or empty
|
||||
digits = digits.lstrip("-") or ""
|
||||
if not digits:
|
||||
return None
|
||||
# Restore leading minus if original had one
|
||||
if value.strip().startswith("-"):
|
||||
digits = "-" + digits
|
||||
return int(digits)
|
||||
|
||||
elif data_type == CustomField.FieldDataType.FLOAT:
|
||||
# Handle European format: 1.234,56 → 1234.56
|
||||
cleaned = re.sub(r"[^\d.,\-]", "", value)
|
||||
if not cleaned or cleaned in (".", ",", "-"):
|
||||
return None
|
||||
# If both . and , present, the last one is the decimal separator
|
||||
if "," in cleaned and "." in cleaned:
|
||||
if cleaned.rindex(",") > cleaned.rindex("."):
|
||||
# European: 1.234,56
|
||||
cleaned = cleaned.replace(".", "").replace(",", ".")
|
||||
else:
|
||||
# US: 1,234.56
|
||||
cleaned = cleaned.replace(",", "")
|
||||
elif "," in cleaned:
|
||||
# Only comma — treat as decimal separator
|
||||
cleaned = cleaned.replace(",", ".")
|
||||
return float(cleaned)
|
||||
|
||||
elif data_type == CustomField.FieldDataType.DATE:
|
||||
match = re.search(r"(\d{4})-(\d{2})-(\d{2})", value)
|
||||
if match:
|
||||
y, m, d = match.groups()
|
||||
# Validate the date
|
||||
date(int(y), int(m), int(d))
|
||||
return f"{y}-{m}-{d}"
|
||||
return None
|
||||
|
||||
elif data_type == CustomField.FieldDataType.MONETARY:
|
||||
cleaned = re.sub(r"[^\d.,\-]", "", value)
|
||||
if not cleaned or cleaned in (".", ",", "-"):
|
||||
return None
|
||||
if "," in cleaned and "." in cleaned:
|
||||
if cleaned.rindex(",") > cleaned.rindex("."):
|
||||
cleaned = cleaned.replace(".", "").replace(",", ".")
|
||||
else:
|
||||
cleaned = cleaned.replace(",", "")
|
||||
elif "," in cleaned:
|
||||
cleaned = cleaned.replace(",", ".")
|
||||
# Validate it parses as a number
|
||||
float(cleaned)
|
||||
return cleaned
|
||||
|
||||
elif data_type == CustomField.FieldDataType.BOOL:
|
||||
lower = value.lower().strip()
|
||||
if lower in ("true", "yes", "1", "ja", "oui", "si", "x"):
|
||||
return True
|
||||
elif lower in ("false", "no", "0", "nein", "non"):
|
||||
return False
|
||||
return None
|
||||
|
||||
else:
|
||||
# Unsupported types (DOCUMENTLINK, SELECT) — can't OCR into these
|
||||
logger.debug(
|
||||
"Zone OCR: unsupported custom field type %s for OCR extraction",
|
||||
data_type,
|
||||
)
|
||||
return None
|
||||
|
||||
except (ValueError, TypeError) as e:
|
||||
logger.warning("Zone OCR: could not convert %r to %s: %s", value, data_type, e)
|
||||
return None
|
||||
|
||||
|
||||
def _detect_mime(path: Path) -> str | None:
|
||||
"""Detect MIME type of a file."""
|
||||
try:
|
||||
import magic
|
||||
|
||||
return magic.from_file(str(path), mime=True)
|
||||
except ImportError:
|
||||
pass
|
||||
except Exception:
|
||||
logger.debug("Zone OCR: magic failed for %s, falling back to extension", path)
|
||||
|
||||
suffix = path.suffix.lower()
|
||||
return {
|
||||
".pdf": "application/pdf",
|
||||
".png": "image/png",
|
||||
".jpg": "image/jpeg",
|
||||
".jpeg": "image/jpeg",
|
||||
".tiff": "image/tiff",
|
||||
".tif": "image/tiff",
|
||||
".webp": "image/webp",
|
||||
".bmp": "image/bmp",
|
||||
".gif": "image/gif",
|
||||
}.get(suffix)
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -2,7 +2,7 @@ msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: paperless-ngx\n"
|
||||
"Report-Msgid-Bugs-To: \n"
|
||||
"POT-Creation-Date: 2026-06-23 14:33+0000\n"
|
||||
"POT-Creation-Date: 2026-06-03 22:14+0000\n"
|
||||
"PO-Revision-Date: 2022-02-17 04:17\n"
|
||||
"Last-Translator: \n"
|
||||
"Language-Team: English\n"
|
||||
@@ -21,39 +21,39 @@ msgstr ""
|
||||
msgid "Documents"
|
||||
msgstr ""
|
||||
|
||||
#: documents/filters.py:464
|
||||
#: documents/filters.py:463
|
||||
msgid "Value must be valid JSON."
|
||||
msgstr ""
|
||||
|
||||
#: documents/filters.py:483
|
||||
#: documents/filters.py:482
|
||||
msgid "Invalid custom field query expression"
|
||||
msgstr ""
|
||||
|
||||
#: documents/filters.py:493
|
||||
#: documents/filters.py:492
|
||||
msgid "Invalid expression list. Must be nonempty."
|
||||
msgstr ""
|
||||
|
||||
#: documents/filters.py:514
|
||||
#: documents/filters.py:513
|
||||
msgid "Invalid logical operator {op!r}"
|
||||
msgstr ""
|
||||
|
||||
#: documents/filters.py:528
|
||||
#: documents/filters.py:527
|
||||
msgid "Maximum number of query conditions exceeded."
|
||||
msgstr ""
|
||||
|
||||
#: documents/filters.py:592
|
||||
#: documents/filters.py:591
|
||||
msgid "{name!r} is not a valid custom field."
|
||||
msgstr ""
|
||||
|
||||
#: documents/filters.py:629
|
||||
#: documents/filters.py:628
|
||||
msgid "{data_type} does not support query expr {expr!r}."
|
||||
msgstr ""
|
||||
|
||||
#: documents/filters.py:744 documents/models.py:136
|
||||
#: documents/filters.py:743 documents/models.py:136
|
||||
msgid "Maximum nesting depth exceeded."
|
||||
msgstr ""
|
||||
|
||||
#: documents/filters.py:1052
|
||||
#: documents/filters.py:990
|
||||
msgid "Custom field not found"
|
||||
msgstr ""
|
||||
|
||||
@@ -1351,49 +1351,49 @@ msgstr ""
|
||||
msgid "workflow runs"
|
||||
msgstr ""
|
||||
|
||||
#: documents/serialisers.py:503 documents/serialisers.py:855
|
||||
#: documents/serialisers.py:2744 documents/views.py:297 documents/views.py:2482
|
||||
#: documents/serialisers.py:463 documents/serialisers.py:815
|
||||
#: documents/serialisers.py:2681 documents/views.py:295 documents/views.py:2468
|
||||
#: paperless_mail/serialisers.py:155
|
||||
msgid "Insufficient permissions."
|
||||
msgstr ""
|
||||
|
||||
#: documents/serialisers.py:691
|
||||
#: documents/serialisers.py:651
|
||||
msgid "Invalid color."
|
||||
msgstr ""
|
||||
|
||||
#: documents/serialisers.py:2216
|
||||
#: documents/serialisers.py:2175
|
||||
#, python-format
|
||||
msgid "File type %(type)s not supported"
|
||||
msgstr ""
|
||||
|
||||
#: documents/serialisers.py:2260
|
||||
#: documents/serialisers.py:2219
|
||||
#, python-format
|
||||
msgid "Custom field id must be an integer: %(id)s"
|
||||
msgstr ""
|
||||
|
||||
#: documents/serialisers.py:2267
|
||||
#: documents/serialisers.py:2226
|
||||
#, python-format
|
||||
msgid "Custom field with id %(id)s does not exist"
|
||||
msgstr ""
|
||||
|
||||
#: documents/serialisers.py:2284 documents/serialisers.py:2294
|
||||
#: documents/serialisers.py:2243 documents/serialisers.py:2253
|
||||
msgid ""
|
||||
"Custom fields must be a list of integers or an object mapping ids to values."
|
||||
msgstr ""
|
||||
|
||||
#: documents/serialisers.py:2289
|
||||
#: documents/serialisers.py:2248
|
||||
msgid "Some custom fields don't exist or were specified twice."
|
||||
msgstr ""
|
||||
|
||||
#: documents/serialisers.py:2436
|
||||
#: documents/serialisers.py:2395
|
||||
msgid "Invalid variable detected."
|
||||
msgstr ""
|
||||
|
||||
#: documents/serialisers.py:2800
|
||||
#: documents/serialisers.py:2737
|
||||
msgid "Duplicate document identifiers are not allowed."
|
||||
msgstr ""
|
||||
|
||||
#: documents/serialisers.py:2830 documents/views.py:4429
|
||||
#: documents/serialisers.py:2767 documents/views.py:4345
|
||||
#, python-format
|
||||
msgid "Documents not found: %(ids)s"
|
||||
msgstr ""
|
||||
@@ -1661,36 +1661,32 @@ msgstr ""
|
||||
msgid "Unable to parse URI {value}"
|
||||
msgstr ""
|
||||
|
||||
#: documents/views.py:290 documents/views.py:2479
|
||||
#: documents/views.py:288 documents/views.py:2465
|
||||
msgid "Invalid more_like_id"
|
||||
msgstr ""
|
||||
|
||||
#: documents/views.py:1513
|
||||
#: documents/views.py:1511
|
||||
msgid "Invalid AI configuration."
|
||||
msgstr ""
|
||||
|
||||
#: documents/views.py:1522
|
||||
msgid "AI backend request timed out."
|
||||
msgstr ""
|
||||
|
||||
#: documents/views.py:2304 documents/views.py:2625
|
||||
#: documents/views.py:2290 documents/views.py:2606
|
||||
msgid "Specify only one of text, title_search, query, or more_like_id."
|
||||
msgstr ""
|
||||
|
||||
#: documents/views.py:4441
|
||||
#: documents/views.py:4357
|
||||
#, python-format
|
||||
msgid "Insufficient permissions to share document %(id)s."
|
||||
msgstr ""
|
||||
|
||||
#: documents/views.py:4487
|
||||
#: documents/views.py:4403
|
||||
msgid "Bundle is already being processed."
|
||||
msgstr ""
|
||||
|
||||
#: documents/views.py:4547
|
||||
#: documents/views.py:4463
|
||||
msgid "The share link bundle is still being prepared. Please try again later."
|
||||
msgstr ""
|
||||
|
||||
#: documents/views.py:4557
|
||||
#: documents/views.py:4473
|
||||
msgid "The share link bundle is unavailable."
|
||||
msgstr ""
|
||||
|
||||
@@ -1939,158 +1935,154 @@ msgid "Sets the LLM output language"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/models.py:370
|
||||
msgid "Sets the LLM timeout in seconds"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/models.py:376
|
||||
msgid "paperless application settings"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:545
|
||||
#: paperless/settings/__init__.py:539
|
||||
msgid "English (US)"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:546
|
||||
#: paperless/settings/__init__.py:540
|
||||
msgid "Arabic"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:547
|
||||
#: paperless/settings/__init__.py:541
|
||||
msgid "Afrikaans"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:548
|
||||
#: paperless/settings/__init__.py:542
|
||||
msgid "Belarusian"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:549
|
||||
#: paperless/settings/__init__.py:543
|
||||
msgid "Bulgarian"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:550
|
||||
#: paperless/settings/__init__.py:544
|
||||
msgid "Catalan"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:551
|
||||
#: paperless/settings/__init__.py:545
|
||||
msgid "Czech"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:552
|
||||
#: paperless/settings/__init__.py:546
|
||||
msgid "Danish"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:553
|
||||
#: paperless/settings/__init__.py:547
|
||||
msgid "German"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:554
|
||||
#: paperless/settings/__init__.py:548
|
||||
msgid "Greek"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:555
|
||||
#: paperless/settings/__init__.py:549
|
||||
msgid "English (GB)"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:556
|
||||
#: paperless/settings/__init__.py:550
|
||||
msgid "Spanish"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:557
|
||||
#: paperless/settings/__init__.py:551
|
||||
msgid "Persian"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:558
|
||||
#: paperless/settings/__init__.py:552
|
||||
msgid "Finnish"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:559
|
||||
#: paperless/settings/__init__.py:553
|
||||
msgid "French"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:560
|
||||
#: paperless/settings/__init__.py:554
|
||||
msgid "Hungarian"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:561
|
||||
#: paperless/settings/__init__.py:555
|
||||
msgid "Indonesian"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:562
|
||||
#: paperless/settings/__init__.py:556
|
||||
msgid "Italian"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:563
|
||||
#: paperless/settings/__init__.py:557
|
||||
msgid "Japanese"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:564
|
||||
#: paperless/settings/__init__.py:558
|
||||
msgid "Korean"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:565
|
||||
#: paperless/settings/__init__.py:559
|
||||
msgid "Luxembourgish"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:566
|
||||
#: paperless/settings/__init__.py:560
|
||||
msgid "Norwegian"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:567
|
||||
#: paperless/settings/__init__.py:561
|
||||
msgid "Dutch"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:568
|
||||
#: paperless/settings/__init__.py:562
|
||||
msgid "Polish"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:569
|
||||
#: paperless/settings/__init__.py:563
|
||||
msgid "Portuguese (Brazil)"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:570
|
||||
#: paperless/settings/__init__.py:564
|
||||
msgid "Portuguese"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:571
|
||||
#: paperless/settings/__init__.py:565
|
||||
msgid "Romanian"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:572
|
||||
#: paperless/settings/__init__.py:566
|
||||
msgid "Russian"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:573
|
||||
#: paperless/settings/__init__.py:567
|
||||
msgid "Slovak"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:574
|
||||
#: paperless/settings/__init__.py:568
|
||||
msgid "Slovenian"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:575
|
||||
#: paperless/settings/__init__.py:569
|
||||
msgid "Serbian"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:576
|
||||
#: paperless/settings/__init__.py:570
|
||||
msgid "Swedish"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:577
|
||||
#: paperless/settings/__init__.py:571
|
||||
msgid "Turkish"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:578
|
||||
#: paperless/settings/__init__.py:572
|
||||
msgid "Ukrainian"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:579
|
||||
#: paperless/settings/__init__.py:573
|
||||
msgid "Vietnamese"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:580
|
||||
#: paperless/settings/__init__.py:574
|
||||
msgid "Chinese Simplified"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings/__init__.py:581
|
||||
#: paperless/settings/__init__.py:575
|
||||
msgid "Chinese Traditional"
|
||||
msgstr ""
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user