mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-04-03 06:38:51 +00:00
Compare commits
30 Commits
fix-search
...
feature-un
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
84b5ad2e02 | ||
|
|
ed0a425e68 | ||
|
|
51880b04f8 | ||
|
|
f8b686a29e | ||
|
|
e578090c30 | ||
|
|
25b00b3377 | ||
|
|
cf160de8ba | ||
|
|
d9d3b4f81b | ||
|
|
2184fb460d | ||
|
|
97c804709a | ||
|
|
6df75a03bd | ||
|
|
8f90aa368b | ||
|
|
76f1d2e21a | ||
|
|
2544b4f46b | ||
|
|
2171b735cf | ||
|
|
fcb13e8d63 | ||
|
|
cb9d788721 | ||
|
|
8aebe38a14 | ||
|
|
e2c7388e2c | ||
|
|
deb61bca57 | ||
|
|
650a5327ef | ||
|
|
a3da9fd8e0 | ||
|
|
13e97ffa0f | ||
|
|
f34ddfea65 | ||
|
|
a0639f4830 | ||
|
|
55f5404afb | ||
|
|
eead65e155 | ||
|
|
7dfba1f38f | ||
|
|
24efaeb5a6 | ||
|
|
0a76c71925 |
10
docs/api.md
10
docs/api.md
@@ -62,10 +62,14 @@ The REST api provides five different forms of authentication.
|
||||
|
||||
## Searching for documents
|
||||
|
||||
Full text searching is available on the `/api/documents/` endpoint. Two
|
||||
specific query parameters cause the API to return full text search
|
||||
Full text searching is available on the `/api/documents/` endpoint. The
|
||||
following query parameters cause the API to return Tantivy-backed search
|
||||
results:
|
||||
|
||||
- `/api/documents/?text=your%20search%20query`: Search title and content
|
||||
using simple substring-style search.
|
||||
- `/api/documents/?title_search=your%20search%20query`: Search title only
|
||||
using simple substring-style search.
|
||||
- `/api/documents/?query=your%20search%20query`: Search for a document
|
||||
using a full text query. For details on the syntax, see [Basic Usage - Searching](usage.md#basic-usage_searching).
|
||||
- `/api/documents/?more_like_id=1234`: Search for documents similar to
|
||||
@@ -439,3 +443,5 @@ Initial API version.
|
||||
- The `all` parameter of list endpoints is now deprecated and will be removed in a future version.
|
||||
- The bulk edit objects endpoint now supports `all` and `filters` parameters to avoid having to send
|
||||
large lists of object IDs for operations affecting many objects.
|
||||
- The legacy `title_content` document search parameter is deprecated and will be removed in a future version.
|
||||
Clients should use `text` for simple title-and-content search and `title_search` for title-only search.
|
||||
|
||||
@@ -49,11 +49,11 @@ test('text filtering', async ({ page }) => {
|
||||
await page.getByRole('main').getByRole('combobox').click()
|
||||
await page.getByRole('main').getByRole('combobox').fill('test')
|
||||
await expect(page.locator('pngx-document-list')).toHaveText(/32 documents/)
|
||||
await expect(page).toHaveURL(/title_content=test/)
|
||||
await expect(page).toHaveURL(/text=test/)
|
||||
await page.getByRole('button', { name: 'Title & content' }).click()
|
||||
await page.getByRole('button', { name: 'Title', exact: true }).click()
|
||||
await expect(page.locator('pngx-document-list')).toHaveText(/9 documents/)
|
||||
await expect(page).toHaveURL(/title__icontains=test/)
|
||||
await expect(page).toHaveURL(/title_search=test/)
|
||||
await page.getByRole('button', { name: 'Title', exact: true }).click()
|
||||
await page.getByRole('button', { name: 'Advanced search' }).click()
|
||||
await expect(page).toHaveURL(/query=test/)
|
||||
|
||||
@@ -3545,7 +3545,7 @@
|
||||
"time": 1.091,
|
||||
"request": {
|
||||
"method": "GET",
|
||||
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&title_content=test",
|
||||
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&text=test",
|
||||
"httpVersion": "HTTP/1.1",
|
||||
"cookies": [],
|
||||
"headers": [
|
||||
@@ -3579,7 +3579,7 @@
|
||||
"value": "true"
|
||||
},
|
||||
{
|
||||
"name": "title_content",
|
||||
"name": "text",
|
||||
"value": "test"
|
||||
}
|
||||
],
|
||||
@@ -4303,7 +4303,7 @@
|
||||
"time": 0.603,
|
||||
"request": {
|
||||
"method": "GET",
|
||||
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&title__icontains=test",
|
||||
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&title_search=test",
|
||||
"httpVersion": "HTTP/1.1",
|
||||
"cookies": [],
|
||||
"headers": [
|
||||
@@ -4337,7 +4337,7 @@
|
||||
"value": "true"
|
||||
},
|
||||
{
|
||||
"name": "title__icontains",
|
||||
"name": "title_search",
|
||||
"value": "test"
|
||||
}
|
||||
],
|
||||
|
||||
@@ -24,7 +24,7 @@ import {
|
||||
FILTER_HAS_DOCUMENT_TYPE_ANY,
|
||||
FILTER_HAS_STORAGE_PATH_ANY,
|
||||
FILTER_HAS_TAGS_ALL,
|
||||
FILTER_TITLE_CONTENT,
|
||||
FILTER_SIMPLE_TEXT,
|
||||
} from 'src/app/data/filter-rule-type'
|
||||
import { GlobalSearchType, SETTINGS_KEYS } from 'src/app/data/ui-settings'
|
||||
import { DocumentListViewService } from 'src/app/services/document-list-view.service'
|
||||
@@ -545,7 +545,7 @@ describe('GlobalSearchComponent', () => {
|
||||
component.query = 'test'
|
||||
component.runFullSearch()
|
||||
expect(qfSpy).toHaveBeenCalledWith([
|
||||
{ rule_type: FILTER_TITLE_CONTENT, value: 'test' },
|
||||
{ rule_type: FILTER_SIMPLE_TEXT, value: 'test' },
|
||||
])
|
||||
|
||||
settingsService.set(
|
||||
|
||||
@@ -25,7 +25,7 @@ import {
|
||||
FILTER_HAS_DOCUMENT_TYPE_ANY,
|
||||
FILTER_HAS_STORAGE_PATH_ANY,
|
||||
FILTER_HAS_TAGS_ALL,
|
||||
FILTER_TITLE_CONTENT,
|
||||
FILTER_SIMPLE_TEXT,
|
||||
} from 'src/app/data/filter-rule-type'
|
||||
import { ObjectWithId } from 'src/app/data/object-with-id'
|
||||
import { GlobalSearchType, SETTINGS_KEYS } from 'src/app/data/ui-settings'
|
||||
@@ -410,7 +410,7 @@ export class GlobalSearchComponent implements OnInit {
|
||||
public runFullSearch() {
|
||||
const ruleType = this.useAdvancedForFullSearch
|
||||
? FILTER_FULLTEXT_QUERY
|
||||
: FILTER_TITLE_CONTENT
|
||||
: FILTER_SIMPLE_TEXT
|
||||
this.documentService.searchQuery = this.useAdvancedForFullSearch
|
||||
? this.query
|
||||
: ''
|
||||
|
||||
@@ -4,7 +4,7 @@ import { ComponentFixture, TestBed } from '@angular/core/testing'
|
||||
import { By } from '@angular/platform-browser'
|
||||
import { NgbAccordionButton, NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'
|
||||
import { of, throwError } from 'rxjs'
|
||||
import { FILTER_TITLE } from 'src/app/data/filter-rule-type'
|
||||
import { FILTER_SIMPLE_TITLE } from 'src/app/data/filter-rule-type'
|
||||
import { DocumentService } from 'src/app/services/rest/document.service'
|
||||
import { StoragePathService } from 'src/app/services/rest/storage-path.service'
|
||||
import { SettingsService } from 'src/app/services/settings.service'
|
||||
@@ -105,7 +105,7 @@ describe('StoragePathEditDialogComponent', () => {
|
||||
null,
|
||||
'created',
|
||||
true,
|
||||
[{ rule_type: FILTER_TITLE, value: 'bar' }],
|
||||
[{ rule_type: FILTER_SIMPLE_TITLE, value: 'bar' }],
|
||||
{ truncate_content: true }
|
||||
)
|
||||
listSpy.mockReturnValueOnce(
|
||||
|
||||
@@ -23,7 +23,7 @@ import {
|
||||
} from 'rxjs'
|
||||
import { EditDialogComponent } from 'src/app/components/common/edit-dialog/edit-dialog.component'
|
||||
import { Document } from 'src/app/data/document'
|
||||
import { FILTER_TITLE } from 'src/app/data/filter-rule-type'
|
||||
import { FILTER_SIMPLE_TITLE } from 'src/app/data/filter-rule-type'
|
||||
import { DEFAULT_MATCHING_ALGORITHM } from 'src/app/data/matching-model'
|
||||
import { StoragePath } from 'src/app/data/storage-path'
|
||||
import { IfOwnerDirective } from 'src/app/directives/if-owner.directive'
|
||||
@@ -146,7 +146,7 @@ export class StoragePathEditDialogComponent
|
||||
null,
|
||||
'created',
|
||||
true,
|
||||
[{ rule_type: FILTER_TITLE, value: title }],
|
||||
[{ rule_type: FILTER_SIMPLE_TITLE, value: title }],
|
||||
{ truncate_content: true }
|
||||
)
|
||||
.pipe(
|
||||
|
||||
@@ -3,7 +3,7 @@ import { provideHttpClientTesting } from '@angular/common/http/testing'
|
||||
import { ComponentFixture, TestBed } from '@angular/core/testing'
|
||||
import { NG_VALUE_ACCESSOR } from '@angular/forms'
|
||||
import { of, throwError } from 'rxjs'
|
||||
import { FILTER_TITLE } from 'src/app/data/filter-rule-type'
|
||||
import { FILTER_SIMPLE_TITLE } from 'src/app/data/filter-rule-type'
|
||||
import { DocumentService } from 'src/app/services/rest/document.service'
|
||||
import { DocumentLinkComponent } from './document-link.component'
|
||||
|
||||
@@ -99,7 +99,7 @@ describe('DocumentLinkComponent', () => {
|
||||
null,
|
||||
'created',
|
||||
true,
|
||||
[{ rule_type: FILTER_TITLE, value: 'bar' }],
|
||||
[{ rule_type: FILTER_SIMPLE_TITLE, value: 'bar' }],
|
||||
{ truncate_content: true }
|
||||
)
|
||||
listSpy.mockReturnValueOnce(throwError(() => new Error()))
|
||||
|
||||
@@ -28,7 +28,7 @@ import {
|
||||
tap,
|
||||
} from 'rxjs'
|
||||
import { Document } from 'src/app/data/document'
|
||||
import { FILTER_TITLE } from 'src/app/data/filter-rule-type'
|
||||
import { FILTER_SIMPLE_TITLE } from 'src/app/data/filter-rule-type'
|
||||
import { CustomDatePipe } from 'src/app/pipes/custom-date.pipe'
|
||||
import { DocumentService } from 'src/app/services/rest/document.service'
|
||||
import { AbstractInputComponent } from '../abstract-input'
|
||||
@@ -121,7 +121,7 @@ export class DocumentLinkComponent
|
||||
null,
|
||||
'created',
|
||||
true,
|
||||
[{ rule_type: FILTER_TITLE, value: title }],
|
||||
[{ rule_type: FILTER_SIMPLE_TITLE, value: title }],
|
||||
{ truncate_content: true }
|
||||
)
|
||||
.pipe(
|
||||
|
||||
@@ -428,7 +428,7 @@ describe('BulkEditorComponent', () => {
|
||||
req.flush(true)
|
||||
expect(req.request.body).toEqual({
|
||||
all: true,
|
||||
filters: { title__icontains: 'apple' },
|
||||
filters: { title_search: 'apple' },
|
||||
method: 'modify_tags',
|
||||
parameters: { add_tags: [101], remove_tags: [] },
|
||||
})
|
||||
|
||||
@@ -67,6 +67,8 @@ import {
|
||||
FILTER_OWNER_DOES_NOT_INCLUDE,
|
||||
FILTER_OWNER_ISNULL,
|
||||
FILTER_SHARED_BY_USER,
|
||||
FILTER_SIMPLE_TEXT,
|
||||
FILTER_SIMPLE_TITLE,
|
||||
FILTER_STORAGE_PATH,
|
||||
FILTER_TITLE,
|
||||
FILTER_TITLE_CONTENT,
|
||||
@@ -312,7 +314,7 @@ describe('FilterEditorComponent', () => {
|
||||
expect(component.textFilter).toEqual(null)
|
||||
component.filterRules = [
|
||||
{
|
||||
rule_type: FILTER_TITLE_CONTENT,
|
||||
rule_type: FILTER_SIMPLE_TEXT,
|
||||
value: 'foo',
|
||||
},
|
||||
]
|
||||
@@ -320,6 +322,18 @@ describe('FilterEditorComponent', () => {
|
||||
expect(component.textFilterTarget).toEqual('title-content') // TEXT_FILTER_TARGET_TITLE_CONTENT
|
||||
}))
|
||||
|
||||
it('should ingest legacy text filter rules for doc title + content', fakeAsync(() => {
|
||||
expect(component.textFilter).toEqual(null)
|
||||
component.filterRules = [
|
||||
{
|
||||
rule_type: FILTER_TITLE_CONTENT,
|
||||
value: 'legacy foo',
|
||||
},
|
||||
]
|
||||
expect(component.textFilter).toEqual('legacy foo')
|
||||
expect(component.textFilterTarget).toEqual('title-content') // TEXT_FILTER_TARGET_TITLE_CONTENT
|
||||
}))
|
||||
|
||||
it('should ingest text filter rules for doc asn', fakeAsync(() => {
|
||||
expect(component.textFilter).toEqual(null)
|
||||
component.filterRules = [
|
||||
@@ -1117,7 +1131,7 @@ describe('FilterEditorComponent', () => {
|
||||
expect(component.textFilter).toEqual('foo')
|
||||
expect(component.filterRules).toEqual([
|
||||
{
|
||||
rule_type: FILTER_TITLE_CONTENT,
|
||||
rule_type: FILTER_SIMPLE_TEXT,
|
||||
value: 'foo',
|
||||
},
|
||||
])
|
||||
@@ -1136,7 +1150,7 @@ describe('FilterEditorComponent', () => {
|
||||
expect(component.textFilterTarget).toEqual('title')
|
||||
expect(component.filterRules).toEqual([
|
||||
{
|
||||
rule_type: FILTER_TITLE,
|
||||
rule_type: FILTER_SIMPLE_TITLE,
|
||||
value: 'foo',
|
||||
},
|
||||
])
|
||||
@@ -1250,30 +1264,12 @@ describe('FilterEditorComponent', () => {
|
||||
])
|
||||
}))
|
||||
|
||||
it('should convert user input to correct filter rules on custom fields query', fakeAsync(() => {
|
||||
component.textFilterInput.nativeElement.value = 'foo'
|
||||
component.textFilterInput.nativeElement.dispatchEvent(new Event('input'))
|
||||
const textFieldTargetDropdown = fixture.debugElement.queryAll(
|
||||
By.directive(NgbDropdownItem)
|
||||
)[3]
|
||||
textFieldTargetDropdown.triggerEventHandler('click') // TEXT_FILTER_TARGET_CUSTOM_FIELDS
|
||||
fixture.detectChanges()
|
||||
tick(400)
|
||||
expect(component.textFilterTarget).toEqual('custom-fields')
|
||||
expect(component.filterRules).toEqual([
|
||||
{
|
||||
rule_type: FILTER_CUSTOM_FIELDS_TEXT,
|
||||
value: 'foo',
|
||||
},
|
||||
])
|
||||
}))
|
||||
|
||||
it('should convert user input to correct filter rules on mime type', fakeAsync(() => {
|
||||
component.textFilterInput.nativeElement.value = 'pdf'
|
||||
component.textFilterInput.nativeElement.dispatchEvent(new Event('input'))
|
||||
const textFieldTargetDropdown = fixture.debugElement.queryAll(
|
||||
By.directive(NgbDropdownItem)
|
||||
)[4]
|
||||
)[3]
|
||||
textFieldTargetDropdown.triggerEventHandler('click') // TEXT_FILTER_TARGET_MIME_TYPE
|
||||
fixture.detectChanges()
|
||||
tick(400)
|
||||
@@ -1291,8 +1287,8 @@ describe('FilterEditorComponent', () => {
|
||||
component.textFilterInput.nativeElement.dispatchEvent(new Event('input'))
|
||||
const textFieldTargetDropdown = fixture.debugElement.queryAll(
|
||||
By.directive(NgbDropdownItem)
|
||||
)[5]
|
||||
textFieldTargetDropdown.triggerEventHandler('click') // TEXT_FILTER_TARGET_ASN
|
||||
)[4]
|
||||
textFieldTargetDropdown.triggerEventHandler('click') // TEXT_FILTER_TARGET_FULLTEXT_QUERY
|
||||
fixture.detectChanges()
|
||||
tick(400)
|
||||
expect(component.textFilterTarget).toEqual('fulltext-query')
|
||||
@@ -1696,12 +1692,56 @@ describe('FilterEditorComponent', () => {
|
||||
])
|
||||
}))
|
||||
|
||||
it('should convert legacy title filters into full text query when adding a created relative date', fakeAsync(() => {
|
||||
component.filterRules = [
|
||||
{
|
||||
rule_type: FILTER_TITLE,
|
||||
value: 'foo',
|
||||
},
|
||||
]
|
||||
const dateCreatedDropdown = fixture.debugElement.queryAll(
|
||||
By.directive(DatesDropdownComponent)
|
||||
)[0]
|
||||
component.dateCreatedRelativeDate = RelativeDate.WITHIN_1_WEEK
|
||||
dateCreatedDropdown.triggerEventHandler('datesSet')
|
||||
fixture.detectChanges()
|
||||
tick(400)
|
||||
expect(component.filterRules).toEqual([
|
||||
{
|
||||
rule_type: FILTER_FULLTEXT_QUERY,
|
||||
value: 'foo,created:[-1 week to now]',
|
||||
},
|
||||
])
|
||||
}))
|
||||
|
||||
it('should convert simple title filters into full text query when adding a created relative date', fakeAsync(() => {
|
||||
component.filterRules = [
|
||||
{
|
||||
rule_type: FILTER_SIMPLE_TITLE,
|
||||
value: 'foo',
|
||||
},
|
||||
]
|
||||
const dateCreatedDropdown = fixture.debugElement.queryAll(
|
||||
By.directive(DatesDropdownComponent)
|
||||
)[0]
|
||||
component.dateCreatedRelativeDate = RelativeDate.WITHIN_1_WEEK
|
||||
dateCreatedDropdown.triggerEventHandler('datesSet')
|
||||
fixture.detectChanges()
|
||||
tick(400)
|
||||
expect(component.filterRules).toEqual([
|
||||
{
|
||||
rule_type: FILTER_FULLTEXT_QUERY,
|
||||
value: 'foo,created:[-1 week to now]',
|
||||
},
|
||||
])
|
||||
}))
|
||||
|
||||
it('should leave relative dates not in quick list intact', fakeAsync(() => {
|
||||
component.textFilterInput.nativeElement.value = 'created:[-2 week to now]'
|
||||
component.textFilterInput.nativeElement.dispatchEvent(new Event('input'))
|
||||
const textFieldTargetDropdown = fixture.debugElement.queryAll(
|
||||
By.directive(NgbDropdownItem)
|
||||
)[5]
|
||||
)[4]
|
||||
textFieldTargetDropdown.triggerEventHandler('click')
|
||||
fixture.detectChanges()
|
||||
tick(400)
|
||||
@@ -2031,12 +2071,30 @@ describe('FilterEditorComponent', () => {
|
||||
|
||||
component.filterRules = [
|
||||
{
|
||||
rule_type: FILTER_TITLE,
|
||||
rule_type: FILTER_SIMPLE_TITLE,
|
||||
value: 'foo',
|
||||
},
|
||||
]
|
||||
expect(component.generateFilterName()).toEqual('Title: foo')
|
||||
|
||||
component.filterRules = [
|
||||
{
|
||||
rule_type: FILTER_TITLE_CONTENT,
|
||||
value: 'legacy foo',
|
||||
},
|
||||
]
|
||||
expect(component.generateFilterName()).toEqual(
|
||||
'Title & content: legacy foo'
|
||||
)
|
||||
|
||||
component.filterRules = [
|
||||
{
|
||||
rule_type: FILTER_SIMPLE_TEXT,
|
||||
value: 'foo',
|
||||
},
|
||||
]
|
||||
expect(component.generateFilterName()).toEqual('Title & content: foo')
|
||||
|
||||
component.filterRules = [
|
||||
{
|
||||
rule_type: FILTER_ASN,
|
||||
@@ -2156,6 +2214,36 @@ describe('FilterEditorComponent', () => {
|
||||
})
|
||||
})
|
||||
|
||||
it('should hide deprecated custom fields target from default text filter targets', () => {
|
||||
expect(component.textFilterTargets).not.toContainEqual({
|
||||
id: 'custom-fields',
|
||||
name: $localize`Custom fields (Deprecated)`,
|
||||
})
|
||||
})
|
||||
|
||||
it('should keep deprecated custom fields target available for legacy filters', fakeAsync(() => {
|
||||
component.filterRules = [
|
||||
{
|
||||
rule_type: FILTER_CUSTOM_FIELDS_TEXT,
|
||||
value: 'foo',
|
||||
},
|
||||
]
|
||||
fixture.detectChanges()
|
||||
tick()
|
||||
|
||||
expect(component.textFilterTarget).toEqual('custom-fields')
|
||||
expect(component.textFilterTargets).toContainEqual({
|
||||
id: 'custom-fields',
|
||||
name: $localize`Custom fields (Deprecated)`,
|
||||
})
|
||||
expect(component.filterRules).toEqual([
|
||||
{
|
||||
rule_type: FILTER_CUSTOM_FIELDS_TEXT,
|
||||
value: 'foo',
|
||||
},
|
||||
])
|
||||
}))
|
||||
|
||||
it('should call autocomplete endpoint on input', fakeAsync(() => {
|
||||
component.textFilterTarget = 'fulltext-query' // TEXT_FILTER_TARGET_FULLTEXT_QUERY
|
||||
const autocompleteSpy = jest.spyOn(searchService, 'autocomplete')
|
||||
|
||||
@@ -71,6 +71,8 @@ import {
|
||||
FILTER_OWNER_DOES_NOT_INCLUDE,
|
||||
FILTER_OWNER_ISNULL,
|
||||
FILTER_SHARED_BY_USER,
|
||||
FILTER_SIMPLE_TEXT,
|
||||
FILTER_SIMPLE_TITLE,
|
||||
FILTER_STORAGE_PATH,
|
||||
FILTER_TITLE,
|
||||
FILTER_TITLE_CONTENT,
|
||||
@@ -195,10 +197,6 @@ const DEFAULT_TEXT_FILTER_TARGET_OPTIONS = [
|
||||
name: $localize`Title & content`,
|
||||
},
|
||||
{ id: TEXT_FILTER_TARGET_ASN, name: $localize`ASN` },
|
||||
{
|
||||
id: TEXT_FILTER_TARGET_CUSTOM_FIELDS,
|
||||
name: $localize`Custom fields`,
|
||||
},
|
||||
{ id: TEXT_FILTER_TARGET_MIME_TYPE, name: $localize`File type` },
|
||||
{
|
||||
id: TEXT_FILTER_TARGET_FULLTEXT_QUERY,
|
||||
@@ -206,6 +204,12 @@ const DEFAULT_TEXT_FILTER_TARGET_OPTIONS = [
|
||||
},
|
||||
]
|
||||
|
||||
const DEPRECATED_CUSTOM_FIELDS_TEXT_FILTER_TARGET_OPTION = {
|
||||
// Kept only so legacy saved views can render and be edited away from, remove me eventually
|
||||
id: TEXT_FILTER_TARGET_CUSTOM_FIELDS,
|
||||
name: $localize`Custom fields (Deprecated)`,
|
||||
}
|
||||
|
||||
const TEXT_FILTER_TARGET_MORELIKE_OPTION = {
|
||||
id: TEXT_FILTER_TARGET_FULLTEXT_MORELIKE,
|
||||
name: $localize`More like`,
|
||||
@@ -318,8 +322,13 @@ export class FilterEditorComponent
|
||||
return $localize`Custom fields query`
|
||||
|
||||
case FILTER_TITLE:
|
||||
case FILTER_SIMPLE_TITLE:
|
||||
return $localize`Title: ${rule.value}`
|
||||
|
||||
case FILTER_TITLE_CONTENT:
|
||||
case FILTER_SIMPLE_TEXT:
|
||||
return $localize`Title & content: ${rule.value}`
|
||||
|
||||
case FILTER_ASN:
|
||||
return $localize`ASN: ${rule.value}`
|
||||
|
||||
@@ -353,12 +362,16 @@ export class FilterEditorComponent
|
||||
_moreLikeDoc: Document
|
||||
|
||||
get textFilterTargets() {
|
||||
let targets = DEFAULT_TEXT_FILTER_TARGET_OPTIONS
|
||||
if (this.textFilterTarget == TEXT_FILTER_TARGET_FULLTEXT_MORELIKE) {
|
||||
return DEFAULT_TEXT_FILTER_TARGET_OPTIONS.concat([
|
||||
TEXT_FILTER_TARGET_MORELIKE_OPTION,
|
||||
targets = targets.concat([TEXT_FILTER_TARGET_MORELIKE_OPTION])
|
||||
}
|
||||
if (this.textFilterTarget == TEXT_FILTER_TARGET_CUSTOM_FIELDS) {
|
||||
targets = targets.concat([
|
||||
DEPRECATED_CUSTOM_FIELDS_TEXT_FILTER_TARGET_OPTION,
|
||||
])
|
||||
}
|
||||
return DEFAULT_TEXT_FILTER_TARGET_OPTIONS
|
||||
return targets
|
||||
}
|
||||
|
||||
textFilterTarget = TEXT_FILTER_TARGET_TITLE_CONTENT
|
||||
@@ -437,10 +450,12 @@ export class FilterEditorComponent
|
||||
value.forEach((rule) => {
|
||||
switch (rule.rule_type) {
|
||||
case FILTER_TITLE:
|
||||
case FILTER_SIMPLE_TITLE:
|
||||
this._textFilter = rule.value
|
||||
this.textFilterTarget = TEXT_FILTER_TARGET_TITLE
|
||||
break
|
||||
case FILTER_TITLE_CONTENT:
|
||||
case FILTER_SIMPLE_TEXT:
|
||||
this._textFilter = rule.value
|
||||
this.textFilterTarget = TEXT_FILTER_TARGET_TITLE_CONTENT
|
||||
break
|
||||
@@ -762,12 +777,15 @@ export class FilterEditorComponent
|
||||
this.textFilterTarget == TEXT_FILTER_TARGET_TITLE_CONTENT
|
||||
) {
|
||||
filterRules.push({
|
||||
rule_type: FILTER_TITLE_CONTENT,
|
||||
rule_type: FILTER_SIMPLE_TEXT,
|
||||
value: this._textFilter.trim(),
|
||||
})
|
||||
}
|
||||
if (this._textFilter && this.textFilterTarget == TEXT_FILTER_TARGET_TITLE) {
|
||||
filterRules.push({ rule_type: FILTER_TITLE, value: this._textFilter })
|
||||
filterRules.push({
|
||||
rule_type: FILTER_SIMPLE_TITLE,
|
||||
value: this._textFilter,
|
||||
})
|
||||
}
|
||||
if (this.textFilterTarget == TEXT_FILTER_TARGET_ASN) {
|
||||
if (
|
||||
@@ -1009,7 +1027,10 @@ export class FilterEditorComponent
|
||||
) {
|
||||
existingRule = filterRules.find(
|
||||
(fr) =>
|
||||
fr.rule_type == FILTER_TITLE_CONTENT || fr.rule_type == FILTER_TITLE
|
||||
fr.rule_type == FILTER_TITLE_CONTENT ||
|
||||
fr.rule_type == FILTER_SIMPLE_TEXT ||
|
||||
fr.rule_type == FILTER_TITLE ||
|
||||
fr.rule_type == FILTER_SIMPLE_TITLE
|
||||
)
|
||||
existingRule.rule_type = FILTER_FULLTEXT_QUERY
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@ import { DataType } from './datatype'
|
||||
export const NEGATIVE_NULL_FILTER_VALUE = -1
|
||||
|
||||
// These correspond to src/documents/models.py and changes here require a DB migration (and vice versa)
|
||||
export const FILTER_TITLE = 0
|
||||
export const FILTER_TITLE = 0 // Deprecated in favor of Tantivy-backed `title_search`. Keep for now for existing saved views
|
||||
export const FILTER_CONTENT = 1
|
||||
|
||||
export const FILTER_ASN = 2
|
||||
@@ -46,7 +46,9 @@ export const FILTER_ADDED_FROM = 46
|
||||
export const FILTER_MODIFIED_BEFORE = 15
|
||||
export const FILTER_MODIFIED_AFTER = 16
|
||||
|
||||
export const FILTER_TITLE_CONTENT = 19
|
||||
export const FILTER_TITLE_CONTENT = 19 // Deprecated in favor of Tantivy-backed `text` filtervar. Keep for now for existing saved views
|
||||
export const FILTER_SIMPLE_TITLE = 48
|
||||
export const FILTER_SIMPLE_TEXT = 49
|
||||
export const FILTER_FULLTEXT_QUERY = 20
|
||||
export const FILTER_FULLTEXT_MORELIKE = 21
|
||||
|
||||
@@ -56,7 +58,7 @@ export const FILTER_OWNER_ISNULL = 34
|
||||
export const FILTER_OWNER_DOES_NOT_INCLUDE = 35
|
||||
export const FILTER_SHARED_BY_USER = 37
|
||||
|
||||
export const FILTER_CUSTOM_FIELDS_TEXT = 36
|
||||
export const FILTER_CUSTOM_FIELDS_TEXT = 36 // Deprecated. UI no longer includes CF text-search mode. Keep for now for existing saved views
|
||||
export const FILTER_HAS_CUSTOM_FIELDS_ALL = 38
|
||||
export const FILTER_HAS_CUSTOM_FIELDS_ANY = 39
|
||||
export const FILTER_DOES_NOT_HAVE_CUSTOM_FIELDS = 40
|
||||
@@ -66,6 +68,9 @@ export const FILTER_CUSTOM_FIELDS_QUERY = 42
|
||||
|
||||
export const FILTER_MIME_TYPE = 47
|
||||
|
||||
export const SIMPLE_TEXT_PARAMETER = 'text'
|
||||
export const SIMPLE_TITLE_PARAMETER = 'title_search'
|
||||
|
||||
export const FILTER_RULE_TYPES: FilterRuleType[] = [
|
||||
{
|
||||
id: FILTER_TITLE,
|
||||
@@ -74,6 +79,13 @@ export const FILTER_RULE_TYPES: FilterRuleType[] = [
|
||||
multi: false,
|
||||
default: '',
|
||||
},
|
||||
{
|
||||
id: FILTER_SIMPLE_TITLE,
|
||||
filtervar: SIMPLE_TITLE_PARAMETER,
|
||||
datatype: 'string',
|
||||
multi: false,
|
||||
default: '',
|
||||
},
|
||||
{
|
||||
id: FILTER_CONTENT,
|
||||
filtervar: 'content__icontains',
|
||||
@@ -279,6 +291,12 @@ export const FILTER_RULE_TYPES: FilterRuleType[] = [
|
||||
datatype: 'string',
|
||||
multi: false,
|
||||
},
|
||||
{
|
||||
id: FILTER_SIMPLE_TEXT,
|
||||
filtervar: SIMPLE_TEXT_PARAMETER,
|
||||
datatype: 'string',
|
||||
multi: false,
|
||||
},
|
||||
{
|
||||
id: FILTER_FULLTEXT_QUERY,
|
||||
filtervar: 'query',
|
||||
|
||||
@@ -10,7 +10,7 @@ import {
|
||||
DOCUMENT_SORT_FIELDS,
|
||||
DOCUMENT_SORT_FIELDS_FULLTEXT,
|
||||
} from 'src/app/data/document'
|
||||
import { FILTER_TITLE } from 'src/app/data/filter-rule-type'
|
||||
import { FILTER_SIMPLE_TITLE } from 'src/app/data/filter-rule-type'
|
||||
import { SETTINGS_KEYS } from 'src/app/data/ui-settings'
|
||||
import { environment } from 'src/environments/environment'
|
||||
import { PermissionsService } from '../permissions.service'
|
||||
@@ -138,13 +138,13 @@ describe(`DocumentService`, () => {
|
||||
subscription = service
|
||||
.listAllFilteredIds([
|
||||
{
|
||||
rule_type: FILTER_TITLE,
|
||||
rule_type: FILTER_SIMPLE_TITLE,
|
||||
value: 'apple',
|
||||
},
|
||||
])
|
||||
.subscribe()
|
||||
const req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}${endpoint}/?page=1&page_size=100000&fields=id&title__icontains=apple`
|
||||
`${environment.apiBaseUrl}${endpoint}/?page=1&page_size=100000&fields=id&title_search=apple`
|
||||
)
|
||||
expect(req.request.method).toEqual('GET')
|
||||
})
|
||||
|
||||
@@ -8,6 +8,10 @@ import {
|
||||
FILTER_HAS_CUSTOM_FIELDS_ALL,
|
||||
FILTER_HAS_CUSTOM_FIELDS_ANY,
|
||||
FILTER_HAS_TAGS_ALL,
|
||||
FILTER_SIMPLE_TEXT,
|
||||
FILTER_SIMPLE_TITLE,
|
||||
FILTER_TITLE,
|
||||
FILTER_TITLE_CONTENT,
|
||||
NEGATIVE_NULL_FILTER_VALUE,
|
||||
} from '../data/filter-rule-type'
|
||||
import {
|
||||
@@ -128,6 +132,26 @@ describe('QueryParams Utils', () => {
|
||||
is_tagged: 0,
|
||||
})
|
||||
|
||||
params = queryParamsFromFilterRules([
|
||||
{
|
||||
rule_type: FILTER_TITLE_CONTENT,
|
||||
value: 'bank statement',
|
||||
},
|
||||
])
|
||||
expect(params).toEqual({
|
||||
text: 'bank statement',
|
||||
})
|
||||
|
||||
params = queryParamsFromFilterRules([
|
||||
{
|
||||
rule_type: FILTER_TITLE,
|
||||
value: 'invoice',
|
||||
},
|
||||
])
|
||||
expect(params).toEqual({
|
||||
title_search: 'invoice',
|
||||
})
|
||||
|
||||
params = queryParamsFromFilterRules([
|
||||
{
|
||||
rule_type: FILTER_HAS_TAGS_ALL,
|
||||
@@ -148,6 +172,30 @@ describe('QueryParams Utils', () => {
|
||||
|
||||
it('should convert filter rules to query params', () => {
|
||||
let rules = filterRulesFromQueryParams(
|
||||
convertToParamMap({
|
||||
text: 'bank statement',
|
||||
})
|
||||
)
|
||||
expect(rules).toEqual([
|
||||
{
|
||||
rule_type: FILTER_SIMPLE_TEXT,
|
||||
value: 'bank statement',
|
||||
},
|
||||
])
|
||||
|
||||
rules = filterRulesFromQueryParams(
|
||||
convertToParamMap({
|
||||
title_search: 'invoice',
|
||||
})
|
||||
)
|
||||
expect(rules).toEqual([
|
||||
{
|
||||
rule_type: FILTER_SIMPLE_TITLE,
|
||||
value: 'invoice',
|
||||
},
|
||||
])
|
||||
|
||||
rules = filterRulesFromQueryParams(
|
||||
convertToParamMap({
|
||||
tags__id__all,
|
||||
})
|
||||
|
||||
@@ -9,8 +9,14 @@ import {
|
||||
FILTER_HAS_CUSTOM_FIELDS_ALL,
|
||||
FILTER_HAS_CUSTOM_FIELDS_ANY,
|
||||
FILTER_RULE_TYPES,
|
||||
FILTER_SIMPLE_TEXT,
|
||||
FILTER_SIMPLE_TITLE,
|
||||
FILTER_TITLE,
|
||||
FILTER_TITLE_CONTENT,
|
||||
FilterRuleType,
|
||||
NEGATIVE_NULL_FILTER_VALUE,
|
||||
SIMPLE_TEXT_PARAMETER,
|
||||
SIMPLE_TITLE_PARAMETER,
|
||||
} from '../data/filter-rule-type'
|
||||
import { ListViewState } from '../services/document-list-view.service'
|
||||
|
||||
@@ -97,6 +103,8 @@ export function transformLegacyFilterRules(
|
||||
export function filterRulesFromQueryParams(
|
||||
queryParams: ParamMap
|
||||
): FilterRule[] {
|
||||
let filterRulesFromQueryParams: FilterRule[] = []
|
||||
|
||||
const allFilterRuleQueryParams: string[] = FILTER_RULE_TYPES.map(
|
||||
(rt) => rt.filtervar
|
||||
)
|
||||
@@ -104,7 +112,6 @@ export function filterRulesFromQueryParams(
|
||||
.filter((rt) => rt !== undefined)
|
||||
|
||||
// transform query params to filter rules
|
||||
let filterRulesFromQueryParams: FilterRule[] = []
|
||||
allFilterRuleQueryParams
|
||||
.filter((frqp) => queryParams.has(frqp))
|
||||
.forEach((filterQueryParamName) => {
|
||||
@@ -146,7 +153,17 @@ export function queryParamsFromFilterRules(filterRules: FilterRule[]): Params {
|
||||
let params = {}
|
||||
for (let rule of filterRules) {
|
||||
let ruleType = FILTER_RULE_TYPES.find((t) => t.id == rule.rule_type)
|
||||
if (ruleType.isnull_filtervar && rule.value == null) {
|
||||
if (
|
||||
rule.rule_type === FILTER_TITLE_CONTENT ||
|
||||
rule.rule_type === FILTER_SIMPLE_TEXT
|
||||
) {
|
||||
params[SIMPLE_TEXT_PARAMETER] = rule.value
|
||||
} else if (
|
||||
rule.rule_type === FILTER_TITLE ||
|
||||
rule.rule_type === FILTER_SIMPLE_TITLE
|
||||
) {
|
||||
params[SIMPLE_TITLE_PARAMETER] = rule.value
|
||||
} else if (ruleType.isnull_filtervar && rule.value == null) {
|
||||
params[ruleType.isnull_filtervar] = 1
|
||||
} else if (
|
||||
ruleType.isnull_filtervar &&
|
||||
|
||||
@@ -3,6 +3,7 @@ from __future__ import annotations
|
||||
import functools
|
||||
import inspect
|
||||
import json
|
||||
import logging
|
||||
import operator
|
||||
from contextlib import contextmanager
|
||||
from typing import TYPE_CHECKING
|
||||
@@ -77,6 +78,8 @@ DATETIME_KWARGS = [
|
||||
CUSTOM_FIELD_QUERY_MAX_DEPTH = 10
|
||||
CUSTOM_FIELD_QUERY_MAX_ATOMS = 20
|
||||
|
||||
logger = logging.getLogger("paperless.api")
|
||||
|
||||
|
||||
class CorrespondentFilterSet(FilterSet):
|
||||
class Meta:
|
||||
@@ -162,9 +165,13 @@ class InboxFilter(Filter):
|
||||
|
||||
@extend_schema_field(serializers.CharField)
|
||||
class TitleContentFilter(Filter):
|
||||
# Deprecated but retained for existing saved views. UI uses Tantivy-backed `text` / `title_search` params.
|
||||
def filter(self, qs: Any, value: Any) -> Any:
|
||||
value = value.strip() if isinstance(value, str) else value
|
||||
if value:
|
||||
logger.warning(
|
||||
"Deprecated document filter parameter 'title_content' used; use `text` instead.",
|
||||
)
|
||||
try:
|
||||
return qs.filter(
|
||||
Q(title__icontains=value) | Q(effective_content__icontains=value),
|
||||
@@ -243,6 +250,9 @@ class CustomFieldsFilter(Filter):
|
||||
def filter(self, qs, value):
|
||||
value = value.strip() if isinstance(value, str) else value
|
||||
if value:
|
||||
logger.warning(
|
||||
"Deprecated document filter parameter 'custom_fields__icontains' used; use `custom_field_query` or advanced Tantivy field syntax instead.",
|
||||
)
|
||||
fields_with_matching_selects = CustomField.objects.filter(
|
||||
extra_data__icontains=value,
|
||||
)
|
||||
@@ -747,6 +757,7 @@ class DocumentFilterSet(FilterSet):
|
||||
|
||||
is_in_inbox = InboxFilter()
|
||||
|
||||
# Deprecated, but keep for now for existing saved views
|
||||
title_content = TitleContentFilter()
|
||||
|
||||
content__istartswith = EffectiveContentFilter(lookup_expr="istartswith")
|
||||
@@ -756,6 +767,7 @@ class DocumentFilterSet(FilterSet):
|
||||
|
||||
owner__id__none = ObjectFilter(field_name="owner", exclude=True)
|
||||
|
||||
# Deprecated, UI no longer includes CF text-search mode, but keep for now for existing saved views
|
||||
custom_fields__icontains = CustomFieldsFilter()
|
||||
|
||||
custom_fields__id__all = ObjectFilter(field_name="custom_fields__field")
|
||||
|
||||
@@ -0,0 +1,92 @@
|
||||
# Generated by Django 5.2.12 on 2026-04-01 18:20
|
||||
|
||||
from django.db import migrations
|
||||
from django.db import models
|
||||
|
||||
# Rule-type ids, matching the choices declared on SavedViewFilterRule.
OLD_TITLE_RULE = 0
OLD_TITLE_CONTENT_RULE = 19
NEW_SIMPLE_TITLE_RULE = 48
NEW_SIMPLE_TEXT_RULE = 49

# (old rule type, replacement rule type) pairs applied by the forward migration.
_RULE_TYPE_REMAP = (
    (OLD_TITLE_RULE, NEW_SIMPLE_TITLE_RULE),
    (OLD_TITLE_CONTENT_RULE, NEW_SIMPLE_TEXT_RULE),
)


# See documents/models.py SavedViewFilterRule
def migrate_saved_view_rules_forward(apps, schema_editor):
    """
    Re-point existing saved-view filter rules at the new simple-search rule types.

    Rules of type 0 ("title contains") become type 48 ("simple title search")
    and rules of type 19 ("title or content contains") become type 49
    ("simple text search"). Each remap is a single bulk UPDATE.

    Args:
        apps: Historical app registry supplied by Django's migration runner.
        schema_editor: Unused; part of the RunPython callable signature.
    """
    SavedViewFilterRule = apps.get_model("documents", "SavedViewFilterRule")
    for old_rule_type, new_rule_type in _RULE_TYPE_REMAP:
        SavedViewFilterRule.objects.filter(rule_type=old_rule_type).update(
            rule_type=new_rule_type,
        )
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """
    Add the "simple title search" (48) and "simple text search" (49) rule types
    to SavedViewFilterRule.rule_type and move existing rules onto them.

    The data step has a no-op reverse: rolling back restores the previous
    field definition but leaves already-converted rows at 48 / 49.
    """

    dependencies = [
        ("documents", "0017_migrate_fulltext_query_field_prefixes"),
    ]

    operations = [
        # Widen the allowed choices first so the remapped values are valid.
        migrations.AlterField(
            model_name="savedviewfilterrule",
            name="rule_type",
            field=models.PositiveSmallIntegerField(
                choices=[
                    (0, "title contains"),
                    (1, "content contains"),
                    (2, "ASN is"),
                    (3, "correspondent is"),
                    (4, "document type is"),
                    (5, "is in inbox"),
                    (6, "has tag"),
                    (7, "has any tag"),
                    (8, "created before"),
                    (9, "created after"),
                    (10, "created year is"),
                    (11, "created month is"),
                    (12, "created day is"),
                    (13, "added before"),
                    (14, "added after"),
                    (15, "modified before"),
                    (16, "modified after"),
                    (17, "does not have tag"),
                    (18, "does not have ASN"),
                    (19, "title or content contains"),
                    (20, "fulltext query"),
                    (21, "more like this"),
                    (22, "has tags in"),
                    (23, "ASN greater than"),
                    (24, "ASN less than"),
                    (25, "storage path is"),
                    (26, "has correspondent in"),
                    (27, "does not have correspondent in"),
                    (28, "has document type in"),
                    (29, "does not have document type in"),
                    (30, "has storage path in"),
                    (31, "does not have storage path in"),
                    (32, "owner is"),
                    (33, "has owner in"),
                    (34, "does not have owner"),
                    (35, "does not have owner in"),
                    (36, "has custom field value"),
                    (37, "is shared by me"),
                    (38, "has custom fields"),
                    (39, "has custom field in"),
                    (40, "does not have custom field in"),
                    (41, "does not have custom field"),
                    (42, "custom fields query"),
                    (43, "created to"),
                    (44, "created from"),
                    (45, "added to"),
                    (46, "added from"),
                    (47, "mime type is"),
                    (48, "simple title search"),
                    (49, "simple text search"),
                ],
                verbose_name="rule type",
            ),
        ),
        # Then convert existing rows; the reverse direction intentionally does nothing.
        migrations.RunPython(
            migrate_saved_view_rules_forward,
            migrations.RunPython.noop,
        ),
    ]
|
||||
@@ -623,6 +623,8 @@ class SavedViewFilterRule(models.Model):
|
||||
(45, _("added to")),
|
||||
(46, _("added from")),
|
||||
(47, _("mime type is")),
|
||||
(48, _("simple title search")),
|
||||
(49, _("simple text search")),
|
||||
]
|
||||
|
||||
saved_view = models.ForeignKey(
|
||||
|
||||
@@ -2,11 +2,11 @@ from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
import unicodedata
|
||||
from collections import Counter
|
||||
from dataclasses import dataclass
|
||||
from datetime import UTC
|
||||
from datetime import datetime
|
||||
from enum import StrEnum
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Self
|
||||
from typing import TypedDict
|
||||
@@ -19,7 +19,10 @@ from django.conf import settings
|
||||
from django.utils.timezone import get_current_timezone
|
||||
from guardian.shortcuts import get_users_with_perms
|
||||
|
||||
from documents.search._normalize import ascii_fold
|
||||
from documents.search._query import build_permission_filter
|
||||
from documents.search._query import parse_simple_text_query
|
||||
from documents.search._query import parse_simple_title_query
|
||||
from documents.search._query import parse_user_query
|
||||
from documents.search._schema import _write_sentinels
|
||||
from documents.search._schema import build_schema
|
||||
@@ -45,14 +48,10 @@ _AUTOCOMPLETE_REGEX_TIMEOUT = 1.0 # seconds; guards against ReDoS on untrusted
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
def _ascii_fold(s: str) -> str:
|
||||
"""
|
||||
Normalize unicode to ASCII equivalent characters for search consistency.
|
||||
|
||||
Converts accented characters (e.g., "café") to their ASCII base forms ("cafe")
|
||||
to enable cross-language searching without requiring exact diacritic matching.
|
||||
"""
|
||||
return unicodedata.normalize("NFD", s).encode("ascii", "ignore").decode()
|
||||
class SearchMode(StrEnum):
|
||||
QUERY = "query"
|
||||
TEXT = "text"
|
||||
TITLE = "title"
|
||||
|
||||
|
||||
def _extract_autocomplete_words(text_sources: list[str]) -> set[str]:
|
||||
@@ -74,7 +73,7 @@ def _extract_autocomplete_words(text_sources: list[str]) -> set[str]:
|
||||
)
|
||||
continue
|
||||
for token in tokens:
|
||||
normalized = _ascii_fold(token.lower())
|
||||
normalized = ascii_fold(token.lower())
|
||||
if normalized:
|
||||
words.add(normalized)
|
||||
return words
|
||||
@@ -294,8 +293,10 @@ class TantivyBackend:
|
||||
doc.add_text("checksum", document.checksum)
|
||||
doc.add_text("title", document.title)
|
||||
doc.add_text("title_sort", document.title)
|
||||
doc.add_text("simple_title", document.title)
|
||||
doc.add_text("content", content)
|
||||
doc.add_text("bigram_content", content)
|
||||
doc.add_text("simple_content", content)
|
||||
|
||||
# Original filename - only add if not None/empty
|
||||
if document.original_filename:
|
||||
@@ -433,6 +434,7 @@ class TantivyBackend:
|
||||
sort_field: str | None,
|
||||
*,
|
||||
sort_reverse: bool,
|
||||
search_mode: SearchMode = SearchMode.QUERY,
|
||||
) -> SearchResults:
|
||||
"""
|
||||
Execute a search query against the document index.
|
||||
@@ -441,20 +443,32 @@ class TantivyBackend:
|
||||
permission filtering before executing against Tantivy. Supports both
|
||||
relevance-based and field-based sorting.
|
||||
|
||||
QUERY search mode supports natural date keywords, field filters, etc.
|
||||
TITLE search mode treats the query as plain text to search for in title only
|
||||
TEXT search mode treats the query as plain text to search for in title and content
|
||||
|
||||
Args:
|
||||
query: User's search query (supports natural date keywords, field filters)
|
||||
query: User's search query
|
||||
user: User for permission filtering (None for superuser/no filtering)
|
||||
page: Page number (1-indexed) for pagination
|
||||
page_size: Number of results per page
|
||||
sort_field: Field to sort by (None for relevance ranking)
|
||||
sort_reverse: Whether to reverse the sort order
|
||||
search_mode: "query" for advanced Tantivy syntax, "text" for
|
||||
plain-text search over title and content only, "title" for
|
||||
plain-text search over title only
|
||||
|
||||
Returns:
|
||||
SearchResults with hits, total count, and processed query
|
||||
"""
|
||||
self._ensure_open()
|
||||
tz = get_current_timezone()
|
||||
user_query = parse_user_query(self._index, query, tz)
|
||||
if search_mode is SearchMode.TEXT:
|
||||
user_query = parse_simple_text_query(self._index, query)
|
||||
elif search_mode is SearchMode.TITLE:
|
||||
user_query = parse_simple_title_query(self._index, query)
|
||||
else:
|
||||
user_query = parse_user_query(self._index, query, tz)
|
||||
|
||||
# Apply permission filter if user is not None (not superuser)
|
||||
if user is not None:
|
||||
@@ -594,7 +608,7 @@ class TantivyBackend:
|
||||
List of word suggestions ordered by frequency, then alphabetically
|
||||
"""
|
||||
self._ensure_open()
|
||||
normalized_term = _ascii_fold(term.lower())
|
||||
normalized_term = ascii_fold(term.lower())
|
||||
|
||||
searcher = self._index.searcher()
|
||||
|
||||
|
||||
8
src/documents/search/_normalize.py
Normal file
8
src/documents/search/_normalize.py
Normal file
@@ -0,0 +1,8 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import unicodedata
|
||||
|
||||
|
||||
def ascii_fold(text: str) -> str:
    """
    Normalize unicode text to ASCII equivalents for search consistency.

    The text is NFD-decomposed and then encoded to ASCII with errors ignored,
    so "café" becomes "cafe". Any character that still has no ASCII form after
    decomposition is removed from the result rather than preserved.
    """
    decomposed = unicodedata.normalize("NFD", text)
    return decomposed.encode("ascii", "ignore").decode()
|
||||
@@ -12,6 +12,8 @@ import tantivy
|
||||
from dateutil.relativedelta import relativedelta
|
||||
from django.conf import settings
|
||||
|
||||
from documents.search._normalize import ascii_fold
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from datetime import tzinfo
|
||||
|
||||
@@ -51,6 +53,7 @@ _WHOOSH_REL_RANGE_RE = regex.compile(
|
||||
)
|
||||
# Whoosh-style 8-digit date: field:YYYYMMDD — field-aware so timezone can be applied correctly
|
||||
_DATE8_RE = regex.compile(r"(?P<field>\w+):(?P<date8>\d{8})\b")
|
||||
_SIMPLE_QUERY_TOKEN_RE = regex.compile(r"\S+")
|
||||
|
||||
|
||||
def _fmt(dt: datetime) -> str:
|
||||
@@ -436,7 +439,37 @@ DEFAULT_SEARCH_FIELDS = [
|
||||
"document_type",
|
||||
"tag",
|
||||
]
|
||||
SIMPLE_SEARCH_FIELDS = ["simple_title", "simple_content"]
|
||||
TITLE_SEARCH_FIELDS = ["simple_title"]
|
||||
_FIELD_BOOSTS = {"title": 2.0}
|
||||
_SIMPLE_FIELD_BOOSTS = {"simple_title": 2.0}
|
||||
|
||||
|
||||
def _build_simple_field_query(
    index: tantivy.Index,
    field: str,
    tokens: list[str],
) -> tantivy.Query:
    """
    Build the substring-style query for one simple-search field.

    Args:
        index: Index whose schema contains ``field``.
        field: Name of the field to search.
        tokens: Normalized query tokens, in the order the user typed them.

    Returns:
        A regex query for a single token, a regex phrase query for several,
        wrapped in a boost when ``_SIMPLE_FIELD_BOOSTS`` lists the field with a
        factor above 1.0. An empty token list yields an empty query.
    """
    if not tokens:
        # Nothing to match; avoid handing an empty pattern list to Tantivy.
        return tantivy.Query.empty_query()

    # For multi-token substring search, only the first token can begin mid-word.
    # Later tokens follow a whitespace boundary in the original query, so anchor
    # them to the start of the next indexed token to reduce false positives like
    # matching "Z-Berichte 16" for the query "Z-Berichte 6".
    patterns = [
        f".*{regex.escape(token)}.*" if position == 0 else f"{regex.escape(token)}.*"
        for position, token in enumerate(tokens)
    ]

    if len(patterns) == 1:
        query = tantivy.Query.regex_query(index.schema, field, patterns[0])
    else:
        query = tantivy.Query.regex_phrase_query(index.schema, field, patterns)

    boost = _SIMPLE_FIELD_BOOSTS.get(field, 1.0)
    return tantivy.Query.boost_query(query, boost) if boost > 1.0 else query
|
||||
|
||||
|
||||
def parse_user_query(
|
||||
@@ -495,3 +528,52 @@ def parse_user_query(
|
||||
)
|
||||
|
||||
return exact
|
||||
|
||||
|
||||
def _fold_simple_query_token(token: str) -> str:
    """
    Lowercase a query token and strip diacritics without discarding other scripts.

    Each non-ASCII character is replaced by its NFD base letter when that base
    is ASCII ("é" -> "e"); otherwise the character is kept unchanged. Plain
    ``ascii_fold`` would delete such characters, which turns a CJK or Cyrillic
    query into an empty string.

    NOTE(review): assumes the index-side ``ascii_fold`` filter leaves characters
    without an ASCII mapping untouched — confirm against the Tantivy version in
    use. Letters Tantivy folds via its own table (e.g. "ß") are kept here.
    """
    import unicodedata  # local import keeps this block self-contained

    folded: list[str] = []
    for char in token.lower():
        if char.isascii():
            folded.append(char)
            continue
        base = "".join(
            part
            for part in unicodedata.normalize("NFD", char)
            if not unicodedata.combining(part)
        )
        folded.append(base if base and base.isascii() else char)
    return "".join(folded)


def parse_simple_query(
    index: tantivy.Index,
    raw_query: str,
    fields: list[str],
) -> tantivy.Query:
    """
    Parse a plain-text query using Tantivy over a restricted field set.

    The query string is split on whitespace, each token is normalized and
    regex-escaped, and the result is treated as a literal "simple" text query;
    no query syntax is interpreted.

    Args:
        index: Index to build the query against.
        raw_query: Text exactly as entered by the user.
        fields: Field names to search; a document matches if any field matches.

    Returns:
        The single field query when one field is given, otherwise a boolean
        query combining all field queries with ``Should``. Returns an empty
        query when ``raw_query`` contains no usable tokens.
    """
    tokens = [
        folded
        for raw_token in _SIMPLE_QUERY_TOKEN_RE.findall(
            raw_query,
            timeout=_REGEX_TIMEOUT,
        )
        if (folded := _fold_simple_query_token(raw_token))
    ]
    if not tokens:
        return tantivy.Query.empty_query()

    field_queries = [
        (tantivy.Occur.Should, _build_simple_field_query(index, field, tokens))
        for field in fields
    ]
    if len(field_queries) == 1:
        # A lone clause needs no boolean wrapper.
        return field_queries[0][1]
    return tantivy.Query.boolean_query(field_queries)
|
||||
|
||||
|
||||
def parse_simple_text_query(
    index: tantivy.Index,
    raw_query: str,
) -> tantivy.Query:
    """
    Parse a plain-text query over title/content for simple search inputs.

    Delegates to ``parse_simple_query`` with ``SIMPLE_SEARCH_FIELDS``.
    """
    return parse_simple_query(index, raw_query, SIMPLE_SEARCH_FIELDS)
|
||||
|
||||
|
||||
def parse_simple_title_query(
    index: tantivy.Index,
    raw_query: str,
) -> tantivy.Query:
    """
    Parse a plain-text query over the title field only.

    Delegates to ``parse_simple_query`` with ``TITLE_SEARCH_FIELDS``.
    """
    return parse_simple_query(index, raw_query, TITLE_SEARCH_FIELDS)
|
||||
|
||||
@@ -53,6 +53,18 @@ def build_schema() -> tantivy.Schema:
|
||||
# CJK support - not stored, indexed only
|
||||
sb.add_text_field("bigram_content", stored=False, tokenizer_name="bigram_analyzer")
|
||||
|
||||
# Simple substring search support for title/content - not stored, indexed only
|
||||
sb.add_text_field(
|
||||
"simple_title",
|
||||
stored=False,
|
||||
tokenizer_name="simple_search_analyzer",
|
||||
)
|
||||
sb.add_text_field(
|
||||
"simple_content",
|
||||
stored=False,
|
||||
tokenizer_name="simple_search_analyzer",
|
||||
)
|
||||
|
||||
# Autocomplete prefix scan - stored, not indexed
|
||||
sb.add_text_field("autocomplete_word", stored=True, tokenizer_name="raw")
|
||||
|
||||
|
||||
@@ -70,6 +70,7 @@ def register_tokenizers(index: tantivy.Index, language: str | None) -> None:
|
||||
index.register_tokenizer("paperless_text", _paperless_text(language))
|
||||
index.register_tokenizer("simple_analyzer", _simple_analyzer())
|
||||
index.register_tokenizer("bigram_analyzer", _bigram_analyzer())
|
||||
index.register_tokenizer("simple_search_analyzer", _simple_search_analyzer())
|
||||
# Fast-field tokenizer required for fast=True text fields in the schema
|
||||
index.register_fast_field_tokenizer("simple_analyzer", _simple_analyzer())
|
||||
|
||||
@@ -114,3 +115,16 @@ def _bigram_analyzer() -> tantivy.TextAnalyzer:
|
||||
.filter(tantivy.Filter.lowercase())
|
||||
.build()
|
||||
)
|
||||
|
||||
|
||||
def _simple_search_analyzer() -> tantivy.TextAnalyzer:
    """Tokenizer for simple substring search fields: non-whitespace chunks -> remove_long(65) -> lowercase -> ascii_fold."""
    # Splitting on whitespace only keeps punctuation such as "-" and ":" inside
    # a token, so a regex over indexed terms can match it literally.
    builder = tantivy.TextAnalyzerBuilder(
        tantivy.Tokenizer.regex(r"\S+"),
    )
    return (
        builder.filter(tantivy.Filter.remove_long(65))
        .filter(tantivy.Filter.lowercase())
        .filter(tantivy.Filter.ascii_fold())
        .build()
    )
|
||||
|
||||
@@ -5,6 +5,7 @@ from documents.models import CustomField
|
||||
from documents.models import CustomFieldInstance
|
||||
from documents.models import Document
|
||||
from documents.models import Note
|
||||
from documents.search._backend import SearchMode
|
||||
from documents.search._backend import TantivyBackend
|
||||
from documents.search._backend import get_backend
|
||||
from documents.search._backend import reset_backend
|
||||
@@ -46,6 +47,258 @@ class TestWriteBatch:
|
||||
class TestSearch:
|
||||
"""Test search functionality."""
|
||||
|
||||
def test_text_mode_limits_default_search_to_title_and_content(
    self,
    backend: TantivyBackend,
):
    """Simple text mode must not match metadata-only fields."""
    doc = Document.objects.create(
        title="Invoice document",
        content="monthly statement",
        checksum="TXT1",
        pk=9,
    )
    backend.add_or_update(doc)

    def text_search(query: str):
        # Every check below uses identical paging and sorting arguments.
        return backend.search(
            query,
            user=None,
            page=1,
            page_size=10,
            sort_field=None,
            sort_reverse=False,
            search_mode=SearchMode.TEXT,
        )

    # Field-prefixed syntax is treated as literal text, which appears in
    # neither the title nor the content.
    metadata_only = text_search("document_type:invoice")
    assert metadata_only.total == 0

    content_match = text_search("monthly")
    assert content_match.total == 1
|
||||
|
||||
def test_title_mode_limits_default_search_to_title_only(
    self,
    backend: TantivyBackend,
):
    """Title mode must not match content-only terms."""
    doc = Document.objects.create(
        title="Invoice document",
        content="monthly statement",
        checksum="TXT2",
        pk=10,
    )
    backend.add_or_update(doc)

    def title_search(query: str):
        # Every check below uses identical paging and sorting arguments.
        return backend.search(
            query,
            user=None,
            page=1,
            page_size=10,
            sort_field=None,
            sort_reverse=False,
            search_mode=SearchMode.TITLE,
        )

    # "monthly" occurs only in the content.
    content_only = title_search("monthly")
    assert content_only.total == 0

    title_match = title_search("invoice")
    assert title_match.total == 1
|
||||
|
||||
def test_text_mode_matches_partial_term_substrings(
    self,
    backend: TantivyBackend,
):
    """Simple text mode should support substring matching within tokens."""
    doc = Document.objects.create(
        title="Account access",
        content="password reset instructions",
        checksum="TXT3",
        pk=11,
    )
    backend.add_or_update(doc)

    def text_search(query: str):
        # Every check below uses identical paging and sorting arguments.
        return backend.search(
            query,
            user=None,
            page=1,
            page_size=10,
            sort_field=None,
            sort_reverse=False,
            search_mode=SearchMode.TEXT,
        )

    # Start of a word.
    prefix_match = text_search("pass")
    assert prefix_match.total == 1

    # Middle of a word.
    infix_match = text_search("sswo")
    assert infix_match.total == 1

    # Mid-word fragment followed by the start of the next word.
    phrase_match = text_search("sswo re")
    assert phrase_match.total == 1
|
||||
|
||||
def test_text_mode_does_not_match_on_partial_term_overlap(
    self,
    backend: TantivyBackend,
):
    """Simple text mode should not match documents that merely share partial fragments."""
    doc = Document.objects.create(
        title="Adobe Acrobat PDF Files",
        content="Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
        checksum="TXT7",
        pk=13,
    )
    backend.add_or_update(doc)

    # "raptor" shares letter fragments with the indexed words but is not a
    # substring of any of them.
    non_match = backend.search(
        "raptor",
        user=None,
        page=1,
        page_size=10,
        sort_field=None,
        sort_reverse=False,
        search_mode=SearchMode.TEXT,
    )
    assert non_match.total == 0
|
||||
|
||||
def test_text_mode_anchors_later_query_tokens_to_token_starts(
    self,
    backend: TantivyBackend,
):
    """Multi-token simple search should not match later tokens in the middle of a word."""
    exact_doc = Document.objects.create(
        title="Z-Berichte 6",
        content="monthly report",
        checksum="TXT9",
        pk=15,
    )
    prefix_doc = Document.objects.create(
        title="Z-Berichte 60",
        content="monthly report",
        checksum="TXT10",
        pk=16,
    )
    false_positive = Document.objects.create(
        title="Z-Berichte 16",
        content="monthly report",
        checksum="TXT11",
        pk=17,
    )
    for document in (exact_doc, prefix_doc, false_positive):
        backend.add_or_update(document)

    results = backend.search(
        "Z-Berichte 6",
        user=None,
        page=1,
        page_size=10,
        sort_field=None,
        sort_reverse=False,
        search_mode=SearchMode.TEXT,
    )
    result_ids = {hit["id"] for hit in results.hits}

    # "6" may start the second word ("6", "60") but must not sit inside it ("16").
    assert exact_doc.id in result_ids
    assert prefix_doc.id in result_ids
    assert false_positive.id not in result_ids
|
||||
|
||||
def test_text_mode_ignores_queries_without_searchable_tokens(
    self,
    backend: TantivyBackend,
):
    """Simple text mode should safely return no hits for symbol-only strings."""
    doc = Document.objects.create(
        title="Guide",
        content="This is a guide.",
        checksum="TXT8",
        pk=14,
    )
    backend.add_or_update(doc)

    no_tokens = backend.search(
        "!!!",
        user=None,
        page=1,
        page_size=10,
        sort_field=None,
        sort_reverse=False,
        search_mode=SearchMode.TEXT,
    )
    assert no_tokens.total == 0
|
||||
|
||||
def test_title_mode_matches_partial_term_substrings(
    self,
    backend: TantivyBackend,
):
    """Title mode should support substring matching within title tokens."""
    doc = Document.objects.create(
        title="Password guide",
        content="reset instructions",
        checksum="TXT4",
        pk=12,
    )
    backend.add_or_update(doc)

    def title_search(query: str):
        # Every check below uses identical paging and sorting arguments.
        return backend.search(
            query,
            user=None,
            page=1,
            page_size=10,
            sort_field=None,
            sort_reverse=False,
            search_mode=SearchMode.TITLE,
        )

    # Start of a title word.
    prefix_match = title_search("pass")
    assert prefix_match.total == 1

    # Middle of a title word.
    infix_match = title_search("sswo")
    assert infix_match.total == 1

    # Mid-word fragment followed by the start of the next title word.
    phrase_match = title_search("sswo gu")
    assert phrase_match.total == 1
|
||||
|
||||
def test_scores_normalised_top_hit_is_one(self, backend: TantivyBackend):
|
||||
"""Search scores must be normalized so top hit has score 1.0 for UI consistency."""
|
||||
for i, title in enumerate(["bank invoice", "bank statement", "bank receipt"]):
|
||||
|
||||
@@ -8,6 +8,7 @@ import tantivy
|
||||
|
||||
from documents.search._tokenizer import _bigram_analyzer
|
||||
from documents.search._tokenizer import _paperless_text
|
||||
from documents.search._tokenizer import _simple_search_analyzer
|
||||
from documents.search._tokenizer import register_tokenizers
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -41,6 +42,20 @@ class TestTokenizers:
|
||||
idx.register_tokenizer("bigram_analyzer", _bigram_analyzer())
|
||||
return idx
|
||||
|
||||
@pytest.fixture
def simple_search_index(self) -> tantivy.Index:
    """Index with simple-search field for Latin substring tests."""
    schema_builder = tantivy.SchemaBuilder()
    schema_builder.add_text_field(
        "simple_content",
        stored=False,
        tokenizer_name="simple_search_analyzer",
    )
    # path=None is passed so the index is not tied to a directory on disk.
    idx = tantivy.Index(schema_builder.build(), path=None)
    # The analyzer must be registered under the name the field refers to.
    idx.register_tokenizer("simple_search_analyzer", _simple_search_analyzer())
    return idx
|
||||
|
||||
def test_ascii_fold_finds_accented_content(
|
||||
self,
|
||||
content_index: tantivy.Index,
|
||||
@@ -66,6 +81,24 @@ class TestTokenizers:
|
||||
q = bigram_index.parse_query("東京", ["bigram_content"])
|
||||
assert bigram_index.searcher().search(q, limit=5).count == 1
|
||||
|
||||
def test_simple_search_analyzer_supports_regex_substrings(
    self,
    simple_search_index: tantivy.Index,
) -> None:
    """Whitespace-preserving simple search analyzer supports substring regex matching."""
    writer = simple_search_index.writer()
    doc = tantivy.Document()
    doc.add_text("simple_content", "tag:invoice password-reset")
    writer.add_document(doc)
    writer.commit()
    # Make the committed document visible to new searchers.
    simple_search_index.reload()

    # "sswo" sits inside the whitespace-delimited chunk "password-reset".
    substring_query = tantivy.Query.regex_query(
        simple_search_index.schema,
        "simple_content",
        ".*sswo.*",
    )
    searcher = simple_search_index.searcher()
    assert searcher.search(substring_query, limit=5).count == 1
|
||||
|
||||
def test_unsupported_language_logs_warning(self, caplog: LogCaptureFixture) -> None:
|
||||
"""Unsupported language codes should log a warning and disable stemming gracefully."""
|
||||
sb = tantivy.SchemaBuilder()
|
||||
|
||||
@@ -91,6 +91,135 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.data["count"], 0)
|
||||
self.assertEqual(len(results), 0)
|
||||
|
||||
def test_simple_text_search(self) -> None:
    """The ``text`` parameter searches title/content and ignores tag metadata."""
    tagged = Tag.objects.create(name="invoice")
    matching_doc = Document.objects.create(
        title="Quarterly summary",
        content="Monthly bank report",
        checksum="T1",
        pk=11,
    )
    matching_doc.tags.add(tagged)

    metadata_only_doc = Document.objects.create(
        title="Completely unrelated",
        content="No matching terms here",
        checksum="T2",
        pk=12,
    )
    metadata_only_doc.tags.add(tagged)

    backend = get_backend()
    for document in (matching_doc, metadata_only_doc):
        backend.add_or_update(document)

    response = self.client.get("/api/documents/?text=monthly")
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    self.assertEqual(response.data["count"], 1)
    self.assertEqual(response.data["results"][0]["id"], matching_doc.id)

    # Both documents carry the "invoice" tag, yet field syntax is not
    # interpreted by the simple text search, so nothing matches.
    response = self.client.get("/api/documents/?text=tag:invoice")
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    self.assertEqual(response.data["count"], 0)
|
||||
|
||||
def test_simple_text_search_matches_substrings(self) -> None:
    """The ``text`` parameter matches word prefixes, infixes and multi-token fragments."""
    matching_doc = Document.objects.create(
        title="Quarterly summary",
        content="Password reset instructions",
        checksum="T5",
        pk=15,
    )

    backend = get_backend()
    backend.add_or_update(matching_doc)

    # Word prefix, mid-word fragment, then a two-token fragment.
    for url in (
        "/api/documents/?text=pass",
        "/api/documents/?text=sswo",
        "/api/documents/?text=sswo re",
    ):
        response = self.client.get(url)
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        self.assertEqual(response.data["count"], 1)
        self.assertEqual(response.data["results"][0]["id"], matching_doc.id)
|
||||
|
||||
def test_simple_text_search_does_not_match_on_partial_term_overlap(self) -> None:
    """A term that only shares letter fragments with indexed words yields no results."""
    non_matching_doc = Document.objects.create(
        title="Adobe Acrobat PDF Files",
        content="Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
        checksum="T7",
        pk=17,
    )

    backend = get_backend()
    backend.add_or_update(non_matching_doc)

    response = self.client.get("/api/documents/?text=raptor")
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    self.assertEqual(response.data["count"], 0)
|
||||
|
||||
def test_simple_title_search(self) -> None:
    """The ``title_search`` parameter ignores matches that occur only in content."""
    title_match = Document.objects.create(
        title="Quarterly summary",
        content="No matching content here",
        checksum="T3",
        pk=13,
    )
    content_only = Document.objects.create(
        title="Completely unrelated",
        content="Quarterly summary appears only in content",
        checksum="T4",
        pk=14,
    )

    backend = get_backend()
    for document in (title_match, content_only):
        backend.add_or_update(document)

    response = self.client.get("/api/documents/?title_search=quarterly")
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    self.assertEqual(response.data["count"], 1)
    self.assertEqual(response.data["results"][0]["id"], title_match.id)
|
||||
|
||||
def test_simple_title_search_matches_substrings(self) -> None:
    """The ``title_search`` parameter matches prefixes, infixes and multi-token fragments."""
    title_match = Document.objects.create(
        title="Password handbook",
        content="No matching content here",
        checksum="T6",
        pk=16,
    )

    backend = get_backend()
    backend.add_or_update(title_match)

    # Word prefix, mid-word fragment, then a two-token fragment.
    for url in (
        "/api/documents/?title_search=pass",
        "/api/documents/?title_search=sswo",
        "/api/documents/?title_search=sswo hand",
    ):
        response = self.client.get(url)
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        self.assertEqual(response.data["count"], 1)
        self.assertEqual(response.data["results"][0]["id"], title_match.id)
|
||||
|
||||
def test_search_rejects_multiple_search_modes(self) -> None:
    """Supplying two search parameters at once is answered with HTTP 400."""
    response = self.client.get("/api/documents/?text=bank&query=bank")

    self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
    self.assertEqual(
        response.data["detail"],
        "Specify only one of text, title_search, query, or more_like_id.",
    )
|
||||
|
||||
def test_search_returns_all_for_api_version_9(self) -> None:
|
||||
d1 = Document.objects.create(
|
||||
title="invoice",
|
||||
@@ -1493,6 +1622,31 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(results["custom_fields"][0]["id"], custom_field1.id)
|
||||
self.assertEqual(results["workflows"][0]["id"], workflow1.id)
|
||||
|
||||
def test_global_search_db_only_limits_documents_to_title_matches(self) -> None:
    """With ``db_only=true`` the global search returns only title matches for documents."""
    title_match = Document.objects.create(
        title="bank statement",
        content="no additional terms",
        checksum="GS1",
        pk=21,
    )
    content_only = Document.objects.create(
        title="not a title match",
        content="bank appears only in content",
        checksum="GS2",
        pk=22,
    )

    backend = get_backend()
    for document in (title_match, content_only):
        backend.add_or_update(document)

    self.client.force_authenticate(self.user)

    response = self.client.get("/api/search/?query=bank&db_only=true")
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    documents = response.data["documents"]
    self.assertEqual(len(documents), 1)
    self.assertEqual(documents[0]["id"], title_match.id)
|
||||
|
||||
def test_global_search_filters_owned_mail_objects(self) -> None:
|
||||
user1 = User.objects.create_user("mail-search-user")
|
||||
user2 = User.objects.create_user("other-mail-search-user")
|
||||
|
||||
@@ -1995,11 +1995,23 @@ class ChatStreamingView(GenericAPIView):
|
||||
list=extend_schema(
|
||||
description="Document views including search",
|
||||
parameters=[
|
||||
OpenApiParameter(
|
||||
name="text",
|
||||
type=OpenApiTypes.STR,
|
||||
location=OpenApiParameter.QUERY,
|
||||
description="Simple Tantivy-backed text search query string",
|
||||
),
|
||||
OpenApiParameter(
|
||||
name="title_search",
|
||||
type=OpenApiTypes.STR,
|
||||
location=OpenApiParameter.QUERY,
|
||||
description="Simple Tantivy-backed title-only search query string",
|
||||
),
|
||||
OpenApiParameter(
|
||||
name="query",
|
||||
type=OpenApiTypes.STR,
|
||||
location=OpenApiParameter.QUERY,
|
||||
description="Advanced search query string",
|
||||
description="Advanced Tantivy search query string",
|
||||
),
|
||||
OpenApiParameter(
|
||||
name="full_perms",
|
||||
@@ -2025,17 +2037,22 @@ class ChatStreamingView(GenericAPIView):
|
||||
),
|
||||
)
|
||||
class UnifiedSearchViewSet(DocumentViewSet):
|
||||
SEARCH_PARAM_NAMES = ("text", "title_search", "query", "more_like_id")
|
||||
|
||||
def get_serializer_class(self):
    """Return SearchResultSerializer for search requests, DocumentSerializer otherwise."""
    if self._is_search_request():
        return SearchResultSerializer
    return DocumentSerializer
|
||||
|
||||
def _get_active_search_params(self, request: Request | None = None) -> list[str]:
    """
    List the search parameters present in the request's query string.

    Args:
        request: Request to inspect; falls back to ``self.request`` when omitted.

    Returns:
        The names from ``SEARCH_PARAM_NAMES`` found in the query parameters,
        in the order they are declared there. Presence alone counts, whatever
        the value.
    """
    request = request or self.request
    query_params = request.query_params
    return [name for name in self.SEARCH_PARAM_NAMES if name in query_params]
|
||||
|
||||
def _is_search_request(self):
    """
    Tell whether the current request carries any search parameter.

    Relies on ``_get_active_search_params`` so that ``text`` and
    ``title_search`` are recognised alongside ``query`` and ``more_like_id``;
    checking only the latter two would route simple searches to the plain
    document listing.
    """
    return bool(self._get_active_search_params())
|
||||
|
||||
def list(self, request, *args, **kwargs):
|
||||
if not self._is_search_request():
|
||||
@@ -2043,6 +2060,7 @@ class UnifiedSearchViewSet(DocumentViewSet):
|
||||
|
||||
from documents.search import TantivyRelevanceList
|
||||
from documents.search import get_backend
|
||||
from documents.search._backend import SearchMode
|
||||
|
||||
try:
|
||||
backend = get_backend()
|
||||
@@ -2050,9 +2068,31 @@ class UnifiedSearchViewSet(DocumentViewSet):
|
||||
filtered_qs = self.filter_queryset(self.get_queryset())
|
||||
|
||||
user = None if request.user.is_superuser else request.user
|
||||
active_search_params = self._get_active_search_params(request)
|
||||
|
||||
if "query" in request.query_params:
|
||||
query_str = request.query_params["query"]
|
||||
if len(active_search_params) > 1:
|
||||
raise ValidationError(
|
||||
{
|
||||
"detail": _(
|
||||
"Specify only one of text, title_search, query, or more_like_id.",
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
if (
|
||||
"text" in request.query_params
|
||||
or "title_search" in request.query_params
|
||||
or "query" in request.query_params
|
||||
):
|
||||
if "text" in request.query_params:
|
||||
search_mode = SearchMode.TEXT
|
||||
query_str = request.query_params["text"]
|
||||
elif "title_search" in request.query_params:
|
||||
search_mode = SearchMode.TITLE
|
||||
query_str = request.query_params["title_search"]
|
||||
else:
|
||||
search_mode = SearchMode.QUERY
|
||||
query_str = request.query_params["query"]
|
||||
results = backend.search(
|
||||
query_str,
|
||||
user=user,
|
||||
@@ -2060,6 +2100,7 @@ class UnifiedSearchViewSet(DocumentViewSet):
|
||||
page_size=10000,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=search_mode,
|
||||
)
|
||||
else:
|
||||
# more_like_id — validate permission on the seed document first
|
||||
@@ -2132,6 +2173,8 @@ class UnifiedSearchViewSet(DocumentViewSet):
|
||||
if str(e.detail) == str(invalid_more_like_id_message):
|
||||
return HttpResponseForbidden(invalid_more_like_id_message)
|
||||
return HttpResponseForbidden(_("Insufficient permissions."))
|
||||
except ValidationError:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.warning(f"An error occurred listing search results: {e!s}")
|
||||
return HttpResponseBadRequest(
|
||||
@@ -3003,6 +3046,9 @@ class GlobalSearchView(PassUserMixin):
|
||||
serializer_class = SearchResultSerializer
|
||||
|
||||
def get(self, request, *args, **kwargs):
|
||||
from documents.search import get_backend
|
||||
from documents.search._backend import SearchMode
|
||||
|
||||
query = request.query_params.get("query", None)
|
||||
if query is None:
|
||||
return HttpResponseBadRequest("Query required")
|
||||
@@ -3019,25 +3065,25 @@ class GlobalSearchView(PassUserMixin):
|
||||
"view_document",
|
||||
Document,
|
||||
)
|
||||
# First search by title
|
||||
docs = all_docs.filter(title__icontains=query)
|
||||
if not db_only and len(docs) < OBJECT_LIMIT:
|
||||
# If we don't have enough results, search by content.
|
||||
# Over-fetch from Tantivy (no permission filter) and rely on
|
||||
# the ORM all_docs queryset for authoritative permission gating.
|
||||
from documents.search import get_backend
|
||||
|
||||
if db_only:
|
||||
docs = all_docs.filter(title__icontains=query)[:OBJECT_LIMIT]
|
||||
else:
|
||||
user = None if request.user.is_superuser else request.user
|
||||
fts_results = get_backend().search(
|
||||
query,
|
||||
user=None,
|
||||
user=user,
|
||||
page=1,
|
||||
page_size=1000,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=SearchMode.TEXT,
|
||||
)
|
||||
fts_ids = {h["id"] for h in fts_results.hits}
|
||||
docs = docs | all_docs.filter(id__in=fts_ids)
|
||||
docs = docs[:OBJECT_LIMIT]
|
||||
docs_by_id = all_docs.in_bulk([hit["id"] for hit in fts_results.hits])
|
||||
docs = [
|
||||
docs_by_id[hit["id"]]
|
||||
for hit in fts_results.hits
|
||||
if hit["id"] in docs_by_id
|
||||
][:OBJECT_LIMIT]
|
||||
saved_views = (
|
||||
get_objects_for_user_owner_aware(
|
||||
request.user,
|
||||
|
||||
Reference in New Issue
Block a user