Compare commits

...

2 Commits

Author SHA1 Message Date
Sean Whalen
adb0d31382 9.1.2
- Fix duplicate detection for normalized aggregate reports in Elasticsearch/OpenSearch (PR #666 fixes issue #665)
2026-03-06 13:41:33 -05:00
Copilot
ae5d20ecf5 Fix duplicate detection for normalized aggregate reports in Elasticsearch/OpenSearch (#666)
Change date_begin/date_end queries from exact match to range queries
(gte/lte) so that previously saved normalized time buckets are correctly
detected as duplicates within the original report's date range.

Co-authored-by: seanthegeek <44679+seanthegeek@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: seanthegeek <44679+seanthegeek@users.noreply.github.com>
2026-03-06 13:21:54 -05:00
4 changed files with 11 additions and 5 deletions

View File

@@ -1,5 +1,11 @@
# Changelog
## 9.1.2
### Fixes
- Fix duplicate detection for normalized aggregate reports in Elasticsearch/OpenSearch (PR #666 fixes issue #665)
## 9.1.1
### Fixes

View File

@@ -1,3 +1,3 @@
__version__ = "9.1.1"
__version__ = "9.1.2"
USER_AGENT = f"parsedmarc/{__version__}"

View File

@@ -413,8 +413,8 @@ def save_aggregate_report_to_elasticsearch(
org_name_query = Q(dict(match_phrase=dict(org_name=org_name))) # type: ignore
report_id_query = Q(dict(match_phrase=dict(report_id=report_id))) # pyright: ignore[reportArgumentType]
domain_query = Q(dict(match_phrase={"published_policy.domain": domain})) # pyright: ignore[reportArgumentType]
begin_date_query = Q(dict(match=dict(date_begin=begin_date))) # pyright: ignore[reportArgumentType]
end_date_query = Q(dict(match=dict(date_end=end_date))) # pyright: ignore[reportArgumentType]
begin_date_query = Q(dict(range=dict(date_begin=dict(gte=begin_date)))) # pyright: ignore[reportArgumentType]
end_date_query = Q(dict(range=dict(date_end=dict(lte=end_date)))) # pyright: ignore[reportArgumentType]
if index_suffix is not None:
search_index = "dmarc_aggregate_{0}*".format(index_suffix)

View File

@@ -413,8 +413,8 @@ def save_aggregate_report_to_opensearch(
org_name_query = Q(dict(match_phrase=dict(org_name=org_name)))
report_id_query = Q(dict(match_phrase=dict(report_id=report_id)))
domain_query = Q(dict(match_phrase={"published_policy.domain": domain}))
begin_date_query = Q(dict(match=dict(date_begin=begin_date)))
end_date_query = Q(dict(match=dict(date_end=end_date)))
begin_date_query = Q(dict(range=dict(date_begin=dict(gte=begin_date))))
end_date_query = Q(dict(range=dict(date_end=dict(lte=end_date))))
if index_suffix is not None:
search_index = "dmarc_aggregate_{0}*".format(index_suffix)