mirror of
https://github.com/domainaware/parsedmarc.git
synced 2026-03-29 01:42:45 +00:00
Fix duplicate detection for normalized aggregate reports in Elasticsearch/OpenSearch (#666)
Change date_begin/date_end queries from exact match to range queries (gte/lte) so that previously saved normalized time buckets are correctly detected as duplicates within the original report's date range. Co-authored-by: seanthegeek <44679+seanthegeek@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: seanthegeek <44679+seanthegeek@users.noreply.github.com>
This commit is contained in:
@@ -413,8 +413,8 @@ def save_aggregate_report_to_elasticsearch(
|
||||
org_name_query = Q(dict(match_phrase=dict(org_name=org_name))) # type: ignore
|
||||
report_id_query = Q(dict(match_phrase=dict(report_id=report_id))) # pyright: ignore[reportArgumentType]
|
||||
domain_query = Q(dict(match_phrase={"published_policy.domain": domain})) # pyright: ignore[reportArgumentType]
|
||||
begin_date_query = Q(dict(match=dict(date_begin=begin_date))) # pyright: ignore[reportArgumentType]
|
||||
end_date_query = Q(dict(match=dict(date_end=end_date))) # pyright: ignore[reportArgumentType]
|
||||
begin_date_query = Q(dict(range=dict(date_begin=dict(gte=begin_date)))) # pyright: ignore[reportArgumentType]
|
||||
end_date_query = Q(dict(range=dict(date_end=dict(lte=end_date)))) # pyright: ignore[reportArgumentType]
|
||||
|
||||
if index_suffix is not None:
|
||||
search_index = "dmarc_aggregate_{0}*".format(index_suffix)
|
||||
|
||||
@@ -413,8 +413,8 @@ def save_aggregate_report_to_opensearch(
|
||||
org_name_query = Q(dict(match_phrase=dict(org_name=org_name)))
|
||||
report_id_query = Q(dict(match_phrase=dict(report_id=report_id)))
|
||||
domain_query = Q(dict(match_phrase={"published_policy.domain": domain}))
|
||||
begin_date_query = Q(dict(match=dict(date_begin=begin_date)))
|
||||
end_date_query = Q(dict(match=dict(date_end=end_date)))
|
||||
begin_date_query = Q(dict(range=dict(date_begin=dict(gte=begin_date))))
|
||||
end_date_query = Q(dict(range=dict(date_end=dict(lte=end_date))))
|
||||
|
||||
if index_suffix is not None:
|
||||
search_index = "dmarc_aggregate_{0}*".format(index_suffix)
|
||||
|
||||
Reference in New Issue
Block a user