Skip to content

Commit 21c5073

Browse files
authored
feat: Implement apply_filter_policy and FilterPolicy.MERGE for the new filters (#8042)
1 parent 4c79847 commit 21c5073

File tree

4 files changed

+433
-36
lines changed

4 files changed

+433
-36
lines changed

haystack/document_stores/types/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5-
from .filter_policy import FilterPolicy
5+
from .filter_policy import FilterPolicy, apply_filter_policy
66
from .policy import DuplicatePolicy
77
from .protocol import DocumentStore
88

9-
__all__ = ["DocumentStore", "DuplicatePolicy", "FilterPolicy"]
9+
__all__ = ["apply_filter_policy", "DocumentStore", "DuplicatePolicy", "FilterPolicy"]

haystack/document_stores/types/filter_policy.py

Lines changed: 266 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,11 @@
33
# SPDX-License-Identifier: Apache-2.0
44

55
from enum import Enum
6-
from typing import Any, Dict, Optional
6+
from typing import Any, Dict, Literal, Optional
7+
8+
from haystack import logging
9+
10+
logger = logging.getLogger(__name__)
711

812

913
class FilterPolicy(Enum):
@@ -28,18 +32,259 @@ def from_str(filter_policy: str) -> "FilterPolicy":
2832
:param filter_policy: The string to convert.
2933
:return: The corresponding FilterPolicy enum.
3034
"""
31-
enum_map = {e.value: e for e in FilterPolicy}
32-
policy = enum_map.get(filter_policy)
35+
enum_map = {e.value.lower(): e for e in FilterPolicy}
36+
policy = enum_map.get(filter_policy.lower() if filter_policy else "")
3337
if policy is None:
3438
msg = f"Unknown FilterPolicy type '{filter_policy}'. Supported types are: {list(enum_map.keys())}"
3539
raise ValueError(msg)
3640
return policy
3741

3842

43+
def is_comparison_filter(filter_item: Dict[str, Any]) -> bool:
    """
    Tell whether `filter_item` has the shape of a comparison filter.

    A comparison filter is recognised purely by its keys: it must contain `field`, `operator` and `value`.
    The values stored under those keys are not inspected.

    :param filter_item: The filter to check.
    :returns: True if all three keys are present, False otherwise.
    """
    for required_key in ("field", "operator", "value"):
        if required_key not in filter_item:
            return False
    return True
51+
52+
53+
def is_logical_filter(filter_item: Dict[str, Any]) -> bool:
    """
    Tell whether `filter_item` has the shape of a logical filter.

    A logical filter is recognised purely by its keys: it must contain both `operator` and `conditions`.
    The values stored under those keys are not inspected.

    :param filter_item: The filter to check.
    :returns: True if both keys are present, False otherwise.
    """
    return all(required_key in filter_item for required_key in ("operator", "conditions"))
61+
62+
63+
def combine_two_logical_filters(
    init_logical_filter: Dict[str, Any], runtime_logical_filter: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Combine two logical filters that use the same logical operator.

    If `init_logical_filter["operator"]` and `runtime_logical_filter["operator"]` are equal, a new logical filter
    is returned whose conditions are the init conditions followed by the runtime conditions. Otherwise a warning
    is logged, `init_logical_filter` is ignored and a copy of `runtime_logical_filter` (with its operator
    converted to `str`) is returned. Neither input dictionary is modified.

    __Example__:

    ```python
    init_logical_filter = {
        "operator": "AND",
        "conditions": [
            {"field": "meta.type", "operator": "==", "value": "article"},
            {"field": "meta.rating", "operator": ">=", "value": 3},
        ]
    }
    runtime_logical_filter = {
        "operator": "AND",
        "conditions": [
            {"field": "meta.genre", "operator": "IN", "value": ["economy", "politics"]},
            {"field": "meta.publisher", "operator": "==", "value": "nytimes"},
        ]
    }
    new_filters = combine_two_logical_filters(init_logical_filter, runtime_logical_filter)
    # Output:
    {
        "operator": "AND",
        "conditions": [
            {"field": "meta.type", "operator": "==", "value": "article"},
            {"field": "meta.rating", "operator": ">=", "value": 3},
            {"field": "meta.genre", "operator": "IN", "value": ["economy", "politics"]},
            {"field": "meta.publisher", "operator": "==", "value": "nytimes"},
        ]
    }
    ```

    :param init_logical_filter: The logical filter set at initialization time.
    :param runtime_logical_filter: The logical filter provided at runtime.
    :returns: The combined logical filter, or a copy of `runtime_logical_filter` if the operators differ.
    """
    if init_logical_filter["operator"] == runtime_logical_filter["operator"]:
        return {
            "operator": str(init_logical_filter["operator"]),
            # List concatenation builds a new list, so neither input list is altered.
            "conditions": init_logical_filter["conditions"] + runtime_logical_filter["conditions"],
        }

    logger.warning(
        "The provided logical operators, {parsed_operator} and {operator}, do not match so the parsed logical "
        "filter, {init_logical_filter}, will be ignored and only the provided logical filter,{runtime_logical_filter}, "
        "will be used. Update the logical operators to match to include the parsed filter.",
        parsed_operator=init_logical_filter["operator"],
        operator=runtime_logical_filter["operator"],
        init_logical_filter=init_logical_filter,
        runtime_logical_filter=runtime_logical_filter,
    )
    # Return a shallow copy with the operator normalised to `str` instead of assigning into the caller's dict.
    return {**runtime_logical_filter, "operator": str(runtime_logical_filter["operator"])}
122+
123+
124+
def combine_init_comparison_and_runtime_logical_filters(
    init_comparison_filter: Dict[str, Any],
    runtime_logical_filter: Dict[str, Any],
    logical_operator: Literal["AND", "OR", "NOT"],
) -> Dict[str, Any]:
    """
    Combine a runtime logical filter with the init comparison filter using the provided logical_operator.

    The init comparison filter is only added when `logical_operator` equals `runtime_logical_filter["operator"]`
    and none of the runtime conditions already targets the same field; in the latter case a warning is logged
    and the runtime conditions win. If the operators differ, a warning is logged, the init filter is ignored and
    a copy of `runtime_logical_filter` (with its operator converted to `str`) is returned.

    Neither input dictionary, nor the runtime `conditions` list, is modified.

    __Example__:

    ```python
    runtime_logical_filter = {
        "operator": "AND",
        "conditions": [
            {"field": "meta.type", "operator": "==", "value": "article"},
            {"field": "meta.rating", "operator": ">=", "value": 3},
        ]
    }
    init_comparison_filter = {"field": "meta.date", "operator": ">=", "value": "2015-01-01"}
    new_filters = combine_init_comparison_and_runtime_logical_filters(
        init_comparison_filter, runtime_logical_filter, "AND"
    )
    # Output:
    {
        "operator": "AND",
        "conditions": [
            {"field": "meta.type", "operator": "==", "value": "article"},
            {"field": "meta.rating", "operator": ">=", "value": 3},
            {"field": "meta.date", "operator": ">=", "value": "2015-01-01"},
        ]
    }
    ```

    :param init_comparison_filter: The comparison filter set at initialization time.
    :param runtime_logical_filter: The logical filter provided at runtime.
    :param logical_operator: The logical operator the runtime filter must use for the merge to happen.
    :returns: The merged logical filter, or a copy of `runtime_logical_filter` if the operators differ.
    """
    if runtime_logical_filter["operator"] == logical_operator:
        # Work on a copy so that appending below never alters the caller's runtime filter.
        conditions = list(runtime_logical_filter["conditions"])
        # Nested logical conditions have no "field" key and contribute `None` here.
        fields = {c.get("field") for c in conditions}
        if init_comparison_filter["field"] not in fields:
            conditions.append(init_comparison_filter)
        else:
            logger.warning(
                "The init filter, {init_filter}, is ignored as the field is already present in the existing "
                "filters, {filters}.",
                init_filter=init_comparison_filter,
                filters=runtime_logical_filter,
            )
        return {"operator": str(runtime_logical_filter["operator"]), "conditions": conditions}

    logger.warning(
        "The provided logical_operator, {logical_operator}, does not match the logical operator found in "
        "the runtime filters, {filters_logical_operator}, so the init filter will be ignored.",
        logical_operator=logical_operator,
        filters_logical_operator=runtime_logical_filter["operator"],
    )
    # Return a shallow copy with the operator normalised to `str` instead of assigning into the caller's dict.
    return {**runtime_logical_filter, "operator": str(runtime_logical_filter["operator"])}
182+
183+
184+
def combine_runtime_comparison_and_init_logical_filters(
    runtime_comparison_filter: Dict[str, Any],
    init_logical_filter: Dict[str, Any],
    logical_operator: Literal["AND", "OR", "NOT"],
) -> Dict[str, Any]:
    """
    Combine an init logical filter with the runtime comparison filter using the provided logical_operator.

    The runtime comparison filter is only merged when `logical_operator` equals `init_logical_filter["operator"]`.
    In that case any init condition targeting the same field is dropped (a warning is logged) and the runtime
    comparison filter is appended last. If the operators differ, a warning is logged, the init logical filter is
    ignored and `runtime_comparison_filter` is returned unchanged.

    `init_logical_filter` and its `conditions` list are never modified, so the same init filters can be reused
    across calls.

    __Example__:

    ```python
    init_logical_filter = {
        "operator": "AND",
        "conditions": [
            {"field": "meta.type", "operator": "==", "value": "article"},
            {"field": "meta.rating", "operator": ">=", "value": 3},
        ]
    }
    runtime_comparison_filter = {"field": "meta.date", "operator": ">=", "value": "2015-01-01"}
    new_filters = combine_runtime_comparison_and_init_logical_filters(
        runtime_comparison_filter, init_logical_filter, "AND"
    )
    # Output:
    {
        "operator": "AND",
        "conditions": [
            {"field": "meta.type", "operator": "==", "value": "article"},
            {"field": "meta.rating", "operator": ">=", "value": 3},
            {"field": "meta.date", "operator": ">=", "value": "2015-01-01"},
        ]
    }
    ```

    :param runtime_comparison_filter: The comparison filter provided at runtime.
    :param init_logical_filter: The logical filter set at initialization time.
    :param logical_operator: The logical operator the init filter must use for the merge to happen.
    :returns: The merged logical filter, or `runtime_comparison_filter` if the operators differ.
    """
    if init_logical_filter["operator"] == logical_operator:
        init_conditions = init_logical_filter["conditions"]
        runtime_field = runtime_comparison_filter["field"]
        if any(c.get("field") == runtime_field for c in init_conditions):
            logger.warning(
                "The runtime filter, {runtime_filter}, will overwrite the existing filter with the same "
                "field in the init logical filter.",
                runtime_filter=runtime_comparison_filter,
            )
        # Always build a fresh list: appending to `init_conditions` directly would leak the runtime
        # filter into the caller's init filters.
        conditions = [c for c in init_conditions if c.get("field") != runtime_field]
        conditions.append(runtime_comparison_filter)
        return {"operator": str(init_logical_filter["operator"]), "conditions": conditions}

    logger.warning(
        "The provided logical_operator, {logical_operator}, does not match the logical operator found in "
        "the init logical filter, {filters_logical_operator}, so the init logical filter will be ignored.",
        logical_operator=logical_operator,
        filters_logical_operator=init_logical_filter["operator"],
    )
    return runtime_comparison_filter
240+
241+
242+
def combine_two_comparison_filters(
    init_comparison_filter: Dict[str, Any],
    runtime_comparison_filter: Dict[str, Any],
    logical_operator: Literal["AND", "OR", "NOT"],
) -> Dict[str, Any]:
    """
    Join the init and runtime comparison filters under `logical_operator`.

    When both filters target the same field, a warning is logged, `init_comparison_filter` is dropped and
    `runtime_comparison_filter` is returned as is. Otherwise a logical filter is returned whose conditions are
    the init filter followed by the runtime filter.

    __Example__:

    ```python
    runtime_comparison_filter = {"field": "meta.type", "operator": "==", "value": "article"}
    init_comparison_filter = {"field": "meta.date", "operator": ">=", "value": "2015-01-01"}
    new_filters = combine_two_comparison_filters(
        init_comparison_filter, runtime_comparison_filter, "AND"
    )
    # Output:
    {
        "operator": "AND",
        "conditions": [
            {"field": "meta.date", "operator": ">=", "value": "2015-01-01"},
            {"field": "meta.type", "operator": "==", "value": "article"},
        ]
    }
    ```

    :param init_comparison_filter: The comparison filter set at initialization time.
    :param runtime_comparison_filter: The comparison filter provided at runtime.
    :param logical_operator: The logical operator used to join the two filters.
    :returns: The combined logical filter, or `runtime_comparison_filter` if both target the same field.
    """
    targets_differ = runtime_comparison_filter["field"] != init_comparison_filter["field"]
    if targets_differ:
        both_conditions = [init_comparison_filter, runtime_comparison_filter]
        return {"operator": str(logical_operator), "conditions": both_conditions}

    # Same field on both sides: the runtime filter takes precedence.
    logger.warning(
        "The parsed filter, {parsed_filter}, is ignored as the field is already present in the existing "
        "filters, {filters}.",
        parsed_filter=init_comparison_filter,
        filters=runtime_comparison_filter,
    )
    return runtime_comparison_filter
281+
282+
39283
def apply_filter_policy(
40284
filter_policy: FilterPolicy,
41285
init_filters: Optional[Dict[str, Any]] = None,
42286
runtime_filters: Optional[Dict[str, Any]] = None,
287+
default_logical_operator: Literal["AND", "OR", "NOT"] = "AND",
43288
) -> Optional[Dict[str, Any]]:
44289
"""
45290
Apply the filter policy to the given initial and runtime filters to determine the final set of filters used.
@@ -52,10 +297,23 @@ def apply_filter_policy(
52297
values from the runtime filters will overwrite those from the initial filters.
53298
:param init_filters: The initial filters set during the initialization of the relevant retriever.
54299
:param runtime_filters: The filters provided at runtime, usually during a query operation execution. These filters
55-
can change for each query/retreiver run invocation.
300+
can change for each query/retriever run invocation.
301+
:param default_logical_operator: The default logical operator to use when merging filters (non-legacy filters only).
56302
:returns: A dictionary containing the resulting filters based on the provided policy.
57303
"""
58-
if filter_policy == FilterPolicy.MERGE and runtime_filters:
59-
return {**(init_filters or {}), **runtime_filters}
60-
else:
61-
return runtime_filters or init_filters
304+
if filter_policy == FilterPolicy.MERGE and runtime_filters and init_filters:
305+
# now we merge filters
306+
if is_comparison_filter(init_filters) and is_comparison_filter(runtime_filters):
307+
return combine_two_comparison_filters(init_filters, runtime_filters, default_logical_operator)
308+
elif is_comparison_filter(init_filters) and is_logical_filter(runtime_filters):
309+
return combine_init_comparison_and_runtime_logical_filters(
310+
init_filters, runtime_filters, default_logical_operator
311+
)
312+
elif is_logical_filter(init_filters) and is_comparison_filter(runtime_filters):
313+
return combine_runtime_comparison_and_init_logical_filters(
314+
runtime_filters, init_filters, default_logical_operator
315+
)
316+
elif is_logical_filter(init_filters) and is_logical_filter(runtime_filters):
317+
return combine_two_logical_filters(init_filters, runtime_filters)
318+
319+
return runtime_filters or init_filters
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
enhancements:
3+
- |
4+
Enhanced the filter application logic to support merging of filters. This enables more precise retrieval filtering by allowing complex init and runtime filters to be combined with logical operators. For more details, see https://docs.haystack.deepset.ai/docs/metadata-filtering

0 commit comments

Comments
 (0)