from enum import Enum
class Filter:
    """
    Class responsible for creating the query dictionaries used by `pycarol.Query`.

    A ``Filter`` is normally obtained from the fluent :class:`Filter.Builder`
    and converted to a plain dictionary with :meth:`to_json`.

    Usage:

    .. code:: python

        from pycarol.filter import TYPE_FILTER, Filter, TERM_FILTER

        json_query = Filter.Builder()\\
            .must(TYPE_FILTER(value='medicalform' + "Golden"))\\
            .must(TERM_FILTER(key='mdmGoldenFieldAndValues.status.raw',value='pending'))\\
            .must_not(TERM_FILTER(key='mdmGoldenFieldAndValues.auditedbycarol',value=True))\\
            .build().to_json()

    This will create the following query dictionary.

    .. code:: python

        {
            'mustList': [
                {
                    'mdmFilterType': 'TYPE_FILTER',
                    'mdmValue': 'medicalformGolden'
                },
                {
                    'mdmFilterType': 'TERM_FILTER',
                    'mdmKey': 'mdmGoldenFieldAndValues.status.raw',
                    'mdmValue': 'pending'
                }
            ],
            'mustNotList': [
                {
                    'mdmFilterType': 'TERM_FILTER',
                    'mdmKey': 'mdmGoldenFieldAndValues.auditedbycarol',
                    'mdmValue': True
                }
            ],
            'shouldList': [
            ],
            'aggregationList': [
            ],
            'minimumShouldMatch': 1
        }

    Using with Aggregations:

    .. code:: python

        from pycarol.filter import MINIMUM, MAXIMUM, TYPE_FILTER, Filter, TERM_FILTER

        json_query = Filter.Builder() \\
            .type('datamodel') \\
            .aggregation_list([MINIMUM(name='MINIMUM', params='mdm_key'),
                               MAXIMUM(name='MAXIMUM', params='mdm_key')]) \\
            .build().to_json()

    .. code:: python

        {
            'mustList': [
                {
                    'mdmFilterType': 'TYPE_FILTER',
                    'mdmValue': 'datamodel'
                }
            ],
            'mustNotList': [
            ],
            'shouldList': [
            ],
            'aggregationList': [
                {
                    'type': 'MINIMUM',
                    'name': 'MINIMUM',
                    'params': 'mdm_key',
                    'size': 10,
                    'shardSize': 10,
                    'minDocCount': 0
                },
                {
                    'type': 'MAXIMUM',
                    'name': 'MAXIMUM',
                    'params': 'mdm_key',
                    'size': 10,
                    'shardSize': 10,
                    'minDocCount': 0
                }
            ],
            'minimumShouldMatch': 1
        }
    """

    def __init__(self, builder):
        """
        Take over the state collected by a builder.

        Args:
            builder: object exposing ``_must_list``, ``_must_not_list``,
                ``_should_list``, ``_aggregation_list`` and
                ``_minimum_should_match``. The lists are referenced, not
                copied, so later changes to the builder remain visible here.
        """
        self.must_list = builder._must_list
        self.must_not_list = builder._must_not_list
        self.should_list = builder._should_list
        self.aggregation_list = builder._aggregation_list
        self.minimum_should_match = builder._minimum_should_match

    def to_json(self):
        """
        Serialise the filter.

        Returns:
            dict with the keys ``mustList``, ``mustNotList``, ``shouldList``
            and ``aggregationList`` (each the ``to_json()`` output of the
            stored elements) plus ``minimumShouldMatch``.
        """
        return {
            'mustList': [clause.to_json() for clause in self.must_list],
            'mustNotList': [clause.to_json() for clause in self.must_not_list],
            'shouldList': [clause.to_json() for clause in self.should_list],
            'aggregationList': [agg.to_json() for agg in self.aggregation_list],
            'minimumShouldMatch': self.minimum_should_match,
        }

    class Builder:
        """
        Fluent builder for :class:`Filter`.

        Every mutating method returns the builder itself so calls can be
        chained; :meth:`build` produces the ``Filter``.
        """

        def __init__(self, key_prefix=""):
            """
            Args:
                key_prefix: value handed to ``set_key_prefix`` of every clause
                    added through the ``must*``, ``must_not*`` and ``should*``
                    methods. Clauses added by :meth:`type` and aggregations
                    are not prefixed.
            """
            self._minimum_should_match = 1
            self._must_list = []
            self._must_not_list = []
            self._should_list = []
            self._aggregation_list = []
            self.key_prefix = key_prefix

        def type(self, value):
            """Append ``TYPE_FILTER(value=value)`` to the must clauses."""
            self._must_list.append(TYPE_FILTER(value=value))
            return self

        def must(self, must):
            """Prefix a single clause and append it to the must clauses."""
            must.set_key_prefix(self.key_prefix)
            self._must_list.append(must)
            return self

        def must_list(self, must_list):
            """Prefix every clause of a ``list`` and append them to the must clauses."""
            assert isinstance(must_list, list)
            # Prefix everything first so a failing element leaves the stored
            # clause list untouched.
            for clause in must_list:
                clause.set_key_prefix(self.key_prefix)
            self._must_list.extend(must_list)
            return self

        def must_not(self, must_not):
            """Prefix a single clause and append it to the must-not clauses."""
            must_not.set_key_prefix(self.key_prefix)
            self._must_not_list.append(must_not)
            return self

        def must_not_list(self, must_not_list):
            """Prefix every clause of a ``list`` and append them to the must-not clauses."""
            assert isinstance(must_not_list, list)
            for clause in must_not_list:
                clause.set_key_prefix(self.key_prefix)
            self._must_not_list.extend(must_not_list)
            return self

        def should(self, should):
            """Prefix a single clause and append it to the should clauses."""
            should.set_key_prefix(self.key_prefix)
            self._should_list.append(should)
            return self

        def should_list(self, should_list):
            """Prefix every clause of a ``list`` and append them to the should clauses."""
            assert isinstance(should_list, list)
            for clause in should_list:
                clause.set_key_prefix(self.key_prefix)
            self._should_list.extend(should_list)
            return self

        def aggregation(self, aggregation):
            """Append a single aggregation."""
            self._aggregation_list.append(aggregation)
            return self

        def aggregation_list(self, aggregation_list):
            """Append every aggregation of a ``list``."""
            assert isinstance(aggregation_list, list)
            self._aggregation_list.extend(aggregation_list)
            return self

        def minimum_should_match(self, minimum_should_match):
            """Set the value emitted as ``minimumShouldMatch`` (defaults to 1)."""
            self._minimum_should_match = minimum_should_match
            return self

        def build(self):
            """Return a :class:`Filter` backed by this builder's current state."""
            return Filter(self)
class FilterType:
    """
    Base class for one filter clause of a query.

    The constructor stores the clause's fields; :meth:`to_json` emits the
    populated ones under their ``mdm*`` keys.

    Args:
        filter_type: object whose ``value`` attribute is emitted as
            ``mdmFilterType`` (the subclasses in this module pass ``FT`` members).
        key: emitted as ``mdmKey`` when truthy.
        value: emitted as ``mdmValue`` when not ``None``.
        path: emitted as ``mdmPath`` when truthy.
        range_values: ``list`` (or ``None``), emitted as ``mdmRangeValues``
            when truthy.
        values_field: emitted as ``mdmValuesField`` when truthy.
        mdm_format: emitted as ``mdmFormat`` when truthy.
        flags: emitted as ``mdmFlags`` when not ``None``.
        range_start: emitted as ``mdmRangeStart`` when not ``None``.
        range_end: emitted as ``mdmRangeEnd`` when not ``None``.
        values_query: nested ``FilterType`` emitted as ``mdmValuesQuery``;
            requires ``values_field`` to be given as well.

    Raises:
        AssertionError: if ``range_values`` is neither ``None`` nor a list, or
            if ``values_query`` is given without ``values_field`` or is not a
            ``FilterType``.
    """

    def __init__(self, filter_type, key = None, value = None, path = None, range_values = None, values_field = None,
                 mdm_format = None, flags = None, range_start = None, range_end = None, values_query = None):
        # Validate up front, then store everything.
        assert range_values is None or isinstance(range_values, list)
        if values_query is not None:
            assert values_field is not None
            assert isinstance(values_query, FilterType)

        self.filter_type = filter_type
        self.key = key
        self.value = value
        self.path = path
        self.range_values = range_values
        self.values_field = values_field
        self.mdm_format = mdm_format
        self.flags = flags
        self.range_start = range_start
        self.range_end = range_end
        self.values_query = values_query

    def set_key_prefix(self, key_prefix):
        """
        Prepend ``key_prefix + '.'`` to ``key`` and ``values_field``.

        Nothing happens for a falsy prefix; falsy attributes are left as they
        are. Each call prepends again, so the operation is not idempotent.
        """
        if not key_prefix:
            return
        dotted = key_prefix + '.'
        if self.key:
            self.key = dotted + self.key
        if self.values_field:
            self.values_field = dotted + self.values_field

    def to_json(self):
        """Return the clause as a dict containing only the populated fields."""
        payload = {'mdmFilterType': self.filter_type.value}
        # (output key, value, include?) in emission order. Some fields are
        # gated on truthiness, others on "is not None" so 0/False survive.
        fields = (
            ('mdmKey', self.key, bool(self.key)),
            ('mdmValue', self.value, self.value is not None),
            ('mdmPath', self.path, bool(self.path)),
            ('mdmRangeValues', self.range_values, bool(self.range_values)),
            ('mdmValuesField', self.values_field, bool(self.values_field)),
            ('mdmFormat', self.mdm_format, bool(self.mdm_format)),
            ('mdmFlags', self.flags, self.flags is not None),
            ('mdmRangeStart', self.range_start, self.range_start is not None),
            ('mdmRangeEnd', self.range_end, self.range_end is not None),
        )
        for out_key, field_value, include in fields:
            if include:
                payload[out_key] = field_value
        if self.values_query is not None:
            payload['mdmValuesQuery'] = self.values_query.to_json()
        return payload
class TYPE_FILTER(FilterType):
    """Filter clause whose filter type is ``FT.TYPE_FILTER``; it carries a value but no key."""

    def __init__(self, value, values_field=None, values_query=None):
        super().__init__(
            filter_type=FT.TYPE_FILTER,
            value=value,
            values_field=values_field,
            values_query=values_query,
        )
class BOOL_FILTER(FilterType):
    """Key/value filter clause whose filter type is ``FT.BOOL_FILTER``."""

    def __init__(self, key, value, values_field=None, values_query=None):
        super().__init__(
            filter_type=FT.BOOL_FILTER,
            key=key,
            value=value,
            values_field=values_field,
            values_query=values_query,
        )
class TERM_FILTER(FilterType):
    """Key/value filter clause whose filter type is ``FT.TERM_FILTER``."""

    def __init__(self, key, value, values_field=None, values_query=None):
        super().__init__(
            filter_type=FT.TERM_FILTER,
            key=key,
            value=value,
            values_field=values_field,
            values_query=values_query,
        )
class TERMS_FILTER(FilterType):
    """Key/value filter clause whose filter type is ``FT.TERMS_FILTER``."""

    def __init__(self, key, value, values_field=None, values_query=None):
        super().__init__(
            filter_type=FT.TERMS_FILTER,
            key=key,
            value=value,
            values_field=values_field,
            values_query=values_query,
        )
class RANGE_FILTER(FilterType):
    """Key/value filter clause whose filter type is ``FT.RANGE_FILTER``."""

    def __init__(self, key, value, values_field=None, values_query=None):
        super().__init__(
            filter_type=FT.RANGE_FILTER,
            key=key,
            value=value,
            values_field=values_field,
            values_query=values_query,
        )
class MATCH_FILTER(FilterType):
    """Key/value filter clause whose filter type is ``FT.MATCH_FILTER``."""

    def __init__(self, key, value, values_field=None, values_query=None):
        super().__init__(
            filter_type=FT.MATCH_FILTER,
            key=key,
            value=value,
            values_field=values_field,
            values_query=values_query,
        )
class MATCH_ALL_TERMS_FILTER(FilterType):
    """Key/value filter clause whose filter type is ``FT.MATCH_ALL_TERMS_FILTER``."""

    def __init__(self, key, value, values_field=None, values_query=None):
        super().__init__(
            filter_type=FT.MATCH_ALL_TERMS_FILTER,
            key=key,
            value=value,
            values_field=values_field,
            values_query=values_query,
        )
class MATCH_ANY_TERM_FILTER(FilterType):
    """Key/value filter clause whose filter type is ``FT.MATCH_ANY_TERM_FILTER``."""

    def __init__(self, key, value, values_field=None, values_query=None):
        # Previously passed FT.MATCH_ALL_TERMS_FILTER, so the clause was
        # serialised as a "match all terms" filter instead of "match any term".
        super().__init__(
            filter_type=FT.MATCH_ANY_TERM_FILTER,
            key=key,
            value=value,
            values_field=values_field,
            values_query=values_query,
        )
class TERM_FUZZY_FILTER(FilterType):
    """Key/value filter clause whose filter type is ``FT.TERM_FUZZY_FILTER``."""

    def __init__(self, key, value, values_field=None, values_query=None):
        super().__init__(
            filter_type=FT.TERM_FUZZY_FILTER,
            key=key,
            value=value,
            values_field=values_field,
            values_query=values_query,
        )
class TERMS_FUZZY_FILTER(FilterType):
    """Key/value filter clause whose filter type is ``FT.TERMS_FUZZY_FILTER``."""

    def __init__(self, key, value, values_field=None, values_query=None):
        super().__init__(
            filter_type=FT.TERMS_FUZZY_FILTER,
            key=key,
            value=value,
            values_field=values_field,
            values_query=values_query,
        )
class WILDCARD_FILTER(FilterType):
    """Key/value filter clause whose filter type is ``FT.WILDCARD_FILTER``."""

    def __init__(self, key, value, values_field=None, values_query=None):
        super().__init__(
            filter_type=FT.WILDCARD_FILTER,
            key=key,
            value=value,
            values_field=values_field,
            values_query=values_query,
        )
class WILDCARD_CUSTOM_FILTER(FilterType):
    """Key/value filter clause whose filter type is ``FT.WILDCARD_CUSTOM_FILTER``."""

    def __init__(self, key, value, values_field=None, values_query=None):
        super().__init__(
            filter_type=FT.WILDCARD_CUSTOM_FILTER,
            key=key,
            value=value,
            values_field=values_field,
            values_query=values_query,
        )
class EXISTS_FILTER(FilterType):
    """Filter clause whose filter type is ``FT.EXISTS_FILTER``; it takes a key but no value."""

    def __init__(self, key, values_field=None, values_query=None):
        super().__init__(
            filter_type=FT.EXISTS_FILTER,
            key=key,
            values_field=values_field,
            values_query=values_query,
        )
class SIMPLE_QUERY_STRING(FilterType):
    """Key/value filter clause whose filter type is ``FT.SIMPLE_QUERY_STRING``."""

    def __init__(self, key, value, values_field=None, values_query=None):
        super().__init__(
            filter_type=FT.SIMPLE_QUERY_STRING,
            key=key,
            value=value,
            values_field=values_field,
            values_query=values_query,
        )
class MISSING_FILTER(FilterType):
    """Filter clause whose filter type is ``FT.MISSING_FILTER``; it takes a key but no value."""

    def __init__(self, key, values_field=None, values_query=None):
        super().__init__(
            filter_type=FT.MISSING_FILTER,
            key=key,
            values_field=values_field,
            values_query=values_query,
        )
class GEODISTANCE_FILTER(FilterType):
    """
    Filter clause whose filter type is ``FT.GEODISTANCE_FILTER``.

    Takes ``path``, ``key`` and ``range_values`` (which must be a list or
    ``None``, as enforced by ``FilterType``); it has no ``value``.
    """

    def __init__(self, path, key, range_values, values_field=None, values_query=None):
        super().__init__(
            filter_type=FT.GEODISTANCE_FILTER,
            path=path,
            key=key,
            range_values=range_values,
            values_field=values_field,
            values_query=values_query,
        )
class NESTED(FilterType):
    """Key/value/path filter clause whose filter type is ``FT.NESTED``."""

    def __init__(self, key, value, path, values_field=None, values_query=None):
        super().__init__(
            filter_type=FT.NESTED,
            key=key,
            value=value,
            path=path,
            values_field=values_field,
            values_query=values_query,
        )


# The module later defines an aggregation class that is also called NESTED,
# which rebinds the module-level name and hides this filter. This alias keeps
# the filter reachable without changing what ``NESTED`` finally refers to.
NESTED_FILTER = NESTED
class NESTED_TERM_FILTER(FilterType):
    """Key/value/path filter clause whose filter type is ``FT.NESTED_TERM_FILTER``."""

    def __init__(self, key, value, path, values_field=None, values_query=None):
        super().__init__(
            filter_type=FT.NESTED_TERM_FILTER,
            key=key,
            value=value,
            path=path,
            values_field=values_field,
            values_query=values_query,
        )
class NESTED_TERMS_FILTER(FilterType):
    """Key/value/path filter clause whose filter type is ``FT.NESTED_TERMS_FILTER``."""

    def __init__(self, key, value, path, values_field=None, values_query=None):
        super().__init__(
            filter_type=FT.NESTED_TERMS_FILTER,
            key=key,
            value=value,
            path=path,
            values_field=values_field,
            values_query=values_query,
        )
class NESTED_RANGE_FILTER(FilterType):
    """Key/value/path filter clause whose filter type is ``FT.NESTED_RANGE_FILTER``."""

    def __init__(self, key, value, path, values_field=None, values_query=None):
        super().__init__(
            filter_type=FT.NESTED_RANGE_FILTER,
            key=key,
            value=value,
            path=path,
            values_field=values_field,
            values_query=values_query,
        )
class NESTED_MATCH_ALL_TERMS_FILTER(FilterType):
    """Key/value/path filter clause whose filter type is ``FT.NESTED_MATCH_ALL_TERMS_FILTER``."""

    def __init__(self, key, value, path, values_field=None, values_query=None):
        super().__init__(
            filter_type=FT.NESTED_MATCH_ALL_TERMS_FILTER,
            key=key,
            value=value,
            path=path,
            values_field=values_field,
            values_query=values_query,
        )
class NESTED_MATCH_ANY_TERM_FILTER(FilterType):
    """Key/value/path filter clause whose filter type is ``FT.NESTED_MATCH_ANY_TERM_FILTER``."""

    def __init__(self, key, value, path, values_field=None, values_query=None):
        super().__init__(
            filter_type=FT.NESTED_MATCH_ANY_TERM_FILTER,
            key=key,
            value=value,
            path=path,
            values_field=values_field,
            values_query=values_query,
        )
class NESTED_TERM_FUZZY_FILTER(FilterType):
    """Key/value/path filter clause whose filter type is ``FT.NESTED_TERM_FUZZY_FILTER``."""

    def __init__(self, key, value, path, values_field=None, values_query=None):
        super().__init__(
            filter_type=FT.NESTED_TERM_FUZZY_FILTER,
            key=key,
            value=value,
            path=path,
            values_field=values_field,
            values_query=values_query,
        )
class NESTED_TERMS_FUZZY_FILTER(FilterType):
    """Key/value/path filter clause whose filter type is ``FT.NESTED_TERMS_FUZZY_FILTER``."""

    def __init__(self, key, value, path, values_field=None, values_query=None):
        super().__init__(
            filter_type=FT.NESTED_TERMS_FUZZY_FILTER,
            key=key,
            value=value,
            path=path,
            values_field=values_field,
            values_query=values_query,
        )
class NESTED_WILDCARD_FILTER(FilterType):
    """Key/value/path filter clause whose filter type is ``FT.NESTED_WILDCARD_FILTER``."""

    def __init__(self, key, value, path, values_field=None, values_query=None):
        super().__init__(
            filter_type=FT.NESTED_WILDCARD_FILTER,
            key=key,
            value=value,
            path=path,
            values_field=values_field,
            values_query=values_query,
        )
class NESTED_WILDCARD_CUSTOM_FILTER(FilterType):
    """Key/value/path filter clause whose filter type is ``FT.NESTED_WILDCARD_CUSTOM_FILTER``."""

    def __init__(self, key, value, path, values_field=None, values_query=None):
        super().__init__(
            filter_type=FT.NESTED_WILDCARD_CUSTOM_FILTER,
            key=key,
            value=value,
            path=path,
            values_field=values_field,
            values_query=values_query,
        )
class NESTED_EXISTS_FILTER(FilterType):
    """Filter clause whose filter type is ``FT.NESTED_EXISTS_FILTER``; it takes a key and path but no value."""

    def __init__(self, key, path, values_field=None, values_query=None):
        super().__init__(
            filter_type=FT.NESTED_EXISTS_FILTER,
            key=key,
            path=path,
            values_field=values_field,
            values_query=values_query,
        )
class NESTED_SIMPLE_QUERY_STRING(FilterType):
    """Key/value/path filter clause whose filter type is ``FT.NESTED_SIMPLE_QUERY_STRING``."""

    def __init__(self, key, value, path, values_field=None, values_query=None):
        super().__init__(
            filter_type=FT.NESTED_SIMPLE_QUERY_STRING,
            key=key,
            value=value,
            path=path,
            values_field=values_field,
            values_query=values_query,
        )
class NESTED_MISSING_FILTER(FilterType):
    """Key/value/path filter clause whose filter type is ``FT.NESTED_MISSING_FILTER``."""

    # NOTE(review): unlike MISSING_FILTER and NESTED_EXISTS_FILTER, this
    # constructor requires ``value`` -- confirm whether that is intended.
    def __init__(self, key, value, path, values_field=None, values_query=None):
        super().__init__(
            filter_type=FT.NESTED_MISSING_FILTER,
            key=key,
            value=value,
            path=path,
            values_field=values_field,
            values_query=values_query,
        )
class NESTED_GEODISTANCE_FILTER(FilterType):
    """
    Filter clause whose filter type is ``FT.NESTED_GEODISTANCE_FILTER``.

    Takes ``path``, ``key`` and ``range_values`` (which must be a list or
    ``None``, as enforced by ``FilterType``); it has no ``value``.
    """

    def __init__(self, path, key, range_values, values_field=None, values_query=None):
        super().__init__(
            filter_type=FT.NESTED_GEODISTANCE_FILTER,
            path=path,
            key=key,
            range_values=range_values,
            values_field=values_field,
            values_query=values_query,
        )
class Aggregation:
    """
    Base class for one aggregation entry of a query.

    Args:
        agg_type: object whose ``value`` attribute is emitted as ``type``
            (the subclasses in this module pass ``AGG`` members).
        name: emitted as ``name``.
        params: emitted as ``params`` when truthy.
        sub_aggregations: ``list`` of objects with ``to_json()`` (or ``None``),
            emitted as ``subAggregations`` when truthy.
        size: emitted as ``size`` when truthy (so ``0`` is omitted).
        shard_size: emitted as ``shardSize`` when truthy.
        min_doc_count: emitted as ``minDocCount`` when not ``None``.
        sort_by: emitted as ``sortBy`` when truthy.
        sort_order: emitted as ``sortOrder`` when truthy.
        query_param: object with ``to_json()``, emitted as ``queryParam``
            when not ``None``.

    Raises:
        AssertionError: if ``sub_aggregations`` is neither ``None`` nor a list.
    """

    def __init__(self, agg_type, name, params=None, sub_aggregations=None, size=10, shard_size=10, min_doc_count=0, sort_by = None, sort_order = None, query_param = None):
        assert sub_aggregations is None or isinstance(sub_aggregations, list), 'sub_aggregations must be a list'

        self.agg_type = agg_type
        self.name = name
        self.params = params
        self.sub_aggregations = sub_aggregations
        self.size = size
        self.shard_size = shard_size
        self.min_doc_count = min_doc_count
        self.sort_by = sort_by
        self.sort_order = sort_order
        self.query_param = query_param

    def to_json(self):
        """Return the aggregation as a dict containing only the populated fields."""
        payload = {'type': self.agg_type.value, 'name': self.name}
        # (output key, value, include?) in emission order; only minDocCount is
        # gated on "is not None" so that an explicit 0 is still sent.
        scalar_fields = (
            ('params', self.params, bool(self.params)),
            ('size', self.size, bool(self.size)),
            ('shardSize', self.shard_size, bool(self.shard_size)),
            ('minDocCount', self.min_doc_count, self.min_doc_count is not None),
            ('sortBy', self.sort_by, bool(self.sort_by)),
            ('sortOrder', self.sort_order, bool(self.sort_order)),
        )
        for out_key, field_value, include in scalar_fields:
            if include:
                payload[out_key] = field_value
        if self.sub_aggregations:
            payload['subAggregations'] = [child.to_json() for child in self.sub_aggregations]
        if self.query_param is not None:
            payload['queryParam'] = self.query_param.to_json()
        return payload
class TERM(Aggregation):
    """Aggregation whose type is ``AGG.TERM``; the default name is ``'term'``."""

    def __init__(self, name='term', params=None, sub_aggregations=None, size=10,
                 shard_size=10, min_doc_count=0, sort_by=None, sort_order=None):
        super().__init__(
            agg_type=AGG.TERM,
            name=name,
            params=params,
            sub_aggregations=sub_aggregations,
            size=size,
            shard_size=shard_size,
            min_doc_count=min_doc_count,
            sort_by=sort_by,
            sort_order=sort_order,
        )
class TERMS(Aggregation):
    """Aggregation whose type is ``AGG.TERMS``; the default name is ``'terms'``."""

    def __init__(self, name='terms', params=None, sub_aggregations=None, size=10,
                 shard_size=10, min_doc_count=0, sort_by=None, sort_order=None):
        super().__init__(
            agg_type=AGG.TERMS,
            name=name,
            params=params,
            sub_aggregations=sub_aggregations,
            size=size,
            shard_size=shard_size,
            min_doc_count=min_doc_count,
            sort_by=sort_by,
            sort_order=sort_order,
        )
class NESTED(Aggregation):
    """Aggregation whose type is ``AGG.NESTED``; the default name is ``'nested'``."""

    # NOTE(review): this reuses the name of the NESTED filter class defined
    # earlier in the module, so from here on the module-level name ``NESTED``
    # refers to this aggregation.
    def __init__(self, name='nested', params=None, sub_aggregations=None, size=10,
                 shard_size=10, min_doc_count=0, sort_by=None, sort_order=None):
        super().__init__(
            agg_type=AGG.NESTED,
            name=name,
            params=params,
            sub_aggregations=sub_aggregations,
            size=size,
            shard_size=shard_size,
            min_doc_count=min_doc_count,
            sort_by=sort_by,
            sort_order=sort_order,
        )
class FILTER(Aggregation):
    """
    Aggregation whose type is ``AGG.FILTER``; the default name is ``'filter'``.

    It is the only aggregation subclass in this module that also accepts
    ``query_param``.
    """

    def __init__(self, name='filter', params=None, sub_aggregations=None, size=10,
                 shard_size=10, min_doc_count=0, sort_by=None, sort_order=None,
                 query_param=None):
        super().__init__(
            agg_type=AGG.FILTER,
            name=name,
            params=params,
            sub_aggregations=sub_aggregations,
            size=size,
            shard_size=shard_size,
            min_doc_count=min_doc_count,
            sort_by=sort_by,
            sort_order=sort_order,
            query_param=query_param,
        )
class MINIMUM(Aggregation):
    """Aggregation whose type is ``AGG.MINIMUM``; the default name is ``'minimum'``."""

    def __init__(self, name='minimum', params=None, sub_aggregations=None, size=10,
                 shard_size=10, min_doc_count=0, sort_by=None, sort_order=None):
        super().__init__(
            agg_type=AGG.MINIMUM,
            name=name,
            params=params,
            sub_aggregations=sub_aggregations,
            size=size,
            shard_size=shard_size,
            min_doc_count=min_doc_count,
            sort_by=sort_by,
            sort_order=sort_order,
        )
class MAXIMUM(Aggregation):
    """Aggregation whose type is ``AGG.MAXIMUM``; the default name is ``'maximum'``."""

    def __init__(self, name='maximum', params=None, sub_aggregations=None, size=10,
                 shard_size=10, min_doc_count=0, sort_by=None, sort_order=None):
        super().__init__(
            agg_type=AGG.MAXIMUM,
            name=name,
            params=params,
            sub_aggregations=sub_aggregations,
            size=size,
            shard_size=shard_size,
            min_doc_count=min_doc_count,
            sort_by=sort_by,
            sort_order=sort_order,
        )
class AVERAGE(Aggregation):
    """Aggregation whose type is ``AGG.AVERAGE``; the default name is ``'average'``."""

    def __init__(self, name='average', params=None, sub_aggregations=None, size=10,
                 shard_size=10, min_doc_count=0, sort_by=None, sort_order=None):
        super().__init__(
            agg_type=AGG.AVERAGE,
            name=name,
            params=params,
            sub_aggregations=sub_aggregations,
            size=size,
            shard_size=shard_size,
            min_doc_count=min_doc_count,
            sort_by=sort_by,
            sort_order=sort_order,
        )
class COUNT(Aggregation):
    """Aggregation whose type is ``AGG.COUNT``; the default name is ``'count'``."""

    def __init__(self, name='count', params=None, sub_aggregations=None, size=10,
                 shard_size=10, min_doc_count=0, sort_by=None, sort_order=None):
        super().__init__(
            agg_type=AGG.COUNT,
            name=name,
            params=params,
            sub_aggregations=sub_aggregations,
            size=size,
            shard_size=shard_size,
            min_doc_count=min_doc_count,
            sort_by=sort_by,
            sort_order=sort_order,
        )
class SUM(Aggregation):
    """Aggregation whose type is ``AGG.SUM``; the default name is ``'sum'``."""

    def __init__(self, name='sum', params=None, sub_aggregations=None, size=10,
                 shard_size=10, min_doc_count=0, sort_by=None, sort_order=None):
        super().__init__(
            agg_type=AGG.SUM,
            name=name,
            params=params,
            sub_aggregations=sub_aggregations,
            size=size,
            shard_size=shard_size,
            min_doc_count=min_doc_count,
            sort_by=sort_by,
            sort_order=sort_order,
        )
class STATS(Aggregation):
    """Aggregation whose type is ``AGG.STATS``; the default name is ``'stats'``."""

    def __init__(self, name='stats', params=None, sub_aggregations=None, size=10,
                 shard_size=10, min_doc_count=0, sort_by=None, sort_order=None):
        super().__init__(
            agg_type=AGG.STATS,
            name=name,
            params=params,
            sub_aggregations=sub_aggregations,
            size=size,
            shard_size=shard_size,
            min_doc_count=min_doc_count,
            sort_by=sort_by,
            sort_order=sort_order,
        )
class EXTENDED_STATS(Aggregation):
    """Aggregation whose type is ``AGG.EXTENDED_STATS``; the default name is ``'extendedStats'``."""

    def __init__(self, name='extendedStats', params=None, sub_aggregations=None, size=10,
                 shard_size=10, min_doc_count=0, sort_by=None, sort_order=None):
        super().__init__(
            agg_type=AGG.EXTENDED_STATS,
            name=name,
            params=params,
            sub_aggregations=sub_aggregations,
            size=size,
            shard_size=shard_size,
            min_doc_count=min_doc_count,
            sort_by=sort_by,
            sort_order=sort_order,
        )
class DATE_HISTOGRAM(Aggregation):
    """Aggregation whose type is ``AGG.DATE_HISTOGRAM``; the default name is ``'dateHistogram'``."""

    def __init__(self, name='dateHistogram', params=None, sub_aggregations=None, size=10,
                 shard_size=10, min_doc_count=0, sort_by=None, sort_order=None):
        super().__init__(
            agg_type=AGG.DATE_HISTOGRAM,
            name=name,
            params=params,
            sub_aggregations=sub_aggregations,
            size=size,
            shard_size=shard_size,
            min_doc_count=min_doc_count,
            sort_by=sort_by,
            sort_order=sort_order,
        )
class HISTOGRAM(Aggregation):
    """Aggregation whose type is ``AGG.HISTOGRAM``; the default name is ``'histogram'``."""

    def __init__(self, name='histogram', params=None, sub_aggregations=None, size=10,
                 shard_size=10, min_doc_count=0, sort_by=None, sort_order=None):
        super().__init__(
            agg_type=AGG.HISTOGRAM,
            name=name,
            params=params,
            sub_aggregations=sub_aggregations,
            size=size,
            shard_size=shard_size,
            min_doc_count=min_doc_count,
            sort_by=sort_by,
            sort_order=sort_order,
        )
class RANGE(Aggregation):
    """Aggregation whose type is ``AGG.RANGE``; the default name is ``'range'``."""

    def __init__(self, name='range', params=None, sub_aggregations=None, size=10,
                 shard_size=10, min_doc_count=0, sort_by=None, sort_order=None):
        super().__init__(
            agg_type=AGG.RANGE,
            name=name,
            params=params,
            sub_aggregations=sub_aggregations,
            size=size,
            shard_size=shard_size,
            min_doc_count=min_doc_count,
            sort_by=sort_by,
            sort_order=sort_order,
        )
class CARDINALITY(Aggregation):
    """Aggregation whose type is ``AGG.CARDINALITY``; the default name is ``'cardinality'``."""

    def __init__(self, name='cardinality', params=None, sub_aggregations=None, size=10,
                 shard_size=10, min_doc_count=0, sort_by=None, sort_order=None):
        super().__init__(
            agg_type=AGG.CARDINALITY,
            name=name,
            params=params,
            sub_aggregations=sub_aggregations,
            size=size,
            shard_size=shard_size,
            min_doc_count=min_doc_count,
            sort_by=sort_by,
            sort_order=sort_order,
        )
class AGG(Enum):
    """
    Aggregation type identifiers.

    Each member's value is the string that ``Aggregation.to_json`` emits
    under the ``type`` key; every value equals the member name.
    """

    TERM = "TERM"
    TERMS = "TERMS"
    NESTED = "NESTED"
    FILTER = "FILTER"
    MINIMUM = "MINIMUM"
    MAXIMUM = "MAXIMUM"
    AVERAGE = "AVERAGE"
    COUNT = "COUNT"
    SUM = "SUM"
    STATS = "STATS"
    EXTENDED_STATS = "EXTENDED_STATS"
    DATE_HISTOGRAM = "DATE_HISTOGRAM"
    HISTOGRAM = "HISTOGRAM"
    RANGE = "RANGE"
    CARDINALITY = "CARDINALITY"
class FT(Enum):
    """
    Filter type identifiers.

    Each member's value is the string that ``FilterType.to_json`` emits
    under the ``mdmFilterType`` key; every value equals the member name.
    """

    TERM_FILTER = "TERM_FILTER"
    BOOL_FILTER = "BOOL_FILTER"
    TERMS_FILTER = "TERMS_FILTER"
    MATCH_ALL_FILTER = "MATCH_ALL_FILTER"
    MATCH_FILTER = "MATCH_FILTER"
    MATCH_ALL_TERMS_FILTER = "MATCH_ALL_TERMS_FILTER"
    MATCH_ANY_TERM_FILTER = "MATCH_ANY_TERM_FILTER"
    TERM_FUZZY_FILTER = "TERM_FUZZY_FILTER"
    TERMS_FUZZY_FILTER = "TERMS_FUZZY_FILTER"
    TYPE_FILTER = "TYPE_FILTER"
    RANGE_FILTER = "RANGE_FILTER"
    WILDCARD_FILTER = "WILDCARD_FILTER"
    WILDCARD_CUSTOM_FILTER = "WILDCARD_CUSTOM_FILTER"
    EXISTS_FILTER = "EXISTS_FILTER"
    SIMPLE_QUERY_STRING = "SIMPLE_QUERY_STRING"
    MISSING_FILTER = "MISSING_FILTER"
    GEODISTANCE_FILTER = "GEODISTANCE_FILTER"

    # Nested counterparts.
    NESTED = "NESTED"
    NESTED_MATCH_ALL_TERMS_FILTER = "NESTED_MATCH_ALL_TERMS_FILTER"
    NESTED_MATCH_ANY_TERM_FILTER = "NESTED_MATCH_ANY_TERM_FILTER"
    NESTED_TERM_FILTER = "NESTED_TERM_FILTER"
    NESTED_TERMS_FILTER = "NESTED_TERMS_FILTER"
    NESTED_MATCH_ALL_FILTER = "NESTED_MATCH_ALL_FILTER"
    NESTED_MATCH_FILTER = "NESTED_MATCH_FILTER"
    NESTED_TERM_FUZZY_FILTER = "NESTED_TERM_FUZZY_FILTER"
    NESTED_TERMS_FUZZY_FILTER = "NESTED_TERMS_FUZZY_FILTER"
    NESTED_RANGE_FILTER = "NESTED_RANGE_FILTER"
    NESTED_WILDCARD_FILTER = "NESTED_WILDCARD_FILTER"
    NESTED_WILDCARD_CUSTOM_FILTER = "NESTED_WILDCARD_CUSTOM_FILTER"
    NESTED_SIMPLE_QUERY_STRING = "NESTED_SIMPLE_QUERY_STRING"
    NESTED_GEODISTANCE_FILTER = "NESTED_GEODISTANCE_FILTER"
    NESTED_EXISTS_FILTER = "NESTED_EXISTS_FILTER"
    NESTED_MISSING_FILTER = "NESTED_MISSING_FILTER"