from __future__ import annotations

from elasticsearch_dsl.function import SF  # type:ignore
from elasticsearch_dsl.query import FunctionScore  # type:ignore
from elasticsearch_dsl.query import Match
from elasticsearch_dsl.query import MatchPhrase
from elasticsearch_dsl.query import MultiMatch
from functools import cached_property
from onegov.core.collection import Pagination, _M
from onegov.event.models import Event


from typing import TYPE_CHECKING
if TYPE_CHECKING:
    from onegov.org.request import OrgRequest
    from onegov.search.dsl import Hit, Response, Search as ESSearch


class Search(Pagination[_M]):
    """Paginated search against the application's Elasticsearch index.

    Builds a query from the user's search term, executes it for the
    current page and exposes the response, the loaded results, the hit
    count, suggestions and a "feeling lucky" link to the first hit.
    """

    results_per_page = 10

    # Maximum number of characters of the query that is sent to the search
    # backend (see `batch`).
    # NOTE(review): `batch` reads this attribute, but it was not defined
    # anywhere in this class; 100 is an assumed default -- confirm it
    # against the base class / the intended limit.
    max_query_length = 100

    def __init__(self, request: OrgRequest, query: str, page: int) -> None:
        """Create a search for `query`, positioned at `page`.

        `request` is used to reach the app's search helpers, to check the
        user's permissions and to generate links. `page` is handed to the
        pagination base class unchanged.
        """
        super().__init__(page)

        # all other methods read these two attributes
        self.request = request
        self.query = query

    @cached_property
    def available_documents(self) -> int:
        """The document count of the request's search without any query
        applied to it. """
        search = self.request.app.es_search_by_request(self.request)
        return search.count()

    @cached_property
    def explain(self) -> bool:
        """True if the user is a manager and the request carries an
        'explain' parameter. The value is passed on to the search in
        `batch`. """
        return self.request.is_manager and 'explain' in self.request.params

    @property
    def q(self) -> str:
        """Alias for the search query."""
        return self.query

    def __eq__(self, other: object) -> bool:
        """Two searches are equal if they are of the same class (or a
        subclass thereof) and share page and query. """
        return (
            isinstance(other, self.__class__)
            and self.page == other.page
            and self.query == other.query
        )

    if TYPE_CHECKING:
        # typing-only declaration, the runtime attribute is not defined
        # in this class
        @property
        def cached_subset(self) -> Response | None: ...  # type:ignore

    def subset(self) -> Response | None:  # type:ignore[override]
        """Return the response of the current page (see `batch`)."""
        return self.batch

    @property
    def page_index(self) -> int:
        """The index of the current page."""
        return self.page

    def page_by_index(self, index: int) -> Search[_M]:
        """Return a search with the same request and query, positioned at
        the page with the given index. """
        return Search(self.request, self.query, index)

    @cached_property
    def batch(self) -> Response | None:  # type:ignore[override]
        """Execute the search for the current page.

        Returns None if the query is empty. Queries starting with '#' are
        handled by `hashtag_search`, all others by `generic_search`.
        """
        if not self.query:
            return None

        search = self.request.app.es_search_by_request(
            request=self.request,
            explain=self.explain
        )

        # queries need to be cut at some point to make sure we're not
        # pushing the elasticsearch cluster to the brink
        query = self.query[:self.max_query_length]

        if query.startswith('#'):
            search = self.hashtag_search(search, query)
        else:
            search = self.generic_search(search, query)

        # only fetch the slice belonging to the current page
        return search[self.offset:self.offset + self.batch_size].execute()

    @cached_property
    def load_batch_results(self) -> list[Hit]:
        """Load search results and sort events by latest occurrence.

        This method is a wrapper around `batch.load()`, which returns the
        actual search results from the query.

        If the results contain events, the events are moved in front of
        all other results and sorted in ascending order by the start of
        their latest occurrence (events without one come first). Results
        without any event are returned exactly as loaded.

        Requires a non-empty query, i.e. `batch` must not be None.
        """

        def get_sort_key(event: Event) -> float:
            if event.latest_occurrence:
                return event.latest_occurrence.start.timestamp()
            # events without an occurrence sort before all others
            return float('-inf')

        assert self.batch is not None
        batch = self.batch.load()

        events = []
        non_events = []
        for search_result in batch:
            if isinstance(search_result, Event):
                events.append(search_result)
            else:
                non_events.append(search_result)

        if not events:
            return batch

        return sorted(events, key=get_sort_key) + non_events

    def generic_search(
        self,
        search: ESSearch,
        query: str
    ) -> ESSearch:
        """Apply the query used for regular (non-hashtag) terms to the
        given search and return the resulting search. """

        # make sure the title matches with a higher priority, otherwise the
        # "get lucky" functionality is not so lucky after all
        match_title = MatchPhrase(title={'query': query, 'boost': 3})

        # we *could* use Match here and include '_all' fields, but that
        # yields us less exact results, probably because '_all' includes some
        # metadata fields we have no use for
        match_rest = MultiMatch(query=query, fields=[
            field for field in self.request.app.es_mappings.registered_fields
            if not field.startswith('es_')
        ], fuzziness='1', prefix_length=3)

        search = search.query(match_title | match_rest)

        # favour documents with recent changes, over documents without
        search.query = FunctionScore(query=search.query, functions=[
            SF('gauss', es_last_change={
                'offset': '7d',
                'scale': '90d',
                'decay': '0.99'
            })
        ])

        return search

    def hashtag_search(self, search: ESSearch, query: str) -> ESSearch:
        """Match the query, stripped of its leading '#' characters, against
        the 'es_tags' field. """
        return search.query(Match(es_tags=query.lstrip('#')))

    def feeling_lucky(self) -> str | None:
        """Return the link to the first result of the current page, or
        None if there is no result. """
        if not self.batch:
            return None

        first_entry = self.batch[0].load()

        # XXX the default view to the event should be doing the redirect
        if first_entry.__tablename__ == 'events':
            return self.request.link(first_entry, 'latest')

        return self.request.link(first_entry)

    @cached_property
    def subset_count(self) -> int:
        """The total hit count reported by the cached subset, 0 if there
        is no (or an empty) subset. """
        subset = self.cached_subset
        if not subset:
            return 0
        return subset.hits.total.value or 0

    def suggestions(self) -> tuple[str, ...]:
        """Return the suggestions the app provides for the query."""
        return tuple(self.request.app.es_suggestions_by_request(
            self.request, self.query
        ))