Source code for winterthur.roadwork

from __future__ import annotations

import isodate
import pycurl
import sedate

from datetime import datetime, timedelta
from dogpile.cache.api import NO_VALUE
from functools import cached_property
from io import BytesIO
from onegov.core.custom import json
from operator import attrgetter
from pathlib import Path
from purl import URL
from sedate import utcnow


from typing import Any, TYPE_CHECKING
if TYPE_CHECKING:
    from collections.abc import Callable, Iterator
    from onegov.core.cache import RedisCacheRegion
    from typing import Self



[docs]
class RoadworkError(Exception):
    pass




[docs]
class RoadworkConnectionError(RoadworkError):
    pass




[docs]
class RoadworkConfig:
    """ Looks at ~/.pdb.secret and /etc/pdb.secret (in this order), to extract
    the configuration used for the RoadworkClient class.

    The configuration is as follows::

        HOSTNAME: pdb.example.org
        ENDPOINT: 127.0.0.1:6004
        USERNAME: username
        PASSWORD: password

    * The HOSTNAME is the address of the PDB service.
    * The ENDPOINT is the optional address of the tcp-proxy used.
    * The USERNAME is the NTLM password.
    * The PASSWORD is the NTLM password.

    """

    def __init__(
        self,
        hostname: str | None,
        endpoint: str | None,
        username: str | None,
        password: str | None
    ) -> None:

[docs]
        self.hostname = hostname


[docs]
        self.endpoint = endpoint


[docs]
        self.username = username


[docs]
        self.password = password


    @classmethod

[docs]
    def lookup_paths(cls) -> Iterator[Path]:
        yield Path('~/.pdb.secret').expanduser()
        yield Path('/etc/pdb.secret')


    @classmethod

[docs]
    def lookup(cls) -> Self:
        for path in cls.lookup_paths():
            if path.exists():
                return cls(**cls.parse(path))

        paths = ', '.join(str(p) for p in cls.lookup_paths())
        raise RoadworkError(
            f'No pdb configuration found in {paths}')


    @classmethod

[docs]
    def parse(cls, path: Path) -> dict[str, str | None]:
        result: dict[str, str | None] = {
            'hostname': None,
            'endpoint': None,
            'username': None,
            'password': None,
        }

        with path.open('r') as file:
            for line in file:
                line = line.strip()

                if not line:
                    continue

                if ':' not in line:
                    continue

                if line.startswith('#'):
                    continue

                k, v = line.split(':', maxsplit=1)
                k = k.strip().lower()
                v = v.strip()

                if k in result:
                    result[k] = v

        return result





[docs]
class RoadworkClient:
    """ A proxy to Winterthur's internal roadworks service. Uses redis as
    a caching mechanism to ensure performance and reliability.

    Since the roadworks service can only be reached inside Winterthur's
    network, we rely on a proxy connection during development/testing.

    To not expose any information unwittingly to the public, the description
    of how to connect to that proxy is kept at docs.seantis.ch.

    """

    def __init__(
        self,
        cache: RedisCacheRegion,
        hostname: str,
        username: str,
        password: str,
        endpoint: str | None = None
    ) -> None:

[docs]
        self.cache = cache


[docs]
        self.hostname = hostname


[docs]
        self.username = username


[docs]
        self.password = password


[docs]
        self.endpoint = endpoint or hostname


    @cached_property

[docs]
    def curl(self) -> pycurl.Curl:
        curl = pycurl.Curl()
        curl.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_NTLM)
        curl.setopt(pycurl.USERPWD, f'{self.username}:{self.password}')
        curl.setopt(pycurl.HTTPHEADER, [f'HOST: {self.hostname}'])
        curl.setopt(pycurl.VERBOSE, True)
        # This is is not really a good idea as it disables TLS certificate
        # validation!
        curl.setopt(pycurl.SSL_VERIFYPEER, 0)
        curl.setopt(pycurl.SSL_VERIFYHOST, 0)

        return curl



[docs]
    def url(self, path: str) -> str:
        return f'https://{self.endpoint}/{path}'



[docs]
    def get(
        self,
        path: str,
        lifetime: float = 5 * 60,
        downtime: float = 60 * 60
    ) -> Any:
        """ Requests the given path, returning the resulting json if
        successful.

        A cache is used in two stages:

        * At the lifetime stage, the cache is returned unconditionally.
        * At the end of the lifetime, the cache is refreshed if possible.
        * At the end of the downtime stage the cache forcefully refreshed.

        During its lifetime the object is basically up to 5 minutes out of
        date. But since the backend may not be available when that time
        expires we operate with a downtime that is higher (1 hour).

        This means that a downtime in the backend will not result in evicted
        caches, even if the lifetime is up. Once the downtime limit is up we
        do however evict the cache forcefully, raising an error if we cannot
        connect to the backend.

        """
        path = path.lstrip('/')
        cached = self.cache.get(path)

        def refresh() -> Any:
            try:
                status, body = self.get_uncached(path)
            except pycurl.error as exception:
                raise RoadworkConnectionError(
                    f'Could not connect to {self.hostname}'
                ) from exception

            if status == 200:
                self.cache.set(path, {
                    'created': utcnow(),
                    'status': status,
                    'body': body
                })

                return body

            raise RoadworkError(f'{path} returned {status}')

        # no cache yet, return result and cache it
        if cached is NO_VALUE:
            return refresh()

        now = utcnow()
        lifetime_horizon = cached['created'] + timedelta(seconds=lifetime)
        downtime_horizon = cached['created'] + timedelta(seconds=downtime)

        # within cache lifetime, return cached value
        if now <= lifetime_horizon:
            return cached['body']

        # outside cache lifetime, but still in downtime horizon, try to
        # refresh the value but ignore errors
        if lifetime_horizon < now < downtime_horizon:
            try:
                return refresh()
            except RoadworkConnectionError:
                return cached['body']

        # outside the downtime lifetime, force refresh and raise errors
        return refresh()



[docs]
    def get_uncached(self, path: str) -> tuple[int, Any]:
        body = BytesIO()

        self.curl.setopt(pycurl.URL, self.url(path))
        self.curl.setopt(pycurl.WRITEFUNCTION, body.write)
        self.curl.perform()

        status = self.curl.getinfo(pycurl.RESPONSE_CODE)
        body_str = body.getvalue().decode('utf-8')

        if status == 200:
            return status, json.loads(body_str)

        return status, body_str



[docs]
    def is_cacheable(self, response: tuple[int, Any]) -> bool:
        return response[0] == 200





[docs]
class RoadworkCollection:

    def __init__(
        self,
        client: RoadworkClient,
        letter: str | None = None,
        query: str | None = None
    ) -> None:

[docs]
        self.client = client


[docs]
        self.query = None


[docs]
        self.letter = None


        if query:
            self.query = query.lower()

        elif letter:
            self.letter = letter.lower()

    @property

[docs]
    def letters(self) -> list[str]:
        return sorted({
            letter
            for roadwork in self.by_letter(None).roadwork
            for letter in roadwork.letters
        })



[docs]
    def by_filter(self, filter: str) -> list[Roadwork]:

        # note: addGisLink doesn't work here
        url = (
            URL('odata/Baustellen')
            .query_param('addGisLink', 'False')
            .query_param('$filter', filter)
        )

        records = self.client.get(url.as_string()).get('value', ())
        return sorted((
            Roadwork(r) for r in records
            if r['Internet']
        ), key=attrgetter('title'))


    @property

[docs]
    def roadwork(self) -> list[Roadwork]:
        date = datetime.today()

        roadwork = self.by_filter(filter=' and '.join((
            f'DauerVon le {date.strftime("%Y-%m-%d")}',
            f'DauerBis ge {date.strftime("%Y-%m-%d")}',
        )))

        # The backend supports searches/filters, but the used dataset is
        # so small that it makes little sense to use that feature, since it
        # would lead to a lot more cache-misses on our end.
        #
        # Instead we simply loop through the results and filter them out.
        if self.query:
            roadwork = [
                r for r in roadwork if self.query in r.title.lower()
            ]

        elif self.letter:
            roadwork = [
                r for r in roadwork if self.letter in r.letters
            ]

        return roadwork



[docs]
    def by_id(self, id: int) -> Roadwork | None:
        url = (
            URL(f'odata/Baustellen({id})')
            .query_param('addGisLink', 'True'))

        work = tuple(
            Roadwork(r) for r in self.client.get(
                url.as_string()).get('value', ()))

        if work:
            return work[0]

        # secondary lookup is against the subsections.. this probably calls
        # for an index eventually
        for r in self.roadwork:
            for section in r.sections:
                if section.id == id:
                    return section
        return None



[docs]
    def by_letter(self, letter: str | None) -> Self:
        return self.__class__(self.client, letter=letter, query=None)





[docs]
class Roadwork:


[docs]
    convertors: dict[str, Callable[[str | None], Any]]


    def __init__(self, data: dict[str, Any]) -> None:

[docs]
        self.data = data


        self.convertors = {
            'DauerVon': lambda v: v and isodate.parse_datetime(v),
            'DauerBis': lambda v: v and isodate.parse_datetime(v),
        }

    @property

[docs]
    def id(self) -> int:
        return self['Id']


    @property

[docs]
    def letters(self) -> Iterator[str]:
        for key in ('ProjektBezeichnung', 'ProjektBereich'):
            if value := self[key]:
                letter = value[0].lower()
                if 97 <= ord(letter) <= 122:
                    yield letter


    @property

[docs]
    def title(self) -> str:
        parts = (self[key] for key in ('ProjektBezeichnung', 'ProjektBereich'))
        parts = (p.strip() for p in parts if p)
        parts = (p for p in parts)

        return ' '.join(parts)


    @property

[docs]
    def sections(self) -> list[Self]:
        now = sedate.utcnow()

        sections = (
            self.__class__({
                'Id': r['TeilbaustelleId'],
                'Teilbaustellen': [],
                **r
            }) for r in self['Teilbaustellen']
        )

        sections = (s for s in sections if s['DauerVon'])
        sections = (s for s in sections if s['DauerVon'] <= now)
        sections = (s for s in sections if now <= (s['DauerBis'] or now))

        return list(sections)



[docs]
    def __getitem__(self, key: str) -> Any:
        value = self.data[key]

        if key in self.convertors:
            return self.convertors[key](value)

        return value



[docs]
    def __contains__(self, key: str) -> bool:
        return key in self.data