| | """Base class and utility functions for all event stream data serializers. |
| | |
| | A serializer should extend the 'EventDataLoader' class to (down)load event |
| | stream data. |
| | """ |
| |
|
| | import base64 |
| | import json |
| | import warnings |
| | from abc import ABC, abstractmethod |
| | from typing import Any, Union |
| | from urllib import request |
| |
|
| | from pandera.typing import DataFrame |
| |
|
# Type alias used as the return annotation of the JSON loading helpers in this
# module: a scalar, ``None``, a string-keyed dict or a list.
JSONType = Union[str, int, float, bool, None, dict[str, Any], list[Any]]
| |
|
| |
|
class ParseError(Exception):
    """Signal that an input file does not have the expected format."""
| |
|
| |
|
class MissingDataError(Exception):
    """Signal that a required field is absent from the input data."""
| |
|
| |
|
class NoAuthWarning(UserWarning):
    """Warning category used when user credentials were not supplied."""
| |
|
| |
|
def _remoteloadjson(path: str) -> JSONType:
    """Load JSON data from a URL.

    Parameters
    ----------
    path : str
        URL of the data source.

    Returns
    -------
    JSONType
        The decoded JSON document.
    """
    # Use the response as a context manager so that it is closed
    # deterministically, even if reading or decoding the payload fails.
    with request.urlopen(path) as response:
        return json.loads(response.read())
| |
|
| |
|
def _auth_remoteloadjson(user: str, passwd: str) -> None:
    """Install a URL opener that sends an Authorization header.

    The username and password are joined as ``user:passwd``, base64-encoded
    and sent as a ``Basic`` Authorization header by the installed opener.

    Parameters
    ----------
    user : str
        Username.
    passwd : str
        Password.
    """
    auth_opener = request.build_opener()
    credentials = f"{user}:{passwd}".encode()
    token = base64.b64encode(credentials).decode()
    # The header list is assigned, not appended to, so it becomes the
    # opener's complete ``addheaders`` list.
    auth_opener.addheaders = [("Authorization", f"Basic {token}")]
    # Register the opener with ``urllib.request`` for subsequent requests.
    request.install_opener(auth_opener)
| |
|
| |
|
def _localloadjson(path: str) -> JSONType:
    """Read a JSON file from disk and return its decoded content.

    Parameters
    ----------
    path : str
        Path of the JSON file.

    Returns
    -------
    JSONType
        The data decoded from the file.
    """
    # The file is always read as UTF-8, independent of the platform default.
    with open(path, encoding="utf-8") as json_file:
        raw_text = json_file.read()
    return json.loads(raw_text)
| |
|
| |
|
def _has_auth(creds: dict[str, str]) -> bool:
    """Tell whether both a username and a password were supplied.

    A ``NoAuthWarning`` is emitted when either of them is missing or empty.

    Parameters
    ----------
    creds : dict
        A dictionary with user credentials. It should contain "user" and
        "passwd" keys.

    Returns
    -------
    bool
        True if user credentials are provided, False otherwise.
    """
    # A key that is absent, None or the empty string counts as "not given".
    not_given = (None, "")
    if creds.get("user") not in not_given and creds.get("passwd") not in not_given:
        return True
    warnings.warn("Credentials were not supplied. Public data access only.", NoAuthWarning)
    return False
| |
|
| |
|
def _expand_minute(minute: int, periods_duration: list[int]) -> int:
    """Add the extra time of earlier periods to a timestamp.

    For every period except the last one in ``periods_duration``, as long as
    ``minute`` lies beyond the regular end of that period, the difference
    between the period's actual and regular duration is added.

    Parameters
    ----------
    minute : int
        Timestamp in minutes.
    periods_duration : List[int]
        Total duration of each period in minutes.

    Returns
    -------
    int
        Timestamp expanded with injury time.
    """
    # Regular length in minutes of each period; the fifth entry is 0
    # (presumably the penalty shootout -- TODO confirm).
    regular_lengths = (45, 45, 15, 15, 0)
    regular_elapsed = 0  # regular minutes played up to the end of this period
    total = minute
    # The last entry of periods_duration is never used for the expansion.
    for idx, actual_length in enumerate(periods_duration[:-1]):
        regular_elapsed += regular_lengths[idx]
        if not minute > regular_elapsed:
            # The timestamp falls inside this period; later ones add nothing.
            break
        total += actual_length - regular_lengths[idx]
    return total
| |
|
| |
|
class EventDataLoader(ABC):
    """Abstract interface for loading event stream data.

    The data is loaded either from a remote location or from a local folder.
    A concrete loader has to implement all five accessors declared below;
    each of them returns a dataframe.

    Parameters
    ----------
    root : str
        Root-path of the data.
    getter : str
        "remote" or "local"

    Notes
    -----
    This base class does not define a constructor itself; handling of the
    parameters listed above is left to the concrete loaders.
    """

    @abstractmethod
    def competitions(self) -> DataFrame[Any]:
        """List every available competition and season.

        Returns
        -------
        pd.DataFrame
            A dataframe containing all available competitions and seasons. See
            :class:`~socceraction.spadl.base.CompetitionSchema` for the schema.
        """

    @abstractmethod
    def games(self, competition_id: int, season_id: int) -> DataFrame[Any]:
        """List every available game of one season of a competition.

        Parameters
        ----------
        competition_id : int
            The ID of the competition.
        season_id : int
            The ID of the season.

        Returns
        -------
        pd.DataFrame
            A dataframe containing all available games. See
            :class:`~socceraction.spadl.base.GameSchema` for the schema.
        """

    @abstractmethod
    def teams(self, game_id: int) -> DataFrame[Any]:
        """List the two teams that took part in a game.

        Parameters
        ----------
        game_id : int
            The ID of the game.

        Returns
        -------
        pd.DataFrame
            A dataframe containing both teams. See
            :class:`~socceraction.spadl.base.TeamSchema` for the schema.
        """

    @abstractmethod
    def players(self, game_id: int) -> DataFrame[Any]:
        """List every player that took part in a game.

        Parameters
        ----------
        game_id : int
            The ID of the game.

        Returns
        -------
        pd.DataFrame
            A dataframe containing all players. See
            :class:`~socceraction.spadl.base.PlayerSchema` for the schema.
        """

    @abstractmethod
    def events(self, game_id: int) -> DataFrame[Any]:
        """Load the event stream of a game.

        Parameters
        ----------
        game_id : int
            The ID of the game.

        Returns
        -------
        pd.DataFrame
            A dataframe containing the event stream. See
            :class:`~socceraction.spadl.base.EventSchema` for the schema.
        """
| |
|