Module redvox.common.data_window_configuration

This module provides type-safe data window configuration using an external file

Expand source code
"""
This module provides type-safe data window configuration using an external file
"""

from dataclasses import dataclass
from dataclasses_json import dataclass_json
from typing import Optional, List, MutableMapping

import pprint
import toml
import numpy as np

import redvox.common.date_time_utils as dtu
from redvox.common.gap_and_pad_utils import DataPointCreationMode


# Default values, in seconds, for the timing fields of DataWindowConfigFile
DEFAULT_DROP_TIME_S: float = 0.2  # default drop_time_seconds: seconds between packets to be considered a gap
DEFAULT_START_PADDING_S: float = 120.0  # default start_padding_seconds: time to add before start time when searching
DEFAULT_END_PADDING_S: float = 120.0  # default end_padding_seconds: time to add after end time when searching


@dataclass_json
@dataclass
class DataWindowConfigFile:
    """
    Configuration values for a data window, loadable from a TOML file with `from_path`.

    Properties:
        event_name: str, name of the data window.  Default "dw"

        origin_provider: string, source of the location data (e.g. GPS or NETWORK), default UNKNOWN

        origin_latitude: float, best estimate of latitude in degrees, default np.nan

        origin_latitude_std: float, standard deviation of best estimate of latitude, default np.nan

        origin_longitude: float, best estimate of longitude in degrees, default np.nan

        origin_longitude_std: float, standard deviation of best estimate of longitude, default np.nan

        origin_altitude: float, best estimate of altitude in meters, default np.nan

        origin_altitude_std: float, standard deviation of best estimate of altitude, default np.nan

        origin_event_radius_m: float, radius of event in meters, default 0.0

        output_dir: str, directory to output the data to.  Default "." (current directory)

        output_type: str, type of file to output the data as.  Options are: "NONE", "PARQUET", "LZ4"
        Default "NONE" (no saving).

        make_runme: bool, if True, save a runme.py example file along with the data.  Default False

        input_directory: str, directory that contains the files to read data from.  REQUIRED

        structured_layout: bool, if True, the input_directory contains specially named and organized
        directories of data.  Default True

        station_ids: optional list of strings, list of station ids to filter on.
        If empty or None, get any ids found in the input directory.  Default None

        extensions: optional list of strings, representing file extensions to filter on.
        If None, gets as much data as it can in the input directory.  Default None

        api_versions: optional list of ApiVersions, representing api versions to filter on.
        If None, get as much data as it can in the input directory.  Default None

        start_year: optional int representing the year of the data window start time.  Default None

        start_month: optional int representing the month of the data window start time.  Default None

        start_day: optional int representing the day of the data window start time.  Default None

        start_hour: optional int representing the hour of the data window start time.  Default None

        start_minute: optional int representing the minute of the data window start time.  Default None

        start_second: optional int representing the second of the data window start time.  Default None

        end_year: optional int representing the year of the data window end time.  Default None

        end_month: optional int representing the month of the data window end time.  Default None

        end_day: optional int representing the day of the data window end time.  Default None

        end_hour: optional int representing the hour of the data window end time.  Default None

        end_minute: optional int representing the minute of the data window end time.  Default None

        end_second: optional int representing the second of the data window end time.  Default None

        start_padding_seconds: float representing the amount of seconds to include before the start datetime
        when filtering data.  Default DEFAULT_START_PADDING_S (120 seconds)

        end_padding_seconds: float representing the amount of seconds to include after the end datetime
        when filtering data.  Default DEFAULT_END_PADDING_S (120 seconds)

        drop_time_seconds: float representing the minimum amount of seconds between data packets that would indicate
        a gap.  Default DEFAULT_DROP_TIME_S (0.2 seconds)

        apply_correction: bool, if True, update the timestamps in the data based on best station offset.  Default True

        edge_points_mode: str, one of "NAN", "COPY", or "INTERPOLATE".  Determines behavior when creating points on
        the edge of the data window.  default "COPY"

        use_model_correction: bool, if True, use the offset model's correction functions, otherwise use the best
        offset.  Default True

        debug: bool, if True, output additional information when processing data window.  Default False
    """

    input_directory: str
    event_name: str = "dw"
    origin_provider: str = "UNKNOWN"
    origin_latitude: float = np.nan
    origin_latitude_std: float = np.nan
    origin_longitude: float = np.nan
    origin_longitude_std: float = np.nan
    origin_altitude: float = np.nan
    origin_altitude_std: float = np.nan
    origin_event_radius_m: float = 0.0
    output_dir: str = "."
    output_type: str = "NONE"
    make_runme: bool = False
    structured_layout: bool = True
    station_ids: Optional[List[str]] = None
    extensions: Optional[List[str]] = None
    api_versions: Optional[List[str]] = None
    start_year: Optional[int] = None
    start_month: Optional[int] = None
    start_day: Optional[int] = None
    start_hour: Optional[int] = None
    start_minute: Optional[int] = None
    start_second: Optional[int] = None
    end_year: Optional[int] = None
    end_month: Optional[int] = None
    end_day: Optional[int] = None
    end_hour: Optional[int] = None
    end_minute: Optional[int] = None
    end_second: Optional[int] = None
    start_padding_seconds: float = DEFAULT_START_PADDING_S
    end_padding_seconds: float = DEFAULT_END_PADDING_S
    drop_time_seconds: float = DEFAULT_DROP_TIME_S
    apply_correction: bool = True
    use_model_correction: bool = True
    edge_points_mode: str = "COPY"
    debug: bool = False

    @staticmethod
    def from_path(config_path: str) -> "DataWindowConfigFile":
        """
        Load a configuration from a TOML file.

        :param config_path: path to the TOML file to read
        :return: the DataWindowConfigFile built from the parsed file contents
        :raises Exception: any error raised while opening, parsing or converting the file is
                           re-raised unchanged after the offending path has been printed
        """
        try:
            # TOML files are UTF-8 by specification; do not depend on the platform default encoding
            with open(config_path, "r", encoding="utf-8") as config_in:
                config_dict: MutableMapping = toml.load(config_in)
                # noinspection Mypy
                return DataWindowConfigFile.from_dict(config_dict)
        except Exception:
            print(f"Error loading configuration at: {config_path}")
            # bare raise re-raises the active exception with its traceback untouched
            raise

    def pretty(self) -> str:
        """
        :return: the configuration's dictionary form, formatted by pprint
        """
        # noinspection Mypy
        return pprint.pformat(self.to_dict())

    @staticmethod
    def _build_dt(
        year: int,
        month: Optional[int],
        day: Optional[int],
        hour: Optional[int],
        minute: Optional[int],
        second: Optional[int],
    ) -> dtu.datetime:
        """
        Build a datetime from its components; a None month or day becomes 1 and
        a None hour, minute or second becomes 0.

        :return: datetime made from the given components
        """
        return dtu.datetime(
            year,
            1 if month is None else month,
            1 if day is None else day,
            0 if hour is None else hour,
            0 if minute is None else minute,
            0 if second is None else second,
        )

    def start_dt(self) -> Optional[dtu.datetime]:
        """
        :return: the start datetime built from the start_* fields, or None if start_year is None.
                 Components other than the year that are None fall back to month 1, day 1,
                 hour 0, minute 0, second 0 instead of raising a TypeError.
        """
        if self.start_year is None:
            return None
        return self._build_dt(
            self.start_year, self.start_month, self.start_day, self.start_hour, self.start_minute, self.start_second
        )

    def set_start_dt(self, start_dt: dtu.datetime):
        """
        Copy year, month, day, hour, minute and second of a datetime into the start_* fields.

        :param start_dt: datetime to take the start components from
        """
        self.start_year = start_dt.year
        self.start_month = start_dt.month
        self.start_day = start_dt.day
        self.start_hour = start_dt.hour
        self.start_minute = start_dt.minute
        self.start_second = start_dt.second

    def end_dt(self) -> Optional[dtu.datetime]:
        """
        :return: the end datetime built from the end_* fields, or None if end_year is None.
                 Components other than the year that are None fall back to month 1, day 1,
                 hour 0, minute 0, second 0 instead of raising a TypeError.
        """
        if self.end_year is None:
            return None
        return self._build_dt(
            self.end_year, self.end_month, self.end_day, self.end_hour, self.end_minute, self.end_second
        )

    def set_end_dt(self, end_dt: dtu.datetime):
        """
        Copy year, month, day, hour, minute and second of a datetime into the end_* fields.

        :param end_dt: datetime to take the end components from
        """
        self.end_year = end_dt.year
        self.end_month = end_dt.month
        self.end_day = end_dt.day
        self.end_hour = end_dt.hour
        self.end_minute = end_dt.minute
        self.end_second = end_dt.second

    def start_buffer_td(self) -> dtu.timedelta:
        """
        :return: start_padding_seconds as a timedelta
        """
        return dtu.timedelta(seconds=self.start_padding_seconds)

    def end_buffer_td(self) -> dtu.timedelta:
        """
        :return: end_padding_seconds as a timedelta
        """
        return dtu.timedelta(seconds=self.end_padding_seconds)

    def copy_edge_points(self) -> DataPointCreationMode:
        """
        :return: the DataPointCreationMode member whose name equals edge_points_mode
        :raises KeyError: if edge_points_mode is not the name of a DataPointCreationMode member
        """
        return DataPointCreationMode[self.edge_points_mode]

Classes

class DataWindowConfigFile (input_directory: str, event_name: str = 'dw', origin_provider: str = 'UNKNOWN', origin_latitude: float = nan, origin_latitude_std: float = nan, origin_longitude: float = nan, origin_longitude_std: float = nan, origin_altitude: float = nan, origin_altitude_std: float = nan, origin_event_radius_m: float = 0.0, output_dir: str = '.', output_type: str = 'NONE', make_runme: bool = False, structured_layout: bool = True, station_ids: Optional[List[str]] = None, extensions: Optional[List[str]] = None, api_versions: Optional[List[str]] = None, start_year: Optional[int] = None, start_month: Optional[int] = None, start_day: Optional[int] = None, start_hour: Optional[int] = None, start_minute: Optional[int] = None, start_second: Optional[int] = None, end_year: Optional[int] = None, end_month: Optional[int] = None, end_day: Optional[int] = None, end_hour: Optional[int] = None, end_minute: Optional[int] = None, end_second: Optional[int] = None, start_padding_seconds: float = 120.0, end_padding_seconds: float = 120.0, drop_time_seconds: float = 0.2, apply_correction: bool = True, use_model_correction: bool = True, edge_points_mode: str = 'COPY', debug: bool = False)

Properties

event_name: str, name of the data window. Default "dw"

origin_provider: string, source of the location data (e.g. GPS or NETWORK), default UNKNOWN

origin_latitude: float, best estimate of latitude in degrees, default np.nan

origin_latitude_std: float, standard deviation of best estimate of latitude, default np.nan

origin_longitude: float, best estimate of longitude in degrees, default np.nan

origin_longitude_std: float, standard deviation of best estimate of longitude, default np.nan

origin_altitude: float, best estimate of altitude in meters, default np.nan

origin_altitude_std: float, standard deviation of best estimate of altitude, default np.nan

origin_event_radius_m: float, radius of event in meters, default 0.0

output_dir: str, directory to output the data to. Default "." (current directory)

output_type: str, type of file to output the data as. Options are: "NONE", "PARQUET", "LZ4". Default "NONE" (no saving).

make_runme: bool, if True, save a runme.py example file along with the data. Default False

input_directory: str, directory that contains the files to read data from. REQUIRED

structured_layout: bool, if True, the input_directory contains specially named and organized directories of data. Default True

station_ids: optional list of strings, list of station ids to filter on. If empty or None, get any ids found in the input directory. Default None

extensions: optional list of strings, representing file extensions to filter on. If None, gets as much data as it can in the input directory. Default None

api_versions: optional list of ApiVersions, representing api versions to filter on. If None, get as much data as it can in the input directory. Default None

start_year: optional int representing the year of the data window start time. Default None

start_month: optional int representing the month of the data window start time. Default None

start_day: optional int representing the day of the data window start time. Default None

start_hour: optional int representing the hour of the data window start time. Default None

start_minute: optional int representing the minute of the data window start time. Default None

start_second: optional int representing the second of the data window start time. Default None

end_year: optional int representing the year of the data window end time. Default None

end_month: optional int representing the month of the data window end time. Default None

end_day: optional int representing the day of the data window end time. Default None

end_hour: optional int representing the hour of the data window end time. Default None

end_minute: optional int representing the minute of the data window end time. Default None

end_second: optional int representing the second of the data window end time. Default None

start_padding_seconds: float representing the amount of seconds to include before the start datetime when filtering data. Default DEFAULT_START_PADDING_S (120 seconds)

end_padding_seconds: float representing the amount of seconds to include after the end datetime when filtering data. Default DEFAULT_END_PADDING_S (120 seconds)

drop_time_seconds: float representing the minimum amount of seconds between data packets that would indicate a gap. Default DEFAULT_DROP_TIME_S (0.2 seconds)

apply_correction: bool, if True, update the timestamps in the data based on best station offset. Default True

edge_points_mode: str, one of "NAN", "COPY", or "INTERPOLATE". Determines behavior when creating points on the edge of the data window. default "COPY"

use_model_correction: bool, if True, use the offset model's correction functions, otherwise use the best offset. Default True

debug: bool, if True, output additional information when processing data window. Default False

Expand source code
@dataclass_json
@dataclass
class DataWindowConfigFile:
    """
    Configuration values for a data window, loadable from a TOML file with `from_path`.

    Properties:
        event_name: str, name of the data window.  Default "dw"

        origin_provider: string, source of the location data (e.g. GPS or NETWORK), default UNKNOWN

        origin_latitude: float, best estimate of latitude in degrees, default np.nan

        origin_latitude_std: float, standard deviation of best estimate of latitude, default np.nan

        origin_longitude: float, best estimate of longitude in degrees, default np.nan

        origin_longitude_std: float, standard deviation of best estimate of longitude, default np.nan

        origin_altitude: float, best estimate of altitude in meters, default np.nan

        origin_altitude_std: float, standard deviation of best estimate of altitude, default np.nan

        origin_event_radius_m: float, radius of event in meters, default 0.0

        output_dir: str, directory to output the data to.  Default "." (current directory)

        output_type: str, type of file to output the data as.  Options are: "NONE", "PARQUET", "LZ4"
        Default "NONE" (no saving).

        make_runme: bool, if True, save a runme.py example file along with the data.  Default False

        input_directory: str, directory that contains the files to read data from.  REQUIRED

        structured_layout: bool, if True, the input_directory contains specially named and organized
        directories of data.  Default True

        station_ids: optional list of strings, list of station ids to filter on.
        If empty or None, get any ids found in the input directory.  Default None

        extensions: optional list of strings, representing file extensions to filter on.
        If None, gets as much data as it can in the input directory.  Default None

        api_versions: optional list of ApiVersions, representing api versions to filter on.
        If None, get as much data as it can in the input directory.  Default None

        start_year: optional int representing the year of the data window start time.  Default None

        start_month: optional int representing the month of the data window start time.  Default None

        start_day: optional int representing the day of the data window start time.  Default None

        start_hour: optional int representing the hour of the data window start time.  Default None

        start_minute: optional int representing the minute of the data window start time.  Default None

        start_second: optional int representing the second of the data window start time.  Default None

        end_year: optional int representing the year of the data window end time.  Default None

        end_month: optional int representing the month of the data window end time.  Default None

        end_day: optional int representing the day of the data window end time.  Default None

        end_hour: optional int representing the hour of the data window end time.  Default None

        end_minute: optional int representing the minute of the data window end time.  Default None

        end_second: optional int representing the second of the data window end time.  Default None

        start_padding_seconds: float representing the amount of seconds to include before the start datetime
        when filtering data.  Default DEFAULT_START_PADDING_S (120 seconds)

        end_padding_seconds: float representing the amount of seconds to include after the end datetime
        when filtering data.  Default DEFAULT_END_PADDING_S (120 seconds)

        drop_time_seconds: float representing the minimum amount of seconds between data packets that would indicate
        a gap.  Default DEFAULT_DROP_TIME_S (0.2 seconds)

        apply_correction: bool, if True, update the timestamps in the data based on best station offset.  Default True

        edge_points_mode: str, one of "NAN", "COPY", or "INTERPOLATE".  Determines behavior when creating points on
        the edge of the data window.  default "COPY"

        use_model_correction: bool, if True, use the offset model's correction functions, otherwise use the best
        offset.  Default True

        debug: bool, if True, output additional information when processing data window.  Default False
    """

    input_directory: str
    event_name: str = "dw"
    origin_provider: str = "UNKNOWN"
    origin_latitude: float = np.nan
    origin_latitude_std: float = np.nan
    origin_longitude: float = np.nan
    origin_longitude_std: float = np.nan
    origin_altitude: float = np.nan
    origin_altitude_std: float = np.nan
    origin_event_radius_m: float = 0.0
    output_dir: str = "."
    output_type: str = "NONE"
    make_runme: bool = False
    structured_layout: bool = True
    station_ids: Optional[List[str]] = None
    extensions: Optional[List[str]] = None
    api_versions: Optional[List[str]] = None
    start_year: Optional[int] = None
    start_month: Optional[int] = None
    start_day: Optional[int] = None
    start_hour: Optional[int] = None
    start_minute: Optional[int] = None
    start_second: Optional[int] = None
    end_year: Optional[int] = None
    end_month: Optional[int] = None
    end_day: Optional[int] = None
    end_hour: Optional[int] = None
    end_minute: Optional[int] = None
    end_second: Optional[int] = None
    start_padding_seconds: float = DEFAULT_START_PADDING_S
    end_padding_seconds: float = DEFAULT_END_PADDING_S
    drop_time_seconds: float = DEFAULT_DROP_TIME_S
    apply_correction: bool = True
    use_model_correction: bool = True
    edge_points_mode: str = "COPY"
    debug: bool = False

    @staticmethod
    def from_path(config_path: str) -> "DataWindowConfigFile":
        """
        Load a configuration from a TOML file.

        :param config_path: path to the TOML file to read
        :return: the DataWindowConfigFile built from the parsed file contents
        :raises Exception: any error raised while opening, parsing or converting the file is
                           re-raised unchanged after the offending path has been printed
        """
        try:
            # TOML files are UTF-8 by specification; do not depend on the platform default encoding
            with open(config_path, "r", encoding="utf-8") as config_in:
                config_dict: MutableMapping = toml.load(config_in)
                # noinspection Mypy
                return DataWindowConfigFile.from_dict(config_dict)
        except Exception:
            print(f"Error loading configuration at: {config_path}")
            # bare raise re-raises the active exception with its traceback untouched
            raise

    def pretty(self) -> str:
        """
        :return: the configuration's dictionary form, formatted by pprint
        """
        # noinspection Mypy
        return pprint.pformat(self.to_dict())

    @staticmethod
    def _build_dt(
        year: int,
        month: Optional[int],
        day: Optional[int],
        hour: Optional[int],
        minute: Optional[int],
        second: Optional[int],
    ) -> dtu.datetime:
        """
        Build a datetime from its components; a None month or day becomes 1 and
        a None hour, minute or second becomes 0.

        :return: datetime made from the given components
        """
        return dtu.datetime(
            year,
            1 if month is None else month,
            1 if day is None else day,
            0 if hour is None else hour,
            0 if minute is None else minute,
            0 if second is None else second,
        )

    def start_dt(self) -> Optional[dtu.datetime]:
        """
        :return: the start datetime built from the start_* fields, or None if start_year is None.
                 Components other than the year that are None fall back to month 1, day 1,
                 hour 0, minute 0, second 0 instead of raising a TypeError.
        """
        if self.start_year is None:
            return None
        return self._build_dt(
            self.start_year, self.start_month, self.start_day, self.start_hour, self.start_minute, self.start_second
        )

    def set_start_dt(self, start_dt: dtu.datetime):
        """
        Copy year, month, day, hour, minute and second of a datetime into the start_* fields.

        :param start_dt: datetime to take the start components from
        """
        self.start_year = start_dt.year
        self.start_month = start_dt.month
        self.start_day = start_dt.day
        self.start_hour = start_dt.hour
        self.start_minute = start_dt.minute
        self.start_second = start_dt.second

    def end_dt(self) -> Optional[dtu.datetime]:
        """
        :return: the end datetime built from the end_* fields, or None if end_year is None.
                 Components other than the year that are None fall back to month 1, day 1,
                 hour 0, minute 0, second 0 instead of raising a TypeError.
        """
        if self.end_year is None:
            return None
        return self._build_dt(
            self.end_year, self.end_month, self.end_day, self.end_hour, self.end_minute, self.end_second
        )

    def set_end_dt(self, end_dt: dtu.datetime):
        """
        Copy year, month, day, hour, minute and second of a datetime into the end_* fields.

        :param end_dt: datetime to take the end components from
        """
        self.end_year = end_dt.year
        self.end_month = end_dt.month
        self.end_day = end_dt.day
        self.end_hour = end_dt.hour
        self.end_minute = end_dt.minute
        self.end_second = end_dt.second

    def start_buffer_td(self) -> dtu.timedelta:
        """
        :return: start_padding_seconds as a timedelta
        """
        return dtu.timedelta(seconds=self.start_padding_seconds)

    def end_buffer_td(self) -> dtu.timedelta:
        """
        :return: end_padding_seconds as a timedelta
        """
        return dtu.timedelta(seconds=self.end_padding_seconds)

    def copy_edge_points(self) -> DataPointCreationMode:
        """
        :return: the DataPointCreationMode member whose name equals edge_points_mode
        :raises KeyError: if edge_points_mode is not the name of a DataPointCreationMode member
        """
        return DataPointCreationMode[self.edge_points_mode]

Class variables

var api_versions : Optional[List[str]]
var apply_correction : bool
var debug : bool
var drop_time_seconds : float
var edge_points_mode : str
var end_day : Optional[int]
var end_hour : Optional[int]
var end_minute : Optional[int]
var end_month : Optional[int]
var end_padding_seconds : float
var end_second : Optional[int]
var end_year : Optional[int]
var event_name : str
var extensions : Optional[List[str]]
var input_directory : str
var make_runme : bool
var origin_altitude : float
var origin_altitude_std : float
var origin_event_radius_m : float
var origin_latitude : float
var origin_latitude_std : float
var origin_longitude : float
var origin_longitude_std : float
var origin_provider : str
var output_dir : str
var output_type : str
var start_day : Optional[int]
var start_hour : Optional[int]
var start_minute : Optional[int]
var start_month : Optional[int]
var start_padding_seconds : float
var start_second : Optional[int]
var start_year : Optional[int]
var station_ids : Optional[List[str]]
var structured_layout : bool
var use_model_correction : bool

Static methods

def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
Expand source code
@classmethod
def from_dict(cls: Type[A],
              kvs: Json,
              *,
              infer_missing=False) -> A:
    """
    Build an instance of ``cls`` from ``kvs`` by delegating to ``_decode_dataclass``.

    :param kvs: the JSON-like value to decode
    :param infer_missing: forwarded unchanged to ``_decode_dataclass``
    :return: whatever ``_decode_dataclass`` produces for ``cls``
    """
    decoded = _decode_dataclass(cls, kvs, infer_missing)
    return decoded
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
Expand source code
@classmethod
def from_json(cls: Type[A],
              s: JsonData,
              *,
              parse_float=None,
              parse_int=None,
              parse_constant=None,
              infer_missing=False,
              **kw) -> A:
    """
    Parse a JSON document with ``json.loads`` and build an instance via ``cls.from_dict``.

    :param s: the JSON text to parse
    :param parse_float: forwarded to ``json.loads``
    :param parse_int: forwarded to ``json.loads``
    :param parse_constant: forwarded to ``json.loads``
    :param infer_missing: forwarded to ``cls.from_dict``
    :param kw: any further keyword arguments, forwarded to ``json.loads``
    :return: the result of ``cls.from_dict`` on the parsed value
    """
    # collect the explicitly named parser hooks, then pass them on together with the extras
    parser_hooks = {
        "parse_float": parse_float,
        "parse_int": parse_int,
        "parse_constant": parse_constant,
    }
    parsed = json.loads(s, **parser_hooks, **kw)
    return cls.from_dict(parsed, infer_missing=infer_missing)
def from_path(config_path: str) ‑> DataWindowConfigFile
Expand source code
@staticmethod
def from_path(config_path: str) -> "DataWindowConfigFile":
    """
    Load a configuration from a TOML file.

    :param config_path: path to the TOML file to read
    :return: the DataWindowConfigFile built from the parsed file contents
    :raises Exception: any error raised while opening, parsing or converting the file is
                       re-raised unchanged after the offending path has been printed
    """
    try:
        # TOML files are UTF-8 by specification; do not depend on the platform default encoding
        with open(config_path, "r", encoding="utf-8") as config_in:
            config_dict: MutableMapping = toml.load(config_in)
            # noinspection Mypy
            return DataWindowConfigFile.from_dict(config_dict)
    except Exception:
        print(f"Error loading configuration at: {config_path}")
        # bare raise re-raises the active exception with its traceback untouched
        raise
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]
Expand source code
@classmethod
def schema(cls: Type[A],
           *,
           infer_missing: bool = False,
           only=None,
           exclude=(),
           many: bool = False,
           context=None,
           load_only=(),
           dump_only=(),
           partial: bool = False,
           unknown=None) -> SchemaType:
    """
    Build a schema class for ``cls`` with ``build_schema`` and return an instance of it.

    When ``unknown`` is not given, it is taken from the lower-cased name of the value
    returned by ``_undefined_parameter_action_safe(cls)``, if that value is not None.
    All other keyword arguments are passed straight to the schema constructor.
    """
    schema_cls = build_schema(cls, DataClassJsonMixin, infer_missing, partial)

    if unknown is None:
        action = _undefined_parameter_action_safe(cls)
        # We can just make use of the same-named mm keywords
        unknown = None if action is None else action.name.lower()

    constructor_kwargs = dict(
        only=only,
        exclude=exclude,
        many=many,
        context=context,
        load_only=load_only,
        dump_only=dump_only,
        partial=partial,
        unknown=unknown,
    )
    return schema_cls(**constructor_kwargs)

Methods

def copy_edge_points(self) ‑> DataPointCreationMode
Expand source code
def copy_edge_points(self) -> DataPointCreationMode:
    """
    :return: the DataPointCreationMode member whose name equals edge_points_mode
    """
    mode_name = self.edge_points_mode
    return DataPointCreationMode[mode_name]
def end_buffer_td(self) ‑> datetime.timedelta
Expand source code
def end_buffer_td(self) -> dtu.timedelta:
    """
    :return: end_padding_seconds as a timedelta
    """
    padding_s = self.end_padding_seconds
    return dtu.timedelta(seconds=padding_s)
def end_dt(self) ‑> Optional[datetime.datetime]
Expand source code
def end_dt(self) -> Optional[dtu.datetime]:
    """
    Build the end datetime from the end_* fields.

    Only end_year decides whether an end time exists; the remaining components are
    optional too, so any of them left as None falls back to the earliest valid value
    (month 1, day 1, hour 0, minute 0, second 0) instead of raising a TypeError.

    :return: the end datetime, or None if end_year is None
    """
    if self.end_year is None:
        return None
    return dtu.datetime(
        self.end_year,
        1 if self.end_month is None else self.end_month,
        1 if self.end_day is None else self.end_day,
        0 if self.end_hour is None else self.end_hour,
        0 if self.end_minute is None else self.end_minute,
        0 if self.end_second is None else self.end_second,
    )
def pretty(self) ‑> str
Expand source code
def pretty(self) -> str:
    """
    :return: the result of to_dict(), formatted by pprint
    """
    # noinspection Mypy
    as_dict = self.to_dict()
    return pprint.pformat(as_dict)
def set_end_dt(self, end_dt: datetime.datetime)
Expand source code
def set_end_dt(self, end_dt: dtu.datetime):
    """
    Copy year, month, day, hour, minute and second of a datetime into the end_* fields.

    :param end_dt: datetime to take the end components from
    """
    # same read/assign order as writing the six assignments out by hand
    for component in ("year", "month", "day", "hour", "minute", "second"):
        setattr(self, "end_" + component, getattr(end_dt, component))
def set_start_dt(self, start_dt: datetime.datetime)
Expand source code
def set_start_dt(self, start_dt: dtu.datetime):
    """
    Copy year, month, day, hour, minute and second of a datetime into the start_* fields.

    :param start_dt: datetime to take the start components from
    """
    # same read/assign order as writing the six assignments out by hand
    for component in ("year", "month", "day", "hour", "minute", "second"):
        setattr(self, "start_" + component, getattr(start_dt, component))
def start_buffer_td(self) ‑> datetime.timedelta
Expand source code
def start_buffer_td(self) -> dtu.timedelta:
    """
    :return: start_padding_seconds as a timedelta
    """
    padding_s = self.start_padding_seconds
    return dtu.timedelta(seconds=padding_s)
def start_dt(self) ‑> Optional[datetime.datetime]
Expand source code
def start_dt(self) -> Optional[dtu.datetime]:
    """
    Build the start datetime from the start_* fields.

    Only start_year decides whether a start time exists; the remaining components are
    optional too, so any of them left as None falls back to the earliest valid value
    (month 1, day 1, hour 0, minute 0, second 0) instead of raising a TypeError.

    :return: the start datetime, or None if start_year is None
    """
    if self.start_year is None:
        return None
    return dtu.datetime(
        self.start_year,
        1 if self.start_month is None else self.start_month,
        1 if self.start_day is None else self.start_day,
        0 if self.start_hour is None else self.start_hour,
        0 if self.start_minute is None else self.start_minute,
        0 if self.start_second is None else self.start_second,
    )
def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
Expand source code
def to_dict(self, encode_json=False) -> Dict[str, Json]:
    """
    Convert this object to a dictionary by delegating to ``_asdict``.

    :param encode_json: forwarded unchanged to ``_asdict``
    :return: whatever ``_asdict`` produces for this object
    """
    as_dict = _asdict(self, encode_json=encode_json)
    return as_dict
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) ‑> str
Expand source code
def to_json(self,
            *,
            skipkeys: bool = False,
            ensure_ascii: bool = True,
            check_circular: bool = True,
            allow_nan: bool = True,
            indent: Optional[Union[int, str]] = None,
            separators: Tuple[str, str] = None,
            default: Callable = None,
            sort_keys: bool = False,
            **kw) -> str:
    """
    Serialize this object to a JSON string.

    The object is first converted with ``to_dict(encode_json=False)`` and the result is
    dumped by ``json.dumps`` using ``_ExtendedEncoder``; every keyword argument,
    including the extras in ``kw``, is forwarded to ``json.dumps``.

    :return: the JSON text produced by ``json.dumps``
    """
    payload = self.to_dict(encode_json=False)
    dump_options = dict(
        skipkeys=skipkeys,
        ensure_ascii=ensure_ascii,
        check_circular=check_circular,
        allow_nan=allow_nan,
        indent=indent,
        separators=separators,
        default=default,
        sort_keys=sort_keys,
    )
    return json.dumps(payload, cls=_ExtendedEncoder, **dump_options, **kw)