Module `redvox.common.api_reader`

Read Redvox data from a single directory. Data files can be in either the API 900 or the API 1000 data format.

Expand source code

"""
Read Redvox data from a single directory
Data files can be either API 900 or API 1000 data formats
"""
from typing import List, Optional
from datetime import timedelta, datetime
import multiprocessing
import multiprocessing.pool

import pyarrow as pa
import psutil

import redvox.settings as settings
import redvox.api1000.proto.redvox_api_m_pb2 as api_m
import redvox.common.date_time_utils as dtu
from redvox.common import io, api_conversions as ac
from redvox.common.parallel_utils import maybe_parallel_map
from redvox.common.station import Station
from redvox.common.reader_session_model import ModelsContainer
from redvox.common.session_model import SessionModel
from redvox.common.errors import RedVoxExceptions
from redvox.cloud.client import cloud_client
from redvox.cloud.session_model_api import Session
from redvox.cloud.errors import CloudApiError


# pyarrow struct type with the fields that identify a station: id, uuid and start_time
id_py_stct = pa.struct(
    [
        ("id", pa.string()),
        ("uuid", pa.string()),
        ("start_time", pa.float64()),
    ]
)
# pyarrow struct type with the station metadata fields: api versions, make/model, os, app,
# privacy flag, packet duration (in seconds, per the field name) and station description
meta_py_stct = pa.struct(
    [
        ("api", pa.float64()),
        ("sub_api", pa.float64()),
        ("make", pa.string()),
        ("model", pa.string()),
        ("os", pa.int64()),
        ("os_version", pa.string()),
        ("app", pa.string()),
        ("app_version", pa.string()),
        ("is_private", pa.bool_()),
        ("packet_duration_s", pa.float64()),
        ("station_description", pa.string()),
    ]
)


# ApiReader multiplies the available system memory by this value to get its memory budget
PERCENT_FREE_MEM_USE = 0.8  # Percentage of total free memory to use when creating stations (1. is 100%)


class ApiReader:
    """
    Reads data from api 900 or api 1000 format, converting all data read into RedvoxPacketM for
    ease of comparison and use.

    Properties:
        filter: io.ReadFilter with the station ids, start and end time, start and end time padding, and
        types of files to read

        base_dir: str of the directory containing all the files to read

        structured_dir: bool, if True, the base_dir contains a specific directory structure used by the
        respective api formats.  If False, base_dir only has the data files.  Default False.

        files_index: list of io.Index, one per station, of the files that match the filter that are in base_dir

        index_summary: io.IndexSummary of the filtered data

        session_models: ModelsContainer for cloud and local session models.

        errors: RedVoxExceptions with the messages recorded while indexing the files

        chunk_limit: number of bytes each station may use for loading files in memory.  0 if no files were found.

        debug: bool, if True, output additional information during function execution.  Default False.
    """

    def __init__(
        self,
        base_dir: str,
        structured_dir: bool = False,
        read_filter: io.ReadFilter = None,
        debug: bool = False,
        pool: Optional[multiprocessing.pool.Pool] = None,
    ):
        """
        Initialize the ApiReader object

        :param base_dir: directory containing the files to read
        :param structured_dir: if True, base_dir contains a specific directory structure used by the respective
                                api formats.  If False, base_dir only has the data files.  Default False.
        :param read_filter: ReadFilter for the data files, if None, get everything.  Default None
        :param debug: if True, output program warnings/errors during function execution.  Default False.
        :param pool: optional multiprocessing pool used while indexing the files.  If None, a pool is created
                        for the indexing and closed afterwards.  Default None
        :raises MemoryError: if the largest file, or the estimated size of all requested data, is greater than
                                the memory available to the reader
        """
        if read_filter:
            self.filter: io.ReadFilter = read_filter
            # store the ids as a set; note this updates the filter object given by the caller
            if self.filter.station_ids:
                self.filter.station_ids = set(self.filter.station_ids)
        else:
            self.filter = io.ReadFilter()
        self.base_dir: str = base_dir
        self.structured_dir: bool = structured_dir
        self.debug: bool = debug
        self.errors: RedVoxExceptions = RedVoxExceptions("APIReader")
        self.session_models: ModelsContainer = ModelsContainer()

        # the pool is only needed for indexing.  a pool created here is always closed, even if indexing
        # fails; a pool supplied by the caller is left open for the caller to manage.
        _pool: multiprocessing.pool.Pool = multiprocessing.Pool() if pool is None else pool
        try:
            self.files_index: List[io.Index] = self._get_all_files(_pool)
        finally:
            if pool is None:
                _pool.close()

        self.index_summary: io.IndexSummary = io.IndexSummary.from_index(self._flatten_files_index())
        if len(self.files_index) > 0:
            # when running in parallel, the memory budget is shared between all stations
            mem_split_factor = len(self.files_index) if settings.is_parallelism_enabled() else 1
            self.chunk_limit = psutil.virtual_memory().available * PERCENT_FREE_MEM_USE / mem_split_factor
            # default=0 covers the case where every index is empty (max() of nothing raises ValueError)
            max_file_size = max(
                (fe.decompressed_file_size_bytes for fi in self.files_index for fe in fi.entries),
                default=0,
            )
            # worst case estimate: every file is as large as the largest one
            total_est_size = max_file_size * sum(len(fi.entries) for fi in self.files_index)
            if max_file_size > self.chunk_limit:
                raise MemoryError(
                    f"System requires {max_file_size} bytes of memory to process a file but only has "
                    f"{self.chunk_limit} available.  Please free or add more RAM."
                )
            elif total_est_size / mem_split_factor > self.chunk_limit:
                raise MemoryError(
                    f"{total_est_size} of data requested, but only {self.chunk_limit} available; "
                    f"please reduce the amount of data you are requesting."
                )
            if debug:
                if mem_split_factor == 1:
                    print(
                        f"{len(self.files_index)} stations have {int(self.chunk_limit)} "
                        f"bytes for loading files in memory."
                    )
                else:
                    print(
                        f"{mem_split_factor} stations each have "
                        f"{int(self.chunk_limit)} bytes for loading files in memory."
                    )
        else:
            self.chunk_limit = 0

        if debug:
            self.errors.print()

    def _flatten_files_index(self) -> io.Index:
        """
        :return: flattened version of files_index; a single Index with the entries of every station
        """
        result = io.Index()
        for i in self.files_index:
            result.append(iter(i.entries))
        return result

    def _get_cloud_models(self, ids: List[str]):
        """
        saves the cloud models from the server that match the list of ids given to the ApiReader's session_models.
        Any error raised while contacting the server is recorded in the ApiReader's errors instead of being raised.

        :param ids: station ids to get models for
        """
        try:
            with cloud_client() as client:
                self.session_models.search_cloud_session(
                    id_uuids=ids,
                    owner=client.redvox_config.username,
                    start_ts=int(dtu.datetime_to_epoch_microseconds_utc(self.filter.start_dt))
                    if self.filter.start_dt
                    else None,
                    end_ts=int(dtu.datetime_to_epoch_microseconds_utc(self.filter.end_dt))
                    if self.filter.end_dt
                    else None,
                    include_public=True,
                )
                if self.session_models.cloud_models is None:
                    self.errors.append(f"Unable to find any cloud sessions for {ids}.  Using local files.")
        except CloudApiError as e:
            self.errors.append(f"Error while connecting to server.  Error message: {e}")
        except Exception as e:
            # best effort: the reader falls back to local files, so record the problem and continue
            self.errors.append(f"An error occurred.  Error message: {e}")

    def _reset_index(self, model: Session) -> List[io.Index]:
        """
        reset the filter used to get files, then get the updated list of files

        :param model: model to use to reset filter
        :return: list with the updated index of files for the model's station
        """
        insufficient_str = ""
        # reset the filter used to get files
        new_filter = (
            io.ReadFilter()
            .with_extensions(self.filter.extensions)
            .with_api_versions(self.filter.api_versions)
            .with_station_ids({model.id})
            .with_start_dt_buf(timedelta(seconds=0))
            .with_end_dt_buf(timedelta(seconds=0))
        )
        mean_offset = timedelta(microseconds=abs(model.timing.mean_off))
        # update the start and end times for the filter by the mean offset and the packet duration
        if self.filter.start_dt is not None:
            # the original buffer was too small if the offset is larger than the buffer
            if mean_offset > self.filter.start_dt_buf:
                insufficient_str += "start "
            # keep the returned filter so this does not depend on the builder updating in place
            new_filter = new_filter.with_start_dt(
                self.filter.start_dt + timedelta(microseconds=(model.timing.mean_off - model.packet_dur))
            )
        if self.filter.end_dt is not None:
            if mean_offset > self.filter.end_dt_buf:
                insufficient_str += "end"
            new_filter = new_filter.with_end_dt(
                self.filter.end_dt + timedelta(microseconds=(model.timing.mean_off + model.packet_dur))
            )

        if len(insufficient_str) > 0:
            self.errors.append(f"Required more data for {model.id} at: {insufficient_str}")
        # no pool is passed on, so _apply_filter creates and closes its own pool for this call
        return [self._apply_filter(new_filter)]

    def _get_all_files(self, pool: Optional[multiprocessing.pool.Pool] = None) -> List[io.Index]:
        """
        get all files in the base dir of the ApiReader

        :param pool: optional multiprocessing pool; if None, a pool is created and closed by this function.
                        Default None
        :return: list of indexes, one per station, with all the files that match the filter
        """
        _pool: multiprocessing.pool.Pool = multiprocessing.Pool() if pool is None else pool
        try:
            index: List[io.Index] = []
            # this guarantees that all ids we search for are valid
            all_index = self._apply_filter(pool=_pool)
            all_index_ids = all_index.summarize().station_ids()
            # get models using the cloud to correct timing
            self._get_cloud_models(all_index_ids)
            resp_ids = self.session_models.list_ids()

            for station_id in all_index_ids:
                # if start and end are both not defined, just use what we got
                if self.filter.start_dt is None and self.filter.end_dt is None:
                    checked_index = [all_index.get_index_for_station_id(station_id)]
                # if we need to update the start or end, use the first session model from cloud if it exists
                elif station_id in resp_ids:
                    checked_index = self._reset_index(self.session_models.get_model_by_key(station_id))
                # if no models from cloud, use the data available to update start and end of index
                else:
                    id_index = all_index.get_index_for_station_id(station_id)
                    if len(id_index.entries) < 1:
                        checked_index = []
                    else:
                        # attempt to make a session model using local data.  if failure, use what we got initially.
                        try:
                            stats = SessionModel().create_from_stream(self.read_files_in_index(id_index))
                            checked_index = self._reset_index(stats.cloud_session)
                            self.session_models.add_local_session(stats)
                        except Exception as e:
                            # record why the fallback was used instead of hiding the failure
                            self.errors.append(
                                f"Unable to create a local session model for {station_id}; "
                                f"using the files found initially.  Error message: {e}"
                            )
                            checked_index = [id_index]

                # add the updated list of files to the index
                index.extend(checked_index)
        finally:
            # only close a pool created here, and do so even if indexing failed
            if pool is None:
                _pool.close()

        return index

    def _apply_filter(
        self,
        reader_filter: Optional[io.ReadFilter] = None,
        pool: Optional[multiprocessing.pool.Pool] = None,
    ) -> io.Index:
        """
        apply the filter of the reader, or another filter if specified

        :param reader_filter: optional filter; if None, use the reader's filter, default None
        :param pool: optional multiprocessing pool; if None, a pool is created and closed by this function.
                        Default None
        :return: index of the filtered files
        """
        _pool: multiprocessing.pool.Pool = multiprocessing.Pool() if pool is None else pool
        try:
            if not reader_filter:
                reader_filter = self.filter
            if self.structured_dir:
                index = io.index_structured(self.base_dir, reader_filter, pool=_pool)
            else:
                index = io.index_unstructured(self.base_dir, reader_filter, pool=_pool)
        finally:
            # only close a pool created here, and do so even if indexing failed
            if pool is None:
                _pool.close()
        return index

    def _redo_index(self, station_ids: set, new_start: datetime, new_end: datetime) -> Optional[io.Index]:
        """
        Redo the index for files using new start and end dates.  removes any buffer time at the start and end of the
        new query.  Returns the updated index or None

        :param station_ids: set of ids to get
        :param new_start: new start time to get data from
        :param new_end: new end time to get data from
        :return: Updated index or None if no files match the new query
        """
        new_index = self._apply_filter(
            io.ReadFilter()
            .with_start_dt(new_start)
            .with_end_dt(new_end)
            .with_extensions(self.filter.extensions)
            .with_api_versions(self.filter.api_versions)
            .with_station_ids(station_ids)
            .with_start_dt_buf(timedelta(seconds=0))
            .with_end_dt_buf(timedelta(seconds=0))
        )
        if len(new_index.entries) > 0:
            return new_index
        return None

    def _split_workload(self, findex: io.Index) -> List[io.Index]:
        """
        takes an index and splits it into chunks based on the reader's chunk_limit
        while running_total + next_file_size <= limit, adds files to a chunk (Index)
        if the limit would be exceeded, adds the chunk and puts the next file into a new chunk
        a single file larger than the limit is placed in a chunk of its own

        :param findex: index of files to split
        :return: list of Index to process; a single empty Index if findex has no files
        """
        chunks: List[io.Index] = []
        current_files = []
        current_size = 0
        for f in findex.entries:
            file_size = f.decompressed_file_size_bytes
            # close the current chunk before it goes over the limit, but never emit an empty chunk
            if current_files and current_size + file_size > self.chunk_limit:
                chunks.append(io.Index(current_files))
                current_files = []
                current_size = 0
            current_files.append(f)
            # the file that opens a chunk counts towards that chunk's size
            current_size += file_size
        chunks.append(io.Index(current_files))
        return chunks

    @staticmethod
    def read_files_in_index(indexf: io.Index) -> List[api_m.RedvoxPacketM]:
        """
        read all the files in the index

        :param indexf: index of the files to read
        :return: list of RedvoxPacketM, converted from API 900 if necessary.  The converted API 900 packets
                    come first, followed by the API 1000 packets.
        """
        result: List[api_m.RedvoxPacketM] = []

        # Iterate over the API 900 packets in a memory efficient way
        # and convert to API 1000
        # noinspection PyTypeChecker
        for packet_900 in indexf.stream_raw(io.ReadFilter.empty().with_api_versions({io.ApiVersion.API_900})):
            # noinspection Mypy
            result.append(ac.convert_api_900_to_1000_raw(packet_900))

        # Grab the API 1000 packets
        # noinspection PyTypeChecker
        for packet in indexf.stream_raw(io.ReadFilter.empty().with_api_versions({io.ApiVersion.API_1000})):
            # noinspection Mypy
            result.append(packet)

        return result

    # noinspection PyTypeChecker
    def read_files_by_id(self, station_id: str) -> Optional[List[api_m.RedvoxPacketM]]:
        """
        :param station_id: the id to filter on
        :return: the list of packets with the requested id, or None if the id can't be found
        """
        result: List[api_m.RedvoxPacketM] = []
        # build the flattened index once and use it for both api versions
        flat_index = self._flatten_files_index()

        # Iterate over the API 900 packets in a memory efficient way
        # and convert to API 1000
        for packet_900 in flat_index.stream_raw(
            io.ReadFilter.empty().with_api_versions({io.ApiVersion.API_900}).with_station_ids({station_id})
        ):
            # noinspection Mypy
            result.append(ac.convert_api_900_to_1000_raw(packet_900))

        # Grab the API 1000 packets
        for packet in flat_index.stream_raw(
            io.ReadFilter.empty().with_api_versions({io.ApiVersion.API_1000}).with_station_ids({station_id})
        ):
            # noinspection Mypy
            result.append(packet)

        if len(result) == 0:
            return None

        return result

    def _station_by_index(self, findex: io.Index) -> Station:
        """
        :param findex: index with files to build a station with
        :return: Station built from files in findex
        """
        return Station.create_from_packets(self.read_files_in_index(findex))

    def get_stations(self, pool: Optional[multiprocessing.pool.Pool] = None) -> List[Station]:
        """
        :param pool: optional multiprocessing pool
        :return: List of all stations in the ApiReader, one per index in files_index
        """
        return list(maybe_parallel_map(pool, self._station_by_index, iter(self.files_index), chunk_size=1))

    def get_station_by_id(self, get_id: str) -> Optional[List[Station]]:
        """
        Builds every station in the ApiReader, then keeps the ones with the requested id.

        :param get_id: the id to filter on
        :return: list of all stations with the requested id or None if id can't be found
        """
        result = [s for s in self.get_stations() if s.id() == get_id]
        if len(result) < 1:
            return None
        return result

Classes

class ApiReader (base_dir: str, structured_dir: bool = False, read_filter: ReadFilter = None, debug: bool = False, pool: Optional[multiprocessing.pool.Pool] = None)

Reads data from api 900 or api 1000 format, converting all data read into RedvoxPacketM for ease of comparison and use.

Properties

filter: io.ReadFilter with the station ids, start and end time, start and end time padding, and types of files to read

base_dir: str of the directory containing all the files to read

structured_dir: bool, if True, the base_dir contains a specific directory structure used by the respective api formats. If False, base_dir only has the data files. Default False.

files_index: list of io.Index, one per station, of the files that match the filter that are in base_dir

index_summary: io.IndexSummary of the filtered data

session_models: ModelsContainer for cloud and local session models.

debug: bool, if True, output additional information during function execution. Default False.

Initialize the ApiReader object

:param base_dir: directory containing the files to read.
:param structured_dir: if True, base_dir contains a specific directory structure used by the respective api formats. If False, base_dir only has the data files. Default False.
:param read_filter: ReadFilter for the data files; if None, get everything. Default None.
:param debug: if True, output program warnings/errors during function execution. Default False.
:param pool: optional multiprocessing pool used while indexing the files; if None, a pool is created and closed by the reader. Default None.

Expand source code

class ApiReader:
    """
    Reads data from api 900 or api 1000 format, converting all data read into RedvoxPacketM for
    ease of comparison and use.

    Properties:
        filter: io.ReadFilter with the station ids, start and end time, start and end time padding, and
        types of files to read

        base_dir: str of the directory containing all the files to read

        structured_dir: bool, if True, the base_dir contains a specific directory structure used by the
        respective api formats.  If False, base_dir only has the data files.  Default False.

        files_index: list of io.Index, one per station, of the files that match the filter that are in base_dir

        index_summary: io.IndexSummary of the filtered data

        session_models: ModelsContainer for cloud and local session models.

        errors: RedVoxExceptions with the messages recorded while indexing the files

        chunk_limit: number of bytes each station may use for loading files in memory.  0 if no files were found.

        debug: bool, if True, output additional information during function execution.  Default False.
    """

    def __init__(
        self,
        base_dir: str,
        structured_dir: bool = False,
        read_filter: io.ReadFilter = None,
        debug: bool = False,
        pool: Optional[multiprocessing.pool.Pool] = None,
    ):
        """
        Initialize the ApiReader object

        :param base_dir: directory containing the files to read
        :param structured_dir: if True, base_dir contains a specific directory structure used by the respective
                                api formats.  If False, base_dir only has the data files.  Default False.
        :param read_filter: ReadFilter for the data files, if None, get everything.  Default None
        :param debug: if True, output program warnings/errors during function execution.  Default False.
        :param pool: optional multiprocessing pool used while indexing the files.  If None, a pool is created
                        for the indexing and closed afterwards.  Default None
        :raises MemoryError: if the largest file, or the estimated size of all requested data, is greater than
                                the memory available to the reader
        """
        if read_filter:
            self.filter: io.ReadFilter = read_filter
            # store the ids as a set; note this updates the filter object given by the caller
            if self.filter.station_ids:
                self.filter.station_ids = set(self.filter.station_ids)
        else:
            self.filter = io.ReadFilter()
        self.base_dir: str = base_dir
        self.structured_dir: bool = structured_dir
        self.debug: bool = debug
        self.errors: RedVoxExceptions = RedVoxExceptions("APIReader")
        self.session_models: ModelsContainer = ModelsContainer()

        # the pool is only needed for indexing.  a pool created here is always closed, even if indexing
        # fails; a pool supplied by the caller is left open for the caller to manage.
        _pool: multiprocessing.pool.Pool = multiprocessing.Pool() if pool is None else pool
        try:
            self.files_index: List[io.Index] = self._get_all_files(_pool)
        finally:
            if pool is None:
                _pool.close()

        self.index_summary: io.IndexSummary = io.IndexSummary.from_index(self._flatten_files_index())
        if len(self.files_index) > 0:
            # when running in parallel, the memory budget is shared between all stations
            mem_split_factor = len(self.files_index) if settings.is_parallelism_enabled() else 1
            self.chunk_limit = psutil.virtual_memory().available * PERCENT_FREE_MEM_USE / mem_split_factor
            # default=0 covers the case where every index is empty (max() of nothing raises ValueError)
            max_file_size = max(
                (fe.decompressed_file_size_bytes for fi in self.files_index for fe in fi.entries),
                default=0,
            )
            # worst case estimate: every file is as large as the largest one
            total_est_size = max_file_size * sum(len(fi.entries) for fi in self.files_index)
            if max_file_size > self.chunk_limit:
                raise MemoryError(
                    f"System requires {max_file_size} bytes of memory to process a file but only has "
                    f"{self.chunk_limit} available.  Please free or add more RAM."
                )
            elif total_est_size / mem_split_factor > self.chunk_limit:
                raise MemoryError(
                    f"{total_est_size} of data requested, but only {self.chunk_limit} available; "
                    f"please reduce the amount of data you are requesting."
                )
            if debug:
                if mem_split_factor == 1:
                    print(
                        f"{len(self.files_index)} stations have {int(self.chunk_limit)} "
                        f"bytes for loading files in memory."
                    )
                else:
                    print(
                        f"{mem_split_factor} stations each have "
                        f"{int(self.chunk_limit)} bytes for loading files in memory."
                    )
        else:
            self.chunk_limit = 0

        if debug:
            self.errors.print()

    def _flatten_files_index(self) -> io.Index:
        """
        :return: flattened version of files_index; a single Index with the entries of every station
        """
        result = io.Index()
        for i in self.files_index:
            result.append(iter(i.entries))
        return result

    def _get_cloud_models(self, ids: List[str]):
        """
        saves the cloud models from the server that match the list of ids given to the ApiReader's session_models.
        Any error raised while contacting the server is recorded in the ApiReader's errors instead of being raised.

        :param ids: station ids to get models for
        """
        try:
            with cloud_client() as client:
                self.session_models.search_cloud_session(
                    id_uuids=ids,
                    owner=client.redvox_config.username,
                    start_ts=int(dtu.datetime_to_epoch_microseconds_utc(self.filter.start_dt))
                    if self.filter.start_dt
                    else None,
                    end_ts=int(dtu.datetime_to_epoch_microseconds_utc(self.filter.end_dt))
                    if self.filter.end_dt
                    else None,
                    include_public=True,
                )
                if self.session_models.cloud_models is None:
                    self.errors.append(f"Unable to find any cloud sessions for {ids}.  Using local files.")
        except CloudApiError as e:
            self.errors.append(f"Error while connecting to server.  Error message: {e}")
        except Exception as e:
            # best effort: the reader falls back to local files, so record the problem and continue
            self.errors.append(f"An error occurred.  Error message: {e}")

    def _reset_index(self, model: Session) -> List[io.Index]:
        """
        reset the filter used to get files, then get the updated list of files

        :param model: model to use to reset filter
        :return: list with the updated index of files for the model's station
        """
        insufficient_str = ""
        # reset the filter used to get files
        new_filter = (
            io.ReadFilter()
            .with_extensions(self.filter.extensions)
            .with_api_versions(self.filter.api_versions)
            .with_station_ids({model.id})
            .with_start_dt_buf(timedelta(seconds=0))
            .with_end_dt_buf(timedelta(seconds=0))
        )
        mean_offset = timedelta(microseconds=abs(model.timing.mean_off))
        # update the start and end times for the filter by the mean offset and the packet duration
        if self.filter.start_dt is not None:
            # the original buffer was too small if the offset is larger than the buffer
            if mean_offset > self.filter.start_dt_buf:
                insufficient_str += "start "
            # keep the returned filter so this does not depend on the builder updating in place
            new_filter = new_filter.with_start_dt(
                self.filter.start_dt + timedelta(microseconds=(model.timing.mean_off - model.packet_dur))
            )
        if self.filter.end_dt is not None:
            if mean_offset > self.filter.end_dt_buf:
                insufficient_str += "end"
            new_filter = new_filter.with_end_dt(
                self.filter.end_dt + timedelta(microseconds=(model.timing.mean_off + model.packet_dur))
            )

        if len(insufficient_str) > 0:
            self.errors.append(f"Required more data for {model.id} at: {insufficient_str}")
        # no pool is passed on, so _apply_filter creates and closes its own pool for this call
        return [self._apply_filter(new_filter)]

    def _get_all_files(self, pool: Optional[multiprocessing.pool.Pool] = None) -> List[io.Index]:
        """
        get all files in the base dir of the ApiReader

        :param pool: optional multiprocessing pool; if None, a pool is created and closed by this function.
                        Default None
        :return: list of indexes, one per station, with all the files that match the filter
        """
        _pool: multiprocessing.pool.Pool = multiprocessing.Pool() if pool is None else pool
        try:
            index: List[io.Index] = []
            # this guarantees that all ids we search for are valid
            all_index = self._apply_filter(pool=_pool)
            all_index_ids = all_index.summarize().station_ids()
            # get models using the cloud to correct timing
            self._get_cloud_models(all_index_ids)
            resp_ids = self.session_models.list_ids()

            for station_id in all_index_ids:
                # if start and end are both not defined, just use what we got
                if self.filter.start_dt is None and self.filter.end_dt is None:
                    checked_index = [all_index.get_index_for_station_id(station_id)]
                # if we need to update the start or end, use the first session model from cloud if it exists
                elif station_id in resp_ids:
                    checked_index = self._reset_index(self.session_models.get_model_by_key(station_id))
                # if no models from cloud, use the data available to update start and end of index
                else:
                    id_index = all_index.get_index_for_station_id(station_id)
                    if len(id_index.entries) < 1:
                        checked_index = []
                    else:
                        # attempt to make a session model using local data.  if failure, use what we got initially.
                        try:
                            stats = SessionModel().create_from_stream(self.read_files_in_index(id_index))
                            checked_index = self._reset_index(stats.cloud_session)
                            self.session_models.add_local_session(stats)
                        except Exception as e:
                            # record why the fallback was used instead of hiding the failure
                            self.errors.append(
                                f"Unable to create a local session model for {station_id}; "
                                f"using the files found initially.  Error message: {e}"
                            )
                            checked_index = [id_index]

                # add the updated list of files to the index
                index.extend(checked_index)
        finally:
            # only close a pool created here, and do so even if indexing failed
            if pool is None:
                _pool.close()

        return index

    def _apply_filter(
        self,
        reader_filter: Optional[io.ReadFilter] = None,
        pool: Optional[multiprocessing.pool.Pool] = None,
    ) -> io.Index:
        """
        apply the filter of the reader, or another filter if specified

        :param reader_filter: optional filter; if None, use the reader's filter, default None
        :param pool: optional multiprocessing pool; if None, a pool is created and closed by this function.
                        Default None
        :return: index of the filtered files
        """
        _pool: multiprocessing.pool.Pool = multiprocessing.Pool() if pool is None else pool
        try:
            if not reader_filter:
                reader_filter = self.filter
            if self.structured_dir:
                index = io.index_structured(self.base_dir, reader_filter, pool=_pool)
            else:
                index = io.index_unstructured(self.base_dir, reader_filter, pool=_pool)
        finally:
            # only close a pool created here, and do so even if indexing failed
            if pool is None:
                _pool.close()
        return index

    def _redo_index(self, station_ids: set, new_start: datetime, new_end: datetime) -> Optional[io.Index]:
        """
        Redo the index for files using new start and end dates.  removes any buffer time at the start and end of the
        new query.  Returns the updated index or None

        :param station_ids: set of ids to get
        :param new_start: new start time to get data from
        :param new_end: new end time to get data from
        :return: Updated index or None if no files match the new query
        """
        new_index = self._apply_filter(
            io.ReadFilter()
            .with_start_dt(new_start)
            .with_end_dt(new_end)
            .with_extensions(self.filter.extensions)
            .with_api_versions(self.filter.api_versions)
            .with_station_ids(station_ids)
            .with_start_dt_buf(timedelta(seconds=0))
            .with_end_dt_buf(timedelta(seconds=0))
        )
        if len(new_index.entries) > 0:
            return new_index
        return None

    def _split_workload(self, findex: io.Index) -> List[io.Index]:
        """
        takes an index and splits it into chunks based on the reader's chunk_limit
        while running_total + next_file_size <= limit, adds files to a chunk (Index)
        if the limit would be exceeded, adds the chunk and puts the next file into a new chunk
        a single file larger than the limit is placed in a chunk of its own

        :param findex: index of files to split
        :return: list of Index to process; a single empty Index if findex has no files
        """
        chunks: List[io.Index] = []
        current_files = []
        current_size = 0
        for f in findex.entries:
            file_size = f.decompressed_file_size_bytes
            # close the current chunk before it goes over the limit, but never emit an empty chunk
            if current_files and current_size + file_size > self.chunk_limit:
                chunks.append(io.Index(current_files))
                current_files = []
                current_size = 0
            current_files.append(f)
            # the file that opens a chunk counts towards that chunk's size
            current_size += file_size
        chunks.append(io.Index(current_files))
        return chunks

    @staticmethod
    def read_files_in_index(indexf: io.Index) -> List[api_m.RedvoxPacketM]:
        """
        read all the files in the index

        :param indexf: index of the files to read
        :return: list of RedvoxPacketM, converted from API 900 if necessary.  The converted API 900 packets
                    come first, followed by the API 1000 packets.
        """
        result: List[api_m.RedvoxPacketM] = []

        # Iterate over the API 900 packets in a memory efficient way
        # and convert to API 1000
        # noinspection PyTypeChecker
        for packet_900 in indexf.stream_raw(io.ReadFilter.empty().with_api_versions({io.ApiVersion.API_900})):
            # noinspection Mypy
            result.append(ac.convert_api_900_to_1000_raw(packet_900))

        # Grab the API 1000 packets
        # noinspection PyTypeChecker
        for packet in indexf.stream_raw(io.ReadFilter.empty().with_api_versions({io.ApiVersion.API_1000})):
            # noinspection Mypy
            result.append(packet)

        return result

    # noinspection PyTypeChecker
    def read_files_by_id(self, station_id: str) -> Optional[List[api_m.RedvoxPacketM]]:
        """
        :param station_id: the id to filter on
        :return: the list of packets with the requested id, or None if the id can't be found
        """
        result: List[api_m.RedvoxPacketM] = []
        # build the flattened index once and use it for both api versions
        flat_index = self._flatten_files_index()

        # Iterate over the API 900 packets in a memory efficient way
        # and convert to API 1000
        for packet_900 in flat_index.stream_raw(
            io.ReadFilter.empty().with_api_versions({io.ApiVersion.API_900}).with_station_ids({station_id})
        ):
            # noinspection Mypy
            result.append(ac.convert_api_900_to_1000_raw(packet_900))

        # Grab the API 1000 packets
        for packet in flat_index.stream_raw(
            io.ReadFilter.empty().with_api_versions({io.ApiVersion.API_1000}).with_station_ids({station_id})
        ):
            # noinspection Mypy
            result.append(packet)

        if len(result) == 0:
            return None

        return result

    def _station_by_index(self, findex: io.Index) -> Station:
        """
        :param findex: index with files to build a station with
        :return: Station built from files in findex
        """
        return Station.create_from_packets(self.read_files_in_index(findex))

    def get_stations(self, pool: Optional[multiprocessing.pool.Pool] = None) -> List[Station]:
        """
        :param pool: optional multiprocessing pool
        :return: List of all stations in the ApiReader, one per index in files_index
        """
        return list(maybe_parallel_map(pool, self._station_by_index, iter(self.files_index), chunk_size=1))

    def get_station_by_id(self, get_id: str) -> Optional[List[Station]]:
        """
        Builds every station in the ApiReader, then keeps the ones with the requested id.

        :param get_id: the id to filter on
        :return: list of all stations with the requested id or None if id can't be found
        """
        result = [s for s in self.get_stations() if s.id() == get_id]
        if len(result) < 1:
            return None
        return result

Subclasses

ApiReaderDw

Static methods

def read_files_in_index(indexf: Index) ‑> List[src.redvox_api_m.redvox_api_m_pb2.RedvoxPacketM]

Read all the files in the index.

:return: list of RedvoxPacketM, converted from API 900 if necessary

Expand source code

@staticmethod
def read_files_in_index(indexf: io.Index) -> List[api_m.RedvoxPacketM]:
    """
    Read every file in the index into a list of packets.

    API 900 packets are read first and converted to API 1000 as they are streamed;
    API 1000 packets are appended after them without modification.

    :param indexf: index of the files to read
    :return: list of RedvoxPacketM, converted from API 900 if necessary
    """
    # API 900 files: stream the raw packets and convert each one to API 1000.
    only_900 = io.ReadFilter.empty().with_api_versions({io.ApiVersion.API_900})
    # noinspection PyTypeChecker,Mypy
    packets: List[api_m.RedvoxPacketM] = [
        ac.convert_api_900_to_1000_raw(raw_900) for raw_900 in indexf.stream_raw(only_900)
    ]

    # API 1000 files: already in the target format, so they are appended as-is.
    only_1000 = io.ReadFilter.empty().with_api_versions({io.ApiVersion.API_1000})
    # noinspection PyTypeChecker,Mypy
    packets.extend(indexf.stream_raw(only_1000))

    return packets

Methods

def get_station_by_id(self, get_id: str) ‑> Optional[List[Station]]

:param get_id: the id to filter on
:return: list of all stations with the requested id, or None if the id can't be found

Expand source code

def get_station_by_id(self, get_id: str) -> Optional[List[Station]]:
    """
    Select the stations whose id equals the requested one.

    All stations are first obtained through get_stations() and then filtered by id.

    :param get_id: the station id to filter on
    :return: list of every station with the requested id, or None if no station has that id
    """
    matches = []
    for station in self.get_stations():
        if station.id() == get_id:
            matches.append(station)
    # No match is reported as None rather than as an empty list.
    return matches or None

def get_stations(self, pool: Optional[multiprocessing.pool.Pool] = None) ‑> List[Station]

:param pool: optional multiprocessing pool
:return: list of all stations in the ApiReader

Expand source code

def get_stations(self, pool: Optional[multiprocessing.pool.Pool] = None) -> List[Station]:
    """
    Build one Station for each entry of files_index.

    :param pool: optional multiprocessing pool, handed to maybe_parallel_map
    :return: list of all Stations in the ApiReader
    """
    index_iter = iter(self.files_index)
    # Each index is mapped to a Station by _station_by_index, one index per chunk.
    stations = maybe_parallel_map(pool, self._station_by_index, index_iter, chunk_size=1)
    return list(stations)

def read_files_by_id(self, station_id: str) ‑> Optional[List[src.redvox_api_m.redvox_api_m_pb2.RedvoxPacketM]]

:param station_id: the id to filter on
:return: the list of packets with the requested id, or None if the id can't be found

Expand source code

def read_files_by_id(self, station_id: str) -> Optional[List[api_m.RedvoxPacketM]]:
    """
    Gather every packet for one station from the index returned by `_flatten_files_index()`.

    API 900 packets are read first and converted to API 1000 as they are streamed;
    API 1000 packets are appended after them without modification.

    :param station_id: the station id to filter on
    :return: list of RedvoxPacketM with the requested id, or None if no packet matches the id
    """
    # API 900 files: stream the raw packets and convert each one to API 1000.
    legacy_stream = self._flatten_files_index().stream_raw(
        io.ReadFilter.empty().with_api_versions({io.ApiVersion.API_900}).with_station_ids({station_id})
    )
    # noinspection Mypy
    packets: List[api_m.RedvoxPacketM] = [ac.convert_api_900_to_1000_raw(raw_900) for raw_900 in legacy_stream]

    # API 1000 files: already in the target format, so they are appended as-is.
    native_stream = self._flatten_files_index().stream_raw(
        io.ReadFilter.empty().with_api_versions({io.ApiVersion.API_1000}).with_station_ids({station_id})
    )
    # noinspection Mypy
    packets.extend(native_stream)

    # An empty result is reported as None rather than as an empty list.
    return packets if packets else None