# Source code for arista.preprocess.template_rescue

# ─────────────────────────────────────────────────────────────────
#  arista.preprocess.template_rescue  « broken-MAT detection »
# ─────────────────────────────────────────────────────────────────
"""Detect a broken / truncated sensor MAT file.

In Robert's original pytci pipeline a ``temperature_data_*.mat`` with
fewer than 1000 rows was treated as corrupt; the sensor trace was
then substituted from a pre-computed median template stored in
``_legacy/pytci/brokenTempFile_adap.pkl`` (only the ``adaptation``
protocol template was ever generated).

For v0.1 we only ship the **detection** half — actual substitution
needs the per-protocol templates regenerated against the corpus, which
is a sprint-7 task once the DB exists. Calling :func:`load_template`
today raises :class:`NotImplementedError` and points the user at the
issue tracker.
"""

from __future__ import annotations

from arista.preprocess.io import SensorRecord

# Row count below which a sensor MAT is considered corrupt (pytci's cutoff).
_BROKEN_MAT_ROW_THRESHOLD: int = 1000


def is_broken_sensor(
    sensor: SensorRecord,
    min_rows: int = _BROKEN_MAT_ROW_THRESHOLD,
) -> bool:
    """Report whether a sensor MAT file appears to be truncated.

    This follows the pytci heuristic: the sensor is logged at a much higher
    rate than the imaging frame rate, so a complete recording yields a MAT
    file with thousands of rows. A file with far fewer rows is almost
    certainly the partially-written leftover of a crashed acquisition.

    Args:
        sensor: Sensor record from
            :func:`arista.preprocess.io.read_sensor_mat`.
        min_rows: Minimum row count for a healthy file; records with
            strictly fewer samples are declared broken (default 1000,
            matching pytci).

    Returns:
        ``True`` when ``sensor.n_samples`` is strictly below ``min_rows``,
        ``False`` otherwise.
    """
    row_count = sensor.n_samples
    return row_count < min_rows
def load_template(stimulus_name: str) -> SensorRecord:
    """Stand-in for swapping a broken MAT's trace with a median template.

    Template substitution is not available in v0.1, so this function never
    returns. It always raises, with a message pointing at the sprint plan,
    rather than silently handing back bogus data.

    Args:
        stimulus_name: Canonical stimulus name (e.g. ``"adaptation"``); it
            is only used to build the error message.

    Raises:
        NotImplementedError: Always.
    """
    # Build the message first so the raise statement stays short; the text
    # is what callers and log readers see, so it is kept verbatim.
    message = (
        f"Template rescue for stimulus {stimulus_name!r} is not implemented "
        "in v0.1. Regenerate the per-protocol median templates from the "
        "corpus once the DB is populated (sprint 7) and re-enable this "
        "function. Until then, recordings with broken sensor MATs must be "
        "manually flagged with qc_flag='broken_temp' on ingest."
    )
    raise NotImplementedError(message)