# Source code for arista.db.seeds

# ╔══════════════════════════════════════════════════════════════════╗
# ║  arista — db.seeds                                               ║
# ║  « dimension-table seeds: researchers, cell_types, stimulus,     ║
# ║    strains »                                                     ║
# ╠══════════════════════════════════════════════════════════════════╣
# ║  Inserts every entry that's known at design time into the four   ║
# ║  controlled-vocabulary tables. Researchers and cell-types are    ║
# ║  fixed; stimulus_protocols and strains will grow as new          ║
# ║  experiments are added, but every entry shipped here is one we   ║
# ║  expect ingest to see.                                           ║
# ╚══════════════════════════════════════════════════════════════════╝
"""Seed the controlled-vocabulary dimension tables."""

from __future__ import annotations

import json
import sqlite3

from arista.constants import (
    CANONICAL_STRAINS,
    CELL_TYPES,
    STIMULUS_PROTOCOLS,
)

# ─────────────────────────────────────────────────────────────────
#  Researcher catalogue  « four students contributing Ca²⁺ data »
# ─────────────────────────────────────────────────────────────────
# Anne Oepen contributed behaviour-only data (see [[Data Sources]] §
# Anne Oepen) and is intentionally NOT in this list — the v1.0.0 DB is
# Ca²⁺-only.
# Each entry is a (name, role, period) triple; the field order matches the
# column list of the INSERT statement in seed_researchers().
RESEARCHERS: tuple[tuple[str, str, str], ...] = (
    ("Robert Kossen",   "PhD",      "2016–2019"),
    ("Niko",            "MSc",      "2016"),
    ("Laurin Büld",     "MSc",      "2020–2021"),
    ("Alexander Busch", "post-BSc", "2021–2022"),
)



[docs]
def seed_researchers(conn: sqlite3.Connection) -> None:
    """Write every :data:`RESEARCHERS` entry into the ``researchers`` table.

    Each ``(name, role, period)`` triple becomes one row. The statement
    uses ``INSERT OR IGNORE``, so a row that would violate a table
    constraint is skipped instead of raising; re-running is therefore
    harmless as long as the schema (not visible here) enforces
    uniqueness on these rows.

    No commit is issued by this function.
    """
    insert_sql = (
        "INSERT OR IGNORE INTO researchers (name, role, period) VALUES (?, ?, ?)"
    )
    conn.executemany(insert_sql, RESEARCHERS)




[docs]
def seed_cell_types(conn: sqlite3.Connection) -> None:
    """Write the cell-type vocabulary into the ``cell_types`` table.

    One ``(code, name, description)`` row is produced for every value in
    :data:`arista.constants.CELL_TYPES`. ``INSERT OR IGNORE`` makes
    SQLite skip any row that conflicts with a table constraint.

    No commit is issued by this function.
    """
    insert_sql = (
        "INSERT OR IGNORE INTO cell_types (code, name, description) VALUES (?, ?, ?)"
    )
    cell_type_rows = [
        (cell_type.code, cell_type.name, cell_type.description)
        for cell_type in CELL_TYPES.values()
    ]
    conn.executemany(insert_sql, cell_type_rows)




[docs]
def seed_stimulus_protocols(conn: sqlite3.Connection) -> None:
    """Write every stimulus protocol into the ``stimulus_protocols`` table.

    Protocols come from :data:`arista.constants.STIMULUS_PROTOCOLS`. A
    protocol's ``target_sequence`` is stored in the
    ``target_sequence_json`` column as a JSON array string (so SQL can
    expand it with ``json_each()``); a falsy sequence — ``None`` or
    empty — is stored as SQL ``NULL``.

    ``INSERT OR IGNORE`` makes SQLite skip any row that conflicts with a
    table constraint. No commit is issued by this function.
    """

    def _to_row(protocol):
        # Serialise the sequence as a JSON list; falsy sequences become NULL.
        sequence = protocol.target_sequence
        encoded_sequence = json.dumps(list(sequence)) if sequence else None
        return (
            protocol.name,
            protocol.family,
            protocol.description,
            encoded_sequence,
        )

    insert_sql = (
        "INSERT OR IGNORE INTO stimulus_protocols "
        "(name, family, description, target_sequence_json) VALUES (?, ?, ?, ?)"
    )
    protocol_rows = [_to_row(protocol) for protocol in STIMULUS_PROTOCOLS.values()]
    conn.executemany(insert_sql, protocol_rows)




[docs]
def seed_strains(conn: sqlite3.Connection) -> None:
    """Write the canonical strain names into the ``strains`` table.

    Every item of :data:`arista.constants.CANONICAL_STRAINS` becomes one
    ``strain_name`` row. This is only a starter set: the strain list is
    expected to keep growing, with the ingester adding strains it meets
    later via :func:`ensure_strain` (defined outside this section).

    ``INSERT OR IGNORE`` makes SQLite skip any row that conflicts with a
    table constraint. No commit is issued by this function.
    """
    insert_sql = "INSERT OR IGNORE INTO strains (strain_name) VALUES (?)"
    # executemany expects one parameter sequence per row, hence 1-tuples.
    strain_rows = [(strain_name,) for strain_name in CANONICAL_STRAINS]
    conn.executemany(insert_sql, strain_rows)




[docs]
def seed_dimensions(conn: sqlite3.Connection) -> None:
    """Run all four dimension seeds on *conn*, then commit.

    The seeds run in a fixed order — researchers, cell types, stimulus
    protocols, strains — and a single ``conn.commit()`` follows the last
    one. If any seed raises, the commit is not reached.
    """
    seeders = (
        seed_researchers,
        seed_cell_types,
        seed_stimulus_protocols,
        seed_strains,
    )
    for run_seed in seeders:
        run_seed(conn)
    conn.commit()