import sqlite3
import pandas as pd
# Sqlite connection
conn = sqlite3.connect("./data/mimic-iv.sqlite")
# Custom lab items classification
items = pd.read_csv("./config/lab_items.csv").dropna()
items_list = items["item_id"].astype("str").tolist()
# Création d'un index pour accélérer les requêtes
conn.execute("CREATE INDEX IF NOT EXISTS biological_index ON labevents (stay_id, itemid)");
stays = pd.read_sql(f"""
SELECT
s.stay_id,
s.intime intime,
p.gender gender,
p.anchor_age age,
t.temperature,
t.heartrate,
t.resprate,
t.o2sat,
t.sbp,
t.dbp,
t.pain,
t.chiefcomplaint
FROM edstays s
LEFT JOIN patients p
ON p.subject_id = s.subject_id
LEFT Join triage t
ON t.stay_id = s.stay_id
""", conn)
# @TODO : include drugs
# Writting
stays.to_csv("./data/features.csv")
labs = pd.read_sql(f"""
SELECT
le.stay_id,
le.itemid item_id
FROM labevents le
WHERE le.itemid IN ('{"','".join(items_list)}')
GROUP BY
le.stay_id,
le.itemid
""", conn)
labs_deduplicate = pd.merge(
items[["item_id","3"]].rename(columns={"3":"label"}),
labs,
left_on="item_id",
right_on="item_id"
).drop_duplicates(["stay_id", "label"])[["stay_id","label"]] \
.reset_index(drop=True)
labs_deduplicate_pivot = pd.pivot_table(
labs_deduplicate.assign(value=1),
index=["stay_id"],
columns=["label"],
values="value"
).fillna(0)
labs_deduplicate_pivot_final = labs_deduplicate_pivot.join(
stays[["stay_id"]].set_index("stay_id"),
how="right"
).fillna(0).astype("int8").reset_index()
labs_deduplicate_pivot_final.to_csv("./data/labels.csv")