Nom à trouver
Ce notebook récupère la classification des médicaments (ATC-IV) à partir des API publiques de la National Library of Medicine.
This product uses publicly available data from the U.S. National Library of Medicine (NLM), National Institutes of Health, Department of Health and Human Services; NLM is not responsible for the product and does not endorse or recommend this or any other product.
import sqlite3
import pandas as pd
from xml.etree import ElementTree as ET
# Open the local MIMIC-IV SQLite database file.
conn = sqlite3.connect("./data/mimic-iv.sqlite")

# List of treatments: one row per distinct `gsn` value found in `medrecon`.
# NOTE(review): `ndc` and `name` are neither grouped nor aggregated, so SQLite
# takes them from an arbitrary row of each group -- confirm this is acceptable.
drugs_gsn = pd.read_sql(
    sql="""
SELECT gsn, ndc, name
FROM medrecon
GROUP BY gsn
""",
    con=conn,
)
# Build the input list for the RxMix batch tool:
# https://mor.nlm.nih.gov/RxMix/
# RxMix is used so that the NLM servers are not over-solicited with one query per code.
# The file holds one `gsn` value per line, without a header row or an index column.
drugs_gsn.loc[:, "gsn"].to_csv(
    path_or_buf="./config/gsn_for_batch.txt",
    header=False,
    index=False,
)
# Retrieve the associated ATC classes.
# Parses ./config/atccode.xml (presumably the RxMix batch output -- confirm)
# into a list of (identifier, [classId texts]) tuples.
atccode = ET.parse("./config/atccode.xml")
atccode_parsed = [
    (
        # First child of the entry: the identifier, converted to int
        # (used downstream as the `gsn` key).
        int(entry[0].text),
        [
            class_id.text
            for class_id in entry[2].findall(".//classId")
            # Element.text is None for an empty <classId/> element. Reject it
            # together with the "-" and "" placeholders so that such an entry
            # is not counted as having an ATC class.
            if class_id.text not in (None, "-", "")
        ],
    )
    for entry in atccode.getroot()
    # Entries with fewer than three children have no third child to search.
    if len(entry) > 2
]
# Coverage summary: number of parsed entries that carry at least one class,
# compared with the number of rows in `drugs_gsn`.
n_total = len(drugs_gsn.index)
n_found = sum(1 for _, atc_classes in atccode_parsed if len(atc_classes) > 0)
print(f"Identification de {n_found} sur {n_total} ({n_found/n_total:.2f}) code NDC avec un code rxCUI associé.")
Identification de 5906 sur 9262 (0.64) code NDC avec un code rxCUI associé.
# Keep only the entries that have at least one class, keyed by identifier.
# If an identifier appears several times, the last entry wins (same as dict()).
atccode_parsed_filtered = {
    gsn: atc_classes
    for gsn, atc_classes in atccode_parsed
    if len(atc_classes) > 0
}

# One row per identifier ("gsn") with its list of classes ("atc").
# The frame is built directly from the dict rather than through a wide
# from_dict(orient="index") frame that is collapsed back into lists row by row.
# Both columns are always present, even when the dict is empty.
atccode_parsed_filtered_df = pd.DataFrame(
    {
        "gsn": list(atccode_parsed_filtered),
        "atc": [
            # Drop missing values from each list, as the former dropna() did.
            [atc for atc in atc_classes if pd.notna(atc)]
            for atc_classes in atccode_parsed_filtered.values()
        ],
    }
)
# Combine identifiers and ATC classes into a long table: one row per
# distinct (gsn, atc) pair, with rows lacking a value removed.
drugs_atc = (
    atccode_parsed_filtered_df
    .explode(column="atc")
    .dropna()
    .drop_duplicates(subset=["gsn", "atc"])
    .loc[:, ["gsn", "atc"]]
)
drugs_atc.to_csv(path_or_buf="./config/atc_items.csv", index=False)