{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "![Urgences - Image CC0 - pexels.com](img/pexels-pixabay-263402.jpg \"Urgences\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Challenge - [ED Lab Prediction]\n",
    "_Nom à trouver_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Objectif"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Ce notebook récupère la classification des médicaments (ATC-IV) à partir des API publiques de la `National Library of Medicine`."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "```\n",
    "\n",
    "    This product uses publicly available data from the U.S. National Library of Medicine (NLM), National Institutes of Health, Department of Health and Human Services; NLM is not responsible for the product and does not endorse or recommend this or any other product.\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Récupération des données"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sqlite3\n",
    "import pandas as pd\n",
    "from xml.etree import ElementTree as ET"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Open a connection to the SQLite database file at ./data/mimic-iv.sqlite.\n",
    "# Note: sqlite3.connect() creates an empty database file if the path does not exist yet.\n",
    "conn = sqlite3.connect(\"./data/mimic-iv.sqlite\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 284,
   "metadata": {},
   "outputs": [],
   "source": [
    "# List of drugs: one row per distinct gsn value found in the medrecon table.\n",
    "# NOTE(review): ndc and name are neither aggregated nor part of GROUP BY, so SQLite\n",
    "# returns them from an arbitrary row of each gsn group.\n",
    "drugs_gsn = pd.read_sql(\"\"\"\n",
    "    SELECT gsn, ndc, name\n",
    "    FROM medrecon\n",
    "    GROUP BY gsn\n",
    "\"\"\", conn)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 223,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the input list for the RxMix tool\n",
    "# https://mor.nlm.nih.gov/RxMix/\n",
    "\n",
    "## RxMix is used so that the NLM servers are not overloaded with one query per code\n",
    "\n",
    "# Writes the gsn column only, one value per line, without header or index.\n",
    "drugs_gsn[\"gsn\"].to_csv(\"./config/gsn_for_batch.txt\", header=False, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 263,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parse the ATC classes associated with each code from ./config/atccode.xml.\n",
    "# Result: a list of (int id, [classId texts]) tuples, one per child of the XML root\n",
    "# that has more than two sub-elements. The id is the text of the first sub-element;\n",
    "# the classes are the classId descendants of the third one.\n",
    "atccode = ET.parse(\"./config/atccode.xml\")\n",
    "\n",
    "atccode_parsed = [\n",
    "    (\n",
    "        int(entry[0].text),\n",
    "        [\n",
    "            class_id.text\n",
    "            for class_id in entry[2].findall(\".//classId\")\n",
    "            # ElementTree reports text=None (not \"\") for an empty element, so None\n",
    "            # must be excluded too; otherwise it is counted as a found class.\n",
    "            if class_id.text not in (None, \"-\", \"\")\n",
    "        ],\n",
    "    )\n",
    "    for entry in atccode.getroot()\n",
    "    if len(entry) > 2\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 267,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Identification de 5906 sur 9262 (0.64) code NDC avec un code rxCUI associé.\n"
     ]
    }
   ],
   "source": [
    "# Count the parsed entries that carry at least one class and compare that\n",
    "# count with the number of rows in drugs_gsn.\n",
    "n_found = sum(1 for _key, classes in atccode_parsed if len(classes) > 0)\n",
    "n_total = len(drugs_gsn)\n",
    "\n",
    "print(f\"Identification de {n_found} sur {n_total} ({n_found/n_total:.2f}) code NDC avec un code rxCUI associé.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 272,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the entries that have at least one class, keyed by their id.\n",
    "atccode_parsed_filtered = {\n",
    "    key: classes for key, classes in atccode_parsed if len(classes) > 0\n",
    "}\n",
    "\n",
    "# One row per id, with the list of its non-null classes in the \"atc\" column.\n",
    "atccode_parsed_filtered_df = (\n",
    "    pd.DataFrame.from_dict(atccode_parsed_filtered, orient=\"index\")\n",
    "    .apply(lambda row: row.dropna().tolist(), axis=1)\n",
    "    .reset_index()\n",
    "    .rename(columns={\"index\": \"gsn\", 0: \"atc\"})\n",
    ")\n",
    "\n",
    "# Long format: one row per unique (gsn, atc) pair.\n",
    "drugs_atc = (\n",
    "    atccode_parsed_filtered_df\n",
    "    .explode(\"atc\")\n",
    "    .dropna()\n",
    "    .drop_duplicates([\"gsn\", \"atc\"])[[\"gsn\", \"atc\"]]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 296,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist drugs_atc to ./config/atc_items.csv, without the index column.\n",
    "drugs_atc.to_csv(\"./config/atc_items.csv\", index=False)"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "28b293e0c0671e44c7281dde6399c7c7419d3faca031d22494da8635907ada72"
  },
  "kernelspec": {
   "display_name": "Python 3.9.7 ('base')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}