|
@@ -0,0 +1,191 @@
|
|
|
+{
|
|
|
+ "cells": [
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ ""
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "# Challenge - [ED Lab Prediction]\n",
|
|
|
+ "_Nom à trouver_"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "## Objectif"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "Ce notebook récupère la classifications des médicaments (ATC-IV) à partir des API publiques de la `national library of medecine`."
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "```\n",
|
|
|
+ "\n",
|
|
|
+ " This product uses publicly available data from the U.S. National Library of Medicine (NLM), National Institutes of Health, Department of Health and Human Services; NLM is not responsible for the product and does not endorse or recommend this or any other product.\n",
|
|
|
+ "\n",
|
|
|
+ "```"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "# Récupération des données"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 16,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "import sqlite3\n",
|
|
|
+ "import pandas as pd\n",
|
|
|
+ "from xml.etree import ElementTree as ET"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 12,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "conn = sqlite3.connect(\"./data/mimic-iv.sqlite\")"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 284,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Liste des traitements\n",
|
|
|
+ "drugs_gsn = pd.read_sql(\"\"\"\n",
|
|
|
+ " SELECT gsn, ndc, name\n",
|
|
|
+ " FROM medrecon\n",
|
|
|
+ " GROUP BY gsn\n",
|
|
|
+ "\"\"\", conn)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 223,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Création de la liste pour utilisation de l'outils RxMix\n",
|
|
|
+ "# https://mor.nlm.nih.gov/RxMix/\n",
|
|
|
+ "\n",
|
|
|
+ "## Utilisation de RxMix pour ne pas sur-solliciter les serveurs de la NML à travers des queries unitaires\n",
|
|
|
+ "\n",
|
|
|
+ "drugs_gsn[\"gsn\"].to_csv(\"./config/gsn_for_batch.txt\", header=False, index=False)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 263,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Récupération des classes ATC associées\n",
|
|
|
+ "atccode = ET.parse(\"./config/atccode.xml\")\n",
|
|
|
+ "\n",
|
|
|
+ "atccode_parsed = [\n",
|
|
|
+ " (int(x[0].text), \n",
|
|
|
+ " [\n",
|
|
|
+ " y.text \n",
|
|
|
+ " for y in x[2].findall(\".//classId\") \n",
|
|
|
+ " if y.text not in [\"-\",\"\"]\n",
|
|
|
+ " ]) for x in atccode.getroot() if len(x) > 2\n",
|
|
|
+ "]"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 267,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "Identification de 5906 sur 9262 (0.64) code NDC avec un code rxCUI associé.\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "n_found = len([x for x in atccode_parsed if len(x[1])>0])\n",
|
|
|
+ "n_total = drugs_gsn.shape[0]\n",
|
|
|
+ "\n",
|
|
|
+ "print(f\"Identification de {n_found} sur {n_total} ({n_found/n_total:.2f}) code NDC avec un code rxCUI associé.\")"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 272,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "atccode_parsed_filtered = dict([x for x in atccode_parsed if len(x[1])>0])\n",
|
|
|
+ "atccode_parsed_filtered_df = pd.DataFrame.from_dict(atccode_parsed_filtered, orient=\"index\") \\\n",
|
|
|
+ " .apply(lambda x: x.dropna().tolist(), axis=1) \\\n",
|
|
|
+ " .reset_index() \\\n",
|
|
|
+ " .rename(columns={\"index\":\"gsn\", 0:\"atc\"})\n",
|
|
|
+ "\n",
|
|
|
+ "# Réunion de NDC et ATC\n",
|
|
|
+ "drugs_atc = atccode_parsed_filtered_df \\\n",
|
|
|
+ " .explode(\"atc\") \\\n",
|
|
|
+ " .dropna() \\\n",
|
|
|
+ " .drop_duplicates([\"gsn\",\"atc\"])[[\"gsn\",\"atc\"]]"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 296,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "drugs_atc.to_csv(\"./config/atc_items.csv\", index=False)"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "metadata": {
|
|
|
+ "interpreter": {
|
|
|
+ "hash": "28b293e0c0671e44c7281dde6399c7c7419d3faca031d22494da8635907ada72"
|
|
|
+ },
|
|
|
+ "kernelspec": {
|
|
|
+ "display_name": "Python 3.9.7 ('base')",
|
|
|
+ "language": "python",
|
|
|
+ "name": "python3"
|
|
|
+ },
|
|
|
+ "language_info": {
|
|
|
+ "codemirror_mode": {
|
|
|
+ "name": "ipython",
|
|
|
+ "version": 3
|
|
|
+ },
|
|
|
+ "file_extension": ".py",
|
|
|
+ "mimetype": "text/x-python",
|
|
|
+ "name": "python",
|
|
|
+ "nbconvert_exporter": "python",
|
|
|
+ "pygments_lexer": "ipython3",
|
|
|
+ "version": "3.9.7"
|
|
|
+ },
|
|
|
+ "orig_nbformat": 4
|
|
|
+ },
|
|
|
+ "nbformat": 4,
|
|
|
+ "nbformat_minor": 2
|
|
|
+}
|