Browse Source

Initial push

Ali 3 years ago
parent
commit
57387df1c9
6 changed files with 3460 additions and 0 deletions
  1. 3 0
      .gitignore
  2. 159 0
      database_constitution/database_constitution.py
  3. 37 0
      download_data.py
  4. 3260 0
      getting_started.ipynb
  5. BIN
      img/pexels-pixabay-263402.jpg
  6. 1 0
      requirements.txt

+ 3 - 0
.gitignore

@@ -58,3 +58,6 @@ docs/_build/
 # PyBuilder
 target/
 
+# Data
+data/
+workflow/

+ 159 - 0
database_constitution/database_constitution.py

@@ -0,0 +1,159 @@
+###
+#
+# MIMIC-IV database creation for Datacamp 2022
+#
+###
+
+import pandas as pd
+import sqlite3
+import dask.dataframe as dd
+import dask
+from io import StringIO
+
+#
+# Parameters
+#
+
+sqlite_path = "../data/mimic-iv.sqlite"
+csv_ed_path = "../data/mimic-iv-ed/mimic-iv-ed-1.0/ed/"
+csv_mimic_path = "../data/mimic-iv/mimiciv/1.0/"
+
+# Loading sqlite database
+
+con = sqlite3.connect(sqlite_path)
+
+# Saving CSV to sqlite database
+
+ed_csv = [
+    "triage",
+    "medrecon",
+    "edstays",
+    "diagnosis"
+]
+
+global_csv = [
+    ["core/patients", "subject_id"],
+    ["hosp/d_labitems", None]
+]
+
+datasets = {}
+
+## ED data
+
+for csv in ed_csv:
+    print(f"Writting {csv}")
+    datasets[csv] = pd.read_csv(f"{csv_ed_path}/{csv}.csv.gz")
+    datasets[csv].to_sql(csv, con, if_exists="replace")
+
+# Getting patients list
+patients_list = datasets["edstays"]["subject_id"].unique().tolist()
+
+## Global Mimic Data
+
+for csv in global_csv:
+    print(f"Writting {csv[0]}")
+    datasets[csv[0]] = pd.read_csv(f"{csv_mimic_path}/{csv[0]}.csv.gz")
+
+    filename = csv[0].split("/")[-1]
+
+    # Filtering
+    if csv[1] == "subject_id":
+        datasets[csv[0]] = datasets[csv[0]][
+            datasets[csv[0]]["subject_id"].isin(patients_list)
+        ].reset_index(drop=True)
+
+    datasets[csv[0]].to_sql(filename, con, if_exists="replace")
+
+# Special cases : microbiology and labevents
+
+## microbiology
+microbiology_df = pd.read_csv(f"{csv_mimic_path}/hosp/microbiologyevents.csv.gz")
+
+microbiology_df = microbiology_df[
+    (microbiology_df["subject_id"].isin(patients_list))
+].reset_index(drop=True)
+
+microbiology_df_filter = pd.merge(
+            datasets["edstays"][["subject_id","intime", "outtime", "stay_id"]],
+            microbiology_df[["microevent_id","subject_id", "charttime"]],
+            on="subject_id"
+)
+
+microbiology_df_filter = microbiology_df_filter[
+            (
+                (microbiology_df_filter["charttime"] >= microbiology_df_filter["intime"]) 
+                & (microbiology_df_filter["charttime"] <= microbiology_df_filter["outtime"])
+            )]
+
+microbiology_df = pd.merge(microbiology_df, microbiology_df_filter[["stay_id", "microevent_id"]], left_on="microevent_id", right_on="microevent_id", how="inner").drop_duplicates("microevent_id")
+
+microbiology_df.to_sql("microbiologyevents", con, if_exists="replace")
+
+## labevents
+### The database is too big to be processed once, dealing with batchs
+
+n_lines = int(1e9)
+
+# Dropping table
+cursor = con.cursor()
+cursor.execute("DROP TABLE IF EXISTS labevents")
+cursor.close()
+
+with open(f"{csv_mimic_path}/hosp/labevents.csv", "r") as f:
+    header = f.readline()
+
+    i = 0
+    while True:
+        print(f"Writting rows {i*n_lines} to {(i+1)*n_lines}")
+        i += 1
+
+        lines = f.readlines(n_lines)
+
+        if f.closed or len(lines) == 0:
+            break
+
+        temp_file = StringIO("\n".join([header]+lines))
+        temp_df = pd.read_csv(temp_file)
+
+        # Filtering intesresting results
+        temp_df = temp_df[
+            (temp_df["subject_id"].astype("int64").isin(patients_list))
+        ].reset_index(drop=True)
+
+        # Filtering results to the accurate date
+        temp_df_filter = pd.merge(
+            datasets["edstays"][["subject_id","intime", "outtime", "stay_id"]],
+            temp_df[["labevent_id","subject_id", "charttime"]],
+            on="subject_id"
+        )
+        temp_df_filter = temp_df_filter[
+            (
+                (temp_df_filter["charttime"] >= temp_df_filter["intime"]) 
+                & (temp_df_filter["charttime"] <= temp_df_filter["outtime"])
+            )]
+
+        temp_df = pd.merge(temp_df, temp_df_filter[["stay_id", "labevent_id"]], left_on="labevent_id", right_on="labevent_id", how="inner")
+
+        # Writting to database
+        temp_df.to_sql("labevents", con, if_exists="append")
+
+# Creating index
+indexes = {
+    "triage":["subject_id","stay_id"],
+    "medrecon":["subject_id","stay_id"],
+    "edstays":["subject_id","stay_id","hadm_id"],
+    "diagnosis":["subject_id","stay_id"],
+    "patients":["subject_id"],
+    "d_labitems":["itemid"],
+    "microbiologyevents":["microevent_id","subject_id","stay_id","hadm_id","micro_specimen_id"],
+    "labevents":["labevent_id","subject_id","stay_id","hadm_id","specimen_id","itemid"],
+}
+
+cursor = con.cursor()
+for table_name, indexes_columns in indexes.items():
+    for indexes_column in indexes_columns:
+        indexes_query = f"CREATE INDEX {table_name}_{indexes_column} ON {table_name} ({indexes_column})"
+
+        cursor.execute(indexes_query)
+
+cursor.close()

+ 37 - 0
download_data.py

@@ -0,0 +1,37 @@
+import requests
+from progress.bar import Bar
+import os
+import sys
+
+#
+# Script for MIMIC-IV project data download
+#
+
+# Getting command line parameter
+if len(sys.argv) > 1:
+    token = sys.argv[1]
+
+file_path = {
+    "mimic-iv.sqlite":[
+        "http://chessm2ds.alibellamine.me:5000/",
+        3214753792
+    ]
+}
+
+print("Downloading MIMIC-IV data")
+
+folder = "./data"
+
+for file_name, file_metadata in file_path.items():
+    destination = "/".join([folder, file_name])
+    
+    if os.path.isfile(destination) == False or os.path.getsize(destination) != file_metadata[1]:
+        uri = f"{file_metadata[0]}/{token}"
+        r = requests.get(uri, stream=True)
+        file_size = int(int(r.headers.get('content-length'))/1000)
+
+        with open(destination, "wb") as f:
+            with Bar(f"Downloading {file_name}", max=int(file_size)) as bar:
+                for chunck in r.iter_content(chunk_size=1024):
+                    f.write(chunck)
+                    bar.next()

+ 3260 - 0
getting_started.ipynb

@@ -0,0 +1,3260 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "![Urgences - Image CC0 - pexels.com](img/pexels-pixabay-263402.jpg \"Urgences\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Challenge - [ED Lab Prediction]\n",
+    "_Nom à trouver_"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Ce notebook est à destination de l'équipe travaillant autours du projet.  \n",
+    "L'objectif est de présenter sommairement le projet et les données."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Rationel"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "_@TODO Levi_"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Données"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Les données sont issus du projet MIMIC-IV.  \n",
+    "Le projet MIMIC est un projet d'open-data médical initié par l'hopital _Beth Israel Deaconess_ à Boston.  \n",
+    "Initialement, seul des données de réanimation été accessible.\n",
+    "\n",
+    "Pour sa 4ème édition, a été mis à disposition un jeu de données couvrant un spectre bien plus large :\n",
+    "- Données relatives aux passages aux urgences\n",
+    "- Données relatives aux hospitalisations\n",
+    "- Données relatives aux séjour en réanimation\n",
+    "- Données de radiographie thoracique avec compte rendu associé"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "L'ensemble de ces données ont été mis à disposition dans le cadre de projets complémentaires :\n",
+    "- [MIMIC-IV](https://physionet.org/content/mimiciv/0.4/) : hospitalisation et réanimation\n",
+    "- [MIMIC-IV-ED](https://physionet.org/content/mimic-iv-ed/1.0/) : urgences\n",
+    "- [MIMIC-IV-CXR](https://physionet.org/content/mimic-cxr/2.0.0/) : radiographie thoracique\n",
+    "\n",
+    "Ces bases sont complémentaires dans le sens où chaque collecte a été faite durant une période temporelle spécifique, qui se recoupe plus où moins.  \n",
+    "Certains éléments nécessaires à l'exploitation de MIMIC-IV-ED sont présent dans MIMIC-IV.  \n",
+    "La lecture de la documentation de MIMIC-IV et de MIMIC-IV-ED est vivement recommandé (lien ci-dessus).\n",
+    "\n",
+    "En complément, un certains nombre de ressources est disponible sur le site du projet [MIMIC-IV](https://mimic.mit.edu/)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Nous proposons de travailler autours de ce jeu de données avec pour objectif la tache suivante : **prédire les examens biologiques qui seront réalisés lors de l'arrivé d'un patient aux urgences**"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Téléchargement des données"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "La base de données de biologie étant volumineuse (nous y reviendrons plus bas), un pré-traitement des données a été effectué.  \n",
+    "Le pré-traitement est le suivant :\n",
+    "- Intégration de l'ensemble des données utiles au sein d'une base de données SQLITE\n",
+    "- Tri des lignes de biologies afin de ne conserver que celles répondant au critères suivants :\n",
+    "  - Date de réalisation >= date de début du passage aux urgences\n",
+    "  - Date de réalisation <= date de fin du passage aux urgences"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Un token de téléchargement des données vous a normallement été mis à disposition."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "```\n",
+    "    python3 download_data.py [TOKEN]\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Le script de transformation est disponible dans `database_constitution/database_constitution.py`."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Chargement et exploration des données"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Les données sont téléchargés dans le dossier data.  \n",
+    "Il est facile de requêter une base sqlite à l'aide de pandas."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import sqlite3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>index</th>\n",
+       "      <th>subject_id</th>\n",
+       "      <th>stay_id</th>\n",
+       "      <th>temperature</th>\n",
+       "      <th>heartrate</th>\n",
+       "      <th>resprate</th>\n",
+       "      <th>o2sat</th>\n",
+       "      <th>sbp</th>\n",
+       "      <th>dbp</th>\n",
+       "      <th>pain</th>\n",
+       "      <th>acuity</th>\n",
+       "      <th>chiefcomplaint</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>15585360</td>\n",
+       "      <td>37573921</td>\n",
+       "      <td>97.0</td>\n",
+       "      <td>87.0</td>\n",
+       "      <td>18.0</td>\n",
+       "      <td>100.0</td>\n",
+       "      <td>150.0</td>\n",
+       "      <td>71.0</td>\n",
+       "      <td>10.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>€‡</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>17192424</td>\n",
+       "      <td>34160628</td>\n",
+       "      <td>98.6</td>\n",
+       "      <td>82.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>100.0</td>\n",
+       "      <td>111.0</td>\n",
+       "      <td>81.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>‡</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>15248757</td>\n",
+       "      <td>32172727</td>\n",
+       "      <td>97.1</td>\n",
+       "      <td>112.0</td>\n",
+       "      <td>20.0</td>\n",
+       "      <td>100.0</td>\n",
+       "      <td>147.0</td>\n",
+       "      <td>97.0</td>\n",
+       "      <td>8.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>ˆ‡</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>16648037</td>\n",
+       "      <td>38946064</td>\n",
+       "      <td>98.5</td>\n",
+       "      <td>59.0</td>\n",
+       "      <td>18.0</td>\n",
+       "      <td>99.0</td>\n",
+       "      <td>160.0</td>\n",
+       "      <td>86.0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>‰</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>13492931</td>\n",
+       "      <td>39828574</td>\n",
+       "      <td>100.6</td>\n",
+       "      <td>90.0</td>\n",
+       "      <td>16.0</td>\n",
+       "      <td>96.0</td>\n",
+       "      <td>107.0</td>\n",
+       "      <td>55.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>'</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>448967</th>\n",
+       "      <td>448967</td>\n",
+       "      <td>17192157</td>\n",
+       "      <td>31192693</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>448968</th>\n",
+       "      <td>448968</td>\n",
+       "      <td>15632852</td>\n",
+       "      <td>37860864</td>\n",
+       "      <td>98.2</td>\n",
+       "      <td>115.0</td>\n",
+       "      <td>18.0</td>\n",
+       "      <td>100.0</td>\n",
+       "      <td>124.0</td>\n",
+       "      <td>68.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>448969</th>\n",
+       "      <td>448969</td>\n",
+       "      <td>17526143</td>\n",
+       "      <td>30744048</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>448970</th>\n",
+       "      <td>448970</td>\n",
+       "      <td>18321345</td>\n",
+       "      <td>37024607</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>448971</th>\n",
+       "      <td>448971</td>\n",
+       "      <td>11894181</td>\n",
+       "      <td>39631961</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>448972 rows × 12 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "         index  subject_id   stay_id  temperature  heartrate  resprate  o2sat  \\\n",
+       "0            0    15585360  37573921         97.0       87.0      18.0  100.0   \n",
+       "1            1    17192424  34160628         98.6       82.0       NaN  100.0   \n",
+       "2            2    15248757  32172727         97.1      112.0      20.0  100.0   \n",
+       "3            3    16648037  38946064         98.5       59.0      18.0   99.0   \n",
+       "4            4    13492931  39828574        100.6       90.0      16.0   96.0   \n",
+       "...        ...         ...       ...          ...        ...       ...    ...   \n",
+       "448967  448967    17192157  31192693          NaN        NaN       NaN    NaN   \n",
+       "448968  448968    15632852  37860864         98.2      115.0      18.0  100.0   \n",
+       "448969  448969    17526143  30744048          NaN        NaN       NaN    NaN   \n",
+       "448970  448970    18321345  37024607          NaN        NaN       NaN    NaN   \n",
+       "448971  448971    11894181  39631961          NaN        NaN       NaN    NaN   \n",
+       "\n",
+       "          sbp   dbp  pain  acuity chiefcomplaint  \n",
+       "0       150.0  71.0  10.0     3.0            €‡  \n",
+       "1       111.0  81.0   3.0     3.0              ‡  \n",
+       "2       147.0  97.0   8.0     4.0             ˆ‡  \n",
+       "3       160.0  86.0   2.0     2.0              ‰  \n",
+       "4       107.0  55.0   0.0     3.0              '  \n",
+       "...       ...   ...   ...     ...            ...  \n",
+       "448967    NaN   NaN   NaN     NaN           None  \n",
+       "448968  124.0  68.0   0.0     3.0           None  \n",
+       "448969    NaN   NaN   NaN     2.0           None  \n",
+       "448970    NaN   NaN   NaN     NaN           None  \n",
+       "448971    NaN   NaN   NaN     1.0           None  \n",
+       "\n",
+       "[448972 rows x 12 columns]"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# D'abord, il fait établir une connection avec la base\n",
+    "conn = sqlite3.connect(\"./data/mimic-iv.sqlite\")\n",
+    "\n",
+    "# Ensuite, on effectue des requête à l'aide de la fonction read_sql de pandas, pour récupérer tous les éléments de la table, il suffit de faire un SELECT * FROM nom_de_la_table\n",
+    "triage = pd.read_sql(\"SELECT * FROM triage\", conn)\n",
+    "triage"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "L'avantage de travailler avec une base sqlite, c'est qu'il nous permet de manipuler et transformer des données qui ne peuvent pas toujours tenir en mémoire vive.  \n",
+    "Quelques exemples de traitements :"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Dimension de labevents_100 :  (100, 17)\n",
+      "Nombre de lignes dans labevents : 11594360\n",
+      "Contenu de labevents pour le patient n°10000764 : (27, 17)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Sélectionner un nombre limité de lignes\n",
+    "labevents_100 = pd.read_sql(\"SELECT * FROM labevents LIMIT 100\", conn)\n",
+    "print(\"Dimension de labevents_100 : \", labevents_100.shape)\n",
+    "\n",
+    "# Compter le nombre de ligne\n",
+    "labevents_n = pd.read_sql(\"SELECT COUNT(0) FROM labevents\", conn)\n",
+    "print(\"Nombre de lignes dans labevents :\", labevents_n.values[0][0])\n",
+    "\n",
+    "# Filter des données\n",
+    "labevents_10000764 = pd.read_sql(\"SELECT * FROM labevents WHERE subject_id = 10000764\", conn)\n",
+    "print(\"Contenu de labevents pour le patient n°10000764 :\", labevents_10000764.shape)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Grâce à un système d'index, les transformations de données via sqlite se font relativement rapidement.  \n",
+    "C'est notamment le cas pour les filtre (WHERE) et les jointure.\n",
+    "Un exemple de jointure :"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>index</th>\n",
+       "      <th>labevent_id</th>\n",
+       "      <th>subject_id</th>\n",
+       "      <th>hadm_id</th>\n",
+       "      <th>specimen_id</th>\n",
+       "      <th>itemid</th>\n",
+       "      <th>charttime</th>\n",
+       "      <th>storetime</th>\n",
+       "      <th>value</th>\n",
+       "      <th>valuenum</th>\n",
+       "      <th>...</th>\n",
+       "      <th>flag</th>\n",
+       "      <th>priority</th>\n",
+       "      <th>comments</th>\n",
+       "      <th>stay_id</th>\n",
+       "      <th>index</th>\n",
+       "      <th>itemid</th>\n",
+       "      <th>label</th>\n",
+       "      <th>fluid</th>\n",
+       "      <th>category</th>\n",
+       "      <th>loinc_code</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>2599</td>\n",
+       "      <td>10000764</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>76601642</td>\n",
+       "      <td>50971</td>\n",
+       "      <td>2132-10-14 20:15:00</td>\n",
+       "      <td>2132-10-14 21:04:00</td>\n",
+       "      <td>4.1</td>\n",
+       "      <td>4.1</td>\n",
+       "      <td>...</td>\n",
+       "      <td>None</td>\n",
+       "      <td>STAT</td>\n",
+       "      <td>None</td>\n",
+       "      <td>35420907</td>\n",
+       "      <td>1236</td>\n",
+       "      <td>50971</td>\n",
+       "      <td>Potassium</td>\n",
+       "      <td>Blood</td>\n",
+       "      <td>Chemistry</td>\n",
+       "      <td>2823-3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>2578</td>\n",
+       "      <td>10000764</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>6228584</td>\n",
+       "      <td>51146</td>\n",
+       "      <td>2132-10-14 20:15:00</td>\n",
+       "      <td>2132-10-14 20:33:00</td>\n",
+       "      <td>0.1</td>\n",
+       "      <td>0.1</td>\n",
+       "      <td>...</td>\n",
+       "      <td>None</td>\n",
+       "      <td>STAT</td>\n",
+       "      <td>None</td>\n",
+       "      <td>35420907</td>\n",
+       "      <td>192</td>\n",
+       "      <td>51146</td>\n",
+       "      <td>Basophils</td>\n",
+       "      <td>Blood</td>\n",
+       "      <td>Hematology</td>\n",
+       "      <td>704-7</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>2579</td>\n",
+       "      <td>10000764</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>6228584</td>\n",
+       "      <td>51200</td>\n",
+       "      <td>2132-10-14 20:15:00</td>\n",
+       "      <td>2132-10-14 20:33:00</td>\n",
+       "      <td>0.3</td>\n",
+       "      <td>0.3</td>\n",
+       "      <td>...</td>\n",
+       "      <td>None</td>\n",
+       "      <td>STAT</td>\n",
+       "      <td>None</td>\n",
+       "      <td>35420907</td>\n",
+       "      <td>619</td>\n",
+       "      <td>51200</td>\n",
+       "      <td>Eosinophils</td>\n",
+       "      <td>Blood</td>\n",
+       "      <td>Hematology</td>\n",
+       "      <td>711-2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>2580</td>\n",
+       "      <td>10000764</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>6228584</td>\n",
+       "      <td>51221</td>\n",
+       "      <td>2132-10-14 20:15:00</td>\n",
+       "      <td>2132-10-14 20:33:00</td>\n",
+       "      <td>40.2</td>\n",
+       "      <td>40.2</td>\n",
+       "      <td>...</td>\n",
+       "      <td>None</td>\n",
+       "      <td>STAT</td>\n",
+       "      <td>None</td>\n",
+       "      <td>35420907</td>\n",
+       "      <td>765</td>\n",
+       "      <td>51221</td>\n",
+       "      <td>Hematocrit</td>\n",
+       "      <td>Blood</td>\n",
+       "      <td>Hematology</td>\n",
+       "      <td>4544-3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>2581</td>\n",
+       "      <td>10000764</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>6228584</td>\n",
+       "      <td>51222</td>\n",
+       "      <td>2132-10-14 20:15:00</td>\n",
+       "      <td>2132-10-14 20:33:00</td>\n",
+       "      <td>14.1</td>\n",
+       "      <td>14.1</td>\n",
+       "      <td>...</td>\n",
+       "      <td>None</td>\n",
+       "      <td>STAT</td>\n",
+       "      <td>None</td>\n",
+       "      <td>35420907</td>\n",
+       "      <td>775</td>\n",
+       "      <td>51222</td>\n",
+       "      <td>Hemoglobin</td>\n",
+       "      <td>Blood</td>\n",
+       "      <td>Hematology</td>\n",
+       "      <td>718-7</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>5</td>\n",
+       "      <td>2582</td>\n",
+       "      <td>10000764</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>6228584</td>\n",
+       "      <td>51244</td>\n",
+       "      <td>2132-10-14 20:15:00</td>\n",
+       "      <td>2132-10-14 20:33:00</td>\n",
+       "      <td>4.7</td>\n",
+       "      <td>4.7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>abnormal</td>\n",
+       "      <td>STAT</td>\n",
+       "      <td>None</td>\n",
+       "      <td>35420907</td>\n",
+       "      <td>991</td>\n",
+       "      <td>51244</td>\n",
+       "      <td>Lymphocytes</td>\n",
+       "      <td>Blood</td>\n",
+       "      <td>Hematology</td>\n",
+       "      <td>731-0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>6</td>\n",
+       "      <td>2583</td>\n",
+       "      <td>10000764</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>6228584</td>\n",
+       "      <td>51248</td>\n",
+       "      <td>2132-10-14 20:15:00</td>\n",
+       "      <td>2132-10-14 20:33:00</td>\n",
+       "      <td>32.8</td>\n",
+       "      <td>32.8</td>\n",
+       "      <td>...</td>\n",
+       "      <td>abnormal</td>\n",
+       "      <td>STAT</td>\n",
+       "      <td>None</td>\n",
+       "      <td>35420907</td>\n",
+       "      <td>1016</td>\n",
+       "      <td>51248</td>\n",
+       "      <td>MCH</td>\n",
+       "      <td>Blood</td>\n",
+       "      <td>Hematology</td>\n",
+       "      <td>785-6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>7</td>\n",
+       "      <td>2584</td>\n",
+       "      <td>10000764</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>6228584</td>\n",
+       "      <td>51249</td>\n",
+       "      <td>2132-10-14 20:15:00</td>\n",
+       "      <td>2132-10-14 20:33:00</td>\n",
+       "      <td>35.1</td>\n",
+       "      <td>35.1</td>\n",
+       "      <td>...</td>\n",
+       "      <td>abnormal</td>\n",
+       "      <td>STAT</td>\n",
+       "      <td>None</td>\n",
+       "      <td>35420907</td>\n",
+       "      <td>1017</td>\n",
+       "      <td>51249</td>\n",
+       "      <td>MCHC</td>\n",
+       "      <td>Blood</td>\n",
+       "      <td>Hematology</td>\n",
+       "      <td>786-4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>8</td>\n",
+       "      <td>2585</td>\n",
+       "      <td>10000764</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>6228584</td>\n",
+       "      <td>51250</td>\n",
+       "      <td>2132-10-14 20:15:00</td>\n",
+       "      <td>2132-10-14 20:33:00</td>\n",
+       "      <td>93</td>\n",
+       "      <td>93.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>None</td>\n",
+       "      <td>STAT</td>\n",
+       "      <td>None</td>\n",
+       "      <td>35420907</td>\n",
+       "      <td>1019</td>\n",
+       "      <td>51250</td>\n",
+       "      <td>MCV</td>\n",
+       "      <td>Blood</td>\n",
+       "      <td>Hematology</td>\n",
+       "      <td>787-2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>9</td>\n",
+       "      <td>2586</td>\n",
+       "      <td>10000764</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>6228584</td>\n",
+       "      <td>51254</td>\n",
+       "      <td>2132-10-14 20:15:00</td>\n",
+       "      <td>2132-10-14 20:33:00</td>\n",
+       "      <td>3.8</td>\n",
+       "      <td>3.8</td>\n",
+       "      <td>...</td>\n",
+       "      <td>None</td>\n",
+       "      <td>STAT</td>\n",
+       "      <td>None</td>\n",
+       "      <td>35420907</td>\n",
+       "      <td>1051</td>\n",
+       "      <td>51254</td>\n",
+       "      <td>Monocytes</td>\n",
+       "      <td>Blood</td>\n",
+       "      <td>Hematology</td>\n",
+       "      <td>742-7</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>10 rows × 23 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  index  labevent_id  subject_id  hadm_id  specimen_id itemid  \\\n",
+       "0     0         2599    10000764      NaN     76601642  50971   \n",
+       "1     1         2578    10000764      NaN      6228584  51146   \n",
+       "2     2         2579    10000764      NaN      6228584  51200   \n",
+       "3     3         2580    10000764      NaN      6228584  51221   \n",
+       "4     4         2581    10000764      NaN      6228584  51222   \n",
+       "5     5         2582    10000764      NaN      6228584  51244   \n",
+       "6     6         2583    10000764      NaN      6228584  51248   \n",
+       "7     7         2584    10000764      NaN      6228584  51249   \n",
+       "8     8         2585    10000764      NaN      6228584  51250   \n",
+       "9     9         2586    10000764      NaN      6228584  51254   \n",
+       "\n",
+       "             charttime            storetime value  valuenum  ...      flag  \\\n",
+       "0  2132-10-14 20:15:00  2132-10-14 21:04:00   4.1       4.1  ...      None   \n",
+       "1  2132-10-14 20:15:00  2132-10-14 20:33:00   0.1       0.1  ...      None   \n",
+       "2  2132-10-14 20:15:00  2132-10-14 20:33:00   0.3       0.3  ...      None   \n",
+       "3  2132-10-14 20:15:00  2132-10-14 20:33:00  40.2      40.2  ...      None   \n",
+       "4  2132-10-14 20:15:00  2132-10-14 20:33:00  14.1      14.1  ...      None   \n",
+       "5  2132-10-14 20:15:00  2132-10-14 20:33:00   4.7       4.7  ...  abnormal   \n",
+       "6  2132-10-14 20:15:00  2132-10-14 20:33:00  32.8      32.8  ...  abnormal   \n",
+       "7  2132-10-14 20:15:00  2132-10-14 20:33:00  35.1      35.1  ...  abnormal   \n",
+       "8  2132-10-14 20:15:00  2132-10-14 20:33:00    93      93.0  ...      None   \n",
+       "9  2132-10-14 20:15:00  2132-10-14 20:33:00   3.8       3.8  ...      None   \n",
+       "\n",
+       "   priority  comments   stay_id index itemid        label  fluid    category  \\\n",
+       "0      STAT      None  35420907  1236  50971    Potassium  Blood   Chemistry   \n",
+       "1      STAT      None  35420907   192  51146    Basophils  Blood  Hematology   \n",
+       "2      STAT      None  35420907   619  51200  Eosinophils  Blood  Hematology   \n",
+       "3      STAT      None  35420907   765  51221   Hematocrit  Blood  Hematology   \n",
+       "4      STAT      None  35420907   775  51222   Hemoglobin  Blood  Hematology   \n",
+       "5      STAT      None  35420907   991  51244  Lymphocytes  Blood  Hematology   \n",
+       "6      STAT      None  35420907  1016  51248          MCH  Blood  Hematology   \n",
+       "7      STAT      None  35420907  1017  51249         MCHC  Blood  Hematology   \n",
+       "8      STAT      None  35420907  1019  51250          MCV  Blood  Hematology   \n",
+       "9      STAT      None  35420907  1051  51254    Monocytes  Blood  Hematology   \n",
+       "\n",
+       "  loinc_code  \n",
+       "0     2823-3  \n",
+       "1      704-7  \n",
+       "2      711-2  \n",
+       "3     4544-3  \n",
+       "4      718-7  \n",
+       "5      731-0  \n",
+       "6      785-6  \n",
+       "7      786-4  \n",
+       "8      787-2  \n",
+       "9      742-7  \n",
+       "\n",
+       "[10 rows x 23 columns]"
+      ]
+     },
+     "execution_count": 44,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# On récupère le libellé des examens\n",
+    "labevents_with_item = pd.read_sql(\"\"\"\n",
+    "    SELECT * \n",
+    "    FROM labevents as le\n",
+    "    LEFT JOIN d_labitems as li\n",
+    "        ON le.itemid = li.itemid\n",
+    "    LIMIT 1000\n",
+    "\"\"\", conn)\n",
+    "# Tous les types de jointures : LEFT, RIGHT, INNER, OUTER sont possible\n",
+    "\n",
+    "# L'object retourné est toujours un pandas dataframe sur lequel on peux faire les transformations habituelles\n",
+    "labevents_with_item.head(10)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Tables disponibles"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "L'ensemble des tables disponible est le suivant :"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>nom_de_la_table</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>d_labitems</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>diagnosis</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>edstays</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>labevents</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>medrecon</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>microbiologyevents</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>patients</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>triage</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      nom_de_la_table\n",
+       "0          d_labitems\n",
+       "1           diagnosis\n",
+       "2             edstays\n",
+       "3           labevents\n",
+       "4            medrecon\n",
+       "5  microbiologyevents\n",
+       "6            patients\n",
+       "7              triage"
+      ]
+     },
+     "execution_count": 51,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.read_sql(\"\"\"\n",
+    "    SELECT tbl_name AS nom_de_la_table\n",
+    "    FROM sqlite_master\n",
+    "    GROUP BY tbl_name\n",
+    "\"\"\", conn)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "L'ensemble des tables correspondent aux fichiers correspondant au sein de MIMIC-IV-ED.  \n",
+    "Il y a quelques exceptions qui proviennent de MIMIC-IV :\n",
+    "- labevents\n",
+    "- microbiologyevents\n",
+    "- d_labitems"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### d_labitems"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "d_labitems contient le libellé des examens de biologie.  \n",
+    "Chaque libellé a un identifiant unique `itemid`. Il est utile à l'identification des examens dans la table labevents.\n",
+    "\n",
+    "Plus de détail ici : https://mimic.mit.edu/docs/iv/modules/hosp/d_labitems/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>index</th>\n",
+       "      <th>itemid</th>\n",
+       "      <th>label</th>\n",
+       "      <th>fluid</th>\n",
+       "      <th>category</th>\n",
+       "      <th>loinc_code</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>51905</td>\n",
+       "      <td></td>\n",
+       "      <td>Other Body Fluid</td>\n",
+       "      <td>Chemistry</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>51532</td>\n",
+       "      <td>11-Deoxycorticosterone</td>\n",
+       "      <td>Blood</td>\n",
+       "      <td>Chemistry</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>51957</td>\n",
+       "      <td>17-Hydroxycorticosteroids</td>\n",
+       "      <td>Urine</td>\n",
+       "      <td>Chemistry</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>51958</td>\n",
+       "      <td>17-Ketosteroids, Urine</td>\n",
+       "      <td>Urine</td>\n",
+       "      <td>Chemistry</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>52068</td>\n",
+       "      <td>24 Hr</td>\n",
+       "      <td>Blood</td>\n",
+       "      <td>Hematology</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>5</td>\n",
+       "      <td>51066</td>\n",
+       "      <td>24 hr Calcium</td>\n",
+       "      <td>Urine</td>\n",
+       "      <td>Chemistry</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>6</td>\n",
+       "      <td>51067</td>\n",
+       "      <td>24 hr Creatinine</td>\n",
+       "      <td>Urine</td>\n",
+       "      <td>Chemistry</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>7</td>\n",
+       "      <td>51068</td>\n",
+       "      <td>24 hr Protein</td>\n",
+       "      <td>Urine</td>\n",
+       "      <td>Chemistry</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>8</td>\n",
+       "      <td>50853</td>\n",
+       "      <td>25-OH Vitamin D</td>\n",
+       "      <td>Blood</td>\n",
+       "      <td>Chemistry</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>9</td>\n",
+       "      <td>51533</td>\n",
+       "      <td>3t</td>\n",
+       "      <td>Blood</td>\n",
+       "      <td>Chemistry</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   index  itemid                      label             fluid    category  \\\n",
+       "0      0   51905                             Other Body Fluid   Chemistry   \n",
+       "1      1   51532     11-Deoxycorticosterone             Blood   Chemistry   \n",
+       "2      2   51957  17-Hydroxycorticosteroids             Urine   Chemistry   \n",
+       "3      3   51958     17-Ketosteroids, Urine             Urine   Chemistry   \n",
+       "4      4   52068                      24 Hr             Blood  Hematology   \n",
+       "5      5   51066              24 hr Calcium             Urine   Chemistry   \n",
+       "6      6   51067           24 hr Creatinine             Urine   Chemistry   \n",
+       "7      7   51068              24 hr Protein             Urine   Chemistry   \n",
+       "8      8   50853            25-OH Vitamin D             Blood   Chemistry   \n",
+       "9      9   51533                         3t             Blood   Chemistry   \n",
+       "\n",
+       "  loinc_code  \n",
+       "0       None  \n",
+       "1       None  \n",
+       "2       None  \n",
+       "3       None  \n",
+       "4       None  \n",
+       "5       None  \n",
+       "6       None  \n",
+       "7       None  \n",
+       "8       None  \n",
+       "9       None  "
+      ]
+     },
+     "execution_count": 54,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.read_sql(\"SELECT * FROM d_labitems LIMIT 10\", conn)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### labevents"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "labevents contient les examents de biologie ainsi que leurs résultat.\n",
+    "Chaque examen a deux dates associés :\n",
+    "- charttime qui est la date de réception de l'examen, on peux la considérer comme une approximation de la date de prélèvement\n",
+    "- storetime date de mise à disposition du résultat de l'examen, selon l'examen celle-ci est plus où moins espacé de la date de prélèvement\n",
+    "\n",
+    "Chaque ligne correspond au résultat d'un examen (par exemple, dosage du potassium, identifié par `itemid`) et est identifié par un identifiant unique `labevent_id`. Plusieurs examens peuvent être effectué sur le même prélèvement sanguin.  \n",
+    "Chaque prélèvement est identifié par un identifiant unique `specimen_id`.  \n",
+    "Enfin, `subject_id` identifie le patient, `hadm_id` l'identifiant de séjour lorsque le patient a été hospitalisé au décours de son passage aux urgences et `stay_id` l'identifiant du passage aux urgences.\n",
+    "\n",
+    "Plus de détail ici : https://mimic.mit.edu/docs/iv/modules/hosp/labevents/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>index</th>\n",
+       "      <th>labevent_id</th>\n",
+       "      <th>subject_id</th>\n",
+       "      <th>hadm_id</th>\n",
+       "      <th>specimen_id</th>\n",
+       "      <th>itemid</th>\n",
+       "      <th>charttime</th>\n",
+       "      <th>storetime</th>\n",
+       "      <th>value</th>\n",
+       "      <th>valuenum</th>\n",
+       "      <th>valueuom</th>\n",
+       "      <th>ref_range_lower</th>\n",
+       "      <th>ref_range_upper</th>\n",
+       "      <th>flag</th>\n",
+       "      <th>priority</th>\n",
+       "      <th>comments</th>\n",
+       "      <th>stay_id</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>2599</td>\n",
+       "      <td>10000764</td>\n",
+       "      <td>None</td>\n",
+       "      <td>76601642</td>\n",
+       "      <td>50971</td>\n",
+       "      <td>2132-10-14 20:15:00</td>\n",
+       "      <td>2132-10-14 21:04:00</td>\n",
+       "      <td>4.1</td>\n",
+       "      <td>4.1</td>\n",
+       "      <td>mEq/L</td>\n",
+       "      <td>3.3</td>\n",
+       "      <td>5.1</td>\n",
+       "      <td>None</td>\n",
+       "      <td>STAT</td>\n",
+       "      <td>None</td>\n",
+       "      <td>35420907</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>2578</td>\n",
+       "      <td>10000764</td>\n",
+       "      <td>None</td>\n",
+       "      <td>6228584</td>\n",
+       "      <td>51146</td>\n",
+       "      <td>2132-10-14 20:15:00</td>\n",
+       "      <td>2132-10-14 20:33:00</td>\n",
+       "      <td>0.1</td>\n",
+       "      <td>0.1</td>\n",
+       "      <td>%</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>None</td>\n",
+       "      <td>STAT</td>\n",
+       "      <td>None</td>\n",
+       "      <td>35420907</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>2579</td>\n",
+       "      <td>10000764</td>\n",
+       "      <td>None</td>\n",
+       "      <td>6228584</td>\n",
+       "      <td>51200</td>\n",
+       "      <td>2132-10-14 20:15:00</td>\n",
+       "      <td>2132-10-14 20:33:00</td>\n",
+       "      <td>0.3</td>\n",
+       "      <td>0.3</td>\n",
+       "      <td>%</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>None</td>\n",
+       "      <td>STAT</td>\n",
+       "      <td>None</td>\n",
+       "      <td>35420907</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>2580</td>\n",
+       "      <td>10000764</td>\n",
+       "      <td>None</td>\n",
+       "      <td>6228584</td>\n",
+       "      <td>51221</td>\n",
+       "      <td>2132-10-14 20:15:00</td>\n",
+       "      <td>2132-10-14 20:33:00</td>\n",
+       "      <td>40.2</td>\n",
+       "      <td>40.2</td>\n",
+       "      <td>%</td>\n",
+       "      <td>40.0</td>\n",
+       "      <td>52.0</td>\n",
+       "      <td>None</td>\n",
+       "      <td>STAT</td>\n",
+       "      <td>None</td>\n",
+       "      <td>35420907</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>2581</td>\n",
+       "      <td>10000764</td>\n",
+       "      <td>None</td>\n",
+       "      <td>6228584</td>\n",
+       "      <td>51222</td>\n",
+       "      <td>2132-10-14 20:15:00</td>\n",
+       "      <td>2132-10-14 20:33:00</td>\n",
+       "      <td>14.1</td>\n",
+       "      <td>14.1</td>\n",
+       "      <td>g/dL</td>\n",
+       "      <td>14.0</td>\n",
+       "      <td>18.0</td>\n",
+       "      <td>None</td>\n",
+       "      <td>STAT</td>\n",
+       "      <td>None</td>\n",
+       "      <td>35420907</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>5</td>\n",
+       "      <td>2582</td>\n",
+       "      <td>10000764</td>\n",
+       "      <td>None</td>\n",
+       "      <td>6228584</td>\n",
+       "      <td>51244</td>\n",
+       "      <td>2132-10-14 20:15:00</td>\n",
+       "      <td>2132-10-14 20:33:00</td>\n",
+       "      <td>4.7</td>\n",
+       "      <td>4.7</td>\n",
+       "      <td>%</td>\n",
+       "      <td>18.0</td>\n",
+       "      <td>42.0</td>\n",
+       "      <td>abnormal</td>\n",
+       "      <td>STAT</td>\n",
+       "      <td>None</td>\n",
+       "      <td>35420907</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>6</td>\n",
+       "      <td>2583</td>\n",
+       "      <td>10000764</td>\n",
+       "      <td>None</td>\n",
+       "      <td>6228584</td>\n",
+       "      <td>51248</td>\n",
+       "      <td>2132-10-14 20:15:00</td>\n",
+       "      <td>2132-10-14 20:33:00</td>\n",
+       "      <td>32.8</td>\n",
+       "      <td>32.8</td>\n",
+       "      <td>pg</td>\n",
+       "      <td>27.0</td>\n",
+       "      <td>32.0</td>\n",
+       "      <td>abnormal</td>\n",
+       "      <td>STAT</td>\n",
+       "      <td>None</td>\n",
+       "      <td>35420907</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>7</td>\n",
+       "      <td>2584</td>\n",
+       "      <td>10000764</td>\n",
+       "      <td>None</td>\n",
+       "      <td>6228584</td>\n",
+       "      <td>51249</td>\n",
+       "      <td>2132-10-14 20:15:00</td>\n",
+       "      <td>2132-10-14 20:33:00</td>\n",
+       "      <td>35.1</td>\n",
+       "      <td>35.1</td>\n",
+       "      <td>%</td>\n",
+       "      <td>31.0</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>abnormal</td>\n",
+       "      <td>STAT</td>\n",
+       "      <td>None</td>\n",
+       "      <td>35420907</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>8</td>\n",
+       "      <td>2585</td>\n",
+       "      <td>10000764</td>\n",
+       "      <td>None</td>\n",
+       "      <td>6228584</td>\n",
+       "      <td>51250</td>\n",
+       "      <td>2132-10-14 20:15:00</td>\n",
+       "      <td>2132-10-14 20:33:00</td>\n",
+       "      <td>93</td>\n",
+       "      <td>93.0</td>\n",
+       "      <td>fL</td>\n",
+       "      <td>82.0</td>\n",
+       "      <td>98.0</td>\n",
+       "      <td>None</td>\n",
+       "      <td>STAT</td>\n",
+       "      <td>None</td>\n",
+       "      <td>35420907</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>9</td>\n",
+       "      <td>2586</td>\n",
+       "      <td>10000764</td>\n",
+       "      <td>None</td>\n",
+       "      <td>6228584</td>\n",
+       "      <td>51254</td>\n",
+       "      <td>2132-10-14 20:15:00</td>\n",
+       "      <td>2132-10-14 20:33:00</td>\n",
+       "      <td>3.8</td>\n",
+       "      <td>3.8</td>\n",
+       "      <td>%</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>11.0</td>\n",
+       "      <td>None</td>\n",
+       "      <td>STAT</td>\n",
+       "      <td>None</td>\n",
+       "      <td>35420907</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   index  labevent_id  subject_id hadm_id  specimen_id  itemid  \\\n",
+       "0      0         2599    10000764    None     76601642   50971   \n",
+       "1      1         2578    10000764    None      6228584   51146   \n",
+       "2      2         2579    10000764    None      6228584   51200   \n",
+       "3      3         2580    10000764    None      6228584   51221   \n",
+       "4      4         2581    10000764    None      6228584   51222   \n",
+       "5      5         2582    10000764    None      6228584   51244   \n",
+       "6      6         2583    10000764    None      6228584   51248   \n",
+       "7      7         2584    10000764    None      6228584   51249   \n",
+       "8      8         2585    10000764    None      6228584   51250   \n",
+       "9      9         2586    10000764    None      6228584   51254   \n",
+       "\n",
+       "             charttime            storetime value  valuenum valueuom  \\\n",
+       "0  2132-10-14 20:15:00  2132-10-14 21:04:00   4.1       4.1    mEq/L   \n",
+       "1  2132-10-14 20:15:00  2132-10-14 20:33:00   0.1       0.1        %   \n",
+       "2  2132-10-14 20:15:00  2132-10-14 20:33:00   0.3       0.3        %   \n",
+       "3  2132-10-14 20:15:00  2132-10-14 20:33:00  40.2      40.2        %   \n",
+       "4  2132-10-14 20:15:00  2132-10-14 20:33:00  14.1      14.1     g/dL   \n",
+       "5  2132-10-14 20:15:00  2132-10-14 20:33:00   4.7       4.7        %   \n",
+       "6  2132-10-14 20:15:00  2132-10-14 20:33:00  32.8      32.8       pg   \n",
+       "7  2132-10-14 20:15:00  2132-10-14 20:33:00  35.1      35.1        %   \n",
+       "8  2132-10-14 20:15:00  2132-10-14 20:33:00    93      93.0       fL   \n",
+       "9  2132-10-14 20:15:00  2132-10-14 20:33:00   3.8       3.8        %   \n",
+       "\n",
+       "   ref_range_lower  ref_range_upper      flag priority comments   stay_id  \n",
+       "0              3.3              5.1      None     STAT     None  35420907  \n",
+       "1              0.0              2.0      None     STAT     None  35420907  \n",
+       "2              0.0              4.0      None     STAT     None  35420907  \n",
+       "3             40.0             52.0      None     STAT     None  35420907  \n",
+       "4             14.0             18.0      None     STAT     None  35420907  \n",
+       "5             18.0             42.0  abnormal     STAT     None  35420907  \n",
+       "6             27.0             32.0  abnormal     STAT     None  35420907  \n",
+       "7             31.0             35.0  abnormal     STAT     None  35420907  \n",
+       "8             82.0             98.0      None     STAT     None  35420907  \n",
+       "9              2.0             11.0      None     STAT     None  35420907  "
+      ]
+     },
+     "execution_count": 60,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.read_sql(\"SELECT * FROM labevents LIMIT 10\", conn)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### microbiologyevents"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Parmis les examens de biologie, certains revêtent d'un statut particulier, il s'agit des examens de microbiologie, c'est à dire les examens recherchant la présence de bactérie dans un prélèvement biologique (sang, urine, selle, liquide céphalo-rachidien ...).\n",
+    "\n",
+    "Ils sont enregistrés dans une table dédié : microbiologyevents  \n",
+    "Le champs \"spec_type_desc\" contient le site de prélèvement, souvent le sang (BLOOD CULTURE) où les urines (URINES) (cf plus bas).\n",
+    "\n",
+    "L'interprétation de ces examens faisant l'objet de spécificté, on retrouve de nombreuses colonnes spécifiant : la valeur quantitative de l'analyse, son interprétation, le nimbre de résistance de la bactérie identifié ... En première intention, il semble qu'il s'agit de champs que nous n'aurons pas à exploiter.\n",
+    "\n",
+    "Plus de détail ici : https://mimic.mit.edu/docs/iv/modules/hosp/microbiologyevents/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 69,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>index</th>\n",
+       "      <th>microevent_id</th>\n",
+       "      <th>subject_id</th>\n",
+       "      <th>hadm_id</th>\n",
+       "      <th>micro_specimen_id</th>\n",
+       "      <th>chartdate</th>\n",
+       "      <th>charttime</th>\n",
+       "      <th>spec_itemid</th>\n",
+       "      <th>spec_type_desc</th>\n",
+       "      <th>test_seq</th>\n",
+       "      <th>...</th>\n",
+       "      <th>isolate_num</th>\n",
+       "      <th>quantity</th>\n",
+       "      <th>ab_itemid</th>\n",
+       "      <th>ab_name</th>\n",
+       "      <th>dilution_text</th>\n",
+       "      <th>dilution_comparison</th>\n",
+       "      <th>dilution_value</th>\n",
+       "      <th>interpretation</th>\n",
+       "      <th>comments</th>\n",
+       "      <th>stay_id</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>1562</td>\n",
+       "      <td>10004235</td>\n",
+       "      <td>None</td>\n",
+       "      <td>139903</td>\n",
+       "      <td>2196-02-24 00:00:00</td>\n",
+       "      <td>2196-02-24 12:40:00</td>\n",
+       "      <td>70012</td>\n",
+       "      <td>BLOOD CULTURE</td>\n",
+       "      <td>1</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>None</td>\n",
+       "      <td>90013.0</td>\n",
+       "      <td>TOBRAMYCIN</td>\n",
+       "      <td>&lt;=1</td>\n",
+       "      <td>&lt;=</td>\n",
+       "      <td>1.00</td>\n",
+       "      <td>S</td>\n",
+       "      <td>None</td>\n",
+       "      <td>38926302</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1558</td>\n",
+       "      <td>10004235</td>\n",
+       "      <td>None</td>\n",
+       "      <td>139903</td>\n",
+       "      <td>2196-02-24 00:00:00</td>\n",
+       "      <td>2196-02-24 12:40:00</td>\n",
+       "      <td>70012</td>\n",
+       "      <td>BLOOD CULTURE</td>\n",
+       "      <td>1</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>None</td>\n",
+       "      <td>90004.0</td>\n",
+       "      <td>AMPICILLIN</td>\n",
+       "      <td>8</td>\n",
+       "      <td>=</td>\n",
+       "      <td>8.00</td>\n",
+       "      <td>S</td>\n",
+       "      <td>None</td>\n",
+       "      <td>38926302</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>1559</td>\n",
+       "      <td>10004235</td>\n",
+       "      <td>None</td>\n",
+       "      <td>139903</td>\n",
+       "      <td>2196-02-24 00:00:00</td>\n",
+       "      <td>2196-02-24 12:40:00</td>\n",
+       "      <td>70012</td>\n",
+       "      <td>BLOOD CULTURE</td>\n",
+       "      <td>1</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>None</td>\n",
+       "      <td>90005.0</td>\n",
+       "      <td>CEFAZOLIN</td>\n",
+       "      <td>&lt;=4</td>\n",
+       "      <td>&lt;=</td>\n",
+       "      <td>4.00</td>\n",
+       "      <td>S</td>\n",
+       "      <td>None</td>\n",
+       "      <td>38926302</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>1560</td>\n",
+       "      <td>10004235</td>\n",
+       "      <td>None</td>\n",
+       "      <td>139903</td>\n",
+       "      <td>2196-02-24 00:00:00</td>\n",
+       "      <td>2196-02-24 12:40:00</td>\n",
+       "      <td>70012</td>\n",
+       "      <td>BLOOD CULTURE</td>\n",
+       "      <td>1</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>None</td>\n",
+       "      <td>90008.0</td>\n",
+       "      <td>TRIMETHOPRIM/SULFA</td>\n",
+       "      <td>&lt;=1</td>\n",
+       "      <td>&lt;=</td>\n",
+       "      <td>1.00</td>\n",
+       "      <td>S</td>\n",
+       "      <td>None</td>\n",
+       "      <td>38926302</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>1561</td>\n",
+       "      <td>10004235</td>\n",
+       "      <td>None</td>\n",
+       "      <td>139903</td>\n",
+       "      <td>2196-02-24 00:00:00</td>\n",
+       "      <td>2196-02-24 12:40:00</td>\n",
+       "      <td>70012</td>\n",
+       "      <td>BLOOD CULTURE</td>\n",
+       "      <td>1</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>None</td>\n",
+       "      <td>90012.0</td>\n",
+       "      <td>GENTAMICIN</td>\n",
+       "      <td>&lt;=1</td>\n",
+       "      <td>&lt;=</td>\n",
+       "      <td>1.00</td>\n",
+       "      <td>S</td>\n",
+       "      <td>None</td>\n",
+       "      <td>38926302</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>5</td>\n",
+       "      <td>1563</td>\n",
+       "      <td>10004235</td>\n",
+       "      <td>None</td>\n",
+       "      <td>139903</td>\n",
+       "      <td>2196-02-24 00:00:00</td>\n",
+       "      <td>2196-02-24 12:40:00</td>\n",
+       "      <td>70012</td>\n",
+       "      <td>BLOOD CULTURE</td>\n",
+       "      <td>1</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>None</td>\n",
+       "      <td>90017.0</td>\n",
+       "      <td>CEFTAZIDIME</td>\n",
+       "      <td>&lt;=1</td>\n",
+       "      <td>&lt;=</td>\n",
+       "      <td>1.00</td>\n",
+       "      <td>S</td>\n",
+       "      <td>None</td>\n",
+       "      <td>38926302</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>6</td>\n",
+       "      <td>1564</td>\n",
+       "      <td>10004235</td>\n",
+       "      <td>None</td>\n",
+       "      <td>139903</td>\n",
+       "      <td>2196-02-24 00:00:00</td>\n",
+       "      <td>2196-02-24 12:40:00</td>\n",
+       "      <td>70012</td>\n",
+       "      <td>BLOOD CULTURE</td>\n",
+       "      <td>1</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>None</td>\n",
+       "      <td>90018.0</td>\n",
+       "      <td>CEFTRIAXONE</td>\n",
+       "      <td>&lt;=1</td>\n",
+       "      <td>&lt;=</td>\n",
+       "      <td>1.00</td>\n",
+       "      <td>S</td>\n",
+       "      <td>None</td>\n",
+       "      <td>38926302</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>7</td>\n",
+       "      <td>1565</td>\n",
+       "      <td>10004235</td>\n",
+       "      <td>None</td>\n",
+       "      <td>139903</td>\n",
+       "      <td>2196-02-24 00:00:00</td>\n",
+       "      <td>2196-02-24 12:40:00</td>\n",
+       "      <td>70012</td>\n",
+       "      <td>BLOOD CULTURE</td>\n",
+       "      <td>1</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>None</td>\n",
+       "      <td>90019.0</td>\n",
+       "      <td>CIPROFLOXACIN</td>\n",
+       "      <td>&lt;=0.25</td>\n",
+       "      <td>&lt;=</td>\n",
+       "      <td>0.25</td>\n",
+       "      <td>S</td>\n",
+       "      <td>None</td>\n",
+       "      <td>38926302</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>8</td>\n",
+       "      <td>1566</td>\n",
+       "      <td>10004235</td>\n",
+       "      <td>None</td>\n",
+       "      <td>139903</td>\n",
+       "      <td>2196-02-24 00:00:00</td>\n",
+       "      <td>2196-02-24 12:40:00</td>\n",
+       "      <td>70012</td>\n",
+       "      <td>BLOOD CULTURE</td>\n",
+       "      <td>1</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>None</td>\n",
+       "      <td>90022.0</td>\n",
+       "      <td>AMPICILLIN/SULBACTAM</td>\n",
+       "      <td>&lt;=2</td>\n",
+       "      <td>&lt;=</td>\n",
+       "      <td>2.00</td>\n",
+       "      <td>S</td>\n",
+       "      <td>None</td>\n",
+       "      <td>38926302</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>9</td>\n",
+       "      <td>1567</td>\n",
+       "      <td>10004235</td>\n",
+       "      <td>None</td>\n",
+       "      <td>139903</td>\n",
+       "      <td>2196-02-24 00:00:00</td>\n",
+       "      <td>2196-02-24 12:40:00</td>\n",
+       "      <td>70012</td>\n",
+       "      <td>BLOOD CULTURE</td>\n",
+       "      <td>1</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>None</td>\n",
+       "      <td>90026.0</td>\n",
+       "      <td>PIPERACILLIN/TAZO</td>\n",
+       "      <td>&lt;=4</td>\n",
+       "      <td>&lt;=</td>\n",
+       "      <td>4.00</td>\n",
+       "      <td>S</td>\n",
+       "      <td>None</td>\n",
+       "      <td>38926302</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>10 rows × 26 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   index  microevent_id  subject_id hadm_id  micro_specimen_id  \\\n",
+       "0      0           1562    10004235    None             139903   \n",
+       "1      1           1558    10004235    None             139903   \n",
+       "2      2           1559    10004235    None             139903   \n",
+       "3      3           1560    10004235    None             139903   \n",
+       "4      4           1561    10004235    None             139903   \n",
+       "5      5           1563    10004235    None             139903   \n",
+       "6      6           1564    10004235    None             139903   \n",
+       "7      7           1565    10004235    None             139903   \n",
+       "8      8           1566    10004235    None             139903   \n",
+       "9      9           1567    10004235    None             139903   \n",
+       "\n",
+       "             chartdate            charttime  spec_itemid spec_type_desc  \\\n",
+       "0  2196-02-24 00:00:00  2196-02-24 12:40:00        70012  BLOOD CULTURE   \n",
+       "1  2196-02-24 00:00:00  2196-02-24 12:40:00        70012  BLOOD CULTURE   \n",
+       "2  2196-02-24 00:00:00  2196-02-24 12:40:00        70012  BLOOD CULTURE   \n",
+       "3  2196-02-24 00:00:00  2196-02-24 12:40:00        70012  BLOOD CULTURE   \n",
+       "4  2196-02-24 00:00:00  2196-02-24 12:40:00        70012  BLOOD CULTURE   \n",
+       "5  2196-02-24 00:00:00  2196-02-24 12:40:00        70012  BLOOD CULTURE   \n",
+       "6  2196-02-24 00:00:00  2196-02-24 12:40:00        70012  BLOOD CULTURE   \n",
+       "7  2196-02-24 00:00:00  2196-02-24 12:40:00        70012  BLOOD CULTURE   \n",
+       "8  2196-02-24 00:00:00  2196-02-24 12:40:00        70012  BLOOD CULTURE   \n",
+       "9  2196-02-24 00:00:00  2196-02-24 12:40:00        70012  BLOOD CULTURE   \n",
+       "\n",
+       "   test_seq  ... isolate_num quantity  ab_itemid               ab_name  \\\n",
+       "0         1  ...         1.0     None    90013.0            TOBRAMYCIN   \n",
+       "1         1  ...         1.0     None    90004.0            AMPICILLIN   \n",
+       "2         1  ...         1.0     None    90005.0             CEFAZOLIN   \n",
+       "3         1  ...         1.0     None    90008.0    TRIMETHOPRIM/SULFA   \n",
+       "4         1  ...         1.0     None    90012.0            GENTAMICIN   \n",
+       "5         1  ...         1.0     None    90017.0           CEFTAZIDIME   \n",
+       "6         1  ...         1.0     None    90018.0           CEFTRIAXONE   \n",
+       "7         1  ...         1.0     None    90019.0         CIPROFLOXACIN   \n",
+       "8         1  ...         1.0     None    90022.0  AMPICILLIN/SULBACTAM   \n",
+       "9         1  ...         1.0     None    90026.0     PIPERACILLIN/TAZO   \n",
+       "\n",
+       "   dilution_text dilution_comparison  dilution_value interpretation  comments  \\\n",
+       "0            <=1          <=                    1.00              S      None   \n",
+       "1              8          =                     8.00              S      None   \n",
+       "2            <=4          <=                    4.00              S      None   \n",
+       "3            <=1          <=                    1.00              S      None   \n",
+       "4            <=1          <=                    1.00              S      None   \n",
+       "5            <=1          <=                    1.00              S      None   \n",
+       "6            <=1          <=                    1.00              S      None   \n",
+       "7         <=0.25          <=                    0.25              S      None   \n",
+       "8            <=2          <=                    2.00              S      None   \n",
+       "9            <=4          <=                    4.00              S      None   \n",
+       "\n",
+       "    stay_id  \n",
+       "0  38926302  \n",
+       "1  38926302  \n",
+       "2  38926302  \n",
+       "3  38926302  \n",
+       "4  38926302  \n",
+       "5  38926302  \n",
+       "6  38926302  \n",
+       "7  38926302  \n",
+       "8  38926302  \n",
+       "9  38926302  \n",
+       "\n",
+       "[10 rows x 26 columns]"
+      ]
+     },
+     "execution_count": 69,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.read_sql(\"SELECT * FROM microbiologyevents LIMIT 10\", conn)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 70,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>spec_type_desc</th>\n",
+       "      <th>total</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>53</th>\n",
+       "      <td>URINE</td>\n",
+       "      <td>186509</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>BLOOD CULTURE</td>\n",
+       "      <td>177616</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>47</th>\n",
+       "      <td>SWAB</td>\n",
+       "      <td>15476</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>35</th>\n",
+       "      <td>PERITONEAL FLUID</td>\n",
+       "      <td>5923</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>45</th>\n",
+       "      <td>STOOL</td>\n",
+       "      <td>5426</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>CSF;SPINAL FLUID</td>\n",
+       "      <td>3847</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>ABSCESS</td>\n",
+       "      <td>3011</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>32</th>\n",
+       "      <td>JOINT FLUID</td>\n",
+       "      <td>2341</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52</th>\n",
+       "      <td>TISSUE</td>\n",
+       "      <td>1825</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>44</th>\n",
+       "      <td>SPUTUM</td>\n",
+       "      <td>1587</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      spec_type_desc   total\n",
+       "53             URINE  186509\n",
+       "6      BLOOD CULTURE  177616\n",
+       "47              SWAB   15476\n",
+       "35  PERITONEAL FLUID    5923\n",
+       "45             STOOL    5426\n",
+       "18  CSF;SPINAL FLUID    3847\n",
+       "1            ABSCESS    3011\n",
+       "32       JOINT FLUID    2341\n",
+       "52            TISSUE    1825\n",
+       "44            SPUTUM    1587"
+      ]
+     },
+     "execution_count": 70,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.read_sql(\"SELECT spec_type_desc, COUNT(0) total FROM microbiologyevents GROUP BY spec_type_desc\", conn).sort_values(\"total\", ascending=False).head(10)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### edstays"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "La table edstays contient la liste de tout les passages aux urgences.  \n",
+    "Sont enregistré : l'identifiant du patient, l'identifiant du séjour si le patient est hospitalisé en aval de son passage aux urgences, l'identifiant du passage aux urgences, la date et heure d'entrée et sortie.\n",
+    "\n",
+    "Plus de détail ici : https://mimic.mit.edu/docs/iv/modules/ed/edstays/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 62,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>index</th>\n",
+       "      <th>subject_id</th>\n",
+       "      <th>hadm_id</th>\n",
+       "      <th>stay_id</th>\n",
+       "      <th>intime</th>\n",
+       "      <th>outtime</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>10000115</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>38081480</td>\n",
+       "      <td>2154-12-10 02:04:00</td>\n",
+       "      <td>2154-12-10 02:16:00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>10000115</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>30295111</td>\n",
+       "      <td>2154-12-17 16:37:00</td>\n",
+       "      <td>2154-12-17 17:38:00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>10000473</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>33267868</td>\n",
+       "      <td>2138-03-15 20:07:00</td>\n",
+       "      <td>2138-03-15 20:26:00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>10000764</td>\n",
+       "      <td>27897940.0</td>\n",
+       "      <td>35420907</td>\n",
+       "      <td>2132-10-14 19:31:00</td>\n",
+       "      <td>2132-10-14 23:32:59</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>10001038</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>34301067</td>\n",
+       "      <td>2149-08-07 08:51:00</td>\n",
+       "      <td>2149-08-07 08:55:00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>5</td>\n",
+       "      <td>10001757</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>30596003</td>\n",
+       "      <td>2170-10-15 11:06:00</td>\n",
+       "      <td>2170-10-15 11:26:00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>6</td>\n",
+       "      <td>10002563</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>36308013</td>\n",
+       "      <td>2192-11-08 14:00:00</td>\n",
+       "      <td>2192-11-08 14:23:00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>7</td>\n",
+       "      <td>10003299</td>\n",
+       "      <td>28891311.0</td>\n",
+       "      <td>33780565</td>\n",
+       "      <td>2178-12-10 17:04:00</td>\n",
+       "      <td>2178-12-10 21:36:00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>8</td>\n",
+       "      <td>10003299</td>\n",
+       "      <td>21404960.0</td>\n",
+       "      <td>39680380</td>\n",
+       "      <td>2179-06-30 09:18:00</td>\n",
+       "      <td>2179-06-30 16:35:45</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>9</td>\n",
+       "      <td>10003299</td>\n",
+       "      <td>29323205.0</td>\n",
+       "      <td>32908139</td>\n",
+       "      <td>2181-10-22 11:46:00</td>\n",
+       "      <td>2181-10-22 19:09:15</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   index  subject_id     hadm_id   stay_id               intime  \\\n",
+       "0      0    10000115         NaN  38081480  2154-12-10 02:04:00   \n",
+       "1      1    10000115         NaN  30295111  2154-12-17 16:37:00   \n",
+       "2      2    10000473         NaN  33267868  2138-03-15 20:07:00   \n",
+       "3      3    10000764  27897940.0  35420907  2132-10-14 19:31:00   \n",
+       "4      4    10001038         NaN  34301067  2149-08-07 08:51:00   \n",
+       "5      5    10001757         NaN  30596003  2170-10-15 11:06:00   \n",
+       "6      6    10002563         NaN  36308013  2192-11-08 14:00:00   \n",
+       "7      7    10003299  28891311.0  33780565  2178-12-10 17:04:00   \n",
+       "8      8    10003299  21404960.0  39680380  2179-06-30 09:18:00   \n",
+       "9      9    10003299  29323205.0  32908139  2181-10-22 11:46:00   \n",
+       "\n",
+       "               outtime  \n",
+       "0  2154-12-10 02:16:00  \n",
+       "1  2154-12-17 17:38:00  \n",
+       "2  2138-03-15 20:26:00  \n",
+       "3  2132-10-14 23:32:59  \n",
+       "4  2149-08-07 08:55:00  \n",
+       "5  2170-10-15 11:26:00  \n",
+       "6  2192-11-08 14:23:00  \n",
+       "7  2178-12-10 21:36:00  \n",
+       "8  2179-06-30 16:35:45  \n",
+       "9  2181-10-22 19:09:15  "
+      ]
+     },
+     "execution_count": 62,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.read_sql(\"SELECT * FROM edstays LIMIT 10\", conn)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### patients"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "La table patient contient la liste des patients ainsi que les information relative à leurs age et genre.  \n",
+    "La base MIMIC-IV étant pseudonymisé, toutes les dates ont été falsifié, la table patient nous donne une approximation de la véritable date des enregistrement dans le champs anchor_year_group.\n",
+    "\n",
+    "Plus d'informations ici : https://mimic.mit.edu/docs/iv/modules/core/patients/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 74,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>index</th>\n",
+       "      <th>subject_id</th>\n",
+       "      <th>gender</th>\n",
+       "      <th>anchor_age</th>\n",
+       "      <th>anchor_year</th>\n",
+       "      <th>anchor_year_group</th>\n",
+       "      <th>dod</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>10021917</td>\n",
+       "      <td>M</td>\n",
+       "      <td>54</td>\n",
+       "      <td>2147</td>\n",
+       "      <td>2017 - 2019</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>10033879</td>\n",
+       "      <td>F</td>\n",
+       "      <td>28</td>\n",
+       "      <td>2173</td>\n",
+       "      <td>2011 - 2013</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>10036909</td>\n",
+       "      <td>M</td>\n",
+       "      <td>50</td>\n",
+       "      <td>2167</td>\n",
+       "      <td>2011 - 2013</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>10050355</td>\n",
+       "      <td>F</td>\n",
+       "      <td>55</td>\n",
+       "      <td>2168</td>\n",
+       "      <td>2014 - 2016</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>10050389</td>\n",
+       "      <td>M</td>\n",
+       "      <td>43</td>\n",
+       "      <td>2133</td>\n",
+       "      <td>2014 - 2016</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>5</td>\n",
+       "      <td>10050411</td>\n",
+       "      <td>F</td>\n",
+       "      <td>52</td>\n",
+       "      <td>2150</td>\n",
+       "      <td>2017 - 2019</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>6</td>\n",
+       "      <td>10068290</td>\n",
+       "      <td>M</td>\n",
+       "      <td>29</td>\n",
+       "      <td>2151</td>\n",
+       "      <td>2014 - 2016</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>7</td>\n",
+       "      <td>10127166</td>\n",
+       "      <td>F</td>\n",
+       "      <td>51</td>\n",
+       "      <td>2149</td>\n",
+       "      <td>2008 - 2010</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>8</td>\n",
+       "      <td>10143090</td>\n",
+       "      <td>M</td>\n",
+       "      <td>48</td>\n",
+       "      <td>2155</td>\n",
+       "      <td>2011 - 2013</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>9</td>\n",
+       "      <td>10148710</td>\n",
+       "      <td>M</td>\n",
+       "      <td>67</td>\n",
+       "      <td>2137</td>\n",
+       "      <td>2008 - 2010</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   index  subject_id gender  anchor_age  anchor_year anchor_year_group   dod\n",
+       "0      0    10021917      M          54         2147       2017 - 2019  None\n",
+       "1      1    10033879      F          28         2173       2011 - 2013  None\n",
+       "2      2    10036909      M          50         2167       2011 - 2013  None\n",
+       "3      3    10050355      F          55         2168       2014 - 2016  None\n",
+       "4      4    10050389      M          43         2133       2014 - 2016  None\n",
+       "5      5    10050411      F          52         2150       2017 - 2019  None\n",
+       "6      6    10068290      M          29         2151       2014 - 2016  None\n",
+       "7      7    10127166      F          51         2149       2008 - 2010  None\n",
+       "8      8    10143090      M          48         2155       2011 - 2013  None\n",
+       "9      9    10148710      M          67         2137       2008 - 2010  None"
+      ]
+     },
+     "execution_count": 74,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.read_sql(\"SELECT * FROM patients LIMIT 10\", conn)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### triage"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "La table triage contient les données de \"tri\" des patients.  \n",
+    "Le circuit classique d'un passage aux urgences est le suivant :\n",
+    "- Enregistrement administratif du patient\n",
+    "- Patient reçu rapidement par un infirmier (en France on parle d'infirmier d'accueil et d'orientation, où IAO)\n",
+    "- Un niveau de sévèrité est attribué au patient par l'IAO\n",
+    "- Le médecin voit ensuite le patient, dès qu'il le peut, par ordre de priorité et d'attente\n",
+    "- Si le médecin a besoin de plus d'éléments, il prescrit des examens complémentaires au patient\n",
+    "\n",
+    "Ainsi, l'ensemble des informations disponible à l'arrivé du patient sont enregistré dans cette table."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Le champs chiefcomplaint est un champs de texte libre, le reste contient des variables quantitative de paramètre vitaux recueillis par l'IAO.  \n",
+    "Ce champs peux contenir une liste, qui est alors séparé par une virgule.  \n",
+    "Il s'agit d'un champs qui a fait l'objet d'une désidentification, ainsi si une information personnelle était présente dans la liste, elle est remplacé par le caractère \"___\" (3 underscore à la suite).\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 77,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>index</th>\n",
+       "      <th>subject_id</th>\n",
+       "      <th>stay_id</th>\n",
+       "      <th>temperature</th>\n",
+       "      <th>heartrate</th>\n",
+       "      <th>resprate</th>\n",
+       "      <th>o2sat</th>\n",
+       "      <th>sbp</th>\n",
+       "      <th>dbp</th>\n",
+       "      <th>pain</th>\n",
+       "      <th>acuity</th>\n",
+       "      <th>chiefcomplaint</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>15585360</td>\n",
+       "      <td>37573921</td>\n",
+       "      <td>97.0</td>\n",
+       "      <td>87.0</td>\n",
+       "      <td>18.0</td>\n",
+       "      <td>100.0</td>\n",
+       "      <td>150.0</td>\n",
+       "      <td>71.0</td>\n",
+       "      <td>10.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>€‡</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>17192424</td>\n",
+       "      <td>34160628</td>\n",
+       "      <td>98.6</td>\n",
+       "      <td>82.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>100.0</td>\n",
+       "      <td>111.0</td>\n",
+       "      <td>81.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>‡</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>15248757</td>\n",
+       "      <td>32172727</td>\n",
+       "      <td>97.1</td>\n",
+       "      <td>112.0</td>\n",
+       "      <td>20.0</td>\n",
+       "      <td>100.0</td>\n",
+       "      <td>147.0</td>\n",
+       "      <td>97.0</td>\n",
+       "      <td>8.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>ˆ‡</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>16648037</td>\n",
+       "      <td>38946064</td>\n",
+       "      <td>98.5</td>\n",
+       "      <td>59.0</td>\n",
+       "      <td>18.0</td>\n",
+       "      <td>99.0</td>\n",
+       "      <td>160.0</td>\n",
+       "      <td>86.0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>‰</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>13492931</td>\n",
+       "      <td>39828574</td>\n",
+       "      <td>100.6</td>\n",
+       "      <td>90.0</td>\n",
+       "      <td>16.0</td>\n",
+       "      <td>96.0</td>\n",
+       "      <td>107.0</td>\n",
+       "      <td>55.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>'</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>5</td>\n",
+       "      <td>11475777</td>\n",
+       "      <td>38193311</td>\n",
+       "      <td>97.1</td>\n",
+       "      <td>85.0</td>\n",
+       "      <td>16.0</td>\n",
+       "      <td>100.0</td>\n",
+       "      <td>138.0</td>\n",
+       "      <td>86.0</td>\n",
+       "      <td>7.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>-</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>6</td>\n",
+       "      <td>16615356</td>\n",
+       "      <td>38143467</td>\n",
+       "      <td>98.5</td>\n",
+       "      <td>100.0</td>\n",
+       "      <td>16.0</td>\n",
+       "      <td>100.0</td>\n",
+       "      <td>130.0</td>\n",
+       "      <td>94.0</td>\n",
+       "      <td>8.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>-</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>7</td>\n",
+       "      <td>17748455</td>\n",
+       "      <td>33063682</td>\n",
+       "      <td>99.4</td>\n",
+       "      <td>152.0</td>\n",
+       "      <td>18.0</td>\n",
+       "      <td>96.0</td>\n",
+       "      <td>136.0</td>\n",
+       "      <td>77.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>-</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>8</td>\n",
+       "      <td>13718556</td>\n",
+       "      <td>30494050</td>\n",
+       "      <td>98.5</td>\n",
+       "      <td>84.0</td>\n",
+       "      <td>14.0</td>\n",
+       "      <td>100.0</td>\n",
+       "      <td>124.0</td>\n",
+       "      <td>92.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>-</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>9</td>\n",
+       "      <td>13908077</td>\n",
+       "      <td>39580901</td>\n",
+       "      <td>97.0</td>\n",
+       "      <td>96.0</td>\n",
+       "      <td>18.0</td>\n",
+       "      <td>98.0</td>\n",
+       "      <td>129.0</td>\n",
+       "      <td>44.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>-</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   index  subject_id   stay_id  temperature  heartrate  resprate  o2sat  \\\n",
+       "0      0    15585360  37573921         97.0       87.0      18.0  100.0   \n",
+       "1      1    17192424  34160628         98.6       82.0       NaN  100.0   \n",
+       "2      2    15248757  32172727         97.1      112.0      20.0  100.0   \n",
+       "3      3    16648037  38946064         98.5       59.0      18.0   99.0   \n",
+       "4      4    13492931  39828574        100.6       90.0      16.0   96.0   \n",
+       "5      5    11475777  38193311         97.1       85.0      16.0  100.0   \n",
+       "6      6    16615356  38143467         98.5      100.0      16.0  100.0   \n",
+       "7      7    17748455  33063682         99.4      152.0      18.0   96.0   \n",
+       "8      8    13718556  30494050         98.5       84.0      14.0  100.0   \n",
+       "9      9    13908077  39580901         97.0       96.0      18.0   98.0   \n",
+       "\n",
+       "     sbp   dbp  pain  acuity chiefcomplaint  \n",
+       "0  150.0  71.0  10.0     3.0            €‡  \n",
+       "1  111.0  81.0   3.0     3.0              ‡  \n",
+       "2  147.0  97.0   8.0     4.0             ˆ‡  \n",
+       "3  160.0  86.0   2.0     2.0              ‰  \n",
+       "4  107.0  55.0   0.0     3.0              '  \n",
+       "5  138.0  86.0   7.0     3.0              -  \n",
+       "6  130.0  94.0   8.0     3.0              -  \n",
+       "7  136.0  77.0   0.0     1.0              -  \n",
+       "8  124.0  92.0   4.0     3.0              -  \n",
+       "9  129.0  44.0   4.0     2.0              -  "
+      ]
+     },
+     "execution_count": 77,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.read_sql(\"SELECT * FROM triage LIMIT 10\", conn)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 93,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>index</th>\n",
+       "      <th>subject_id</th>\n",
+       "      <th>stay_id</th>\n",
+       "      <th>temperature</th>\n",
+       "      <th>heartrate</th>\n",
+       "      <th>resprate</th>\n",
+       "      <th>o2sat</th>\n",
+       "      <th>sbp</th>\n",
+       "      <th>dbp</th>\n",
+       "      <th>pain</th>\n",
+       "      <th>acuity</th>\n",
+       "      <th>chiefcomplaint</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1305</td>\n",
+       "      <td>12601251</td>\n",
+       "      <td>34627547</td>\n",
+       "      <td>98.3</td>\n",
+       "      <td>81.0</td>\n",
+       "      <td>16.0</td>\n",
+       "      <td>100.0</td>\n",
+       "      <td>172.0</td>\n",
+       "      <td>65.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>Abdominal distention, Abd pain, Dyspnea on exe...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1308</td>\n",
+       "      <td>16663465</td>\n",
+       "      <td>36113926</td>\n",
+       "      <td>98.8</td>\n",
+       "      <td>58.0</td>\n",
+       "      <td>18.0</td>\n",
+       "      <td>98.0</td>\n",
+       "      <td>176.0</td>\n",
+       "      <td>71.0</td>\n",
+       "      <td>7.0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>Abdominal distention, Abd pain, S/P KIDNEY TRA...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1331</td>\n",
+       "      <td>14248362</td>\n",
+       "      <td>34323484</td>\n",
+       "      <td>98.3</td>\n",
+       "      <td>90.0</td>\n",
+       "      <td>17.0</td>\n",
+       "      <td>98.0</td>\n",
+       "      <td>140.0</td>\n",
+       "      <td>70.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>Abdominal distention, Abnormal labs, Jaundice,...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1332</td>\n",
+       "      <td>11493670</td>\n",
+       "      <td>35944304</td>\n",
+       "      <td>96.4</td>\n",
+       "      <td>80.0</td>\n",
+       "      <td>17.0</td>\n",
+       "      <td>99.0</td>\n",
+       "      <td>105.0</td>\n",
+       "      <td>65.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>Abdominal distention, Abnormal labs, Jaundice,...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1340</td>\n",
+       "      <td>15032149</td>\n",
+       "      <td>34358091</td>\n",
+       "      <td>96.6</td>\n",
+       "      <td>51.0</td>\n",
+       "      <td>20.0</td>\n",
+       "      <td>100.0</td>\n",
+       "      <td>128.0</td>\n",
+       "      <td>77.0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>Abdominal distention, Abnormal sodium level, A...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>1341</td>\n",
+       "      <td>18160217</td>\n",
+       "      <td>37565547</td>\n",
+       "      <td>98.1</td>\n",
+       "      <td>82.0</td>\n",
+       "      <td>16.0</td>\n",
+       "      <td>100.0</td>\n",
+       "      <td>116.0</td>\n",
+       "      <td>55.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>Abdominal distention, Abnormal sodium level, T...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>1345</td>\n",
+       "      <td>14539683</td>\n",
+       "      <td>30348440</td>\n",
+       "      <td>96.9</td>\n",
+       "      <td>80.0</td>\n",
+       "      <td>14.0</td>\n",
+       "      <td>100.0</td>\n",
+       "      <td>128.0</td>\n",
+       "      <td>58.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>Abdominal distention, Altered mental status, J...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>1346</td>\n",
+       "      <td>12491283</td>\n",
+       "      <td>39692564</td>\n",
+       "      <td>97.7</td>\n",
+       "      <td>88.0</td>\n",
+       "      <td>18.0</td>\n",
+       "      <td>99.0</td>\n",
+       "      <td>116.0</td>\n",
+       "      <td>70.0</td>\n",
+       "      <td>7.0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>Abdominal distention, Altered mental status, J...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>1350</td>\n",
+       "      <td>16220647</td>\n",
+       "      <td>39977711</td>\n",
+       "      <td>98.3</td>\n",
+       "      <td>94.0</td>\n",
+       "      <td>16.0</td>\n",
+       "      <td>100.0</td>\n",
+       "      <td>121.0</td>\n",
+       "      <td>63.0</td>\n",
+       "      <td>6.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>Abdominal distention, Anemia, Dyspnea, RUQ abd...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>1368</td>\n",
+       "      <td>14912902</td>\n",
+       "      <td>36669324</td>\n",
+       "      <td>98.4</td>\n",
+       "      <td>119.0</td>\n",
+       "      <td>18.0</td>\n",
+       "      <td>96.0</td>\n",
+       "      <td>124.0</td>\n",
+       "      <td>76.0</td>\n",
+       "      <td>7.0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>Abdominal distention, Back pain, L Shoulder pa...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   index  subject_id   stay_id  temperature  heartrate  resprate  o2sat  \\\n",
+       "0   1305    12601251  34627547         98.3       81.0      16.0  100.0   \n",
+       "1   1308    16663465  36113926         98.8       58.0      18.0   98.0   \n",
+       "2   1331    14248362  34323484         98.3       90.0      17.0   98.0   \n",
+       "3   1332    11493670  35944304         96.4       80.0      17.0   99.0   \n",
+       "4   1340    15032149  34358091         96.6       51.0      20.0  100.0   \n",
+       "5   1341    18160217  37565547         98.1       82.0      16.0  100.0   \n",
+       "6   1345    14539683  30348440         96.9       80.0      14.0  100.0   \n",
+       "7   1346    12491283  39692564         97.7       88.0      18.0   99.0   \n",
+       "8   1350    16220647  39977711         98.3       94.0      16.0  100.0   \n",
+       "9   1368    14912902  36669324         98.4      119.0      18.0   96.0   \n",
+       "\n",
+       "     sbp   dbp  pain  acuity  \\\n",
+       "0  172.0  65.0   3.0     3.0   \n",
+       "1  176.0  71.0   7.0     2.0   \n",
+       "2  140.0  70.0   0.0     2.0   \n",
+       "3  105.0  65.0   0.0     2.0   \n",
+       "4  128.0  77.0   2.0     2.0   \n",
+       "5  116.0  55.0   0.0     3.0   \n",
+       "6  128.0  58.0   NaN     2.0   \n",
+       "7  116.0  70.0   7.0     2.0   \n",
+       "8  121.0  63.0   6.0     3.0   \n",
+       "9  124.0  76.0   7.0     2.0   \n",
+       "\n",
+       "                                      chiefcomplaint  \n",
+       "0  Abdominal distention, Abd pain, Dyspnea on exe...  \n",
+       "1  Abdominal distention, Abd pain, S/P KIDNEY TRA...  \n",
+       "2  Abdominal distention, Abnormal labs, Jaundice,...  \n",
+       "3  Abdominal distention, Abnormal labs, Jaundice,...  \n",
+       "4  Abdominal distention, Abnormal sodium level, A...  \n",
+       "5  Abdominal distention, Abnormal sodium level, T...  \n",
+       "6  Abdominal distention, Altered mental status, J...  \n",
+       "7  Abdominal distention, Altered mental status, J...  \n",
+       "8  Abdominal distention, Anemia, Dyspnea, RUQ abd...  \n",
+       "9  Abdominal distention, Back pain, L Shoulder pa...  "
+      ]
+     },
+     "execution_count": 93,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.read_sql(\"SELECT * FROM triage WHERE LENGTH(chiefcomplaint) > 50 LIMIT 10\", conn)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### medrecon"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "La table medrecon contient la liste des traitements habituels du patient.  \n",
+    "Par traitement habituel, on entends les traitement que le patient prenait avant même son passage aux urgences.\n",
+    "\n",
+    "Ces traitements sont tous identifiés à l'aide d'identifiant produit :\n",
+    "- NDC - National Drug Classification (de la NDA)\n",
+    "- GSN - Generic sequence number\n",
+    "\n",
+    "Les traitements ont été regroupés à l'aide d'une classification nommé \"Enhanced Therapeutic Class\", chaque groupe est identifié par un code `etccode` associé au libelé du groupe `etcdescription`.\n",
+    "\n",
+    "Une analyse plus poussée des traitements pourrait être nécessaires, elle imposerait d'exploiter le code produit NDC.  \n",
+    "L'idéal est d'obtenir un code ATC, qui est une classification fonctionnelle très pratique pour identifier l'indication des médicaments.  \n",
+    "Une approche pourrait être de se reposer sur l'API de la `National Library of Medicine` (https://lhncbc.nlm.nih.gov/RxNav/).  \n",
+    "Une travail similaire a été effectué par d'autres personnes : https://github.com/fabkury/ndc_map"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 97,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>index</th>\n",
+       "      <th>subject_id</th>\n",
+       "      <th>stay_id</th>\n",
+       "      <th>charttime</th>\n",
+       "      <th>name</th>\n",
+       "      <th>gsn</th>\n",
+       "      <th>ndc</th>\n",
+       "      <th>etc_rn</th>\n",
+       "      <th>etccode</th>\n",
+       "      <th>etcdescription</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>12363835</td>\n",
+       "      <td>34078373</td>\n",
+       "      <td>2177-02-13 03:31:00</td>\n",
+       "      <td>Caltrate 600 + D</td>\n",
+       "      <td>63395</td>\n",
+       "      <td>11845014031</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6143.0</td>\n",
+       "      <td>Minerals and Electrolytes - Calcium Replacemen...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>12363835</td>\n",
+       "      <td>34078373</td>\n",
+       "      <td>2177-02-13 03:31:00</td>\n",
+       "      <td>Coumadin</td>\n",
+       "      <td>6562</td>\n",
+       "      <td>15330026801</td>\n",
+       "      <td>1</td>\n",
+       "      <td>806.0</td>\n",
+       "      <td>Anticoagulants - Coumarin</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>12363835</td>\n",
+       "      <td>34078373</td>\n",
+       "      <td>2177-02-13 03:31:00</td>\n",
+       "      <td>Flonase</td>\n",
+       "      <td>18368</td>\n",
+       "      <td>12280027016</td>\n",
+       "      <td>1</td>\n",
+       "      <td>301.0</td>\n",
+       "      <td>Nasal Corticosteroids</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>12363835</td>\n",
+       "      <td>34078373</td>\n",
+       "      <td>2177-02-13 03:31:00</td>\n",
+       "      <td>lidocaine</td>\n",
+       "      <td>43256</td>\n",
+       "      <td>10544037630</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3932.0</td>\n",
+       "      <td>Dermatological - Topical Local Anesthetic Amides</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>12363835</td>\n",
+       "      <td>34078373</td>\n",
+       "      <td>2177-02-13 03:31:00</td>\n",
+       "      <td>omeprazole</td>\n",
+       "      <td>43137</td>\n",
+       "      <td>16714074801</td>\n",
+       "      <td>1</td>\n",
+       "      <td>445.0</td>\n",
+       "      <td>Gastric Acid Secretion Reducing Agents - Proto...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>5</td>\n",
+       "      <td>12363835</td>\n",
+       "      <td>34078373</td>\n",
+       "      <td>2177-02-13 03:31:00</td>\n",
+       "      <td>ranitidine HCl</td>\n",
+       "      <td>11674</td>\n",
+       "      <td>10544036030</td>\n",
+       "      <td>1</td>\n",
+       "      <td>443.0</td>\n",
+       "      <td>Gastric Acid Secretion Reducers - Histamine H2...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>6</td>\n",
+       "      <td>12363835</td>\n",
+       "      <td>34078373</td>\n",
+       "      <td>2177-02-13 03:31:00</td>\n",
+       "      <td>Tylenol Extra Strength</td>\n",
+       "      <td>4490</td>\n",
+       "      <td>10003011501</td>\n",
+       "      <td>1</td>\n",
+       "      <td>577.0</td>\n",
+       "      <td>Analgesic or Antipyretic Non-Opioid</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>7</td>\n",
+       "      <td>15128994</td>\n",
+       "      <td>30058281</td>\n",
+       "      <td>2167-08-29 04:27:00</td>\n",
+       "      <td>Dialyvite</td>\n",
+       "      <td>60020</td>\n",
+       "      <td>10542000010</td>\n",
+       "      <td>1</td>\n",
+       "      <td>690.0</td>\n",
+       "      <td>B-Complex Vitamin Combinations</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>8</td>\n",
+       "      <td>15128994</td>\n",
+       "      <td>30058281</td>\n",
+       "      <td>2167-08-29 04:27:00</td>\n",
+       "      <td>allopurinol</td>\n",
+       "      <td>2535</td>\n",
+       "      <td>11289101302</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2602.0</td>\n",
+       "      <td>Hyperuricemia Therapy - Xanthine Oxidase Inhib...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>9</td>\n",
+       "      <td>15128994</td>\n",
+       "      <td>30058281</td>\n",
+       "      <td>2167-08-29 04:27:00</td>\n",
+       "      <td>albuterol sulfate</td>\n",
+       "      <td>28090</td>\n",
+       "      <td>21695042308</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5970.0</td>\n",
+       "      <td>Asthma/COPD Therapy - Beta 2-Adrenergic Agents...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   index  subject_id   stay_id            charttime                    name  \\\n",
+       "0      0    12363835  34078373  2177-02-13 03:31:00        Caltrate 600 + D   \n",
+       "1      1    12363835  34078373  2177-02-13 03:31:00                Coumadin   \n",
+       "2      2    12363835  34078373  2177-02-13 03:31:00                 Flonase   \n",
+       "3      3    12363835  34078373  2177-02-13 03:31:00               lidocaine   \n",
+       "4      4    12363835  34078373  2177-02-13 03:31:00              omeprazole   \n",
+       "5      5    12363835  34078373  2177-02-13 03:31:00          ranitidine HCl   \n",
+       "6      6    12363835  34078373  2177-02-13 03:31:00  Tylenol Extra Strength   \n",
+       "7      7    15128994  30058281  2167-08-29 04:27:00               Dialyvite   \n",
+       "8      8    15128994  30058281  2167-08-29 04:27:00             allopurinol   \n",
+       "9      9    15128994  30058281  2167-08-29 04:27:00       albuterol sulfate   \n",
+       "\n",
+       "     gsn          ndc  etc_rn  etccode  \\\n",
+       "0  63395  11845014031       1   6143.0   \n",
+       "1   6562  15330026801       1    806.0   \n",
+       "2  18368  12280027016       1    301.0   \n",
+       "3  43256  10544037630       1   3932.0   \n",
+       "4  43137  16714074801       1    445.0   \n",
+       "5  11674  10544036030       1    443.0   \n",
+       "6   4490  10003011501       1    577.0   \n",
+       "7  60020  10542000010       1    690.0   \n",
+       "8   2535  11289101302       1   2602.0   \n",
+       "9  28090  21695042308       1   5970.0   \n",
+       "\n",
+       "                                      etcdescription  \n",
+       "0  Minerals and Electrolytes - Calcium Replacemen...  \n",
+       "1                          Anticoagulants - Coumarin  \n",
+       "2                              Nasal Corticosteroids  \n",
+       "3   Dermatological - Topical Local Anesthetic Amides  \n",
+       "4  Gastric Acid Secretion Reducing Agents - Proto...  \n",
+       "5  Gastric Acid Secretion Reducers - Histamine H2...  \n",
+       "6                Analgesic or Antipyretic Non-Opioid  \n",
+       "7                     B-Complex Vitamin Combinations  \n",
+       "8  Hyperuricemia Therapy - Xanthine Oxidase Inhib...  \n",
+       "9  Asthma/COPD Therapy - Beta 2-Adrenergic Agents...  "
+      ]
+     },
+     "execution_count": 97,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.read_sql(\"SELECT * FROM medrecon LIMIT 10\", conn)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### diagnosis"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Bien que ne faisant pas partit de l'objet du projet, la table `diagnosis` a été inclut dans l'éventualité où nous aurions besoin de l'explorer.  \n",
+    "L'ensemble des diagnostic sont identifié à l'aide de la classification internationale des maladie dans sa version 9 (ICD-9)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 98,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>index</th>\n",
+       "      <th>subject_id</th>\n",
+       "      <th>stay_id</th>\n",
+       "      <th>seq_num</th>\n",
+       "      <th>icd_code</th>\n",
+       "      <th>icd_version</th>\n",
+       "      <th>icd_title</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>15825222</td>\n",
+       "      <td>39090953</td>\n",
+       "      <td>1</td>\n",
+       "      <td>486</td>\n",
+       "      <td>9</td>\n",
+       "      <td>PNEUMONIA,ORGANISM UNSPECIFIED</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>15825222</td>\n",
+       "      <td>39090953</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4254</td>\n",
+       "      <td>9</td>\n",
+       "      <td>PRIM CARDIOMYOPATHY NEC</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>11554870</td>\n",
+       "      <td>37245764</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5609</td>\n",
+       "      <td>9</td>\n",
+       "      <td>INTESTINAL OBSTRUCT NOS</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>19748558</td>\n",
+       "      <td>30511202</td>\n",
+       "      <td>1</td>\n",
+       "      <td>49392</td>\n",
+       "      <td>9</td>\n",
+       "      <td>ASTHMA, UNSPECIFIED, WITH ACUTE EXACERBATION</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>18008347</td>\n",
+       "      <td>34907903</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7842</td>\n",
+       "      <td>9</td>\n",
+       "      <td>SWELLING IN HEAD &amp; NECK</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>5</td>\n",
+       "      <td>15475846</td>\n",
+       "      <td>30308014</td>\n",
+       "      <td>1</td>\n",
+       "      <td>78079</td>\n",
+       "      <td>9</td>\n",
+       "      <td>OTHER MALAISE AND FATIGUE</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>6</td>\n",
+       "      <td>15475846</td>\n",
+       "      <td>30308014</td>\n",
+       "      <td>2</td>\n",
+       "      <td>27651</td>\n",
+       "      <td>9</td>\n",
+       "      <td>DEHYDRATION</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>7</td>\n",
+       "      <td>16538197</td>\n",
+       "      <td>33556881</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6084</td>\n",
+       "      <td>9</td>\n",
+       "      <td>MALE GEN INFLAM DIS NEC</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>8</td>\n",
+       "      <td>18438529</td>\n",
+       "      <td>35200231</td>\n",
+       "      <td>1</td>\n",
+       "      <td>30500</td>\n",
+       "      <td>9</td>\n",
+       "      <td>ALCOHOL ABUSE-UNSPEC</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>9</td>\n",
+       "      <td>17185991</td>\n",
+       "      <td>37532781</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7840</td>\n",
+       "      <td>9</td>\n",
+       "      <td>HEADACHE</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   index  subject_id   stay_id  seq_num icd_code  icd_version  \\\n",
+       "0      0    15825222  39090953        1      486            9   \n",
+       "1      1    15825222  39090953        2     4254            9   \n",
+       "2      2    11554870  37245764        1     5609            9   \n",
+       "3      3    19748558  30511202        1    49392            9   \n",
+       "4      4    18008347  34907903        1     7842            9   \n",
+       "5      5    15475846  30308014        1    78079            9   \n",
+       "6      6    15475846  30308014        2    27651            9   \n",
+       "7      7    16538197  33556881        1     6084            9   \n",
+       "8      8    18438529  35200231        1    30500            9   \n",
+       "9      9    17185991  37532781        1     7840            9   \n",
+       "\n",
+       "                                      icd_title  \n",
+       "0                PNEUMONIA,ORGANISM UNSPECIFIED  \n",
+       "1                       PRIM CARDIOMYOPATHY NEC  \n",
+       "2                       INTESTINAL OBSTRUCT NOS  \n",
+       "3  ASTHMA, UNSPECIFIED, WITH ACUTE EXACERBATION  \n",
+       "4                       SWELLING IN HEAD & NECK  \n",
+       "5                     OTHER MALAISE AND FATIGUE  \n",
+       "6                                   DEHYDRATION  \n",
+       "7                       MALE GEN INFLAM DIS NEC  \n",
+       "8                          ALCOHOL ABUSE-UNSPEC  \n",
+       "9                                      HEADACHE  "
+      ]
+     },
+     "execution_count": 98,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.read_sql(\"SELECT * FROM diagnosis LIMIT 10\", conn)"
+   ]
+  }
+ ],
+ "metadata": {
+  "interpreter": {
+   "hash": "c304935560631f5a20c1bdabb506947800ccd82d813704000c078f0735b9b818"
+  },
+  "kernelspec": {
+   "display_name": "Python 3.9.9 64-bit ('base': conda)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.9"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

BIN
img/pexels-pixabay-263402.jpg


+ 1 - 0
requirements.txt

@@ -0,0 +1 @@
+progress