|
@@ -39,7 +39,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 1,
|
|
|
+ "execution_count": 2,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -49,7 +49,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 2,
|
|
|
+ "execution_count": 3,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -67,7 +67,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 3,
|
|
|
+ "execution_count": 4,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -84,7 +84,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 23,
|
|
|
+ "execution_count": 5,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -112,7 +112,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 24,
|
|
|
+ "execution_count": 6,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -123,12 +123,12 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 25,
|
|
|
+ "execution_count": 10,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
"drugs = pd.read_sql(f\"\"\"\n",
|
|
|
- " SELECT stay_id, gsn\n",
|
|
|
+ " SELECT stay_id, gsn, etccode, 1 n\n",
|
|
|
" FROM medrecon\n",
|
|
|
" WHERE gsn IN ({','.join(drugs_rules_list)})\n",
|
|
|
"\"\"\", conn)"
|
|
@@ -136,7 +136,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 26,
|
|
|
+ "execution_count": 15,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -146,14 +146,15 @@
|
|
|
" drugs_rules,\n",
|
|
|
" left_on=\"gsn\",\n",
|
|
|
" right_on=\"gsn\"\n",
|
|
|
- ").drop_duplicates([\"stay_id\",\"atc\"])\n",
|
|
|
+ ").groupby([\"stay_id\",\"atc\"])[\"n\"].sum() \\\n",
|
|
|
+ " .reset_index()\n",
|
|
|
"\n",
|
|
|
"atc_stays[\"atc_2\"] = atc_stays[\"atc\"].str.slice(0, 3)"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 27,
|
|
|
+ "execution_count": 21,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -161,36 +162,55 @@
|
|
|
"## Le code ATC complet (Anatomique, Thérapeutique et Pharmacologique), ATC IV\n",
|
|
|
"\n",
|
|
|
"atc_stays_pivoted_4 = pd.pivot_table(\n",
|
|
|
- " atc_stays[[\"stay_id\",\"atc\"]] \\\n",
|
|
|
- " .assign(value=1),\n",
|
|
|
+ " atc_stays[[\"stay_id\",\"atc\", \"n\"]],\n",
|
|
|
" columns=[\"atc\"],\n",
|
|
|
" index=[\"stay_id\"],\n",
|
|
|
- " values=\"value\"\n",
|
|
|
+ " values=\"n\"\n",
|
|
|
").fillna(0).reset_index()"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 28,
|
|
|
+ "execution_count": 40,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
"## Le code ATC 2 (Anatomique et Thérapeutique)\n",
|
|
|
"\n",
|
|
|
"atc_stays_pivoted_2 = pd.pivot_table(\n",
|
|
|
- " atc_stays[[\"stay_id\",\"atc_2\"]] \\\n",
|
|
|
- " .drop_duplicates() \\\n",
|
|
|
- " .rename(columns={\"atc_2\":\"atc\"}) \\\n",
|
|
|
- " .assign(value=1),\n",
|
|
|
+ " atc_stays[[\"stay_id\",\"atc_2\", \"n\"]] \\\n",
|
|
|
+ " .groupby([\"stay_id\",\"atc_2\"])[\"n\"].sum() \\\n",
|
|
|
+ " .reset_index() \\\n",
|
|
|
+ " .rename(columns={\"atc_2\":\"atc\"}),\n",
|
|
|
" columns=[\"atc\"],\n",
|
|
|
" index=[\"stay_id\"],\n",
|
|
|
- " values=\"value\"\n",
|
|
|
+ " values=\"n\"\n",
|
|
|
+ ").fillna(0).reset_index()"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 42,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "## Les codes ETC\n",
|
|
|
+ "\n",
|
|
|
+ "etc_pivoted = pd.pivot_table(\n",
|
|
|
+ " drugs[[\"stay_id\",\"etccode\", \"n\"]].dropna() \\\n",
|
|
|
+ " .assign(etccode = lambda x: x[\"etccode\"].astype(\"int\").astype(\"str\")) \\\n",
|
|
|
+ " .groupby([\"stay_id\",\"etccode\"])[\"n\"].sum() \\\n",
|
|
|
+ " .reset_index() \\\n",
|
|
|
+ " .rename(columns={\"etccode\":\"atc\"}),\n",
|
|
|
+ " columns=[\"atc\"],\n",
|
|
|
+ " index=[\"stay_id\"],\n",
|
|
|
+ " values=\"n\"\n",
|
|
|
").fillna(0).reset_index()"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 29,
|
|
|
+ "execution_count": 43,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -210,13 +230,22 @@
|
|
|
" how=\"left\"\n",
|
|
|
")\n",
|
|
|
"\n",
|
|
|
+ "stays_etc = pd.merge(\n",
|
|
|
+ " stays,\n",
|
|
|
+ " etc_pivoted,\n",
|
|
|
+ " left_on=\"stay_id\",\n",
|
|
|
+ " right_on=\"stay_id\",\n",
|
|
|
+ " how=\"left\"\n",
|
|
|
+ ")\n",
|
|
|
+ "\n",
|
|
|
"stays_atc_4[atc_stays_pivoted_4.columns[1:]] = stays_atc_4[atc_stays_pivoted_4.columns[1:]].fillna(0)\n",
|
|
|
- "stays_atc_2[atc_stays_pivoted_2.columns[1:]] = stays_atc_2[atc_stays_pivoted_2.columns[1:]].fillna(0)"
|
|
|
+ "stays_atc_2[atc_stays_pivoted_2.columns[1:]] = stays_atc_2[atc_stays_pivoted_2.columns[1:]].fillna(0)\n",
|
|
|
+ "stays_etc[etc_pivoted.columns[1:]] = stays_etc[etc_pivoted.columns[1:]].fillna(0)"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 39,
|
|
|
+ "execution_count": 44,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -224,7 +253,8 @@
|
|
|
"# On écrit en parquet pour optimiser le stockage et les temps d'io\n",
|
|
|
"\n",
|
|
|
"stays_atc_2.sort_values(\"stay_id\").reset_index(drop=True).to_parquet(\"./data/features_atc2.parquet\", engine=\"pyarrow\", index=False)\n",
|
|
|
- "stays_atc_4.sort_values(\"stay_id\").reset_index(drop=True).to_parquet(\"./data/features_atc4.parquet\", engine=\"pyarrow\", index=False)"
|
|
|
+ "stays_atc_4.sort_values(\"stay_id\").reset_index(drop=True).to_parquet(\"./data/features_atc4.parquet\", engine=\"pyarrow\", index=False)\n",
|
|
|
+ "stays_etc.sort_values(\"stay_id\").reset_index(drop=True).to_parquet(\"./data/features_etc.parquet\", engine=\"pyarrow\", index=False)"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
@@ -236,9 +266,21 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 12,
|
|
|
+ "execution_count": 1,
|
|
|
"metadata": {},
|
|
|
- "outputs": [],
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "ename": "NameError",
|
|
|
+ "evalue": "name 'pd' is not defined",
|
|
|
+ "output_type": "error",
|
|
|
+ "traceback": [
|
|
|
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
|
|
+ "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
|
|
|
+ "Input \u001b[0;32mIn [1]\u001b[0m, in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0m labs \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241m.\u001b[39mread_sql(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;124m SELECT \u001b[39m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;124m le.stay_id,\u001b[39m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;124m le.itemid item_id\u001b[39m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124m FROM labevents le\u001b[39m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;124m WHERE le.itemid IN (\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m,\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(items_list)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m)\u001b[39m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;124m GROUP BY\u001b[39m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;124m le.stay_id,\u001b[39m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;124m le.itemid\u001b[39m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;124m\"\"\"\u001b[39m, conn)\n",
|
|
|
+ "\u001b[0;31mNameError\u001b[0m: name 'pd' is not defined"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
"source": [
|
|
|
"labs = pd.read_sql(f\"\"\"\n",
|
|
|
" SELECT \n",
|
|
@@ -254,7 +296,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 13,
|
|
|
+ "execution_count": null,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -270,7 +312,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 14,
|
|
|
+ "execution_count": null,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -284,7 +326,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 15,
|
|
|
+ "execution_count": null,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -296,7 +338,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 16,
|
|
|
+ "execution_count": null,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -306,10 +348,10 @@
|
|
|
],
|
|
|
"metadata": {
|
|
|
"interpreter": {
|
|
|
- "hash": "28b293e0c0671e44c7281dde6399c7c7419d3faca031d22494da8635907ada72"
|
|
|
+ "hash": "c304935560631f5a20c1bdabb506947800ccd82d813704000c078f0735b9b818"
|
|
|
},
|
|
|
"kernelspec": {
|
|
|
- "display_name": "Python 3.9.7 ('base')",
|
|
|
+ "display_name": "R",
|
|
|
"language": "python",
|
|
|
"name": "python3"
|
|
|
},
|
|
@@ -323,7 +365,7 @@
|
|
|
"name": "python",
|
|
|
"nbconvert_exporter": "python",
|
|
|
"pygments_lexer": "ipython3",
|
|
|
- "version": "3.9.7"
|
|
|
+ "version": "3.9.9"
|
|
|
},
|
|
|
"orig_nbformat": 4
|
|
|
},
|