|
@@ -39,7 +39,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 4,
|
|
|
+ "execution_count": 2,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -49,7 +49,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 110,
|
|
|
+ "execution_count": 3,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -67,7 +67,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 111,
|
|
|
+ "execution_count": 4,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -84,7 +84,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 301,
|
|
|
+ "execution_count": 5,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -112,7 +112,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 302,
|
|
|
+ "execution_count": 6,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -123,7 +123,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 284,
|
|
|
+ "execution_count": 7,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -136,7 +136,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 285,
|
|
|
+ "execution_count": 8,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -153,7 +153,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 286,
|
|
|
+ "execution_count": null,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -171,7 +171,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 287,
|
|
|
+ "execution_count": null,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -190,7 +190,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 304,
|
|
|
+ "execution_count": null,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -213,15 +213,27 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 306,
|
|
|
+ "execution_count": null,
|
|
|
"metadata": {},
|
|
|
- "outputs": [],
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "ename": "NameError",
|
|
|
+ "evalue": "name 'stays_atc_2' is not defined",
|
|
|
+ "output_type": "error",
|
|
|
+ "traceback": [
|
|
|
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
|
|
+ "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
|
|
|
+ "\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_10104/535576780.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;31m# On écrit en parquet pour optimiser le stockage et les temps d'io\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[0mstays_atc_2\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msort_values\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"stay_id\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreset_index\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdrop\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_parquet\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"./data/features_atc2.parquet\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\"pyarrow\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 5\u001b[0m \u001b[0mstays_atc_4\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msort_values\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"stay_id\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreset_index\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdrop\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_parquet\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"./data/features_atc4.parquet\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\"pyarrow\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
|
|
+ "\u001b[1;31mNameError\u001b[0m: name 'stays_atc_2' is not defined"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
"source": [
|
|
|
"# Ecriture du featues dataset\n",
|
|
|
"# On écrit en parquet pour optimiser le stockage et les temps d'io\n",
|
|
|
"\n",
|
|
|
- "stays_atc_2.to_parquet(\"./data/features_atc2.parquet\", engine=\"pyarrow\", index=False)\n",
|
|
|
- "stays_atc_4.to_parquet(\"./data/features_atc4.parquet\", engine=\"pyarrow\", index=False)"
|
|
|
+ "stays_atc_2.sort_values(\"stay_id\").reset_index(drop=True).to_parquet(\"./data/features_atc2.parquet\", engine=\"pyarrow\", index=False)\n",
|
|
|
+ "stays_atc_4.sort_values(\"stay_id\").reset_index(drop=True).to_parquet(\"./data/features_atc4.parquet\", engine=\"pyarrow\", index=False)"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
@@ -233,7 +245,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 312,
|
|
|
+ "execution_count": null,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -251,7 +263,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 315,
|
|
|
+ "execution_count": null,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -267,7 +279,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 316,
|
|
|
+ "execution_count": null,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -281,7 +293,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 317,
|
|
|
+ "execution_count": null,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -293,11 +305,11 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 319,
|
|
|
+ "execution_count": null,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
- "labs_deduplicate_pivot_final.to_parquet(\"./data/labels.parquet\", index=False)"
|
|
|
+ "labs_deduplicate_pivot_final.sort_values(\"stay_id\").reset_index(drop=True).to_parquet(\"./data/labels.parquet\", index=False)"
|
|
|
]
|
|
|
}
|
|
|
],
|