|
@@ -39,7 +39,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 2,
|
|
|
+ "execution_count": 1,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -49,7 +49,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 3,
|
|
|
+ "execution_count": 2,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -67,7 +67,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 4,
|
|
|
+ "execution_count": 3,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -84,7 +84,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 5,
|
|
|
+ "execution_count": 23,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -112,7 +112,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 6,
|
|
|
+ "execution_count": 24,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -123,7 +123,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 7,
|
|
|
+ "execution_count": 25,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -136,7 +136,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 8,
|
|
|
+ "execution_count": 26,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -153,7 +153,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
+ "execution_count": 27,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -171,7 +171,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
+ "execution_count": 28,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -190,7 +190,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
+ "execution_count": 29,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -200,7 +200,7 @@
|
|
|
" left_on=\"stay_id\",\n",
|
|
|
" right_on=\"stay_id\",\n",
|
|
|
" how=\"left\"\n",
|
|
|
- ").fillna(0)\n",
|
|
|
+ ")\n",
|
|
|
"\n",
|
|
|
"stays_atc_2 = pd.merge(\n",
|
|
|
" stays,\n",
|
|
@@ -208,26 +208,17 @@
|
|
|
" left_on=\"stay_id\",\n",
|
|
|
" right_on=\"stay_id\",\n",
|
|
|
" how=\"left\"\n",
|
|
|
- ").fillna(0)"
|
|
|
+ ")\n",
|
|
|
+ "\n",
|
|
|
+ "stays_atc_4[atc_stays_pivoted_4.columns[1:]] = stays_atc_4[atc_stays_pivoted_4.columns[1:]].fillna(0)\n",
|
|
|
+ "stays_atc_2[atc_stays_pivoted_2.columns[1:]] = stays_atc_2[atc_stays_pivoted_2.columns[1:]].fillna(0)"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
+ "execution_count": 39,
|
|
|
"metadata": {},
|
|
|
- "outputs": [
|
|
|
- {
|
|
|
- "ename": "NameError",
|
|
|
- "evalue": "name 'stays_atc_2' is not defined",
|
|
|
- "output_type": "error",
|
|
|
- "traceback": [
|
|
|
- "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
|
|
- "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
|
|
|
- "\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_10104/535576780.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;31m# On écrit en parquet pour optimiser le stockage et les temps d'io\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[0mstays_atc_2\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msort_values\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"stay_id\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreset_index\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdrop\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_parquet\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"./data/features_atc2.parquet\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\"pyarrow\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 5\u001b[0m \u001b[0mstays_atc_4\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msort_values\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"stay_id\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreset_index\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdrop\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_parquet\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"./data/features_atc4.parquet\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\"pyarrow\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
|
|
- "\u001b[1;31mNameError\u001b[0m: name 'stays_atc_2' is not defined"
|
|
|
- ]
|
|
|
- }
|
|
|
- ],
|
|
|
+ "outputs": [],
|
|
|
"source": [
|
|
|
"# Ecriture du featues dataset\n",
|
|
|
"# On écrit en parquet pour optimiser le stockage et les temps d'io\n",
|
|
@@ -245,7 +236,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
+ "execution_count": 12,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -263,7 +254,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
+ "execution_count": 13,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -279,7 +270,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
+ "execution_count": 14,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -293,7 +284,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
+ "execution_count": 15,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -305,7 +296,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
+ "execution_count": 16,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|