{ "cells": [ { "cell_type": "markdown", "id": "2d7c7f3c", "metadata": {}, "source": [ "# Grow Intern\n", "\n", "## Author: Tajeddine Bourhim
" ] }, { "cell_type": "code", "execution_count": 69, "id": "dc9993c8", "metadata": { "execution": { "iopub.execute_input": "2024-06-09T15:46:45.072763Z", "iopub.status.busy": "2024-06-09T15:46:45.072335Z", "iopub.status.idle": "2024-06-09T15:46:47.504456Z", "shell.execute_reply": "2024-06-09T15:46:47.503300Z" }, "papermill": { "duration": 2.445503, "end_time": "2024-06-09T15:46:47.507176", "exception": false, "start_time": "2024-06-09T15:46:45.061673", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.linear_model import LinearRegression\n", "from sklearn import metrics\n", "from sklearn.preprocessing import OneHotEncoder\n", "from sklearn.compose import ColumnTransformer\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.preprocessing import StandardScaler\n", "import pickle\n" ] }, { "cell_type": "markdown", "id": "a6e65e23", "metadata": { "papermill": { "duration": 0.007901, "end_time": "2024-06-09T15:46:47.523537", "exception": false, "start_time": "2024-06-09T15:46:47.515636", "status": "completed" }, "tags": [] }, "source": [ "## Read data from CSV file" ] }, { "cell_type": "code", "execution_count": 70, "id": "e7150ea1", "metadata": { "execution": { "iopub.execute_input": "2024-06-09T15:46:47.542478Z", "iopub.status.busy": "2024-06-09T15:46:47.541311Z", "iopub.status.idle": "2024-06-09T15:46:47.760213Z", "shell.execute_reply": "2024-06-09T15:46:47.758974Z" }, "papermill": { "duration": 0.230969, "end_time": "2024-06-09T15:46:47.762820", "exception": false, "start_time": "2024-06-09T15:46:47.531851", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "df = pd.read_csv('CarsData.csv')" ] }, { "cell_type": "markdown", "id": "b00eb3ed", "metadata": { "papermill": { "duration": 0.008112, "end_time": "2024-06-09T15:46:47.779335", "exception": false, "start_time": 
"2024-06-09T15:46:47.771223", "status": "completed" }, "tags": [] }, "source": [ "## Display the number of cars manufactured each year" ] }, { "cell_type": "code", "execution_count": 71, "id": "04ff92b9", "metadata": { "execution": { "iopub.execute_input": "2024-06-09T15:46:47.798175Z", "iopub.status.busy": "2024-06-09T15:46:47.797787Z", "iopub.status.idle": "2024-06-09T15:46:47.817538Z", "shell.execute_reply": "2024-06-09T15:46:47.816136Z" }, "papermill": { "duration": 0.031958, "end_time": "2024-06-09T15:46:47.820184", "exception": false, "start_time": "2024-06-09T15:46:47.788226", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "year\n", "1970 2\n", "1996 2\n", "1997 4\n", "1998 8\n", "1999 6\n", "2000 9\n", "2001 20\n", "2002 31\n", "2003 34\n", "2004 52\n", "2005 69\n", "2006 86\n", "2007 163\n", "2008 199\n", "2009 277\n", "2010 340\n", "2011 432\n", "2012 633\n", "2013 2590\n", "2014 4053\n", "2015 7586\n", "2016 15724\n", "2017 21616\n", "2018 13570\n", "2019 26165\n", "2020 4040\n", "2024 1\n", "dtype: int64\n" ] } ], "source": [ "num_types = df.groupby(by='year').size();\n", "print(num_types);" ] }, { "cell_type": "markdown", "id": "f3bf385a", "metadata": { "papermill": { "duration": 0.007983, "end_time": "2024-06-09T15:46:47.836685", "exception": false, "start_time": "2024-06-09T15:46:47.828702", "status": "completed" }, "tags": [] }, "source": [ "## Bar graph showing the number of cars by manufacturing year" ] }, { "cell_type": "code", "execution_count": 72, "id": "e0d505e4", "metadata": { "execution": { "iopub.execute_input": "2024-06-09T15:46:47.855894Z", "iopub.status.busy": "2024-06-09T15:46:47.855082Z", "iopub.status.idle": "2024-06-09T15:46:48.362284Z", "shell.execute_reply": "2024-06-09T15:46:48.361203Z" }, "papermill": { "duration": 0.519176, "end_time": "2024-06-09T15:46:48.364772", "exception": false, "start_time": "2024-06-09T15:46:47.845596", "status": "completed" }, "tags": [] }, 
"outputs": [ { "data": { "image/png": "", "text/plain": [ "Pipeline(steps=[('preprocessor',\n",
" ColumnTransformer(transformers=[('num', StandardScaler(),\n",
" ['year', 'mileage', 'tax',\n",
" 'mpg', 'engineSize']),\n",
" ('cat', OneHotEncoder(),\n",
" ['transmission', 'fuelType',\n",
" 'Manufacturer'])])),\n",
" ('model', LinearRegression())])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. Pipeline(steps=[('preprocessor',\n",
" ColumnTransformer(transformers=[('num', StandardScaler(),\n",
" ['year', 'mileage', 'tax',\n",
" 'mpg', 'engineSize']),\n",
" ('cat', OneHotEncoder(),\n",
" ['transmission', 'fuelType',\n",
" 'Manufacturer'])])),\n",
" ('model', LinearRegression())])ColumnTransformer(transformers=[('num', StandardScaler(),\n",
" ['year', 'mileage', 'tax', 'mpg',\n",
" 'engineSize']),\n",
" ('cat', OneHotEncoder(),\n",
" ['transmission', 'fuelType', 'Manufacturer'])])['year', 'mileage', 'tax', 'mpg', 'engineSize']
StandardScaler()
['transmission', 'fuelType', 'Manufacturer']
OneHotEncoder()
LinearRegression()