{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "57e93a16-97eb-4fef-809f-ea579962d099",
   "metadata": {},
   "source": [
    "# Wandb and Structured Learning\n",
    "\n",
    "Fetches the Kaggle Titanic competition files and takes a first look at the training table (`train.csv`): distinct values per column, dtypes and missing values."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1b249271-60e8-4b31-9f06-49d6d21b5486",
   "metadata": {},
   "outputs": [],
   "source": [
    "# %reload_ext (rather than %load_ext) so that re-running this cell does not\n",
    "# print the \"autoreload extension is already loaded\" warning.\n",
    "%reload_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e13756e3-52f9-4e44-a806-ef48a6831fe4",
   "metadata": {},
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "317ac900-271b-4f9d-92e7-285fdc6e8218",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Explicit import: `pd` is used below and should not depend on a wildcard import.\n",
    "import pandas as pd\n",
    "from sklearn import datasets\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.model_selection import cross_val_score\n",
    "from sklearn.model_selection import StratifiedKFold\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
    "from sklearn.naive_bayes import GaussianNB\n",
    "from fastai.vision.all import *\n",
    "from aiking.data.external import * #We need to import this after fastai modules\n",
    "import wandb"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3c1d5e72-8a4b-4f0e-9b6a-2d7f1e0a9c41",
   "metadata": {},
   "source": [
    "## Load data\n",
    "\n",
    "`untar_data` is called with the `kaggle_competitions::titanic` identifier and returns the local folder holding the competition files."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dc8d1cdf-995c-4ecb-b4d8-f12aa5737fe5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Path('/content/drive/MyDrive/PPV/S_Personal_Study/aiking/data/titanic')"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "path = untar_data(\"kaggle_competitions::titanic\")\n",
    "# Fail here with a clear message rather than with a TypeError on `path/\"train.csv\"` further down.\n",
    "assert path is not None, \"untar_data returned None; the titanic data was not fetched\"\n",
    "path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "937f359e-870e-46d7-9241-915406dfae6d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(#3) [Path('/content/drive/MyDrive/PPV/S_Personal_Study/aiking/data/titanic/train.csv'),Path('/content/drive/MyDrive/PPV/S_Personal_Study/aiking/data/titanic/test.csv'),Path('/content/drive/MyDrive/PPV/S_Personal_Study/aiking/data/titanic/gender_submission.csv')]"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "path.ls()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cbe662a4-f32a-48da-9152-f35fc6e27201",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(path/\"train.csv\")\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7b9e2f14-5d3a-4c86-a1f7-6e0c4d8b2a53",
   "metadata": {},
   "source": [
    "## First look at the columns\n",
    "\n",
    "Number of distinct values per column, followed by dtypes and non-null counts."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b943f313-a86c-4c53-864e-33dfae9816f5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "PassengerId 891\n",
       "Survived 2\n",
       "Pclass 3\n",
       "Name 891\n",
       "Sex 2\n",
       "Age 88\n",
       "SibSp 7\n",
       "Parch 7\n",
       "Ticket 681\n",
       "Fare 248\n",
       "Cabin 147\n",
       "Embarked 3\n",
       "dtype: int64"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.nunique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fbb05fce-4a4b-4612-8eae-f61ce1dcbb13",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 891 entries, 0 to 890\n",
      "Data columns (total 12 columns):\n",
      " # Column Non-Null Count Dtype \n",
      "--- ------ -------------- ----- \n",
      " 0 PassengerId 891 non-null int64 \n",
      " 1 Survived 891 non-null int64 \n",
      " 2 Pclass 891 non-null int64 \n",
      " 3 Name 891 non-null object \n",
      " 4 Sex 891 non-null object \n",
      " 5 Age 714 non-null float64\n",
      " 6 SibSp 891 non-null int64 \n",
      " 7 Parch 891 non-null int64 \n",
      " 8 Ticket 891 non-null object \n",
      " 9 Fare 891 non-null float64\n",
      " 10 Cabin 204 non-null object \n",
      " 11 Embarked 889 non-null object \n",
      "dtypes: float64(2), int64(5), object(5)\n",
      "memory usage: 83.7+ KB\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}