{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "ce7f13aa-169c-4891-9659-7c0ff1028635",
   "metadata": {},
   "source": [
    "# Autocorrect\n",
    "\n",
    "Scratch notebook exploring the building blocks of a simple spelling corrector: counting the words of a corpus and generating the strings that are one edit (delete, transpose, replace, insert) away from a word."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "188407f3-65d4-4e3e-aa01-f4e6bf4a6ca6",
   "metadata": {},
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "d95242ba-9866-4a8c-b2f1-db45338952b2",
   "metadata": {},
   "outputs": [],
   "source": [
    "from fastcore.all import *\n",
    "import numpy as np\n",
    "import scipy as sp\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import random\n",
    "import nltk\n",
    "import re\n",
    "from collections import Counter\n",
    "import string"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "b3646c3b-e490-41bf-a650-8be6aac7c2ae",
   "metadata": {},
   "outputs": [],
   "source": [
    "sns.set()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5e2b8c14-3a7d-4f69-b0c2-91d6e4a7f358",
   "metadata": {},
   "source": [
    "## Scratch: nested comprehension order\n",
    "\n",
    "With two `for` clauses the leftmost one is the outer loop, so each element of `a` is repeated once per element of `b`. The edit generators below rely on the same ordering."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "36ddb7e5-7be3-44af-b8b4-8b8c205fc5a3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[1, 1, 2, 2, 3, 3]"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = [1, 2, 3]\n",
    "b = [1, 2]\n",
    "\n",
    "# `_` marks the inner loop variable as unused: only the repetition count matters.\n",
    "[e for e in a for _ in b]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "f77679c3-96ec-42c4-9b82-7f3e80ee5e70",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.google.colaboratory.intrinsic+json": {
       "type": "string"
      },
      "text/plain": [
       "'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "string.ascii_letters"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0c9d4e7a-6b15-4d82-a3f1-7e58b2c6d904",
   "metadata": {},
   "source": [
    "## Word counts\n",
    "\n",
    "Tokenise a toy corpus with a regular expression and count each token. The tokens are not case-folded, so `This` and `Are` keep their capitals in the counts below."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "65de8f33-bb32-4bdd-b7f4-ec04f711f0d0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Counter({'Are': 1,\n",
       " 'This': 1,\n",
       " 'a': 1,\n",
       " 'is': 1,\n",
       " 'of': 1,\n",
       " 'ready': 1,\n",
       " 'test': 2,\n",
       " 'to': 1,\n",
       " 'wit': 1,\n",
       " 'wits': 1,\n",
       " 'you': 1,\n",
       " 'your': 1})"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "corpus = \"This is a test of wits. Are you ready to test your wit?\"\n",
    "# No capture group needed: findall already returns the whole match.\n",
    "Counter(re.findall(r'\\w+', corpus))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b7a3f1e6-2c48-4b95-8d0e-4f6a9c1e2d37",
   "metadata": {},
   "source": [
    "## Single-edit candidates\n",
    "\n",
    "Split `word` at every position, then build the four kinds of one-edit variants from those splits. `LETTERS` is limited to two letters so the outputs stay short."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "d4847e57-669e-4852-a0a0-81ef42d66a78",
   "metadata": {},
   "outputs": [],
   "source": [
    "word = \"Rahul\"\n",
    "# Two letters keep the demo outputs short; widen the slice to try more letters.\n",
    "LETTERS = string.ascii_letters[:2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "99dc54a2-2f26-482c-812b-0f1f2dcf8630",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('', 'Rahul'),\n",
       " ('R', 'ahul'),\n",
       " ('Ra', 'hul'),\n",
       " ('Rah', 'ul'),\n",
       " ('Rahu', 'l'),\n",
       " ('Rahul', '')]"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Every (left, right) split of the word, including the empty prefix and the empty suffix.\n",
    "splits = [(word[:i], word[i:]) for i in range(len(word) + 1)]\n",
    "splits"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "43f9decb-417b-4985-b390-e96830a83c54",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['ahul', 'Rhul', 'Raul', 'Rahl', 'Rahu']"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Deletes: drop the first character of every non-empty right part.\n",
    "deletes = [L + R[1:] for L, R in splits if R]\n",
    "deletes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "bf532663-3fdf-4a7f-a55d-db87f784e3a1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['aRhul', 'Rhaul', 'Rauhl', 'Rahlu']"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Transposes: swap the two characters that follow the split point.\n",
    "# Needs at least two characters on the right, hence len(R) > 1.\n",
    "transposes = [L + R[1] + R[0] + R[2:] for L, R in splits if len(R) > 1]\n",
    "transposes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "c52c69a6-a7bb-4193-a7c2-6d21968e63d9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['aahul',\n",
       " 'bahul',\n",
       " 'Rbhul',\n",
       " 'Raaul',\n",
       " 'Rabul',\n",
       " 'Rahal',\n",
       " 'Rahbl',\n",
       " 'Rahua',\n",
       " 'Rahub']"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Replaces: substitute the character after the split point with each *different* letter.\n",
    "# `if R` sits before the inner loop so the empty split is skipped once, not once per letter.\n",
    "replaces = [L + C + R[1:] for L, R in splits if R for C in LETTERS if C != R[0]]\n",
    "replaces"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "id": "7680b33a-e81e-4e4b-8555-0f92bca83436",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['aRahul',\n",
       " 'bRahul',\n",
       " 'Raahul',\n",
       " 'Rbahul',\n",
       " 'Raahul',\n",
       " 'Rabhul',\n",
       " 'Rahaul',\n",
       " 'Rahbul',\n",
       " 'Rahual',\n",
       " 'Rahubl',\n",
       " 'Rahula',\n",
       " 'Rahulb']"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inserts: add each letter at every split point, including both ends of the word.\n",
    "# Duplicates are expected: inserting 'a' on either side of an existing 'a' gives the same string.\n",
    "inserts = [L + C + R for L, R in splits for C in LETTERS]\n",
    "inserts"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}