{
"cells": [
{
"cell_type": "markdown",
"id": "ce7f13aa-169c-4891-9659-7c0ff1028635",
"metadata": {},
"source": [
"# Autocorrect"
]
},
{
"cell_type": "markdown",
"id": "188407f3-65d4-4e3e-aa01-f4e6bf4a6ca6",
"metadata": {},
"source": [
"## Imports "
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "d95242ba-9866-4a8c-b2f1-db45338952b2",
"metadata": {},
"outputs": [],
"source": [
"from fastcore.all import *\n",
"import numpy as np\n",
"import scipy as sp\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"import random\n",
"import nltk\n",
"import re\n",
"from collections import Counter\n",
"import string"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "b3646c3b-e490-41bf-a650-8be6aac7c2ae",
"metadata": {},
"outputs": [],
"source": [
"sns.set()"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "36ddb7e5-7be3-44af-b8b4-8b8c205fc5a3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[1, 1, 2, 2, 3, 3]"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a = [1,2,3]\n",
"b = [[1,2,3],[4,5,6]]\n",
"b = [1,2]\n",
"\n",
"[e for e in a for l in b]"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "f77679c3-96ec-42c4-9b82-7f3e80ee5e70",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
},
"text/plain": [
"'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"string.ascii_letters"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "65de8f33-bb32-4bdd-b7f4-ec04f711f0d0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Counter({'Are': 1,\n",
" 'This': 1,\n",
" 'a': 1,\n",
" 'is': 1,\n",
" 'of': 1,\n",
" 'ready': 1,\n",
" 'test': 2,\n",
" 'to': 1,\n",
" 'wit': 1,\n",
" 'wits': 1,\n",
" 'you': 1,\n",
" 'your': 1})"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"corpus = \"This is a test of wits. Are you ready to test your wit?\"\n",
"Counter(re.findall(r'(\\w+)', corpus))"
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "d4847e57-669e-4852-a0a0-81ef42d66a78",
"metadata": {},
"outputs": [],
"source": [
"word = \"Rahul\""
]
},
{
"cell_type": "code",
"execution_count": 53,
"id": "99dc54a2-2f26-482c-812b-0f1f2dcf8630",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('', 'Rahul'),\n",
" ('R', 'ahul'),\n",
" ('Ra', 'hul'),\n",
" ('Rah', 'ul'),\n",
" ('Rahu', 'l'),\n",
" ('Rahul', '')]"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"splits = [(word[:i], word[i:]) for i in range(len(word)+1)]; splits"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "43f9decb-417b-4985-b390-e96830a83c54",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['ahul', 'Rhul', 'Raul', 'Rahl', 'Rahu']"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"[L+R[1:] for L,R in splits if R] # deletes"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "bf532663-3fdf-4a7f-a55d-db87f784e3a1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['aRhul', 'Rhaul', 'Rauhl', 'Rahlu']"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"[L[:-1]+ R[0]+L[-1:]+R[1:] for L,R in splits if R and L] # switch letter"
]
},
{
"cell_type": "code",
"execution_count": 61,
"id": "c52c69a6-a7bb-4193-a7c2-6d21968e63d9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['aahul',\n",
" 'bahul',\n",
" 'Rbhul',\n",
" 'Raaul',\n",
" 'Rabul',\n",
" 'Rahal',\n",
" 'Rahbl',\n",
" 'Rahua',\n",
" 'Rahub']"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"[L+C+R[1:] for L,R in splits for C in string.ascii_letters[:2] if R and C != R[0]] #replace letters"
]
},
{
"cell_type": "code",
"execution_count": 63,
"id": "7680b33a-e81e-4e4b-8555-0f92bca83436",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['aRahul',\n",
" 'bRahul',\n",
" 'Raahul',\n",
" 'Rbahul',\n",
" 'Raahul',\n",
" 'Rabhul',\n",
" 'Rahaul',\n",
" 'Rahbul',\n",
" 'Rahual',\n",
" 'Rahubl',\n",
" 'Rahula',\n",
" 'Rahulb']"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"[L+C+R for L,R in splits for C in string.ascii_letters[:2] ] # if R and L] # insert letters"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "30b8be1c-52a2-4daf-937e-29535c48aba5",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}