# Autocorrect

# Imports

from fastcore.all import *
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random
import nltk
import re
from collections import Counter
import string
sns.set()  # apply seaborn's default theme to subsequent matplotlib plots
# Nested-comprehension demo: the outer loop walks `a`, and the inner loop runs
# once per element of `b`, so every element of `a` is emitted len(b) times.
a = [1, 2, 3]
b = [1, 2]

# The inner loop variable is never used, hence `_`.
[e for e in a for _ in b]
[1, 1, 2, 2, 3, 3]
string.ascii_letters  # lowercase a-z followed by uppercase A-Z
'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
# Toy corpus used to demonstrate word-frequency counting.
corpus = "This is a test of wits. Are you ready to test your wit?"
# Tokenise on runs of word characters and tally each token. Counting is
# case-sensitive ('Are' and 'are' would be distinct keys). No capture group is
# needed: without groups, findall already returns the full matches.
Counter(re.findall(r'\w+', corpus))
Counter({'Are': 1,
         'This': 1,
         'a': 1,
         'is': 1,
         'of': 1,
         'ready': 1,
         'test': 2,
         'to': 1,
         'wit': 1,
         'wits': 1,
         'you': 1,
         'your': 1})
# Sample word used to demonstrate the candidate edit operations below.
word = "Rahul"
# Every (prefix, suffix) pair of `word`: cut positions run from 0 to len(word)
# inclusive, so both the empty prefix and the empty suffix are present.
splits = [(word[:cut], word[cut:]) for cut in range(len(word) + 1)]
splits
[('', 'Rahul'),
 ('R', 'ahul'),
 ('Ra', 'hul'),
 ('Rah', 'ul'),
 ('Rahu', 'l'),
 ('Rahul', '')]
# Deletes: drop the first character of each non-empty suffix.
[head + tail[1:] for head, tail in splits if tail]
['ahul', 'Rhul', 'Raul', 'Rahl', 'Rahu']
# Transposes: swap the last character of the prefix with the first character of
# the suffix; both sides must be non-empty for there to be a pair to swap.
[head[:-1] + tail[0] + head[-1] + tail[1:] for head, tail in splits if head and tail]
['aRhul', 'Rhaul', 'Rauhl', 'Rahlu']
# Replaces: substitute the first character of each non-empty suffix with a
# different letter. Only the first two letters ('a', 'b') are tried here to
# keep the demo output short.
[
    head + ch + tail[1:]
    for head, tail in splits
    if tail
    for ch in string.ascii_letters[:2]
    if ch != tail[0]
]
['aahul',
 'bahul',
 'Rbhul',
 'Raaul',
 'Rabul',
 'Rahal',
 'Rahbl',
 'Rahua',
 'Rahub']
# Inserts: place a letter at every cut position, including before the first
# and after the last character. Only 'a' and 'b' are tried to keep the demo
# output short.
[head + ch + tail for head, tail in splits for ch in string.ascii_letters[:2]]
['aRahul',
 'bRahul',
 'Raahul',
 'Rbahul',
 'Raahul',
 'Rabhul',
 'Rahaul',
 'Rahbul',
 'Rahual',
 'Rahubl',
 'Rahula',
 'Rahulb']