##Code
import math
import math
def match(s1, s2):
    """Return the set of distinct items that occur in both ``s1`` and ``s2``.

    Both arguments are converted to sets first, so duplicates and ordering
    are discarded. The result is a new ``set`` (empty when nothing is shared).
    """
    return set(s1) & set(s2)
def technical_match(s1, s2):
    """Return the characters shared by ``s1`` and ``s2`` that lie close enough.

    A character common to both strings is kept when the index of its first
    occurrence in ``s1`` and the index of its first occurrence in ``s2`` are
    at most ``max(len(s1), len(s2)) // 2 - 1`` positions apart (the window is
    never smaller than 0, so identical one-character strings still match).

    Returns:
        A list holding each qualifying character once. The candidates come
        from a set, so the order of the list is unspecified. The list may be
        empty.
    """
    # Halve the longer of the two lengths; the parentheses matter, otherwise
    # only len(s2) is halved and the window is far too wide.
    max_distance = max(0, max(len(s1), len(s2)) // 2 - 1)
    # str.index reports the first occurrence only, so a repeated character is
    # judged by its first position in each string.
    return [
        ch
        for ch in match(s1, s2)
        if abs(s1.index(ch) - s2.index(ch)) <= max_distance
    ]
def diff_letters(seq1, seq2):
    """Count the positions at which ``seq1`` and ``seq2`` hold different items.

    The two sequences are walked in lockstep with ``zip``, so the comparison
    stops at the end of the shorter one; surplus items in the longer sequence
    are ignored.

    Returns:
        The number of compared positions whose items are not equal.
    """
    # note - this function comes from an SO answer, not mine
    return sum(1 for a, b in zip(seq1, seq2) if a != b)
def transpositions(s1, s2):
    """Compare the matched characters of ``s1`` and ``s2`` position by position.

    Each string is reduced to the characters reported by ``technical_match``
    (original order and repeats are kept), and the two reduced strings are
    handed to ``diff_letters``; its result is returned unchanged.
    """
    matched = list(technical_match(s1, s2))
    kept_from_s1 = ''.join(ch for ch in s1 if ch in matched)
    kept_from_s2 = ''.join(ch for ch in s2 if ch in matched)
    return diff_letters(kept_from_s1, kept_from_s2)
def jaro_similarity(s1, s2):
    """Return the Jaro similarity score of ``s1`` and ``s2`` as a float.

    With ``m`` matching characters and ``t`` transpositions the score is::

        (m / len(s1) + m / len(s2) + (m - t) / m) / 3

    Returns ``0.0`` when there are no matching characters, which also covers
    the case where either string is empty (so no division by zero occurs).
    """
    matches = len(technical_match(s1, s2))
    if matches == 0:
        return 0.0
    # transpositions() reports every matched character that sits at a
    # different position in the other string; Jaro counts each swapped pair
    # once, hence the halving. Transpositions lower the score, so they are
    # subtracted from the match count, not added.
    swapped = transpositions(s1, s2) / 2
    return (
        matches / len(s1)
        + matches / len(s2)
        + (matches - swapped) / matches
    ) / 3
# Script section: print every line of foobar.txt whose similarity to
# ``pattern`` is strictly greater than ``constant``.
with open('foobar.txt', 'r') as source_file:
    # The context manager closes the file handle as soon as it has been read.
    match_text = source_file.read().splitlines()
pattern = 'hat'
constant = .5
results = [
    line for line in match_text if jaro_similarity(line, pattern) > constant
]
print(results)