Skip to main content
Commonmark migration
Source Link

##Problem

Problem

##Approach

Approach

##Code

Code

##Review

Review

##Problem

##Approach

##Code

##Review

Problem

Approach

Code

Review

edited tags
Link
200_success
  • 145.7k
  • 22
  • 191
  • 481
deleted 58 characters in body
Source Link
auden
  • 441
  • 2
  • 12

## Code

import math

def match(s1, s2):
    """Return the set of items that occur in both s1 and s2."""
    return set(s1) & set(s2)

def technical_match(s1, s2):
    """Return the shared characters that fall inside the Jaro match window.

    A character counts as matched when it occurs in both strings and the
    positions of its first occurrence in each string differ by at most
    ``max(len(s1), len(s2)) // 2 - 1`` (never less than 0).

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        A list of the matched characters. The list is built from a set, so
        its order is arbitrary. A list (not a count) is returned because the
        callers in this file take ``len()`` of it and iterate over it.
    """
    # Halve the longer length as a whole; halving only len(s2) made the window
    # far too wide. Clamp at 0 so one-character strings can still match.
    max_distance = max(0, max(len(s1), len(s2)) // 2 - 1)
    # NOTE: str.index only sees the first occurrence, so every distinct
    # character is considered once, however often it repeats.
    return [
        ch
        for ch in set(s1) & set(s2)
        if abs(s1.index(ch) - s2.index(ch)) <= max_distance
    ]

def diff_letters(seq1, seq2):
    """Count the positions at which seq1 and seq2 hold different items.

    Only the overlapping prefix is compared: ``zip`` stops at the end of the
    shorter sequence, so surplus items in the longer one are ignored.

    Args:
        seq1: First sequence (the callers in this file pass strings).
        seq2: Second sequence.

    Returns:
        The number of differing positions, as an int.
    """
    # note - this function comes from an SO answer and is not mine
    return sum(1 for a, b in zip(seq1, seq2) if a != b)

def transpositions(s1, s2):
    """Compare the matched characters of s1 and s2 position by position.

    Each string is reduced to the characters reported by technical_match,
    keeping their original order, and the two reduced strings are handed to
    diff_letters, whose result is returned unchanged.
    """
    matched = list(technical_match(s1, s2))
    kept_first = ''.join(ch for ch in s1 if ch in matched)
    kept_second = ''.join(ch for ch in s2 if ch in matched)
    return diff_letters(kept_first, kept_second)

def jaro_similarity(s1, s2):
    """Return the Jaro similarity of s1 and s2.

    With ``m`` matched characters and ``t`` transpositions the score is
    ``(m/len(s1) + m/len(s2) + (m - t)/m) / 3``.

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        0 when technical_match reports no matched characters (this also
        covers empty input, so no division by zero occurs); otherwise a
        float computed with the formula above.
    """
    matches = len(technical_match(s1, s2))
    if matches == 0:
        return 0
    # transpositions() counts every mismatched position; Jaro defines t as
    # half of that, and t must be subtracted - adding it pushed scores above 1.
    half_swaps = transpositions(s1, s2) / 2
    return (
        matches / len(s1)
        + matches / len(s2)
        + (matches - half_swaps) / matches
    ) / 3

# Load the candidate lines; the context manager closes the file handle as soon
# as the content has been read, instead of leaving it open.
with open('foobar.txt', 'r') as source:
    match_text = source.read().splitlines()
pattern = 'hat'
constant = .5  # a line is kept when its similarity to `pattern` exceeds this

# Keep every line that is similar enough to the pattern, in file order.
results = [
    line for line in match_text
    if jaro_similarity(line, pattern) > constant
]

print(results)

## Code

import math

def match(s1, s2):
    """Return the set of characters that occur in both s1 and s2."""
    return set(s1) & set(s2)


def _matched_chars(s1, s2):
    """Return the shared characters whose first occurrences lie in the Jaro window."""
    # Window: half of the longer length, minus one, never below zero. Halving
    # only len(s2) made the window far too wide.
    max_distance = max(0, max(len(s1), len(s2)) // 2 - 1)
    # NOTE: str.index only sees the first occurrence, so every distinct
    # character is considered once, however often it repeats.
    return [
        ch
        for ch in match(s1, s2)
        if abs(s1.index(ch) - s2.index(ch)) <= max_distance
    ]


def technical_match(s1, s2):
    """Return how many distinct shared characters fall inside the match window.

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        The number of matched characters, as an int.
    """
    return len(_matched_chars(s1, s2))


def diff_letters(a, b):
    """Count the positions at which a and b hold different items.

    Only the overlapping prefix is compared (``zip`` stops at the shorter
    input), so a shorter ``b`` no longer raises IndexError.
    """
    # note - this function comes from an SO answer and is not mine
    return sum(1 for x, y in zip(a, b) if x != y)


def transpositions(s1, s2):
    """Return the number of positions where the matched characters disagree.

    Both strings are reduced to their matched characters, in original order,
    and the reduced sequences are compared position by position.
    """
    # technical_match() only returns a count, so the matched characters are
    # taken from the shared helper instead of calling list() on an int.
    matched = set(_matched_chars(s1, s2))
    kept_first = [ch for ch in s1 if ch in matched]
    kept_second = [ch for ch in s2 if ch in matched]
    return diff_letters(kept_first, kept_second)


def jaro_similarity(s1, s2):
    """Return the Jaro similarity of s1 and s2.

    With ``m`` matched characters and ``t`` transpositions the score is
    ``(m/len(s1) + m/len(s2) + (m - t)/m) / 3``.

    Returns:
        0 when no characters match (this also covers empty input, so no
        division by zero occurs); otherwise a float between 0 and 1.
    """
    matches = technical_match(s1, s2)
    if matches == 0:
        return 0
    # transpositions() counts every mismatched position; Jaro defines t as
    # half of that, and t must be subtracted - adding it pushed scores above 1.
    half_swaps = transpositions(s1, s2) / 2
    return (
        matches / len(s1)
        + matches / len(s2)
        + (matches - half_swaps) / matches
    ) / 3

# Read the candidate lines inside a context manager so the file is closed
# once its content has been loaded.
with open('foobar.txt', 'r') as handle:
    match_text = handle.read().splitlines()
pattern = 'hat'
constant = .5  # similarity a line must exceed to be reported

# Collect, in file order, the lines whose similarity to the pattern is high enough.
results = [
    candidate for candidate in match_text
    if jaro_similarity(candidate, pattern) > constant
]

print(results)

##Code

import math

def match(s1, s2):
    """Return the set of items present in both s1 and s2."""
    shared = set(s1)
    return shared.intersection(set(s2))

def technical_match(s1, s2):
    """Return the shared characters that fall inside the Jaro match window.

    A character counts as matched when it occurs in both strings and the
    positions of its first occurrence in each string differ by at most
    ``max(len(s1), len(s2)) // 2 - 1`` (never less than 0).

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        A list of the matched characters. The list is built from a set, so
        its order is arbitrary.
    """
    # The window is half of the LONGER length minus one. Previously only
    # len(s2) was halved, so almost any distance was accepted. Clamp at 0 so
    # one-character strings can still match.
    max_distance = max(0, max(len(s1), len(s2)) // 2 - 1)
    true_list = []
    # NOTE: str.index only sees the first occurrence, so every distinct
    # character is considered once, however often it repeats.
    for ch in set(s1) & set(s2):
        if abs(s1.index(ch) - s2.index(ch)) <= max_distance:
            true_list.append(ch)
    return true_list

def diff_letters(seq1, seq2):
    """Count the paired positions at which seq1 and seq2 differ.

    Pairs come from ``zip``, so comparison stops at the end of the shorter
    sequence.
    """
    mismatches = 0
    for left, right in zip(seq1, seq2):
        if left != right:
            mismatches += 1
    return mismatches

def transpositions(s1, s2):
    """Compare the matched characters of s1 and s2 position by position.

    Each string is filtered down to the characters returned by
    technical_match, preserving order; the two filtered strings are then
    passed to diff_letters and its result is returned.
    """
    matched = list(technical_match(s1, s2))

    def keep_matched(text):
        # Drop every character that technical_match did not report.
        return ''.join(filter(lambda ch: ch in matched, text))

    return diff_letters(keep_matched(s1), keep_matched(s2))

def jaro_similarity(s1, s2):
    """Return the Jaro similarity of s1 and s2.

    With ``m`` matched characters and ``t`` transpositions the score is
    ``(m/len(s1) + m/len(s2) + (m - t)/m) / 3``.

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        0 when technical_match reports no matched characters (this also
        covers empty input, so no division by zero occurs); otherwise a
        float computed with the formula above.
    """
    matches = len(technical_match(s1, s2))
    if matches == 0:
        return 0
    # transpositions() counts every mismatched position; Jaro defines t as
    # half of that, and t must be subtracted - adding it pushed scores above 1.
    # NOTE(review): technical_match works per distinct character, so strings
    # with repeated letters score lower than textbook Jaro - confirm whether
    # that is acceptable for the intended use.
    half_swaps = transpositions(s1, s2) / 2
    return (
        matches / len(s1)
        + matches / len(s2)
        + (matches - half_swaps) / matches
    ) / 3

# Open the word list in a `with` block so the handle is released right after
# reading rather than being left open.
with open('foobar.txt', 'r') as word_file:
    match_text = word_file.read().splitlines()
pattern = 'hat'
constant = .5  # minimum similarity (exclusive) for a line to be kept

# Filter the lines down to those that resemble the pattern closely enough.
results = [
    entry for entry in match_text
    if jaro_similarity(entry, pattern) > constant
]

print(results)
added 427 characters in body
Source Link
auden
  • 441
  • 2
  • 12
Loading
Source Link
auden
  • 441
  • 2
  • 12
Loading