Skip to main content
Bounty Awarded with 50 reputation awarded by Simd
Fix issue with initial state not being marked as accepting when k=n
Source Link
gsitcia
  • 3.5k
  • 9
  • 14

Python, 34413453 bytes

import random

def skip():pass

class State:
    states = []
    def __init__(self, accepting=False):
        self.n = len(State.states)
        State.states.append(self)
        self.accepting = accepting
        self.transitions = {}
    def __repr__(self):
        return f'q_{self.n}'
    def add(self, c, s):
        self.transitions.setdefault(c, set()).add(s)
    def __eq__(self, o):
        return self.n == o.n
    def __hash__(self):
        return hash((State, self.n))
    def epsilon_helper(self):
        z = {self}
        if '' in self.transitions:
            for s in self.transitions.pop(''):
                z.update(s.epsilon_helper())
        self.epsilon_helper = lambda:z
        return z 
    def remove_epsilon(self):
        self.remove_epsilon = skip
        for a in self.epsilon_helper():
            a.remove_epsilon()
            for x, S in a.transitions.items():
                for b in list(S):
                    b.remove_epsilon()
                    for c in b.epsilon_helper():
                        self.add(x, c)
                        c.remove_epsilon()
    def freeze(self):
        self.freeze = lambda:None
        for c, s in self.transitions.items():
            for x in s:
                x.freeze()
            self.transitions[c] = frozenset(s)
    def count_accepting(self):
        x = self.accepting + sum(s.count_accepting() for S in self.transitions.values() for s in S)
        self.count_accepting = lambda:x
        return x
    def accept(self, w):
        if w == '': return self.accepting
        return any(s.accept(w[1:]) for s in self.transitions.get(w[0],()))
    def pick(self, i):
        if self.accepting:
            if i == 0: return ''
            i -= 1
        for j, s in self.transitions.items():
            s = min(s)
            x = s.count_accepting()
            if i < x:
                return j + s.pick(i)
            i -= x
        raise IndexError

def levenshtein(w, n):
    A = set(w)
    W = len(w)
    states = [[State(j <= i) for _j in w] + [Staterange(Truelen(w),-1,-1)] for _i in range(n+1n,-1,-1)]
    for i, c in enumerate(w):
        for j in range(n):
            s = states[j][i]
            for x in A:
                s.add(x, states[j+1][i])
                if x != c:
                    s.add(x, states[j+1][i+1])
            s.add('', states[j+1][i+1])
            s.add(c, states[j][i+1])
        states[n][i].add(c, states[n][i+1])
    for j in range(n):
        s = states[j][W]
        for x in A:
            s.add(x, states[j+1][W])
    s = states[0][0]
    s.remove_epsilon()
    s.freeze()
    return s

def union(states):
    d = {}
    accepting = False
    for s in states:
        for c, S in s.transitions.items():
            d[c] = d.get(c, frozenset()) | S
        if s.accepting:
            accepting = True
    return d, accepting

def difference(q0, q1, D = None):
    if D is None:
        return difference(frozenset([q0]), frozenset([q1]), {})
    if (q0, q1) in D:
        return D[q0, q1]
    d0, a0 = union(q0)
    d1, a1 = union(q1)
    Q0 = State(a0 and not a1)
    D[q0, q1] = Q0
    for i in set(d0) | set(d1):
        Q0.transitions[i] = frozenset([difference(d0.get(i, frozenset()), d1.get(i, frozenset()), D)])
    return Q0

def exact(w, n):
    if n == 0: raise ValueError
    return difference(levenshtein(w, n), levenshtein(w, n-1))

def f(w, n):
    s = exact(w, n)
    return s.pick(random.randrange(s.count_accepting()))

Attempt This Online!Attempt This Online!

Python, 3441 bytes

import random

def skip():pass

class State:
    states = []
    def __init__(self, accepting=False):
        self.n = len(State.states)
        State.states.append(self)
        self.accepting = accepting
        self.transitions = {}
    def __repr__(self):
        return f'q_{self.n}'
    def add(self, c, s):
        self.transitions.setdefault(c, set()).add(s)
    def __eq__(self, o):
        return self.n == o.n
    def __hash__(self):
        return hash((State, self.n))
    def epsilon_helper(self):
        z = {self}
        if '' in self.transitions:
            for s in self.transitions.pop(''):
                z.update(s.epsilon_helper())
        self.epsilon_helper = lambda:z
        return z 
    def remove_epsilon(self):
        self.remove_epsilon = skip
        for a in self.epsilon_helper():
            a.remove_epsilon()
            for x, S in a.transitions.items():
                for b in list(S):
                    b.remove_epsilon()
                    for c in b.epsilon_helper():
                        self.add(x, c)
                        c.remove_epsilon()
    def freeze(self):
        self.freeze = lambda:None
        for c, s in self.transitions.items():
            for x in s:
                x.freeze()
            self.transitions[c] = frozenset(s)
    def count_accepting(self):
        x = self.accepting + sum(s.count_accepting() for S in self.transitions.values() for s in S)
        self.count_accepting = lambda:x
        return x
    def accept(self, w):
        if w == '': return self.accepting
        return any(s.accept(w[1:]) for s in self.transitions.get(w[0],()))
    def pick(self, i):
        if self.accepting:
            if i == 0: return ''
            i -= 1
        for j, s in self.transitions.items():
            s = min(s)
            x = s.count_accepting()
            if i < x:
                return j + s.pick(i)
            i -= x
        raise IndexError

def levenshtein(w, n):
    A = set(w)
    W = len(w)
    states = [[State() for _ in w] + [State(True)] for _ in range(n+1)]
    for i, c in enumerate(w):
        for j in range(n):
            s = states[j][i]
            for x in A:
                s.add(x, states[j+1][i])
                if x != c:
                    s.add(x, states[j+1][i+1])
            s.add('', states[j+1][i+1])
            s.add(c, states[j][i+1])
        states[n][i].add(c, states[n][i+1])
    for j in range(n):
        s = states[j][W]
        for x in A:
            s.add(x, states[j+1][W])
    s = states[0][0]
    s.remove_epsilon()
    s.freeze()
    return s

def union(states):
    d = {}
    accepting = False
    for s in states:
        for c, S in s.transitions.items():
            d[c] = d.get(c, frozenset()) | S
        if s.accepting:
            accepting = True
    return d, accepting

def difference(q0, q1, D = None):
    if D is None:
        return difference(frozenset([q0]), frozenset([q1]), {})
    if (q0, q1) in D:
        return D[q0, q1]
    d0, a0 = union(q0)
    d1, a1 = union(q1)
    Q0 = State(a0 and not a1)
    D[q0, q1] = Q0
    for i in set(d0) | set(d1):
        Q0.transitions[i] = frozenset([difference(d0.get(i, frozenset()), d1.get(i, frozenset()), D)])
    return Q0

def exact(w, n):
    if n == 0: raise ValueError
    return difference(levenshtein(w, n), levenshtein(w, n-1))

def f(w, n):
    s = exact(w, n)
    return s.pick(random.randrange(s.count_accepting()))

Attempt This Online!

Python, 3453 bytes

import random

def skip():pass

class State:
    states = []
    def __init__(self, accepting=False):
        self.n = len(State.states)
        State.states.append(self)
        self.accepting = accepting
        self.transitions = {}
    def __repr__(self):
        return f'q_{self.n}'
    def add(self, c, s):
        self.transitions.setdefault(c, set()).add(s)
    def __eq__(self, o):
        return self.n == o.n
    def __hash__(self):
        return hash((State, self.n))
    def epsilon_helper(self):
        z = {self}
        if '' in self.transitions:
            for s in self.transitions.pop(''):
                z.update(s.epsilon_helper())
        self.epsilon_helper = lambda:z
        return z 
    def remove_epsilon(self):
        self.remove_epsilon = skip
        for a in self.epsilon_helper():
            a.remove_epsilon()
            for x, S in a.transitions.items():
                for b in list(S):
                    b.remove_epsilon()
                    for c in b.epsilon_helper():
                        self.add(x, c)
                        c.remove_epsilon()
    def freeze(self):
        self.freeze = lambda:None
        for c, s in self.transitions.items():
            for x in s:
                x.freeze()
            self.transitions[c] = frozenset(s)
    def count_accepting(self):
        x = self.accepting + sum(s.count_accepting() for S in self.transitions.values() for s in S)
        self.count_accepting = lambda:x
        return x
    def accept(self, w):
        if w == '': return self.accepting
        return any(s.accept(w[1:]) for s in self.transitions.get(w[0],()))
    def pick(self, i):
        if self.accepting:
            if i == 0: return ''
            i -= 1
        for j, s in self.transitions.items():
            s = min(s)
            x = s.count_accepting()
            if i < x:
                return j + s.pick(i)
            i -= x
        raise IndexError

def levenshtein(w, n):
    A = set(w)
    W = len(w)
    states = [[State(j <= i) for j in range(len(w),-1,-1)] for i in range(n,-1,-1)]
    for i, c in enumerate(w):
        for j in range(n):
            s = states[j][i]
            for x in A:
                s.add(x, states[j+1][i])
                if x != c:
                    s.add(x, states[j+1][i+1])
            s.add('', states[j+1][i+1])
            s.add(c, states[j][i+1])
        states[n][i].add(c, states[n][i+1])
    for j in range(n):
        s = states[j][W]
        for x in A:
            s.add(x, states[j+1][W])
    s = states[0][0]
    s.remove_epsilon()
    s.freeze()
    return s

def union(states):
    d = {}
    accepting = False
    for s in states:
        for c, S in s.transitions.items():
            d[c] = d.get(c, frozenset()) | S
        if s.accepting:
            accepting = True
    return d, accepting

def difference(q0, q1, D = None):
    if D is None:
        return difference(frozenset([q0]), frozenset([q1]), {})
    if (q0, q1) in D:
        return D[q0, q1]
    d0, a0 = union(q0)
    d1, a1 = union(q1)
    Q0 = State(a0 and not a1)
    D[q0, q1] = Q0
    for i in set(d0) | set(d1):
        Q0.transitions[i] = frozenset([difference(d0.get(i, frozenset()), d1.get(i, frozenset()), D)])
    return Q0

def exact(w, n):
    if n == 0: raise ValueError
    return difference(levenshtein(w, n), levenshtein(w, n-1))

def f(w, n):
    s = exact(w, n)
    return s.pick(random.randrange(s.count_accepting()))

Attempt This Online!

Source Link
gsitcia
  • 3.5k
  • 9
  • 14

Python, 3441 bytes

import random

def skip():pass

class State:
    states = []
    def __init__(self, accepting=False):
        self.n = len(State.states)
        State.states.append(self)
        self.accepting = accepting
        self.transitions = {}
    def __repr__(self):
        return f'q_{self.n}'
    def add(self, c, s):
        self.transitions.setdefault(c, set()).add(s)
    def __eq__(self, o):
        return self.n == o.n
    def __hash__(self):
        return hash((State, self.n))
    def epsilon_helper(self):
        z = {self}
        if '' in self.transitions:
            for s in self.transitions.pop(''):
                z.update(s.epsilon_helper())
        self.epsilon_helper = lambda:z
        return z 
    def remove_epsilon(self):
        self.remove_epsilon = skip
        for a in self.epsilon_helper():
            a.remove_epsilon()
            for x, S in a.transitions.items():
                for b in list(S):
                    b.remove_epsilon()
                    for c in b.epsilon_helper():
                        self.add(x, c)
                        c.remove_epsilon()
    def freeze(self):
        self.freeze = lambda:None
        for c, s in self.transitions.items():
            for x in s:
                x.freeze()
            self.transitions[c] = frozenset(s)
    def count_accepting(self):
        x = self.accepting + sum(s.count_accepting() for S in self.transitions.values() for s in S)
        self.count_accepting = lambda:x
        return x
    def accept(self, w):
        if w == '': return self.accepting
        return any(s.accept(w[1:]) for s in self.transitions.get(w[0],()))
    def pick(self, i):
        if self.accepting:
            if i == 0: return ''
            i -= 1
        for j, s in self.transitions.items():
            s = min(s)
            x = s.count_accepting()
            if i < x:
                return j + s.pick(i)
            i -= x
        raise IndexError

def levenshtein(w, n):
    A = set(w)
    W = len(w)
    states = [[State() for _ in w] + [State(True)] for _ in range(n+1)]
    for i, c in enumerate(w):
        for j in range(n):
            s = states[j][i]
            for x in A:
                s.add(x, states[j+1][i])
                if x != c:
                    s.add(x, states[j+1][i+1])
            s.add('', states[j+1][i+1])
            s.add(c, states[j][i+1])
        states[n][i].add(c, states[n][i+1])
    for j in range(n):
        s = states[j][W]
        for x in A:
            s.add(x, states[j+1][W])
    s = states[0][0]
    s.remove_epsilon()
    s.freeze()
    return s

def union(states):
    d = {}
    accepting = False
    for s in states:
        for c, S in s.transitions.items():
            d[c] = d.get(c, frozenset()) | S
        if s.accepting:
            accepting = True
    return d, accepting

def difference(q0, q1, D = None):
    if D is None:
        return difference(frozenset([q0]), frozenset([q1]), {})
    if (q0, q1) in D:
        return D[q0, q1]
    d0, a0 = union(q0)
    d1, a1 = union(q1)
    Q0 = State(a0 and not a1)
    D[q0, q1] = Q0
    for i in set(d0) | set(d1):
        Q0.transitions[i] = frozenset([difference(d0.get(i, frozenset()), d1.get(i, frozenset()), D)])
    return Q0

def exact(w, n):
    if n == 0: raise ValueError
    return difference(levenshtein(w, n), levenshtein(w, n-1))

def f(w, n):
    s = exact(w, n)
    return s.pick(random.randrange(s.count_accepting()))

Attempt This Online!

My attempt at implementing user1502040's idea of sampling the DFA.

The algorithm runs in basically three stages:

  • First it generates NFAs for s,k and s,k-1 (where each accepts words with at most k or k-1 errors respectively)
    • The NFAs have \$O(|s|\cdot k)\$ states, and building them takes time proportional to that (also proportional to the size of the alphabet)
  • Then it uses the algorithm outlined in this cs.se answer to find a DFA for the difference of two NFAs
    • This takes the majority of the time, but I'm not sure of the actual complexity. Given that the algorithm is basically the powerset construction, it's worst case exponential, but it appears to do better than that. (I found an algorithm that can find a DFA for a given word with <=k errors that takes time linear with respect to the length of the word (for fixed k). See Schulz and Mihov (2002)).
    • I tried graphing the number of states in the DFA for s="1111011012" and k from 1 to 30 and it appeared linear (especially for k>10), but I also tried s="the quick brown fox jumps over the lazy dog" and k from 1 to 7 and it appeared exponential. **
  • It counts the number of states, and picks a random index and chooses the path with that index in a preorder traversal of the DFA. (am I using my jargon right?)
    • This takes time linear in the size of the DFA (using dynamic programming).

In practice, the program appears to run quite quickly; it takes about 5 seconds for s="the quick brown fox jumps over the lazy dog" and k=5.

** For some reason the image feature isn't working for me, so here is the data: [35, 101, 228, 436, 722, 1080, 1464, 1833, 2184, 2519, 2844, 3162, 3480, 3798, 4116, 4434, 4752, 5070, 5388, 5706, 6024, 6342, 6660, 6978, 7296, 7614, 7932, 8250, 8568] and [172, 669, 2579, 10226, 40094, 155722, 606044]