Revisions to Word-Ladder solver in Python 3 - Code Review Stack Exchange

added 54 characters in body

Source Link

edited Jun 15, 2019 at 10:07

29.4k
3
49
98

"""Word-Ladder Solver.

blah blah blah
and blah
"""


import sys
import enum
import argparse
import itertools
from collections import deque


class Mode(enum.Enum):
    SWAP = 'swap-only'
    ADD_REM_SWAP = 'add-remove-swap'


class Node:
    """Node of a Tree"""
    def __init__(self, value, parent=None):
        self.value = value
        self.parent = parent

    def __iter__(self):
        if self.parent is not None:
            yield from reversed(list(reversed(self))).parent
        yield self.value

    def __reversed__(self):
        node = self
        while node is not None:
            yield node.value
            node = node.parent


def command_line_parser():
    parser = argparse.ArgumentParser(
            description=__doc__,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('word', nargs='+')
    parser.add_argument('final_word')
    parser.add_argument(
            '-m', '--mode', type=Mode,
            choices=[m.value for m in Mode], default=Mode.SWAP,
            help='mode of operation to use')
    parser.add_argument(
            '-d', '--dictionnary', '--words-file',
            metavar='PATH', default='/usr/share/dict/words',
            help='path to the list of words to search from')
    return parser


def pairwise(iterable):
    a, b = itertools.tee(iterable)
    next(b, None)
    yield from zip(a, b)


def hamming(s1, s2):
    return sum(ch1 != ch2 for ch1, ch2 in zip(s1, s2))


def levenshtein(s1, s2):
    if len(s1) < len(s2):
        return levenshtein(s2, s1)

    if len(s2) == 0:
        return len(s1)

    previous_row = range(len(s2) + 1)
    for i, c1 in enumerate(s1):
        current_row = [i + 1]
        for j, c2 in enumerate(s2):
            insertions = previous_row[j + 1] + 1
            deletions = current_row[j] + 1
            substitutions = previous_row[j] + (c1 != c2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row

    return previous_row[-1]


def read_words(filename):
    with open(filename) as f:
        return set(map(str.lower, filter(str.isalpha, map(str.strip, f))))


def find_word_ladder(source, target, words, distance):
    checked = set()
    candidates = deque([Node(source)])

    while candidates:
        node = candidates.popleft()
        candidate = node.value
        if candidate == target:
            return node

        if candidate not in checked:
            checked.add(candidate)
            candidates.extend(
                    Node(word, node)
                    for word in words
                    if distance(word, candidate) == 1)


def main(targets, words, mode):
    if mode is Mode.SWAP:
        distance = hamming
    elif mode is Mode.ADD_REM_SWAP:
        distance = levenshtein
    else:
        return

    for source, target in pairwise(targets):
        if source not in words:
            sys.exit('unknown word in dictionnary: {}'.format(source))
        if target not in words:
            sys.exit('unknown word in dictionnary: {}'.format(target))
        chain = find_word_ladder(source, target, words, distance)
        print(list(chain))


if __name__ == '__main__':
    parser = command_line_parser()
    args = parser.parse_args()

    try:
        words = read_words(args.dictionnary)
    except OSError as e:
        parser.error('unable to read words file: {}'.format(e))

    if args.mode is Mode.SWAP:
        length = len(args.final_word)
        words = {w for w in words if len(w) == length}

    targets = args.word
    targets.append(args.final_word)
    main(targets, words, args.mode)

"""Word-Ladder Solver.

blah blah blah
and blah
"""


import sys
import enum
import argparse
import itertools
from collections import deque


class Mode(enum.Enum):
    SWAP = 'swap-only'
    ADD_REM_SWAP = 'add-remove-swap'


class Node:
    """Node of a Tree"""
    def __init__(self, value, parent=None):
        self.value = value
        self.parent = parent

    def __iter__(self):
        yield from reversed(list(reversed(self)))

    def __reversed__(self):
        node = self
        while node is not None:
            yield node.value
            node = node.parent


def command_line_parser():
    parser = argparse.ArgumentParser(
            description=__doc__,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('word', nargs='+')
    parser.add_argument('final_word')
    parser.add_argument(
            '-m', '--mode', type=Mode,
            choices=[m.value for m in Mode], default=Mode.SWAP,
            help='mode of operation to use')
    parser.add_argument(
            '-d', '--dictionnary', '--words-file',
            metavar='PATH', default='/usr/share/dict/words',
            help='path to the list of words to search from')
    return parser


def pairwise(iterable):
    a, b = itertools.tee(iterable)
    next(b, None)
    yield from zip(a, b)


def hamming(s1, s2):
    return sum(ch1 != ch2 for ch1, ch2 in zip(s1, s2))


def levenshtein(s1, s2):
    if len(s1) < len(s2):
        return levenshtein(s2, s1)

    if len(s2) == 0:
        return len(s1)

    previous_row = range(len(s2) + 1)
    for i, c1 in enumerate(s1):
        current_row = [i + 1]
        for j, c2 in enumerate(s2):
            insertions = previous_row[j + 1] + 1
            deletions = current_row[j] + 1
            substitutions = previous_row[j] + (c1 != c2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row

    return previous_row[-1]


def read_words(filename):
    with open(filename) as f:
        return set(map(str.lower, filter(str.isalpha, map(str.strip, f))))


def find_word_ladder(source, target, words, distance):
    checked = set()
    candidates = deque([Node(source)])

    while candidates:
        node = candidates.popleft()
        candidate = node.value
        if candidate == target:
            return node

        if candidate not in checked:
            checked.add(candidate)
            candidates.extend(
                    Node(word, node)
                    for word in words
                    if distance(word, candidate) == 1)


def main(targets, words, mode):
    if mode is Mode.SWAP:
        distance = hamming
    elif mode is Mode.ADD_REM_SWAP:
        distance = levenshtein
    else:
        return

    for source, target in pairwise(targets):
        if source not in words:
            sys.exit('unknown word in dictionnary: {}'.format(source))
        if target not in words:
            sys.exit('unknown word in dictionnary: {}'.format(target))
        chain = find_word_ladder(source, target, words, distance)
        print(list(chain))


if __name__ == '__main__':
    parser = command_line_parser()
    args = parser.parse_args()

    try:
        words = read_words(args.dictionnary)
    except OSError as e:
        parser.error('unable to read words file: {}'.format(e))

    if args.mode is Mode.SWAP:
        length = len(args.final_word)
        words = {w for w in words if len(w) == length}

    targets = args.word
    targets.append(args.final_word)
    main(targets, words, args.mode)

"""Word-Ladder Solver.

blah blah blah
and blah
"""


import sys
import enum
import argparse
import itertools
from collections import deque


class Mode(enum.Enum):
    SWAP = 'swap-only'
    ADD_REM_SWAP = 'add-remove-swap'


class Node:
    """Node of a Tree"""
    def __init__(self, value, parent=None):
        self.value = value
        self.parent = parent

    def __iter__(self):
        if self.parent is not None:
            yield from self.parent
        yield self.value

    def __reversed__(self):
        node = self
        while node is not None:
            yield node.value
            node = node.parent


def command_line_parser():
    parser = argparse.ArgumentParser(
            description=__doc__,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('word', nargs='+')
    parser.add_argument('final_word')
    parser.add_argument(
            '-m', '--mode', type=Mode,
            choices=[m.value for m in Mode], default=Mode.SWAP,
            help='mode of operation to use')
    parser.add_argument(
            '-d', '--dictionnary', '--words-file',
            metavar='PATH', default='/usr/share/dict/words',
            help='path to the list of words to search from')
    return parser


def pairwise(iterable):
    a, b = itertools.tee(iterable)
    next(b, None)
    yield from zip(a, b)


def hamming(s1, s2):
    return sum(ch1 != ch2 for ch1, ch2 in zip(s1, s2))


def levenshtein(s1, s2):
    if len(s1) < len(s2):
        return levenshtein(s2, s1)

    if len(s2) == 0:
        return len(s1)

    previous_row = range(len(s2) + 1)
    for i, c1 in enumerate(s1):
        current_row = [i + 1]
        for j, c2 in enumerate(s2):
            insertions = previous_row[j + 1] + 1
            deletions = current_row[j] + 1
            substitutions = previous_row[j] + (c1 != c2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row

    return previous_row[-1]


def read_words(filename):
    with open(filename) as f:
        return set(map(str.lower, filter(str.isalpha, map(str.strip, f))))


def find_word_ladder(source, target, words, distance):
    checked = set()
    candidates = deque([Node(source)])

    while candidates:
        node = candidates.popleft()
        candidate = node.value
        if candidate == target:
            return node

        if candidate not in checked:
            checked.add(candidate)
            candidates.extend(
                    Node(word, node)
                    for word in words
                    if distance(word, candidate) == 1)


def main(targets, words, mode):
    if mode is Mode.SWAP:
        distance = hamming
    elif mode is Mode.ADD_REM_SWAP:
        distance = levenshtein
    else:
        return

    for source, target in pairwise(targets):
        if source not in words:
            sys.exit('unknown word in dictionnary: {}'.format(source))
        if target not in words:
            sys.exit('unknown word in dictionnary: {}'.format(target))
        chain = find_word_ladder(source, target, words, distance)
        print(list(chain))


if __name__ == '__main__':
    parser = command_line_parser()
    args = parser.parse_args()

    try:
        words = read_words(args.dictionnary)
    except OSError as e:
        parser.error('unable to read words file: {}'.format(e))

    if args.mode is Mode.SWAP:
        length = len(args.final_word)
        words = {w for w in words if len(w) == length}

    targets = args.word
    targets.append(args.final_word)
    main(targets, words, args.mode)

added 28 characters in body

Source Link

edited Jun 14, 2019 at 12:38

301_Moved_Permanently

29.4k
3
49
98

"""Word-Ladder Solver.

blah blah blah
and blah
"""


import sys
import enum
import argparse
import itertools
from collections import deque


class Mode(enum.Enum):
    SWAP = 'swap-only'
    ADD_REM_SWAP = 'add-remove-swap'


class Node:
    """Node of a Tree"""
    def __init__(self, value, parent=None):
        self.value = value
        self.parent = parent

    def __iter__(self):
        yield from reversed(list(reversed(self)))

    def __reversed__(self):
        node = self
        while node is not None:
            yield node.value
            node = node.parent


def command_line_parser():
    parser = argparse.ArgumentParser(
            description=__doc__,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('word', nargs='+')
    parser.add_argument('final_word')
    parser.add_argument(
            '-m', '--mode', type=Mode,
            choices=[m.value for m in Mode], default=Mode.SWAP,
            help='mode of operation to use')
    parser.add_argument(
            '-d', '--dictionnary', '--words-file',
            metavar='PATH', default='/usr/share/dict/words',
            help='path to the list of words to search from')
    return parser


def pairwise(iterable):
    a, b = itertools.tee(iterable)
    next(b, None)
    yield from zip(a, b)


def hamming(s1, s2):
    return sum(ch1 != ch2 for ch1, ch2 in zip(s1, s2))


def levenshtein(s1, s2):
    if len(s1) < len(s2):
        return levenshtein(s2, s1)

    if len(s2) == 0:
        return len(s1)

    previous_row = range(len(s2) + 1)
    for i, c1 in enumerate(s1):
        current_row = [i + 1]
        for j, c2 in enumerate(s2):
            insertions = previous_row[j + 1] + 1
            deletions = current_row[j] + 1
            substitutions = previous_row[j] + (c1 != c2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row

    return previous_row[-1]


def read_words(filename):
    with open(filename) as f:
        return set(map(str.lower, filter(str.isalpha, map(str.strip, f))))


def find_word_ladder(source, target, words, distance):
    checked = set()
    candidates = deque([Node(source)])

    while candidates:
        node = candidates.popleft()
        candidate = node.value
        if candidate == target:
            return node

        if candidate not in checked:
            checked.add(candidate)
            forcandidates.extend(
 word in words:
                if distanceNode(word, candidatenode) 
 == 1:                  for word in words
                    candidates.append(Nodeif distance(word, nodecandidate) == 1)


def main(targets, words, mode):
    if mode is Mode.SWAP:
        distance = hamming
    elif mode is Mode.ADD_REM_SWAP:
        distance = levenshtein
    else:
        return

    for source, target in pairwise(targets):
        if source not in words:
            sys.exit('unknown word in dictionnary: {}'.format(source))
        if target not in words:
            sys.exit('unknown word in dictionnary: {}'.format(target))
        chain = find_word_ladder(source, target, words, distance)
        print(list(chain))


if __name__ == '__main__':
    parser = command_line_parser()
    args = parser.parse_args()

    try:
        words = read_words(args.dictionnary)
    except OSError as e:
        parser.error('unable to read words file: {}'.format(e))

    if args.mode is Mode.SWAP:
        length = len(args.final_word)
        words = {w for w in words if len(w) == length}

    targets = args.word
    targets.append(args.final_word)
    main(targets, words, args.mode)

"""Word-Ladder Solver.

blah blah blah
and blah
"""


import sys
import enum
import argparse
import itertools
from collections import deque


class Mode(enum.Enum):
    SWAP = 'swap-only'
    ADD_REM_SWAP = 'add-remove-swap'


class Node:
    """Node of a Tree"""
    def __init__(self, value, parent=None):
        self.value = value
        self.parent = parent

    def __iter__(self):
        yield from reversed(list(reversed(self)))

    def __reversed__(self):
        node = self
        while node is not None:
            yield node.value
            node = node.parent


def command_line_parser():
    parser = argparse.ArgumentParser(
            description=__doc__,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('word', nargs='+')
    parser.add_argument('final_word')
    parser.add_argument(
            '-m', '--mode', type=Mode,
            choices=[m.value for m in Mode], default=Mode.SWAP,
            help='mode of operation to use')
    parser.add_argument(
            '-d', '--dictionnary', '--words-file',
            metavar='PATH', default='/usr/share/dict/words',
            help='path to the list of words to search from')
    return parser


def pairwise(iterable):
    a, b = itertools.tee(iterable)
    next(b, None)
    yield from zip(a, b)


def hamming(s1, s2):
    return sum(ch1 != ch2 for ch1, ch2 in zip(s1, s2))


def levenshtein(s1, s2):
    if len(s1) < len(s2):
        return levenshtein(s2, s1)

    if len(s2) == 0:
        return len(s1)

    previous_row = range(len(s2) + 1)
    for i, c1 in enumerate(s1):
        current_row = [i + 1]
        for j, c2 in enumerate(s2):
            insertions = previous_row[j + 1] + 1
            deletions = current_row[j] + 1
            substitutions = previous_row[j] + (c1 != c2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row

    return previous_row[-1]


def read_words(filename):
    with open(filename) as f:
        return set(map(str.lower, filter(str.isalpha, map(str.strip, f))))


def find_word_ladder(source, target, words, distance):
    checked = set()
    candidates = deque([Node(source)])

    while candidates:
        node = candidates.popleft()
        candidate = node.value
        if candidate == target:
            return node

        if candidate not in checked:
            checked.add(candidate)
            for word in words:
                if distance(word, candidate) == 1:
                    candidates.append(Node(word, node))


def main(targets, words, mode):
    if mode is Mode.SWAP:
        distance = hamming
    elif mode is Mode.ADD_REM_SWAP:
        distance = levenshtein
    else:
        return

    for source, target in pairwise(targets):
        if source not in words:
            sys.exit('unknown word in dictionnary: {}'.format(source))
        if target not in words:
            sys.exit('unknown word in dictionnary: {}'.format(target))
        chain = find_word_ladder(source, target, words, distance)
        print(list(chain))


if __name__ == '__main__':
    parser = command_line_parser()
    args = parser.parse_args()

    try:
        words = read_words(args.dictionnary)
    except OSError as e:
        parser.error('unable to read words file: {}'.format(e))

    if args.mode is Mode.SWAP:
        length = len(args.final_word)
        words = {w for w in words if len(w) == length}

    targets = args.word
    targets.append(args.final_word)
    main(targets, words, args.mode)

"""Word-Ladder Solver.

blah blah blah
and blah
"""


import sys
import enum
import argparse
import itertools
from collections import deque


class Mode(enum.Enum):
    SWAP = 'swap-only'
    ADD_REM_SWAP = 'add-remove-swap'


class Node:
    """Node of a Tree"""
    def __init__(self, value, parent=None):
        self.value = value
        self.parent = parent

    def __iter__(self):
        yield from reversed(list(reversed(self)))

    def __reversed__(self):
        node = self
        while node is not None:
            yield node.value
            node = node.parent


def command_line_parser():
    parser = argparse.ArgumentParser(
            description=__doc__,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('word', nargs='+')
    parser.add_argument('final_word')
    parser.add_argument(
            '-m', '--mode', type=Mode,
            choices=[m.value for m in Mode], default=Mode.SWAP,
            help='mode of operation to use')
    parser.add_argument(
            '-d', '--dictionnary', '--words-file',
            metavar='PATH', default='/usr/share/dict/words',
            help='path to the list of words to search from')
    return parser


def pairwise(iterable):
    a, b = itertools.tee(iterable)
    next(b, None)
    yield from zip(a, b)


def hamming(s1, s2):
    return sum(ch1 != ch2 for ch1, ch2 in zip(s1, s2))


def levenshtein(s1, s2):
    if len(s1) < len(s2):
        return levenshtein(s2, s1)

    if len(s2) == 0:
        return len(s1)

    previous_row = range(len(s2) + 1)
    for i, c1 in enumerate(s1):
        current_row = [i + 1]
        for j, c2 in enumerate(s2):
            insertions = previous_row[j + 1] + 1
            deletions = current_row[j] + 1
            substitutions = previous_row[j] + (c1 != c2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row

    return previous_row[-1]


def read_words(filename):
    with open(filename) as f:
        return set(map(str.lower, filter(str.isalpha, map(str.strip, f))))


def find_word_ladder(source, target, words, distance):
    checked = set()
    candidates = deque([Node(source)])

    while candidates:
        node = candidates.popleft()
        candidate = node.value
        if candidate == target:
            return node

        if candidate not in checked:
            checked.add(candidate)
            candidates.extend(
                    Node(word, node) 
                    for word in words
                    if distance(word, candidate) == 1)


def main(targets, words, mode):
    if mode is Mode.SWAP:
        distance = hamming
    elif mode is Mode.ADD_REM_SWAP:
        distance = levenshtein
    else:
        return

    for source, target in pairwise(targets):
        if source not in words:
            sys.exit('unknown word in dictionnary: {}'.format(source))
        if target not in words:
            sys.exit('unknown word in dictionnary: {}'.format(target))
        chain = find_word_ladder(source, target, words, distance)
        print(list(chain))


if __name__ == '__main__':
    parser = command_line_parser()
    args = parser.parse_args()

    try:
        words = read_words(args.dictionnary)
    except OSError as e:
        parser.error('unable to read words file: {}'.format(e))

    if args.mode is Mode.SWAP:
        length = len(args.final_word)
        words = {w for w in words if len(w) == length}

    targets = args.word
    targets.append(args.final_word)
    main(targets, words, args.mode)

Source Link

answered Jun 14, 2019 at 12:11

301_Moved_Permanently

29.4k
3
49
98

Your Queue class should be replaced by the builtin collections.deque which offers better performances (lists .pop(0) are \$\mathcal{O}(n)\$ since the remainder of the list have to be shifted, but deque.popleft() is \$\mathcal{O}(1)\$).

You should also take the habit of opening files using the with statement to avoid keeping opened file descriptors around:

def read_file(filename='/usr/share/dict/words'):
    with open(filename) as f:
        return set(map(str.lower, map(str.strip, f)))

Note that I return a set to accelerate the search if word not in all_words. You could also bring back the isalpha filter from your previous question:

def read_file(filename='/usr/share/dict/words'):
    with open(filename) as f:
        return set(map(str.lower, filter(str.isalpha, map(str.strip, f))))

Your code would also largely gain from using argparse instead of your various inputs.

And print_words could easily be converted to an __iter__ method on the Node class.

Example improvements:

"""Word-Ladder Solver.

blah blah blah
and blah
"""


import sys
import enum
import argparse
import itertools
from collections import deque


class Mode(enum.Enum):
    SWAP = 'swap-only'
    ADD_REM_SWAP = 'add-remove-swap'


class Node:
    """Node of a Tree"""
    def __init__(self, value, parent=None):
        self.value = value
        self.parent = parent

    def __iter__(self):
        yield from reversed(list(reversed(self)))

    def __reversed__(self):
        node = self
        while node is not None:
            yield node.value
            node = node.parent


def command_line_parser():
    parser = argparse.ArgumentParser(
            description=__doc__,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('word', nargs='+')
    parser.add_argument('final_word')
    parser.add_argument(
            '-m', '--mode', type=Mode,
            choices=[m.value for m in Mode], default=Mode.SWAP,
            help='mode of operation to use')
    parser.add_argument(
            '-d', '--dictionnary', '--words-file',
            metavar='PATH', default='/usr/share/dict/words',
            help='path to the list of words to search from')
    return parser


def pairwise(iterable):
    a, b = itertools.tee(iterable)
    next(b, None)
    yield from zip(a, b)


def hamming(s1, s2):
    return sum(ch1 != ch2 for ch1, ch2 in zip(s1, s2))


def levenshtein(s1, s2):
    if len(s1) < len(s2):
        return levenshtein(s2, s1)

    if len(s2) == 0:
        return len(s1)

    previous_row = range(len(s2) + 1)
    for i, c1 in enumerate(s1):
        current_row = [i + 1]
        for j, c2 in enumerate(s2):
            insertions = previous_row[j + 1] + 1
            deletions = current_row[j] + 1
            substitutions = previous_row[j] + (c1 != c2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row

    return previous_row[-1]


def read_words(filename):
    with open(filename) as f:
        return set(map(str.lower, filter(str.isalpha, map(str.strip, f))))


def find_word_ladder(source, target, words, distance):
    checked = set()
    candidates = deque([Node(source)])

    while candidates:
        node = candidates.popleft()
        candidate = node.value
        if candidate == target:
            return node

        if candidate not in checked:
            checked.add(candidate)
            for word in words:
                if distance(word, candidate) == 1:
                    candidates.append(Node(word, node))


def main(targets, words, mode):
    if mode is Mode.SWAP:
        distance = hamming
    elif mode is Mode.ADD_REM_SWAP:
        distance = levenshtein
    else:
        return

    for source, target in pairwise(targets):
        if source not in words:
            sys.exit('unknown word in dictionnary: {}'.format(source))
        if target not in words:
            sys.exit('unknown word in dictionnary: {}'.format(target))
        chain = find_word_ladder(source, target, words, distance)
        print(list(chain))


if __name__ == '__main__':
    parser = command_line_parser()
    args = parser.parse_args()

    try:
        words = read_words(args.dictionnary)
    except OSError as e:
        parser.error('unable to read words file: {}'.format(e))

    if args.mode is Mode.SWAP:
        length = len(args.final_word)
        words = {w for w in words if len(w) == length}

    targets = args.word
    targets.append(args.final_word)
    main(targets, words, args.mode)

Example usage:

$ python words_ladder.py -d /usr/share/dict/cracklib-small five four dice
['five', 'fire', 'fore', 'tore', 'torr', 'tour', 'four']
['four', 'tour', 'torr', 'tore', 'tire', 'dire', 'dice']

Stack Exchange Network

Return to Answer