Skip to main content
1 of 1
coderodde
  • 33.6k
  • 15
  • 80
  • 212

Deterministic finite automaton in Java - Take II

(This is the continuation of Deterministic finite automaton in Java.)

(The repository is here.)

This time I have reworked the API and supplied a union DFA construction algorithm for additional fun.

Code

io.github.coderodde.dfa.DFA.java:

package io.github.coderodde.dfa;

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Random;
import java.util.Set;

/**
 * This class implements the actual deterministic finite automaton.
 */
public class DFA {
    
    /**
     * The set of accepting states.
     */
    private final Set<Integer> acceptingStates = new HashSet<>();
    
    /**
     * The start state.
     */
    private final int startState;
    
    /**
     * The transition function.
     */
    private final TransitionFunction transitionFunction;

    /**
     * Constructs this DFA.
     * 
     * @param transitionFunction the transition function.
     * @param startState         the start state.
     * @param acceptingStates    the accepting states.
     */
    public DFA(TransitionFunction transitionFunction,
               int startState,
               Set<Integer> acceptingStates) {
        
        this.transitionFunction = new TransitionFunction(
                Objects.requireNonNull(transitionFunction,
                                       "Transition function is null."));
        this.startState = startState;
        
        for (Integer state : acceptingStates) {
            addAcceptingState(state);
        }
    }
    
    /**
     * Returns the start state.
     * 
     * @return the start state.
     */
    public int getStartState() {
        return startState;
    }
    
    /**
     * Returns an unmodifiable view over the accepting states.
     * 
     * @return the accepting states.
     */
    public Set<Integer> getAcceptingStates() {
        return Collections.unmodifiableSet(acceptingStates);
    }
    
    /**
     * Adds the accepting state to this DFA.
     * 
     * @param state the accepting state to add.
     */
    public void addAcceptingState(int state) {
        if (!transitionFunction.getAllStates().contains(state)) {
            throw new IllegalArgumentException(
                    "Trying to add an accepting states that is not present " +
                    "in this DFA: " + state);
        }
        
        acceptingStates.add(state);
    }
    
    /**
     * Processes a single state transition.
     * 
     * @param state  the source state.
     * @param symbol the symbol.
     * 
     * @return the next state or {@code null} if the transition is not found.
     */
    public Integer process(int state, char symbol) {
        return transitionFunction.process(state, symbol);
    }
    
    /**
     * Returns the transition function.
     * 
     * @return the transition function.
     */
    public TransitionFunction getTransitionFunction() {
        return transitionFunction;
    }

    /**
     * Checks whether the input string {@code text} belongs to the regular 
     * language modelled by this DFA.
     * 
     * @param text the text to check.
     * 
     * @return {@code true} if and only if the input string belongs to the 
     *         regular language imposed by this DFA.
     */
    public boolean matches(String text) {
        Integer currentState = startState;

        for (char c : text.toCharArray()) {
            currentState = transitionFunction.process(currentState, c);

            if (currentState == null) {
                return false;
            }
        }

        return acceptingStates.contains(currentState);
    }
    
    /**
     * Computes all unreachable states in this DFA.
     * 
     * @return the set of unreachable states.
     */
    public Set<Integer> getUnreachableStates() {
        Deque<Integer> queue = new ArrayDeque<>();
        Set<Integer> visited = new HashSet<>();
        Set<Integer> allStates = 
                new HashSet<>(transitionFunction.getAllStates());
        
        queue.add(startState);
        
        while (!queue.isEmpty()) {
            int state = queue.removeFirst();
            
            if (visited.contains(state)) {
                continue;
            }
            
            visited.add(state);
            
            Map<Character, Integer> children = 
                    transitionFunction.function.get(state);
            
            if (children == null) {
                continue;
            }
            
            for (int child : children.values()) {
                if (!visited.contains(child)) {
                    queue.addLast(child);
                }
            }
        }
        
        allStates.removeAll(visited);
        return allStates;
    }
    
    /**
     * Adds all missing transitions to this DFA.
     */
    public void addMissingTransiitions() {
        Integer sink = null;
        
        for (int state : transitionFunction.getAllStates()) {
            for (char symbol : transitionFunction.getAlphabet()) {
                if (transitionFunction.process(state, symbol) == null) {
                    if (sink == null) {
                        sink = createRandomSinkState();
                    }
                    
                    transitionFunction.addStateTransition(state,
                                                     sink, 
                                                     symbol);
                }
            }
        }
    }
    
    /**
     * Computes and returns an union DFA composed of this DFA and {@code dfa}.
     * 
     * @param dfa the second DFA.
     * 
     * @return the union of this DFA and {@code dfa}.
     */
    public DFA union(DFA dfa) {
        Set<Character> alphabet = new HashSet<>();
        
        alphabet.addAll(this.getTransitionFunction().getAlphabet());
        alphabet.addAll( dfa.getTransitionFunction().getAlphabet());
        
        DFA dfa1 = this.normalizeAlphabet(alphabet);
        DFA dfa2 =  dfa.normalizeAlphabet(alphabet);
        
        dfa1.addMissingTransiitions();
        dfa2.addMissingTransiitions();
        
        dfa1.pruneUnreachableStates();
        dfa2.pruneUnreachableStates();  
        
        TransitionFunction productTransitionFunction = 
                new TransitionFunction();
        
        Set<Integer> productAcceptingState = new HashSet<>();
        Map<IntegerPair, Integer> m        = new HashMap<>();
        int id = 0;
        
        for (int p : dfa1.getTransitionFunction().getAllStates()) {
            for (int q : dfa2.getTransitionFunction().getAllStates()) {
                m.put(new IntegerPair(p, q), id++);
            }
        }
        
        IntegerPair q0 = new IntegerPair(dfa1.getStartState(),
                                         dfa2.getStartState());
        
        int productStartState = m.get(q0);
        
        for (IntegerPair ip : m.keySet()) {
            int p = ip.first;
            int q = ip.second;
            
            if (dfa1.getAcceptingStates().contains(p) || 
                dfa2.getAcceptingStates().contains(q)) {
                productAcceptingState.add(m.get(ip));
            }
        }
        
        for (IntegerPair ip : m.keySet()) {
            for (char symbol : alphabet) {
                int q = ip.first;
                int p = ip.second;
                
                Integer nq = dfa1.process(q, symbol);
                Integer np = dfa2.process(p, symbol);
                
                if (nq == null || np == null) {
                    continue;
                }
                
                IntegerPair nip = new IntegerPair(nq, np);
                
                productTransitionFunction.addStateTransition(m.get(ip),
                                                        m.get(nip),
                                                        symbol);
            }
        }
        
        return new DFA(
                productTransitionFunction,
                productStartState,
                productAcceptingState);
    }
    
    public DFA normalizeAlphabet(Set<Character> alphabet) {
        
        class Entry {
            
            Entry(int state, int sink, char symbol) {
                this.state  = state;
                this.sink   = sink;
                this.symbol = symbol;
            }
            
            final int state;
            final int sink;
            char symbol;
        }
        
        List<Entry> entries = new ArrayList<>();
        
        TransitionFunction delta = new TransitionFunction(transitionFunction);
        int sink = createRandomSinkState();
        boolean sinkUsed = false;
        
        for (int state : delta.getAllStates()) {
            for (char symbol : alphabet) {
                if (transitionFunction.process(state, symbol) == null) {
                    if (!sinkUsed) {
                        sinkUsed = true;
                    }
                    
                    entries.add(new Entry(state, sink, symbol));
                }
            }
        }
        
        for (Entry e : entries) {
            delta.addStateTransition(e.state, e.sink, e.symbol);
        }
        
        if (sinkUsed) {
            for (char symbol : alphabet) {
                delta.addStateTransition(sink, sink, symbol);
            }
        }
        
        return new DFA(delta, 
                       startState, 
                       acceptingStates);
    }
    
    public void pruneUnreachableStates() {
        Set<Integer> unreachableStates = getUnreachableStates();
        
        for (Integer s : unreachableStates) {
            transitionFunction.function.remove(s);
        }
    }
    
    /** 
     * Creates a random sink state that does not yet appear in this DFA.
     * 
     * @return a random sink state. 
     */
    private Integer createRandomSinkState() {
        Random random = new Random();
        Integer sink;
        
        do {
            sink = random.nextInt();
        } while (transitionFunction.getAllStates().contains(sink));
        
        return sink;
    }

    /**
     * Used for coupling state pairs.
     */
    private static final class IntegerPair {

        public final int first;
        public final int second;
        
        public IntegerPair(int first, int second) {
            this.first = first;
            this.second = second;
        }
        
        @Override
        public int hashCode() {
            return first ^ ~second;
        }
        
        @Override
        public boolean equals(Object object) {
            if (object instanceof IntegerPair other) {
                return first == other.first && second == other.second;
            }
            
            return false;
        }
        
        @Override
        public String toString() {
            return String.format("[%d, %d]", first, second);
        }
    }
}

io.github.coderodde.dfa.TransitionFunction.java:

package io.github.coderodde.dfa;

import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

/**
 * This class implements a transition function for DFAs.
 */
public class TransitionFunction {
    
    /**
     * The number of state transitions.
     */
    private int size;

    /**
     * The actual data store for holding the state transitions. Is accessed by 
     * {@link DFA}, therefore package-private.
     */
    final Map<Integer, Map<Character, Integer>> function = new HashMap<>();
    
    /**
     * The current used alphabet.
     */
    private final Set<Character> alphabet = new HashSet<>();
    
    /**
     * The current used states.
     */
    private final Set<Integer> states = new HashSet<>();
    
    /**
     * Constructs an empty transition function with no state transitions.
     */
    public TransitionFunction() {
        
    }
    
    /**
     * Copy constructor.
     * 
     * @param tf the transition function to copy.
     */
    public TransitionFunction(TransitionFunction tf) {
        size = tf.size;
        
        for (Map.Entry<Integer, Map<Character, Integer>> e1 
                : tf.function.entrySet()) {
            
            int state = e1.getKey();
            
            if (!this.function.containsKey(state)) {
                this.function.put(state, new HashMap<>());
            }
            
            for (Map.Entry<Character, Integer> e2 : e1.getValue().entrySet()) {
                this.function.get(state).put(e2.getKey(), e2.getValue());
            }
            
            this.states.addAll(tf.function.get(state).values());
        }
        
        this.states.addAll(tf.function.keySet());
        this.alphabet.addAll(tf.getAlphabet());
    }

    /**
     * Adds a new state transition only if it is not already present.
     * 
     * @param startState the start state of the state transition.
     * @param goalState  the goal state of the state transition.
     * @param character  the target symbol.
     */
    public void addStateTransition(int startState, 
                                   int goalState,
                                   char character) {
        boolean added = 
                alphabet.add(character)
                && states.add(startState)
                && states.add(goalState);
        
        if (function.containsKey(startState)) {
            var mapping = function.get(startState);
            
            if (!mapping.containsKey(character)) {
                added = true;
                mapping.put(character, goalState);
            } else if (!mapping.get(character).equals(goalState)) {
                added = true;
                mapping.put(character, goalState);
            }
        } else {
            added = true;
            
            function.computeIfAbsent(
                    startState, 
                    _ -> new HashMap<>())
                    .put(character, goalState);
        }
        
        if (added) {
            ++size;
        }
    }

    /**
     * Processes a state transition.
     * 
     * @param startState the source start state.
     * @param character  the symbol on which to follow the link.
     * @return the next state or {@code null} if there is no such transition.
     */
    public Integer process(int startState, char character) {
        Map<Character, Integer> m = function.get(startState);
        return m == null ? null : m.get(character);
    }
    
    /**
     * Returns the unmodifiable view of the entire alphabet so far.
     * 
     * @return the alphabet.
     */
    public Set<Character> getAlphabet() {
        return Collections.unmodifiableSet(alphabet);
    }
    
    /**
     * Returns the unmodifiable view of the entire state set so far.
     * 
     * @return the state set. 
     */
    public Set<Integer> getAllStates() {
        return Collections.unmodifiableSet(states);
    }
    
    /**
     * Returns the number of distinct state transitions.
     * 
     * @return the number of state transitions.
     */
    public int numberOfTransitions() {
        return size;
    }
}

io.github.coderodde.dfa.DFATest.java:

package io.github.coderodde.dfa;

import java.util.Set;
import org.junit.Test;
import static org.junit.Assert.*;

public class DFATest {
    
    @Test
    public void getUnreachableStates() {
        TransitionFunction tf = new TransitionFunction();
        tf.addStateTransition(0, 1, 'a');
        tf.addStateTransition(2, 1, 'a');
        tf.addStateTransition(3, 1, 'b');
        tf.addStateTransition(2, 12, 'c');
        tf.addStateTransition(3, 12, 'a');
        DFA dfa = new DFA(tf, 0, Set.of());
        Set<Integer> unreachable = dfa.getUnreachableStates();
        assertEquals(Set.of(2, 3, 12), unreachable);
    }
    
    @Test
    public void normalizeAlphabet() {
        TransitionFunction tf = new TransitionFunction();
        tf.addStateTransition(0, 1, 'a');
        tf.addStateTransition(2, 1, 'a');
        tf.addStateTransition(3, 1, 'b');
        tf.addStateTransition(2, 12, 'c');
        tf.addStateTransition(3, 12, 'a');
        DFA dfa = new DFA(tf, 0, Set.of());
        DFA normalized = dfa.normalizeAlphabet(Set.of('a', 'b', 'c', 'd'));
        
        assertEquals(
                Set.of('a', 'b', 'c', 'd'), 
                normalized.getTransitionFunction().getAlphabet());
    }
    
    @Test
    public void union() {
        TransitionFunction tf1 = new TransitionFunction();
        TransitionFunction tf2 = new TransitionFunction();
        
        tf1.addStateTransition(0, 0, '0');
        tf1.addStateTransition(0, 1, '1');
        tf1.addStateTransition(1, 0, '0');
        tf1.addStateTransition(1, 1, '1');
        
        tf2.addStateTransition(0, 1, '1');
        tf2.addStateTransition(1, 2, '1');
        
        DFA dfa1 = new DFA(tf1, 0, Set.of(1));
        DFA dfa2 = new DFA(tf2, 0, Set.of(2));
        
        DFA union = dfa1.union(dfa2);
        
        assertFalse(union.matches(""));
        assertFalse(union.matches("0"));
        assertFalse(union.matches("00"));
        assertFalse(union.matches("000"));
        
        assertTrue(union.matches("1"));
        assertTrue(union.matches("01"));
        assertTrue(union.matches("01111"));
    }
}

io.github.coderodde.dfa.TransitionFunctionTest.java:

package io.github.coderodde.dfa;

import java.util.Set;
import org.junit.Test;
import static org.junit.Assert.*;

public class TransitionFunctionTest {

    @Test
    public void actualAlphabet() {
        TransitionFunction tf = new TransitionFunction();
        tf.addStateTransition(1, 2, 'a');
        tf.addStateTransition(1, 3, 'b');
        tf.addStateTransition(1, 4, 'c');
        tf.addStateTransition(1, 5, 'b');
        
        assertEquals(Set.of('a', 'b', 'c'), tf.getAlphabet());
    }
}

Critique request

Please, tell me whatever comes to mind.

coderodde
  • 33.6k
  • 15
  • 80
  • 212