Skip to main content
replaced http://codereview.stackexchange.com/ with https://codereview.stackexchange.com/
Source Link

The whole question is Simplified regular expression engineSimplified regular expression engine. I have solved the question, and in turn felt the need to get it reviewed. Any suggestions for clean up and optimization would help. Also please verify my complexity: Complexity: O (n), where n is the length of the longer or regex or string.

The whole question is Simplified regular expression engine. I have solved the question, and in turn felt the need to get it reviewed. Any suggestions for clean up and optimization would help. Also please verify my complexity: Complexity: O (n), where n is the length of the longer or regex or string.

The whole question is Simplified regular expression engine. I have solved the question, and in turn felt the need to get it reviewed. Any suggestions for clean up and optimization would help. Also please verify my complexity: Complexity: O (n), where n is the length of the longer or regex or string.

Rollback to Revision 5
Source Link
ChrisWue
  • 20.6k
  • 4
  • 43
  • 107

The whole question is Simplified regular expression engine. I have solved the question, and in turn felt the need to get it reviewed. Any suggestions for clean up and optimization would help. Also please verify my complexity: Complexity: O (nmn), where n is the length of the longer or regex and m of theor string.

public final class Regex {

private static final char STAR = '*';
private static final char DOTSTAR = '.';

private Regex() {};

/**
 * The index of first non-star character following first occurence of star
 * eg: if regex is a**b, then index is startIndex is 1 and return value is 3 (index of b)
 * 
 * @param regex         : the regex string.
 * @param firstStarIndex     : index of the first occurene of * after a non *, eg: a**b, then starIndex would be 1'*';
 * @return              : the index of first non-star character following first occurence of star
 */
private static final int getNonStarIndex(final String regex, final int firstStarIndex) {
    assertchar regexDOT != null;
    assert firstStarIndex >= 0 && firstStarIndex < regex'.length();';
    
    private Regex() {};
    
    /**
     * The index of first non-star character following first occurence of star
     * eg: if regex is a**b, then index is startIndex is 1 and return value is 3 (index of b)
     * 
     * @param regex         : the regex string.
     * @param starIndex     : index of the first occurene of * after a non *, eg: a**b, then starIndex would be 1
     * @return              : the index of first non-star character following first occurence of star
     */
    private static int getNonStarIndex(String regex, int starIndex) {
        assert regex != null;
        assert starIndex >= 0 && starIndex < regex.length();
        
        for (int k = firstStarIndex;starIndex + 1; k < regex.length(); k++) {
            if (regex.charAt(k) != '*') {
                return k;
            }
        }
        return regex.length();
    }
    return regex.length();
}

    /**
     * A * means 0 or more number of any char matches
     * A . means just a single any char match.
     * 
     * @param regex  : The regex to match the string again
     * @param str    : The string to be matched.
     * @return       : true / false
     */
    public static final boolean matchesmatch(String regex, String str) {
    return matcher   if (regex, 0, str,== 0null);
}

private static finalthrow booleannew matcherNullPointerException(final String"The regex, intcannot regexPos,be null");
 final String str, int strPos) {
  if (str assert== regexPosnull) >=throw 0new &&NullPointerException("The regexPosstr <=cannot regex.length(be null");
    assert strPos >= 0 && 
 strPos <= str.length(); // it should break whenint itsi === strPos,0;
 > strpos is an impossible condition.  int j = 0;
        
        while ((regexPosi < regex.length() && strPosj < str.length())) {
            if (regex.charAt(regexPosi) == str.charAt(strPosj) || regex.charAt(regexPosi) == DOT) {
            regexPos++; 
       i++; j++;
    strPos++;
        }  else if (regex.charAt(regexPosi) == STAR) {
                int nextNonStarIndex = getNonStarIndex(regex, regexPosi);
                /*
                 * For a case where regex and string, both have not yet reached the last char but regex is all *'s till end.
                 * Eg:  Regext abc*** && String is abcxyz
                 */
                if (nextNonStarIndex == regex.length()) return true;
              
            while    /*
                 * regex: a*.*c, abc
                 */
                if (strPosregex.charAt(nextNonStarIndex) == DOT) {i = nextNonStarIndex + 1; j++; continue;}
              
                boolean match = false;
                for (int k = j; k < str.length(); k++) {
                    if (matcher(regex, .charAt(nextNonStarIndex,) == str.charAt(k)) {
                            // now move to the next char, strPos++)after a match succeeds with first char after adjacent stars.
                            i = nextNonStarIndex + 1;
                            j = k + 1;
                            match = true;
                            break;
                    }
                }
                if (!match) {
                    return true;false;
                }
            } else {
                return false;
            }
        }
        
        /*
         * String has reached the end but not the regex.
         * regx = "abc**" and  str  = "abc";
         */
        if (i != regex.length()) {
            for (int x = i; x < regex.length() ; x++) {
                if (regex.charAt(x) != STAR) return false;
            } 
 else       }
        
        if (j != str.length()) {
            return false;
        }
    }
    
    if (strPos != str.length()) return false;

    // regex: abcd, str: abc should be false,  while regex: abc** and abc, should print true.
    return  getNonStarIndex(regex, regexPos) == regex.length();
}

public static void main(String[] args) {
    
    // Success cases
    System.out.println(Regex.matches("", ""));
    System.out.println(Regex.matches("***", ""));
  
    String[] regexList = { "abc****", "abc", "a*b*c", "****abc", "**a**b**c****", "abc*"};
    String str1 = "abc";
    
    for (String regex : regexList) {
        System.out.println(Regex.matches(regex, str1));
    }
    
    String regex = "abc****";
    String[] strList1 = {"abcxyz", "abcx", "abc"};
    for (String str : strList1) {
        System.out.println(Regex.matches(regex, str));
    }
    
    regex = "***abc";
    String[] strList2 = {"xyzabc", "xabc", "abc"};
    for (String str : strList2) {
        System.out.println(Regex.matches(regex, str));true;
    }
    
    System.out.println(Regex.matches("a.c", "abc"));
 public static void System.out.println(Regex.matchesmain("a*.*c",String[] "abc")args);
    System.out.println(Regex.matches("a*.b.*c", "axxbxxbxxc"));{
      
    // Fail cases.
  String[] regexList = { "abc****", "abc", "a*b*c", "****abc", "**a**b**c****", "abc*"};
    System.out.println("Should be false from here");String str1 = "abc";
        
    System.out.println(Regex.matches    for ("abc",String "abcd")regex : regexList); {
            System.out.println(Regex.matchesmatch("*a"regex, "abcd"str1));
    System.out.println(Regex.matches("a", ""));   }
    System.out.println(Regex.matches(".a*c", "abc"));   
    System.out.println(Regex.matches("a.*b"    
        String regex = "abc****";
        String[] strList1 = {"abcxyz", "abcx", "abc"))};
    System.out.println(Regex.matches    for ("..",String "abc")str : strList1); {
            System.out.println(Regex.matchesmatch(""regex, "abc"str));
        }
    try {   
        Regex.matches(nullregex = "***abc";
        String[] strList2 = {"xyzabc", "xabc", "abc")};
    } catch   for (NullPointerExceptionString estr : strList2) {
            System.out.println("ExpectedRegex.match(regex, NPE"str));
    }
    }
    try {   
        System.out.println(Regex.matchesmatch("abc""a.c", null"abc"));
    } catch (NullPointerException e) {
        System.out.println("ExpectedRegex.match("a*.*c", NPE""abc"));
    }
      
}

}

NOTE: this code has been revised from earlier version after a very good review comments of poster - rolfl

The whole question is Simplified regular expression engine. I have solved the question, and in turn felt the need to get it reviewed. Any suggestions for clean up and optimization would help. Also please verify my complexity: Complexity: O (nm), where n is the length of the regex and m of the string.

public final class Regex {

private static final char STAR = '*';
private static final char DOT = '.';

private Regex() {};

/**
 * The index of first non-star character following first occurence of star
 * eg: if regex is a**b, then index is startIndex is 1 and return value is 3 (index of b)
 * 
 * @param regex         : the regex string.
 * @param firstStarIndex     : index of the first occurene of * after a non *, eg: a**b, then starIndex would be 1
 * @return              : the index of first non-star character following first occurence of star
 */
private static final int getNonStarIndex(final String regex, final int firstStarIndex) {
    assert regex != null;
    assert firstStarIndex >= 0 && firstStarIndex < regex.length();
    
    for (int k = firstStarIndex; k < regex.length(); k++) {
        if (regex.charAt(k) != '*') {
            return k;
        }
    }
    return regex.length();
}

/**
 * A * means 0 or more number of any char matches
 * A . means just a single any char match.
 * 
 * @param regex  : The regex to match the string again
 * @param str    : The string to be matched.
 * @return       : true / false
 */
public static final boolean matches(String regex, String str) {
    return matcher(regex, 0, str, 0);
}

private static final boolean matcher(final String regex, int regexPos,  final String str, int strPos) {
    assert regexPos >= 0 && regexPos <= regex.length();
    assert strPos >= 0 && strPos <= str.length(); // it should break when its == strPos, > strpos is an impossible condition.
   
    while ((regexPos < regex.length() && strPos < str.length())) {
        if (regex.charAt(regexPos) == str.charAt(strPos) || regex.charAt(regexPos) == DOT) {
            regexPos++; 
            strPos++;
        }  else if (regex.charAt(regexPos) == STAR) {
            int nextNonStarIndex = getNonStarIndex(regex, regexPos);
            /*
             * For a case where regex and string, both have not yet reached the last char but regex is all *'s till end.
             * Eg:  Regext abc*** && String is abcxyz
             */
            if (nextNonStarIndex == regex.length()) return true;
          
            while (strPos < str.length()) {
                if (matcher(regex, nextNonStarIndex, str, strPos++)) return true;
            }
            return false;
        } else {
            return false;
        }
    }
    
    if (strPos != str.length()) return false;

    // regex: abcd, str: abc should be false,  while regex: abc** and abc, should print true.
    return  getNonStarIndex(regex, regexPos) == regex.length();
}

public static void main(String[] args) {
    
    // Success cases
    System.out.println(Regex.matches("", ""));
    System.out.println(Regex.matches("***", ""));
  
    String[] regexList = { "abc****", "abc", "a*b*c", "****abc", "**a**b**c****", "abc*"};
    String str1 = "abc";
    
    for (String regex : regexList) {
        System.out.println(Regex.matches(regex, str1));
    }
    
    String regex = "abc****";
    String[] strList1 = {"abcxyz", "abcx", "abc"};
    for (String str : strList1) {
        System.out.println(Regex.matches(regex, str));
    }
    
    regex = "***abc";
    String[] strList2 = {"xyzabc", "xabc", "abc"};
    for (String str : strList2) {
        System.out.println(Regex.matches(regex, str));
    }
    
    System.out.println(Regex.matches("a.c", "abc"));
    System.out.println(Regex.matches("a*.*c", "abc"));
    System.out.println(Regex.matches("a*.b.*c", "axxbxxbxxc"));
      
    // Fail cases.
    
    System.out.println("Should be false from here");

    System.out.println(Regex.matches("abc", "abcd"));
    System.out.println(Regex.matches("*a", "abcd"));
    System.out.println(Regex.matches("a", ""));
    System.out.println(Regex.matches(".a*c", "abc"));
    System.out.println(Regex.matches("a.*b", "abc"));
    System.out.println(Regex.matches("..", "abc"));
    System.out.println(Regex.matches("", "abc"));
    
    try {
        Regex.matches(null, "abc");
    } catch (NullPointerException e) {
        System.out.println("Expected NPE");
    }
    
    try {
        Regex.matches("abc", null);
    } catch (NullPointerException e) {
        System.out.println("Expected NPE");
    }
      
}

}

NOTE: this code has been revised from earlier version after a very good review comments of poster - rolfl

The whole question is Simplified regular expression engine. I have solved the question, and in turn felt the need to get it reviewed. Any suggestions for clean up and optimization would help. Also please verify my complexity: Complexity: O (n), where n is the length of the longer or regex or string.

public final class Regex {

    private static final char STAR = '*';
    private static final char DOT = '.';
    
    private Regex() {};
    
    /**
     * The index of first non-star character following first occurence of star
     * eg: if regex is a**b, then index is startIndex is 1 and return value is 3 (index of b)
     * 
     * @param regex         : the regex string.
     * @param starIndex     : index of the first occurene of * after a non *, eg: a**b, then starIndex would be 1
     * @return              : the index of first non-star character following first occurence of star
     */
    private static int getNonStarIndex(String regex, int starIndex) {
        assert regex != null;
        assert starIndex >= 0 && starIndex < regex.length();
        
        for (int k = starIndex + 1; k < regex.length(); k++) {
            if (regex.charAt(k) != '*') {
                return k;
            }
        }
        return regex.length();
    }
    
    /**
     * A * means 0 or more number of any char matches
     * A . means just a single any char match.
     * 
     * @param regex  : The regex to match the string again
     * @param str    : The string to be matched.
     * @return       : true / false
     */
    public static boolean match(String regex, String str) {
        if (regex == null)  throw new NullPointerException("The regex cannot be null");
        if (str == null) throw new NullPointerException("The str cannot be null");
         
        int i = 0;
        int j = 0;
        
        while ((i < regex.length() && j < str.length())) {
            if (regex.charAt(i) == str.charAt(j) || regex.charAt(i) == DOT) {
                i++; j++;
            } else if (regex.charAt(i) == STAR) {
                int nextNonStarIndex = getNonStarIndex(regex, i);
                /*
                 * For a case where regex and string, both have not yet reached the last char but regex is all *'s till end.
                 * Eg:  Regext abc*** && String is abcxyz
                 */
                if (nextNonStarIndex == regex.length()) return true;
              
                /*
                 * regex: a*.*c, abc
                 */
                if (regex.charAt(nextNonStarIndex) == DOT) {i = nextNonStarIndex + 1; j++; continue;}
              
                boolean match = false;
                for (int k = j; k < str.length(); k++) {
                    if (regex.charAt(nextNonStarIndex) == str.charAt(k)) {
                            // now move to the next char, after a match succeeds with first char after adjacent stars.
                            i = nextNonStarIndex + 1;
                            j = k + 1;
                            match = true;
                            break;
                    }
                }
                if (!match) {
                    return false;
                }
            } else {
                return false;
            }
        }
        
        /*
         * String has reached the end but not the regex.
         * regx = "abc**" and  str  = "abc";
         */
        if (i != regex.length()) {
            for (int x = i; x < regex.length() ; x++) {
                if (regex.charAt(x) != STAR) return false;
            } 
        }
        
        if (j != str.length()) {
            return false;
        }
        
        return true;
    }
    
    public static void main(String[] args) {
      
        String[] regexList = { "abc****", "abc", "a*b*c", "****abc", "**a**b**c****", "abc*"};
        String str1 = "abc";
        
        for (String regex : regexList) {
            System.out.println(Regex.match(regex, str1));
        }
        
        
        String regex = "abc****";
        String[] strList1 = {"abcxyz", "abcx", "abc"};
        for (String str : strList1) {
            System.out.println(Regex.match(regex, str));
        }
        
        regex = "***abc";
        String[] strList2 = {"xyzabc", "xabc", "abc"};
        for (String str : strList2) {
            System.out.println(Regex.match(regex, str));
        }
        
        System.out.println(Regex.match("a.c", "abc"));
        
        System.out.println(Regex.match("a*.*c", "abc"));
    }
}
added 139 characters in body
Source Link
JavaDeveloper
  • 8.5k
  • 29
  • 93
  • 162

The whole question is Simplified regular expression engine. I have solved the question, and in turn felt the need to get it reviewed. Any suggestions for clean up and optimization would help. Also please verify my complexity: Complexity: O (nnm), where n is the length of the longer or regex orand m of the string.

public final class Regex {

    private static final char STAR = '*';
    private static final char DOT = '.';

private Regex() {};

/**
 * The index of first non-star character following first occurence of star
 * eg: if regex is a**b, then index is startIndex is 1 and return value is 3 (index of b)
 * 
 * @param regex         : the regex string.
 * @param firstStarIndex     : index of the first occurene of * after a non *, eg: a**b, then starIndex would be 1
 * @return              : the index of first non-star character following first occurence of star
 */
private Regexstatic final int getNonStarIndex(final String regex, final int firstStarIndex) {}
    assert regex != null;
    assert firstStarIndex >= 0 && firstStarIndex < regex.length();
    
    /**
     * The index of first non-star character following first occurence of star
     * eg: if regex is a**b, then index is startIndex is 1 and return value is 3 (index of b)
     * 
     * @param regex         : the regex string.
     * @param starIndex     : index of the first occurene of * after a non *, eg: a**b, then starIndex would be 1
     * @return              : the index of first non-star character following first occurence of star
     */
    private static int getNonStarIndex(String regex, int starIndex) {
        assert regex != null;
        assert starIndex >= 0 && starIndex < regex.length();
        
        for (int k = starIndex + 1;firstStarIndex; k < regex.length(); k++) {
            if (regex.charAt(k) != '*') {
                return k;
            }
        }
        return regex.length();
    }
    
   return regex.length();
}

/**
     * A * means 0 or more number of any char matches
     * A . means just a single any char match.
     * 
     * @param regex  : The regex to match the string again
     * @param str    : The string to be matched.
     * @return       : true / false
     */
    public static final boolean matchmatches(String regex, String str) {
        if (regex == null)  throw newreturn NullPointerExceptionmatcher("The regex, cannot0, bestr, null"0);
   }

private static final boolean matcher(final ifString (strregex, ==int null)regexPos, throw newfinal NullPointerException("TheString str cannot, beint null"strPos);
  {
    assert regexPos 
 >= 0 && regexPos <= regex.length();
   int iassert =strPos 0;
>= 0 && strPos <= str.length(); // it intshould jbreak =when 0;
its == strPos, > strpos is an impossible condition.
     
    while ((iregexPos < regex.length() && jstrPos < str.length())) {
            if (regex.charAt(iregexPos) == str.charAt(jstrPos) || regex.charAt(iregexPos) == DOT) {
            regexPos++;  
   i++; j++;
        strPos++;
        }  else if (regex.charAt(iregexPos) == STAR) {
                int nextNonStarIndex = getNonStarIndex(regex, iregexPos);
                /*
                 * For a case where regex and string, both have not yet reached the last char but regex is all *'s till end.
                 * Eg:  Regext abc*** && String is abcxyz
                 */
                if (nextNonStarIndex == regex.length()) return true;
              
                /*
                 * regex: a*.*c, abc
                 */
                if (regex.charAt(nextNonStarIndex) == DOT) {i = nextNonStarIndex + 1; j++; continue;}
              
                boolean match = false;
                forwhile (int k = j; kstrPos < str.length(); k++) {
                    if (regex.charAt(nextNonStarIndex) == str.charAtmatcher(k)) {
                            // now move to the next charregex, after a match succeeds with first char after adjacent stars.
                            i = nextNonStarIndex + 1;
                            j = k + 1;
                            match = true;
                            break;
                    }
                }
                if (!match) {
                    return false;
                }
            } else {
                return false;
            }
        }
        
        /*
         * String has reached the end but not the regex.
         * regx = "abc**" and , str  = "abc";
         */
        if (i != regex.length()) {
            for (int x = i; x < regex.length() ; x++) {
                if, (regex.charAt(xstrPos++) != STAR) return false;true;
            }
        }
        
        if (j != str.length()) {
            return false;
        } else {
            return false;
        }
        
        return true;
    }
    
    if (strPos != str.length()) return false;

    // regex: abcd, str: abc should be false,  while regex: abc** and abc, should print true.
    return  getNonStarIndex(regex, regexPos) == regex.length();
}

public static void main(String[] args) {
    
    // Success cases
    System.out.println(Regex.matches("", ""));
    System.out.println(Regex.matches("***", ""));
  
    String[] regexList = { "abc****", "abc", "a*b*c", "****abc", "**a**b**c****", "abc*"};
    String str1 = "abc";
    
    for (String regex : regexList) {
        System.out.println(Regex.matches(regex, str1));
    }
    
    String regex = "abc****";
    String[] strList1 = {"abcxyz", "abcx", "abc"};
    for (String str : strList1) {
        System.out.println(Regex.matches(regex, str));
    }
    
    regex = "***abc";
    String[] strList2 = {"xyzabc", "xabc", "abc"};
    for (String str : strList2) {
        System.out.println(Regex.matches(regex, str));
    }
    
    System.out.println(Regex.matches("a.c", "abc"));
    System.out.println(Regex.matches("a*.*c", "abc"));
    System.out.println(Regex.matches("a*.b.*c", "axxbxxbxxc"));
      
        String[] regexList = { "abc****", "abc", "a*b*c", "****abc", "**a**b**c****", "abc*"};
        String str1// =Fail "abc";cases.
        
        for System.out.println(String"Should regexbe :false regexList)from {here");
         
    System.out.println(Regex.matchmatches(regex"abc", str1"abcd"));
        }
        
        
        String regex = "abc****";
        String[] strList1 = {"abcxyz", "abcx"System.out.println(Regex.matches("*a", "abc"}"abcd"));
        for System.out.println(String str :Regex.matches("a", strList1"") {);
            System.out.println(Regex.matchmatches(regex".a*c", str"abc"));
        }
        
        regex = "***abc";
        String[] strList2 = {"xyzabc", "xabc"System.out.println(Regex.matches("a.*b", "abc"}));
        for System.out.println(String str :Regex.matches("..", strList2"abc") {);
            System.out.println(Regex.matchmatches(regex"", str"abc"));
        }
       try {
        System.out.println(Regex.matchmatches("a.c"null, "abc"));
    } catch (NullPointerException e) {
        System.out.println(Regex.match("a*.*c","Expected "abc")NPE");
    }
    
    try {
        Regex.matches("abc", null);
    } catch (NullPointerException e) {
        System.out.println("Expected NPE");
    }
      
}

}

NOTE: this code has been revised from earlier version after a very good review comments of poster - rolfl

The whole question is Simplified regular expression engine. I have solved the question, and in turn felt the need to get it reviewed. Any suggestions for clean up and optimization would help. Also please verify my complexity: Complexity: O (n), where n is the length of the longer or regex or string.

public final class Regex {

    private static final char STAR = '*';
    private static final char DOT = '.';
    
    private Regex() {};
    
    /**
     * The index of first non-star character following first occurence of star
     * eg: if regex is a**b, then index is startIndex is 1 and return value is 3 (index of b)
     * 
     * @param regex         : the regex string.
     * @param starIndex     : index of the first occurene of * after a non *, eg: a**b, then starIndex would be 1
     * @return              : the index of first non-star character following first occurence of star
     */
    private static int getNonStarIndex(String regex, int starIndex) {
        assert regex != null;
        assert starIndex >= 0 && starIndex < regex.length();
        
        for (int k = starIndex + 1; k < regex.length(); k++) {
            if (regex.charAt(k) != '*') {
                return k;
            }
        }
        return regex.length();
    }
    
    /**
     * A * means 0 or more number of any char matches
     * A . means just a single any char match.
     * 
     * @param regex  : The regex to match the string again
     * @param str    : The string to be matched.
     * @return       : true / false
     */
    public static boolean match(String regex, String str) {
        if (regex == null)  throw new NullPointerException("The regex cannot be null");
        if (str == null) throw new NullPointerException("The str cannot be null");
        
         int i = 0;
        int j = 0;
        
        while ((i < regex.length() && j < str.length())) {
            if (regex.charAt(i) == str.charAt(j) || regex.charAt(i) == DOT) {
                i++; j++;
            } else if (regex.charAt(i) == STAR) {
                int nextNonStarIndex = getNonStarIndex(regex, i);
                /*
                 * For a case where regex and string, both have not yet reached the last char but regex is all *'s till end.
                 * Eg:  Regext abc*** && String is abcxyz
                 */
                if (nextNonStarIndex == regex.length()) return true;
              
                /*
                 * regex: a*.*c, abc
                 */
                if (regex.charAt(nextNonStarIndex) == DOT) {i = nextNonStarIndex + 1; j++; continue;}
              
                boolean match = false;
                for (int k = j; k < str.length(); k++) {
                    if (regex.charAt(nextNonStarIndex) == str.charAt(k)) {
                            // now move to the next char, after a match succeeds with first char after adjacent stars.
                            i = nextNonStarIndex + 1;
                            j = k + 1;
                            match = true;
                            break;
                    }
                }
                if (!match) {
                    return false;
                }
            } else {
                return false;
            }
        }
        
        /*
         * String has reached the end but not the regex.
         * regx = "abc**" and  str  = "abc";
         */
        if (i != regex.length()) {
            for (int x = i; x < regex.length() ; x++) {
                if (regex.charAt(x) != STAR) return false;
            }
        }
        
        if (j != str.length()) {
            return false;
        }
        
        return true;
    }
    
    public static void main(String[] args) {
      
        String[] regexList = { "abc****", "abc", "a*b*c", "****abc", "**a**b**c****", "abc*"};
        String str1 = "abc";
        
        for (String regex : regexList) {
            System.out.println(Regex.match(regex, str1));
        }
        
        
        String regex = "abc****";
        String[] strList1 = {"abcxyz", "abcx", "abc"};
        for (String str : strList1) {
            System.out.println(Regex.match(regex, str));
        }
        
        regex = "***abc";
        String[] strList2 = {"xyzabc", "xabc", "abc"};
        for (String str : strList2) {
            System.out.println(Regex.match(regex, str));
        }
        
        System.out.println(Regex.match("a.c", "abc"));
        
        System.out.println(Regex.match("a*.*c", "abc"));
    }
}

The whole question is Simplified regular expression engine. I have solved the question, and in turn felt the need to get it reviewed. Any suggestions for clean up and optimization would help. Also please verify my complexity: Complexity: O (nm), where n is the length of the regex and m of the string.

public final class Regex {

private static final char STAR = '*';
private static final char DOT = '.';

private Regex() {};

/**
 * The index of first non-star character following first occurence of star
 * eg: if regex is a**b, then index is startIndex is 1 and return value is 3 (index of b)
 * 
 * @param regex         : the regex string.
 * @param firstStarIndex     : index of the first occurene of * after a non *, eg: a**b, then starIndex would be 1
 * @return              : the index of first non-star character following first occurence of star
 */
private static final int getNonStarIndex(final String regex, final int firstStarIndex) {
    assert regex != null;
    assert firstStarIndex >= 0 && firstStarIndex < regex.length();
    
    for (int k = firstStarIndex; k < regex.length(); k++) {
        if (regex.charAt(k) != '*') {
            return k;
        }
    }
    return regex.length();
}

/**
 * A * means 0 or more number of any char matches
 * A . means just a single any char match.
 * 
 * @param regex  : The regex to match the string again
 * @param str    : The string to be matched.
 * @return       : true / false
 */
public static final boolean matches(String regex, String str) {
    return matcher(regex, 0, str, 0);
}

private static final boolean matcher(final String regex, int regexPos,  final String str, int strPos) {
    assert regexPos >= 0 && regexPos <= regex.length();
    assert strPos >= 0 && strPos <= str.length(); // it should break when its == strPos, > strpos is an impossible condition.
    
    while ((regexPos < regex.length() && strPos < str.length())) {
        if (regex.charAt(regexPos) == str.charAt(strPos) || regex.charAt(regexPos) == DOT) {
            regexPos++;  
            strPos++;
        }  else if (regex.charAt(regexPos) == STAR) {
            int nextNonStarIndex = getNonStarIndex(regex, regexPos);
            /*
             * For a case where regex and string, both have not yet reached the last char but regex is all *'s till end.
             * Eg:  Regext abc*** && String is abcxyz
             */
            if (nextNonStarIndex == regex.length()) return true;
          
            while (strPos < str.length()) {
                if (matcher(regex, nextNonStarIndex, str, strPos++)) return true;
            }
            return false;
        } else {
            return false;
        }
    }
    
    if (strPos != str.length()) return false;

    // regex: abcd, str: abc should be false,  while regex: abc** and abc, should print true.
    return  getNonStarIndex(regex, regexPos) == regex.length();
}

public static void main(String[] args) {
    
    // Success cases
    System.out.println(Regex.matches("", ""));
    System.out.println(Regex.matches("***", ""));
  
    String[] regexList = { "abc****", "abc", "a*b*c", "****abc", "**a**b**c****", "abc*"};
    String str1 = "abc";
    
    for (String regex : regexList) {
        System.out.println(Regex.matches(regex, str1));
    }
    
    String regex = "abc****";
    String[] strList1 = {"abcxyz", "abcx", "abc"};
    for (String str : strList1) {
        System.out.println(Regex.matches(regex, str));
    }
    
    regex = "***abc";
    String[] strList2 = {"xyzabc", "xabc", "abc"};
    for (String str : strList2) {
        System.out.println(Regex.matches(regex, str));
    }
    
    System.out.println(Regex.matches("a.c", "abc"));
    System.out.println(Regex.matches("a*.*c", "abc"));
    System.out.println(Regex.matches("a*.b.*c", "axxbxxbxxc"));
      
    // Fail cases.
    
    System.out.println("Should be false from here");
 
    System.out.println(Regex.matches("abc", "abcd"));
    System.out.println(Regex.matches("*a", "abcd"));
    System.out.println(Regex.matches("a", ""));
    System.out.println(Regex.matches(".a*c", "abc"));
    System.out.println(Regex.matches("a.*b", "abc"));
    System.out.println(Regex.matches("..", "abc"));
    System.out.println(Regex.matches("", "abc"));
    
    try {
        Regex.matches(null, "abc");
    } catch (NullPointerException e) {
        System.out.println("Expected NPE");
    }
    
    try {
        Regex.matches("abc", null);
    } catch (NullPointerException e) {
        System.out.println("Expected NPE");
    }
      
}

}

NOTE: this code has been revised from earlier version after a very good review comments of poster - rolfl

added 111 characters in body
Source Link
JavaDeveloper
  • 8.5k
  • 29
  • 93
  • 162
Loading
edited tags
Link
Jamal
  • 35.2k
  • 13
  • 134
  • 238
Loading
added 2706 characters in body
Source Link
JavaDeveloper
  • 8.5k
  • 29
  • 93
  • 162
Loading
edited tags
Link
Mathieu Guindon
  • 75.6k
  • 18
  • 195
  • 469
Loading
Source Link
JavaDeveloper
  • 8.5k
  • 29
  • 93
  • 162
Loading