I have a large word-file which is fixed with over 240 000 words. I need to check if a specific word exists in this list. I thought that it would be a good idea to create a RadixTree for this problem.
The text file consists of 240 000 lines, each of which contains one word, which is read into a String. Each string is then parsed into a char[] to access the individual characters, each char is then placed into the tree separately.
Android puts a limit of 16-26MB of RAM usage per application if I am correct. When my application runs it consistently hits this mark during the initialization of the large Vocabulary object. It takes around 47 seconds to initialize the Vocabulary object and about 1/10 of a ms to see if a word exists in this Vocabulary. Right now I am afraid that I made a mistake by implementing the RadixTree data-structure since it costs too much RAM and takes way too long with 47 seconds...
Would this every be possible to use on a real device? E.g. that the speed would be under 0.5 seconds instead of 47.
My idea was to load the object to later restore it. However this is even slower, loading an object took about 5x longer. Oh dear.
Vocabulary Class
package nl.mprog.ghost.datastructure;
import android.app.Activity;
import android.content.Context;
import android.util.Log;
import java.io.Serializable;
import nl.mprog.ghost.enumeration.Language;
public class Vocabulary implements Serializable {
RadixTree mRadixTree;
Language mLanguage;
public Vocabulary(Context context, Language language) {
this.mLanguage = language;
mRadixTree = new RadixTree(context, language);
}
public boolean isWord(String word) {
return mRadixTree.isWord(word);
}
RadixTree class
package nl.mprog.ghost.datastructure;
import android.content.Context;
import android.util.Log;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Scanner;
import nl.mprog.ghost.R;
import nl.mprog.ghost.enumeration.Language;
public class RadixTree implements Serializable {
private RadixTreeNode mRoot = new RadixTreeNode();
private ArrayList<RadixTreeNode> mActiveNodes;
public RadixTree(Context context, Language language) {
readInWords(getFileScanner(context, language));
}
public void readInWords(Scanner scanner) {
String word;
while(scanner.hasNextLine()) {
word = scanner.nextLine();
mRoot.insertWord(word, mRoot);
}
scanner.close();
}
public Scanner getFileScanner(Context context, Language language) {
Scanner scanner;
switch (language) {
case DUTCH:
scanner = new Scanner(context.getResources().openRawResource(R.raw.dutch));
break;
case ENGLISH:
scanner = new Scanner(context.getResources().openRawResource(R.raw.english));
break;
default:
scanner = new Scanner(context.getResources().openRawResource(R.raw.english));
break;
}
return scanner;
}
public boolean isWord(String word) {
RadixTreeNode activeNode = mRoot;
char[] chars = word.toCharArray();
for(int i = 0; i < chars.length; i++) {
activeNode = activeNode.findNode(activeNode, chars[i]);
if(activeNode == null) return false;
}
if(activeNode.mIsWordEnd) {
return true;
} else {
return false;
}
}
}
RadixTreeNode class
import android.util.Log;
import java.io.Serializable;
public class RadixTreeNode implements Serializable {
RadixTreeNode[] mChildren = new RadixTreeNode[0];
char mCharacter = '0';
boolean mIsWordEnd;
public RadixTreeNode() {
}
public RadixTreeNode(char character) {
this.mCharacter = character;
}
public RadixTreeNode(char character, boolean isWordEnd) {
this.mCharacter = character;
this.mIsWordEnd = isWordEnd;
}
public void insert(char character) {
for(RadixTreeNode node : mChildren) {
if(node.mCharacter == character) {
return;
}
}
addChild(new RadixTreeNode(character));
}
public void insert(char character, boolean isWordEnd) {
for(RadixTreeNode node : mChildren) {
if(node.mCharacter == character) {
return;
}
}
addChild(new RadixTreeNode(character, isWordEnd));
}
private void addChild(RadixTreeNode node) {
if(mChildren.length == 0) {
mChildren = new RadixTreeNode[1];
mChildren[0] = node;
} else {
RadixTreeNode[] temp = new RadixTreeNode[mChildren.length + 1];
System.arraycopy(mChildren, 0, temp, 0, mChildren.length);
temp[mChildren.length] = node;
mChildren = temp;
}
}
public RadixTreeNode[] getChildren() {
return mChildren;
}
public void insertWord(String word, RadixTreeNode root) {
RadixTreeNode activeNode = root;
char[] array = word.toCharArray();
for(int i = 0; i < (array.length - 1); i++) {
activeNode.insert(array[i]);
activeNode = findNode(activeNode, array[i]);
}
activeNode.insert(array[array.length - 1], true);
}
public RadixTreeNode findNode(RadixTreeNode startPosition, char character) {
RadixTreeNode[] children = startPosition.getChildren();
if(children.length == 0) {
return null;
} else {
for(RadixTreeNode rtn : children) {
if(rtn.mCharacter == character) return rtn;
}
}
return null;
}
}