Intro
In this post, I will elaborate on A simple method for compressing white space in text (Java). Here, I have incorporated some of the advice offered by Chris. Also, this version preserves a single newline character whenever a run of white space contains one or more newlines.
Code
package io.github.coderodde.text;
import java.util.Objects;
import java.util.Random;
import java.util.stream.Collectors;
/**
 * This class provides a single-pass (linear time) method for compressing white
 * space in text, an alternative stream/regex based implementation, and a small
 * demo/benchmark program comparing the two.
 *
 * @author Rodion "rodde" Efremov
 * @version 1.1.0 (Oct 31, 2025)
 * @since 1.0.0 (Oct 30, 2025)
 */
public final class TextWhitespaceCompressor {

    /** Number of nanoseconds in one millisecond, used when reporting timings. */
    private static final long NANOS_PER_MILLI = 1_000_000L;

    /** Static utility class; not meant to be instantiated. */
    private TextWhitespaceCompressor() {
    }

    /**
     * Compresses the white space in the input text. Leading and trailing white
     * space is removed. Every maximal run of white space characters (as defined
     * by {@link Character#isWhitespace(char)}) between two non-white-space
     * characters is replaced by a single character: {@code '\n'} if the run
     * contains at least one {@code '\n'}, and {@code ' '} otherwise.
     *
     * @param text the text to compress.
     * @return compressed text; the empty string if {@code text} is empty or
     *         consists of white space only.
     * @throws NullPointerException if {@code text} is {@code null}.
     */
    public static String whitespaceCompress(String text) {
        Objects.requireNonNull(text, "text");

        int loIndex = 0;
        int hiIndex = text.length() - 1;

        // Skip the white space prefix, if any. The bound is inclusive so that
        // the last remaining character is examined as well; otherwise a text
        // with exactly one non-white-space character is mistaken for blank.
        while (loIndex <= hiIndex
                && Character.isWhitespace(text.charAt(loIndex))) {
            ++loIndex;
        }

        if (loIndex > hiIndex) {
            // The input text is empty or blank:
            return "";
        }

        // Skip the white space suffix, if any. text[loIndex] is known to be a
        // non-white-space character, so this loop stops at loIndex at the
        // latest and needs no explicit bound check.
        while (Character.isWhitespace(text.charAt(hiIndex))) {
            --hiIndex;
        }

        // The result can never be longer than the trimmed range:
        StringBuilder sb = new StringBuilder(hiIndex - loIndex + 1);

        while (loIndex <= hiIndex) {
            char ch = text.charAt(loIndex);

            if (!Character.isWhitespace(ch)) {
                // Not a white space character, copy it verbatim:
                sb.append(ch);
                ++loIndex;
                continue;
            }

            // Consume the entire white space run. text[hiIndex] is not white
            // space, so the run ends before hiIndex and the index stays in
            // bounds.
            boolean newlineEncountered = false;

            do {
                if (ch == '\n') {
                    newlineEncountered = true;
                }

                ch = text.charAt(++loIndex);
            } while (Character.isWhitespace(ch));

            sb.append(newlineEncountered ? '\n' : ' ');
        }

        return sb.toString();
    }

    /**
     * Chris's version: strips the text, splits it into lines, strips each line,
     * collapses each run matching the regex {@code \s+} within a line into a
     * single space, and joins the lines with {@code '\n'}.
     *
     * <p>NOTE(review): this is not guaranteed to produce the same result as
     * {@link #whitespaceCompress(String)} for every input. For example,
     * {@link String#lines()} also treats {@code '\r'} as a line terminator and
     * keeps empty lines between consecutive terminators, and {@code \s} does
     * not by default cover every character for which
     * {@link Character#isWhitespace(char)} returns {@code true}.
     *
     * @param text the text to compress.
     * @return compressed text.
     * @throws NullPointerException if {@code text} is {@code null}.
     */
    public static String whitespaceCompressV2(String text) {
        Objects.requireNonNull(text, "text");

        return text.strip()
                   .lines()
                   .map(line -> line.strip().replaceAll("\\s+", " "))
                   .collect(Collectors.joining("\n"));
    }

    /** Sample inputs printed by {@link #main(String[])}. */
    private static final String[] TEST_STRINGS = {
        " hello ",
        " hello world \t ",
        " hello \n cat \t dog \n ",
    };

    /**
     * Prints the result of both implementations for each sample input and then
     * runs the benchmark.
     *
     * @param args ignored.
     */
    public static void main(String[] args) {
        for (String testString : TEST_STRINGS) {
            String s1 = whitespaceCompress(testString);
            String s2 = whitespaceCompressV2(testString);
            System.out.printf("\"%s\" vs. \"%s\"\n", s1, s2);
        }

        System.out.println("--- Benchmark ---");
        benchmark();
    }

    /**
     * Times both implementations once on the same large random string and
     * reports whether their results are equal. This is a single cold run of
     * each method (no warm-up), so the numbers are only a rough indication.
     */
    private static void benchmark() {
        String s = generateRandomString();

        // System.nanoTime() is the monotonic clock intended for measuring
        // elapsed time; the wall clock may jump.
        long ta = System.nanoTime();
        String r1 = whitespaceCompress(s);
        long tb = System.nanoTime();

        System.out.println(
                "rodde's compress in "
                        + ((tb - ta) / NANOS_PER_MILLI)
                        + " milliseconds.");

        ta = System.nanoTime();
        // Actually run Chris's implementation here, so that both the timing
        // and the equality check below compare the two different methods.
        String r2 = whitespaceCompressV2(s);
        tb = System.nanoTime();

        System.out.println(
                "Chris's compress in "
                        + ((tb - ta) / NANOS_PER_MILLI)
                        + " milliseconds.");

        System.out.println("Equals: " + (r1.equals(r2)));
    }

    /**
     * Builds the benchmark input: 100,000,000 characters, each of which is a
     * space with probability 0.35 and otherwise the low 16 bits of a random
     * {@code int} (so any {@code char} value may occur).
     *
     * @return the random string.
     */
    private static String generateRandomString() {
        final int N = 100_000_000;
        Random random = new Random();
        StringBuilder sb = new StringBuilder(N);

        for (int i = 0; i < N; ++i) {
            if (random.nextDouble() < 0.35) {
                sb.append(' ');
            } else {
                sb.append((char) random.nextInt());
            }
        }

        return sb.toString();
    }
}
Typical output
"hello" vs. "hello"
"hello world" vs. "hello world"
"hello
cat dog" vs. "hello
cat dog"
--- Benchmark ---
rodde's compress in 1929 milliseconds.
Chris's compress in 2106 milliseconds.
Equals: true
As you can see, my version is no slower than Chris's.
Critique request
As always, I would like to hear some constructive commentary on my work.