Skip to main content
added 2 characters in body
Source Link
toolic
  • 11k
  • 4
  • 25
  • 156
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Command-line tool that groups the lines of a text file by the first capturing group of a
 * regular expression and prints how often each group value occurs.
 */
public class Main {
  /** Accumulates the input lines that share one aggregation key, together with their number. */
  static class LineCounts {
    /** Matching lines, in the order they were read from the input file. */
    final ArrayList<String> lines = new ArrayList<>();
    /** Number of matching lines; incremented together with every addition to {@link #lines}. */
    int count;
  }

  /**
   * Aggregates and counts the occurrences of a regex pattern in a text file, then prints one
   * line per distinct key: the zero-padded count, the key and the first line that produced it.
   *
   * @param args input text file, aggregate regex (its first capturing group is the key), and
   *     optional ignore regex; lines matching the ignore regex are skipped.
   * @throws IllegalArgumentException if fewer than two arguments are given.
   * @throws Exception if the input file is not found or cannot be read.
   */
  public static void main(String[] args) throws Exception {

    if (args.length < 2) {
      System.out.println("""
          Usage: java -jar <jar file> <input text file> <aggregate regex> (<ignore regex>)
          Example: java -jar <jar file> "input.txt" ".*? (\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}) .*"
           to aggregate IP addresses from the input file.""");
      throw new IllegalArgumentException("Invalid number of arguments");
    }
    Pattern aggregatePattern = Pattern.compile(args[1]);
    // "(?!x)x" can never match, so without an ignore regex no line is filtered out.
    Pattern ignorePattern = Pattern.compile(args.length > 2 ? args[2] : "(?!x)x");

    report(aggregate(args[0], aggregatePattern, ignorePattern));
  }

  /** Reads the file and groups every accepted line under the value of capturing group 1. */
  private static Map<String, LineCounts> aggregate(
      String fileName, Pattern aggregatePattern, Pattern ignorePattern)
      throws java.io.IOException {
    // LinkedHashMap keeps first-seen order, which the report uses to break ties.
    Map<String, LineCounts> groups = new LinkedHashMap<>();
    // Files.lines keeps the file open until the stream is closed, hence try-with-resources.
    try (var lines =
        Files.lines(Paths.get(fileName), java.nio.charset.Charset.defaultCharset())) {
      // forEachOrdered guarantees the lines are processed in file order.
      lines.forEachOrdered(line -> {
        Matcher aggregateMatcher = aggregatePattern.matcher(line);
        if (aggregateMatcher.find() && !ignorePattern.matcher(line).find()) {
          LineCounts group =
              groups.computeIfAbsent(aggregateMatcher.group(1), key -> new LineCounts());
          group.lines.add(line);
          group.count++;
        }
      });
    }
    return groups;
  }

  /** Prints the groups by descending count; equal counts keep input order (earlier first). */
  private static void report(Map<String, LineCounts> groups) {
    groups.entrySet().stream()
        // Integer.compare avoids the overflow risk of subtracting counts; the sort is stable,
        // so entries with equal counts stay in map (first-seen) order.
        .sorted((left, right) -> Integer.compare(right.getValue().count, left.getValue().count))
        .forEachOrdered(entry -> System.out.printf(
            "%04d - %s - %s%n",
            entry.getValue().count, entry.getKey(), entry.getValue().lines.get(0)));
  }
}
```
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Command-line tool that groups the lines of a text file by the first capturing group of a
 * regular expression and prints how often each group value occurs.
 */
public class Main {
  /** Accumulates the input lines that share one aggregation key, together with their number. */
  static class LineCounts {
    /** Matching lines, in the order they were read from the input file. */
    final ArrayList<String> lines = new ArrayList<>();
    /** Number of matching lines; incremented together with every addition to {@link #lines}. */
    int count;
  }

  /**
   * Aggregates and counts the occurrences of a regex pattern in a text file, then prints one
   * line per distinct key: the zero-padded count, the key and the first line that produced it.
   *
   * @param args input text file, aggregate regex (its first capturing group is the key), and
   *     optional ignore regex; lines matching the ignore regex are skipped.
   * @throws IllegalArgumentException if fewer than two arguments are given.
   * @throws Exception if the input file is not found or cannot be read.
   */
  public static void main(String[] args) throws Exception {

    if (args.length < 2) {
      System.out.println("""
          Usage: java -jar <jar file> <input text file> <aggregate regex> (<ignore regex>)
          Example: java -jar <jar file> "input.txt" ".*? (\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}) .*"
           to aggregate IP addresses from the input file.""");
      throw new IllegalArgumentException("Invalid number of arguments");
    }
    Pattern aggregatePattern = Pattern.compile(args[1]);
    // "(?!x)x" can never match, so without an ignore regex no line is filtered out.
    Pattern ignorePattern = Pattern.compile(args.length > 2 ? args[2] : "(?!x)x");

    report(aggregate(args[0], aggregatePattern, ignorePattern));
  }

  /** Reads the file and groups every accepted line under the value of capturing group 1. */
  private static Map<String, LineCounts> aggregate(
      String fileName, Pattern aggregatePattern, Pattern ignorePattern)
      throws java.io.IOException {
    // LinkedHashMap keeps first-seen order, which the report uses to break ties.
    Map<String, LineCounts> groups = new LinkedHashMap<>();
    // Files.lines keeps the file open until the stream is closed, hence try-with-resources.
    try (var lines =
        Files.lines(Paths.get(fileName), java.nio.charset.Charset.defaultCharset())) {
      // forEachOrdered guarantees the lines are processed in file order.
      lines.forEachOrdered(line -> {
        Matcher aggregateMatcher = aggregatePattern.matcher(line);
        if (aggregateMatcher.find() && !ignorePattern.matcher(line).find()) {
          LineCounts group =
              groups.computeIfAbsent(aggregateMatcher.group(1), key -> new LineCounts());
          group.lines.add(line);
          group.count++;
        }
      });
    }
    return groups;
  }

  /** Prints the groups by descending count; equal counts keep input order (earlier first). */
  private static void report(Map<String, LineCounts> groups) {
    groups.entrySet().stream()
        // Integer.compare avoids the overflow risk of subtracting counts; the sort is stable,
        // so entries with equal counts stay in map (first-seen) order.
        .sorted((left, right) -> Integer.compare(right.getValue().count, left.getValue().count))
        .forEachOrdered(entry -> System.out.printf(
            "%04d - %s - %s%n",
            entry.getValue().count, entry.getKey(), entry.getValue().lines.get(0)));
  }
}
```
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Command-line tool that groups the lines of a text file by the first capturing group of a
 * regular expression and prints how often each group value occurs.
 */
public class Main {
  /** Accumulates the input lines that share one aggregation key, together with their number. */
  static class LineCounts {
    /** Matching lines, in the order they were read from the input file. */
    final ArrayList<String> lines = new ArrayList<>();
    /** Number of matching lines; incremented together with every addition to {@link #lines}. */
    int count;
  }

  /**
   * Aggregates and counts the occurrences of a regex pattern in a text file, then prints one
   * line per distinct key: the zero-padded count, the key and the first line that produced it.
   *
   * @param args input text file, aggregate regex (its first capturing group is the key), and
   *     optional ignore regex; lines matching the ignore regex are skipped.
   * @throws IllegalArgumentException if fewer than two arguments are given.
   * @throws Exception if the input file is not found or cannot be read.
   */
  public static void main(String[] args) throws Exception {

    if (args.length < 2) {
      System.out.println("""
          Usage: java -jar <jar file> <input text file> <aggregate regex> (<ignore regex>)
          Example: java -jar <jar file> "input.txt" ".*? (\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}) .*"
           to aggregate IP addresses from the input file.""");
      throw new IllegalArgumentException("Invalid number of arguments");
    }
    Pattern aggregatePattern = Pattern.compile(args[1]);
    // "(?!x)x" can never match, so without an ignore regex no line is filtered out.
    Pattern ignorePattern = Pattern.compile(args.length > 2 ? args[2] : "(?!x)x");

    report(aggregate(args[0], aggregatePattern, ignorePattern));
  }

  /** Reads the file and groups every accepted line under the value of capturing group 1. */
  private static Map<String, LineCounts> aggregate(
      String fileName, Pattern aggregatePattern, Pattern ignorePattern)
      throws java.io.IOException {
    // LinkedHashMap keeps first-seen order, which the report uses to break ties.
    Map<String, LineCounts> groups = new LinkedHashMap<>();
    // Files.lines keeps the file open until the stream is closed, hence try-with-resources.
    try (var lines =
        Files.lines(Paths.get(fileName), java.nio.charset.Charset.defaultCharset())) {
      // forEachOrdered guarantees the lines are processed in file order.
      lines.forEachOrdered(line -> {
        Matcher aggregateMatcher = aggregatePattern.matcher(line);
        if (aggregateMatcher.find() && !ignorePattern.matcher(line).find()) {
          LineCounts group =
              groups.computeIfAbsent(aggregateMatcher.group(1), key -> new LineCounts());
          group.lines.add(line);
          group.count++;
        }
      });
    }
    return groups;
  }

  /** Prints the groups by descending count; equal counts keep input order (earlier first). */
  private static void report(Map<String, LineCounts> groups) {
    groups.entrySet().stream()
        // Integer.compare avoids the overflow risk of subtracting counts; the sort is stable,
        // so entries with equal counts stay in map (first-seen) order.
        .sorted((left, right) -> Integer.compare(right.getValue().count, left.getValue().count))
        .forEachOrdered(entry -> System.out.printf(
            "%04d - %s - %s%n",
            entry.getValue().count, entry.getKey(), entry.getValue().lines.get(0)));
  }
}
Source Link
swpalmer
  • 281
  • 2
  • 6

As @TorbenPutkonen says, use a proper type instead of abusing Map.Entry

I would further make use of the Stream APIs. Don't call `get` on your map after calling `computeIfAbsent`; it already returns the entry you need.

This is slightly cleaner...

import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Command-line tool that groups the lines of a text file by the first capturing group of a
 * regular expression and prints how often each group value occurs.
 */
public class Main {
  /** Accumulates the input lines that share one aggregation key, together with their number. */
  static class LineCounts {
    /** Matching lines, in the order they were read from the input file. */
    final ArrayList<String> lines = new ArrayList<>();
    /** Number of matching lines; incremented together with every addition to {@link #lines}. */
    int count;
  }

  /**
   * Aggregates and counts the occurrences of a regex pattern in a text file, then prints one
   * line per distinct key: the zero-padded count, the key and the first line that produced it.
   *
   * @param args input text file, aggregate regex (its first capturing group is the key), and
   *     optional ignore regex; lines matching the ignore regex are skipped.
   * @throws IllegalArgumentException if fewer than two arguments are given.
   * @throws Exception if the input file is not found or cannot be read.
   */
  public static void main(String[] args) throws Exception {

    if (args.length < 2) {
      System.out.println("""
          Usage: java -jar <jar file> <input text file> <aggregate regex> (<ignore regex>)
          Example: java -jar <jar file> "input.txt" ".*? (\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}) .*"
           to aggregate IP addresses from the input file.""");
      throw new IllegalArgumentException("Invalid number of arguments");
    }
    Pattern aggregatePattern = Pattern.compile(args[1]);
    // "(?!x)x" can never match, so without an ignore regex no line is filtered out.
    Pattern ignorePattern = Pattern.compile(args.length > 2 ? args[2] : "(?!x)x");

    report(aggregate(args[0], aggregatePattern, ignorePattern));
  }

  /** Reads the file and groups every accepted line under the value of capturing group 1. */
  private static Map<String, LineCounts> aggregate(
      String fileName, Pattern aggregatePattern, Pattern ignorePattern)
      throws java.io.IOException {
    // LinkedHashMap keeps first-seen order, which the report uses to break ties.
    Map<String, LineCounts> groups = new LinkedHashMap<>();
    // Files.lines keeps the file open until the stream is closed, hence try-with-resources.
    try (var lines =
        Files.lines(Paths.get(fileName), java.nio.charset.Charset.defaultCharset())) {
      // forEachOrdered guarantees the lines are processed in file order.
      lines.forEachOrdered(line -> {
        Matcher aggregateMatcher = aggregatePattern.matcher(line);
        if (aggregateMatcher.find() && !ignorePattern.matcher(line).find()) {
          LineCounts group =
              groups.computeIfAbsent(aggregateMatcher.group(1), key -> new LineCounts());
          group.lines.add(line);
          group.count++;
        }
      });
    }
    return groups;
  }

  /** Prints the groups by descending count; equal counts keep input order (earlier first). */
  private static void report(Map<String, LineCounts> groups) {
    groups.entrySet().stream()
        // Integer.compare avoids the overflow risk of subtracting counts; the sort is stable,
        // so entries with equal counts stay in map (first-seen) order.
        .sorted((left, right) -> Integer.compare(right.getValue().count, left.getValue().count))
        .forEachOrdered(entry -> System.out.printf(
            "%04d - %s - %s%n",
            entry.getValue().count, entry.getKey(), entry.getValue().lines.get(0)));
  }
}
```