Skip to main content
fix up ordering
Source Link
Reinderien
  • 71.2k
  • 5
  • 76
  • 257

Later I will demonstratefind the double-reduce operation difficult to understand, and I also find difficult to understand the magic around your sorted only needing to care about the count and relying on an alternative wayimplicit ordering for the original characters. When you say

The order of characters that occur equally often should be preserved.

this is troubled, and I think you actually mean "after sorting in decreasing order of frequency, characters should be output in order of their first appearance in the original string".

I find it easier to aggregateunderstand when the actual groupingBy(classifier, downstream) is used, and sortI find it easier to understand when the comparator explicitly references the index.

Suggested

package com.stackexchange;

import java.util.Comparator;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;

public class Main {
    public static class CountChars {
        private record IndexChar(int index, Integer character, long count) {
            IndexChar reduce(IndexChar other) {
                return new IndexChar(
                    Integer.min(index, other.index),
                    character,
                    count + other.count
                );
            }
        }
        
        public static Stream<IndexChar> streamMostOccurring(String s) {
            return IntStream
                .range(0, s.length())
                .mapToObj(i -> new IndexChar(i, (int)s.charAt(i), 1))
                .collect(
                    Collectors.groupingBy(
                        IndexChar::character,
                        Collectors.reducing(IndexChar::reduce)
                    )
                )
                .values()
                .stream()
                .filter(Optional::isPresent)
                .map(Optional::get)
                .sorted(
                    Comparator.comparing(IndexChar::count)
                        .reversed()
                        .thenComparing(IndexChar::index)
                );
        }

        public static String formatMostOccurring(String s, int limit) {
            return streamMostOccurring(s)
                .limit(limit)
                .map(
                    ix -> "%c %d%n".formatted(ix.character(), ix.count())
                )
                .collect(Collectors.joining());
        }

        public static void printMostOccurring(String s, int limit) {
            System.out.println(formatMostOccurring(s, limit));
        }

        public static void printMostOccurring(String s) {
            printMostOccurring(s, 3);
        }
    }

    public static void main(final String[] args) {
        // Examples:
        CountChars.printMostOccurring("the quick brown fox jumped over the lazy dog");
        CountChars.printMostOccurring("abracadabra");
        CountChars.printMostOccurring("arbacadabra");
        /*
          8
        e 4
        o 4

        a 5
        b 2
        r 2

        a 5
        r 2
        b 2 */
    }
}

Or, with pre-computed stream helper objects,

package com.stackexchange;

import java.util.Comparator;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collector;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;

public class Main {
    public static class CountChars {
        private record IndexChar(int index, Integer character, long count) {
            IndexChar reduce(IndexChar other) {
                return new IndexChar(
                    Integer.min(index, other.index),
                    character,
                    count + other.count
                );
            }
        }

        private static final Collector<
            IndexChar, ?, Map<Integer, Optional<IndexChar>>
        > grouper = Collectors.groupingBy(
            IndexChar::character,
            Collectors.reducing(IndexChar::reduce)
        );

        private static final Comparator<IndexChar> comparator =
            Comparator.comparing(IndexChar::count)
            .reversed()
            .thenComparing(IndexChar::index);

        private static final Collector<CharSequence, ?, String>
            joiner = Collectors.joining();

        public static Stream<IndexChar> streamMostOccurring(String s) {
            return IntStream
                .range(0, s.length())
                .mapToObj(i -> new IndexChar(i, (int)s.charAt(i), 1))
                .collect(grouper)
                .values()
                .stream()
                .filter(Optional::isPresent)
                .map(Optional::get)
                .sorted(comparator);
        }

        public static String formatMostOccurring(String s, int limit) {
            return streamMostOccurring(s)
                .limit(limit)
                .map(
                    ix -> "%c %d%n".formatted(ix.character(), ix.count())
                )
                .collect(joiner);
        }

        public static void printMostOccurring(String s, int limit) {
            System.out.println(formatMostOccurring(s, limit));
        }

        public static void printMostOccurring(String s) {
            printMostOccurring(s, 3);
        }
    }

    public static void main(final String[] args) {
        // Examples:
        CountChars.printMostOccurring("the quick brown fox jumped over the lazy dog");
        CountChars.printMostOccurring("abracadabra");
        CountChars.printMostOccurring("arbacadabra");
        /*
          8
        e 4
        o 4

        a 5
        b 2
        r 2

        a 5
        r 2
        b 2 */
    }
}

Later I will demonstrate an alternative way to aggregate and sort.

I find the double-reduce operation difficult to understand, and I also find difficult to understand the magic around your sorted only needing to care about the count and relying on an implicit ordering for the original characters. When you say

The order of characters that occur equally often should be preserved.

this is troubled, and I think you actually mean "after sorting in decreasing order of frequency, characters should be output in order of their first appearance in the original string".

I find it easier to understand when the actual groupingBy(classifier, downstream) is used, and I find it easier to understand when the comparator explicitly references the index.

Suggested

package com.stackexchange;

import java.util.Comparator;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;

public class Main {
    public static class CountChars {
        private record IndexChar(int index, Integer character, long count) {
            IndexChar reduce(IndexChar other) {
                return new IndexChar(
                    Integer.min(index, other.index),
                    character,
                    count + other.count
                );
            }
        }
        
        public static Stream<IndexChar> streamMostOccurring(String s) {
            return IntStream
                .range(0, s.length())
                .mapToObj(i -> new IndexChar(i, (int)s.charAt(i), 1))
                .collect(
                    Collectors.groupingBy(
                        IndexChar::character,
                        Collectors.reducing(IndexChar::reduce)
                    )
                )
                .values()
                .stream()
                .filter(Optional::isPresent)
                .map(Optional::get)
                .sorted(
                    Comparator.comparing(IndexChar::count)
                        .reversed()
                        .thenComparing(IndexChar::index)
                );
        }

        public static String formatMostOccurring(String s, int limit) {
            return streamMostOccurring(s)
                .limit(limit)
                .map(
                    ix -> "%c %d%n".formatted(ix.character(), ix.count())
                )
                .collect(Collectors.joining());
        }

        public static void printMostOccurring(String s, int limit) {
            System.out.println(formatMostOccurring(s, limit));
        }

        public static void printMostOccurring(String s) {
            printMostOccurring(s, 3);
        }
    }

    public static void main(final String[] args) {
        // Examples:
        CountChars.printMostOccurring("the quick brown fox jumped over the lazy dog");
        CountChars.printMostOccurring("abracadabra");
        CountChars.printMostOccurring("arbacadabra");
        /*
          8
        e 4
        o 4

        a 5
        b 2
        r 2

        a 5
        r 2
        b 2 */
    }
}

Or, with pre-computed stream helper objects,

package com.stackexchange;

import java.util.Comparator;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collector;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;

public class Main {
    public static class CountChars {
        private record IndexChar(int index, Integer character, long count) {
            IndexChar reduce(IndexChar other) {
                return new IndexChar(
                    Integer.min(index, other.index),
                    character,
                    count + other.count
                );
            }
        }

        private static final Collector<
            IndexChar, ?, Map<Integer, Optional<IndexChar>>
        > grouper = Collectors.groupingBy(
            IndexChar::character,
            Collectors.reducing(IndexChar::reduce)
        );

        private static final Comparator<IndexChar> comparator =
            Comparator.comparing(IndexChar::count)
            .reversed()
            .thenComparing(IndexChar::index);

        private static final Collector<CharSequence, ?, String>
            joiner = Collectors.joining();

        public static Stream<IndexChar> streamMostOccurring(String s) {
            return IntStream
                .range(0, s.length())
                .mapToObj(i -> new IndexChar(i, (int)s.charAt(i), 1))
                .collect(grouper)
                .values()
                .stream()
                .filter(Optional::isPresent)
                .map(Optional::get)
                .sorted(comparator);
        }

        public static String formatMostOccurring(String s, int limit) {
            return streamMostOccurring(s)
                .limit(limit)
                .map(
                    ix -> "%c %d%n".formatted(ix.character(), ix.count())
                )
                .collect(joiner);
        }

        public static void printMostOccurring(String s, int limit) {
            System.out.println(formatMostOccurring(s, limit));
        }

        public static void printMostOccurring(String s) {
            printMostOccurring(s, 3);
        }
    }

    public static void main(final String[] args) {
        // Examples:
        CountChars.printMostOccurring("the quick brown fox jumped over the lazy dog");
        CountChars.printMostOccurring("abracadabra");
        CountChars.printMostOccurring("arbacadabra");
        /*
          8
        e 4
        o 4

        a 5
        b 2
        r 2

        a 5
        r 2
        b 2 */
    }
}
deleted 3075 characters in body
Source Link
Reinderien
  • 71.2k
  • 5
  • 76
  • 257

This seems over-complex for what it's supposed to do. It can be simplified by the use of Collectors.counting and Entry.comparingByValue.

You should split this up into multiple functions for reuse and testability; especially, do not force a print - near the top level should be a format.

Suggested

package com.stackexchange;

import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class Main {
    public static Stream<Map.Entry<Integer, Long>> streamMostOccurring(String s) {
        return s.chars().boxed()
            .collect(
                Collectors.groupingBy(
                    Function.identity(), Collectors.counting()
                )
            )
            .entrySet().stream()
            .sorted(
                Map.Entry.<Integer, Long>comparingByValue().reversed()
            );
    }

    public static String formatMostOccurring(String s, int limit) {
        return streamMostOccurring(s)
            .limit(limit)
            .map(kv -> "%c %d%n".formatted(kv.getKey(), kv.getValue()))
            .collect(Collectors.joining());
    }

    public static void printMostOccurring(String s, int limit) {
        System.out.println(formatMostOccurring(s, limit));
    }

    public static void printMostOccurring(String s) {
        printMostOccurring(s, 3);
    }

    public static void main(final String[] args) {
        // Examples:
        printMostOccurring("the quick brown fox jumped over the lazy dog");
        printMostOccurring("abracadabra");
        printMostOccurring("arbacadabra");
        /*
          8
        e 4
        o 4

        a 5
        b 2
        r 2

        a 5
        r 2
        b 2 */
    }
}

Supporting objects

One thingLater I likewill demonstrate an alternative way to do for classes whose focus is stream operations - you can pre-compute your stream-supporting objects (comparators, collectors, etc.)aggregate and sort. There will be some (small) speedup, but perhaps more importantly, types will be made more explicit:

    public static class CountChars {
        private static final Collector<
            Integer, ?, Map<Integer, Long>
        > grouper = Collectors.groupingBy(
            Function.<Integer>identity(),
            Collectors.<Integer>counting()
        );

        private static final Comparator<
            Map.Entry<Integer, Long>
        > comparator = Map.Entry
            .<Integer, Long>comparingByValue()
            .reversed();

        private static final Collector<
            CharSequence, ?, String
        > joiner = Collectors.joining();

        public static Stream<Map.Entry<Integer, Long>> streamMostOccurring(String s) {
            return s.chars().boxed()
                .collect(grouper)
                .entrySet().stream()
                .sorted(comparator);
        }

        public static String formatMostOccurring(String s, int limit) {
            return streamMostOccurring(s)
                .limit(limit)
                .map(
                    kv -> "%c %d%n".formatted(kv.getKey(), kv.getValue())
                )
                .collect(joiner);
        }

        // ...

This seems over-complex for what it's supposed to do. It can be simplified by the use of Collectors.counting and Entry.comparingByValue.

You should split this up into multiple functions for reuse and testability; especially, do not force a print - near the top level should be a format.

Suggested

package com.stackexchange;

import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class Main {
    public static Stream<Map.Entry<Integer, Long>> streamMostOccurring(String s) {
        return s.chars().boxed()
            .collect(
                Collectors.groupingBy(
                    Function.identity(), Collectors.counting()
                )
            )
            .entrySet().stream()
            .sorted(
                Map.Entry.<Integer, Long>comparingByValue().reversed()
            );
    }

    public static String formatMostOccurring(String s, int limit) {
        return streamMostOccurring(s)
            .limit(limit)
            .map(kv -> "%c %d%n".formatted(kv.getKey(), kv.getValue()))
            .collect(Collectors.joining());
    }

    public static void printMostOccurring(String s, int limit) {
        System.out.println(formatMostOccurring(s, limit));
    }

    public static void printMostOccurring(String s) {
        printMostOccurring(s, 3);
    }

    public static void main(final String[] args) {
        // Examples:
        printMostOccurring("the quick brown fox jumped over the lazy dog");
        printMostOccurring("abracadabra");
        printMostOccurring("arbacadabra");
        /*
          8
        e 4
        o 4

        a 5
        b 2
        r 2

        a 5
        r 2
        b 2 */
    }
}

Supporting objects

One thing I like to do for classes whose focus is stream operations - you can pre-compute your stream-supporting objects (comparators, collectors, etc.). There will be some (small) speedup, but perhaps more importantly, types will be made more explicit:

    public static class CountChars {
        private static final Collector<
            Integer, ?, Map<Integer, Long>
        > grouper = Collectors.groupingBy(
            Function.<Integer>identity(),
            Collectors.<Integer>counting()
        );

        private static final Comparator<
            Map.Entry<Integer, Long>
        > comparator = Map.Entry
            .<Integer, Long>comparingByValue()
            .reversed();

        private static final Collector<
            CharSequence, ?, String
        > joiner = Collectors.joining();

        public static Stream<Map.Entry<Integer, Long>> streamMostOccurring(String s) {
            return s.chars().boxed()
                .collect(grouper)
                .entrySet().stream()
                .sorted(comparator);
        }

        public static String formatMostOccurring(String s, int limit) {
            return streamMostOccurring(s)
                .limit(limit)
                .map(
                    kv -> "%c %d%n".formatted(kv.getKey(), kv.getValue())
                )
                .collect(joiner);
        }

        // ...

You should split this up into multiple functions for reuse and testability; especially, do not force a print - near the top level should be a format.

Later I will demonstrate an alternative way to aggregate and sort.

added 1467 characters in body
Source Link
Reinderien
  • 71.2k
  • 5
  • 76
  • 257

Supporting objects

One thing I like to do for classes whose focus is stream operations - you can pre-compute your stream-supporting objects (comparators, collectors, etc.). There will be some (small) speedup, but perhaps more importantly, types will be made more explicit:

    public static class CountChars {
        private static final Collector<
            Integer, ?, Map<Integer, Long>
        > grouper = Collectors.groupingBy(
            Function.<Integer>identity(),
            Collectors.<Integer>counting()
        );

        private static final Comparator<
            Map.Entry<Integer, Long>
        > comparator = Map.Entry
            .<Integer, Long>comparingByValue()
            .reversed();

        private static final Collector<
            CharSequence, ?, String
        > joiner = Collectors.joining();

        public static Stream<Map.Entry<Integer, Long>> streamMostOccurring(String s) {
            return s.chars().boxed()
                .collect(grouper)
                .entrySet().stream()
                .sorted(comparator);
        }

        public static String formatMostOccurring(String s, int limit) {
            return streamMostOccurring(s)
                .limit(limit)
                .map(
                    kv -> "%c %d%n".formatted(kv.getKey(), kv.getValue())
                )
                .collect(joiner);
        }

        // ...

Supporting objects

One thing I like to do for classes whose focus is stream operations - you can pre-compute your stream-supporting objects (comparators, collectors, etc.). There will be some (small) speedup, but perhaps more importantly, types will be made more explicit:

    public static class CountChars {
        private static final Collector<
            Integer, ?, Map<Integer, Long>
        > grouper = Collectors.groupingBy(
            Function.<Integer>identity(),
            Collectors.<Integer>counting()
        );

        private static final Comparator<
            Map.Entry<Integer, Long>
        > comparator = Map.Entry
            .<Integer, Long>comparingByValue()
            .reversed();

        private static final Collector<
            CharSequence, ?, String
        > joiner = Collectors.joining();

        public static Stream<Map.Entry<Integer, Long>> streamMostOccurring(String s) {
            return s.chars().boxed()
                .collect(grouper)
                .entrySet().stream()
                .sorted(comparator);
        }

        public static String formatMostOccurring(String s, int limit) {
            return streamMostOccurring(s)
                .limit(limit)
                .map(
                    kv -> "%c %d%n".formatted(kv.getKey(), kv.getValue())
                )
                .collect(joiner);
        }

        // ...
edited body
Source Link
Reinderien
  • 71.2k
  • 5
  • 76
  • 257
Loading
Source Link
Reinderien
  • 71.2k
  • 5
  • 76
  • 257
Loading