Formatted question description: https://leetcode.ca/all/1520.html

1520. Maximum Number of Non-Overlapping Substrings (Medium)

Given a string s of lowercase letters, you need to find the maximum number of non-empty substrings of s that meet the following conditions:

  1. The substrings do not overlap, that is for any two substrings s[i..j] and s[k..l], either j < k or i > l is true.
  2. A substring that contains a certain character c must also contain all occurrences of c.

Find the maximum number of substrings that meet the above conditions. If there are multiple solutions with the same number of substrings, return the one with minimum total length. It can be shown that there exists a unique solution of minimum total length.

Notice that you can return the substrings in any order.

 

Example 1:

Input: s = "adefaddaccc"
Output: ["e","f","ccc"]
Explanation: The following are all the possible substrings that meet the conditions:
[
  "adefaddaccc"
  "adefadda",
  "ef",
  "e",
  "f",
  "ccc",
]
If we choose the first string, we cannot choose anything else and we'd get only 1. If we choose "adefadda", we are left with "ccc" which is the only one that doesn't overlap, thus obtaining 2 substrings. Notice also, that it's not optimal to choose "ef" since it can be split into two. Therefore, the optimal way is to choose ["e","f","ccc"] which gives us 3 substrings. No other solution of the same number of substrings exist.

Example 2:

Input: s = "abbaccd"
Output: ["d","bb","cc"]
Explanation: Notice that while the set of substrings ["d","abba","cc"] also has length 3, it's considered incorrect since it has larger total length.

 

Constraints:

  • 1 <= s.length <= 10^5
  • s contains only lowercase English letters.

Related Topics:
Greedy

Solution 1. DP

For s[0..i], assume S(i+1) is the optimal substring set satisfying those conditions.

Let dp[i+1] be the length of S(i+1), len[i+1] be the sum of lengths of substrings in S(i+1), pick[i+1] be the character we pick to get the optimal solution S(i+1).

// OJ: https://leetcode.com/problems/maximum-number-of-non-overlapping-substrings/

// Time: O(N)
// Space: O(N)
class Solution {
public:
    vector<string> maxNumOfSubstrings(string s) {
        int N = s.size();
        vector<int> left(26, -1), right(26, -1); // the range of each character
        for (int i = 0; i < N; ++i) {
            int c = s[i] - 'a';
            if (left[c] == -1) left[c] = i;
            right[c] = i;
        }
        for (int i = 0; i < 26; ++i) { // An inefficient way of generating the ranges satisfying condition 2.
            if (left[i] == -1) continue;
            for (int j = left[i] + 1; j < right[i]; ++j) {
                int L = left[s[j] - 'a'], R = right[s[j] - 'a'];
                if (L < left[i]) j = L; // rewind
                left[i] = min(left[i], L);
                right[i] = max(right[i], R);
            }
        }
        vector<int> dp(N + 1), pick(N + 1, -1), len(N + 1, N + 1);
        for (int i = 0; i < N; ++i) {
            len[i + 1] = len[i];
            pick[i + 1] = pick[i];
            dp[i + 1] = dp[i];
            int j = 0;
            for (; j < 26; ++j) {
                if (right[j] == i) break;
            }
            if (j == 26 || dp[left[j]] + 1 < dp[i + 1]) continue;
            if (dp[left[j]] + 1 > dp[i + 1] || len[left[j]] + right[j] - left[j] + 1 < len[i]) {
                len[i + 1] = len[left[j]] + right[j] - left[j] + 1; // find a better choice, update the choice.
                pick[i + 1] = j;
            }
            dp[i + 1] = max(dp[i + 1], dp[left[j]] + 1);
        }
        vector<string> ans;
        for (int p = pick[N]; p != -1;) { // reconstruct the substrings.
            int L = left[p], R = right[p];
            ans.push_back(s.substr(L, R - L + 1));
            p = pick[L];
        }
        return ans;
    }
};

Solution 2. Greedy

// OJ: https://leetcode.com/problems/maximum-number-of-non-overlapping-substrings/

// Time: O(N)
// Space: O(1)
// Ref: https://leetcode.com/problems/maximum-number-of-non-overlapping-substrings/discuss/743223/C%2B%2BJava-Greedy-O(n)
class Solution {
    int getRightEdge(string &s, int i, vector<int> &left, vector<int> &right) { // get the corresponding right edge of `s[i]`
        int R = right[s[i] - 'a'];
        for (int j = i; j <= R; ++j) {
            if (left[s[j] - 'a'] < i) return -1; // If this range extends leftwards, it must be handled before and should be skipped.
            R = max(R, right[s[j] - 'a']);
        }
        return R;
    }
public:
    vector<string> maxNumOfSubstrings(string s) {
        int N = s.size();
        vector<int> left(26, -1), right(26, -1); // The range of the character
        for (int i = 0; i < s.size(); ++i) {
            int c = s[i] - 'a';
            if (left[c] == -1) left[c] = i;
            right[c] = i;
        }
        int R = -1;
        vector<string> ans;
        for (int i = 0; i < N; ++i) {
            int nextR = getRightEdge(s, i, left, right);
            if (nextR == -1) continue;
            if (i > R) ans.push_back(""); // If this substring doesn't overlap with the previous substring, push new item instead of updating the previous one.
            R = nextR;
            ans.back() = s.substr(i, R - i + 1);
        }
        return ans;
    }
};

Solution 3. Interval Scheduling Maximization Problem (ISMP)

First get the covering ranges of all the characters in s. There are at most 26 ranges.

Then the problem becomes a classic problem – finding the maximum set of non-overlapping intervals, aka, Interval Scheduling Maximization Problem.

// OJ: https://leetcode.com/problems/maximum-number-of-non-overlapping-substrings/

// Time: O(N)
// Space: O(1)
// Ref: https://leetcode.com/problems/maximum-number-of-non-overlapping-substrings/discuss/744420/C%2B%2BJavaPython-Interval-Scheduling-Maximization-(ISMP)
class Solution {
public:
    vector<string> maxNumOfSubstrings(string s) {
        int N = s.size();
        vector<int> left(26, -1), right(26, -1);
        for (int i = 0; i < N; ++i) {
            int c = s[i] - 'a';
            if (left[c] == -1) left[c] = i;
            right[c] = i;
        }
        vector<pair<int, int>> ranges; // last, first
        for (int i = 0; i < 26; ++i) {
            if (left[i] == -1) continue;
            int first = left[i], last = right[i];
            for (int j = first; j <= last && first == left[i]; ++j) { // If `first != left[i]`, this character is not the representative of its covering range. The representative character is the first character of that range.
                first = min(first, left[s[j] - 'a']);
                last = max(last, right[s[j] - 'a']);
            }
            if (first == left[i]) ranges.emplace_back(last, first); // Only push representative ranges.
        }
        sort(begin(ranges), end(ranges)); // Greedily pick the interval ends first.
        int end = -1;
        vector<string> ans;
        for (auto &[last, first] : ranges) {
            if (first <= end) continue; // If this range overlaps with the previously picked range, skip.
            ans.push_back(s.substr(first, last - first + 1));
            end = last;
        }
        return ans;
    }
};

Note

Lesson learned: During the context I came up with similar idea but I was stuck at generating the smallest covering range of each character. When scaning within left[i], right[i] range for 'a' + i, if we extend the left edge due to another character range, we need to restart the scanning from the newly extended left edge. This is not efficient but it does work.

In the above solutions, instead of generating 26 intervals, they just generate representative intervals using the starting character to ensure the uniqueness of the intervals.

Java

class Solution {
    public List<String> maxNumOfSubstrings(String s) {
        Map<Character, int[]> startEndMap = new HashMap<Character, int[]>();
        int length = s.length();
        for (int i = 0; i < length; i++) {
            char c = s.charAt(i);
            int[] startEnd = startEndMap.getOrDefault(c, new int[]{-1, -1});
            if (startEnd[0] < 0)
                startEnd[0] = i;
            startEnd[1] = i;
            startEndMap.put(c, startEnd);
        }
        int[] endIndices = new int[length];
        Arrays.fill(endIndices, -1);
        for (int i = 0; i < length; i++) {
            char c = s.charAt(i);
            int[] startEnd = startEndMap.get(c);
            if (startEnd[0] != i)
                continue;
            boolean flag = true;
            int curEnd = startEnd[1];
            for (int j = i + 1; j < length; j++) {
                if (j > curEnd)
                    break;
                char nextC = s.charAt(j);
                int[] nextStartEnd = startEndMap.get(nextC);
                if (nextStartEnd[0] < i) {
                    flag = false;
                    break;
                }
                curEnd = Math.max(curEnd, nextStartEnd[1]);
            }
            if (flag)
                endIndices[i] = curEnd;
        }
        List<String> list = new ArrayList<String>();
        int curStart = -1, curEnd = -1;
        for (int i = 0; i < length; i++) {
            if (i == curEnd) {
                list.add(s.substring(curStart, curEnd + 1));
                continue;
            }
            int end = endIndices[i];
            if (end < 0)
                continue;
            if (curEnd < i) {
                curStart = i;
                curEnd = end;
            } else if (i > curStart && end < curEnd) {
                curStart = i;
                curEnd = end;
            }
            if (i == curEnd)
                list.add(s.substring(curStart, curEnd + 1));
        }
        return list;
    }
}

All Problems

All Solutions