Question

Formatted question description: https://leetcode.ca/all/30.html

30. Substring with Concatenation of All Words

You are given a string, s, and a list of words, words, that are all of the same length.

Find all starting indices of substring(s) in s that is a concatenation of each word in words exactly once
and without any intervening characters.

For example, given:
s: "barfoothefoobarman"
words: ["foo", "bar"]

You should return the indices: [0,9].
(order does not matter).

@tag-string

Algorithm

It is necessary to frequently judge whether the substring of length len in the s string is a word in words. For quick judgment, you can use HashMap. At the same time, because the words array may have repeated words, you must use HashMap to create all words and their appearance times The mapping between, that is, count the number of times each word appears.

Traverse all the substrings of length nxlen in s. When the length of the remaining substrings is less than nxlen, there is no need to judge. So i starts from 0 and ends at (int)s.size()-nxlen. Note that you must first convert s.size() to an integer before performing the solution. Be sure to form such a habit. Once size() is to be subtracted from a number, first switch to int type, because the return value of size() is unsigned.

Once you subtract a number larger than yourself, an error will occur. For each traversed substring of length nxlen, it is necessary to verify whether it is exactly composed of all words in words. The checking method is to take the substring of length len each time to see if it is a word in words. To facilitate comparison, create another HashMap.

When the extracted word is not in words, break it directly, otherwise, add 1 to its mapping value in the new HashMap, and check if its mapping value exceeds the mapping value in the original HashMap, Also break off, because even if the current word is in words, if it appears more than the number of times in words, it is still unsatisfactory. Outside the for loop, if j is exactly equal to n, it means that the detected n substrings of length len are all words in words, and just form words, then the current position i is added to the result.

Code

Java

  • import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    
    public class Substring_with_Concatenation_of_All_Words {
    
    	public static void main(String[] args) {
    		HashMap<String, Boolean> hm = new HashMap<String, Boolean>();
    		hm.put("a", true);
    
    		// @note: ugly type convert...
    		HashMap<String, Boolean> copy = (HashMap<String, Boolean>) hm.clone();
    		copy.put("a", false);
    
    		System.out.println(hm.entrySet());
    		System.out.println(copy.entrySet());
    	}
    
    	public class Solution {
    
    	    List<Integer> list = new ArrayList<Integer>();
    
            // pre-process a hashmap storing all short words, for quick look up
    	    HashMap<String, Integer> hm = new HashMap<String, Integer>(); // L words to count
    
    	    public List<Integer> findSubstring(String s, String[] words) {
    
    	        if (s.length() == 0 || s == null || words.length == 0 || words == null || s.length() < words.length * words[0].length())
    	            return list;
    
    	        int eachlen = words[0].length();
    	        int totalLength = words.length * words[0].length();
    
    	        // pre-process hashmap
    	        for (String e: words) {
    	            if (hm.containsKey(e)) {
    	                hm.put(e, hm.get(e) + 1);
    	            } else {
    	                hm.put(e, 1);
    	            }
    	        }
    
    	        // go through s to find match
    	        for (int i = 0; i <= s.length() - totalLength; i++) {
    
    	            String sub = s.substring(i, i + totalLength);
    	            if (isValid(sub, eachlen))
    	                list.add(i);
    
    	        }
    
    	        return list;
    	    }
    
    
    	    public boolean isValid(String s, int eachlen) {
    	        HashMap<String, Integer> hmcopy = new HashMap<String, Integer>(hm);
    
    	        // increment i by eachlen
    	        for (int i = 0; i <= s.length() - eachlen; i = i + eachlen) {
    
    	            String current = s.substring(i, i + eachlen);
    
    	            // if its count==0 then false, eg:foofoo
    	            if (hmcopy.containsKey(current) && hmcopy.get(current) > 0) {
    	                // if hm has it, then decrease its count
    	                hmcopy.put(current, hmcopy.get(current) - 1);
    	            } else {
    	                return false;
    	            }
    	        }
    
    	        return true;
    	    }
    	}
    
    }
    
  • // OJ: https://leetcode.com/problems/substring-with-concatenation-of-all-words/
    // Time: O(MNL)
    // Space: O(ML)
    class Solution {
    private:
    public:
        vector<int> findSubstring(string s, vector<string>& A) {
            if (A.empty()) return {};
            int L = A.size(), M = A[0].size(), last = s.size() - M * L;
            unordered_map<string, int> cnt;
            for (auto &w : A) ++cnt[w];
            vector<int> ans;
            function<bool(int, int)> dfs = [&](int i, int seen) {
                if (seen == L) return true;
                int &c = cnt[s.substr(i, M)];
                if (!c) return false;
                c--;
                bool ans = dfs(i + M, seen + 1);
                c++;
                return ans;
            };
            for (int i = 0; i <= last; ++i) {
                if (dfs(i, 0)) ans.push_back(i);
            }
            return ans;
        }
    };
    
  • from collections import deque
    
    
    class Solution(object):
      def findSubstring(self, s, words):
        """
        :type s: str
        :type words: List[str]
        :rtype: List[int]
        """
        if len(words) > len(s):
          return []
        d = {}
        t = {}
        ans = []
        deq = deque([])
        wl = len(words[0])
        fullscore = 0
        for word in words:
          d[word] = d.get(word, 0) + 1
          fullscore += 1
    
        for i in range(0, len(s)):
          head = start = i
          t.clear()
          score = 0
    
          while start + wl <= len(s) and s[start:start + wl] in d:
            cword = s[start:start + wl]
            t[cword] = t.get(cword, 0) + 1
            if t[cword] <= d[cword]:
              score += 1
            else:
              break
            start += wl
    
          if score == fullscore:
            ans.append(head)
    
        return ans
    
    

All Problems

All Solutions