Formatted question description: https://leetcode.ca/all/2157.html

# 2157. Groups of Strings (Hard)

You are given a **0-indexed** array of strings `words`

. Each string consists of **lowercase English letters** only. No letter occurs more than once in any string of `words`

.

Two strings `s1`

and `s2`

are said to be **connected** if the set of letters of `s2`

can be obtained from the set of letters of `s1`

by any **one** of the following operations:

- Adding exactly one letter to the set of the letters of
`s1`

. - Deleting exactly one letter from the set of the letters of
`s1`

. - Replacing exactly one letter from the set of the letters of
`s1`

with any letter,**including**itself.

The array `words`

can be divided into one or more non-intersecting **groups**. A string belongs to a group if any **one** of the following is true:

- It is connected to
**at least one**other string of the group. - It is the
**only**string present in the group.

Note that the strings in `words`

should be grouped in such a manner that a string belonging to a group cannot be connected to a string present in any other group. It can be proved that such an arrangement is always unique.

Return *an array* `ans`

*of size* `2`

*where:*

`ans[0]`

*is the***total number**of groups`words`

*can be divided into, and*`ans[1]`

*is the*.**size of the largest**group

**Example 1:**

Input:words = ["a","b","ab","cde"]Output:[2,3]Explanation:- words[0] can be used to obtain words[1] (by replacing 'a' with 'b'), and words[2] (by adding 'b'). So words[0] is connected to words[1] and words[2]. - words[1] can be used to obtain words[0] (by replacing 'b' with 'a'), and words[2] (by adding 'a'). So words[1] is connected to words[0] and words[2]. - words[2] can be used to obtain words[0] (by deleting 'b'), and words[1] (by deleting 'a'). So words[2] is connected to words[0] and words[1]. - words[3] is not connected to any string in words. Thus, words can be divided into 2 groups ["a","b","ab"] and ["cde"]. The size of the largest group is 3.

**Example 2:**

Input:words = ["a","ab","abc"]Output:[1,3]Explanation:- words[0] is connected to words[1]. - words[1] is connected to words[0] and words[2]. - words[2] is connected to words[1]. Since all strings are connected to each other, they should be grouped together. Thus, the size of the largest group is 3.

**Constraints:**

`1 <= words.length <= 2 * 10`

^{4}`1 <= words[i].length <= 26`

`words[i]`

consists of lowercase English letters only.- No letter occurs more than once in
`words[i]`

.

**Similar Questions**:

## Solution 1. Union Find + Bitmask

Let `m`

be a map from a bitmask to the corresponding index in `A`

.

For each `A[i]`

:

- generate the correponding hash
`h`

- generate the addition, deletion and replacement variant of hash
`h`

. - Assume the variant’s hash is
`t`

, we connect`i`

with`m[t]`

using Union Find.

In the end, Union Find can tell us the number of groups and the size of all groups.

Note that this sometimes gets TLE because of the tight time constraint and the randomness of `unordered_map`

.

```
// OJ: https://leetcode.com/contest/weekly-contest-278/problems/groups-of-strings/
// Time: O(26 * 26 * NlogN)
// Space: O(N)
class UnionFind {
vector<int> id, size;
int cnt;
public:
UnionFind(int n) : id(n), size(n, 1), cnt(n) {
iota(begin(id), end(id), 0);
}
int find(int a) {
return id[a] == a ? a : (id[a] = find(id[a]));
}
void connect(int a, int b) {
int x = find(a), y = find(b);
if (x == y) return;
id[x] = y;
size[y] += size[x];
--cnt;
}
int getSize(int a) {
return size[find(a)];
}
int getCount() { return cnt; }
};
class Solution {
public:
vector<int> groupStrings(vector<string>& A) {
int N = A.size();
UnionFind uf(N);
unordered_map<int, int> m; // map from hash to index
m.reserve(N);
for (int i = 0; i < N; ++i) {
int h = 0;
for (char c : A[i]) h |= 1 << (c - 'a'); // `h` is the bitmask representation of `A[i]`
for (int j = 0; j < 26; ++j) {
if (h >> j & 1) { // if `h`'s j-th bit is 1
int del = h ^ (1 << j); // `del` is the bitmask after deleting the `j`-th bit
if (m.count(del)) uf.connect(i, m[del]); // Connect `A[i]` with its deletion variant
for (int k = 0; k < 26; ++k) { // we replace `j`-th bit with `k`-th bit
int rep = del | (1 << k); // `rep` is the bitmask after replacing `j`-th bit with `k`-th bit.
if (rep != del && m.count(rep)) uf.connect(i, m[rep]);
}
} else {
int add = h | (1 << j); // `add` is the bitmask after adding `j`-th bit
if (m.count(add)) uf.connect(i, m[add]);
}
}
m[h] = i;
}
int mx = 1;
for (int i = 0; i < N; ++ i) mx = max(mx, uf.getSize(i));
return {uf.getCount(), mx};
}
};
```

## Solution 2. Optimization

- Added “union by rank” to the Union Find to reduce the time complexity of
`find`

from`O(logN)`

to`O(alpha(N))`

where`alpha(N)`

is the inverse function of Ackermann function. - For the replacement operation, reduced the time from
`O(26 * 26)`

to`O(26)`

by “meet-in-the-middle”. If two strings are connected after replacement operation, then they can be the same string after deleting one character. Example`"abc"`

and`"abd"`

are connected because they both become`"ab"`

after one deletion.

```
// OJ: https://leetcode.com/problems/groups-of-strings/
// Time: O(26 * N * alpha(N))
// Space: O(26 * N)
class UnionFind {
vector<int> id, rank, size;
int cnt;
public:
UnionFind(int n) : id(n), rank(n, 0), size(n, 1), cnt(n) {
iota(begin(id), end(id), 0);
}
int find(int a) {
return id[a] == a ? a : (id[a] = find(id[a]));
}
void connect(int a, int b) {
int x = find(a), y = find(b);
if (x == y) return;
if (rank[x] > rank[y]) {
id[y] = x;
size[x] += size[y];
} else {
id[x] = y;
size[y] += size[x];
if (rank[x] == rank[y]) rank[y]++;
}
--cnt;
}
int getSize(int a) {
return size[find(a)];
}
int getCount() { return cnt; }
};
class Solution {
public:
vector<int> groupStrings(vector<string>& A) {
int N = A.size();
UnionFind uf(N);
unordered_map<int, int> m, delMap;
m.reserve(N);
for (int i = 0; i < N; ++i) {
int h = 0;
for (char c : A[i]) h |= 1 << (c - 'a'); // `h` is the bitmask representation of `A[i]`
for (int j = 0; j < 26; ++j) {
if (h >> j & 1) { // if `h`'s j-th bit is 1
int del = h ^ (1 << j); // `del` is the bitmask after deleting the `j`-th bit
if (m.count(del)) uf.connect(i, m[del]); // Connect `A[i]` with its deletion variant
if (delMap.count(del)) uf.connect(i, delMap[del]);
else delMap[del] = i;
} else {
int add = h | (1 << j); // `add` is the bitmask after adding `j`-th bit
if (m.count(add)) uf.connect(i, m[add]);
}
}
m[h] = i;
}
int mx = 1;
for (int i = 0; i < N; ++ i) mx = max(mx, uf.getSize(i));
return {uf.getCount(), mx};
}
};
```

## Discuss

https://leetcode.com/problems/all-divisions-with-the-highest-score-of-a-binary-array/discuss/1730117