3.13.8. Questions

3.13.8.1. Longest Consecutive Sequence

Given an unsorted array of integers, find the length of the longest consecutive elements sequence.

For example, given [100, 4, 200, 1, 3, 2], the longest consecutive elements sequence is [1, 2, 3, 4], so the function returns its length: 4.

The algorithm should run in O(N) complexity.

Algo 1: union find

// Longest consecutive sequence via union-find over array indices.
// Every first occurrence of a value joins the team of value+1 and value-1 if
// those were already seen; the answer is the size of the largest team.
struct longest_consecutive {
  // bo[i]: boss (parent) index of i, -1 when i is a root.
  // sz[r]: team size stored at root r (0 for indices that lost a merge).
  vector<int> bo, sz;

  // Resets the structure to `len` singleton teams.
  void makeset(int len) {
    bo.assign(len, -1);
    sz.assign(len, 1);
  }

  // Returns the root index of x's team, compressing the path on the way back.
  int findbo(int x) {
    return bo[x] == -1 ? x : (bo[x] = findbo(bo[x]));
  }

  // Merges the teams containing indices x and y (union by size).
  void merge(int x, int y) {
    x = findbo(x), y = findbo(y);
    if (x == y) return;
    if (sz[x] > sz[y]) swap(x, y);  // make y the bigger team
    bo[x] = y;                      // the bigger team's root becomes the boss
    sz[y] += sz[x], sz[x] = 0;      // winner takes all, loser gets nothing
  }

  // Returns the length of the longest run of consecutive integers in nums
  // (0 for an empty input). Duplicates are ignored.
  int run(vector<int>& nums) {
    if (nums.empty()) return 0;
    makeset(nums.size());
    // value -> index of its first occurrence. Keys are widened to long long so
    // that v + 1 / v - 1 cannot overflow when nums holds INT_MAX or INT_MIN.
    unordered_map<long long, int> um;
    int i = 0;
    for (long long v : nums) {
      if (um.count(v) == 0) {  // skip duplicates: they must not grow a team
        um[v] = i;
        if (auto up = um.find(v + 1); up != um.end()) merge(i, up->second);
        if (auto down = um.find(v - 1); down != um.end()) merge(i, down->second);
      }
      i++;
    }
    return *max_element(sz.begin(), sz.end());
  }
};

Algo 2: hashtable

Since an O(n) algorithm is required, sorting is ruled out, so a hash table is the natural choice. Store all the numbers in an unordered_set. For any number A[i], the set tells us immediately whether A[i]+1 and A[i]-1 are also present; if so, continue with A[i]+2 and A[i]-2, and so on, until the entire consecutive run containing A[i] has been found. To avoid searching the same run again when the scan later reaches, say, A[i]-1, every number is removed from the set as soon as it is visited. The search ends when the set is empty. Complexity: each number is inserted into the set once and removed at most once, so the algorithm is O(n).

// Longest consecutive sequence via a hash set: every value is erased from the
// set the moment it is visited, so each run is walked exactly once.
struct longest_consecutive_v2 {
  // Returns the length of the longest run of consecutive integers in num
  // (0 for an empty input).
  int run(vector<int>& num) {
    if (num.empty()) return 0;
    unordered_set<int> ht(num.begin(), num.end());
    // No run can extend past the smallest/largest input value. Bounding the
    // walks by them keeps v + 1 / v - 1 from overflowing at INT_MAX / INT_MIN.
    const auto [lo_it, hi_it] = minmax_element(num.begin(), num.end());
    const int lo = *lo_it, hi = *hi_it;
    int max_len = 1;
    for (int seed : num) {
      if (ht.empty()) break;
      if (ht.erase(seed) == 0) continue;  // already consumed by an earlier run
      int cur_len = 1;
      for (int v = seed; v < hi && ht.erase(v + 1); ++v) cur_len++;  // walk right
      for (int v = seed; v > lo && ht.erase(v - 1); --v) cur_len++;  // walk left
      max_len = max(max_len, cur_len);
    }
    return max_len;
  }
};

3.13.8.2. Number of Connected Components in an Undirected Graph

Given n nodes labeled from 0 to n - 1 and a list of undirected edges (each edge is a pair of nodes), write a function to find the number of connected components in an undirected graph.

Example 1:

0          3
|          |
1 --- 2    4

Given n = 5 and edges = [[0, 1], [1, 2], [3, 4]], return 2.

Example 2:

0           4
|           |
1 --- 2 --- 3
Given n = 5 and edges = [[0, 1], [1, 2], [2, 3], [3, 4]], return 1.

3.13.8.3. Groups of Strings

You are given a 0-indexed array of strings words. Each string consists of lowercase English letters only. No letter occurs more than once in any string of words.

Two strings s1 and s2 are said to be connected if the set of letters of s2 can be obtained from the set of letters of s1 by any one of the following operations:

- Adding exactly one letter to the set of the letters of s1.
- Deleting exactly one letter from the set of the letters of s1.
- Replacing exactly one letter from the set of the letters of s1 with any letter, including itself.

The array words can be divided into one or more non-intersecting groups. A string belongs to a group if any one of the following is true:

- It is connected to at least one other string of the group.
- It is the only string present in the group.

Note that the strings in words should be grouped in such a manner that a string belonging to a group cannot be connected to a string present in any other group. It can be proved that such an arrangement is always unique.

Return an array ans of size 2 where:
- ans[0] is the maximum number of groups words can be divided into, and
- ans[1] is the size of the largest group.

Example 1:

Input: words = ["a","b","ab","cde"]; Output: [2,3]
Explanation:
- words[0] can be used to obtain words[1] (by replacing 'a' with 'b'), and words[2] (by adding 'b'). So words[0] is connected to words[1] and words[2].
- words[1] can be used to obtain words[0] (by replacing 'b' with 'a'), and words[2] (by adding 'a'). So words[1] is connected to words[0] and words[2].
- words[2] can be used to obtain words[0] (by deleting 'b'), and words[1] (by deleting 'a'). So words[2] is connected to words[0] and words[1].
- words[3] is not connected to any string in words.
Thus, words can be divided into 2 groups ["a","b","ab"] and ["cde"]. The size of the largest group is 3.
Example 2:

Input: words = ["a","ab","abc"]; Output: [1,3]
Explanation:
- words[0] is connected to words[1].
- words[1] is connected to words[0] and words[2].
- words[2] is connected to words[1].

Since all strings are connected to each other, they should be grouped together.
Thus, the size of the largest group is 3.

This question asks us to cluster all words in the array whose letter sets are within one edit (add, delete, or replace one letter) of each other, and to return the number of clusters and the size of the largest cluster.

Code Block 3.13.3 Group of Strings

#include <gtest/gtest.h>
#include <sein.hpp>

namespace ns_group_strings {
// Groups of Strings via union-find over word indices.
class SolutionUF {
  vector<int> bo;  // bo[i]: parent of word index i; a root satisfies bo[r] == r

  // Puts the set containing i under the root of the set containing j.
  void union_(int i, int j) {
    const int ri = find(i), rj = find(j);
    bo[ri] = rj;
  }

  // Returns the root of i's set, halving the path so later lookups stay short.
  int find(int i) {
    while (i != bo[i]) {
      bo[i] = bo[bo[i]];
      i = bo[i];
    }
    return i;
  }

  // Returns {number of sets, size of the largest set}.
  pair<int,int> findMaxGroup() {
    unordered_map<int,int> members;  // root -> number of words in its set
    int largest = 0, roots = 0;
    for (int i = 0; i < static_cast<int>(bo.size()); i++) {
      largest = max(largest, ++members[find(i)]);
      roots += (bo[i] == i);
    }
    return {roots, largest};
  }

public:
  // Returns {number of groups, size of the largest group} for `words`.
  // Each word is encoded as a 26-bit letter mask and registered under its own
  // mask and under every mask obtained by adding one letter. Two masks share
  // such a key exactly when they are equal, differ by one added/removed
  // letter, or differ by one replaced letter, so words sharing a key are
  // unioned.
  pair<int,int> groupStrings(vector<string>& words) {
    const int n = words.size();
    bo.resize(n);
    iota(bo.begin(), bo.end(), 0);
    unordered_map<int, int> mp;  // key mask -> index of the last word that registered it
    for (int i = 0; i < n; i++) {
      int bw = 0;
      for (char c : words[i]) bw |= 1 << (c - 'a');
      for (int k = 0; k < 26; k++) {
        const int w = bw | (1 << k);  // equals bw itself when letter k is already present
        auto it = mp.find(w);
        if (it == mp.end()) {
          mp.emplace(w, i);
          continue;
        }
        if (find(it->second) != find(i)) union_(i, it->second);
        it->second = i;
      }
    }
    return findMaxGroup();
  }
};

// Groups of Strings via depth-first search over letter masks.
class SolutionDFS {
  // Consumes the group reachable from mask x and returns how many words it
  // holds. `nums` maps mask -> remaining word count; visited masks are zeroed.
  int _dfs(int x, unordered_map<int, int>& nums) {
    auto hit = nums.find(x);
    if (hit == nums.end() || hit->second == 0) return 0;
    int total = hit->second;
    hit->second = 0;  // mark visited before exploring neighbours
    for (int b = 0; b < 26; ++b) {
      const int m = 1 << b;
      total += _dfs(x ^ m, nums);  // add or delete letter b
      if (!(x & m)) continue;      // replacement needs letter b to be present
      for (int b2 = 0; b2 < 26; ++b2) {
        const int m2 = 1 << b2;
        if (!(x & m2)) total += _dfs(x ^ m ^ m2, nums);  // replace b with absent b2
      }
    }
    return total;
  }

public:
  // Returns {number of groups, size of the largest group} for `words`.
  pair<int,int> groupStrings(vector<string>& words) {
    unordered_map<int, int> freq;  // letter mask -> number of words with that mask
    for (const string& w : words) {
      int mask = 0;
      for (char c : w) mask |= 1 << (c - 'a');
      ++freq[mask];
    }
    int groups = 0, biggest = 0;
    // _dfs only looks entries up and zeroes them; it never inserts, so
    // iterating while it runs is safe.
    for (auto it = freq.begin(); it != freq.end(); ++it) {
      if (it->second == 0) continue;  // already swallowed by an earlier group
      ++groups;
      biggest = max(biggest, _dfs(it->first, freq));
    }
    return {groups, biggest};
  }
};
}  // namespace ns_group_strings

using namespace ns_group_strings;
// Example 1 of the problem statement: groups {"a","b","ab"} and {"cde"}.
// Both solvers must report 2 groups with the largest holding 3 words.
// Uses assertions (instead of printing booleans) so a regression fails the test.
TEST(_dp_group_strings, a) {
  vector<string> vs={"a","b","ab","cde"};
  const pair<int,int> expected{2, 3};
  EXPECT_EQ(SolutionDFS().groupStrings(vs), expected);
  EXPECT_EQ(SolutionUF().groupStrings(vs), expected);
}

https://leetcode.com/problems/groups-of-strings/

3.13.8.4. Percentile Query Tree

[Google L5]Coding round interviewer showed up and asked me to do the question without saying any extra word:

Given a stream of prices from transactions: 79.20, 20.05, 96.82, ...
Implement 2 methods:
1) insert(price)
2) query(percentile) - e.g.: query(0.2) should return a price such that 20% of the prices are lower than it and 80% are higher.

Code Block 3.13.4 An order statistic tree

// One node of the order-statistic binary search tree.
struct node {
  double val{0};     // the stored price
  int card{1};       // cardinality: number of nodes in the subtree rooted here, itself included
  node* l{nullptr};  // left child
  node* r{nullptr};  // right child
  explicit node(double n) : val{n} {}
};

// Inserts n into the BST rooted at x and returns the (possibly new) root.
// Values greater than a node go right, all others go left; every node on the
// insertion path has its subtree cardinality bumped by one.
node* add(node* x, double n) {
  node* fresh = new node(n);
  if (x == nullptr) return fresh;
  node* cur = x;
  for (;;) {
    cur->card++;
    node*& next = (n > cur->val) ? cur->r : cur->l;
    if (next == nullptr) {
      next = fresh;
      break;
    }
    cur = next;
  }
  return x;
}

// Returns the node holding the rank-th smallest value (1-based) in the subtree
// rooted at nd, or nullptr when the subtree is empty or rank <= 0.
// A rank larger than the subtree size yields the last node reached on the way
// right (the original fallback behaviour).
node* order(node* nd, int rank) {
  if (nd == nullptr or rank <= 0) return nullptr;
  while (true) {
    // Number of values strictly before nd in sorted order. Treating a missing
    // left child as 0 is what makes rank 1 resolve to nd itself in that case.
    const int left = nd->l ? nd->l->card : 0;
    if (rank <= left) {
      nd = nd->l;  // left > 0 here, so nd->l is non-null
    } else if (rank == left + 1 or nd->r == nullptr) {
      return nd;
    } else {
      rank -= left + 1;  // skip the left subtree and nd; remaining rank stays >= 1
      nd = nd->r;
    }
  }
}

// Thin wrapper exposing insert/query on top of add() and order().
// NOTE: nodes are allocated with `new` by add() and are never freed here.
struct order_statistics_tree {
  node* root = nullptr;
  explicit order_statistics_tree(node* r) : root(r) {}

  // Inserts a price and returns the tree root. The root is stored back so a
  // tree constructed from nullptr actually keeps its first element.
  node* insert(double numeric) { return root = add(root, numeric); }

  // Returns the price at percentile pr: the element of rank
  // trunc(size * pr), clamped into [1, size] so that very small percentiles
  // map to the minimum instead of an invalid rank 0.
  // Precondition: the tree is non-empty (root != nullptr).
  double query(double pr) {
    int o = root->card * pr;
    if (o < 1) o = 1;
    if (o > root->card) o = root->card;
    node* r = order(root, o);
    return r->val;
  }
};

https://www.1point3acres.com/bbs/thread-862172-1-1.html

Refer to Section 3.13.5, Section 2.6.1.4

3.13.8.5. Maze Generation

Generate a maze in a matrix.

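The number in each cell encodes the cell's edges (walls) as a 4-bit binary number, 0b[left][bottom][right][up], where a set bit means the wall is present. The figure below shows the bit index of each edge: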
    0
  +---+
3 | 1 | 1
  +---+
    2

So the number has a range of [0-15], from 0b0000 to 0b1111.

0001     0010     0011    0100     0101     0110      0111    1000     1001     1010        1111
+---+    +   +    +---+   +   +    +---+    +   +    +---+    +   +    +---+    +   +       +---+
  1        2 |      3 |     4        5        6 |      7 |    | 8      | 9      | 10|  ...  | 15|
+   +    +   +    +   +   +---+    +---+    +---+    +---+    +   +    +   +    +   +       +---+

The following are two examples of maze generation:

+---+---+---+---+---+---+               +---+---+---+---+---+---+                  +---+---+---+---+---+---+
  7 | 15| 15| 15| 15| 15|      start -->  5   1   5   5   5   3 |         start -->  1   5   3 | 11| 9   3 |
+---+---+---+---+---+---+               +---+   +---+---+---+   +                  +   +---+   +   +   +   +
| 15| 15| 15| 15| 15| 15|               | 11| 12  5   5   3 | 10|                  | 8   3 | 12  4   2 | 10|
+---+---+---+---+---+---+    =>         +   +---+---+---+   +   +        or        +   +   +---+---+   +   +
| 15| 15| 15| 15| 15| 15|               | 8   5   1   5   6 | 14|                  | 10| 14| 11| 13  2 | 14|
+---+---+---+---+---+---+               +   +---+   +---+---+---+                  +   +---+   +---+   +---+
| 15| 15| 15| 15| 15| 13                | 12  7 | 12  5   5   5   -->end           | 12  7 | 12  5   4   5  -->end
+---+---+---+---+---+---+               +---+---+---+---+---+---+                  +---+---+---+---+---+---+

Desired Properties
  • None of the boundary is deleted (except at 'start' and 'end').
  • Every cell is reachable from every other cell.
  • There are no cycles – no cell can reach itself by a path unless it retraces some part of the path.

A sample API is:

vector<vector<int>> maze_generation(int row, int column) {...}

There are many ways to generate a maze. What they have in common is that they all rely on a random function to decide which walls to remove.

Code Block 3.13.5 Maze Generation by Union Find

#include <gtest/gtest.h>
#include <sein.hpp>

namespace ns_maze {
// (row delta, column delta) of the four neighbours, in the order up, down, left, right.
static vector<pair<int, int>> DIRS{{-1, 0}, {1, 0}, {0, -1}, {0, 1}};
// Minimal union-find used by the maze generator.
struct uf {
  vector<int> bo;  // bo[x]: parent of x, or -1 when x is the root of its set

  // Starts with sz singleton sets.
  uf(int sz) : bo(sz, -1) {}

  // Returns the root of x's set and re-points every node on the path at it.
  int _find(int x) {
    int root = x;
    while (bo[root] != -1) root = bo[root];
    while (x != root) {
      const int up = bo[x];
      bo[x] = root;
      x = up;
    }
    return root;
  }

  // Merges the sets of x and y; the root of y's set becomes the common root.
  void _union(int x, int y) {
    const int rx = _find(x);
    const int ry = _find(y);
    if (rx != ry) bo[rx] = ry;
  }

  bool connected(int x, int y) { return _find(x) == _find(y); }

  // Number of disjoint sets, i.e. the number of roots.
  int group_size() {
    return count(bo.begin(), bo.end(), -1);
  }
};

// Removes the wall between cell (i, j) and its neighbour (i + di, j + dj) by
// clearing the facing bit in both cells (bits are 0b[left][bottom][right][up]).
// Any (di, dj) other than the four unit moves leaves the grid untouched.
void track(vector<vector<int>> &g, int i, int j, int di, int dj) {
  int here = 0, there = 0;  // masks applied to this cell and to the neighbour
  if (di == 0 && dj == 1) {          // right
    here = 0b1101, there = 0b0111;
  } else if (di == 0 && dj == -1) {  // left
    here = 0b0111, there = 0b1101;
  } else if (di == 1 && dj == 0) {   // down
    here = 0b1011, there = 0b1110;
  } else if (di == -1 && dj == 0) {  // up
    here = 0b1110, there = 0b1011;
  } else {
    return;
  }
  g[i][j] &= here;
  g[i + di][j + dj] &= there;
}

vector<vector<int>> maze_generation_UF(int m, int n) {
  vector<vector<int>> g(m, vector<int>(n, 0b1111));
  g[0][0] = 7, g[m - 1][n - 1] = 13;
  uf ufo(m * n);
  srand(0xdeadbeef);

  while (ufo.group_size() > 1) {  // 💣
    for (int i = 0; i < m; i++) {
      for (int j = 0; j < n; j++) {
        auto [di, dj] = DIRS[rand() % 4];  // 💣
        int x = i * n + j, y = (i + di) * n + j + dj;
        if (i + di < 0 or i + di >= m or j + dj < 0 or j + dj >= n) continue;
        if (!ufo.connected(x, y)) {
          ufo._union(x, y);  // connect two cells
          track(g, i, j, di, dj);
        }
      }
    }
  }
  return g;
}

// Generates a maze with a randomized depth-first search from the top-left cell.
struct MazeGenDFS {
  int R = 0, C = 0;      // grid dimensions of the maze being generated
  vector<bool> visited;  // visited[i * C + j]: cell (i, j) has been reached

  // Visits (i, j) and carves passages to unvisited neighbours in random order.
  // The bottom-right (end) cell is marked but never expanded, so it is a dead end.
  void dfs(vector<vector<int>> &g, int i, int j) {
    visited[i * C + j] = true;
    if (i == R - 1 and j == C - 1) return;
    vector<pair<int, int>> dirs = DIRS;
    // NOTE(review): std::random_shuffle was deprecated in C++14 and removed in
    // C++17. It is kept because the expected grids in the accompanying test
    // depend on its rand()-driven sequence; consider migrating to std::shuffle.
    random_shuffle(dirs.begin(), dirs.end());
    for (auto [di, dj]: dirs) {
      int ni = i + di, nj = j + dj;
      if (ni >= 0 and ni < R and nj >= 0 and nj < C and !visited[ni * C + nj]) {
        track(g, i, j, di, dj);
        dfs(g, ni, nj);
      }
    }
  }

  // Returns an m x n grid of wall codes (0b[left][bottom][right][up]) with the
  // start opened on the left of the top-left cell and the end opened on the
  // right of the bottom-right cell. Returns an empty grid when m <= 0 or n <= 0.
  vector<vector<int>> maze_generation_DFS(int m, int n) {
    if (m <= 0 or n <= 0) return {};
    vector<vector<int>> g(m, vector<int>(n, 0b1111));
    // assign (not resize) so flags left over from a previous call are cleared.
    visited.assign(m * n, false);
    // Clear single bits rather than overwrite, so a 1x1 maze keeps both openings.
    g[0][0] &= 0b0111, g[m - 1][n - 1] &= 0b1101;
    srand(0xdeadbeef);
    R = m, C = n;
    dfs(g, 0, 0);
    return g;
  }
};
}
using namespace ns_maze;
// Pins the 4x4 output of both generators. Both seed rand() with a fixed value,
// so the expected grids are tied to the platform's rand()/random_shuffle.
TEST(_maze, a) {
  const vector<vector<int>> want_uf = {{1,  5,  7, 11}, {10, 11, 9, 2}, {8,  0,  6, 10}, {14, 12, 7, 12}};
  const vector<vector<int>> got_uf = maze_generation_UF(4, 4);
  EXPECT_EQ(got_uf, want_uf);

  const vector<vector<int>> want_dfs = { { 3, 13, 5, 3 }, { 12, 5, 5, 2 }, { 9, 5, 5, 6 }, { 12, 5, 5, 5 } };
  const vector<vector<int>> got_dfs = MazeGenDFS().maze_generation_DFS(4, 4);
  EXPECT_EQ(got_dfs, want_dfs);
}

3.13.8.6. Multi-language Translator

Design a multi-language translator where there are two APIs:

add(String inputLanguage, String inputWord, String outputLanguage, String outputWord)
get(String inputLanguage, String inputWord, String outputLanguage)

For example:
    add("English", "hello", "Spanish", "hola")
    add("Spanish", "hola", "French", "Bon jour")
    add("French", "Bon jour", "Chinese", "nihao")
Then:
    get("English", "hello", "French") => "Bon jour"
    get("Spanish", "hola", "Chinese") => "nihao"

An intuitive way to solve this problem is graph traversal like BFS, DFS, but union find can be used to solve it in a more efficient way.

Code Block 3.13.6 Multilang Translator using Union Find

#include <gtest/gtest.h>
#include <sein.hpp>

namespace ns_uf_multilang_translator{
// Growable union-find with union by size.
struct union_find {
  // bo[i]: boss (parent) of i, with bo[r] == r for a root.
  // sz[r]: size of the set rooted at r (0 once r has lost a merge).
  vector<int> bo, sz;

  // Creates z singleton sets: every element starts as its own boss.
  union_find(int z=0) : bo(z), sz(z, 1) {
    iota(bo.begin(), bo.end(), 0);
  }

  // Returns the final boss of i. The walk halves the path, and the starting
  // element is finally pointed straight at the root.
  int find(int i) {
    const int start = i;
    for (; bo[i] != i; i = bo[i]) {
      bo[i] = bo[bo[i]];
    }
    bo[start] = i;
    return i;
  }

  // Merges the sets of x and y. Returns false when they were already one set.
  // The strictly smaller set is attached to the other; on a tie y's root goes
  // under x's root.
  bool union_(int x, int y) {
    const int bx = find(x), by = find(y);
    if (bx == by) return false;
    const bool x_smaller = sz[bx] < sz[by];
    const int winner = x_smaller ? by : bx;
    const int loser = x_smaller ? bx : by;
    bo[loser] = winner;
    sz[winner] += sz[loser];  // winner takes all
    sz[loser] = 0;
    return true;
  }

  // Appends n new singleton elements.
  void add(int n){
    for (; n != 0; --n) {
      bo.push_back(bo.size());
      sz.push_back(1);
    }
  }
};
struct Solution{
  unordered_map<string,map<string, int>> m1; // lang => word => UF index
  unordered_map<int, map<string, string>> m2;  // final boss => lang => word
  union_find ufo;

  void add(string l1, string w1, string l2, string w2){
    m1[l1], m1[l2];
    if (!m1[l1].count(w1)) m1[l1][w1] = ufo.bo.size(), ufo.add(1);
    if (!m1[l2].count(w2)) m1[l2][w2] = ufo.bo.size(), ufo.add(1);
    ufo.union_(m1[l1][w1], m1[l2][w2]);  // 💣
    auto& m2_ = m2[ufo.find(m1[l1][w1])];
    m2_[l1]=w1, m2_[l2]=w2;
  }

  string get(string l1, string w1, string l2){
    int idx = m1[l1][w1];
    return m2[ufo.find(idx)][l2];
  }
};
}
using namespace ns_uf_multilang_translator;
// Builds two translation chains (hello/... and good/...) and checks lookups
// across several hops within each chain.
TEST(_uf_multilang_translator, a) {
  Solution sln;
  // {input language, input word, output language, output word}
  const vector<vector<string>> links = {
    {"english","hello","spanish","hola"},
    {"spanish","hola", "french","bon jour"},
    {"french","bon jour", "chinese","nihao"},
    {"english","good", "chinese","hao"},
    {"chinese","hao", "polish","dobry"},
    {"polish","dobry", "hindi","अच्छा"},
  };
  for (const auto& e : links) sln.add(e[0], e[1], e[2], e[3]);

  // {input language, input word, output language, expected word}
  const vector<vector<string>> queries = {
    {"spanish","hola", "chinese", "nihao"},
    {"english","hello", "chinese", "nihao"},
    {"english","good", "hindi", "अच्छा"},
    {"english","good", "polish", "dobry"},
  };
  for (const auto& q : queries) EXPECT_EQ(sln.get(q[0], q[1], q[2]), q[3]);
}

https://www.1point3acres.com/bbs/thread-907525-1-1.html