3.8.4. Topological Sort

Time complexity: O(|V|+|E|); space complexity: O(|V|+|E|) (the adjacency list plus the per-vertex state).

Topological sort of a directed graph is a linear ordering of its vertices such that for every directed edge uv from vertex u to vertex v, u comes before v in the ordering.

We can use the Course Schedule question as an example. There are a total of n courses you have to take, labeled from 0 to n - 1. Some courses may have prerequisites, for example to take course 0 you have to first take course 1, which is expressed as a pair: [0,1]. Given the total number of courses and a list of prerequisite pairs, is it possible for you to finish all courses?

Example 1:
Input: 6, [[1,0],[2,1],[2,0],[3,2],[4,3],[5,3],[5,4]]
Output: true

Example 2:
Input: 2, [[1,0],[0,1]]
Output: false
Explanation: There are a total of 2 courses to take. To take course 1 you should have finished course 0, and to take course 0 you should also have finished course 1. So it is impossible.

0 -----> 2 -----> 3 -----> 4          0 -----> 2 -----> 3 -----> 4
 \      ^          \     /             \      ^          ^     /
  \    /            \   /               \    /            \   /
   v  /              v v                 v  /              \ v
    1                 5                   1                 5
topologically sortable                NOT topologically sortable (cycle 3->4->5->3)

3.8.4.1. Post-order DFS

Post-order DFS will be faster than Kahn’s algorithm at finding a cycle. To find a cycle, we track the color state of each node. Initially all the nodes are white. When we enter the DFS function for a node, we mark it grey. When we backtrack and unwind the stack, we mark it black. Reaching a grey node again means we have found a cycle.

#include <gtest/gtest.h>
#include <sein.hpp>

namespace ns_topo {
// Three-state DFS over the adjacency list `graph`, starting at node `i`.
// visit[v] is 0 for an untouched node, -1 while v is on the current DFS
// stack, and 1 once everything reachable from v has been explored.
// Returns false as soon as a node that is still on the stack is reached
// again (a cycle); returns true otherwise.
bool dfs_with_cycle_detection(vector<vector<int>> &graph, vector<int> &visit, int i) {
  switch (visit[i]) {
    case -1: return false;  // node is on the current stack: cycle
    case 1:  return true;   // node was fully explored earlier
    default: break;
  }
  visit[i] = -1;
  bool acyclic = true;
  for (size_t k = 0; acyclic && k < graph[i].size(); ++k)
    acyclic = dfs_with_cycle_detection(graph, visit, graph[i][k]);
  if (!acyclic) return false;  // leave the node marked -1, as a cycle was found
  visit[i] = 1;
  return true;
}

// Returns true when all `numCourses` courses can be finished, i.e. the
// prerequisite graph has no cycle. Each pair {x, y} in `prerequisites`
// adds x to the adjacency list of y.
bool canFinish_DFS(int numCourses, vector<pair<int, int>> prerequisites) {
  vector<vector<int>> adjacency(numCourses);
  vector<int> state(numCourses);
  for (const auto &edge : prerequisites)
    adjacency[edge.second].push_back(edge.first);
  int course = 0;
  // Stop at the first start node whose DFS runs into a cycle.
  while (course < numCourses && dfs_with_cycle_detection(adjacency, state, course))
    ++course;
  return course == numCourses;
}
}  // namespace ns_topo
using namespace ns_topo;
TEST(_topo_course_schedule_dfs, a) {
  // Acyclic prerequisite list over courses 0..5.
  vector<pair<int, int>> edges{{1, 0}, {2, 1}, {2, 0}, {3, 2}, {4, 3}, {5, 3}, {5, 4}};
  const bool acyclic_result = canFinish_DFS(6, edges);
  EXPECT_TRUE(acyclic_result);

  // Replace {5, 3} with {3, 5}: course 3 now needs 5, 5 needs 4 and 4 needs 3.
  edges[5] = {3, 5};
  const bool cyclic_result = canFinish_DFS(6, edges);
  EXPECT_FALSE(cyclic_result);
}

To get a topologically sorted path:

// find one path by DFS

// Post-order DFS helper. Appends every node finished during this call to
// `path` (children before parents).
//   g     - adjacency list; taken by const reference so the graph is not
//           copied on every recursive call
//   path  - output, nodes in post-order
//   i     - node to explore
//   color - per-node state: -1 unvisited, 0 on the current DFS stack,
//           1 finished
// Returns false as soon as a node that is still on the stack is reached
// again (cycle), true otherwise.
bool dfs_with_cycle_detection(const vector<vector<int>>& g, vector<int>& path, int i, vector<int>& color){
  if (color[i] == 0) return false;  // cycle detected
  if (color[i] == 1) return true;   // already finished, no cycle through here
  color[i] = 0;
  for (int next : g[i])
    if (!dfs_with_cycle_detection(g, path, next, color)) return false;
  path.push_back(i);
  color[i] = 1;
  return true;
}

// Returns one topological order of the nodes 0..n-1, or an empty vector when
// the graph contains a cycle. Each pair {cur, pr} in `pre` is an edge
// pr -> cur (pr has to come before cur).
vector<int> find_topo_order_dfs(int n, vector<pair<int, int>> pre) {
  vector<vector<int>> G(n);  // adjacency list
  vector<int> r, color(n, -1), in(n);
  for (const auto& [cur, pr] : pre) {
    G[pr].push_back(cur);
    ++in[cur];
  }
  // DFS is started only from nodes with in-degree 0.
  for (int i = 0; i < n; ++i) {
    if (in[i] != 0) continue;
    if (!dfs_with_cycle_detection(G, r, i, color)) return {};
  }
  // Nodes on a cycle that no zero in-degree node can reach are never visited
  // by the loop above; a short post-order list therefore also means "cycle".
  if (r.size() != static_cast<size_t>(n)) return {};
  reverse(r.begin(), r.end());
  return r;
}

3.8.4.2. Kahn Algo - BFS with Zero-Degree Nodes

❤️ The advantage of Kahn’s algorithm is that no separate DAG check is needed: if some nodes are never output, the graph has a cycle.

Topologically sortable == No cycle!

#include <gtest/gtest.h>
#include <sein.hpp>

namespace ns_topo {
// Kahn's algorithm on explicit successor / predecessor sets.
// Each pair {cur, pr} in `prerequisites` is an edge pr -> cur.
// Returns true when every edge can be removed by repeatedly peeling off
// nodes that have no remaining predecessor, i.e. the graph has no cycle.
// `numCourses` is not used by this implementation.
bool canFinish_Kahn(int numCourses, vector<pair<int, int>> prerequisites) {
  unordered_map<int, set<int>> suc, pre;
  unordered_set<int> ns;
  for (const auto& [cur, pr] : prerequisites) {
    suc[pr].insert(cur);
    pre[cur].insert(pr);
    ns.insert(pr);  // every node that has at least one outgoing edge
  }
  // Drop the nodes that still have predecessors: ns keeps the 0-in-degree ones.
  for (const auto& entry : pre) ns.erase(entry.first);
  while (!ns.empty()) {
    unordered_set<int> tmp;  // nodes whose last predecessor is removed in this round
    for (int i : ns) {
      auto out = suc.find(i);
      if (out == suc.end()) continue;
      for (int j : out->second) {
        auto preds = pre.find(j);
        if (preds == pre.end()) continue;  // in-degree==0
        preds->second.erase(i);
        if (preds->second.empty()) {
          pre.erase(preds);
          tmp.insert(j);
        }
      }
      suc.erase(out);
    }
    ns.swap(tmp);  // next round works on the freshly released nodes
  }
  // Anything left in either map belongs to a cycle.
  return pre.empty() && suc.empty();
}

// Kahn's algorithm with an in-degree array and a queue.
// Each pair {x, y} in `prerequisites` is an edge y -> x.
// Returns true when all `numCourses` nodes can be dequeued, i.e. the graph
// has no cycle.
bool canFinish_BFS(int numCourses, vector<pair<int, int>> prerequisites) {
  vector<vector<int>> graph(numCourses);
  vector<int> in(numCourses);
  for (const auto& [x, y] : prerequisites) {
    graph[y].push_back(x);
    ++in[x];
  }
  queue<int> q;
  for (int i = 0; i < numCourses; ++i)
    if (in[i] == 0) q.push(i);
  int processed = 0;  // a node is dequeued exactly when its in-degree reached 0
  while (!q.empty()) {
    const int t = q.front();
    q.pop();
    ++processed;
    for (int a : graph[t])
      if (--in[a] == 0) q.push(a);
  }
  // Nodes that were never dequeued still have a positive in-degree.
  return processed == numCourses;
}
}  // namespace ns_topo
using namespace ns_topo;
TEST(_topo_course_schedule_bfs, a) {
  // Acyclic prerequisite list: both implementations must accept it.
  vector<pair<int, int>> pre = {{1, 0}, {2, 1}, {2, 0}, {3, 2}, {4, 3}, {5, 3}, {5, 4}};
  EXPECT_TRUE(canFinish_Kahn(6, pre));
  EXPECT_TRUE(canFinish_BFS(6, pre));
  pre[5] = {3, 5}; // add a cycle
  // Both implementations must reject the cyclic graph, not only the queue-based one.
  EXPECT_FALSE(canFinish_Kahn(6, pre));
  EXPECT_FALSE(canFinish_BFS(6, pre));
}

To get a topologically sorted path:

// find one path by BFS
// Returns one topological order of the nodes 0..n-1 using a queue of nodes
// whose in-degree has dropped to zero, or an empty vector when not every
// node could be output (the graph has a cycle). Each pair {cur, pr} in `pre`
// is an edge pr -> cur.
vector<int> find_topo_order_bfs(int n, vector<pair<int, int>> pre) {
  vector<vector<int>> successors(n);  // adjacency list
  vector<int> indegree(n), order;
  for (const auto& edge : pre) {
    successors[edge.second].push_back(edge.first);
    ++indegree[edge.first];
  }

  // Seed the queue with every node that has no predecessor.
  queue<int> ready;
  for (int v = 0; v < n; ++v) {
    if (indegree[v] == 0) ready.push(v);
  }

  while (!ready.empty()) {
    const int v = ready.front();
    ready.pop();
    order.push_back(v);
    for (int next : successors[v]) {
      --indegree[next];
      if (indegree[next] == 0) ready.push(next);
    }
  }

  // Fewer than n nodes in the output: topologically un-sortable.
  if (order.size() != n) order.clear();
  return order;
}

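With BFS, we can enumerate multiple topological paths. First, we get the Breadth First Tree (CLRS p600) with a layered BFS. Then we use DFS and permutation to combine every ordering of each layer into complete paths. Note that this only produces the orderings in which each BFS layer stays contiguous; for the example graph below that is every topological order, but in general a node may also be placed later than its layer. Please note that the algorithm here is different from the combination algorithm used in https://leetcode.com/problems/letter-combinations-of-a-phone-number/.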

// find all paths by BFS and Permutation

// Backtracking helper for permute(): appends to `r` every permutation of
// `nums` that extends the partial permutation `p`. vd[i] is true while
// nums[i] is part of `p`.
void permute_dfs(vector<int>& nums, vector<vector<int>>& r, vector<int>& p,vector<bool> vd)
{
  if (p.size() == nums.size()) { r.push_back(p); return; }
  for (size_t i = 0; i < nums.size(); ++i) {
    if (vd[i]) continue;
    vd[i] = true, p.push_back(nums[i]);
    permute_dfs(nums, r, p, vd);
    p.pop_back(), vd[i] = false;  // backtrack and unwind
  }
}

// Returns all permutations of `nums`, generated position by position in
// index order (for {a, b} the result is {a, b}, {b, a}).
vector<vector<int>> permute(vector<int>& nums) {
  vector<vector<int>> r;
  vector<int> p;
  vector<bool> vd(nums.size());  // visited
  permute_dfs(nums, r, p, vd);
  return r;
}

// Builds complete paths layer by layer.
//   o   - BFS layers; o[k] holds the nodes of layer k, unused trailing
//         entries are empty
//   n   - number of nodes (not used here)
//   i   - index of the layer to append next
//   p   - path built from layers 0..i-1
//   fin - output, receives every completed path
// A path is complete when `i` is past the last layer or layer `i` is empty.
void dfs(vector<vector<int>>& o, int n, int i, vector<int> p, vector<vector<int>>& fin){
  // The bounds check matters when there are as many layers as entries in `o`
  // (e.g. a single node or a linear chain): without it o[i] is read past the end.
  if (i >= static_cast<int>(o.size()) || o[i].empty()) {
    fin.push_back(p);
    return;
  }
  for (const vector<int>& pm : permute(o[i])) {
    auto t = p;
    t.insert(t.end(), pm.begin(), pm.end());
    dfs(o, n, i + 1, t, fin);
  }
}

// Runs a layered Kahn BFS over the nodes 0..n-1 and returns every path
// obtained by concatenating one permutation of each layer, in layer order.
// Each pair {cur, pr} in `pre` is an edge pr -> cur. Returns an empty vector
// when the graph has a cycle.
// Only orderings that keep each BFS layer contiguous are produced.
vector<vector<int>> find_all_paths_bfs(int n, vector<pair<int, int>> pre) {
  vector<vector<int>> G(n), tmp(n);  // adjacency list, BFS layers
  vector<int> in(n);
  for (const auto& [cur, pr] : pre) {
    G[pr].push_back(cur);
    ++in[cur];
  }
  queue<int> q0;  // enqueue all nodes with 0 in-degree
  for (int i = 0; i < n; ++i)
    if (in[i] == 0) q0.push(i);
  int idx = 0, count = 0;
  while (!q0.empty()) {
    // Everything currently queued forms one layer.
    for (size_t sz = q0.size(); sz > 0; --sz) {
      const int t = q0.front();
      q0.pop();
      tmp[idx].push_back(t);
      ++count;
      for (int next : G[t])
        if (--in[next] == 0) q0.push(next);
    }
    ++idx;
  }
  if (count != n) return {};  // topologically un-sortable, cycle detected
  vector<int> p;
  vector<vector<int>> res;
  dfs(tmp, n, 0, p, res);
  return res;
}

../_images/topo_simple.png — Figure 3.8.1 There are four different paths

TEST(_topo_get_one_path, a) {
  vector<pair<int, int>> pre = {{6,8}, {6,7}, {1,6}, {0,6}, {2, 1},
                                {2, 0}, {3, 2}, {4, 3}, {5, 3}, {5, 4}};
  // One order from the queue-based search.
  vector<int> expected = { 7, 8, 6, 1, 0, 2, 3, 4, 5 };
  EXPECT_EQ(find_topo_order_bfs(9, pre), expected);
  // One order from the post-order DFS.
  expected = { 8, 7, 6, 0, 1, 2, 3, 4, 5 };
  EXPECT_EQ(find_topo_order_dfs(9, pre), expected);
  // Every combination of per-layer permutations.
  vector<vector<int>> all_expected = {
    {7, 8, 6, 1, 0, 2, 3, 4, 5},
    {7, 8, 6, 0, 1, 2, 3, 4, 5},
    {8, 7, 6, 1, 0, 2, 3, 4, 5},
    {8, 7, 6, 0, 1, 2, 3, 4, 5},
  };
  auto all_paths = find_all_paths_bfs(9, pre);
  EXPECT_EQ(all_paths, all_expected);
  // Replacing {2, 0} with {3, 5} makes 3 and 5 depend on each other
  // (the list already contains {5, 3}).
  pre[5] = {3, 5}; // add a cycle
  // All three searches must report the cycle with an empty result.
  EXPECT_EQ(find_topo_order_bfs(9, pre), vector<int>({}));
  EXPECT_EQ(find_topo_order_dfs(9, pre), vector<int>({}));
  EXPECT_TRUE(find_all_paths_bfs(9, pre).empty());
}

3.8.4.3. Shortest Path Search

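According to CLRS page 655, topological sort can be used to solve the single-source shortest-paths problem in a directed acyclic graph: relax the outgoing edges of the vertices in topologically sorted order.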