Kruskal's Algorithm: A Comprehensive Guide
Kruskal's Algorithm is a greedy algorithm used to find the minimum spanning tree (MST) for a weighted undirected graph. A minimum spanning tree is a subset of the edges that connects all vertices together with the minimum total edge weight, without any cycles.
How Kruskal's Algorithm Works
- Sort all the edges in non-decreasing order of their weight.
- Pick the smallest edge. Check if it forms a cycle with the spanning tree formed so far. If not, include this edge. Otherwise, discard it.
- Repeat step 2 until there are (V-1) edges in the spanning tree, where V is the number of vertices.
Implementation in Java
Here's a basic implementation of Kruskal's Algorithm in Java:
import java.util.*;
class Edge implements Comparable<Edge> {
int src, dest, weight;
public int compareTo(Edge compareEdge) {
return this.weight - compareEdge.weight;
}
}
class Subset {
int parent, rank;
}
class Graph {
int V, E;
Edge[] edge;
Graph(int v, int e) {
V = v;
E = e;
edge = new Edge[E];
for (int i = 0; i < e; ++i)
edge[i] = new Edge();
}
int find(Subset subsets[], int i) {
if (subsets[i].parent != i)
subsets[i].parent = find(subsets, subsets[i].parent);
return subsets[i].parent;
}
void Union(Subset subsets[], int x, int y) {
int xroot = find(subsets, x);
int yroot = find(subsets, y);
if (subsets[xroot].rank < subsets[yroot].rank)
subsets[xroot].parent = yroot;
else if (subsets[xroot].rank > subsets[yroot].rank)
subsets[yroot].parent = xroot;
else {
subsets[yroot].parent = xroot;
subsets[xroot].rank++;
}
}
void KruskalMST() {
Edge result[] = new Edge[V];
int e = 0;
int i = 0;
for (i = 0; i < V; ++i)
result[i] = new Edge();
Arrays.sort(edge);
Subset subsets[] = new Subset[V];
for (i = 0; i < V; ++i)
subsets[i] = new Subset();
for (int v = 0; v < V; ++v) {
subsets[v].parent = v;
subsets[v].rank = 0;
}
i = 0;
while (e < V - 1) {
Edge next_edge = edge[i++];
int x = find(subsets, next_edge.src);
int y = find(subsets, next_edge.dest);
if (x != y) {
result[e++] = next_edge;
Union(subsets, x, y);
}
}
System.out.println("Following are the edges in the constructed MST");
int minimumCost = 0;
for (i = 0; i < e; ++i) {
System.out.println(result[i].src + " -- " + result[i].dest + " == " + result[i].weight);
minimumCost += result[i].weight;
}
System.out.println("Minimum Cost Spanning Tree " + minimumCost);
}
public static void main(String[] args) {
int V = 4;
int E = 5;
Graph graph = new Graph(V, E);
// Add edge 0-1
graph.edge[0].src = 0;
graph.edge[0].dest = 1;
graph.edge[0].weight = 10;
// Add edge 0-2
graph.edge[1].src = 0;
graph.edge[1].dest = 2;
graph.edge[1].weight = 6;
// Add edge 0-3
graph.edge[2].src = 0;
graph.edge[2].dest = 3;
graph.edge[2].weight = 5;
// Add edge 1-3
graph.edge[3].src = 1;
graph.edge[3].dest = 3;
graph.edge[3].weight = 15;
// Add edge 2-3
graph.edge[4].src = 2;
graph.edge[4].dest = 3;
graph.edge[4].weight = 4;
graph.KruskalMST();
}
}
Implementation in C++
Here's the same implementation in C++:
#include <algorithm>
#include <iostream>
#include <vector>
using namespace std;
class Edge {
public:
int src, dest, weight;
};
class Graph {
int V, E;
vector<Edge> edges;
public:
Graph(int V, int E);
void addEdge(int src, int dest, int weight);
int find(vector<int>& parent, int i);
void Union(vector<int>& parent, vector<int>& rank, int x, int y);
void KruskalMST();
};
Graph::Graph(int V, int E) {
this->V = V;
this->E = E;
}
void Graph::addEdge(int src, int dest, int weight) {
Edge edge;
edge.src = src;
edge.dest = dest;
edge.weight = weight;
edges.push_back(edge);
}
int Graph::find(vector<int>& parent, int i) {
if (parent[i] != i)
parent[i] = find(parent, parent[i]);
return parent[i];
}
void Graph::Union(vector<int>& parent, vector<int>& rank, int x, int y) {
int xroot = find(parent, x);
int yroot = find(parent, y);
if (rank[xroot] < rank[yroot])
parent[xroot] = yroot;
else if (rank[xroot] > rank[yroot])
parent[yroot] = xroot;
else {
parent[yroot] = xroot;
rank[xroot]++;
}
}
void Graph::KruskalMST() {
vector<Edge> result;
int e = 0;
int i = 0;
sort(edges.begin(), edges.end(), [](Edge a, Edge b) {
return a.weight < b.weight;
});
vector<int> parent(V);
vector<int> rank(V, 0);
for (int v = 0; v < V; ++v) {
parent[v] = v;
}
while (e < V - 1 && i < E) {
Edge next_edge = edges[i++];
int x = find(parent, next_edge.src);
int y = find(parent, next_edge.dest);
if (x != y) {
result.push_back(next_edge);
Union(parent, rank, x, y);
e++;
}
}
cout << "Following are the edges in the constructed MST\n";
int minimumCost = 0;
for (i = 0; i < e; ++i) {
cout << result[i].src << " -- " << result[i].dest << " == " << result[i].weight << endl;
minimumCost += result[i].weight;
}
cout << "Minimum Cost Spanning Tree: " << minimumCost << endl;
}
int main() {
int V = 4;
int E = 5;
Graph graph(V, E);
graph.addEdge(0, 1, 10);
graph.addEdge(0, 2, 6);
graph.addEdge(0, 3, 5);
graph.addEdge(1, 3, 15);
graph.addEdge(2, 3, 4);
graph.KruskalMST();
return 0;
}
Popular Interview Problems
Let's look at some popular interview problems related to Kruskal's Algorithm and their solutions in both Java and C++.
1. Connecting Cities With Minimum Cost
Problem: There are N cities numbered from 1 to N. You are given connections, where each connection is represented as an array [city1, city2, cost] indicating the cost to connect city1 and city2. Return the minimum cost to connect all cities. If it's impossible, return -1.
Java Solution:
class Solution {
class UnionFind {
int[] parent;
int[] rank;
public UnionFind(int size) {
parent = new int[size];
for (int i = 0; i < size; i++) parent[i] = i;
rank = new int[size];
}
public int find(int x) {
if (parent[x] != x) parent[x] = find(parent[x]);
return parent[x];
}
public boolean union(int x, int y) {
int xr = find(x), yr = find(y);
if (xr == yr) return false;
if (rank[xr] < rank[yr]) parent[xr] = yr;
else if (rank[xr] > rank[yr]) parent[yr] = xr;
else {
parent[yr] = xr;
rank[xr]++;
}
return true;
}
}
public int minimumCost(int N, int[][] connections) {
Arrays.sort(connections, (a, b) -> Integer.compare(a[2], b[2]));
UnionFind uf = new UnionFind(N + 1);
int totalCost = 0;
int connectedCities = 1;
for (int[] connection : connections) {
int city1 = connection[0], city2 = connection[1], cost = connection[2];
if (uf.union(city1, city2)) {
totalCost += cost;
connectedCities++;
if (connectedCities == N) return totalCost;
}
}
return -1;
}
}
C++ Solution:
class Solution {
public:
vector<int> parent;
vector<int> rank;
int find(int x) {
if (parent[x] != x) parent[x] = find(parent[x]);
return parent[x];
}
bool union_(int x, int y) {
int xr = find(x), yr = find(y);
if (xr == yr) return false;
if (rank[xr] < rank[yr]) parent[xr] = yr;
else if (rank[xr] > rank[yr]) parent[yr] = xr;
else {
parent[yr] = xr;
rank[xr]++;
}
return true;
}
int minimumCost(int N, vector<vector<int>>& connections) {
sort(connections.begin(), connections.end(),
[](vector<int>& a, vector<int>& b) { return a[2] < b[2]; });
parent.resize(N + 1);
rank.resize(N + 1, 0);
for (int i = 1; i <= N; i++) parent[i] = i;
int totalCost = 0;
int connectedCities = 1;
for (auto& connection : connections) {
int city1 = connection[0], city2 = connection[1], cost = connection[2];
if (union_(city1, city2)) {
totalCost += cost;
connectedCities++;
if (connectedCities == N) return totalCost;
}
}
return -1;
}
};
Explanation: This problem is a direct application of Kruskal's Algorithm. We sort the connections by cost and then use a Union-Find data structure to connect cities. If we can connect all cities, we return the total cost; otherwise, we return -1.
2. Optimize Water Distribution in a Village
Problem: There are n houses in a village. We want to supply water for all the houses by building wells and laying pipes. For each house i, we can either build a well inside it directly with cost wells[i], or pipe in water from another well to it. The costs to lay pipes between houses are given by the array pipes, where each pipes[i] = [house1, house2, cost] represents the cost to connect house1 and house2 together using a pipe. Connections are bidirectional. Find the minimum total cost to supply water to all houses.
Java Solution:
class Solution {
class UnionFind {
int[] parent;
int[] rank;
public UnionFind(int size) {
parent = new int[size];
for (int i = 0; i < size; i++) parent[i] = i;
rank = new int[size];
}
public int find(int x) {
if (parent[x] != x) parent[x] = find(parent[x]);
return parent[x];
}
public boolean union(int x, int y) {
int xr = find(x), yr = find(y);
if (xr == yr) return false;
if (rank[xr] < rank[yr]) parent[xr] = yr;
else if (rank[xr] > rank[yr]) parent[yr] = xr;
else {
parent[yr] = xr;
rank[xr]++;
}
return true;
}
}
public int minCostToSupplyWater(int n, int[] wells, int[][] pipes) {
List<int[]> edges = new ArrayList<>();
for (int i = 0; i < n; i++) {
edges.add(new int[]{0, i + 1, wells[i]});
}
for (int[] pipe : pipes) {
edges.add(pipe);
}
Collections.sort(edges, (a, b) -> Integer.compare(a[2], b[2]));
UnionFind uf = new UnionFind(n + 1);
int totalCost = 0;
for (int[] edge : edges) {
int house1 = edge[0], house2 = edge[1], cost = edge[2];
if (uf.union(house1, house2)) {
totalCost += cost;
}
}
return totalCost;
}
}
C++ Solution:
class Solution {
public:
vector<int> parent;
vector<int> rank;
int find(int x) {
if (parent[x] != x) parent[x] = find(parent[x]);
return parent[x];
}
bool union_(int x, int y) {
int xr = find(x), yr = find(y);
if (xr == yr) return false;
if (rank[xr] < rank[yr]) parent[xr] = yr;
else if (rank[xr] > rank[yr]) parent[yr] = xr;
else {
parent[yr] = xr;
rank[xr]++;
}
return true;
}
int minCostToSupplyWater(int n, vector<int>& wells, vector<vector<int>>& pipes) {
vector<vector<int>> edges;
// Add well costs as edges from virtual node 0
for (int i = 0; i < n; i++) {
edges.push_back({0, i + 1, wells[i]});
}
// Add pipe connections
for (auto& pipe : pipes) {
edges.push_back(pipe);
}
// Sort edges by cost
sort(edges.begin(), edges.end(),
[](const vector<int>& a, const vector<int>& b) { return a[2] < b[2]; });
// Initialize Union-Find data structure
parent.resize(n + 1);
rank.resize(n + 1, 0);
for (int i = 0; i <= n; i++) parent[i] = i;
int totalCost = 0;
// Kruskal's algorithm
for (auto& edge : edges) {
int house1 = edge[0], house2 = edge[1], cost = edge[2];
if (union_(house1, house2)) {
totalCost += cost;
}
}
return totalCost;
}
};
Explanation: This problem is a variation of the Minimum Spanning Tree problem, which can be solved using Kruskal's Algorithm. Here's how we approach it:
- We treat the option to build a well at each house as an edge from a virtual node (node 0) to that house, with the cost being the well cost.
- We combine these "well edges" with the pipe connections into a single list of edges.
- We sort all edges by cost in ascending order.
- We use Kruskal's Algorithm with a Union-Find data structure to build the minimum spanning tree:
- For each edge, if it connects two previously unconnected components, we add its cost to the total and union the components.
- We continue this process until all nodes are connected.
- The total cost at the end is the minimum cost to supply water to all houses.
This solution effectively finds the optimal combination of well-building and pipe-laying to minimize the total cost of water supply to the village.
Both the Java and C++ solutions use the same approach, with minor syntax differences. The time complexity of this solution is O(E log E) where E is the number of edges (including both pipes and wells), due to the sorting step. The space complexity is O(N) for the Union-Find data structure, where N is the number of houses.
These problems demonstrate how Kruskal's Algorithm can be applied to solve complex optimization problems efficiently, especially when dealing with connecting components with minimum cost. Understanding these applications is crucial for mastering graph algorithms and performing well in coding interviews.