greeness notes: Union Find

By wiki: A union-find algorithm is an algorithm that performs two useful operations on such a data structure:

Find: Determine which set a particular element is in. Also useful for determining if two elements are in the same set.
Union: Combine or merge two sets into a single set.

Applications

Disjoint-set data structures model the partitioning of a set, for example to keep track of the components of a graph of an undirected graph. This model can then be used to determine whether two vertices belong to the same component, or whether adding an edge between them would result in a cycle.
This data structure is used by the Boost Graph Library to implement its Incremental Connected Components functionality.
It is also used for implementing Kruskal's algorithm to find the minimum spanning tree of a graph.

Visualization

a very clear C++ implementation by Emil Stefanov

// Disjoint Set Data Structure
// Author: Emil Stefanov
// Date: 03/28/06
// Implementaton is as described in http://en.wikipedia.org/wiki/Disjoint-set_data_structure

#include <cassert>
#include <vector>

class DisjointSets
{
public:

    // Create an empty DisjointSets data structure
    DisjointSets(){
	    m_numElements = 0;
	    m_numSets = 0;
    };
    // Create a DisjointSets data structure with a specified number of elements (with element id's from 0 to count-1)
    DisjointSets(int count){
	    m_numElements = 0;
	    m_numSets = 0;
	    AddElements(count);
    };
    // Copy constructor
    DisjointSets(const DisjointSets & s){
	    this->m_numElements = s.m_numElements;
	    this->m_numSets = s.m_numSets;

	    // Copy nodes
	    m_nodes.resize(m_numElements);
	    for(int i = 0; i < m_numElements; ++i)
		    m_nodes[i] = new Node(*s.m_nodes[i]);

	    // Update parent pointers to point to newly created nodes rather than the old ones
	    for(int i = 0; i < m_numElements; ++i)
		    if(m_nodes[i]->parent != NULL)
			    m_nodes[i]->parent = m_nodes[s.m_nodes[i]->parent->index];
    };
    // Destructor
    ~DisjointSets(){
	    for(int i = 0; i < m_numElements; ++i)
		    delete m_nodes[i];
	    m_nodes.clear();
	    m_numElements = 0;
	    m_numSets = 0;
    };

    // Find the set identifier that an element currently belongs to.
    // Note: some internal data is modified for optimization even though this method is consant.
    int FindSet(int element) const;
    // Combine two sets into one. All elements in those two sets will share the same set id that can be gotten using FindSet.
    void Union(int setId1, int setId2);
    // Add a specified number of elements to the DisjointSets data structure. The element id's of the new elements are numbered
    // consequitively starting with the first never-before-used elementId.
    void AddElements(int numToAdd){
	    assert(numToAdd >= 0);
	    // insert and initialize the specified number of element nodes to the end of the `m_nodes` array
	    m_nodes.insert(m_nodes.end(), numToAdd, (Node*)NULL);
	    for(int i = m_numElements; i < m_numElements + numToAdd; ++i)
	    {
		    m_nodes[i] = new Node();
		    m_nodes[i]->parent = NULL;
		    m_nodes[i]->index = i;
		    m_nodes[i]->rank = 0;
	    }

	    // update element and set counts
	    m_numElements += numToAdd;
	    m_numSets += numToAdd;
    };
    // Returns the number of elements currently in the DisjointSets data structure.
    int NumElements() const {return m_numElements;};
    // Returns the number of sets currently in the DisjointSets data structure.
    int NumSets() const {return m_numSets;};

private:
    // Internal Node data structure used for representing an element
    struct Node
    {
	int rank; // This roughly represent the max height of the node in its subtree
	int index; // The index of the element the node represents
	Node* parent; // The parent node of the node
    };

    int m_numElements; // the number of elements currently in the DisjointSets data structure.
    int m_numSets; // the number of sets currently in the DisjointSets data structure.
    std::vector<Node*> m_nodes; // the list of nodes representing the elements
};


// Note: some internal data is modified for optimization even though this method is consant.
// path compression
int DisjointSets::FindSet(int elementId) const
{
	assert(elementId < m_numElements);
	Node* curNode;

	// Find the root element that represents the set which `elementId` belongs to
	curNode = m_nodes[elementId];
	while(curNode->parent != NULL)
		curNode = curNode->parent;
	Node* root = curNode;

	// Walk to the root, updating the parents of `elementId`. Make those elements the direct
	// children of `root`. This optimizes the tree for future FindSet invokations.
	curNode = m_nodes[elementId];
	while(curNode != root)
	{
		Node* next = curNode->parent;
		curNode->parent = root;
		curNode = next;
	}
	return root->index;
}

// union by rank
void DisjointSets::Union(int setId1, int setId2)
{
	assert(setId1 < m_numElements);
	assert(setId2 < m_numElements);
	assert(setId1 != setId2);

	Node* set1 = m_nodes[setId1];
	Node* set2 = m_nodes[setId2];

	// Determine which node representing a set has a higher rank. The node with the higher rank is
	// likely to have a bigger subtree so in order to better balance the tree representing the
	// union, the node with the higher rank is made the parent of the one with the lower rank and
	// not the other way around.
	if(set1->rank > set2->rank)
		set2->parent = set1;
	else if(set1->rank < set2->rank)
		set1->parent = set2;
	else // set1->rank == set2->rank
	{
		set2->parent = set1;
		++set1->rank; // update rank
	}
	// Since two sets have fused into one, there is now one less set so update the set count.
	--m_numSets;
}


#include <iostream>
using namespace std;
void printElementSets(const DisjointSets & s)
{
	for (int i = 0; i < s.NumElements(); ++i)
		cout << s.FindSet(i) << "  ";
	cout << endl;
}
int main()
{
	DisjointSets s(10);
	printElementSets(s);
	s.Union(s.FindSet(5),s.FindSet(3));
	printElementSets(s);
	s.Union(s.FindSet(1),s.FindSet(3));
	printElementSets(s);
	s.Union(s.FindSet(6),s.FindSet(7));
	printElementSets(s);
	s.Union(s.FindSet(8),s.FindSet(9));
	printElementSets(s);
	s.Union(s.FindSet(6),s.FindSet(9));
	printElementSets(s);
	s.AddElements(3);
	printElementSets(s);
	s.Union(s.FindSet(11),s.FindSet(12));
	printElementSets(s);
	s.Union(s.FindSet(9),s.FindSet(10));
	printElementSets(s);
	s.Union(s.FindSet(7),s.FindSet(11));
	printElementSets(s);

	system("pause");
	return 0;
}

output:

0 1 2 3 4 5 6 7 8 9
0 1 2 5 4 5 6 7 8 9
0 5 2 5 4 5 6 7 8 9
0 5 2 5 4 5 6 6 8 9
0 5 2 5 4 5 6 6 8 8
0 5 2 5 4 5 6 6 6 6
0 5 2 5 4 5 6 6 6 6 10 11 12
0 5 2 5 4 5 6 6 6 6 10 11 11
0 5 2 5 4 5 6 6 6 6 6 11 11
0 5 2 5 4 5 6 6 6 6 6 6 6
Press any key to continue . . .

Problem: topCoder grafixMask

Editorial suggest using a floodFill with BFS (DFS might have stack overflow problem)

We can also use Union-Find to solve this problem.

greeness notes

Tuesday, September 30, 2008

Union Find

Applications

Labels

Blog Archive