Multiple trees in one table

from the Artful Common Queries page


Multiple trees can bury you in table glut. Can we combine them in one table and still query them conveniently? Yes. What's more, it's useful to combine edge list and nested sets representations of the same trees in one table. Here is a table definition for the job:

CREATE TABLE trees (
  root int(11) DEFAULT 0,         -- to which tree does this node belong?
  nodeID int(11) NOT NULL,        -- edge list representation of this node
  parentID int(11) DEFAULT NULL,
  level smallint(6) DEFAULT 0,    -- depth of this node in this tree
  lft int(11) DEFAULT 0,          -- nested sets representation of this node
  rgt int(11) DEFAULT 0,
  PRIMARY KEY (root,nodeID)
) ENGINE=InnoDB;

Why combine edge list columns (nodeID, parentID) and nested sets columns (lft, rgt) columns in the same table? Two main reasons. First, most trees come to us as sets of nodes and their parents, that is they come to us as edge lists, and as we'll see, it's easy to write nested sets trees from their edge list versions when the two are stored in the same table. Second, some tree queries are easier from an edge list model, and some are easier from a nested sets model, so it's inexpensive and convenient to combine the two.

Populate the table with four trees:

INSERT INTO trees (nodeID, parentID) VALUES
(11, 53),(44, NULL),(45, 44),(50, 53),(51, 45),(52, 51),(53, 44),(57, 52),
(64, 50),(67, 53),(68, 53),(69, 53),(70, 53),(71, 53),(72, 64),(73, 64),
(74, 64),(75, 53),(76, 53),(77, 53),(78, 53),(79, 53),(93, NULL),(94, 93),
(95, 74),(96, 95),(97, NULL),(98, NULL);

How do we know this dataset has four trees? Four nodes have NULL parents.

To generalise from writing a single nested sets tree from one edge list tree to writing many such trees in one pass, we add logic to compute the root of each tree, save that root value in each row. and have the routine loop through all trees in the table:

DROP PROCEDURE IF EXISTS WriteNestedSets;
DELIMITER go
CREATE PROCEDURE WriteNestedSets();
BEGIN
  DECLARE current, nextleft, nextright, maxrightedge, trees, n, rows, iter INT;
  BEGIN
    DECLARE col_exists CONDITION FOR SQLSTATE '42S21';
    DECLARE CONTINUE HANDLER FOR col_exists SET @root_exists=1;
  END;
  UPDATE trees SET root=0,level=0,lft=0,rgt=0;
  DROP TEMPORARY TABLE IF EXISTS _tree;
  CREATE TEMPORARY TABLE _tree( childID INT, parentID INT ) ENGINE=HEAP;
  INSERT INTO _tree SELECT nodeID, parentID from trees;
  DROP TEMPORARY TABLE IF EXISTS _result;
  CREATE TEMPORARY TABLE _result (
    root INT, level INT, nodeID INT, leftedge INT, rightedge INT, KEY(root,nodeID,leftedge,rightedge) 
  ) ENGINE=HEAP;
  REPEAT                      
    SET @root = ( SELECT childID FROM _tree WHERE parentID IS NULL LIMIT 1);
    IF @root IS NOT NULL THEN
      DELETE FROM _tree WHERE childID=@root;
    ELSE
      SELECT a.parentID INTO @root FROM _tree a 
      LEFT JOIN trees b ON a.parentID=b.nodeID  
      WHERE b.nodeID IS NULL LIMIT 1;
    END IF;
    IF @root IS NOT NULL THEN
      SET trees = ( SELECT Count(*) FROM _tree WHERE parentID IS NULL );   
      SET maxrightedge = 2 * ( (SELECT COUNT(*) FROM _tree ) - trees + 1 );
      INSERT INTO _result VALUES( @root, 1, @root, 1, maxrightedge );      
      SET iter=0, n=1, current=1, nextright=2;
      WHILE nextright < maxrightedge DO                                    
        IF (SELECT Count(*) FROM _result s JOIN _tree t ON s.nodeID=t.parentID AND s.level=current) > 0 THEN
          SET nextleft = (SELECT Min(t.childID) FROM _result s JOIN _tree t ON s.nodeID=t.parentID AND s.level=current);
          INSERT INTO _result VALUES(@root, current+1, nextleft, nextright, NULL);
          SET rows = Row_Count();
          DELETE FROM _tree                  
          WHERE childID = (SELECT nodeID FROM _result WHERE level=(current+1) AND root=@root);
          SET nextright=nextright+1, current=current+1;
        ELSE
          UPDATE _result SET rightedge=nextright,level=-level WHERE root=@root AND level=current;
          SET nextright=nextright+1, current=current-1;
          SET rows=Row_Count();   
        END IF;
        SET n=n+rows, iter=iter+1;
      END WHILE;
      SET maxrightedge=(SELECT 1+Max(rightedge) FROM _result WHERE root=@root AND leftedge<>@root);
      UPDATE _result SET rightedge=maxrightedge WHERE root=@root AND leftedge=@root; 
    END IF;
  UNTIL @root IS NULL END REPEAT ; 
  UPDATE _result SET level=Abs(level); 
  UPDATE trees e JOIN _result r ON e.nodeID=r.nodeID 
    SET e.root=r.root,e.level=r.level,e.lft=r.leftedge,e.rgt=r.rightedge;
  DROP TEMPORARY TABLE _tree; DROP TEMPORARY TABLE _result;
END;
go
CALL WriteNestedSets();

Note that the table definition and procedure do not verify that the given edge list tree values are valid. Arguably, that job belongs to the process that generated the edge list trees. If you disagree, you can add this recursive constraint to the table:

ALTER TABLE trees ADD FOREIGN KEY(root,parentID) REFERENCES trees(root,nodeID) 
  ON UPDATE CASCADE ON DELETE CASCADE;

You will then have to bracket commands, like the command used above to populate the table, with ...

SET session foreign_key_checks=0;
INSERT ...
SET session foreign_key_checks=1;

And you will have to do the same with all calls to the procedure ...

SET session foreign_key_checks=0;
CALL WriteNestedSets();
SET session foreign_key_checks=1;

or add those SET commands into the procedure itself.

This query displays all trees in a way that's suitable for an ASCII terminal:

SELECT Concat( Space(2*child.level-2), child.nodeID ) AS Nodes
FROM trees AS parent
JOIN trees AS child ON parent.root=child.root AND child.lft BETWEEN parent.lft AND parent.rgt
GROUP BY parent.root, child.nodeID
ORDER BY parent.root, child.lft;
+----------------+
| Nodes          |
+----------------+
| 44             |
|   45           |
|     51         |
|       52       |
|         57     |
|   53           |
|     11         |
|     50         |
|       64       |
|         72     |
|         73     |
|         74     |
|           95   |
|             96 |
|     67         |
|     68         |
|     69         |
|     70         |
|     71         |
|     75         |
|     76         |
|     77         |
|     78         |
|     79         |
| 93             |
|   94           |
| 97             |
| 98             |
+----------------+

and this query does the same for a web page:

SELECT Concat( Repeat('&nbsp;', 2*child.level-2), child.nodeID ) AS Nodes
FROM trees AS parent
JOIN trees AS child ON parent.root=child.root AND child.lft BETWEEN parent.lft AND parent.rgt
GROUP BY parent.root, child.nodeID
ORDER BY parent.root, child.lft;

Here are corresponding queries for displaying one tree from the table:

SELECT Concat( Space(2*child.level-2), child.nodeID, '[', child.lft, '-', child.rgt, ']' ) AS Nodes
FROM trees AS parent
JOIN trees AS child ON parent.root=child.root AND child.lft BETWEEN parent.lft AND parent.rgt
WHERE child.root=44
GROUP BY child.nodeID
ORDER BY child.lft;

SELECT Concat( Repeat('&nbsp;', 2*child.level-2), child.nodeID, '[', child.lft, '-', child.rgt, ']' ) AS Nodes
FROM trees AS parent
JOIN trees AS child ON parent.root=child.root AND child.lft BETWEEN parent.lft AND parent.rgt
WHERE child.root=44
GROUP BY child.nodeID
ORDER BY child.lft;

The bane of nested sets trees is that updates require rewrites of most or all of the tree. By storing all trees in one table, have we compounded this problem by requiring that all our nested sets trees be rewritten whenever any is changed? No. Here is a procedure that, given a root value, writes one nested sets tree at a time:

DROP PROCEDURE IF EXISTS WriteNestedSetsTree;
DELIMITER go
CREATE PROCEDURE WriteNestedSetsTree( proot INT )
BEGIN
  DECLARE thislevel, nextleft, nextright, maxrightedge, n, rows INT;
  SELECT Count(*) INTO rows FROM trees WHERE root=proot;
  IF rows > 0 THEN
    UPDATE trees SET level=0,lft=0,rgt=0 WHERE root=proot;
    DROP TEMPORARY TABLE IF EXISTS _tree;
    CREATE TEMPORARY TABLE _tree( childID INT, parentID INT ) ENGINE=HEAP;
    INSERT INTO _tree SELECT nodeID, parentID from trees WHERE root=proot;
    DROP TEMPORARY TABLE IF EXISTS _result;
    CREATE TEMPORARY TABLE _result (
      root INT, level INT, nodeID INT, leftedge INT, rightedge INT, KEY(root,nodeID,leftedge,rightedge) 
    ) ENGINE=HEAP;
    SET maxrightedge = 2 * ( rows + 1 );
    INSERT INTO _result VALUES( proot, 1, proot, 1, maxrightedge );      
    SET n=1, thislevel=1, nextright=2;
    WHILE nextright < maxrightedge DO                                    
      IF (SELECT Count(*) FROM _result s JOIN _tree t ON s.nodeID=t.parentID AND s.level=thislevel) > 0 THEN
        SET nextleft = (SELECT Min(t.childID) FROM _result s JOIN _tree t ON s.nodeID=t.parentID AND s.level=thislevel);
        INSERT INTO _result VALUES(proot, thislevel+1, nextleft, nextright, NULL);
        SET rows = Row_Count();
        DELETE FROM _tree                  
        WHERE childID = (SELECT nodeID FROM _result WHERE level=(thislevel+1) AND root=proot);
        SET nextright=nextright+1, thislevel=thislevel+1;
      ELSE
        UPDATE _result SET rightedge=nextright,level=-level WHERE root=proot AND level=thislevel;
        SET nextright=nextright+1, thislevel=thislevel-1;
        SET rows=Row_Count();   
      END IF;
      SET n=n+rows;
    END WHILE;
    UPDATE _result SET level=Abs(level); 
    UPDATE trees e JOIN _result r ON e.nodeID=r.nodeID 
      SET e.root=r.root,e.level=r.level,e.lft=r.leftedge,e.rgt=r.rightedge;
    DROP TEMPORARY TABLE _tree; DROP TEMPORARY TABLE _result;
  END IF;
END;
go
DELIMITER ;

The Google Visualization API for Organizational Charts can display multiple edge list trees in one pass. It can't display nested sets data. If you store your trees in a table like the trees table above, though, just pass the nodeID and parentID values to the Google API to get a graphic display of your tree. As TheUsual does. Here's that graphic for these trees:

tree graphic

Return to the Artful Common Queries page