8 * Copyright (c) 2013 Nicholas J Humfrey. All rights reserved.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright notice,
15 * this list of conditions and the following disclaimer in the documentation
16 * and/or other materials provided with the distribution.
17 * 3. The name of the author 'Nicholas J Humfrey" may be used to endorse or
18 * promote products derived from this software without specific prior
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
34 * @copyright Copyright (c) 2013 Nicholas J Humfrey
35 * @license http://www.opensource.org/licenses/bsd-license.php
39 * Functions for comparing two graphs with each other
41 * Based on rdf-isomorphic.rb by Ben Lavender:
42 * https://github.com/ruby-rdf/rdf-isomorphic
45 * @copyright Copyright (c) 2013 Nicholas J Humfrey
46 * @license http://www.opensource.org/licenses/bsd-license.php
48 class EasyRdf_Isomorphic
51 * Check if one graph is isomorphic (equal) to another graph
54 * $graphA = EasyRdf_Graph::newAndLoad('http://example.com/a.ttl');
55 * $graphB = EasyRdf_Graph::newAndLoad('http://example.com/b.ttl');
56 * if (EasyRdf_Isomorphic::isomorphic($graphA, $graphB)) print "Equal!";
58 * @param object EasyRdf_Graph $graphA The first graph to be compared
59 * @param object EasyRdf_Graph $graphB The second graph to be compared
60 * @return boolean True if the two graphs are isomorphic
62 public static function isomorphic($graphA, $graphB)
64 return is_array(self::bijectionBetween($graphA, $graphB));
68 * Returns an associative array of bnode identifiers representing an isomorphic
69 * bijection of one EasyRdf_Graph to another EasyRdf_Graph's blank nodes or
70 * null if a bijection cannot be found.
72 * @param object EasyRdf_Graph $graphA The first graph to be compared
73 * @param object EasyRdf_Graph $graphB The second graph to be compared
74 * @return array bnode mapping from $graphA to $graphB
76 public static function bijectionBetween($graphA, $graphB)
80 $statementsA = array();
81 $statementsB = array();
83 // Quick initial check: are there differing numbers of subjects?
84 if (self::countSubjects($graphA) != self::countSubjects($graphB)) {
88 // Check if all the statements in Graph A exist in Graph B
89 $groundedStatementsMatch = self::groundedStatementsMatch($graphA, $graphB, $bnodesA, $statementsA);
91 if ($groundedStatementsMatch) {
92 // Check if all the statements in Graph B exist in Graph A
93 $groundedStatementsMatch = self::groundedStatementsMatch($graphB, $graphA, $bnodesB, $statementsB);
96 if ($groundedStatementsMatch === false) {
97 // The grounded statements do not match
99 } elseif (count($bnodesA) > 0 or count($bnodesB > 0)) {
100 // There are blank nodes - build a bi-jection
101 return self::buildBijectionTo($statementsA, $bnodesA, $statementsB, $bnodesB);
103 // No bnodes and the grounded statements match
109 * Count the number of subjects in a graph
112 private static function countSubjects($graph)
114 return count($graph->toRdfPhp());
118 * Check if all the statements in $graphA also appear in $graphB
121 private static function groundedStatementsMatch($graphA, $graphB, &$bnodes, &$anonStatements)
123 $groundedStatementsMatch = true;
125 foreach ($graphA->toRdfPhp() as $subject => $properties) {
126 if (substr($subject, 0, 2) == '_:') {
127 array_push($bnodes, $subject);
128 $subjectIsBnode = true;
130 $subjectIsBnode = false;
133 foreach ($properties as $property => $values) {
134 foreach ($values as $value) {
135 if ($value['type'] == 'uri' and substr($value['value'], 0, 2) == '_:') {
136 array_push($bnodes, $value['value']);
137 $objectIsBnode = true;
139 $objectIsBnode = false;
142 if ($groundedStatementsMatch and
143 $subjectIsBnode === false and
144 $objectIsBnode === false and
145 $graphB->hasProperty($subject, $property, $value) === false
147 $groundedStatementsMatch = false;
150 if ($subjectIsBnode or $objectIsBnode) {
154 array('type' => $subjectIsBnode ? 'bnode' : 'uri', 'value' => $subject),
155 array('type' => 'uri', 'value' => $property),
164 return $groundedStatementsMatch;
168 * The main recursive bijection algorithm.
170 * This algorithm is very similar to the one explained by Jeremy Carroll in
171 * http://www.hpl.hp.com/techreports/2001/HPL-2001-293.pdf. Page 12 has the
172 * relevant pseudocode.
176 private static function buildBijectionTo
182 $groundedHashesA = array(),
183 $groundedHashesB = array()
186 // Create a hash signature of every node, based on the signature of
187 // statements it exists in.
188 // We also save hashes of nodes that cannot be reliably known; we will use
189 // that information to eliminate possible recursion combinations.
191 // Any mappings given in the method parameters are considered grounded.
192 list($hashesA, $ungroundedHashesA) = self::hashNodes($statementsA, $nodesA, $groundedHashesA);
193 list($hashesB, $ungroundedHashesB) = self::hashNodes($statementsB, $nodesB, $groundedHashesB);
195 // Grounded hashes are built at the same rate between the two graphs (if
196 // they are isomorphic). If there exists a grounded node in one that is
197 // not in the other, we can just return. Ungrounded nodes might still
198 // conflict, so we don't check them. This is a little bit messy in the
199 // middle of the method, and probably slows down isomorphic checks, but
200 // prevents almost-isomorphic cases from getting nutty.
201 foreach ($hashesA as $nodeA => $hashA) {
202 if (!in_array($hashA, $hashesB)) {
206 foreach ($hashesB as $nodeB => $hashB) {
207 if (!in_array($hashB, $hashesA)) {
212 // Using the created hashes, map nodes to other_nodes
213 // Ungrounded hashes will also be equal, but we keep the distinction
214 // around for when we recurse later (we only recurse on ungrounded nodes)
215 $bijection = array();
216 foreach ($nodesA as $nodeA) {
218 foreach ($ungroundedHashesB as $nodeB => $hashB) {
219 if ($ungroundedHashesA[$nodeA] == $hashB) {
225 $bijection[$nodeA] = $foundNode;
227 // Deletion is required to keep counts even; two nodes with identical
228 // signatures can biject to each other at random.
229 unset($ungroundedHashesB[$foundNode]);
233 // bijection is now a mapping of nodes to other_nodes. If all are
234 // accounted for on both sides, we have a bijection.
236 // If not, we will speculatively mark pairs with matching ungrounded
237 // hashes as bijected and recurse.
238 $bijectionA = array_keys($bijection);
239 $bijectionB = array_values($bijection);
244 if ($bijectionA != $nodesA or $bijectionB != $nodesB) {
247 foreach ($nodesA as $nodeA) {
249 // We don't replace grounded nodes' hashes
250 if (isset($hashesA[$nodeA])) {
254 foreach ($nodesB as $nodeB) {
255 // We don't replace grounded nodesB's hashes
256 if (isset($hashesB[$nodeB])) {
260 // The ungrounded signature must match for this to potentially work
261 if ($ungroundedHashesA[$nodeA] != $ungroundedHashesB[$nodeB]) {
265 $hash = sha1($nodeA);
266 $hashesA[$nodeA] = $hash;
267 $hashesB[$nodeB] = $hash;
268 $bijection = self::buildBijectionTo(
284 * Given a set of statements, create a mapping of node => SHA1 for a given
285 * set of blank nodes. grounded_hashes is a mapping of node => SHA1 pairs
286 * that we will take as a given, and use those to make more specific
287 * signatures of other nodes.
289 * Returns a tuple of associative arrats: one of grounded hashes, and one of all
290 * hashes. grounded hashes are based on non-blank nodes and grounded blank
291 * nodes, and can be used to determine if a node's signature matches
296 private static function hashNodes($statements, $nodes, $groundedHahes)
298 $hashes = $groundedHahes;
299 $ungroundedHashes = array();
302 // We may have to go over the list multiple times. If a node is marked as
303 // grounded, other nodes can then use it to decide their own state of
305 while ($hashNeeded) {
306 $startingGroundedNodes = count($hashes);
307 foreach ($nodes as $node) {
308 if (!isset($hashes[$node])) {
309 $hash = self::nodeHashFor($node, $statements, $hashes);
310 if (self::nodeIsGrounded($node, $statements, $hashes)) {
311 $hashes[$node] = $hash;
314 $ungroundedHashes[$node] = $hash;
317 // after going over the list, any nodes with a unique hash can be marked
318 // as grounded, even if we have not tied them back to a root yet.
320 foreach ($ungroundedHashes as $node => $hash) {
321 $uniques[$hash] = isset($uniques[$hash]) ? false : $node;
323 foreach ($uniques as $hash => $node) {
325 $hashes[$node] = $hash;
328 $hashNeeded = ($startingGroundedNodes != count($hashes));
331 return array($hashes, $ungroundedHashes);
335 * Generate a hash for a node based on the signature of the statements it
336 * appears in. Signatures consist of grounded elements in statements
337 * associated with a node, that is, anything but an ungrounded anonymous
338 * node. Creating the hash is simply hashing a sorted list of each
339 * statement's signature, which is itself a concatenation of the string form
340 * of all grounded elements.
342 * Nodes other than the given node are considered grounded if they are a
343 * member in the given hash.
345 * Returns a tuple consisting of grounded being true or false and the string
350 private static function nodeHashFor($node, $statements, $hashes)
352 $statement_signatures = array();
353 foreach ($statements as $statement) {
354 foreach ($statement as $n) {
355 if ($n['type'] != 'literal' and $n['value'] == $node) {
357 $statement_signatures,
358 self::hashStringFor($statement, $hashes, $node)
364 // Note that we sort the signatures--without a canonical ordering,
365 // we might get different hashes for equivalent nodes
366 sort($statement_signatures);
368 // Convert statements into one long string and hash it
369 return sha1(implode('', $statement_signatures));
373 * Returns true if a given node is grounded
374 * A node is groundd if it is not a blank node or it is included
375 * in the given mapping of grounded nodes.
379 private static function nodeIsGrounded($node, $statements, $hashes)
382 foreach ($statements as $statement) {
383 if (in_array($node, $statement)) {
384 foreach ($statement as $resource) {
385 if ($node['type'] != 'bnode' or
386 isset($hashes[$node['value']]) or
398 * Provide a string signature for the given statement, collecting
399 * string signatures for grounded node elements.
403 private static function hashStringFor($statement, $hashes, $node)
406 foreach ($statement as $r) {
407 $str .= self::stringForNode($r, $hashes, $node);
413 * Provides a string for the given node for use in a string signature
414 * Non-anonymous nodes will return their string form. Grounded anonymous
415 * nodes will return their hashed form.
419 private static function stringForNode($node, $hashes, $target)
421 if (is_null($node)) {
423 } elseif ($node['type'] == 'bnode') {
424 if ($node['value'] == $target) {
426 } elseif (isset($hashes[$node['value']])) {
427 return $hashes[$node['value']];
429 return "a blank node";
432 $s = new EasyRdf_Serialiser_Ntriples();
433 return $s->serialiseValue($node);