8 * Copyright (c) 2009-2013 Nicholas J Humfrey. All rights reserved.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright notice,
15 * this list of conditions and the following disclaimer in the documentation
16 * and/or other materials provided with the distribution.
17 * 3. The name of the author 'Nicholas J Humfrey" may be used to endorse or
18 * promote products derived from this software without specific prior
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
34 * @copyright Copyright (c) 2009-2013 Nicholas J Humfrey
35 * @license http://www.opensource.org/licenses/bsd-license.php
39 * A pure-php class to parse N-Triples with no dependencies.
42 * @copyright Copyright (c) 2009-2013 Nicholas J Humfrey
43 * @license http://www.opensource.org/licenses/bsd-license.php
45 class EasyRdf_Parser_Ntriples extends EasyRdf_Parser
48 * Decodes an encoded N-Triples string. Any \-escape sequences are substituted
49 * with their decoded value.
51 * @param string $str An encoded N-Triples string.
52 * @return string The unencoded string.
54 protected function unescapeString($str)
// Guard: taken when the string contains no backslash at all.
56 if (strpos($str, '\\') === false) {
// Single-character escapes: each key of $mappings is spliced into a character
// class that must follow a literal backslash (\x5c), and every match is
// replaced with the mapped value.
69 foreach ($mappings as $in => $out) {
70 $str = preg_replace('/\x5c([' . $in . '])/', $out, $str);
// Guard: stripos() is case-insensitive, so this tests for both \u and \U.
73 if (stripos($str, '\u') === false) {
// Numeric escapes: each iteration picks one escape, trying the 8-digit \U form
// before the 4-digit \u form. $matches[1] is the letter, $matches[2] the hex digits.
// NOTE(review): both hex classes are [0-9A-F], so lowercase hex digits never
// match — confirm that is intended for the inputs this parser must accept.
// NOTE(review): if an escape decodes to a backslash that is directly followed by
// text shaped like another escape, the next iteration would decode that too — verify.
77 while (preg_match('/\\\(U)([0-9A-F]{8})/', $str, $matches) ||
78 preg_match('/\\\(u)([0-9A-F]{4})/', $str, $matches)) {
79 $no = hexdec($matches[2]);
// Encode code point $no as UTF-8 by hand; the branch chosen fixes the byte count.
80 if ($no < 128) { // 0x80
// Two bytes: lead byte 0xC0 (192) plus the top bits, then a 0x80 (128) continuation byte.
82 } elseif ($no < 2048) { // 0x800
83 $char = chr(($no >> 6) + 192) .
84 chr(($no & 63) + 128);
// Three bytes: lead byte 0xE0 (224), then two continuation bytes of 6 bits each.
85 } elseif ($no < 65536) { // 0x10000
86 $char = chr(($no >> 12) + 224) .
87 chr((($no >> 6) & 63) + 128) .
88 chr(($no & 63) + 128);
// Four bytes: lead byte 0xF0 (240), then three continuation bytes of 6 bits each.
89 } elseif ($no < 2097152) { // 0x200000
90 $char = chr(($no >> 18) + 240) .
91 chr((($no >> 12) & 63) + 128) .
92 chr((($no >> 6) & 63) + 128) .
93 chr(($no & 63) + 128);
95 # FIXME: throw an exception instead?
// str_replace() substitutes every occurrence of this exact escape sequence, not
// just the one that preg_match() found.
98 $str = str_replace('\\' . $matches[1] . $matches[2], $char, $str);
// Interpret the subject token of one statement.
// $sub is the raw subject text; $lineNum is the 1-based line number computed by parse().
// A `<...>` token yields the unescaped text between the angle brackets; a `_:label`
// token yields a blank node identifier; anything else raises EasyRdf_Parser_Exception.
106 protected function parseNtriplesSubject($sub, $lineNum)
// NOTE(review): neither pattern is anchored, so they match anywhere inside $sub.
108 if (preg_match('/<([^<>]+)>/', $sub, $matches)) {
109 return $this->unescapeString($matches[1]);
// The label class uses `*`, so a bare `_:` matches with an empty label.
110 } elseif (preg_match('/_:([A-Za-z0-9]*)/', $sub, $matches)) {
// NOTE(review): empty() is also true for the string "0", so a label of "0"
// takes this fresh-identifier branch instead of being remapped — confirm.
111 if (empty($matches[1])) {
112 return $this->graph->newBNodeId();
// A non-empty label is unescaped and then passed through remapBnode().
114 $nodeid = $this->unescapeString($matches[1]);
115 return $this->remapBnode($nodeid);
// Neither form matched: report the offending token.
118 throw new EasyRdf_Parser_Exception(
119 "Failed to parse subject: $sub",
// Interpret the object token of one statement.
// $obj is the raw object text; $lineNum is the 1-based line number computed by parse().
// Forms are tried in this order: typed literal, language-tagged literal, plain
// literal, URI, blank node. The first pattern that matches wins; if none match,
// EasyRdf_Parser_Exception is raised.
128 protected function parseNtriplesObject($obj, $lineNum)
// Typed literal: "value"^^<datatype>. Both captures are unescaped.
// NOTE(review): (.+) needs at least one character, so an empty typed literal
// (""^^<...>) does not match here and falls through to the plain-literal
// pattern below, which ignores the datatype — confirm.
130 if (preg_match('/"(.+)"\^\^<([^<>]+)>/', $obj, $matches)) {
133 'value' => $this->unescapeString($matches[1]),
134 'datatype' => $this->unescapeString($matches[2])
// Language-tagged literal: "value"@lang. The same (.+) caveat applies to an empty value.
136 } elseif (preg_match('/"(.+)"@([\w\-]+)/', $obj, $matches)) {
139 'value' => $this->unescapeString($matches[1]),
140 'lang' => $this->unescapeString($matches[2])
// Plain literal: (.*) accepts an empty value.
142 } elseif (preg_match('/"(.*)"/', $obj, $matches)) {
143 return array('type' => 'literal', 'value' => $this->unescapeString($matches[1]));
// URI: the text between the angle brackets is returned exactly as matched.
// NOTE(review): no unescapeString() call here, unlike the subject and predicate
// paths — confirm whether that difference is intentional.
144 } elseif (preg_match('/<([^<>]+)>/', $obj, $matches)) {
145 return array('type' => 'uri', 'value' => $matches[1]);
// Blank node: the label may be empty because the class is quantified with `*`.
146 } elseif (preg_match('/_:([A-Za-z0-9]*)/', $obj, $matches)) {
// NOTE(review): empty() is also true for the string "0", so a label of "0"
// gets a fresh identifier rather than being remapped — confirm.
147 if (empty($matches[1])) {
150 'value' => $this->graph->newBNodeId()
// A non-empty label is unescaped and then passed through remapBnode().
153 $nodeid = $this->unescapeString($matches[1]);
156 'value' => $this->remapBnode($nodeid)
// No form matched: report the offending token.
160 throw new EasyRdf_Parser_Exception(
161 "Failed to parse object: $obj",
168 * Parse an N-Triples document into an EasyRdf_Graph
170 * @param object EasyRdf_Graph $graph the graph to load the data into
171 * @param string $data the RDF document data
172 * @param string $format the format of the input data
173 * @param string $baseUri the base URI of the data being parsed
174 * @return integer The number of triples added to the graph
176 public function parse($graph, $data, $format, $baseUri)
// Argument checking is delegated to the parent class.
178 parent::checkParseParams($graph, $data, $format, $baseUri);
// This parser accepts exactly one format name; anything else raises EasyRdf_Exception.
// NOTE(review): `!=` is a loose comparison — confirm $format is always a string here.
180 if ($format != 'ntriples') {
181 throw new EasyRdf_Exception(
182 "EasyRdf_Parser_Ntriples does not support: $format"
// Split the document into lines on LF, optionally preceded by CR.
186 $lines = preg_split('/\x0D?\x0A/', strval($data));
187 foreach ($lines as $index => $line) {
// Line numbers handed to the term parsers are 1-based.
188 $lineNum = $index + 1;
// Comment line: optional leading whitespace followed by '#'.
189 if (preg_match('/^\s*#/', $line)) {
// Statement line: a subject token, a predicate in angle brackets, an object
// token, and a terminating '.' with optional surrounding whitespace.
192 } elseif (preg_match('/^\s*(.+?)\s+<([^<>]+?)>\s+(.+?)\s*\.\s*$/', $line, $matches)) {
// Subject and object go through their dedicated parsers; the predicate is only unescaped.
194 $this->parseNtriplesSubject($matches[1], $lineNum),
195 $this->unescapeString($matches[2]),
196 $this->parseNtriplesObject($matches[3], $lineNum)
// Line that is empty or contains only whitespace.
198 } elseif (preg_match('/^\s*$/', $line)) {
// Any other line is treated as a malformed statement.
202 throw new EasyRdf_Parser_Exception(
203 "Failed to parse statement",
// The result is the parser's own running triple counter.
209 return $this->tripleCount;