Source for file N3Parser.php

Documentation is available at N3Parser.php

  1. <?php
  2.  
  3. // ----------------------------------------------------------------------------------
  4. // Class: N3Parser
  5. // ----------------------------------------------------------------------------------
  6.  
  7.  
  8. /**
  9. * PHP Notation3 Parser
  10. *
  11. * This parser can parse a subset of n3, reporting triples to a callback function
  12. * or constructing a RAP Model ( http://www.wiwiss.fu-berlin.de/suhl/bizer/rdfapi )
  13. *
  14. * Supported N3 features:
  15. * <ul>
  16. * <li>Standard things, repeated triples ( ; and , ), blank nodes using [ ], self-reference (''<>'')</li>
  17. * <li>@prefix mappings</li>
  18. * <li>= maps to owl#sameAs</li>
  19. * <li>a maps to rdf-syntax-ns#type</li>
  20. * <li>Literal datatype and xml:lang tag support</li>
  21. * </ul>
  22. * Un-supported N3 Features include:
  23. * <ul>
  24. * <li>Reification using { }</li>
  25. * <li>. and ^ operators for tree traversal</li>
  26. * <li>Any log operators, like log:forAll etc.</li>
  27. * </ul>
  28. *
  29. * This parser is based on n3.py from Eep released 2nd March, 2002.
  30. * by Sean B. Palmer
  31. * ( http://infomesh.net/2002/eep/20020302-013802/n3.py )
  32. *
  33. * This parser is released under the GNU GPL license.
  34. * ( http://www.gnu.org/licenses/gpl.txt )
  35. *
  36. * <b>History:</b>
  37. * <ul>
  38. * <LI>04-05-2005 toke() function improved by Hannes Gassert hannes.gassert@deri.org </LI>
  39. * <LI>03-25-2005 N3 list processing added by Hannes Gassert hannes.gassert@deri.org</LI>
  40. * <LI>12-06-2004 improved namespace handling added (tobias.gauss@web.de)</LI>
  41. * <LI>08-10-2004 Function for converting strings to its unicode NFC form. Benjamin Nowack <bnowack@appmosphere.com></LI>
  42. * <LI>10-05-2004 Fixed bug with trailing space on qnames and a parsing bug with space before ]</LI>
  43. * <LI>11-27-2003 fixed problems with whitespace at the end of bNodes</li>
  44. * <LI>11-18-2003 Changed xml:language regex for supporting lang-tags like en-uk.</li>
  45. * <li>11-07-2003 Added "setFixBnodes" function. Sets, if Bnodes should be renamed to the BNODE_PREFIX constant.</li>
  46. * <li>10-27-2003 fixed problems in generateModel(), changed regEx for Literals.</li>
  47. * <li>10-24-2003 Added support for Literals with rdf:DataType and xml:Language Tags. URI-Self-Reference with ''<>'' is supported.</li>
  48. * <li>08-01-2003 Made compatible with new v6 MemModel.</li>
  49. * <li>07-31-2003 Function generateModel() added.</li>
  50. * <li>07-16-2003 Fixed bug with anon nodes alone on a line.</li>
  51. * <li>06-08-2003 Initial version converted from n3.py.</li>
  52. * </ul>
  53. *
  54. *
  55. * @author Sean B. Palmer <sean@mysterylights.com>, Gunnar AA. Grimnes <ggrimnes@csd.abdn.ac.uk>, Daniel Westphal <mail@d-westphal.de>
  56. * @version V0.9.3
  57. * @package syntax
  58. * @access public
  59. ***/
  60.  
  61. class N3Parser extends Object {
  62.  
  63.  
  /* ==================== Variables ==================== */

  /** PCRE pattern built by the constructor and used by toke() to tokenize N3 input. */
  var $Tokens;
  /** Counter used by bnodeID() to number generated blank nodes. */
  var $bNode;
  /** Namespace URIs set by the constructor (RDF for 'a', OWL for '='; DAML is kept but unused here). */
  var $RDF_NS, $DAML_NS, $OWL_NS;
  /** When truthy, n3tolist() dumps every intermediate token list. */
  var $debug;
  /** Set to true by applyStuff() when an undeclared prefix is encountered. */
  var $parseError;
  /** Map of namespace URI => prefix (without the trailing colon), filled by getPrefixes(). */
  var $parsedNamespaces = array();
  /** Map of original "_:label" tokens => generated blank node ids, filled by fixAnon(). */
  var $bNodeMap = array();
  /** Whether toRDFNode() uses the generated blank node ids instead of the labels from the N3 source. */
  var $FixBnodes;
  72.  
  73. /* ==================== Public Methods ==================== */
  74.  
  /**
   * Constructor.
   *
   * Builds the PCRE tokenizer pattern stored in $this->Tokens, sets the
   * namespace URIs used for the 'a' and '=' keywords and resets the parser
   * state (blank node counter and map, debug flag, error flag). The initial
   * blank-node handling mode is read from the FIX_BLANKNODES constant.
   *
   * @access public
   ***/
  function N3Parser() {
      // Regular expression fragments. Characters that are meant literally
      // ( ? . ( ) [ ] { } ) are escaped; otherwise the combined pattern either
      // fails to compile or matches arbitrary characters.
      $Name         = '[A-Za-z0-9_@\.]+[^\.,;\[\] ]*';
      $URI          = '<[^> ]*>';
      $bNode        = '_:'.$Name;
      $Univar       = '\?'.$Name;
      $QName        = '(?:[A-Za-z][A-Za-z0-9_@\.]*)?:'.$Name;
      $Literal      = '"(\\\"|[^"])*"';
      $LangTag      = '@[A-Za-z\-]*[^ \^\.\;\,]';
      $Datatype     = '(\^\^)[^ ,\.;)]+';
      $Datatype_URI = '(\^\^)'.$URI;
      // Long literal: runs of ordinary characters, backslash escapes, or a
      // quote that does not start the closing """. Using a bare "." here would
      // let one literal swallow everything up to the last """ in the document.
      $LLiteral     = '"""[^"\\\\]*(?:(?:\\\\[\s\S]|"(?!""))[^"\\\\]*)*"""';
      $Comment      = '# .*$';
      $Prefix       = '(?:[A-Za-z][A-Za-z0-9_]*)?:';
      $PrefixDecl   = '@prefix';
      $WS           = '[ \t]';

      $this->RDF_NS  = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'; // for the 'a' keyword
      $this->DAML_NS = 'http://www.daml.org/2001/03/daml+oil#';
      $this->OWL_NS  = 'http://www.w3.org/2002/07/owl#';              // for the '=' keyword

      // Order matters: earlier alternatives win, so datatypes and long
      // literals are tried before URIs, short literals and QNames.
      $t = array(
          $Datatype_URI, $Datatype, $LLiteral, $URI, $Literal, $PrefixDecl,
          $QName, $bNode, $Prefix, $Univar,
          'a', '=', '\{', '\}', '\(', '\)', '\[', '\]', ',', ';', '\.',
          $WS, $Comment, $LangTag
      );
      // implode(glue, pieces): the reversed legacy argument order is not
      // accepted by current PHP versions.
      $this->Tokens = '/('.implode('|', $t).')/m';

      $this->bNode      = 0;
      $this->debug      = 0;
      $this->bNodeMap   = array();
      $this->FixBnodes  = FIX_BLANKNODES;
      $this->parseError = false;
  }
  113.  
  /**
   * Chooses whether blank node labels are replaced by the generated generic
   * labels (true) or whether the labels used in the N3 source are kept (false).
   * Any argument that is not a boolean is ignored.
   *
   * @param boolean $set
   * @access public
   ***/
  function setFixBnodes($set) {
      if (!is_bool($set)) {
          return;
      }
      $this->FixBnodes = $set;
  }
  /**
   * Parses an N3 string and prints every triple as "(subject, predicate, object)",
   * one per line. Language tags and datatypes are appended to the object.
   *
   * @param string $s
   * @access public
   ***/
  function parse($s) {
      foreach ($this->n3tolist($s) as $t) {
          print '('.$t[0].', '.$t[1].', '.$this->_tripleObject($t).")\n";
      }
  }

  /**
   * Parses an N3 string and calls $func($subject, $predicate, $object) for each triple.
   * Language tags and datatypes are appended to the object string.
   *
   * @param string $s
   * @param string $func name of the callback function
   * @access public
   ***/
  function uparse($s,$func) {
      foreach ($this->n3tolist($s) as $t) {
          $func($t[0], $t[1], $this->_tripleObject($t));
      }
  }

  /**
   * Returns the object of a parsed statement with its language tag ("@..")
   * and/or datatype ("^^..") modifiers appended.
   *
   * Statements carry the modifiers in slots 3 and 4; slots holding anything
   * else (such as the ' ' padding written by statementize()) are skipped.
   *
   * @param array $t statement as returned by n3tolist()
   * @return string
   * @access private
   ***/
  function _tripleObject($t) {
      $object = $t[2];
      // Never read beyond the slots that actually exist.
      $limit = min(count($t), 5);
      for ($i = 3; $i < $limit; $i++) {
          $modifier = $t[$i];
          if ($modifier === '') {
              continue;
          }
          if ($modifier[0] == '@' || substr($modifier, 0, 2) == '^^') {
              $object .= $modifier;
          }
      }
      return $object;
  }
  174.  
  175.  
  /**
   * Parses an N3 string into a model.
   *
   * Every triple produced by n3tolist() is converted to RDF nodes and added
   * as a Statement; afterwards the namespaces collected while parsing are
   * handed to the model.
   *
   * @param string $s N3 source
   * @param object $model model to fill; a new MemModel is created when omitted
   * @access public
   * @return object Model
   ***/
  function parse2model($s,$model = false) {
      $m = ($model == false) ? new MemModel() : $model;

      $triples = $this->n3tolist($s);
      foreach ($triples as $t) {
          $subject   = $this->toRDFNode($t[0], $t);
          $predicate = $this->toRDFNode($t[1], $t);
          $object    = $this->toRDFNode($t[2], $t);
          $m->add(new Statement($subject, $predicate, $object));
      }

      $m->addParsedNamespaces($this->parsedNamespaces);
      return $m;
  }
  206.  
  /**
   * Generates a model from a URI or file.
   *
   * The resource is read completely and passed to parse2model(). The script
   * is terminated with die() when the resource cannot be opened.
   *
   * @access public
   * @param string $path file name or URI to read
   * @param mixed $dummy unused; kept so existing callers keep working
   * @param object $model optional model to add the statements to
   * @return object MemModel
   */
  function & generateModel($path,$dummy=false,$model=false) {

      $handle = fopen($path,'r') or die("N3 Parser: Could not open File: '$path' - Stopped parsing.");

      $input = "";
      while (!feof($handle)) {
          $chunk = fread($handle, 512);
          if ($chunk === false) {
              // A read error would otherwise never reach EOF and loop forever.
              break;
          }
          $input .= $chunk;
      }
      fclose($handle);

      // A function that returns by reference must return a variable, not the
      // result of a call expression.
      $result = $this->parse2model($input, $model);
      return $result;
  }
  230. /* ==================== Private Methods from here ==================== */
  231.  
  232. // General list processing functions
  233.  
  /**
   * Filter predicate for tokens: returns FALSE when the token is empty,
   * consists only of whitespace, or is a "#" comment, and TRUE for every
   * other token. Despite its name it therefore answers "is NOT whitespace".
   *
   * @access private
   * @param string $s
   * @return boolean
   ***/
  function isWS($s) {
      if (preg_match('/^(#.*|\s*)$/', $s)) {
          return false;
      }
      return true;
  }
  242.  
  243.  
  244.  
  /**
   * Returns true if the string is neither empty nor a comment line, i.e. a
   * line whose first non-blank (space or tab) character is "#".
   *
   * @access private
   * @param string $s
   * @returns boolean
   ***/
  function notComment($s) {
      if ($s == "") return false;
      // PCRE instead of the removed ereg(); "\t" is a real tab in the
      // character class, not the letter "t".
      return !preg_match('/^[ \t]*#/', $s);
  }
  257.  
  /**
   * Removes whitespace and comment tokens from a token list by filtering it
   * through isWS(). array_filter() keeps the original keys, so the result
   * may have gaps in its numeric indexes.
   *
   * @access private
   * @param array $list
   * @return array
   ***/
  function filterWs($list) {
      $keepToken = array($this, "isWS");
      return array_filter($list, $keepToken);
  }
  /**
   * Converts a string to the escaped ASCII form used for N-Triples strings
   * (see http://www.w3.org/TR/rdf-testcases/#ntrip_strings).
   *
   * Tab, line feed, carriage return, double quote and backslash become
   * \t, \n, \r, \" and \\; other control characters and everything from
   * #x7F to #xFFFF become \uHHHH; code points above #xFFFF become \UHHHHHHHH.
   * Printable ASCII is copied unchanged.
   *
   * Input that is valid UTF-8 is decoded code point by code point; any other
   * input is treated as single-byte (ISO-8859-1) text. Despite the method
   * name, no Unicode normalisation (NFC) is performed.
   *
   * @param String $str
   * @return String
   * @access private
   */
  function str2unicode_nfc($str=""){
      $str = (string) $str;
      $result = "";

      // The "u" modifier makes PCRE reject malformed UTF-8.
      $isUtf8 = (preg_match('//u', $str) === 1);

      $length = strlen($str);
      $i = 0;
      while ($i < $length) {
          $char = $str[$i];
          $nr = ord($char);   /* unicode dec nr */
          $size = 1;

          if ($isUtf8 && $nr >= 0x80) {
              /* lead byte: 110##### / 1110#### / 11110### */
              if (($nr & 0xE0) == 0xC0) {
                  $size = 2;
                  $nr &= 0x1F;
              } elseif (($nr & 0xF0) == 0xE0) {
                  $size = 3;
                  $nr &= 0x0F;
              } else {
                  $size = 4;
                  $nr &= 0x07;
              }
              /* continuation bytes: 10###### (well-formedness was checked above) */
              for ($k = 1; $k < $size; $k++) {
                  $nr = ($nr << 6) | (ord($str[$i + $k]) & 0x3F);
              }
          }
          $i += $size;

          if ($nr == 9) {            /* #x9 */
              $result .= '\\t';
          } elseif ($nr == 10) {     /* #xA */
              $result .= '\\n';
          } elseif ($nr == 13) {     /* #xD */
              $result .= '\\r';
          } elseif ($nr == 34) {     /* #x22 */
              $result .= '\\"';
          } elseif ($nr == 92) {     /* #x5C */
              $result .= '\\\\';
          } elseif ($nr < 32) {      /* remaining control characters */
              $result .= "\\u".sprintf("%04X", $nr);
          } elseif ($nr < 127) {     /* printable ASCII */
              $result .= $char;
          } elseif ($nr < 65536) {   /* #x7F-#xFFFF */
              $result .= "\\u".sprintf("%04X", $nr);
          } elseif ($nr < 1114112) { /* #x10000-#x10FFFF */
              $result .= "\\U".sprintf("%08X", $nr);
          }
          /* other values are not defined => ignored */
      }
      return $result;
  }
  353.  
  354.  
  /**
   * Cuts a slice out of an array.
   * Returns a pair: the elements from $start up to (not including) $end, and
   * the remaining elements joined back together.
   * e.g. getSpan(['p', 'q', 'r'], 1, 2) gives (['q'], ['p', 'r'])
   *
   * @return array array(slice, remainder)
   * @access private
   * @param array $list
   * @param integer $start
   * @param integer $end
   **/
  function getSpan($list, $start, $end) {
      $before = array_slice($list, 0, $start);
      $after  = array_slice($list, $end);
      $span   = array_slice($list, $start, $end - $start);

      $remainder = $this->array_concat($before, $after);
      return array($span, $remainder);
  }
  371.  
  372.  
  /**
   * Appends the values of $b to the end of $a and returns the result.
   * The append is done with array_splice(), so numeric keys are renumbered.
   *
   * @param array $a
   * @param array $b
   * @returns array
   * @access private
   **/
  function array_concat($a, $b) {
      $insertAt = count($a);
      array_splice($a, $insertAt, 0, $b);
      return $a;
  }
  384.  
  /**
   * Returns the positions (counted from 0 in iteration order) at which $item
   * occurs in $list, compared with ===. The number of elements in $list is
   * always appended as a final entry, so the result is never empty.
   *
   * @param array $list
   * @param string $item
   * @returns array
   * @access private
   **/
  function posns($list, $item) {
      $positions = array_keys(array_values($list), $item, true);
      $positions[] = count($list);
      return $positions;
  }
  402.  
  403.  
  404. /* More N3 specific functions */
  405.  
  /**
   * Notation3 tokenizer: takes a string and returns the raw token list.
   *
   * Line endings are normalised to "\n", the text is split with the pattern
   * in $this->Tokens, and every token is trimmed. The script is terminated
   * with die() when the document is empty.
   *
   * @param string $s
   * @returns array
   * @access private
   **/
  function toke($s) {

      if (strlen($s) == 0) die('Document has no content!');

      // Normalise Windows (CR LF) and old Mac (CR) line endings.
      $s = str_replace("\r\n", "\n", $s);
      $s = str_replace("\r", "\n", $s);

      $matches = array();
      if (preg_match_all($this->Tokens, $s, $matches) === false) {
          // The pattern could not be applied; report no tokens rather than
          // passing an undefined match list on.
          return array();
      }

      return array_map('trim', $matches[0]);
  }
  /**
   * Folds the elements from offset $start up to (not including) offset $end
   * into a single nested array element.
   * e.g. listify(["a","b","c","d"],1,3) => ["a",["b","c"],"d"]
   *
   * @param array $list
   * @param integer $start
   * @param integer $end
   * @returns array
   * @access private
   **/
  function listify($list, $start, $end) {
      $head   = array_slice($list, 0, $start);
      $folded = array_slice($list, $start, $end - $start);
      $tail   = array_slice($list, $end);

      // The folded part is appended as ONE element, not merged in.
      $head[] = $folded;
      return $this->array_concat($head, $tail);
  }
  463.  
  /**
   * Extracts all @prefix directives from the token list.
   *
   * Each directive is expected as four tokens: '@prefix', 'prefix:',
   * '<namespace>' and '.'. The directives are removed from the list wherever
   * they occur, and every binding is also recorded in $this->parsedNamespaces
   * as namespace => prefix (without the trailing colon).
   *
   * @param array $list
   * @access private
   * @returns array array(prefix => namespace map, remaining re-indexed tokens)
   **/
  function getPrefixes($list) {

      // The list may come from array_filter() and have gaps in its keys;
      // re-index it so that keys and positions agree.
      $list = array_values($list);
      $prefixes = array();

      // Locate each directive by position (list.index('@prefix') in the
      // original n3.py) instead of assuming it is at the start of the list.
      while (($pos = array_search('@prefix', $list, true)) !== false) {
          $r = $this->getSpan($list, $pos, $pos + 4);
          $binding = $r[0];
          $list = $r[1];

          if (!isset($binding[2])) {
              // Directive cut off by the end of the document.
              $this->parseError = true;
              trigger_error('Incomplete @prefix declaration', E_USER_WARNING);
              break;
          }

          $prefix    = $binding[1];                  // e.g. "ex:"
          $namespace = substr($binding[2], 1, -1);   // strip "<" and ">"
          $prefixes[$prefix] = $namespace;
          $this->parsedNamespaces[$namespace] = substr($prefix, 0, -1);
      }

      return array($prefixes, $list);
  }
  490.  
  /**
   * array_walk() callback: rewrites the keyword "a" in place to the
   * bracketed rdf:type URI built from $this->RDF_NS.
   *
   * @param string $l token, modified by reference
   * @param mixed $p array key supplied by array_walk(), unused
   * @access private
   **/
  function replace_a_type(&$l,$p) {
      if ($l != 'a') {
          return;
      }
      $l = '<'.$this->RDF_NS.'type>';
  }
  499.  
  /**
   * array_walk() callback: rewrites the keyword "=" in place to the
   * bracketed sameAs URI built from $this->OWL_NS.
   *
   * @param string $l token, modified by reference
   * @param mixed $p array key supplied by array_walk(), unused
   * @access private
   **/
  function replace_equal(&$l,$p) {
      if ($l != '=') {
          return;
      }
      $l = '<'.$this->OWL_NS.'sameAs>';
  }
  508.  
  /**
   * array_walk() callback: rewrites the keyword "this" in place to
   * <urn:urn-n:this>.
   *
   * The token is taken by reference, like the sibling callbacks; without the
   * reference the replacement would be thrown away.
   *
   * @param string $l token, modified by reference
   * @param mixed $p array key supplied by array_walk(), unused
   * @access private
   **/
  function replace_this(&$l,$p) {
      if ($l == 'this') $l = '<urn:urn-n:this>';
  }
  517.  
  /**
   * Applies keyword replacement, prefix expansion and literal normalisation
   * to the token list:
   *  - "a", "=" and "this" are passed through the replace_* callbacks;
   *  - "<>" becomes a generated URI built from the server address, the
   *    request URI and the current timestamp;
   *  - QNames (and "^^prefix:name" datatypes) are expanded with $prefixes;
   *  - long ("""...""") literals are converted to one-line short literals;
   *  - datatypes are normalised to the form "^^<uri>".
   *
   * An undeclared prefix sets $this->parseError, raises E_USER_ERROR and
   * stops processing. A newline in a short literal, or a long literal that
   * does not end in """, terminates the script with die().
   *
   * @param array $prefixes map "prefix:" => namespace URI
   * @param array $list token list
   * @returns $list
   * @access private
   **/
  function applyStuff($prefixes, $list) {

      array_walk($list, array($this, 'replace_a_type'));
      array_walk($list, array($this, 'replace_equal'));
      array_walk($list, array($this, 'replace_this'));

      $total = count($list);
      for ($i = 0; $i < $total; $i++) {

          if ($list[$i] === '') {
              continue;
          }

          // A <> resource is replaced by a local URI plus timestamp. Local
          // fallbacks are used so that $_SERVER is not modified.
          if ($list[$i] == '<>') {
              $host    = isset($_SERVER['SERVER_ADDR']) ? $_SERVER['SERVER_ADDR'] : 'localhost';
              $request = isset($_SERVER['REQUEST_URI']) ? $_SERVER['REQUEST_URI'] : '/rdfapi-php';
              $list[$i] = '<http://'.$host.$request.'#generate_timestamp_'.time().'>';
          }

          $first = $list[$i][0];

          if ((!strstr('<_"?.;,{}[]()@', $first)) AND (substr($list[$i],0,3) != '^^<')) {
              // QName or prefixed datatype: split at the FIRST colon only so
              // that local names containing ":" stay intact.
              $_r   = explode(':', $list[$i], 2);
              $ns   = $_r[0].':';
              $name = isset($_r[1]) ? $_r[1] : '';

              if (isset($prefixes[$ns])) {
                  $list[$i] = '<'.$prefixes[$ns].$name.'>';
              } elseif (substr($ns,0,2) == '^^' && isset($prefixes[substr($ns,2)])) {
                  // "^^prefix:name" datatype; brackets are added further down.
                  $list[$i] = '^^'.$prefixes[substr($ns,2)].$name;
              } else {
                  $this->parseError = true;
                  trigger_error('Prefix not declared: '.$ns, E_USER_ERROR);
                  break;
              }
          } elseif ($first == '"') {   // it's a literal
              if (substr($list[$i],0,3) == '"""') {
                  if (substr($list[$i],-3,3) != '"""') {
                      die('Incorrect string formatting: '.substr($list[$i],-3,3));
                  }
                  // A long literal becomes a short one: real newlines turn
                  // into the two characters \n, and quotes that are not
                  // already escaped get a backslash (nothing else is touched).
                  $lit = substr($list[$i],3,-3);
                  $lit = str_replace("\n", '\n', $lit);
                  $lit = preg_replace('/(?<!\\\\)"/', '\\\\"', $lit);
                  $list[$i] = '"'.$lit.'"';
              } else {
                  if (strstr($list[$i],"\n")) die('Newline in literal: '.$list[$i]);
              }
          }

          // Datatypes always end up as ^^<uri>.
          if (substr($list[$i],0,2) == '^^' && substr($list[$i],2,1) != '<') {
              $list[$i] = '^^<'.substr($list[$i],2).'>';
          }
      }

      return $list;
  }
  592.  
  /**
   * Splits the token list into statements at every "." token.
   * The terminating "." is dropped; tokens after the last "." are discarded.
   *
   * @param array $list
   * @returns array list of token lists, one per statement
   * @access private
   ***/
  function getStatements($list) {
      $statements = array();

      while (($dot = array_search('.', $list)) !== false) {
          $parts = $this->getSpan($list, 0, $dot + 1);
          $statement = $parts[0];
          $list = $parts[1];

          array_pop($statement);   // remove the "."
          $statements[] = $statement;
      }

      return $statements;
  }
  /**
   * Separates the ";"-introduced predicate/object groups from a statement.
   * e.g. :Gunnar :firstname "Gunnar" ; :lastname "Grimnes".
   *
   * Each group runs from one ";" up to the next ";" (or the end of the list)
   * and is returned without its leading ";".
   *
   * @param array $list
   * @returns array array(remaining tokens, list of groups)
   * @access private
   ***/
  function getPovs($list) {
      $groups = array();

      while (in_array(';', $list)) {
          // posns() yields every ";" position followed by the list length,
          // so entries 0 and 1 delimit the first group.
          $marks = $this->posns($list, ';');
          list($group, $list) = $this->getSpan($list, $marks[0], $marks[1]);

          $groups[] = array_slice($group, 1);
      }

      return array($list, $groups);
  }
  642.  
  /**
   * Separates the ","-introduced additional objects from a token list.
   * e.g. :Gunnar :likes "Cheese", "Wine".
   *
   * For every "," the comma and the following object are cut out, together
   * with up to two trailing modifier tokens (starting with "@" or "^").
   * Each extracted entry is padded with ' ' so that indexes 2 and 3 exist.
   *
   * @access private
   * @param array $list
   * @returns array array(remaining tokens, list of extracted entries)
   ***/
  function getObjs($list) {
      $objs = array();

      while (in_array(",", $list)) {
          $pos = array_search(",", $list);

          // "," and the object itself
          $take = 2;
          if (isset($list[$pos + 2])) {
              $lead = @$list[$pos + 2][0];
              if ($lead == '@') $take++;
              if ($lead == '^') $take++;
          }
          if (isset($list[$pos + 3]) && @$list[$pos + 3][0] == '^') {
              $take++;
          }

          $parts = $this->getSpan($list, $pos, $pos + $take);
          $entry = $parts[0];
          $list  = $parts[1];

          if (!isset($entry[2])) $entry[2] = ' ';
          if (!isset($entry[3])) $entry[3] = ' ';

          $objs[] = $entry;
      }

      return array($list, $objs);
  }
  681.  
  /**
   * Does the real work: turns the tokens of one statement into a list of
   * triples.
   *
   * A three-token statement is returned unchanged. Otherwise the result
   * rows have five slots: subject, predicate, object and two modifier slots
   * (language tag / datatype) that hold ' ' when unused. The triples of the
   * ";" groups come first, followed by the triples of the leading
   * subject/predicate/object part.
   *
   * A statement consisting only of a generated blank node id is ignored;
   * any other statement with fewer than three tokens terminates the script.
   *
   * @param array $list
   * @returns array
   * @access private
   ***/
  function statementize($list) {

      if (count($list) == 1 && preg_match("/_".BNODE_PREFIX."[0-9]+_/",$list[0])) {
          // $list is an array, so join it explicitly for the debug output.
          if ($this->debug) print "Ignored bNode exists statement. ".implode(' ', $list)."\n";
          return array();
      }

      if (count($list) == 3) return array($list);
      if (count($list) < 3) die("Error: statement too short!");

      // Split off everything that follows a ";".
      $r = $this->getPovs($list);
      $spo = $r[0];
      $po = $r[1];
      $all = array();

      $subject = $spo[0];
      foreach ($po as $pop) {
          // A ";" directly before the end of the statement leaves an empty
          // group; it carries no triple.
          if (count($pop) == 0) continue;

          $r = $this->getObjs($pop);
          $myPo = $r[0];
          $obj = $r[1];

          if (!isset($myPo[2])) $myPo[2] = ' ';
          if (!isset($myPo[3])) $myPo[3] = ' ';
          $predicate = $myPo[0];
          $all[] = array($subject, $predicate, $myPo[1], $myPo[2], $myPo[3]);

          // further objects introduced with ","
          foreach ($obj as $o) $all[] = array($subject, $predicate, $o[1], $o[2], $o[3]);
      }

      // The leading "subject predicate object [, object ...]" part.
      $r = $this->getObjs($spo);
      $spo = $r[0];
      $objs = $r[1];

      $subject = $spo[0];
      $predicate = $spo[1];

      if (!isset($spo[3])) $spo[3] = ' ';
      if (!isset($spo[4])) $spo[4] = ' ';
      $all[] = array($subject, $predicate, $spo[2], $spo[3], $spo[4]);

      foreach ($objs as $obj) $all[] = array($subject, $predicate, $obj[1], $obj[2], $obj[3]);

      return $all;
  }
  747.  
  /**
   * Folds every bracketed run of tokens into a nested array element; the
   * delimiters themselves are kept as the first and last entry of the
   * nested array.
   *
   * On each pass the start/end offsets of all bracket pairs are recorded per
   * nesting level, and the first pair on the deepest level is folded with
   * listify(). This repeats until no $schar token is left on the top level.
   *
   * @param array $list
   * @param string $schar opening delimiter
   * @param string $echar closing delimiter
   * @returns array
   * @access private
   ***/
  function doLists($list, $schar, $echar) {

      while (in_array($schar, $list)) {
          // nesting level => list of array(start offset[, end offset])
          $spans = array();
          $level = 0;

          $total = count($list);
          for ($i = 0; $i < $total; $i++) {
              $token = $list[$i];

              if ($token == $schar) {
                  $level++;
                  $spans[$level][] = array($i);
              }

              if ($token == $echar) {
                  if (!array_key_exists($level, $spans)) {
                      $spans[$level] = array(array($i));
                  } else {
                      // close the most recently opened span on this level
                      $last = count($spans[$level]) - 1;
                      $spans[$level][$last][] = $i;
                      $level--;
                  }
              }
          }

          $deepest = 0;
          foreach ($spans as $depth => $unused) {
              if ($depth > $deepest) $deepest = $depth;
          }

          $target = $spans[$deepest][0];
          $list = $this->listify($list, $target[0], $target[1] + 1);
      }

      return $list;
  }
  792.  
  /**
   * Applies doLists() for every supported pair of delimiters, in the order
   * [ ], { }, ( ).
   *
   * @param array $list
   * @returns array
   * @access private
   ***/
  function listStuff($list) {
      $delimiters = array(
          array('[', ']'),
          array('{', '}'),
          array('(', ')')
      );
      foreach ($delimiters as $pair) {
          $list = $this->doLists($list, $pair[0], $pair[1]);
      }
      return $list;
  }
  806.  
  /**
   * Generates a new blank node id of the form "_" BNODE_PREFIX number "_",
   * advancing the internal counter by one.
   *
   * @access private
   * @returns string
   ***/
  function bnodeID() {
      $next = ++$this->bNode;
      return '_'.BNODE_PREFIX.$next.'_';
  }
  816.  
  /**
   * Replaces labelled blank nodes such as _:a by generated blank node ids.
   * The label => id mapping is remembered in $this->bNodeMap, so the same
   * label always yields the same id.
   *
   * @access private
   * @param array $list
   * @returns array
   ***/
  function fixAnon($list) {
      $total = count($list);
      for ($idx = 0; $idx < $total; $idx++) {
          $token = $list[$idx];
          if (substr($token, 0, 2) != "_:") {
              continue;
          }
          if (!isset($this->bNodeMap[$token])) {
              $this->bNodeMap[$token] = $this->bnodeID();
          }
          $list[$idx] = $this->bNodeMap[$token];
      }
      return $list;
  }
  837.  
  /**
   * Replaces the nested array elements created by listStuff() with blank
   * nodes and appends the statements they stand for to the end of the list.
   *
   *  - [ p o ; ... ] becomes a new blank node; "bnode p o ; ... ." is appended.
   *  - ( a b ... ) becomes the head blank node of an rdf:first / rdf:rest
   *    chain that ends in rdf:nil; the empty collection ( ) becomes rdf:nil
   *    itself.
   *  - Any other nested element (i.e. { }) terminates the script.
   *
   * Appended tokens may again contain nested arrays; they are handled when
   * the loop reaches them, which is why the list length is re-read on every
   * iteration.
   *
   * @access private
   * @param array $list
   * @return array
   ***/
  function expandLists($list) {

      for ($i = 0; $i < count($list); $i++) {
          if (!is_array($list[$i])) {
              continue;
          }

          if ($list[$i][0] == '[') {
              $bnode = $this->bnodeID();
              $prop = $list[$i];
              $list[$i] = $bnode;
              $list[] = $bnode;
              $list = $this->array_concat($list, array_slice($prop, 1, -1));
              $list[] = '.';
          } elseif ($list[$i][0] == '(') {

              $rdfNil   = '<'. RDF_NAMESPACE_URI . RDF_NIL .'>';
              $rdfFirst = '<'. RDF_NAMESPACE_URI . RDF_FIRST .'>';
              $rdfRest  = '<'. RDF_NAMESPACE_URI . RDF_REST .'>';

              // local copy of the collection without "(" and ")"
              $t_list = array_slice($list[$i], 1, -1);
              $count = count($t_list);

              if ($count == 0) {
                  // The empty collection is rdf:nil, not a blank node that
                  // no statement ever describes.
                  $list[$i] = $rdfNil;
                  continue;
              }

              // prepare bnodes
              $fromBnode = $this->bnodeID();
              $toBnode = $this->bnodeID();

              // link first bnode into graph
              $list[$i] = $fromBnode;

              // loop through the members, convert to an RDF linked list
              for ($idx = 0; $idx < $count; $idx++) {

                  // set rdf:first
                  $list[] = $fromBnode;
                  $list[] = $rdfFirst;
                  $list[] = $t_list[$idx];
                  $list[] = '.';

                  // set rdf:rest (nil or next bnode)
                  $list[] = $fromBnode;
                  $list[] = $rdfRest;
                  if ($idx == $count - 1) {
                      $list[] = $rdfNil;
                      $list[] = '.';
                  } else {
                      $list[] = $toBnode;
                      $list[] = '.';

                      $fromBnode = $toBnode;
                      $toBnode = $this->bnodeID();
                  }
              }
          } else {
              die('Only [ ] and () lists are supported!');
          }
      }
      return $list;
  }
  908.  
  /**
   * Main work-horse function: converts an N3 string into a list of triples.
   *
   * Pipeline: tokenize and drop whitespace -> extract @prefix directives ->
   * apply prefixes, keywords and string formatting -> rename _:x blank
   * nodes -> group bracketed lists -> expand them into statements -> split
   * at "." -> turn every statement into triples. With $this->debug set, the
   * token list is dumped after each stage.
   *
   * @param string $s
   * @returns array
   * @access private
   ***/
  function n3tolist($s) {
      $triples = array();

      // tokenize the stream, and filter whitespace tokens
      $tokens = $this->filterWs($this->toke($s));
      if ($this->debug) {
          print "Filter WS:\n";
          var_dump($tokens);
      }

      // get the prefix directives
      list($prefixes, $tokens) = $this->getPrefixes($tokens);
      if ($this->debug) {
          print "Prefixes:\n";
          var_dump($prefixes);
          print "***\n";
          var_dump($tokens);
      }

      // apply prefixes, keywords, and string formatting
      $tokens = $this->applyStuff($prefixes, $tokens);
      if ($this->debug) {
          print "Stuff applied:\n";
          var_dump($tokens);
      }

      // fix _:a anons
      $tokens = $this->fixAnon($tokens);
      if ($this->debug) {
          print "Fix anon:\n";
          var_dump($tokens);
      }

      // group [ ], { } and ( ) into nested arrays
      $tokens = $this->listStuff($tokens);
      if ($this->debug) {
          print "Lists done:\n";
          var_dump($tokens);
      }

      $tokens = $this->expandLists($tokens);
      if ($this->debug) {
          print "Lists applied:\n";
          var_dump($tokens);
      }

      // get all of the "statements" from the stream
      $statements = $this->getStatements($tokens);
      foreach ($statements as $statement) {
          foreach ($this->statementize($statement) as $triple) {
              $triples[] = $triple;
          }
      }

      return $triples;
  }
  968.  
  /**
   * Constructs a RAP node from a URI, literal or blank node token.
   *
   *  - A token starting with a double quote becomes a Literal. Its language
   *    tag ("@..") and datatype ("^^..") are taken from slots 3 and up of
   *    $state; with UNIC_RDF set the value is passed through str2unicode_nfc().
   *  - A token starting with "_" BNODE_PREFIX becomes a BlankNode. It is
   *    named after the generated id, or after the original label from the N3
   *    source when FixBnodes is off and the label is known.
   *  - Everything else becomes a Resource.
   *
   * In all cases the first and last character of the token (quotes, angle
   * brackets or underscores) are stripped.
   *
   * @access private
   * @param string $s token
   * @param array $state the complete statement the token belongs to
   * @returns object RDFNode
   ***/
  function toRDFNode($s,$state) {
      $ins = substr($s,1,-1);

      if ($s[0] == '"') {
          $lang = NULL;
          $dtype = NULL;

          $slots = count($state);
          for ($i = 3; $i < $slots; $i++) {
              $modifier = $state[$i];
              if ($modifier === '') {
                  continue;
              }
              // Read the tag from the slot that was just tested.
              if ($modifier[0] == '@') $lang = substr($modifier,1);
              if (substr($modifier,0,2) == '^^') {
                  $dtype = substr($modifier,2);
                  if (substr($dtype,0,1) == '<') $dtype = substr($dtype,1,-1);
              }
          }

          if (UNIC_RDF) {
              $ins = $this->str2unicode_nfc($ins);
          }
          $new_Literal = new Literal($ins,$lang);
          if ($dtype !== NULL) $new_Literal->setDatatype($dtype);
          return $new_Literal;
      }

      // Generated blank node ids START with the marker; a URI that merely
      // contains it is still a resource.
      if (strpos($s, '_'.BNODE_PREFIX) === 0) {
          $label = array_search($s, $this->bNodeMap);
          if (($this->FixBnodes) OR ($label === false)) {
              return new BlankNode($ins);
          }
          // strip the leading "_:" of the original label
          return new BlankNode(trim(substr($label,2)));
      }

      return new Resource($ins);
  }
  1008.  
  1009.  
  1010. } //end: N3Parser
  1011.  
  1012. ?>

Documentation generated on Fri, 13 Jan 2006 07:48:41 +0100 by phpDocumentor 1.3.0RC4