Source for file RdqlParser.php

Documentation is available at RdqlParser.php

  1. <?php
  2.  
  3. // ----------------------------------------------------------------------------------
  4. // Class: RdqlParser
  5. // ----------------------------------------------------------------------------------
  6.  
  7. /**
  8. * This class contains methods for parsing an Rdql query string into PHP variables.
  9. * The output of the RdqlParser is an array with variables and constraints
  10. * of each query clause (Select, From, Where, And, Using).
  11. * To perform an RDQL query this array has to be passed to the RdqlEngine.
  12. *
  13. * <BR><BR>History:<UL>
  14. * <LI>05-12-2004 : Support for unquoted QNames added.
  15. * However, backward compatibility is provided, that means,
  16. * both prefix:local_name and <prefix:local_name> are allowed.
  17. * In the case of unquoted QNames, the parser now also checks
  18. * if a prefix is defined in the USING clause.
  19. * changes: - the structure of the class variable $parsedQuery
  20. * - methods: parseFrom(), parseExpressions(), replaceNamespacePrefixes(),
  21. * _validateVarUri(), _validateVarUriLiteral(), _validateUri(),
  22. * _validateLiteral(), _validatePrefix()
  23. * methods added: _validateQName(), _validateNCName(), _replaceNamespacePrefix()
  24. * : Bug in the handling of empty Literals fixed.</LI>
  25. * <LI>07-27-2003 : First release of this class</LI>
  26. *
  27. * @version V0.9.3
  28. * @author Radoslaw Oldakowski <radol@gmx.de>
  29. *
  30. * @package rdql
  31. * @access public
  32. */
  33.  
  34.  
  35. Class RdqlParser extends Object{
  36.  
  37. /**
  38. * Parsed query variables and constraints.
  39. * { } are only used within the parser class and are not returned as parsed query.
  40. * ( [] stands for an integer index - 0..N )
  41. *
  42. * @var array [''selectVars''][] = ?VARNAME
  43. * [''sources''][]{[''value'']} = URI | QName
  44. * {[''is_qname''] = boolean}
  45. * [''patterns''][][''subject''][''value''] = VARorURI
  46. * {[''is_qname''] = boolean}
  47. * [''predicate''][''value''] = VARorURI
  48. * {[''is_qname''] = boolean}
  49. * [''object''][''value''] = VARorURIorLiteral
  50. * {[''is_qname''] = boolean}
  51. * [''is_literal''] = boolean
  52. * [''l_lang''] = string
  53. * [''l_dtype''] = string
  54. * {[''l_dtype_is_qname''] = boolean}
  55. * [''filters''][][''string''] = string
  56. * [''evalFilterStr''] = string
  57. * [''regexEqExprs''][][''var''] = ?VARNAME
  58. * [''operator''] = (eq | ne)
  59. * [''regex''] = string
  60. * [''strEqExprs''][][''var''] = ?VARNAME
  61. * [''operator''] = (eq | ne)
  62. * [''value''] = string
  63. * [''value_type''] = (''variable'' | ''URI'' | ''QName'' | ''Literal'')
  64. * [''value_lang''] = string
  65. * [''value_dtype''] = string
  66. * {[''value_dtype_is_qname''] = boolean}
  67. * [''numExprVars''][] = ?VARNAME
  68. * {[''ns''][PREFIX] = NAMESPACE}
  69. * @access private
  70. */
  71. var $parsedQuery;
  72.  
  73.  
  74. /**
  75. * Query string divided into a sequence of tokens.
  76. * A token is either: '' '' or "\n" or "\r" or "\t" or '','' or ''('' or '')''
  77. * or a string containing any characters except from the above.
  78. *
  79. * @var array
  80. * @access private
  81. */
  82. var $tokens;
  83.  
  84.  
  /**
   * Parse the given RDQL query string and return an array with query
   * variables and constraints.
   *
   * Steps: strip comments, tokenize, parse all clauses, resolve "SELECT *"
   * (or verify the selected variables) and finally expand namespace prefixes.
   *
   * The parsed-query array is reset first: the SELECT parser appends to
   * ['selectVars'], so without a reset a reused parser instance would carry
   * variables of an earlier query into the next one.
   *
   * @param  string $queryString
   * @return array  $this->parsedQuery
   * @access public
   */
  function & parseQuery($queryString) {

      $this->parsedQuery = array();

      $cleanQueryString = $this->removeComments($queryString);
      $this->tokenize($cleanQueryString);
      $this->startParsing();

      // "SELECT *" selects every variable that occurs in the WHERE patterns
      if (isset($this->parsedQuery['selectVars'][0])
          && $this->parsedQuery['selectVars'][0] === '*') {
          $this->parsedQuery['selectVars'] = $this->findAllQueryVariables();
      } else {
          $this->_checkSelectVars();
      }
      $this->replaceNamespacePrefixes();

      return $this->parsedQuery;
  }
  105.  
  106.  
  /**
   * Remove comments from the passed query string.
   *
   * Recognised comments are "// ..." (up to the end of the line) and
   * "/* ... *" followed by "/". Text inside a quoted literal (including its
   * @lang and ^^datatype suffix) and inside an <URI> is copied verbatim and is
   * never searched for comments.
   *
   * A line comment, together with the line break that ends it, is replaced by
   * a single blank; a block comment is removed without replacement.
   *
   * @param  string $query
   * @return string the query without comments
   * @throws PHPError if a block comment is not terminated
   * @access private
   */
  function removeComments($query) {

      $last = strlen($query) - 1;
      // pad the string so that the look-ahead at $i+1 never leaves the string
      $query .= ' ';
      $clean = '';
      $whiteSpace = array(' ', "\t", "\n", "\r");

      for ($i = 0; $i <= $last; $i++) {
          $char = $query[$i];

          if ($char == "'" || $char == '"') {
              // copy the literal up to (and including) its closing quotation mark
              $quotMark = $char;
              do {
                  $clean .= $query[$i++];
              } while ($i < $last && $query[$i] != $quotMark);
              $clean .= $query[$i];

              // language tag: copy up to the next white space or to "^^"
              if ($query[$i + 1] == '@') {
                  do {
                      if ($query[$i + 1] == '^' && $query[$i + 2] == '^') {
                          break;
                      }
                      $clean .= $query[++$i];
                  } while ($i < $last && !in_array($query[$i], $whiteSpace, true));
              }

              // datatype: copy up to the next white space
              if ($query[$i + 1] == '^' && $query[$i + 2] == '^') {
                  do {
                      $clean .= $query[++$i];
                  } while ($i < $last && !in_array($query[$i], $whiteSpace, true));
              }

          } elseif ($char == '<') {
              // copy an <URI> up to (and including) the closing '>'
              do {
                  $clean .= $query[$i++];
              } while ($i < $last && $query[$i] != '>');
              $clean .= $query[$i];

          } elseif ($char == '/') {
              if ($i < $last && $query[$i + 1] == '/') {
                  // line comment: skip to the line break; the loop increment
                  // then steps over the line break itself
                  while ($i < $last && $query[$i] != "\n" && $query[$i] != "\r") {
                      ++$i;
                  }
                  $clean .= ' ';

              } elseif ($i < $last - 2 && $query[$i + 1] == '*') {
                  // block comment: skip to the terminating "*" + "/"
                  $i += 2;
                  while ($i < $last && ($query[$i] != '*' || $query[$i + 1] != '/')) {
                      ++$i;
                  }
                  if ($i >= $last && ($query[$last - 1] != '*' || $query[$last] != '/')) {
                      trigger_error(RDQL_SYN_ERR .": unterminated comment - '*/' missing", E_USER_ERROR);
                  }
                  ++$i;

              } else {
                  // a single slash is ordinary input
                  $clean .= $char;
              }

          } else {
              $clean .= $char;
          }
      }
      return $clean;
  }
  170.  
  171.  
  /**
   * Divide the query string into tokens and store them in $this->tokens.
   *
   * A token is either one of the delimiters ' ', "\t", "\r", "\n", ',', '(',
   * ')' or a string containing any characters except those delimiters.
   * The token list always ends with an empty string, unless the query ends
   * with a non-delimiter character.
   *
   * The token array is rebuilt from scratch on every call, so tokens left
   * over from an earlier run cannot be mixed into the new sequence.
   *
   * @param  string $queryString
   * @access private
   */
  function tokenize($queryString) {

      $queryString  = trim($queryString, " \r\n\t");
      $specialChars = array(' ', "\t", "\r", "\n", ',', '(', ')');
      $len = strlen($queryString);

      $this->tokens = array('');
      $n = 0;

      for ($i = 0; $i < $len; ++$i) {
          $char = $queryString[$i];

          if (!in_array($char, $specialChars, true)) {
              // ordinary character: extend the token under construction
              $this->tokens[$n] .= $char;
              continue;
          }

          // delimiter: close the current token (if any), store the delimiter
          // as a token of its own and open a new empty token
          if ($this->tokens[$n] !== '') {
              ++$n;
          }
          $this->tokens[$n]   = $char;
          $this->tokens[++$n] = '';
      }
  }
  199.  
  200.  
  /**
   * Start parsing of the tokenized query string.
   *
   * Hands over to parseSelect(); every clause parser calls the parser of the
   * clause that follows it.
   *
   * @access private
   */
  function startParsing() {

      $this->parseSelect();
  }
  210.  
  211.  
  /**
   * Parse the SELECT clause of an Rdql query.
   *
   * Accepts either "SELECT *" or "SELECT ?var (, ?var)*" (the commas are
   * optional, but a comma must always be followed by another variable).
   * When the clause is finished, the parser of the subsequent clause
   * (FROM/SOURCE or WHERE) is called.
   *
   * @throws PhpError
   * @access private
   */
  function parseSelect() {

      $this->_clearWhiteSpaces();

      // the query has to start with the SELECT keyword
      if (strcasecmp('SELECT', current($this->tokens))) {
          trigger_error(RDQL_SEL_ERR ."'" .htmlspecialchars(current($this->tokens))
                        ."' - SELECT keyword expected", E_USER_ERROR);
      }
      unset($this->tokens[key($this->tokens)]);
      $this->_clearWhiteSpaces();

      // SELECT * : must be followed directly by the next clause
      if (current($this->tokens) == '*') {
          unset($this->tokens[key($this->tokens)]);
          $this->parsedQuery['selectVars'][0] = '*';
          $this->_clearWhiteSpaces();
          if (strcasecmp('FROM', current($this->tokens))
              && strcasecmp('SOURCE', current($this->tokens))
              && strcasecmp('WHERE', current($this->tokens))) {
              trigger_error(RDQL_SYN_ERR .": '" .htmlspecialchars(current($this->tokens))
                            ."' - SOURCE or WHERE clause expected", E_USER_ERROR);
          }
      }

      // SELECT ?Var (, ?Var)*
      $commaExpected = FALSE;   // a comma is allowed only right after a variable
      $comma = FALSE;           // the previous token was a comma
      while (current($this->tokens) != NULL) {
          $k = key($this->tokens);
          $token = $this->tokens[$k];

          switch ($token) {
              case ',':
                  if (!$commaExpected) {
                      trigger_error(RDQL_SEL_ERR ." ',' - unexpected comma", E_USER_ERROR);
                  }
                  $comma = TRUE;
                  $commaExpected = FALSE;
                  break;

              case '(':
              case ')':
                  trigger_error(RDQL_SEL_ERR ." '$token' - illegal input", E_USER_ERROR);
                  break;

              default:
                  if (!strcasecmp('FROM', $token) || !strcasecmp('SOURCE', $token)) {
                      if ($comma) {
                          trigger_error(RDQL_SEL_ERR ." ',' - unexpected comma", E_USER_ERROR);
                      }
                      unset($this->tokens[$k]);
                      return $this->parseFrom();
                  }
                  if (!strcasecmp('WHERE', $token)) {
                      // a comma directly before WHERE is reported as such
                      if ($comma) {
                          trigger_error(RDQL_SEL_ERR ." ',' - unexpected comma", E_USER_ERROR);
                      }
                      unset($this->tokens[$k]);
                      return $this->parseWhere();
                  }
                  if ($token[0] == '?') {
                      $this->parsedQuery['selectVars'][] = $this->_validateVar($token, RDQL_SEL_ERR);
                      $commaExpected = TRUE;
                      $comma = FALSE;
                  } else {
                      trigger_error(RDQL_SEL_ERR ." '" .htmlspecialchars($token)
                                    ."' - '?' missing", E_USER_ERROR);
                  }
          }
          unset($this->tokens[$k]);
          $this->_clearWhiteSpaces();
      }

      // the token list ended without reaching a WHERE clause
      trigger_error(RDQL_SYN_ERR . ': WHERE clause missing', E_USER_ERROR);
  }
  283.  
  284.  
  /**
   * Parse the FROM/SOURCE clause of an Rdql query.
   *
   * Collects the listed sources (<URI> or QName, optionally separated by
   * commas) in $this->parsedQuery['sources']. QName sources are flagged with
   * ['is_qname'] so that their prefix can be resolved later.
   * When the WHERE keyword is reached, parseWhere() is called.
   *
   * @throws PhpError
   * @access private
   */
  function parseFrom() {

      $comma = FALSE;           // the previous token was a comma
      $commaExpected = FALSE;   // a comma is allowed only right after a source
      $i = -1;

      while (current($this->tokens) != NULL) {

          $this->_clearWhiteSpaces();
          $token = current($this->tokens);

          // nothing but white space was left: report the missing WHERE below
          if ($token == NULL) {
              break;
          }

          // WHERE ends the clause, but only after at least one source
          if (!strcasecmp('WHERE', $token) && !empty($this->parsedQuery['sources'])) {
              if ($comma) {
                  trigger_error(RDQL_SRC_ERR ." ',' - unexpected comma", E_USER_ERROR);
              }
              unset($this->tokens[key($this->tokens)]);
              return $this->parseWhere();
          }

          if ($token == ',') {
              if ($commaExpected) {
                  $comma = TRUE;
                  $commaExpected = FALSE;
                  unset($this->tokens[key($this->tokens)]);
              } else {
                  trigger_error(RDQL_SRC_ERR ."',' - unexpected comma", E_USER_ERROR);
              }
          } else {
              // _validateUri() consumes the token(s) of the source
              $this->parsedQuery['sources'][++$i]['value'] = $this->_validateUri($token, RDQL_SRC_ERR);
              if ($token[0] != '<') {
                  $this->parsedQuery['sources'][$i]['is_qname'] = TRUE;
              }
              $commaExpected = TRUE;
              $comma = FALSE;
          }
      }
      trigger_error(RDQL_SYN_ERR .': WHERE clause missing', E_USER_ERROR);
  }
  324.  
  325.  
  /**
   * Parse the WHERE clause of an Rdql query.
   *
   * Reads a list of triple patterns "(subject, predicate, object)" into
   * $this->parsedQuery['patterns']. When an AND or USING keyword follows at
   * least one pattern, the parser of that clause is called.
   *
   * @throws PhpError
   * @access private
   */
  function parseWhere() {

      $comma = FALSE;           // the previous token was a comma
      $commaExpected = FALSE;   // a comma is allowed only right after a pattern
      $i = 0;

      do {
          $this->_clearWhiteSpaces();
          $havePatterns = !empty($this->parsedQuery['patterns']);

          if (!strcasecmp('AND', current($this->tokens)) && $havePatterns) {
              if ($comma) {
                  trigger_error(RDQL_WHR_ERR ." ',' - unexpected comma", E_USER_ERROR);
              }
              unset($this->tokens[key($this->tokens)]);
              return $this->parseAnd();
          } elseif (!strcasecmp('USING', current($this->tokens)) && $havePatterns) {
              if ($comma) {
                  trigger_error(RDQL_WHR_ERR ." ',' - unexpected comma", E_USER_ERROR);
              }
              unset($this->tokens[key($this->tokens)]);
              return $this->parseUsing();
          }

          if (current($this->tokens) == ',') {
              $comma = TRUE;
              $this->_checkComma($commaExpected, RDQL_WHR_ERR);

          } else {

              // opening parenthesis of the triple pattern
              if (current($this->tokens) != '(') {
                  trigger_error(RDQL_WHR_ERR ."'" .current($this->tokens)
                                ."' - '(' expected", E_USER_ERROR);
              }
              unset($this->tokens[key($this->tokens)]);
              $this->_clearWhiteSpaces();

              // subject, predicate, object - the validators consume their tokens
              $this->parsedQuery['patterns'][$i]['subject'] = $this->_validateVarUri(current($this->tokens));
              $this->_checkComma(TRUE, RDQL_WHR_ERR);
              $this->parsedQuery['patterns'][$i]['predicate'] = $this->_validateVarUri(current($this->tokens));
              $this->_checkComma(TRUE, RDQL_WHR_ERR);
              $this->parsedQuery['patterns'][$i++]['object'] = $this->_validateVarUriLiteral(current($this->tokens));
              $this->_clearWhiteSpaces();

              // closing parenthesis of the triple pattern
              if (current($this->tokens) != ')') {
                  trigger_error(RDQL_WHR_ERR ."'" .current($this->tokens) ."' - ')' expected", E_USER_ERROR);
              }
              unset($this->tokens[key($this->tokens)]);
              $this->_clearWhiteSpaces();
              $commaExpected = TRUE;
              $comma = FALSE;
          }
      } while (current($this->tokens) != NULL);

      // the clause must not end with a comma
      if ($comma) {
          trigger_error(RDQL_WHR_ERR ." ',' - unexpected comma", E_USER_ERROR);
      }
  }
  387.  
  388.  
  /**
   * Parse the AND clause of an Rdql query.
   *
   * The tokens of the clause are glued back together into filter strings;
   * a comma ends one filter and starts the next one. Each filter string is
   * handed to parseFilter(). When the USING keyword is reached, parseUsing()
   * is called.
   *
   * @throws PhpError
   * @access private
   * @toDo clear comments
   */
  function parseAnd() {

      $this->_clearWhiteSpaces();
      $n = 0;            // index of the filter under construction
      $filterStr = '';

      while (current($this->tokens) != NULL) {
          $k = key($this->tokens);
          $token = $this->tokens[$k];

          if (!strcasecmp('USING', $token)) {
              $this->parseFilter($n, $filterStr);
              unset($this->tokens[$k]);
              return $this->parseUsing();
          }

          if ($token == ',') {
              // the comma itself does not belong to any filter
              $this->parseFilter($n, $filterStr);
              $filterStr = '';
              $token = '';
              ++$n;
          }
          $filterStr .= $token;
          unset($this->tokens[$k]);
      }
      $this->parseFilter($n, $filterStr);
  }
  /**
   * Parse the USING clause of an Rdql query.
   *
   * Reads namespace declarations of the form "prefix FOR <URI>" (optionally
   * separated by commas) into $this->parsedQuery['ns'][prefix].
   *
   * @throws PhpError
   * @access private
   */
  function parseUsing() {

      $commaExpected = FALSE;   // a comma is allowed only right after a declaration
      $comma = FALSE;           // the previous token was a comma

      do {
          $this->_clearWhiteSpaces();
          if (current($this->tokens) == ',') {
              $comma = TRUE;
              $this->_checkComma($commaExpected, RDQL_USG_ERR);
          } else {
              $prefix = $this->_validatePrefix(current($this->tokens));
              $this->_clearWhiteSpaces();

              if (strcasecmp('FOR', current($this->tokens))) {
                  trigger_error(RDQL_USG_ERR ." keyword: 'FOR' missing in the namespace declaration", E_USER_ERROR);
              }
              unset($this->tokens[key($this->tokens)]);
              $this->_clearWhiteSpaces();

              $this->parsedQuery['ns'][$prefix] = $this->_validateUri(current($this->tokens), RDQL_USG_ERR);
              $this->_clearWhiteSpaces();
              $commaExpected = TRUE;
              $comma = FALSE;
          }
      } while (current($this->tokens) != NULL);

      // the clause must not end with a comma
      if ($comma) {
          trigger_error(RDQL_USG_ERR ." ',' - unexpected comma", E_USER_ERROR);
      }
  }
  456.  
  457.  
  /**
   * Check if a filter from the AND clause contains an equal number of '('
   * and ')' and parse the filter expressions.
   *
   * The parsed filter is stored in $this->parsedQuery['filters'][$n].
   * An empty filter (or one made of white space only) means that a comma was
   * not followed by an expression and is reported as an unexpected comma.
   *
   * @param  integer $n       index of the filter within the AND clause
   * @param  string  $filter  the filter string
   * @throws PHPError
   * @access private
   */
  function parseFilter($n, $filter) {

      if (trim((string) $filter) === '') {
          trigger_error(RDQL_AND_ERR ." ',' - unexpected comma", E_USER_ERROR);
      }

      // positive: unclosed '(' ; negative: surplus ')'
      $paren = substr_count($filter, '(') - substr_count($filter, ')');
      if ($paren > 0) {
          trigger_error(RDQL_AND_ERR ."'" .htmlspecialchars($filter) ."' - ')' missing ", E_USER_ERROR);
      } elseif ($paren < 0) {
          trigger_error(RDQL_AND_ERR ."'" .htmlspecialchars($filter) ."' - too many ')' ", E_USER_ERROR);
      }

      $this->parsedQuery['filters'][$n] = $this->parseExpressions($filter);
  }
  482.  
  483.  
  /**
   * Parse expressions inside the passed filter:
   * 1)  regex equality expressions: ?var [~~ | =~ | !~ ] REG_EX
   * 2a) string equality expressions: ?var [eq | ne] "literal"@lang^^dtype
   * 2b) string equality expressions: ?var [eq | ne] ?var
   * 2c) string equality expressions: ?var [eq | ne] <URI> or
   *     ?var [eq | ne] prefix:local_name
   * 3)  numerical expressions: e.g. (?var1 - ?var2)*4 >= 20
   *
   * In cases 1-2 each expression is parsed into an array and replaced inside
   * the filter string by a place holder (" ##RegEx_N## " or
   * " ##strEqExpr_N## "). What remains in the filter string are numerical
   * expressions and place holders; it is returned as ['evalFilterStr'].
   *
   * @param  string $filterStr
   * @return array ['string'] = string (the unmodified filter)
   *               ['regexEqExprs'][]['var'] = ?VARNAME
   *                                 ['operator'] = (~~ | =~ | !~)
   *                                 ['regex'] = string
   *               ['strEqExprs'][]['var'] = ?VARNAME
   *                               ['operator'] = (eq | ne)
   *                               ['value'] = string
   *                               ['value_type'] = ('variable' | 'URI' | 'QName' | 'Literal')
   *                               ['value_lang'] = string          (literals only)
   *                               ['value_dtype'] = string         (literals only)
   *                               ['value_dtype_is_qname'] = TRUE  (only if set)
   *               ['numExprVars'][] = ?VARNAME
   *               ['evalFilterStr'] = string
   * @throws PHPError
   * @access private
   */
  function parseExpressions($filterStr) {

      $parsedFilter['string'] = $filterStr;
      $parsedFilter['regexEqExprs'] = array();
      $parsedFilter['strEqExprs'] = array();
      $parsedFilter['numExprVars'] = array();

      // 1) regex equality expressions, e.g. ?x ~~ !//foo.com/r!i
      $reg_ex = "/(\?[a-zA-Z0-9_]+)\s+([~!=]~)\s+(['|\"])?([^\s'\"]+)(['|\"])?/";
      preg_match_all($reg_ex, $filterStr, $eqExprs);
      foreach ($eqExprs[0] as $i => $eqExpr) {
          $this->_checkRegExQuotation($filterStr, $eqExprs[3][$i], $eqExprs[5][$i]);
          $parsedFilter['regexEqExprs'][$i]['var'] = $this->_isDefined($eqExprs[1][$i]);
          $parsedFilter['regexEqExprs'][$i]['operator'] = $eqExprs[2][$i];
          $parsedFilter['regexEqExprs'][$i]['regex'] = $eqExprs[4][$i];

          $filterStr = str_replace($eqExpr, " ##RegEx_$i## ", $filterStr);
      }

      // 2a) ?var [eq | ne] "literal"@lang^^dtype
      $reg_ex  = "/(\?[a-zA-Z0-9_]+)\s+(eq|ne)\s+(\'[^\']*\'|\"[^\"]*\")";
      $reg_ex .= "(@[a-zA-Z]+)?(\^{2}\S+:?\S+)?/i";
      preg_match_all($reg_ex, $filterStr, $eqExprs);
      foreach ($eqExprs[0] as $i => $eqExpr) {
          $parsedFilter['strEqExprs'][$i]['var'] = $this->_isDefined($eqExprs[1][$i]);
          $parsedFilter['strEqExprs'][$i]['operator'] = strtolower($eqExprs[2][$i]);
          $parsedFilter['strEqExprs'][$i]['value'] = trim($eqExprs[3][$i], "'\"");
          $parsedFilter['strEqExprs'][$i]['value_type'] = 'Literal';
          // drop the leading '@' of the language tag
          $parsedFilter['strEqExprs'][$i]['value_lang'] = substr($eqExprs[4][$i], 1);
          // drop the leading '^^' of the datatype
          $dtype = substr($eqExprs[5][$i], 2);
          if ($dtype) {
              $parsedFilter['strEqExprs'][$i]['value_dtype'] = $this->_validateUri($dtype, RDQL_AND_ERR);
              if ($dtype[0] != '<') {
                  $parsedFilter['strEqExprs'][$i]['value_dtype_is_qname'] = TRUE;
              }
          } else {
              $parsedFilter['strEqExprs'][$i]['value_dtype'] = '';
          }

          $filterStr = str_replace($eqExpr, " ##strEqExpr_$i## ", $filterStr);
      }

      // 2b) ?var [eq | ne] ?var  - numbering continues after the literals
      $ii = count($parsedFilter['strEqExprs']);
      $reg_ex = "/(\?[a-zA-Z0-9_]+)\s+(eq|ne)\s+(\?[a-zA-Z0-9_]+)/i";
      preg_match_all($reg_ex, $filterStr, $eqExprs);
      foreach ($eqExprs[0] as $i => $eqExpr) {
          $parsedFilter['strEqExprs'][$ii]['var'] = $this->_isDefined($eqExprs[1][$i]);
          $parsedFilter['strEqExprs'][$ii]['operator'] = strtolower($eqExprs[2][$i]);
          $parsedFilter['strEqExprs'][$ii]['value'] = $this->_isDefined($eqExprs[3][$i]);
          $parsedFilter['strEqExprs'][$ii]['value_type'] = 'variable';

          $filterStr = str_replace($eqExpr, " ##strEqExpr_$ii## ", $filterStr);
          $ii++;
      }

      // 2c) ?var [eq | ne] <URI>  or  ?var [eq | ne] prefix:local_name
      $reg_ex = "/(\?[a-zA-Z0-9_]+)\s+(eq|ne)\s+((<\S+>)|(\S+:\S*))/i";
      preg_match_all($reg_ex, $filterStr, $eqExprs);
      foreach ($eqExprs[0] as $i => $eqExpr) {
          $parsedFilter['strEqExprs'][$ii]['var'] = $this->_isDefined($eqExprs[1][$i]);
          $parsedFilter['strEqExprs'][$ii]['operator'] = strtolower($eqExprs[2][$i]);
          if ($eqExprs[4][$i]) {
              $parsedFilter['strEqExprs'][$ii]['value'] = trim($eqExprs[4][$i], "<>");
              $parsedFilter['strEqExprs'][$ii]['value_type'] = 'URI';
          } elseif ($eqExprs[5][$i]) {
              $this->_validateQName($eqExprs[5][$i], RDQL_AND_ERR);
              $parsedFilter['strEqExprs'][$ii]['value'] = $eqExprs[5][$i];
              $parsedFilter['strEqExprs'][$ii]['value_type'] = 'QName';
          }

          $filterStr = str_replace($eqExpr, " ##strEqExpr_$ii## ", $filterStr);
          $ii++;
      }
      $parsedFilter['evalFilterStr'] = $filterStr;

      // 3) all that is left are numerical expressions and place holders
      preg_match_all("/\?[a-zA-Z0-9_]+/", $filterStr, $vars);
      foreach ($vars[0] as $var) {
          $parsedFilter['numExprVars'][] = $this->_isDefined($var);
      }

      return $parsedFilter;
  }
  594.  
  595.  
  /**
   * Find all query variables used in the WHERE clause.
   *
   * Every pattern has to contain at least one variable; a pattern without
   * any variable is reported as an error.
   *
   * @return array [] = ?VARNAME (each variable once, in order of appearance)
   * @throws PHPError
   * @access private
   */
  function findAllQueryVariables() {

      $vars = array();
      foreach ($this->parsedQuery['patterns'] as $pattern) {
          $count = 0;   // number of variables in this pattern
          foreach ($pattern as $v) {
              // a value that starts with '?' is a variable
              if (!isset($v['value'][0]) || $v['value'][0] != '?') {
                  continue;
              }
              ++$count;
              if (!in_array($v['value'], $vars)) {
                  $vars[] = $v['value'];
              }
          }
          if (!$count) {
              trigger_error(RDQL_WHR_ERR .'pattern contains no variables', E_USER_ERROR);
          }
      }

      return $vars;
  }
  620.  
  621.  
  /**
   * Replace all namespace prefixes in the source, pattern and constraint
   * clauses of an rdql query with the namespaces declared in the USING clause
   * and with the default namespaces.
   *
   * Unquoted QNames (flagged 'is_qname' / 'QName' / '..._is_qname' by the
   * clause parsers) are resolved through _replaceNamespacePrefix(). In quoted
   * URIs and datatypes every occurrence of "prefix:" is replaced, case
   * insensitively, by the namespace URI. The helper flags and the ['ns']
   * table are removed from the parsed query afterwards.
   *
   * @access private
   */
  function replaceNamespacePrefixes() {

      global $default_prefixes;

      if (!isset($this->parsedQuery['ns'])) {
          $this->parsedQuery['ns'] = array();
      }

      // add default namespaces
      // if in an rdql query a reserved prefix (e.g. rdf: rdfs:) is used
      // it will be overridden by the default namespace defined in constants.php
      if (is_array($default_prefixes)) {
          $this->parsedQuery['ns'] = array_merge($this->parsedQuery['ns'], $default_prefixes);
      }

      // "prefix:" => namespace URI, applied in declaration order
      $search = array();
      $replace = array();
      foreach ($this->parsedQuery['ns'] as $prefix => $uri) {
          $search[] = $prefix .':';
          $replace[] = $uri;
      }

      // replace namespace prefixes in the FROM clause;
      // each source entry is reduced to its plain URI string
      if (isset($this->parsedQuery['sources'])) {
          foreach ($this->parsedQuery['sources'] as $n => $source) {
              if (isset($source['is_qname'])) {
                  $this->parsedQuery['sources'][$n]
                      = $this->_replaceNamespacePrefix($source['value'], RDQL_SRC_ERR);
              } else {
                  $this->parsedQuery['sources'][$n]
                      = str_ireplace($search, $replace, $source['value']);
              }
          }
      }

      // replace namespace prefixes in the where clause
      if (isset($this->parsedQuery['patterns'])) {
          foreach ($this->parsedQuery['patterns'] as $n => $pattern) {
              foreach ($pattern as $key => $v) {

                  // literal: only its datatype can carry a prefix. Checked
                  // first so that empty literals get their datatype expanded too.
                  if (isset($v['is_literal'])) {
                      if (isset($v['l_dtype_is_qname'])) {
                          $this->parsedQuery['patterns'][$n][$key]['l_dtype']
                              = $this->_replaceNamespacePrefix($v['l_dtype'], RDQL_WHR_ERR);
                          unset($this->parsedQuery['patterns'][$n][$key]['l_dtype_is_qname']);
                      } else {
                          $this->parsedQuery['patterns'][$n][$key]['l_dtype']
                              = str_ireplace($search, $replace, $v['l_dtype']);
                      }
                      continue;
                  }

                  // variables (and empty values) carry no prefix
                  if (!isset($v['value'][0]) || $v['value'][0] == '?') {
                      continue;
                  }

                  if (isset($v['is_qname'])) {
                      $this->parsedQuery['patterns'][$n][$key]['value']
                          = $this->_replaceNamespacePrefix($v['value'], RDQL_WHR_ERR);
                      unset($this->parsedQuery['patterns'][$n][$key]['is_qname']);
                  } else {   // quoted URI (== <URI>)
                      $this->parsedQuery['patterns'][$n][$key]['value']
                          = str_ireplace($search, $replace, $v['value']);
                  }
              }
          }
      }

      // replace prefixes in the constraint clause
      if (isset($this->parsedQuery['filters'])) {
          foreach ($this->parsedQuery['filters'] as $n => $filter) {
              foreach ($filter['strEqExprs'] as $i => $expr) {

                  if ($expr['value_type'] == 'QName') {
                      // a resolved QName is an ordinary URI from now on
                      $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value']
                          = $this->_replaceNamespacePrefix($expr['value'], RDQL_AND_ERR);
                      $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_type'] = 'URI';

                  } elseif ($expr['value_type'] == 'URI') {
                      $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value']
                          = str_ireplace($search, $replace, $expr['value']);

                  } elseif ($expr['value_type'] == 'Literal') {
                      if (isset($expr['value_dtype_is_qname'])) {
                          $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype']
                              = $this->_replaceNamespacePrefix($expr['value_dtype'], RDQL_AND_ERR);
                          unset($this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype_is_qname']);
                      } else {
                          $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype']
                              = str_ireplace($search, $replace, $expr['value_dtype']);
                      }
                  }
              }
          }
      }

      // the namespace table is parser-internal and is not returned
      unset($this->parsedQuery['ns']);
  }
  709.  
  710.  
  711. // =============================================================================
  712. // *************************** helper functions ********************************
  713. // =============================================================================
  714.  
  715.  
  /**
   * Remove white space tokens (' ', "\n", "\t", "\r") from the current
   * position of the array $this->tokens, up to the next token of another kind.
   *
   * @access private
   */
  function _clearWhiteSpaces() {

      $whiteSpace = array(' ', "\n", "\t", "\r");

      while (in_array(current($this->tokens), $whiteSpace, true)) {
          unset($this->tokens[key($this->tokens)]);
      }
  }
  730.  
  731.  
  /**
   * Check if the query string of the given clause contains an undesired ','.
   * If a comma was correctly placed then remove it and clear all whitespaces.
   *
   * After an accepted comma a second comma is never allowed and is reported
   * as an error.
   *
   * @param  boolean $commaExpected  TRUE if a comma is allowed at this position
   * @param  string  $clause_error   error prefix of the clause being parsed
   * @throws PHPError
   * @access private
   */
  function _checkComma($commaExpected, $clause_error) {

      $this->_clearWhiteSpaces();
      if (current($this->tokens) != ',') {
          return;
      }

      if (!$commaExpected) {
          trigger_error($clause_error ."',' - unexpected comma", E_USER_ERROR);
          return;
      }

      // consume the comma, then make sure no further comma follows
      unset($this->tokens[key($this->tokens)]);
      $this->_clearWhiteSpaces();
      if (current($this->tokens) == ',') {
          trigger_error($clause_error ."',' - unexpected comma", E_USER_ERROR);
      }
  }
  753.  
  /**
   * Check if the given token is either a variable (?var) or the first token
   * of an URI (<URI>) or a QName.
   * In case of an URI this function returns the whole URI string.
   *
   * @param  string $token
   * @return array ['value'] = string
   *               ['is_qname'] = TRUE (only set for an unquoted QName)
   * @throws PHPError
   * @access private
   */
  function _validateVarUri($token) {

      // first character; empty when the token list is exhausted
      $first = substr((string) $token, 0, 1);

      if ($first === '?') {
          $token_res['value'] = $this->_validateVar($token, RDQL_WHR_ERR);
      } else {
          $token_res['value'] = $this->_validateUri($token, RDQL_WHR_ERR);
          if ($first !== '<') {
              $token_res['is_qname'] = TRUE;
          }
      }
      return $token_res;
  }
  773.  
  774.  
  /**
   * Check if the given token is either a variable (?var) or the first token
   * of either an URI (<URI>), a QName or a literal ("Literal").
   * In case of a literal return an array with literal properties (value, language, datatype).
   * In case of a variable or an URI return only ['value'] = string.
   *
   * @param  string $token
   * @return array ['value'] = string
   *               ['is_qname'] = boolean
   *               ['is_literal'] = boolean
   *               ['l_lang'] = string
   *               ['l_dtype'] = string
   *               (an empty array if the token is none of the above)
   * @throws PHPError
   * @access private
   */
  function _validateVarUriLiteral($token) {

      $statement_object = array();
      // first character; empty when the token list is exhausted
      $first = substr((string) $token, 0, 1);

      if ($first === '?') {
          $statement_object['value'] = $this->_validateVar($token, RDQL_WHR_ERR);
      } elseif ($first === "'" || $first === '"') {
          $statement_object = $this->_validateLiteral($token);
      } elseif ($first === '<') {
          $statement_object['value'] = $this->_validateUri($token, RDQL_WHR_ERR);
      } elseif (strpos((string) $token, ':') !== FALSE) {
          // anything else that contains a colon is treated as a QName
          $statement_object['value'] = $this->_validateUri($token, RDQL_WHR_ERR);
          $statement_object['is_qname'] = TRUE;
      } else {
          trigger_error(RDQL_WHR_ERR ." '" .htmlspecialchars($token)
                        ."' - ?Variable, &lt;URI&gt;, QName, or \"LITERAL\" expected", E_USER_ERROR);
      }
      return $statement_object;
  }
  804.  
  /**
   * Check if the given token is a valid variable name (?var), i.e. a '?'
   * followed only by letters, digits and underscores.
   * The current token is removed from $this->tokens.
   *
   * @param  string $token
   * @param  string $clause_error  error prefix of the clause being parsed
   * @return string the token
   * @throws PHPError
   * @access private
   */
  function _validateVar($token, $clause_error) {

      // the whole token has to match, not just a part of it
      if (!preg_match('/\A\?[a-zA-Z0-9_]+\z/', $token)) {
          trigger_error($clause_error ."'" .htmlspecialchars($token)
                        ."' - variable name contains illegal characters", E_USER_ERROR);
      }
      unset($this->tokens[key($this->tokens)]);
      return $token;
  }
  823.  
  824.  
  /**
   * Check if $token is the first token of a valid URI (<URI>) or a QName and
   * return the whole URI string (without the angle brackets) or the QName.
   *
   * The tokenizer splits at white space, commas and parentheses, so an <URI>
   * may be spread over several tokens; they are consumed and concatenated
   * until the token that ends with '>'. All consumed tokens are removed from
   * $this->tokens.
   *
   * @param  string $token
   * @param  string $clause_error  error prefix of the clause being parsed
   * @return string
   * @throws PHPError
   * @access private
   */
  function _validateUri($token, $clause_error) {

      $token = (string) $token;

      if (substr($token, 0, 1) !== '<') {
          // not a quoted URI: it has to be a QName (prefix:local_name)
          if (strpos($token, ':') && $this->_validateQName($token, $clause_error)) {
              unset($this->tokens[key($this->tokens)]);
              return rtrim($token, ':');
          }
          $errmsg = $clause_error ."'" .htmlspecialchars($token) ."' ";
          if ($clause_error == RDQL_WHR_ERR) {
              $errmsg .= "- ?Variable or &lt;URI&gt; or QName expected";
          } else {
              $errmsg .= "- &lt;URI&gt; or QName expected";
          }
          trigger_error($errmsg, E_USER_ERROR);
          return NULL;
      }

      // quoted URI: collect tokens up to the one ending with '>'
      $illegal = array('(', ')', ',', ' ', "\n", "\r", "\t");
      $token_res = $token;
      while ($token != NULL && substr($token, -1) != '>') {
          if (in_array($token, $illegal, true)) {
              trigger_error($clause_error ."'" .htmlspecialchars($token_res)
                            ."' - illegal input: '$token' - '>' missing", E_USER_ERROR);
          }
          unset($this->tokens[key($this->tokens)]);
          $token = current($this->tokens);
          $token_res .= $token;
      }

      // the token list ended before the closing '>' was found
      if ($token == NULL) {
          trigger_error($clause_error ."'" .htmlspecialchars($token_res) ."' - '>' missing", E_USER_ERROR);
      }
      unset($this->tokens[key($this->tokens)]);
      return trim($token_res, '<>');
  }
  865.  
  866.  
  /**
   * Parse a quoted literal that starts at $token and return its properties.
   *
   * The opening quotation mark is stripped from the current element of
   * $this->tokens; the tokens are then walked and appended to the literal value
   * until one of them terminates the literal. A terminating token either ends
   * with the quotation mark, or contains the quotation mark followed by
   * @language (optionally followed by ^^datatype) or by ^^datatype.
   * Every token that was used is removed from $this->tokens.
   *
   * @param   string  $token  first token of the literal, starting with ' or "
   * @return  array   ['value']            = string
   *                  ['is_literal']       = boolean
   *                  ['l_lang']           = string
   *                  ['l_dtype']          = string
   *                  ['l_dtype_is_qname'] = boolean (only set for QName datatypes)
   * @throws  PHPError
   * @access  private
   */
  function _validateLiteral($token) {

    // Square-bracket / substr() access replaces the curly-brace string
    // offsets ($token{0}), which were removed in PHP 8.0.
    $quotation_mark = $token[0];
    $statement_object = array ('value' => '',
                               'is_literal' => TRUE,
                               'l_lang' => '',
                               'l_dtype' => '');
    // Drop the opening quotation mark from the token stored in the list.
    $this->tokens[key($this->tokens)] = substr($token, 1);

    $return = FALSE;
    foreach ($this->tokens as $k => $token) {

      if ($token != NULL && substr($token, -1) == $quotation_mark) {
        // plain end of literal
        $token = rtrim($token, $quotation_mark);
        $return = TRUE;

      // parse @language(^^datatype)?
      } elseif (strpos($token, $quotation_mark .'@') !== FALSE) {
        $lang = substr($token, strpos($token, $quotation_mark .'@') + 2);
        if (strpos($lang, '^^') !== FALSE) {
          $dtype = substr($lang, strpos($lang, '^^') + 2);
          if (!$dtype)
            trigger_error(RDQL_WHR_ERR .$quotation_mark .$statement_object['value']
                          .$token ." - datatype expected", E_USER_ERROR);
          // NOTE(review): _validateUri() also removes the element at the
          // internal pointer of $this->tokens; whether that is the element
          // at $k depends on foreach semantics - confirm.
          $statement_object['l_dtype'] = $this->_validateUri($dtype, RDQL_WHR_ERR);
          if (substr($dtype, 0, 1) != '<')
            $statement_object['l_dtype_is_qname'] = TRUE;
          $lang = substr($lang, 0, strpos($lang, '^^'));
        }
        if (!$lang)
          trigger_error(RDQL_WHR_ERR .$quotation_mark .$statement_object['value']
                        .$token ." - language expected", E_USER_ERROR);
        $statement_object['l_lang'] = $lang;
        $token = substr($token, 0, strpos($token, $quotation_mark .'@'));
        $return = TRUE;

      // parse ^^datatype
      } elseif (strpos($token, $quotation_mark .'^^') !== FALSE) {
        $dtype = substr($token, strpos($token, $quotation_mark .'^^') + 3);
        if (!$dtype)
          trigger_error(RDQL_WHR_ERR .$quotation_mark .$statement_object['value']
                        .$token ." - datatype expected", E_USER_ERROR);

        $statement_object['l_dtype'] = $this->_validateUri($dtype, RDQL_WHR_ERR);
        if (substr($dtype, 0, 1) != '<')
          $statement_object['l_dtype_is_qname'] = TRUE;

        $token = substr($token, 0, strpos($token, $quotation_mark .'^^'));
        $return = TRUE;

      // A quotation mark inside the token (at any position but the first)
      // that is not followed by @ or ^^ is rejected.
      } elseif (strpos($token, $quotation_mark)) {
        trigger_error(RDQL_WHR_ERR ."'$token' - illegal input", E_USER_ERROR);
      }

      // Runs for every token, terminating or not.
      $statement_object['value'] .= $token;
      unset($this->tokens[$k]);
      if ($return)
        return $statement_object;
    }
    // All tokens were consumed without finding the end of the literal.
    trigger_error(RDQL_WHR_ERR ."quotation end mark: $quotation_mark missing", E_USER_ERROR);
  }
  938.  
  /**
   * Check if the given token is a valid QName (prefix:local_name).
   *
   * The token may contain at most one colon. The prefix must be a valid
   * NCName; the local part may be empty, otherwise it must be a valid NCName
   * as well. Each violation raises an E_USER_ERROR.
   *
   * @param   string  $token
   * @param   string  $clause_error  prefix for the error message
   * @return  boolean TRUE (only reached if no error stopped execution)
   * @throws  PHPError
   * @access  private
   */
  function _validateQName($token, $clause_error) {
    $parts = explode(':', $token);
    // Escaped like in the sibling validators, which also run their tokens
    // through htmlspecialchars() before putting them into a message.
    $shown = htmlspecialchars($token);

    if (count($parts) > 2)
      trigger_error($clause_error ."illegal QName: '$shown'", E_USER_ERROR);
    if (!$this->_validateNCName($parts[0]))
      trigger_error($clause_error ."illegal prefix in QName: '$shown'", E_USER_ERROR);

    // Explicit checks instead of a truthiness test: a local part of "0" is
    // falsy in PHP and used to skip validation, and a token without a colon
    // has no second part at all.
    $hasLocalPart = isset($parts[1]) && $parts[1] !== '';
    if ($hasLocalPart && !$this->_validateNCName($parts[1]))
      trigger_error($clause_error ."illegal local part in QName: '$shown'", E_USER_ERROR);
    return TRUE;
  }
  958.  
  959.  
  /**
   * Tell whether the given token is a valid NCName.
   *
   * A token qualifies when it consists, from start to end, of one or more
   * ASCII letters or underscores followed by any number of ASCII letters,
   * digits, underscores, dots or hyphens.
   *
   * @param   string  $token
   * @return  boolean
   * @access  private
   */
  function _validateNCName($token) {
    $hits = array();
    preg_match("/[a-zA-Z_]+[a-zA-Z_0-9.\-]*/", $token, $hits);

    // The pattern is unanchored: the token is valid only if the match
    // covers it completely.
    return isset($hits[0]) && $hits[0] == $token;
  }
  973.  
  974.  
  /**
   * Validate a namespace prefix and consume it from the token list.
   *
   * The prefix has to be a valid NCName; otherwise an error for the USING
   * clause is raised. The element at the current position of $this->tokens
   * is removed afterwards.
   *
   * @param   string  $token  candidate prefix
   * @return  string  the unchanged token
   * @throws  PHPError
   * @access  private
   */
  function _validatePrefix($token) {

    $isNCName = $this->_validateNCName($token);
    if (!$isNCName) {
      trigger_error(RDQL_USG_ERR ."'" .htmlspecialchars($token)
                    ."' - illegal input, namespace prefix expected", E_USER_ERROR);
    }

    $currentKey = key($this->tokens);
    unset($this->tokens[$currentKey]);
    return $token;
  }
  991.  
  /**
   * Replace the prefix of a given QName and return the full URI.
   *
   * The prefix (the part before the first colon) is looked up in
   * $this->parsedQuery['ns'] and the part after it is appended to the
   * namespace found there. A QName without a local part ("prefix") resolves
   * to the namespace itself.
   *
   * @param   string  $qName
   * @param   string  $clause_error  prefix for the error message
   * @return  string
   * @throws  PHPError
   * @access  private
   */
  function _replaceNamespacePrefix($qName, $clause_error) {

    $qName_parts = explode(':', $qName);
    $prefix = $qName_parts[0];
    // _validateUri() strips a trailing colon, so a QName with an empty local
    // part arrives here without any colon; default the local part to ''
    // instead of reading an undefined offset.
    $localName = isset($qName_parts[1]) ? $qName_parts[1] : '';

    if (!array_key_exists($prefix, $this->parsedQuery['ns'])) {
      trigger_error($clause_error .'undefined prefix: \'' .$prefix .'\' in: \'' .$qName .'\'', E_USER_ERROR);
      // Only reached when an error handler lets execution continue: there is
      // no namespace to prepend, so avoid reading the undefined index.
      return $localName;
    }
    return $this->parsedQuery['ns'][$prefix] .$localName;
  }
  /**
   * Run _isDefined() on every variable listed in
   * $this->parsedQuery['selectVars'], i.e. verify that each variable of the
   * SELECT clause is defined in the WHERE clause.
   *
   * @access  private
   */
  function _checkSelectVars() {

    $selected = $this->parsedQuery['selectVars'];
    foreach ($selected as $selectVar) {
      $this->_isDefined($selectVar);
    }
  }
  1018.  
  1019.  
  /**
   * Make sure the given variable is among the variables returned by
   * findAllQueryVariables(); raise an error asking for a definition in the
   * WHERE clause otherwise.
   *
   * @param   string  $var
   * @return  string  the unchanged variable name
   * @throws  PHPError
   * @access  private
   */
  function _isDefined($var) {

    $knownVars = $this->findAllQueryVariables();
    if (in_array($var, $knownVars)) {
      return $var;
    }

    trigger_error(RDQL_SYN_ERR .": '$var' - variable must be defined in the WHERE clause", E_USER_ERROR);
    return $var;
  }
  1036.  
  1037.  
  /**
   * Raise an error if the regular expression from the AND clause is not
   * properly quoted: either the opening quotation mark is missing, or the
   * opening and closing marks differ.
   *
   * @param   string  $filterString  the filter shown in the error message
   * @param   string  $lQuotMark     opening quotation mark (empty if none)
   * @param   string  $rQuotMark     closing quotation mark
   * @throws  PHPError
   * @access  private
   */
  function _checkRegExQuotation($filterString, $lQuotMark, $rQuotMark) {

    $isUnquoted = !$lQuotMark;
    if ($isUnquoted) {
      trigger_error(RDQL_AND_ERR ."'$filterString' - regular expressions must be quoted", E_USER_ERROR);
    }

    $marksAgree = ($lQuotMark == $rQuotMark);
    if (!$marksAgree) {
      trigger_error(RDQL_AND_ERR ."'$filterString' - quotation end mark in the regular expression missing", E_USER_ERROR);
    }
  }
  1055.  
  1056. } // end: Class RdqlParser
  1057.  
  1058. ?>

Documentation generated on Fri, 13 Jan 2006 07:49:22 +0100 by phpDocumentor 1.3.0RC4