1/* Part of SWI-Prolog 2 3 Author: Jan Wielemaker 4 E-mail: J.Wielemaker@vu.nl 5 WWW: http://www.swi-prolog.org 6 Copyright (c) 2010-2018, University of Amsterdam 7 All rights reserved. 8 9 Redistribution and use in source and binary forms, with or without 10 modification, are permitted provided that the following conditions 11 are met: 12 13 1. Redistributions of source code must retain the above copyright 14 notice, this list of conditions and the following disclaimer. 15 16 2. Redistributions in binary form must reproduce the above copyright 17 notice, this list of conditions and the following disclaimer in 18 the documentation and/or other materials provided with the 19 distribution. 20 21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 29 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 31 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 POSSIBILITY OF SUCH DAMAGE. 33*/ 34 35:- module(serql, 36 [ serql_query/2, % +Query, -Result 37 serql_query/3, % +Query, -Result, +Options 38 serql_compile/3, % +Query, -Compiled, +Options 39 serql_run/2 % +Compiled, -Reply 40 ]). 41:- use_module(library(semweb/rdf_db)). 42:- use_module(library(semweb/rdf_optimise)). 43:- use_module(library(lists)). 44:- use_module(library(option)). 45:- use_module(library(debug)). 46:- use_module(library(settings)). 47:- use_module(rdfql_util). 48:- include(entailment(load)). 

% select_results/3 is called from serql_run/3 with a module-qualified
% goal (Module:Goal) as its last argument, so that argument is declared
% as a 0-arity meta-argument.
% NOTE(review): the argument specifiers were missing in this copy of
% the file; (+, -, 0) is reconstructed from the way select_results/3
% is called and defined in this file -- confirm against upstream.
:- meta_predicate
    select_results(+, -, 0).

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
A Prolog path expression is a conjunction of rdf/3 statements. Parts may
be wrapped in optional/2 to indicate they are optional and nodes may be
of the form set(List) to indicate a conjunction of distinct values.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

%!  serql_query(+Query, -Result) is nondet.
%!  serql_query(+Query, -Result, +Options) is nondet.
%
%   Compile Query using serql_compile/3 and execute the compiled
%   query using serql_run/2.  Result is a term row(Col1, Col2, ...)
%   for SELECT statements or a term rdf(S,P,O) (presumably for
%   CONSTRUCT statements).  The predicate is non-deterministic,
%   returning the rows or RDF statements one-by-one.
%
%   serql_query/2 is serql_query/3 with the option list
%   `[entailment(rdf)]`.

serql_query(Query, Result) :-
    DefaultOptions = [entailment(rdf)],
    serql_query(Query, Result, DefaultOptions).

serql_query(Query, Result, Options) :-
    serql_compile(Query, Plan, Options),
    serql_run(Plan, Result).

%!  serql_compile(+Query, -Compiled, +Options)
%
%   Parse Query with serql_parse/2 and compile it into the term
%   serql_query(Goal, ReplyTemplate, Module), which is what
%   serql_run/2 executes.  Options processed:
%
%     * entailment(Entailment)
%       Entailment regime; translated into a module by
%       entailment_module/2.  Defaults to `rdfs`.
%     * type(Type)
%       Unified with select(VarNames) or `construct`.  If it does
%       not unify, set_type/2 raises a type_error.
%     * optimise(Bool)
%       If `true`, pass the goal through rdf_optimise/2.  Defaults
%       to the value of the setting cliopatria:optimise_query.
%
%   For compatibility, Options may also be a plain atom, which is
%   interpreted as `[entailment(Atom)]`.

serql_compile(Text, Compiled, Options) :-
    atom(Options), Options \== [],          % compatibility
    !,
    serql_compile(Text, Compiled, [entailment(Options)]).
serql_compile(Text, serql_query(Goal, ReplyTempl, Module), Options) :-
    serql_parse(Text, Query),
    compile(Query, Goal, ReplyTempl, Module, Options).

%   compile(+ParsedQuery, -Runnable, -ReplyTemplate, -Module, +Options)
%
%   Compile the output of serql_parse/2.  Runnable is
%   select(Goal, Solutions) or construct(Goal, Solutions), where
%   Solutions is created by mk_solutions/4.  ReplyTemplate shares
%   variables with Goal.  The compiled clause is printed on the debug
%   topic serql(compiled).

compile(select(Row0, VarNames, Path, Where, Distinct, Limit, Offset),
        select(Final, Solutions),
        Row,
        Module,
        Options) :-
    option(entailment(Entailment), Options, rdfs),
    entailment_module(Entailment, Module),
    mk_solutions(Distinct, Limit, Offset, Solutions),
    set_type(select(VarNames), Options),
    % The `where` attributes only live while the path is compiled.
    where_constraints(Where, Annotations),
    serql_compile_path(Path, select, Goal),
    remove_annotations(Annotations, where),
    projection_functions(Row0, Row, Select),
    serql_optimise_goal((Goal,Where,Select), Options, Optimised),
    serql_select_bind_null(Optimised, Final),
    debug(serql(compiled), '~@',
          [ portray_clause((q(Row) :- Final))
          ]).
compile(construct(RPath, Path, Where, Distinct, Limit, Offset),
        construct(Final, Solutions),
        RDF,
        Module,
        Options) :-
    option(entailment(Entailment), Options, rdfs),
    % The original called entailment_module/2 a second time further
    % down with the same arguments; that redundant call is removed.
    entailment_module(Entailment, Module),
    mk_solutions(Distinct, Limit, Offset, Solutions),
    set_type(construct, Options),
    where_constraints(Where, Annotations),
    serql_compile_path(Path, construct, Goal),
    remove_annotations(Annotations, where),
    statements(RPath, Statements),
    serql_optimise_goal((Goal,Where), Options, Optimised),
    Final = (Optimised, serql_member_statement(RDF, Statements)),
    debug(serql(compiled), '~@',
          [ portray_clause((q(RDF) :- Final))
          ]).

%   serql_optimise_goal(+Goal0, +Options, -Goal)
%
%   Goal is the result of rdf_optimise/2 on Goal0 if optimisation is
%   enabled by the option optimise(true) (default taken from the
%   setting cliopatria:optimise_query) and Goal0 itself otherwise.

serql_optimise_goal(Goal0, Options, Goal) :-
    (   setting(cliopatria:optimise_query, Default),
        option(optimise(Optimise), Options, Default),
        Optimise == true
    ->  rdf_optimise(Goal0, Goal)
    ;   Goal = Goal0
    ).

%   mk_solutions(+Distinct, +Limit, +Offset, -Solutions)
%
%   Build the solution-modifier term handed to select_results/3.
%   Solutions is solutions(unsorted, Limit, Offset), wrapped in
%   distinct/1 if Distinct is the atom `distinct`.

mk_solutions(distinct, Max, Skip,
             distinct(solutions(unsorted, Max, Skip))) :-
    !.
mk_solutions(_NotDistinct, Max, Skip,
             solutions(unsorted, Max, Skip)).

%   set_type(+Type, +Options)
%
%   Report or verify the query type.  If Options contains type(X), X
%   is unified with Type.  If that unification fails, the exception
%   type_error(query_type(Expected), Found) is raised, where Expected
%   and Found are the functor names of X and Type.  Succeeds silently
%   if Options holds no type(X) term.

set_type(Type, Options) :-
    memberchk(type(Requested), Options),
    !,
    (   Requested = Type
    ->  true
    ;   functor(Requested, Expected, _),
        functor(Type, Found, _),
        throw(error(type_error(query_type(Expected), Found), _))
    ).
set_type(_, _).

%!  serql_run(+Compiled, -Reply) is nondet.
%
%   Execute a query compiled by serql_compile/3.  Compiled is a term
%   serql_query(Runnable, ReplyTemplate, Module); Reply is unified
%   with ReplyTemplate for each solution.

serql_run(serql_query(Runnable, Reply, Module), Reply) :-
    serql_run(Runnable, Reply, Module).

%   serql_run(+Runnable, -Reply, +Module)
%
%   Run the goal of a select/2 or construct/2 term in Module, using
%   select_results/3 to apply the solution modifiers.

serql_run(select(Goal, Modifiers), Reply, Module) :-
    select_results(Modifiers, Reply, Module:Goal).
serql_run(construct(Goal, Modifiers), Reply, Module) :-
    select_results(Modifiers, Reply, Module:Goal).

%   select_results(+Solutions, -Reply, :Goal)
%
%   Unpack the term created by mk_solutions/4 and call
%   select_results/6 with `distinct` or `all` as first argument,
%   followed by Offset, Limit, Order, Reply and Goal.

select_results(distinct(solutions(Order, Max, Skip)), Reply, Goal) :-
    !,
    select_results(distinct, Skip, Max, Order, Reply, Goal).
select_results(solutions(Order, Max, Skip), Reply, Goal) :-
    select_results(all, Skip, Max, Order, Reply, Goal).


                 /*******************************
                 *           COMPILER           *
                 *******************************/

%   serql_compile_path(+Path, +Type, -Goal)
%
%   Translate the parsed path expression Path into a Prolog goal.
%   Type is `select` or `construct` and determines how
%   optional(Id, SubPath) is compiled:
%
%     - `construct`: `(Sub *-> Id=true ; Id=false)`
%     - `select`:    `(Sub *-> true ; true)`
%
%   Triples whose subject or object is set(List) are expanded into
%   one triple per member plus `\==` tests between the members.
%   Triples whose subject or object is itself rdf(S,P,O) are expanded
%   using reified/3.  Plain triples are combined with the goals
%   derived from the `where` attributes of their variables.  Any
%   other term is passed unchanged.

serql_compile_path(rdf(S,P,O), Type, Conj) :-
    set(S, Subjects),
    !,
    make_set_subj_conj(Subjects, [], P, O, Type, Conj).
serql_compile_path(rdf(S,P,O), Type, Conj) :-
    set(O, Objects),
    !,
    make_set_obj_conj(Objects, [], S, P, Type, Conj).
serql_compile_path(rdf(S0,P,O), Type, Goal) :-
    reified(S0, S, ReifiedGoal),
    !,
    serql_compile_path(rdf(S,P,O), Type, Goal0),
    Goal = (Goal0, ReifiedGoal).
serql_compile_path(rdf(S,P,O0), Type, Goal) :-
    reified(O0, O, ReifiedGoal),
    !,
    serql_compile_path(rdf(S,P,O), Type, Goal0),
    Goal = (Goal0, ReifiedGoal).
serql_compile_path((Left0,Right0), Type, (Left,Right)) :-
    !,
    serql_compile_path(Left0, Type, Left),
    serql_compile_path(Right0, Type, Right).
serql_compile_path(optional(Id, Sub0), construct,
                   (Sub *-> Id=true ; Id=false)) :-
    !,
    serql_compile_path(Sub0, construct, Sub).
serql_compile_path(optional(_, Sub0), select,
                   (Sub *-> true ; true)) :-
    !,
    serql_compile_path(Sub0, select, Sub).
serql_compile_path(rdf(S,P,O0), _, Goal) :-
    !,
    resource_annotations(S, SubjGoal),
    resource_annotations(P, PredGoal),
    object_annotations(O0, O, ObjGoal),
    clean_conj((SubjGoal, PredGoal, rdf(S,P,O), ObjGoal), Goal).
serql_compile_path(Other, _, Other).

%   reified(+Node, -StatementId, -Goal)
%
%   True if Node is a term rdf(S,P,O).  Goal is the conjunction of
%   four rdf/3 goals that relate StatementId to rdf:Statement, S, P
%   and O through rdf:type, rdf:subject, rdf:predicate and rdf:object.

reified(0, _, _) :-                     % catch variables
    !, fail.
reified(rdf(S,P,O), StatementId,
        ( rdf(StatementId, Type, Statement),
          rdf(StatementId, Subject, S),
          rdf(StatementId, Predicate, P),
          rdf(StatementId, Object, O)
        )) :-
    rdf_equal(Type, rdf:type),
    rdf_equal(Subject, rdf:subject),
    rdf_equal(Predicate, rdf:predicate),
    rdf_equal(Object, rdf:object),
    rdf_equal(Statement, rdf:'Statement').


%   make_set_subj_conj(+Subjects, +Seen, +P, +O, +Type, -Goal)
%
%   Goal is a conjunction holding the compiled triple rdf(S,P,O) for
%   each S in Subjects, where each S is tested to be `\==` to every
%   member of Seen (the subjects handled before it).

make_set_subj_conj([], _, _, _, _, true).       % should not happen
make_set_subj_conj([Only], [], P, O, Type, Goal) :-
    !,
    serql_compile_path(rdf(Only, P, O), Type, Goal).
make_set_subj_conj([Final], Seen, P, O, Type, (Goal, Diffs)) :-
    !,
    serql_compile_path(rdf(Final, P, O), Type, Goal),
    make_diff(Seen, Final, Diffs).
make_set_subj_conj([S|Rest], Seen, P, O, Type, (Goal, Diffs, More)) :-
    !,
    serql_compile_path(rdf(S, P, O), Type, Goal),
    make_diff(Seen, S, Diffs),
    make_set_subj_conj(Rest, [S|Seen], P, O, Type, More).


%   make_set_obj_conj(+Objects, +Seen, +S, +P, +Type, -Goal)
%
%   As make_set_subj_conj/6, but iterating over the object position.

make_set_obj_conj([], _, _, _, _, true).        % should not happen
make_set_obj_conj([Only], [], S, P, Type, Goal) :-
    !,
    serql_compile_path(rdf(S, P, Only), Type, Goal).
make_set_obj_conj([Final], Seen, S, P, Type, (Goal, Diffs)) :-
    !,
    serql_compile_path(rdf(S, P, Final), Type, Goal),
    make_diff(Seen, Final, Diffs).
make_set_obj_conj([O|Rest], Seen, S, P, Type, (Goal, Diffs, More)) :-
    !,
    serql_compile_path(rdf(S, P, O), Type, Goal),
    make_diff(Seen, O, Diffs),
    make_set_obj_conj(Rest, [O|Seen], S, P, Type, More).


%   make_diff(+Others, +To, -Goal)
%
%   Goal is a conjunction of `X \== To` for each X in Others, or
%   `true` if Others is empty.

make_diff([], _, true).
make_diff([Only], To, (Only \== To)) :- !.
make_diff([X|Xs], To, (X \== To, More)) :-
    make_diff(Xs, To, More).

%   statements(+Graph, -Statements)
%
%   Translate the result path Graph of a CONSTRUCT query into a list
%   of statement templates.  Each element is rdf(S,P,O) or
%   optional(Bool, ListOfTriples).  Using CONSTRUCT * (i.e. when the
%   executed path is the result path) the goal generated by the
%   compiler will unify Bool with true or false.  See also
%   member_statement/2.

statements(Graph, Statements) :-
    phrase(statements(Graph), Statements).

statements(rdf(S,P,O)) -->
    { set(S, Subjects) },
    !,
    subj_statements(Subjects, P, O).
statements(rdf(S,P0,O)) -->
    !,
    % Replace a SeRQL builtin predicate by the property it is
    % mapped to; leave any other predicate (or a variable) alone.
    {   nonvar(P0),
        map_builtin(P0, P)
    ->  true
    ;   P = P0
    },
    [ rdf(S,P,O) ].
statements((Left,Right)) -->
    !,
    statements(Left),
    statements(Right).
statements(optional(Id, Sub)) -->
    !,
    { phrase(statements(Sub), OptionalStatements) },
    [ optional(Id, OptionalStatements) ].
statements(_) -->
    [].

% Expand the prefix:local notation in the map_builtin/2 facts below
% at load time using rdf_global_id/2.

term_expansion(map_builtin(Builtin0, Prop0), map_builtin(Builtin, Prop)) :-
    rdf_global_id(Builtin0, Builtin),
    rdf_global_id(Prop0, Prop).

map_builtin(serql:directSubClassOf,    rdfs:subClassOf).
map_builtin(serql:directSubPropertyOf, rdfs:subPropertyOf).
map_builtin(serql:directType,          rdf:type).


%   subj_statements(+Subjects, +P, +O)//
%
%   Emit rdf(S,P,O) for each S in Subjects.  If O is set(Objects),
%   emit the triple for each combination of subject and object.

subj_statements([], _, _) -->
    [].
subj_statements([S|Rest], P, O) -->
    (   { set(O, Objects) }
    ->  obj_statements(Objects, S, P)
    ;   [ rdf(S, P, O) ]
    ),
    subj_statements(Rest, P, O).

%   obj_statements(+Objects, +S, +P)//
%
%   Emit rdf(S,P,O) for each O in Objects.

obj_statements([], _, _) -->
    [].
obj_statements([O|Rest], S, P) -->
    [ rdf(S, P, O) ],
    obj_statements(Rest, S, P).


%   set(+Node, -Members)
%
%   True if Node is a non-variable term set(Members).

set(Node, Members) :-
    nonvar(Node),
    Node = set(Members).


                 /*******************************
                 *       SELECT FUNCTIONS       *
                 *******************************/

%   projection_functions(+Row0, -Row, -Map)
%
%   Row has the same name and arity as Row0.  Columns of Row0 that
%   are variables are shared with Row.  For every other column A0 a
%   goal serql_eval(A0, A) is added to Map, where A is the
%   corresponding (fresh) column of Row.  Map is `true` if all
%   columns are variables.

projection_functions(Row0, Row, Map) :-
    functor(Row0, Name, Arity),
    functor(Row, Name, Arity),
    projection_functions(0, Arity, Row0, Row, true, Map).

projection_functions(Arity, Arity, _, _, Map, Map) :- !.
projection_functions(I0, Arity, Row0, Row, Map0, Map) :-
    I is I0 + 1,
    arg(I, Row0, Col0),
    (   var(Col0)
    ->  arg(I, Row, Col0),
        projection_functions(I, Arity, Row0, Row, Map0, Map)
    ;   arg(I, Row, Col),
        add_conj(Map0, serql_eval(Col0, Col), Map1),
        projection_functions(I, Arity, Row0, Row, Map1, Map)
    ).

%   add_conj(+Conj0, +Goal, -Conj)
%
%   Append Goal to the conjunction Conj0, dropping a leading `true`.

add_conj(true, Goal, Goal) :- !.
add_conj(Conj0, Goal, (Conj0,Goal)).


                 /*******************************
                 *      WHERE CONSTRAINTS       *
                 *******************************/

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
The idea of this step is to turn where clauses into constraints on
variables.

Supported annotations (in standard order of terms):

        any
        literal
        resource
        eq(Value)
        like(Pattern)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

%   where_constraints(+Goal, -Annotations)
%
%   Annotate the variables of the where-clause Goal.  Annotations is
%   the list of variables that received a `where` attribute.  The
%   attribute value is either a single annotation or
%   or(ListOfAlternatives).  The latter is used if different paths
%   through the control-structure yield different annotations.
%
%   Each solution of where_constraint_list/2 represents one path
%   through the control structure.  The alternatives are collected
%   with bagof/3, sorted and merged by join_alt_annots/2.

where_constraints(Goal, Annotations) :-
    bagof(PathAnnots, where_constraint_list(Goal, PathAnnots), Alternatives0),
    sort_lol(Alternatives0, Alternatives),
    join_alt_annots(Alternatives, Annotations).

%   where_constraint_list(+Goal, -Annotations) is nondet.
%
%   Annotations is a list of Var=Annotation for one path through the
%   control structure of Goal.  The `where` attributes put on the
%   variables are removed again by attrs_to_terms/2.

where_constraint_list(Goal, Annotations) :-
    where_constraints(Goal, AttrVars, []),
    attrs_to_terms(AttrVars, Annotations).


%   where_constraints(+Goal)//
%
%   Walk the where-clause and put constraints on its variables using
%   constrain//2.  The emitted list holds each variable that got its
%   first constraint.  A disjunction yields one solution per branch.

where_constraints((Left,Right)) -->
    !,
    where_constraints(Left),
    where_constraints(Right).
where_constraints((Left;Right)) -->
    !,
    (   where_constraints(Left)
    ;   where_constraints(Right)
    ).
where_constraints(serql_compare(like, Var, Pattern)) -->
    !,
    constrain(Var, like(Pattern)).
where_constraints(serql_compare(=, Var, Value)) -->
    !,
    constrain(Var, eq(Value)).
where_constraints(rdf_is_literal(Var)) -->
    !,
    constrain(Var, literal).
where_constraints(rdf_is_resource(Var)) -->
    !,
    constrain(Var, resource).
where_constraints(rdf(S,P,_)) -->
    !,
    constrain(S, resource),
    constrain(P, resource).
where_constraints(_) -->
    [].

%   constrain(+Term, +Cond)//
%
%   If Term is a variable, add Cond to its `where` attribute.  The
%   variable is emitted only when it gets its first constraint;
%   later constraints are prepended as (Cond, Old).  For label(X),
%   lang(X) and datatype(X), X is constrained with (literal, Cond).
%   Any other term is ignored.

constrain(Var, Cond) -->
    { var(Var) },
    !,
    (   { get_attr(Var, where, Old) }
    ->  { put_attr(Var, where, (Cond, Old)) },
        []
    ;   { put_attr(Var, where, Cond)
        },
        [ Var ]
    ).
constrain(label(X), Cond) -->
    !,
    constrain(X, (literal, Cond)).
constrain(lang(X), Cond) -->
    !,
    constrain(X, (literal, Cond)).
constrain(datatype(X), Cond) -->
    !,
    constrain(X, (literal, Cond)).
constrain(_, _) -->
    [].

%   join_alt_annots(+ListOfLists, -AnnotatedVars)
%
%   Merge the alternative annotation lists.  Repeatedly takes the
%   smallest variable (standard order of terms) that heads one of the
%   lists, collects its annotation from every alternative using
%   var_annotations/4 and, unless the result is empty or contains
%   `any`, stores it on the variable using put_annotations/2.
%   AnnotatedVars are the variables that received an annotation.

join_alt_annots(Alternatives, Annotated) :-
    smallest_var(Alternatives, Var),
    !,
    var_annotations(Var, Alternatives, Remaining, Annots0),
    sort(Annots0, Annots),              % remove duplicates
    (   empty_annotations(Annots)
    ->  join_alt_annots(Remaining, Annotated)
    ;   put_annotations(Annots, Var),
        Annotated = [Var|More],
        join_alt_annots(Remaining, More)
    ).
join_alt_annots(Alternatives, []) :-
    % No list has a head left, so all of them must be empty.
    assertion(maplist(=([]), Alternatives)).

%   normalise_annotation(+Annot0, -Annot)
%
%   Flatten the conjunction Annot0, sort its members in standard
%   order of terms (which also removes duplicates) and turn the
%   result back into a right-nested conjunction.

normalise_annotation(Annot0, Annot) :-
    conj_to_list(Annot0, Members0, []),
    sort(Members0, Members),
    list_do_conj(Members, Annot).

%   conj_to_list(+Conj)//
%
%   Emit the members of the (possibly nested) conjunction Conj.

conj_to_list((Left,Right)) -->
    !,
    conj_to_list(Left),
    conj_to_list(Right).
conj_to_list(Single) -->
    [Single].

%   list_do_conj(+List, -Conj)
%
%   Conj is the right-nested conjunction of the members of List, or
%   `any` if List is empty.

list_do_conj([], any).
list_do_conj([Only], Only) :- !.
list_do_conj([First|Rest0], (First,Rest)) :-
    list_do_conj(Rest0, Rest).

% Distribute A,(C1;C2) into (A,C1);(A,C2)
% And apply optimisation on both branches.


%   empty_annotations(+Annotations)
%
%   True if Annotations carries no information: it is the empty list
%   or one of the alternatives is `any`.

empty_annotations([]) :- !.
empty_annotations(Annots) :-
    memberchk(any, Annots).

%   put_annotations(+Annotations, +Var)
%
%   Store Annotations in the `where` attribute of Var: a single
%   annotation as is, multiple alternatives as or(Annotations).

put_annotations([], _).
put_annotations([Single], Var) :-
    !,
    put_attr(Var, where, Single).
put_annotations(Alternatives, Var) :-
    put_attr(Var, where, or(Alternatives)).

%   smallest_var(+ListOfLists, -Smallest)
%
%   Smallest is the smallest key (standard order of terms) found at
%   the head of any of the Key=Value lists.  Empty lists are skipped.
%   Fails if all lists are empty.

smallest_var([[Key0=_|_]|Lists], Smallest) :-
    smallest_var(Lists, Key0, Smallest).
smallest_var([[]|Lists], Smallest) :-
    smallest_var(Lists, Smallest).

%   smallest_var(+ListOfLists, +Smallest0, -Smallest)
%
%   As smallest_var/2, starting from the candidate Smallest0.

smallest_var([], Smallest, Smallest).
smallest_var([[Key=_|_]|Lists], Smallest0, Smallest) :-
    !,
    smallest(Key, Smallest0, Smallest1),
    smallest_var(Lists, Smallest1, Smallest).
smallest_var([[]|Lists], Smallest0, Smallest) :-
    smallest_var(Lists, Smallest0, Smallest).

%   smallest(+A, +B, -Smallest)
%
%   Smallest is A if A @< B and B otherwise.

smallest(A, B, Smallest) :-
    (   A @< B
    ->  Smallest = A
    ;   Smallest = B
    ).

%   var_annotations(+Var, +LoL0, -LoL, -Annotations)
%
%   Collect the annotations for Var from each of the alternative
%   lists in LoL0.  If a list starts with Var=A, A is added to
%   Annotations and the pair is removed; a list that becomes empty is
%   dropped from LoL.  A list that does not start with Var is kept
%   and contributes `any`.
%
%   Var is compared to the head key using ==/2.  The original used
%   head unification, which _bound_ Var to the head variable of
%   every alternative and thereby unified distinct query variables.

var_annotations(_, [], [], []) :- !.
var_annotations(Var, [[Key=Annot|Rest]|Lists0], LoL, [Annot|Annots]) :-
    Key == Var,
    !,
    (   Rest == []
    ->  LoL = Lists
    ;   LoL = [Rest|Lists]
    ),
    var_annotations(Var, Lists0, Lists, Annots).
var_annotations(Var, [List|Lists0], [List|Lists], [any|Annots]) :-
    var_annotations(Var, Lists0, Lists, Annots).


% Attribute hooks for the `where` attribute.  They must be defined in
% the module `where` because that is the attribute name used by
% put_attr/3 in this file.  Unifying an annotated variable fails;
% printing one prints the annotation.
% NOTE(review): the module qualifier was missing in this copy of the
% file (whereattr_unify_hook); confirm against upstream.

where:attr_unify_hook(_,_) :- fail.
where:attr_portray_hook(Val, _Var) :-
    print(Val).

%   attrs_to_terms(+AttrVars, -Annotations)
%
%   Convert the `where` attribute of each variable into a pair
%   Var=Annotation, where Annotation is normalised using
%   normalise_annotation/2.  The attribute is removed from the
%   variable.

attrs_to_terms([], []).
attrs_to_terms([Var|Vars], [Var=Annot|Pairs]) :-
    get_attr(Var, where, Annot0),
    del_attr(Var, where),
    normalise_annotation(Annot0, Annot),
    attrs_to_terms(Vars, Pairs).

%   sort_lol(+ListOfLists, -ListOfSortedLists)
%
%   Sort each of the member lists using sort/2.

sort_lol([], []).
sort_lol([List|Lists], [Sorted|SortedLists]) :-
    sort(List, Sorted),
    sort_lol(Lists, SortedLists).

%   remove_annotations(+Vars, +Attribute)
%
%   Delete the attribute named Attribute from each variable in Vars.

remove_annotations([], _).
remove_annotations([Var|Vars], Attribute) :-
    del_attr(Var, Attribute),
    remove_annotations(Vars, Attribute).

%   object_annotations(+O0, -O, -Goal)
%
%   Use the `where` attribute of the object O0 to create a more
%   specific object O for the rdf/3 call and a Goal that must run
%   after it.  Only the annotation (literal, like(Pattern)) is
%   handled (see object_annot/4).  Otherwise O is O0 and Goal is
%   `true`.

object_annotations(O0, O, Goal) :-
    get_attr(O0, where, Annot),
    object_annot(Annot, O0, O, Goal),
    !.
object_annotations(O, O, true).

%   object_annot(+Annotation, +O0, -O, -Goal)
%
%   For (literal, like(Pattern)), O is literal(like(Pattern), L)
%   and Goal unifies O0 with literal(L).

object_annot((literal, like(Pattern)), O0,
             literal(like(Pattern), Lit), O0 = literal(Lit)).

%   resource_annotations(+Resource, -Goal)
%
%   Use the `where` attribute of Resource (the subject or predicate
%   of a triple) to bind it at compile time or to create a Goal that
%   enumerates its possible values.  Goal is `true` if there is no
%   annotation that resource_annot/3 handles.

resource_annotations(Resource, Goal) :-
    get_attr(Resource, where, Annot),
    resource_annot(Annot, Resource, Goal),
    !.
resource_annotations(_, true).

%   resource_annot(+Annotation, +Resource, -Goal)
%
%   Handles eq(Var) (unify the two variables), eq(query(Value))
%   (bind Resource to Value) and or(List) where all members of List
%   are eq(query(Value)) (Goal becomes member(Resource, Values)).
%   In the first two cases the attribute is deleted before the
%   unification.

resource_annot(eq(Other), Resource, true) :-    % where A = B
    var(Other),
    !,
    del_attr(Resource, where),
    Resource = Other.
resource_annot(eq(query(Value)), Resource, true) :-
    !,
    del_attr(Resource, where),
    Resource = Value.
resource_annot(or(Alternatives), Resource, Goal) :-
    eq_list(Alternatives, Values),
    !,
    Goal = member(Resource, Values).

%   eq_list(+Annotations, -Values)
%
%   True if all Annotations are of the form eq(query(Value)).

eq_list([], []).
eq_list([eq(query(Value))|Annots], [Value|Values]) :-
    eq_list(Annots, Values).

%   clean_conj(+Conj0, -Conj)
%
%   Strip `true` from the start and the end of the conjunction
%   Conj0.  A `true` in the middle of the conjunction is kept.

clean_conj((true, Rest0), Conj) :-
    !,
    clean_conj(Rest0, Conj).
clean_conj((Front0, true), Conj) :-
    !,
    clean_conj(Front0, Conj).
clean_conj(Conj, Conj).

                 /*******************************
                 *            PARSER            *
                 *******************************/

%   serql_parse(+Input, -Query)
%
%   Parse the SeRQL query Input into the term Query.  Input is a
%   code list or any atomic text.  Parsing tokenises the text
%   (tokens//1), parses the tokens (query//2) and then replaces
%   variables (expand_vars/2) and URI abbreviations (expand_uris/3).
%
%   @error syntax_error(unknown) if any of these steps fails.
%   @error type_error(text, Input) if Input is neither a list nor
%          atomic.

serql_parse(Codes, Query) :-
    is_list(Codes),
    !,
    (   phrase(tokens(Tokens), Codes),
        phrase(query(Query0, NameSpaces), Tokens),
        expand_vars(Query0, Query1),
        expand_uris(Query1, NameSpaces, Query)
    ->  true
    ;   syntax_error(unknown)
    ).
serql_parse(Atomic, Query) :-
    atomic(Atomic),
    !,
    atom_codes(Atomic, Codes),
    serql_parse(Codes, Query).
serql_parse(Input, _) :-
    throw(error(type_error(text, Input), _)).


                 /*******************************
                 *            ERRORS            *
                 *******************************/

%   syntax_error(+What)
%
%   Throw a syntax error with the message 'in SeRQL query'.

syntax_error(What) :-
    throw(error(syntax_error(What),
                context(_, 'in SeRQL query'))).


                 /*******************************
                 *          NAMESPACES          *
                 *******************************/

%   expand_uris(+Term0, +Map, -Term)
%
%   Replace the URI tokens in Term0 by atoms.  Map is the list of
%   NS=Prefix pairs declared in the query; ns/2 is used as fallback.
%
%     - uri(URI) becomes URI
%     - uri(NS, Local) becomes Prefix+Local.  Raises
%       existence_error(namespace, NS) if NS is unknown.
%     - old_uri(NS, Local) becomes Prefix+Local if NS is known and
%       the atom NS:Local otherwise.
%
%   Compound terms are processed recursively; variables and other
%   terms are left untouched.

expand_uris(Var, _, Var) :-
    var(Var),
    !.
expand_uris(uri(URI), _, URI) :- !.             % <!foo:bar>
expand_uris(uri(NS, Local), Map, URI) :-        % foo:bar
    !,
    (   memberchk(NS=Prefix, Map)
    ->  true
    ;   ns(NS, Prefix)
    ->  true
    ;   throw(error(existence_error(namespace, NS), _))
    ),
    atom_concat(Prefix, Local, URI).
expand_uris(old_uri(NS, Local), Map, URI) :-    % <foo:bar>
    !,
    (   (   memberchk(NS=Prefix, Map)
        ;   ns(NS, Prefix)
        )
    ->  atom_concat(Prefix, Local, URI)
    ;   % Unknown prefix: NS is a URI scheme, e.g. <http://...>
        atomic_list_concat([NS, ':', Local], URI)
    ).
expand_uris(Q0, Map, Q) :-
    compound(Q0),
    !,
    functor(Q0, Name, Arity),
    functor(Q, Name, Arity),
    expand_uris(0, Arity, Q0, Map, Q).
expand_uris(Q, _, Q).

%   expand_uris(+I0, +Arity, +Term0, +Map, -Term)
%
%   Expand the arguments I0+1..Arity of Term0 into Term.

expand_uris(Arity, Arity, _, _, _) :- !.
expand_uris(I0, Arity, Q0, Map, Q) :-
    I is I0 + 1,
    arg(I, Q0, A0),
    arg(I, Q, A),
    expand_uris(A0, Map, A),
    expand_uris(I, Arity, Q0, Map, Q).

%   ns(?NS, ?URI)
%
%   Namespace prefixes known without declaration.  If the setting
%   cliopatria:rdf_db_namespaces is `true`, these are the prefixes of
%   rdf_db:ns/2.  Otherwise they are the ones from serql_ns/2.

ns(NS, URI) :-
    setting(cliopatria:rdf_db_namespaces, true),
    !,
    rdf_db:ns(NS, URI).
ns(NS, URI) :-
    serql_ns(NS, URI).

% The OWL namespace is http://www.w3.org/2002/07/owl#.  The original
% table had `2002/7`, which does not match any OWL term.

serql_ns(rdf,  'http://www.w3.org/1999/02/22-rdf-syntax-ns#').
serql_ns(rdfs, 'http://www.w3.org/2000/01/rdf-schema#').
serql_ns(owl,  'http://www.w3.org/2002/07/owl#').
serql_ns(xsd,  'http://www.w3.org/2001/XMLSchema#'). % Wrong in SeRQL docs!
serql_ns(serql,'http://rdf4j.org/schema/serql#').


                 /*******************************
                 *           VARIABLES          *
                 *******************************/

% TBD: Check that projection variables actually appear in the
% query!

%   expand_vars(+Query0, -Query)
%
%   Replace the var(Name) terms of the parsed query by Prolog
%   variables.  For select queries, two arguments are added: a term
%   row(Var, ...) holding the projection and a term names(Name, ...).
%   With `select *`, these cover all named variables of the path and
%   where-clause.  With `construct *`, the path is used as result
%   path.

expand_vars(select(*, Path0, Where0, Distinct, Limit, Offset),
            select(Row, VNames, Path, Where, Distinct, Limit, Offset)) :-
    !,
    var_names(Path0-Where0, Path-Where, VarNames),
    vars(VarNames, Vars, Names),
    Row =.. [row | Vars],
    VNames =.. [names|Names].
expand_vars(select(Projection, Path0, Where0, Distinct, Limit, Offset),
            select(Row, VNames, Path, Where, Distinct, Limit, Offset)) :-
    !,
    var_names(x(Projection,Path0,Where0), x(Vars,Path,Where), _VarNames),
    Row =.. [row | Vars],
    proj_names(Projection, Names),
    VNames =.. [names|Names].
expand_vars(construct(*, Path0, Where0, Distinct, Limit, Offset),
            construct(Path, Path, Where, Distinct, Limit, Offset)) :-
    !,
    var_names(x(Path0,Where0), x(Path,Where), _VarNames).
expand_vars(construct(Ret0, Path0, Where0, Distinct, Limit, Offset),
            construct(Ret, Path, Where, Distinct, Limit, Offset)) :-
    !,
    var_names(x(Ret0,Path0,Where0), x(Ret,Path,Where), _VarNames).


%   var_names(+Term0, -Term, ?Map)
%
%   Term is a copy of Term0 in which each var(Name) is replaced by a
%   variable.  Map is an open list of Name=Var to which new names
%   are added by member/2, so equal names share one variable.

var_names(var(-(V)), V, _) :- !.        % bnodes, mapped from {}
var_names(var(Name), Var, Map) :-
    member(Name=Var, Map),
    !.
var_names(Q0, Q, Map) :-
    compound(Q0),
    !,
    functor(Q0, Name, Arity),
    functor(Q, Name, Arity),
    var_names(0, Arity, Q0, Q, Map).
var_names(Q, Q, _).

var_names(Arity, Arity, _, _, _) :- !.
var_names(I0, Arity, Q0, Q, Map) :-
    I is I0 + 1,
    arg(I, Q0, A0),
    arg(I, Q, A),
    var_names(A0, A, Map),
    var_names(I, Arity, Q0, Q, Map).

%   vars(?Map, -Vars, -Names)
%
%   Split the Name=Var pairs of the open list Map.

vars([], [], []) :- !.                  % also closes list!
vars([Name=Var|T0], [Var|TV], [Name|TN]) :-
    vars(T0, TV, TN).

%   proj_names(+Projection, -Names)
%
%   Names holds the variable name for each var(Name) in Projection
%   and `-` for any other projection element.

proj_names([], []).
proj_names([var(Var)|T0], [Var|T]) :-
    !,
    proj_names(T0, T).
proj_names([_|T0], [-|T]) :-
    proj_names(T0, T).


                 /*******************************
                 *       ERROR LOCATIONS        *
                 *******************************/

%   syntax_error(+What)//
%
%   Throw a syntax error that carries the remaining tokens as
%   left(Tokens) in its context.  See add_error_location/2.

syntax_error(What, In, []) :-
    throw(error(syntax_error(What),
                context(_, left(In)))).

%   add_error_location(+Error, +Tokens)
%
%   Re-throw a syntax error raised by syntax_error//1 with a message
%   context for serql_parse/2: at most five tokens before and after
%   the error location, with `**here**` in between.  Any other
%   exception is re-thrown unmodified; the original failed on these,
%   which made serql_parse/2 report them as syntax_error(unknown).

add_error_location(error(syntax_error(What),
                         context(_, left(After))),
                   Tokens) :-
    append(Before, After, Tokens),
    length(Before, BL),
    (   BL =< 5
    ->  BC = Before
    ;   length(BC0, 5),
        append(_, BC0, Before),
        BC = ['...'|BC0]
    ),
    length(After, AL),
    (   AL =< 5
    ->  AC = After
    ;   length(AC0, 5),
        append(AC0, _, After),
        append(AC0, ['...'], AC)
    ),
    append(BC, ['**here**'|AC], ContextTokens0),
    maplist(token_to_atom, ContextTokens0, ContextTokens),
    atomic_list_concat(ContextTokens, ' ', Context),
    throw(error(syntax_error(What),
                context(serql_parse/2, Context))).
add_error_location(Error, _) :-
    throw(Error).

%   token_to_atom(+Token, -Atom)
%
%   Atom is a printable representation of Token.

token_to_atom(Token, Token) :-
    atom(Token),
    !.
token_to_atom(id(X), X) :- !.
token_to_atom(string(X), X) :- !.
token_to_atom(uri(URI), X) :-
    !,
    atomic_list_concat(['<', URI, '>'], X).
token_to_atom(uri(NS,Local), X) :-
    !,
    atomic_list_concat([NS, Local], ':', X).
token_to_atom(old_uri(NS,Local), X) :-
    !,
    atomic_list_concat(['<', NS, ':', Local, '>'], X).
token_to_atom(cmp(X), X) :- !.
token_to_atom(rest(X), X) :- !.
token_to_atom(Token, Atom) :-
    term_to_atom(Token, Atom).

%   query(-Query, -NameSpaces)//
%
%   Parse a compilation unit, adding the error location to syntax
%   errors (see add_error_location/2).

query(Query, NameSpaces, In, Out) :-
    catch(compilation_unit(Query, NameSpaces, In, Out),
          E,
          add_error_location(E, In)).

%   must_see(+Token)//
%   must_see(+Token, +UserName)//
%
%   Accept Token or raise the syntax error expected(Token) or
%   expected(UserName).

must_see(Token) -->
    [Token],
    !.
must_see(Token) -->
    syntax_error(expected(Token)).

must_see(Token, _) -->
    [Token],
    !.
must_see(_, UserName) -->
    syntax_error(expected(UserName)).


                 /*******************************
                 *      HIGH LEVEL PARSER       *
                 *******************************/

compilation_unit(Query, NameSpaces) -->
    query(Query),
    namespace_list(NameSpaces).

%   namespace_list(-NameSpaces)//
%
%   Parse the optional `using namespace` declaration that may follow
%   the query into a list of NS=URI.

namespace_list([H|T]) -->
    [ using ], !, must_see(namespace),
    !,
    must_see_namespace(H),
    namespaces(T).
namespace_list([]) -->
    [].

must_see_namespace(Decl) -->
    namespace(Decl),
    !.
must_see_namespace(_) -->
    syntax_error(expected(namespace_declaration)).

namespace(NS=URI) -->
    must_see(id(NS), identifier),
    must_see(cmp(=), =),
    namespace_uri(URI).

namespace_uri(URI) -->
    [ uri(URI) ],
    !.
namespace_uri(URI) -->
    [ old_uri(Protocol, Local) ],       % New style <foo:bar>
    !,
    { atomic_list_concat([Protocol, ':', Local], URI)
    }.
namespace_uri(_) -->
    syntax_error(expected(absolute_uri)).

namespaces([H|T]) -->
    [ ',' ],
    !,
    must_see_namespace(H),
    namespaces(T).
namespaces([]) -->
    [].

%   query(-Query)//
%
%   Parse a select or construct query into a term
%   select(Projection, Path, Where, Distinct, Limit, Offset) or
%   construct(Construct, Path, Where, Distinct, Limit, Offset).

query(select(Projection, Path, Where, Distinct, Limit, Offset)) -->
    [ select ],
    !,
    distinct(Distinct),
    projection(Projection),
    must_see(from), path_expr_list(Path),
    query_tail(Where, Limit, Offset).
query(construct(Construct, Path, Where, Distinct, Limit, Offset)) -->
    [ construct ],
    !,
    distinct(Distinct),
    construct_clause(Construct),
    must_see(from), path_expr_list(Path),
    query_tail(Where, Limit, Offset).
query(_) -->
    syntax_error(no_select_or_construct).

distinct(distinct) -->
    [ distinct ],
    !.
distinct(false) -->
    [].

%   query_tail(-Where, -Limit, -Offset)//
%
%   Parse the optional where, limit and offset clauses.  The
%   defaults are `true`, `inf` and 0.

query_tail(Where, Limit, Offset) -->
    (   [ where ]
    ->  (   boolean_query(Where)
        ->  []
        ;   syntax_error(illegal_where_clause)
        )
    ;   {Where = true}
    ),
    (   [ limit ]
    ->  (   pos_int(Limit)
        ->  []
        ;   syntax_error(illegal_limit)
        )
    ;   {Limit = inf}
    ),
    (   [ offset ]
    ->  (   pos_int(Offset)
        ->  []
        ;   syntax_error(illegal_offset)
        )
    ;   {Offset = 0}
    ).

projection(*) -->
    [ * ],
    !.
projection([H|T]) -->
    var_or_value(H),
    !,
    var_or_value_list(T).
projection(_) -->
    syntax_error(expected(projection)).

construct_clause(*) -->
    [ * ],
    !.
construct_clause(Path) -->
    path_expr_list(Path),
    !.
construct_clause(_) -->
    syntax_error(expected(construct_clause)).

%   path_expr_list(-Expr)//
%
%   Parse a comma-separated list of path expressions into a
%   conjunction.

path_expr_list(Expr) -->
    must_see_path_expr(E0),
    (   [ ',' ]
    ->  path_expr_list(Es),
        { Expr = (E0, Es) }
    ;   { Expr = E0 }
    ).

must_see_path_expr(E) -->
    path_expr(E),
    !.
must_see_path_expr(_) -->
    syntax_error(expected(path_expression)).

path_expr(optional(_, Path)) -->
    [ '[' ], !, path_expr_list(Path), must_see(']').
path_expr(Expr) -->
    path_expr0(Expr).

% After the head triple, `;` continues from the subject of the head
% and plain juxtaposition continues from its object.

path_expr0(Expr) -->
    path_expr_head(Head),
    (   (   [ ';' ]
        ->  { arg(1, Head, H) }
        ;   { arg(3, Head, H) }
        ),
        path_expr_tail(H, Tail)
    ->  { Expr = (Head, Tail)
        }
    ;   { Expr = Head }
    ).


path_expr_head(rdf(S, P, O)) -->
    must_see_node(S), must_see_edge(P), must_see_node(O).

path_expr_tail(S, Expr) -->
    [ '[' ], path_expr_tail0(S, Expr1), [ ']' ],
    { Expr0 = optional(_, Expr1) },
    (   [ ';' ]
    ->  path_expr_tail(S, Tail),
        { Expr = (Expr0, Tail) }
    ;   { Expr = Expr0 }
    ).
path_expr_tail(S, Expr) -->
    path_expr_tail0(S, Expr).

% path_expr_tail0 <=> Edge Node ((";")? Path_expr_tail)?

path_expr_tail0(S, Expr) -->
    edge(P), must_see_node(O),
    { Statement = rdf(S, P, O) },
    (   (   [ ';' ]
        ->  path_expr_tail(S, Tail)
        ;   path_expr_tail(O, Tail)
        )
    ->  { Expr = (Statement, Tail) }
    ;   { Expr = Statement }
    ).

must_see_edge(Edge) -->
    edge(Edge),
    !.
must_see_edge(_) -->
    syntax_error(expected(edge)).

edge(var(Var)) -->
    [ id(Var) ],
    !.
edge(uri(URI)) -->
    [ uri(URI) ],
    !.                                  % <!foo:bar>
edge(uri(NS, URI)) -->
    [ uri(NS, URI) ],
    !.                                  % foo:bar
edge(old_uri(NS, URI)) -->
    [ old_uri(NS, URI) ],
    !.                                  % <foo:bar>

must_see_node(Node) -->
    node(Node),
    !.
must_see_node(_) -->
    syntax_error(expected(node)).

%   node(-Node)//
%
%   Parse `{Elem, ...}`.  Node is the element itself if there is
%   only one, set(Elems) for multiple elements and var(-(_)) for the
%   empty node `{}`.

node(Node) -->
    [ '{' ], node_elem(E0), !, node_elem_list(Es), [ '}' ],
    (   {Es == []}
    ->  {Node = E0}
    ;   {Node = set([E0|Es])}
    ).
node(var(-(_))) -->                     % the _ is the variable that will
    [ '{', '}' ].                       % be shared

node_elem_list([H|T]) -->
    [ ',' ],
    !,
    must_see_node_elem(H),
    node_elem_list(T).
node_elem_list([]) -->
    [].

must_see_node_elem(Elem) -->
    node_elem(Elem),
    !.
must_see_node_elem(_) -->
    syntax_error(expected(node_element)).

node_elem(Elem) -->
    (   var(Elem)
    ;   uri(Elem)
    ;   literal(Elem)
    ;   reified_stat(Elem)
    ),
    !.

reified_stat(rdf(S,P,O)) -->
    node(S), must_see_edge(P), must_see_node(O).


                 /*******************************
                 *          WHERE ...           *
                 *******************************/

%   boolean_query(-Query)//
%
%   Parse the where-clause.  `or` becomes (A;B) and `and` becomes
%   (A,B); `and` binds tighter than `or`.

boolean_query(Query) -->
    and_expr(And),
    (   [ or ],
        boolean_query(Or)
    ->  {Query = (And ; Or)}
    ;   {Query = And}
    ).

and_expr(Query) -->
    boolean_query0(Q0),
    (   [ and ],
        and_expr(And)
    ->  {Query = (Q0, And)}
    ;   {Query = Q0}
    ).

boolean_query0(Query) -->
    [ '(' ], !, boolean_query(Query), must_see(')').
boolean_query0(true) -->
    [ true ],
    !.
boolean_query0(fail) -->
    [ false ],
    !.
boolean_query0(\+(Q)) -->
    [ not ], !, boolean_query0(Q).
boolean_query0(serql_compare(Cmp, L, R)) -->
    var_or_query_value(L),
    [ cmp(Cmp) ],
    !,
    var_or_query_value(R).
boolean_query0(serql_compare(like, Var, String)) -->
    var_or_value(Var),                  % must be var?
    [ like ], !, must_see_string(String).
boolean_query0(rdf_is_literal(V)) -->
    [ isliteral, '(' ], !, var(V), must_see(')').
boolean_query0(rdf_is_resource(V)) -->
    [ isresource, '(' ], !, var(V), must_see(')').
boolean_query0(_) -->
    syntax_error(expected(boolean_test)).

must_see_string(String) -->
    [ string(String) ],
    !.
must_see_string(_) -->
    syntax_error(expected(string)).

var_or_value_list([H|T]) -->
    [ ',' ],
    !,
    must_see_var_or_value(H),
    var_or_value_list(T).
var_or_value_list([]) -->
    [].

must_see_var_or_value(X) -->
    var_or_value(X),
    !.
must_see_var_or_value(_) -->
    syntax_error(expected(var_or_value)).

var_or_value(X) -->
    var(X),
    !.
var_or_value(X) -->
    value(X).

% A literal used as operand of a comparison is wrapped in query/1.

var_or_query_value(X) -->
    (   literal_value(Value)
    ->  { X = query(Value)
        }
    ;   var_or_value(X)
    ).

var(var(Var)) -->
    [ id(Var) ],
    !.

value(URI) -->
    uri(URI).
value('$null$') -->
    [ null ].
value(Literal) -->
    literal(Literal),
    !.
value(datatype(var(Var))) -->
    [ datatype, '(', id(Var), ')' ].
value(lang(var(Var))) -->
    [ lang, '(', id(Var), ')' ].
value(label(var(Var))) -->
    [ label, '(', id(Var), ')' ].

uri(uri(URI)) --> [uri(URI)].
uri(uri(NS, URI)) --> [uri(NS, URI)].
uri(old_uri(NS, URI)) --> [old_uri(NS, URI)].

literal(Literal) -->
    literal_value(Value),
    { Literal = literal(Value) }.

%   literal_value(-Value)//
%
%   Parse a string with optional `@lang` or `^^type` qualifier into
%   lang(Lang, String), type(URI, String) or String.

literal_value(Lit) -->
    [ string(String) ],
    (   [@, id(Lang)]
    ->  { Lit = lang(Lang, String) }
    ;   [^^, URI]
    ->  { Lit = type(URI, String) }
    ;   { Lit = String }
    ).

pos_int(I) -->
    [ int(I) ], { I >= 0 }.             % bit weird not to have >0, but this
                                        % is the Sesame spec


                 /*******************************
                 *          TOKENISER           *
                 *******************************/

%   tokens(-Tokens)//
%
%   Split the input codes into a list of tokens, skipping blank
%   space before each token and at the end.

tokens([H|T]) -->
    blank,
    token(H),
    !,
    tokens(T).
tokens([]) -->
    blank.

%   token(-Token)//
%
%   Read the next token.  Tokens are uri(URI), old_uri(NS, Local),
%   string(Atom), uri(NS, Local), a keyword (see serql_keyword/1),
%   id(Name), int(Int), cmp(Operator), `^^` or a single punctuation
%   character (see single/1).  If nothing matches, all remaining
%   input is returned as rest(Atom).

token(uri(URI)) -->                     % Old style absolute URI
    "<!", uri_codes(Codes), ">",
    { atom_codes(URI, Codes)
    }.
token(old_uri(NS, Local)) -->           % Old style local, new style absolute
    "<", identifier(NS), ":", uri_codes(Codes), ">",
    { atom_codes(Local, Codes)
    }.
token(string(String)) -->
    "\"", string_codes(Codes), "\"",
    { atom_codes(String, Codes)
    }.
token(Token) -->
    identifier(Id),
    !,
    (   ":", identifier(Local)          % new style URI
    ->  { Token = uri(Id, Local)
        }
    ;   {   downcase_atom(Id, Keyword),
            serql_keyword(Keyword)
        ->  Token = Keyword
        ;   Token = id(Id)
        }
    ).
token(int(Int)) -->
    digit(First),
    !,
    digits(More),
    { number_codes(Int, [First|More])
    }.
token(cmp(Cmp)) -->
    cmp(Cmp),
    !.
token(^^) -->
    "^^",
    !.
token(Char) -->
    [Code],
    { single(Code),
      char_code(Char, Code)
    }.
token(rest(Rest), In, []) :-            % catch syntax errors.
    In \== [],
    atom_codes(Rest, In).


%   single(?Code)
%
%   Character codes that form a token by themselves.

single(0'*).
single(0'=).
single(0'().
single(0')).
single(0'{).
single(0'}).
single(0'[).
single(0']).
single(0'@).
single(0',).
single(0';).

%   cmp//1
%
%   Returns Prolog comparison operators from the SeRQL ones.

cmp(=<) --> "<=".
cmp(\=) --> "!=".
cmp(>=) --> ">=".
cmp(=)  --> "=".
cmp(<)  --> "<".
cmp(>)  --> ">".

%   uri_codes(-Codes)//
%
%   Get the longest sequence of codes for which uri_code/1 is true.

uri_codes([Code|Codes]) -->
    [Code],
    { uri_code(Code)
    },
    !,
    uri_codes(Codes).
uri_codes([]) -->
    [].

%   uri_code(+Code)
%
%   True if Code may appear in a URI: a `csym` code (see
%   code_type/2) or one of the characters listed below.

uri_code(Code) :-
    code_type(Code, csym),
    !.
uri_code(0'$).
uri_code(0'-).
uri_code(0'@).
uri_code(0'&).
uri_code(0'+).
uri_code(0'.).
uri_code(0'/).
uri_code(0'?).
uri_code(0'#).
uri_code(0'=).
uri_code(0':).
uri_code(0'~).                          % officially not
uri_code(0';).
uri_code(0'{).
uri_code(0'}).

%   string_codes(-Codes)//
%
%   Get the codes of a string up to, but not including, the closing
%   double quote.  The backslash is the escape character: `\\`
%   yields a backslash and `\"` yields a double quote that does not
%   end the string.  A backslash before any other character is taken
%   literally.
%
%   The original matched a double quote instead of a backslash as
%   the first character of an escape sequence.  As a result, `\"`
%   ended the string and `\\` produced two backslashes.

string_codes([C0|Cs]) -->
    "\\", [C0],
    { C0 == 0'\\ ; C0 == 0'" },
    !,
    string_codes(Cs).
string_codes([]) -->
    peek(0'").
string_codes([C0|Cs]) -->
    [C0],
    string_codes(Cs).

%   identifier(-Id)//
%
%   Parse an identifier into the atom Id.  An identifier starts with
%   a `csymf` code and continues with `csym` codes, `.` or `-` (see
%   code_type/2).

identifier(Id) -->
    [First],
    { code_type(First, csymf) },
    id_chars(More),
    { atom_codes(Id, [First|More])
    }.

id_chars([Code|Codes]) -->
    [Code],
    { code_type(Code, csym)
    ; Code == 0'.
    ; Code == 0'-
    },
    !,
    id_chars(Codes).
id_chars([]) -->
    [].

%   digit(-Code)//
%
%   Accept a single digit code.

digit(Code) -->
    [Code],
    { code_type(Code, digit) }.

%   digits(-Codes)//
%
%   Accept the longest, possibly empty, sequence of digits.

digits([Code|Codes]) -->
    digit(Code),
    !,
    digits(Codes).
digits([]) -->
    [].

%   blank//
%
%   Skip all leading white space.

blank -->
    [Code],
    { code_type(Code, space) },
    !,
    blank.
blank -->
    [].

%   serql_keyword(?Keyword)
%
%   Reserved words.  token//1 maps an identifier to one of these
%   atoms if its lowercase version is in this table, so keywords are
%   case insensitive.

serql_keyword(select).
serql_keyword(construct).
serql_keyword(from).
serql_keyword(where).
serql_keyword(using).
serql_keyword(namespace).
serql_keyword(true).
serql_keyword(false).
serql_keyword(not).
serql_keyword(and).
serql_keyword(or).
serql_keyword(like).
serql_keyword(label).
serql_keyword(lang).
serql_keyword(datatype).
serql_keyword(null).
serql_keyword(isresource).
serql_keyword(isliteral).
serql_keyword(sort).
serql_keyword(in).
serql_keyword(union).
serql_keyword(intersect).
serql_keyword(minus).
serql_keyword(exists).
serql_keyword(forall).
serql_keyword(distinct).                % SPEC: not in grammar
serql_keyword(limit).                   % SPEC: not in grammar
serql_keyword(offset).                  % SPEC: not in grammar

                 /*******************************
                 *          DCG BASICS          *
                 *******************************/

%   peek(?Code)//
%
%   True if Code is the next element of the input.  No input is
%   consumed.

peek(C, L, L) :-
    L = [C|_].


                 /*******************************
                 *    HUMAN READABLE MESSAGES   *
                 *******************************/

:- multifile
    prolog:message/3.

% Hook into the message system for the syntax errors created by
% add_error_location/2.  The clause must be defined in the module
% `prolog`, as declared by the multifile directive above; the `prolog:`
% qualifier was missing in this copy of the file.

prolog:message(error(syntax_error(What),
                     context(serql_parse/2, Location))) -->
    [ 'Syntax error in SeRQL query: ' ],
    explain(What), [ ' at **here** in', nl, nl],
    ['~w'-[Location] ].

explain(expected(X)) -->
    [ '"~w" expected'-[X] ]