View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2010-2018, University of Amsterdam
    7                              VU University Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(rdfql_util,
   37          [ select_results/6,           % +Distinct, +Offset, +Limit,
   38                                        % :SortBy, -Result, :Goal
   39            select_results/9,           % +Distinct, +Group, +Having, +Agg,
   40                                        % +Offset, +Limit,
   41                                        % :SortBy, -Result, :Goal
   42            entailment_module/2         % +Entailment, -Module
   43          ]).   44:- use_module(library(nb_set)).   45:- use_module(library(semweb/rdf_db)).   46:- use_module(library(lists)).   47:- use_module(library(pairs)).   48:- use_module(library(apply)).   49:- use_module(library(xsdp_types)).   50:- use_module(sparql_runtime).   51
   52:- meta_predicate
   53    select_results(+, +, 0, +, +, +, +, -, 0),
   54    select_results(+, +, +, +, -, 0),
   55    select_results(+, +, +, -, 0).   56
   57%!  select_results(+Distinct, +Offset, +Limit, +SortBy, -Result, :Goal)
   58%
   59%   Calls select_results/8 using Group=[] and Having=true.
   60
   61select_results(Distinct, Offset, Limit, SortBy, Result, Goal) :-
   62    select_results(Distinct, [], true, [],
   63                   Offset, Limit, SortBy, Result, Goal).
   64
   65%!  select_results(+Distinct, +Group, +Having, +Aggregates,
   66%!                 +Offset, +Limit, +SortBy, -Result, :Goal) is nondet.
   67%
   68%   Select results for the  template   Result  on  backtracking over
   69%   Goal.
   70%
   71%   @param Distinct
   72%   Iff 'distinct', only consider distinct solutions
   73%
   74%   @param Group
   75%   is a list of variables on which to group the results.  These
   76%   are the only variables that can be used in the HAVING filter
   77%   and final projection.
   78%
   79%   @param Having
   80%   is a constraint (similar to =FILTER=) to filter grouped
   81%   results.
   82%
   83%   @param Aggregates
   84%   List of aggregate(Function, Var)
   85%
   86%   @param Offset
   87%   Skip the first Offset results.  Offset is applied after
   88%   Distinct and SortBy
   89%
   90%   @param Limit
   91%   Only return the first Limit results. Limit is applied
   92%   after Distinct, SortBy and Offset. The value 'inf'
   93%   returns all values.
   94%
   95%   @param SortBy
   96%   Either 'unsorted' or a term order_by(Cols), where each Col
   97%   in Cols is a term ascending(Expr) or descending(Expr).
   98%
   99%   @tbd    Group, Having and Aggregate are currently ignored.
  100
  101:- discontiguous select_results/9.  102
  103/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  1041. ORDERED RESULTS WITHOUT AGGREGATES
  105- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  106
  107select_results(Distinct, [], _:true, [], Offset, Limit,
  108               order_by(Cols), Result, Goal) :-
  109    exclude(ground, Cols, SortCols), SortCols \== [],
  110    !,
  111    group_order(SortCols, GroupedCols),
  112    reverse(GroupedCols, RevGroupedCols),
  113    maplist(sort_key_goal, RevGroupedCols, GroupedKeys, KeyGenList),
  114    list_conj(KeyGenList, KeyGenGoal),
  115    sort_template(GroupedKeys, Result, Template),
  116    findall(Template, (Goal,rdfql_util:KeyGenGoal), Results0),
  117    (   Distinct == distinct
  118    ->  sort(Results0, Results1)
  119    ;   Results1 = Results0
  120    ),
  121    order_by(RevGroupedCols, Results1, Results2),
  122    apply_offset(Offset, Results2, Results3),
  123    apply_limit(Limit, Results3, Results),
  124    member(Result, Results).
  125
  126%!  group_order(+Cols, -GroupedCols) is det.
  127%
  128%   Group ordering expressions by the same ordering direction. E.g.,
  129%   [ascending(X), ascending(Y), descending(Z)] becomes
  130%   [ascending([X,Y]), descending([Z])]
  131
  132group_order([], []).
  133group_order([Col|CT0], [Grouped|GT]) :-
  134    Col =.. [Dir,Expr],
  135    Grouped =.. [Dir,[Expr|Exprs]],
  136    same_order(Dir, CT0, CT, Exprs),
  137    group_order(CT, GT).
  138
  139same_order(Dir, [Col|CT0], CT, [E0|ET]) :-
  140    Col =.. [Dir,E0],
  141    !,
  142    same_order(Dir, CT0, CT, ET).
  143same_order(_, CT, CT, []).
  144
  145list_conj([], true) :- !.
  146list_conj([G], G) :- !.
  147list_conj([G|T0], (G,T)) :-
  148    list_conj(T0, T).
  149
  150
  151%!  order_by(+Cols, +Results0, -Results) is det.
  152%
  153%   Order  the  results.  Cols  is  a   list  of  ascending(Var)  or
  154%   descending(Var). Note that the sorting is   done  with the least
  155%   importing (right most) order declaration first and relies on the
  156%   fact that keysort/2 is stable wrt to ordering the values.
  157%
  158%   @tbd    For DESC sorting, we need to reverse, but not the
  159%           order inside the groups.  We'd need a reverse keysort/2.
  160
  161order_by([], Results, Results).
  162order_by([ascending(_)|T], Results0, Results) :-
  163    !,
  164    keysort(Results0, Results1),
  165    pairs_values(Results1, Results2),
  166    order_by(T, Results2, Results).
  167order_by([descending(_)|T], Results0, Results) :-
  168    keysort(Results0, AscSorted),
  169    group_pairs_by_key(AscSorted, Grouped),
  170    reverse(Grouped, DescSortedKeyedGroups),
  171    pairs_values(DescSortedKeyedGroups, DescSortedGroups),
  172    append(DescSortedGroups, DescSorted),
  173    order_by(T, DescSorted, Results).
  174
  175
  176%!  sort_key_goal(+ColGroup, -KeyGroup, -Translate)
  177
  178sort_key_goal(ColGroup, KeyGroup, Translate) :-
  179    arg(1, ColGroup, Exprs),
  180    sort_expr_goal(Exprs, KeyGroup, Translate).
  181
  182sort_expr_goal([], [], true).
  183sort_expr_goal([V], [K], sort_key(V,K)) :- !.
  184sort_expr_goal([V|TV], [K|TK], (sort_key(V,K),G)) :-
  185    sort_expr_goal(TV, TK, G).
  186
  187sort_template([], Result, Result).
  188sort_template([H0|T], Result, H-Template) :-
  189    simplify_sort_key(H0, H),
  190    sort_template(T, Result, Template).
  191
  192simplify_sort_key([One], One) :- !.
  193simplify_sort_key(List, Term) :-
  194    Term =.. [v|List].
  195
  196%!  sort_key(+Result, -Key) is det.
  197%
  198%   Determine the sort-key from a result according to the SPARQL
  199%   standard:
  200%
  201%       1. undefined/null
  202%       2. blank nodes
  203%       3. IRIs
  204%       4. RDF Literals
  205%          a. Numbers are mapped to their value space
  206%          b. Other literals are mapped to their plain literal.
  207%             Note that this works fine for some types:
  208%             - xsd:date and variants
  209%             - xsd:boolean
  210%
  211%   @bug This is not good enough. Literals must be compared in their
  212%   value-space.  This requires some study.
  213%   @bug Result is a SPARQL expression.
  214
  215:- public sort_key/2.                   % Goal created by sort_key_goal/3.
  216
  217sort_key(Var, Var) :- var(Var), !.
  218sort_key(literal(L), sk(4, V)) :-
  219    !,
  220    simple_literal(L, V).
  221sort_key(IRI, sk(N, IRI)) :-
  222    (   rdf_is_bnode(IRI)
  223    ->  N = 2
  224    ;   N = 3
  225    ).
  226
  227simple_literal(lang(_Lang, SL), SL) :- !.
  228simple_literal(type(Type, SL), Number) :-
  229    xsdp_numeric_uri(Type, _),
  230    atom_number(SL, Number),
  231    !.
  232simple_literal(type(_Type, SL), SL) :- !.
  233simple_literal(SL, SL).
  234
  235
  236/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  2372. UNORDERED RESULTS WITHOUT AGGREGATES
  238- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  239
  240select_results(Distinct, [], _:true, [], Offset, Limit,
  241               _Unsorted, Result, Goal) :-
  242    !,
  243    select_results(Distinct, Offset, Limit, Result, Goal).
  244
  245%!  select_results(+Distinct, +Offset, +Limit, -Result, :Goal)
  246%
  247%   Unsorted version. In this case we can avoid first collecting all
  248%   results.
  249
  250select_results(distinct, Offset, Limit, Result, Goal) :-
  251    !,
  252    term_variables(Result, Vars),
  253    V =.. [v|Vars],
  254    empty_nb_set(Set),
  255    Counter = count(0),
  256    Goal,
  257       add_nb_set(V, Set, New),
  258       New == true,
  259       apply_limit(Counter, Offset, Limit, Last),
  260       ( Last == true -> ! ; true ).
  261select_results(_, 0, inf, _, G) :-
  262    !,
  263    G.
  264select_results(_, Offset, Limit, _, G) :-
  265    !,
  266    Counter = count(0),
  267    G,
  268        apply_limit(Counter, Offset, Limit, Last),
  269        ( Last == true -> ! ; true ).
  270
  271apply_limit(_, 0, inf, false) :- !.
  272apply_limit(Counter, Offset, Limit, Last) :-
  273    arg(1, Counter, N0),
  274    N is N0 + 1,
  275    nb_setarg(1, Counter, N),
  276    N > Offset,
  277    (   Limit \== inf,
  278        N =:= Offset+Limit
  279    ->  Last = true
  280    ;   Last = false
  281    ).
  282
  283%!  apply_offset(+Offset, +AllSolutions, -OffsetSolutions) is det.
  284
  285apply_offset(N, [_|T], List) :-
  286    N > 0,
  287    !,
  288    N2 is N - 1,
  289    apply_offset(N2, T, List).
  290apply_offset(_, List, List).
  291
  292%!  apply_limit(+Limit,  +AllSolutions, -LimitSolutions) is det.
  293
  294apply_limit(inf, List, List) :- !.
  295apply_limit(Limit, All, List) :-
  296    limit(Limit, All, List).
  297
  298limit(N, [H|T0], [H|T]) :-
  299    N > 0,
  300    !,
  301    N2 is N - 1,
  302    limit(N2, T0, T).
  303limit(_, _, []).
  304
  305
  306/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3073. AGGREGATE SUPPORT
  308
  309To support aggregation, we have to collect   results and group them into
  310sets that have equal values for the  variables that appear in Group. For
  311each group, we must:
  312
  313  1. Evaluate the Aggregate functions
  314  2. Evaluate the Having constraing and drop sets for which Having fails.
  315
  316Note that the projection (=Result) and   Having constraints can only use
  317variables Group and aggregate functions. This   implies  that we need to
  318track the grouped variables as well  as   variables  that  are needed to
  319compute the aggregates, but _no_ more.
  320
  321Q: Can we call select_results/9 recursively  with the iteration over the
  322groups to get OrderBy, Offset and Limit working?
  323
  324Q: When do we need to apply Distinct? Before or after grouping?
  325
  326Note that we do not need to do anything with the result term because the
  327output variables are already shared with it.
  328- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  329
  330select_results(_Distinct, [], Having, AggregateEval, _Offset, _Limit,
  331               _Order, _Result, Goal) :-
  332    !,
  333    aggregate_vars(AggregateEval, Aggregates, AggVars, Eval),
  334    AV =.. [a|AggVars],
  335    findall(AV, Goal, Group),
  336    aggregate(Group, Aggregates),
  337    call(Eval),
  338    call(Having).
  339select_results(_Distinct, Group, Having, AggregateEval, Offset, Limit,
  340               _Order, _Result, Goal) :-
  341    aggregate_vars(AggregateEval, Aggregates, AggVars, Eval),
  342    GV =.. [v|Group],
  343    AV =.. [a|AggVars],
  344    findall(GV-AV, Goal, Pairs),
  345    keysort(Pairs, SortedPairs),
  346    group_pairs_by_key(SortedPairs, Groups0),
  347    apply_offset(Offset, Groups0, Groups1),
  348    apply_limit(Limit, Groups1, Groups),
  349    member(GV-G, Groups),
  350    aggregate(G, Aggregates),
  351    call(Eval),
  352    call(Having).
  353
  354%!  aggregate(+Group, +Aggregates)
  355
  356aggregate([AVT|Group], Aggregates) :-
  357    !,
  358    AVT =.. [a|AV0],
  359    maplist(aggregate_setup, AV0, State0),
  360    aggregate_steps(Group, State0, State),
  361    maplist(aggregate_bind, Aggregates, State).
  362aggregate([], Aggregates) :-
  363    maplist(empty_aggregate, Aggregates).
  364
  365aggregate_setup(count(X), Count) :-
  366    aggregate_step(count(X), 0, Count).
  367aggregate_setup(distinct(X, _Op), Set) :-
  368    ( is_null(X) -> Set = [] ; Set = [X] ).
  369aggregate_setup(sum(X0), X) :-
  370    sparql_eval_raw(X0, X).
  371aggregate_setup(min(X0), X) :-
  372    sparql_eval_raw(X0, X).
  373aggregate_setup(max(X0), X) :-
  374    sparql_eval_raw(X0, X).
  375aggregate_setup(avg(X0), X-1) :-
  376    sparql_eval_raw(X0, X).
  377aggregate_setup(sample(X), X).
  378aggregate_setup(group_concat(Expr,_), [X]) :-
  379    group_concat_value(Expr, X).
  380
  381aggregate_steps([], State, State).
  382aggregate_steps([HT|T], State0, State) :-
  383    HT =.. [a|H],
  384    maplist(aggregate_step, H, State0, State1),
  385    aggregate_steps(T, State1, State).
  386
  387aggregate_step(count(X), Count0, Count) :-
  388    ( is_null(X) -> Count = Count0 ; Count is Count0 + 1 ).
  389aggregate_step(distinct(X, _Op), S0, S) :-
  390    ( is_null(X) -> S = S0 ; S = [X|S0] ).
  391aggregate_step(sum(X), Sum0, Sum) :-
  392    sparql_eval_raw(X+Sum0, Sum).
  393aggregate_step(min(X), Min0, Min) :-
  394    sparql_eval_raw(min(X, Min0), Min).
  395aggregate_step(max(X), Min0, Min) :-
  396    sparql_eval_raw(max(X, Min0), Min).
  397aggregate_step(avg(X), Sum0-Count0, Sum-Count) :-
  398    sparql_eval_raw(X+Sum0, Sum),
  399    Count is Count0+1.
  400aggregate_step(sample(X), S0, S) :-
  401    (   is_null(S0)
  402    ->  S = X
  403    ;   S = S0
  404    ).
  405aggregate_step(group_concat(Expr, _), S0, [X|S0]) :-
  406    group_concat_value(Expr, X).
  407
  408%!  aggregate_bind(+Aggregation, +State) is det.
  409%
  410%   @tbd: bind to error if the function does not evaluate?
  411
  412aggregate_bind(aggregate(Func, Var), State) :-
  413    aggregate_bind(Func, Var, State).
  414
  415aggregate_bind(count(_), Count, Count0) :-
  416    rdf_equal(xsd:integer, XSDInt),
  417    sparql_eval(numeric(XSDInt, Count0), Count).
  418aggregate_bind(sum(_), Sum, Sum0) :-
  419    bind_number(Sum0, Sum).
  420aggregate_bind(min(_), Min, Min0) :-
  421    bind_number(Min0, Min).
  422aggregate_bind(max(_), Max, Max0) :-
  423    bind_number(Max0, Max).
  424aggregate_bind(avg(_), Avg, Sum-Count) :-
  425    rdf_equal(xsd:integer, XSDInt),
  426    sparql_eval(Sum/numeric(XSDInt, Count), Avg).
  427aggregate_bind(sample(_), Sample, Sample).
  428aggregate_bind(group_concat(Expr, literal(Sep)), literal(Concat), Parts0) :-
  429    (   functor(Expr, distinct, 1)
  430    ->  sort(Parts0, Parts)
  431    ;   Parts = Parts0
  432    ),
  433    maplist(text_of, Parts, Texts),
  434    atomic_list_concat(Texts, Sep, Concat).
  435aggregate_bind(distinct(_, Op), Value, Set) :-
  436    sort(Set, Distinct),
  437    aggregate_distinct(Op, Distinct, Value).
  438
  439%!  aggregate_distinct(+Operation, +Set, -Value)
  440
  441aggregate_distinct(count, Set, Value) :-
  442    rdf_equal(xsd:integer, IntType),
  443    length(Set, Len),
  444    bind_number(numeric(IntType, Len), Value).
  445aggregate_distinct(sum, Set, Sum) :-
  446    rdf_equal(xsd:integer, IntType),
  447    foldl(add, Set, number(IntType, 0), Sum0),
  448    bind_number(Sum0, Sum).
  449aggregate_distinct(avg, Set, Avg) :-
  450    aggregate_distinct(sum, Set, Sum),
  451    length(Set, Count),
  452    rdf_equal(xsd:integer, XSDInt),
  453    sparql_eval(Sum/numeric(XSDInt, Count), Avg).
  454
  455add(X, Sum0, Sum) :-
  456    sparql_eval_raw(X+Sum0, Sum).
  457
  458text_of(Expr, Atom) :-
  459    sparql_eval_raw(Expr, V0),
  460    raw_text(V0, Atom).
  461
  462raw_text(string(X), X).
  463raw_text(simple_literal(X), X).
  464raw_text(lang(_Lang, SL), SL).          % allow lang qualified strings in
  465                                        % group_concat
  466
  467bind_number(V0, V) :-
  468    (   V0 = numeric(_, _)
  469    ->  sparql_eval(V0, V)
  470    ;   V = '$null$'
  471    ).
  472
  473group_concat_value(Expr, Value) :-
  474    (   functor(Expr, distinct, 1)
  475    ->  arg(1, Expr, Value)
  476    ;   Value = Expr
  477    ).
  478
  479:- rdf_meta empty_aggregate(t).  480
  481empty_aggregate(aggregate(count(_), literal(type(xsd:integer, '0')))).
  482empty_aggregate(aggregate(sum(_), literal(type(xsd:integer, '0')))).
  483empty_aggregate(aggregate(distinct(_,count), literal(type(xsd:integer, '0')))).
  484empty_aggregate(aggregate(group_concat(_,_), literal(''))).
  485
  486
  487%!  aggregate_vars(+AggregateEval, -Aggregates, -Template, -Query) is det.
  488%
  489%   @param AggregateEval is a combination of aggregate(Expr, Var)
  490%          and queries that depend on aggregation results and must
  491%          therefore be executed after computing the aggregates.%
  492%   @param Aggregates is a list of aggregate(Expr, Var), where Expr
  493%          is a plain aggregate function over a number of variables
  494%          and Var is shared with the place where the aggregate is
  495%          used.
  496
  497aggregate_vars([], [], [], true).
  498aggregate_vars([aggregate(Term0, Into)|T],
  499               [aggregate(Term,  Into)|AT], [Term|Templ], Q) :-
  500    !,
  501    x_distinct(Term0, Term),
  502    aggregate_vars(T, AT, Templ, Q).
  503aggregate_vars([Q0|T], Agg, Templ, Q) :-
  504    aggregate_vars(T, Agg, Templ, Q1),
  505    mkconj(Q1, Q0, Q).
  506
  507mkconj(true, Q, Q) :- !.
  508mkconj(Q, true, Q) :- !.
  509mkconj(A, B, (A,B)).
  510
  511x_distinct(Term0, Term) :-
  512    arg(1, Term0, Spec),
  513    compound(Spec),
  514    Spec = distinct(_),
  515    distinct_x(Term0, Term),
  516    !.
  517x_distinct(Term, Term).
  518
  519distinct_x(count(distinct(X)), distinct(X, count)).
  520distinct_x(count(sum(X)), distinct(X, sum)).
  521distinct_x(count(avg(X)), distinct(X, avg)).
  522
  523is_null(X) :-
  524    (   var(X)
  525    ->  true
  526    ;   X == '$null$'
  527    ).
  528
  529                 /*******************************
  530                 *           ENTAILMENT         *
  531                 *******************************/
  532
  533%!  entailment_module(+Entailment, -Module)
  534%
  535%   Find the Prolog module implementing the   entailment rules for a
  536%   semantic web language.
  537
  538entailment_module(Entailment, Module) :-
  539    cliopatria:entailment(Entailment, Module),
  540    !.
  541entailment_module(Entailment, _) :-
  542    throw(error(existence_error(entailment, Entailment), _)).
  543
  544:- multifile
  545    cliopatria:entailment/2.