// Copyright:  Matthias Steffens and the file's
//             original author(s).
//
//             This code is distributed in the hope that it will be useful,
//             but WITHOUT ANY WARRANTY. Please see the GNU General Public
//             License for more details.
//
// File:       ./opensearch.php
// Repository: $HeadURL$
// Author(s):  Matthias Steffens
//
// Created:    04-Feb-06, 21:53
// Modified:   $Date: 2012-02-29 00:42:42 +0000 (Wed, 29 Feb 2012) $
//             $Author$
//             $Revision: 1356 $

// This script serves as a (faceless) routing page which takes an OpenSearch query and
// converts the query into a native refbase query which is then passed to 'show.php'.
// More info is given at .

// Returns an OpenSearch response. Supports the CQL query language, i.e. it allows
// querying all global refbase fields (the given index name must match either one of the
// 'set.index' names listed in the 'sru.php' explain response or match a refbase field
// name directly). If no index name is given, 'cql.serverChoice' will be searched by
// default.

// Examples for recognized OpenSearch queries:
//
// - ask the server to return an OpenSearch Description file:
//     opensearch.php?operation=explain
//
// - find all records where any of the "main fields" contains 'immunology':
//     opensearch.php?query=immunology
//     opensearch.php?query=immunology&recordSchema=atom
//
// - find all records where the title field contains either 'ecology' or 'diversity' but
//   return only three records starting with record number 4:
//     opensearch.php?query=title%20any%20ecology%20diversity&startRecord=4&maximumRecords=3
//
// - ask the server to return JSON-formatted search suggestions for authors whose last names
//   begin with either 'Mil' or 'Bel':
//     opensearch.php?query=author%20any%20Mil%20Bel&recordSchema=json&operation=suggest

// By default, 'opensearch.php' will output OpenSearch Atom XML ('recordSchema=atom') if not
// specified otherwise in the query. Additionally, 'rss', 'srw_dc', 'srw_mods', 'html' and
// 'json' are currently supported as response formats.

// For more info on OpenSearch, see:
//

// TODO: - I18n
//       - proper parsing of CQL query string (currently, 'opensearch.php' allows only for a limited set of CQL queries)
//       - offer support for the boolean CQL operators 'and/or/not' and parentheses
//         (both of the above goals would be accomplished by adopting Rob's CQL-PHP parser, see 'includes/cql.inc.php')
//       - if no context set & index name are given in the query, we should search the user's preferred list of "main fields" by default! (cql.serverChoice)
//       - currently, 'opensearch.php' does not omit the records list in the response if the OpenSearch query did contain 'maximumRecords=0' (as is the case for an SRU query)
//       - finish 'opensearch2xhtml.xsl', and serve it when returning Atom XML
//       - finish the form-based query builder (function 'showQueryPage()')
//       - what should be done with diagnostics when the client has requested html or json?
//       - fix '$citeOrder' issues (see notes in 'rss.php' and below)
//       - include OpenSearch elements in RSS & HTML output (see examples at )
//       - it would be nice if users could somehow pass authentication details with the OpenSearch Query
//       - rewrite HTML using divs + CSS
//       - see also inline comments labeled with "TODO"

// NOTES: - Currently, the JSON response format is only supported when returning search suggestions
//          ('operation=suggest'), i.e.
//          you cannot (yet) retrieve full record data in JSON format
//        - ATM, querying of user-specific fields does only work with a user being logged in


// Incorporate some include files:
include 'initialize/db.inc.php'; // 'db.inc.php' is included to hide username and password
include 'includes/header.inc.php'; // include header
include 'includes/footer.inc.php'; // include footer
include 'includes/include.inc.php'; // include common functions
include 'initialize/ini.inc.php'; // include common variables
include 'includes/atomxml.inc.php'; // include functions that deal with Atom XML
include 'includes/opensearch.inc.php'; // include functions that return an OpenSearch response
include 'includes/srwxml.inc.php'; // include functions that deal with SRW XML
include_once 'includes/webservice.inc.php'; // include functions that are commonly used with the refbase webservices

// --------------------------------------------------------------------

// Extract the ID of the client from which the query originated:
// this identifier is used to identify queries that originated from the refbase command line clients ("cli-refbase-1.1", "cli-refbase_import-1.0"),
// from a bookmarklet (e.g., "jsb-refbase-1.0") or from a browser such as Firefox that uses 'opensearch.php' for search suggestions ("sug-refbase_suggest-1.0")
// (note that the 'client' parameter has to be extracted *before* the call to the 'start_session()' function, since its value is required by this function)
if (isset($_REQUEST['client']))
	$client = $_REQUEST['client'];
else
	$client = "";

// START A SESSION:
// call the 'start_session()' function (from 'include.inc.php') which will also read out available session variables:
start_session(true);

// --------------------------------------------------------------------

// Initialize preferred display language:
// (note that 'locales.inc.php' has to be included *after* the call to the 'start_session()' function)
include 'includes/locales.inc.php'; // include the locales

// --------------------------------------------------------------------

// Extract mandatory parameters passed to the script:

if (isset($_REQUEST['query'])) // contains the keywords to be searched for ('{searchTerms}')
	$cqlQuery = $_REQUEST['query'];
else
	$cqlQuery = "";

// Extract optional parameters passed to the script:

if (isset($_REQUEST['operation']) AND preg_match("/^(explain|suggest|advanced|CQL)$/i", $_REQUEST['operation']))
	$operation = $_REQUEST['operation'];
else
	$operation = "";

if (!empty($_REQUEST['recordSchema'])) // contains the desired response format; currently supports 'atom', 'rss', 'srw_dc', 'srw_mods', 'html' and 'json'
	$recordSchema = $_REQUEST['recordSchema'];
else
	$recordSchema = "atom";

if (isset($_REQUEST['maximumRecords'])) // contains the desired number of search results (OpenSearch equivalent: '{count}')
	$showRows = $_REQUEST['maximumRecords'];
else
	$showRows = $_SESSION['userRecordsPerPage']; // get the default number of records per page preferred by the current user

// 'startRecord' contains the offset of the first search result, starting with one (OpenSearch equivalent: '{startIndex}').
// The value is only used if it is numeric: doing arithmetic on an empty, non-numeric or array value would
// otherwise raise a warning or a TypeError (depending on the PHP version). The offset is clamped at 0 so
// that 'startRecord=0' (or a negative number) cannot produce a negative row offset.
if (isset($_REQUEST['startRecord']) AND is_numeric($_REQUEST['startRecord']))
	$rowOffset = max(0, intval($_REQUEST['startRecord']) - 1); // first row number in a MySQL result set is 0 (not 1)
else
	$rowOffset = ""; // if no (valid) value to the 'startRecord' parameter is given, we'll output records starting with the first record in the result set

if (isset($_REQUEST['stylesheet'])) // contains the desired stylesheet to be returned for transformation of XML data
	$exportStylesheet = $_REQUEST['stylesheet']; // if the 'stylesheet' parameter was given in the query without a value, this will suppress the default stylesheet
else
	$exportStylesheet = "DEFAULT"; // the special keyword "DEFAULT" causes a default stylesheet to be assigned below based on the requested operation and response format

// The following parameters are defined by the OpenSearch Query Syntax specification but aren't supported yet:

// if (isset($_REQUEST['startPage'])) // indicates groups (= pages) of search results, starting with one ('{startPage}'); e.g., if 'maximumRecords=10', 'startPage=3' will cause records 21-30 to be returned
// 	$pageOffset = ($_REQUEST['startPage']);
// else
// 	$pageOffset = "";

// if (isset($_REQUEST['language'])) // indicates that the client desires results in the specified language ('{language}')
// 	$language = ($_REQUEST['language']);
// else
// 	$language = "";

// if (isset($_REQUEST['outputEncoding'])) // indicates that the client desires results in the specified character encoding ('{outputEncoding}')
// 	$outputEncoding = ($_REQUEST['outputEncoding']);
// else
// 	$outputEncoding = "";

// if (isset($_REQUEST['inputEncoding'])) // indicates that query parameters are encoded via the specified character encoding ('{inputEncoding}')
// 	$inputEncoding = ($_REQUEST['inputEncoding']);
// else
// 	$inputEncoding = "";

// Extract the view type requested by the user (either 'Mobile', 'Print', 'Web' or ''):
// ('' will produce the default 'Web' output style)
if (isset($_REQUEST['viewType']))
	$viewType = $_REQUEST['viewType'];
else
	$viewType = "";

// --------------------------------------------------------------------

// Set required variables based on the requested response format:

if (preg_match("/^srw([ _]?(mods|dc))?([ _]?xml)?$/i", $recordSchema)) // if SRW XML is requested as response format
{
	if (preg_match("/^srw[ _]?dc/i", $recordSchema))
	{
		$exportFormat = "SRW_DC XML";

		if ($exportStylesheet == "DEFAULT")
			$exportStylesheet = "srwdc2html.xsl";
	}
	else
	{
		$exportFormat = "SRW_MODS XML";

		if ($exportStylesheet == "DEFAULT")
			$exportStylesheet = "srwmods2html.xsl";
	}

	$displayType = "Export";
	$exportContentType = "application/xml";
	$citeOrder = "";
}
elseif (preg_match("/^rss([ _]?xml)?$/i", $recordSchema)) // if RSS XML is requested as response format
{
	$exportFormat = "RSS XML";
	$displayType = "Export";
	$exportContentType = "application/rss+xml";

	if ($exportStylesheet == "DEFAULT")
		$exportStylesheet = "";

	$citeOrder = ""; // TODO/NOTE: currently, 'rss.php' always sorts records as if '$citeOrder="creation-date"' was given, i.e. it sorts records such that newly added/edited records get listed top of the list; this means that Atom links to alternate formats (such as HTML or SRW XML) might return different records!
}
elseif (preg_match("/^html$/i", $recordSchema)) // if HTML is requested as response format
{
	$exportFormat = ""; // since search results won't be routed thru the 'generateExport()' function, '$exportFormat' will be without effect (which is why we leave it blank)

	if (preg_match("/^Mobile$/i", $viewType)) // for Mobile view, we enforce the compact Citation view
		$displayType = "Cite";
	else
		$displayType = ""; // if '$displayType' is empty, 'show.php' will use the default view that's given in session variable 'userDefaultView'

	$exportContentType = "text/html";

	if ($exportStylesheet == "DEFAULT")
		$exportStylesheet = "";

	$citeOrder = "";
}
elseif (preg_match("/^json$/i", $recordSchema)) // if JSON is requested as response format
{
	$exportFormat = "JSON";
	$displayType = "Export";
	$exportContentType = "application/json";

	if ($exportStylesheet == "DEFAULT")
		$exportStylesheet = "";

	$citeOrder = "";
}
else // by default, OpenSearch Atom XML ('atom') is assumed as response format
{
	$exportFormat = "Atom XML";
	$displayType = "Export";
	$exportContentType = "application/atom+xml";

	if ($exportStylesheet == "DEFAULT")
		$exportStylesheet = ""; // TODO: finish 'opensearch2xhtml.xsl'

	$citeOrder = ""; // TODO/NOTE: '$citeOrder="creation-date"' would sort records such that newly added/edited records get listed top of the list, but then Atom links to alternate formats (such as HTML or SRW XML) would be mismatched!
}

// -------------------------------------------------------------------------------------------------------------------

// TRUE if search suggestions were requested in one of the formats that support them ('html' or 'json'):
// (note that '&&' is used on purpose here since 'AND' binds weaker than the assignment operator)
$isSuggestRequest = (preg_match("/^suggest$/i", $operation) && preg_match("/^(html|json)$/i", $recordSchema));

// Handle the special index 'main_fields':
if (!$isSuggestRequest AND (preg_match("/^main_fields( +(all|any|exact|within) +| *(<>|<=|>=|<|>|=) *)/i", $cqlQuery))) // if the 'main_fields' index is used in conjunction with a non-"suggest" operation
	$cqlQuery = preg_replace("/^main_fields(?= +(all|any|exact|within) +| *(<>|<=|>=|<|>|=) *)/i", "cql.serverChoice", $cqlQuery); // replace 'main_fields' index (which, ATM, is only supported for search suggestions) with 'cql.serverChoice'

// Parse CQL query:
$searchArray = parseCQL("1.1", $cqlQuery, $operation); // function 'parseCQL()' is defined in 'webservice.inc.php'

// Build SQL WHERE clause:
$query = ""; // NOTE: although we don't supply a full SQL query here, the variable MUST be named '$query' to have function 'appendToWhereClause()' work correctly

if (!empty($searchArray))
	appendToWhereClause($searchArray); // function 'appendToWhereClause()' is defined in 'include.inc.php'

// -------------------------------------------------------------------------------------------------------------------

// Check that mandatory parameters have been passed:

// - if 'opensearch.php' was called with 'operation=explain', we'll return an appropriate OpenSearch description document:
if (preg_match("/^explain$/i", $operation))
{
	// Use an appropriate default stylesheet:
	if ($exportStylesheet == "DEFAULT")
		$exportStylesheet = ""; // TODO: create a stylesheet ('opensearchDescription2html.xsl') that's appropriate for the OpenSearch description

	// Set the appropriate mimetype & set the character encoding to the one given
	// in '$contentTypeCharset' (which is defined in 'ini.inc.php'):
	setHeaderContentType("application/opensearchdescription+xml", $contentTypeCharset); // function 'setHeaderContentType()' is defined in 'include.inc.php'

	echo openSearchDescription($exportStylesheet); // function 'openSearchDescription()' is defined in 'opensearch.inc.php'
}

// - if 'opensearch.php' was called with 'operation=suggest' and HTML (or JSON) as the requested response format,
//   we'll return search suggestions that match the 'WHERE' clause given in '$query':
elseif ($isSuggestRequest)
{
	// Set the appropriate mimetype & set the character encoding to the one given
	// in '$contentTypeCharset' (which is defined in 'ini.inc.php'):
	setHeaderContentType($exportContentType, $contentTypeCharset);

	echo searchSuggestions($cqlQuery, $query);
}

// - If 'opensearch.php' was called without any recognized parameters, we'll present a form where a user can build a query:
elseif (!isset($_REQUEST['query']) AND !isset($_REQUEST['recordSchema']) AND !isset($_REQUEST['maximumRecords']) AND !isset($_REQUEST['startRecord']) AND !isset($_REQUEST['stylesheet']))
	showQueryPage($operation, $viewType, $showRows, $rowOffset);

// - If 'opensearch.php' was called without any valid (or with incorrect) parameters, we'll return appropriate 'diagnostics':
elseif (empty($cqlQuery))
	returnDiagnostic(7, "query"); // required 'query' parameter is missing

// - Currently, no other schemas than OpenSearch Atom XML, SRW_DC XML, SRW_MODS XML, RSS XML, HTML and JSON are supported:
elseif (!preg_match("/^((atom|rss)([ _]?xml)?|srw([ _]?(mods|dc))?([ _]?xml)?|html|json)$/i",$recordSchema))
	returnDiagnostic(66, $recordSchema); // unknown record schema

// -------------------------------------------------------------------------------------------------------------------

else // the script was called at least with the required 'query' parameter
{
	// Write the current OpenSearch/CQL query into a session variable:
	// (this session variable is used by functions 'atomCollection()' and 'citeRecords()' (in 'cite_html.php') to re-establish the original OpenSearch/CQL query;
	//  function 'atomCollection()' uses the OpenSearch/CQL query to output 'opensearch.php' URLs instead of 'show.php' URLs)
	saveSessionVariable("cqlQuery", $cqlQuery); // function 'saveSessionVariable()' is defined in 'include.inc.php'

	// Build the correct query URL:
	// (we skip unnecessary parameters here since function 'generateURL()' and 'show.php' will use their default values for them)
	$queryParametersArray = array("where"            => $query,
	                              "submit"           => $displayType,
	                              "viewType"         => $viewType,
	                              "exportStylesheet" => $exportStylesheet
	                             );

	// NOTE: The 'show.php' script allows anonymous users to query the 'cite_key' field (if a valid 'userID' is included in the query URL).
	//       However, this requires that the cite key is passed in the 'cite_key' URL parameter. Since 'opensearch.php' uses the 'where'
	//       parameter to pass its query, anonymous querying of the 'cite_key' field currently does not work for 'opensearch.php'. But
	//       querying of user-specific fields will work if a user is logged in.
	if (isset($_SESSION['loginEmail'])) // we only include the 'userID' parameter if the user is logged in
		$queryParametersArray["userID"] = $loginUserID; // for user-specific fields (such as the 'cite_key' field), 'show.php' requires the 'userID' parameter

	// call 'show.php' (or 'rss.php' in case of RSS XML) with the correct query URL in order to output record details in the requested format:
	$queryURL = generateURL("show.php", $exportFormat, $queryParametersArray, false, $showRows, $rowOffset, "", $citeOrder); // function 'generateURL()' is defined in 'include.inc.php'

	header("Location: $queryURL");
}

// -------------------------------------------------------------------------------------------------------------------

// Return a diagnostic error message:
//
// $diagCode    - the numeric diagnostic code (this script passes 7 for a missing 'query' parameter
//                and 66 for an unknown record schema)
// $diagDetails - additional details for the diagnostic (the name of the missing parameter or the
//                offending record schema)
//
// The diagnostic is echoed directly; nothing is returned. SRW record schemas get SRW diagnostics,
// every other schema gets OpenSearch diagnostics wrapped into Atom XML.
function returnDiagnostic($diagCode, $diagDetails)
{
	global $recordSchema;
	global $exportContentType;
	global $contentTypeCharset; // '$contentTypeCharset' is defined in 'ini.inc.php'
	global $exportStylesheet;

	$wantsSRW = preg_match("/^srw([ _]?(mods|dc))?([ _]?xml)?$/i", $recordSchema);

	// The mimetype must describe what is actually sent: for non-SRW schemas the diagnostics are always
	// Atom XML, even if the client asked for 'rss', 'html' or 'json' (for which '$exportContentType'
	// would announce a format that the response body doesn't have).
	if ($wantsSRW)
		$diagContentType = $exportContentType;
	else
		$diagContentType = "application/atom+xml";

	// Set the appropriate mimetype & set the character encoding to the one given in '$contentTypeCharset':
	setHeaderContentType($diagContentType, $contentTypeCharset); // function 'setHeaderContentType()' is defined in 'include.inc.php'

	if ($wantsSRW)
		// Return SRW diagnostics (i.e. SRW error information) wrapped into SRW XML ('searchRetrieveResponse'):
		echo srwDiagnostics($diagCode, $diagDetails, $exportStylesheet); // function 'srwDiagnostics()' is defined in 'srwxml.inc.php'

//	elseif (preg_match("/^html$/i", $recordSchema))
//		// TODO!

//	elseif (preg_match("/^json$/i", $recordSchema))
//		// TODO!

	else
		// Return OpenSearch diagnostics (i.e. OpenSearch error information) wrapped into OpenSearch Atom XML:
		echo openSearchDiagnostics($diagCode, $diagDetails, $exportStylesheet); // function 'openSearchDiagnostics()' is defined in 'opensearch.inc.php'
}

// -------------------------------------------------------------------------------------------------------------------

// Return search suggestions that match the 'WHERE' clause given in '$query':
//
// NOTE: Currently, if you specify a multi-item field with 'all' as a relation (as in 'keywords all ...'), only the
//       first search term is used to generate search suggestions (though the other search terms will be used to
//       restrict the list of search suggestions to only those where the queried field contains ALL search terms).
//
// TODO: - should we support the 'maximumRecords' and 'startRecord' URL parameters for search suggestions?
//       - search suggestions for the 'location' field (and possibly other fields) should be omitted if the user isn't logged in!
function searchSuggestions($cqlQuery, $query)
{
	// Parameters:
	//   $cqlQuery - the OpenSearch/CQL query as entered by the client; it is echoed back as first
	//               element of the JSON response
	//   $query    - the SQL 'WHERE' clause fragment that was built from the CQL query
	//
	// Returns a string: for 'recordSchema=json' a JSON array of the form
	//   ["fir", ["firefox", "first choice", "mozilla firefox"]]
	// and otherwise an unordered HTML list of the form
	//   <ul><li>firefox</li><li>first choice</li><li>mozilla firefox</li></ul>

	global $recordSchema;
	global $loginUserID;
	global $tableRefs, $tableUserData; // defined in 'db.inc.php'
	global $connection;
	global $client;

	$wantsJSON = preg_match("/^json$/i", $recordSchema);

	$outputDataArray = array(); // make sure that the buffer variable is empty

	// Without a 'WHERE' clause there's nothing to base suggestions on; we skip the database lookup
	// in this case since the filter built below ("\".+\" AND $query") would end with a dangling 'AND'
	if (trim($query) !== "")
	{
		// Extract the first field & search pattern from the 'WHERE' clause:
		// (these will be used to retrieve search suggestions)
		$origSearchSuggestionsField = preg_replace("/^[ ()]*(\w+).*/i", "\\1", $query);
		$searchSuggestionsPattern = preg_replace("/.*? (?:RLIKE|[=<>]+) \"?(.+?)\"?(?=( *\) *?)*( +(AND|OR)\b|$)).*/i", "\\1", $query); // only the first search term is used for suggestions

		$usesMainFields = preg_match("/^main_fields$/i", $origSearchSuggestionsField);

		if ($usesMainFields) // fetch search suggestions for all of the user's "main fields"
			$searchSuggestionsFieldsArray = preg_split("/ *, */", $_SESSION['userMainFields']); // get the list of "main fields" preferred by the current user
		else
			$searchSuggestionsFieldsArray = array($origSearchSuggestionsField); // we only need to fetch search suggestions for one field

		// Retrieve matching search suggestions for each field given in '$searchSuggestionsFieldsArray':
		foreach ($searchSuggestionsFieldsArray as $searchSuggestionsField)
		{
			if ($usesMainFields)
				$searchSuggestionsQuery = preg_replace("/\bmain_fields\b/i", $searchSuggestionsField, $query); // replace 'main_fields' (which doesn't exist as SQL field name) with the current field
			else
				$searchSuggestionsQuery = $query;

			// Check whether we need to split field values for this field:
			if (preg_match("/^(author|keywords|abstract|address|corporate_author|place|editor|language|summary_language|series_editor|area|expedition|notes|location|call_number|created_by|modified_by|user_keys|user_notes|user_groups|related)$/i", $searchSuggestionsField))
				$splitValues = true;
			else
				$splitValues = false;

			// Define split patterns for this field:
			if (preg_match("/^(author|corporate_author|editor|series_editor)$/i", $searchSuggestionsField))
				$splitPattern = " *[;()/]+ *";
			elseif (preg_match("/^abstract$/i", $searchSuggestionsField))
				$splitPattern = "\s*[,.()/?!]+\s+|\s+[,.()/?!]\s*|\s+-\s+"; // TODO: can (or should) abstracts be split in a better way?
			elseif (preg_match("/^(place|notes|location|user_notes|user_groups|related)$/i", $searchSuggestionsField))
				$splitPattern = " *[;]+ *";
			elseif (preg_match("/^(call_number)$/i", $searchSuggestionsField))
				$splitPattern = " *[;@]+ *";
			else
				$splitPattern = " *[,;()/]+ *";

			// Produce the list of search suggestions for this field:
			// (function 'selectDistinct()' is defined in 'include.inc.php')
			$searchSuggestionsArray = selectDistinct($connection,
			                                         $tableRefs,
			                                         "serial",
			                                         $tableUserData,
			                                         "record_id",
			                                         "user_id",
			                                         $loginUserID,
			                                         $searchSuggestionsField,
			                                         "",
			                                         "",
			                                         "",
			                                         "",
			                                         "serial",
			                                         "\".+\" AND $searchSuggestionsQuery", // this is a somewhat hacky workaround that works around current limitations in function 'selectDistinct()'
			                                         $splitValues,
			                                         $splitPattern,
			                                         "ARRAY",
			                                         $searchSuggestionsPattern,
			                                         false);

			if (!empty($searchSuggestionsArray))
			{
				// Prefix each item with an index name and relation:
				//
				// NOTE: When the user selects a search suggestion in Firefox's search box, Firefox replaces the
				//       user-entered data in the browser's search field with the chosen search suggestion. This
				//       removes any CQL index and relation that was entered by the user (e.g. "keywords any ...")
				//       and 'cql.serverChoice' will be searched instead. Since this would lead to unexpected (or
				//       zero) results, we prefix all search suggestions with the index name and the '=' relation.
				//
				// TODO: This will need to be revised if 'cql.serverChoice' is mapped to the user's preferred list
				//       of "main fields". Even better would be if browsers would support alternate query URLs for
				//       each suggestion in the completion list.
				if ($wantsJSON AND preg_match("/^sug/i", $client)) // e.g. "sug-refbase_suggest-1.0"
					$searchSuggestionsArray = preg_replace('/^/', "$searchSuggestionsField = ", $searchSuggestionsArray);

				$outputDataArray = array_merge($outputDataArray, $searchSuggestionsArray); // append this field's search suggestions to the array of found search suggestions
			}
		}

		if (!empty($outputDataArray) AND $usesMainFields) // otherwise, data are already unique and ordered
		{
			// Remove duplicate values from array:
			$outputDataArray = array_unique($outputDataArray);

			// Sort in ascending order:
			sort($outputDataArray);
		}
	}

	if ($wantsJSON) // return JSON-formatted search suggestions:
	{
		// Every string is escaped individually so that quotes, backslashes or control characters in
		// the user's query or in the stored field values cannot break (or inject into) the JSON document
		$outputData = implode(', ', array_map('jsonQuoteSuggestion', $outputDataArray));

		return '[' . jsonQuoteSuggestion($cqlQuery) . ', [' . $outputData . ']]'; // e.g.: ["fir", ["firefox", "first choice", "mozilla firefox"]]
	}
	else // return HTML-formatted search suggestions (as unordered HTML list):
	{
		if (!empty($outputDataArray))
			$outputData = "<li>" . implode("</li><li>", $outputDataArray) . "</li>";
		else
			$outputData = "";

		return "<ul>" . $outputData . "</ul>"; // e.g.: <ul><li>firefox</li><li>first choice</li><li>mozilla firefox</li></ul>
	}
}

// Wrap a string into double quotes and escape it for use as a JSON string literal:
//
// Only the double quote, the backslash and the ASCII control characters (0x00-0x1F) are escaped.
// All of these are single bytes, and the replacement works byte-wise, so bytes >= 0x80 are passed
// through untouched regardless of the character encoding used for the text.
function jsonQuoteSuggestion($text)
{
	static $escapeMap = null;

	if ($escapeMap === null)
	{
		$escapeMap = array("\\" => "\\\\",
		                   "\"" => "\\\"",
		                   "\n" => "\\n",
		                   "\r" => "\\r",
		                   "\t" => "\\t");

		// All remaining control characters get the generic '\u00XX' form:
		for ($code = 0; $code < 32; $code++)
		{
			if (!isset($escapeMap[chr($code)]))
				$escapeMap[chr($code)] = sprintf("\\u%04x", $code);
		}
	}

	return '"' . strtr((string) $text, $escapeMap) . '"';
}

// ------------------------------------------------------------------------------------------------------------------- // Present a form where a user can build a query: function showQueryPage($operation, $viewType, $showRows, $rowOffset) { global $officialDatabaseName; // defined in 'ini.inc.php' global $displayType; global $loc; // defined in 'locales/core.php' global $client; // If there's no stored message available: if (!isset($_SESSION['HeaderString'])) $HeaderString = $loc["SearchDB"].":"; // Provide the default message else { $HeaderString = $_SESSION['HeaderString']; // extract 'HeaderString' session variable (only necessary if register globals is OFF!) // Note: though we clear the session variable, the current message is still available to this script via '$HeaderString': deleteSessionVariable("HeaderString"); // function 'deleteSessionVariable()' is defined in 'include.inc.php' } // For HTML output, we'll need to reset the value of the '$displayType' variable // (which, by default, is set to "Export"; see above); otherwise, the 'originalDisplayType' // parameter in the 'quickSearch' form of the page header would be incorrectly set to "Export" $displayType = ""; // if '$displayType' is empty, 'show.php' will use the default view that's given in session variable 'userDefaultView' // Show the login status: showLogin(); // (function 'showLogin()' is defined in 'include.inc.php') // DISPLAY header: // call the 'displayHTMLhead()' and 'showPageHeader()' functions (which are defined in 'header.inc.php'): displayHTMLhead(encodeHTML($officialDatabaseName) . " -- " . $loc["Search"], "index,follow", "Search the " . encodeHTML($officialDatabaseName), "", true, "", $viewType, array()); if ((!preg_match("/^Mobile$/i", $viewType)) AND (!preg_match("/^inc/i", $client))) // Note: we omit the visible header in mobile view ('viewType=Mobile') and for include mechanisms! showPageHeader($HeaderString); // Define variables holding common drop-down elements, i.e. 
build properly formatted