You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

688 lines
36 KiB

  1. <?php
  2. // Project: Web Reference Database (refbase) <http://www.refbase.net>
  3. // Copyright: Matthias Steffens <mailto:refbase@extracts.de> and the file's
  4. // original author(s).
  5. //
  6. // This code is distributed in the hope that it will be useful,
  7. // but WITHOUT ANY WARRANTY. Please see the GNU General Public
  8. // License for more details.
  9. //
  10. // File: ./includes/webservice.inc.php
  11. // Repository: $HeadURL: file:///svn/p/refbase/code/branches/bleeding-edge/includes/webservice.inc.php $
  12. // Author(s): Matthias Steffens <mailto:refbase@extracts.de>
  13. //
  14. // Created: 04-Feb-06, 22:02
  15. // Modified: $Date: 2012-02-27 20:25:30 +0000 (Mon, 27 Feb 2012) $
  16. // $Author: msteffens $
  17. // $Revision: 1337 $
  18. // This include file contains functions that are used in conjunction with the refbase webservices.
  19. // Requires ActiveLink PHP XML Package, which is available under the GPL from:
  20. // <http://www.active-link.com/software/>. See 'sru.php' and 'opensearch.php' for more info.
  21. // Import the ActiveLink Packages
  22. require_once("classes/include.php");
  23. import("org.active-link.xml.XML");
  24. import("org.active-link.xml.XMLDocument");
  25. // --------------------------------------------------------------------
  26. // Add a new XML branch, optionally with an attribute and tag content:
  27. //
  28. // TODO: this function should also accept arrays to add multiple content tags
  // Creates a new 'XMLBranch' named '$elementName', applies the given attributes and content to it
  // and appends it to '$thisBranch'.
  //
  // Parameters:
  //   $thisBranch            - parent object that receives the new branch via its 'addXMLBranch()' method
  //   $elementName           - tag name of the new branch
  //   $elementAttributeArray - attribute name => attribute value pairs; an empty value adds no attributes
  //   $elementContent        - tag content; an empty value leaves the tag without content
  //
  // Returns nothing; the parent object is modified in place.
  function addNewBranch(&$thisBranch, $elementName, $elementAttributeArray, $elementContent)
  {
      $newBranch = new XMLBranch($elementName);

      if (!empty($elementAttributeArray))
      {
          foreach ($elementAttributeArray as $elementAttributeKey => $elementAttributeValue)
              $newBranch->setTagAttribute($elementAttributeKey, $elementAttributeValue);
      }

      // 'empty()' also reports 0 and "0" as empty, which would silently drop a legitimate
      // zero value (e.g. a record count of 0), so numeric content is always written:
      if (!empty($elementContent) OR is_numeric($elementContent))
          $newBranch->setTagContent($elementContent);

      $thisBranch->addXMLBranch($newBranch);
  }
  39. // -------------------------------------------------------------------------------------------------------------------
  40. // Parse CQL query:
  41. // This function parses a CQL query into its elements (context set, index, relation and search term(s)),
  42. // builds appropriate SQL search terms and returns a hierarchical array containing the converted search terms
  43. // (this array, in turn, gets merged into a full SQL WHERE clause by function 'appendToWhereClause()' in
  44. // 'include.inc.php')
  45. //
  46. // NOTE: we don't provide a full CQL parser here but will (for now) concentrate on a rather limited feature
  47. // set that makes sense in conjunction with refbase. However, future versions should employ far better
  48. // CQL parsing logic.
  49. //
  50. // TODO: the special index 'main_fields' should be mapped to the user's preferred list of "main fields"
  // Translates a single-clause CQL query into the nested search array returned by this function.
  //
  // Parameters:
  //   $sruVersion - SRU version given by the client (not evaluated inside this function)
  //   $sruQuery   - raw CQL query, e.g. 'dc.title all "marine ecology"'; an empty query selects all records
  //   $operation  - name of the requested operation; "suggest" (matched case-insensitively) makes
  //                 'main_fields' the default index and appends the '*' masking character to every word
  //
  // Returns:
  //   array( array("_boolean" => "",
  //                "_query"   => array( array("_boolean" => "", "_query" => <SQL condition>) )) )
  //
  // An unsupported context set (diagnostic 15), an unsupported index (16), an unsupported set/index
  // combination (10) or a malformed 'within' term (36) is reported via 'returnDiagnostic()', after
  // which script execution stops.
  //
  // The word-matching relations ('all', 'any', '=', '<>') build an RLIKE pattern from the regex-escaped
  // search term. The comparing relations ('exact', 'within', '<', '<=', '>', '>=') use the search term
  // literally, since a regex-escaped value (e.g. '2004\-01\-01') would never equal the field contents.
  //
  // NOTE(review): 'mapCQLIndexes()' lists an 'oai.identifier' index, but 'oai' is not among the context
  //               sets accepted below, so that index cannot be addressed via its context set -- confirm
  //               whether this is intended.
  function parseCQL($sruVersion, $sruQuery, $operation = "")
  {
      global $word, $patternModifiers; // defined in 'transtab_unicode_charset.inc.php' and 'transtab_latin1_charset.inc.php'

      // map CQL indexes to refbase field names:
      $indexNamesArray = mapCQLIndexes();

      $searchArray = array(); // initialize array that will hold information about context set, index name, relation and search value
      $searchSubArray1 = array();

      // --------------------------------

      if (!empty($sruQuery))
      {
          $isSuggestOperation = preg_match("/^suggest$/i", $operation);

          // check for presence of context set/index name and any of the main relations:
          if (!preg_match('/^[^\" <>=]+( +(all|any|exact|within) +| *(<>|<=|>=|<|>|=) *)/', $sruQuery))
          {
              // if no context set/index name and relation was given we'll add meaningful defaults:
              if ($isSuggestOperation)
                  $sruQuery = "main_fields all " . $sruQuery; // for OpenSearch search suggestions, we use the special 'main_fields' index by default
              else
                  $sruQuery = "cql.serverChoice all " . $sruQuery; // otherwise we currently use 'cql.serverChoice' (since 'main_fields' isn't yet supported for regular OpenSearch queries)
          }

          // extract the context set:
          if (preg_match('/^([^\" <>=.]+)\./', $sruQuery))
              $contextSet = preg_replace('/^([^\" <>=.]+)\..*/', '\\1', $sruQuery);
          else
              $contextSet = ""; // use the default context set

          // extract the index:
          $indexName = preg_replace('/^(?:[^\" <>=.]+\.)?([^\" <>=.]+).*/', '\\1', $sruQuery);

          // ----------------

          // return a fatal diagnostic if the CQL query does contain an unrecognized 'set.index' identifier:
          // (a) verify that the given context set (if any) is recognized:
          if (!empty($contextSet))
          {
              $contextSetIndexConnector = ".";
              $contextSetLabel = "context set '" . $contextSet . "'";

              if (!preg_match("/^(dc|bath|rec|bib|cql)$/", $contextSet))
              {
                  returnDiagnostic(15, $contextSet); // unsupported context set (function 'returnDiagnostic()' is defined in 'opensearch.php' and 'sru.php')
                  exit;
              }
          }
          else
          {
              $contextSetIndexConnector = "";
              $contextSetLabel = "empty context set";
          }

          // key under which the requested index is looked up in '$indexNamesArray':
          $indexKey = $contextSet . $contextSetIndexConnector . $indexName;

          // (b) verify that the given 'set.index' term is recognized:
          if (!isset($indexNamesArray[$indexKey]))
          {
              // the index name may still be known without a context set or within another context set:
              $indexKnownElsewhere = (isset($indexNamesArray[$indexName])
                                      OR isset($indexNamesArray["dc." . $indexName])
                                      OR isset($indexNamesArray["bath." . $indexName])
                                      OR isset($indexNamesArray["rec." . $indexName])
                                      OR isset($indexNamesArray["bib." . $indexName])
                                      OR isset($indexNamesArray["cql." . $indexName]));

              if ($indexKnownElsewhere)
                  returnDiagnostic(10, "Unsupported combination of " . $contextSetLabel . " with index '" . $indexName . "'"); // unsupported combination of context set & index
              else
                  returnDiagnostic(16, $indexName); // unsupported index

              exit;
          }

          $fieldName = $indexNamesArray[$indexKey]; // refbase field that the WHERE clause operates on

          // ----------------

          // extract the main relation (relation modifiers aren't supported yet!):
          $mainRelation = preg_replace('/^[^\" <>=]+( +(all|any|exact|within) +| *(<>|<=|>=|<|>|=) *).*/', '\\1', $sruQuery);

          // remove any runs of leading or trailing whitespace:
          $mainRelation = trim($mainRelation);

          // ----------------

          // extract the search term:
          $searchTerm = preg_replace('/^[^\" <>=]+(?: +(?:all|any|exact|within) +| *(?:<>|<=|>=|<|>|=) *)(.*)/', '\\1', $sruQuery);

          // remove slashes from search term if 'magic_quotes_gpc = On':
          $searchTerm = stripSlashesIfMagicQuotes($searchTerm); // function 'stripSlashesIfMagicQuotes()' is defined in 'include.inc.php'

          // remove any leading or trailing quotes from the search term:
          // (note that multiple query parts connected with boolean operators aren't supported yet!)
          $searchTerm = preg_replace('/^\"/', '', $searchTerm);
          $searchTerm = preg_replace('/\"$/', '', $searchTerm);

          // keep the unescaped term for the relations that compare values instead of matching a pattern
          // (everything done to '$searchTerm' below only makes sense inside a regular expression):
          $literalSearchTerm = $searchTerm;

          // OpenSearch search suggestions ('$operation=suggest'): since CQL matches full words (not sub-strings),
          // we need to make sure that every search term ends with the '*' masking character:
          if ($isSuggestOperation AND ($mainRelation != "exact"))
              $searchTerm = preg_replace("/([$word]+)(?![?*^])/$patternModifiers", "\\1*", $searchTerm);

          // escape meta characters (including '/' that is used as delimiter for the PCRE replace functions below and which gets passed as second argument):
          $searchTerm = preg_quote($searchTerm, "/"); // escape special regular expression characters: . \ + * ? [ ^ ] $ ( ) { } = ! < > | :

          // account for CQL anchoring ('^') and masking ('*' and '?') characters:
          // NOTE: the term has just been quoted, so all special chars below have to be matched in their
          //       escaped form! (The expression '\\\\' in the patterns below describes only *one*
          //       backslash: backslashes must be escaped once for PHP and once for the regex engine.)
          //
          // more info about masking characters in CQL:  <http://zing.z3950.org/cql/intro.html#6>
          // more info about word anchoring in CQL:      <http://zing.z3950.org/cql/intro.html#6.1>

          // recognize any anchor at the beginning of a search term (like '^foo'):
          // (in CQL, a word beginning with ^ must be the first in its field)
          $searchTerm = preg_replace('/(^| )\\\\\^/', '\\1^', $searchTerm);

          // convert any anchor at the end of a search term (like 'foo^') to the correct MySQL variant ('foo$'):
          // (in CQL, a word ending with ^ must be the last in its field)
          $searchTerm = preg_replace('/\\\\\^( |$)/', '$\\1', $searchTerm);

          // recognize any masking ('*' and '?') characters:
          // Note: by "character" we do refer to *word* characters here, i.e., any character that is not a space or punctuation character;
          //       however, I'm not sure if the masking characters '*' and '?' should also include non-word characters!
          $searchTerm = preg_replace('/(?<!\\\\)\\\\\*/', '[^[:space:][:punct:]]*', $searchTerm); // a single asterisk ('*') is used to mask zero or more characters
          $searchTerm = preg_replace('/(?<!\\\\)\\\\\?/', '[^[:space:][:punct:]]', $searchTerm); // a single question mark ('?') is used to mask a single character, thus N consecutive question-marks means mask N characters

          // ----------------

          // construct the WHERE clause:
          $whereClausePart = $fieldName; // start WHERE clause with field name

          // pattern fragments that restrict an RLIKE match to full words:
          // NOTE: we could also use the word boundaries '[[:<:]]' and '[[:>:]]' which would be more (too?) restrictive
          $wordStartPattern = '(^|[[:space:][:punct:]])';
          $wordEndPattern = '([[:space:][:punct:]]|$)';

          // NOTE: a relation that isn't handled below leaves '$whereClausePart' as the bare field name
          switch ($mainRelation)
          {
              case "all": // matches full words (not sub-strings); 'all' means "all of these words"
                  $wordConditions = array();

                  // a term without spaces yields a single item, i.e. a single RLIKE condition:
                  foreach (preg_split("/ +/", $searchTerm) as $searchTermItem)
                      $wordConditions[] = " RLIKE " . quote_smart($wordStartPattern . $searchTermItem . $wordEndPattern);

                  $whereClausePart .= implode(" AND " . $fieldName, $wordConditions);
                  break;

              case "any": // matches full words (not sub-strings); 'any' means "any of these words"
                  $searchTerm = splitAndMerge("/ +/", "|", $searchTerm); // function 'splitAndMerge()' is defined in 'include.inc.php'
                  $whereClausePart .= " RLIKE " . quote_smart($wordStartPattern . "(" . $searchTerm . ")" . $wordEndPattern);
                  break;

              case "exact": // 'exact' is used for exact string matching, i.e., it matches field contents exactly
                  $whereClausePart .= " = " . quote_smart($literalSearchTerm);
                  break;

              case "within": // matches a range (i.e. requires two space-separated dimensions)
                  if (preg_match("/[^ ]+ [^ ]+/", $literalSearchTerm))
                  {
                      // trim & skip empty items so that surrounding spaces can't produce an empty range limit:
                      $rangeLimits = preg_split("/ +/", trim($literalSearchTerm), -1, PREG_SPLIT_NO_EMPTY);

                      $whereClausePart .= " >= " . quote_smart($rangeLimits[0]) . " AND " . $fieldName . " <= " . quote_smart($rangeLimits[1]);
                  }
                  else
                  {
                      returnDiagnostic(36, "Search term requires two space-separated dimensions. Example: dc.date within \"2004 2005\"");
                      exit;
                  }
                  break;

              case "=": // matches full words (not sub-strings); '=' is used for word adjacency, the words appear in that order with no others intervening
                  $whereClausePart .= " RLIKE " . quote_smart($wordStartPattern . $searchTerm . $wordEndPattern);
                  break;

              case "<>": // does this also match full words (and not sub-strings) ?:-/
                  $whereClausePart .= " NOT RLIKE " . quote_smart($wordStartPattern . $searchTerm . $wordEndPattern);
                  break;

              case "<":
              case "<=":
              case ">":
              case ">=": // plain value comparisons use the relation as SQL operator
                  $whereClausePart .= " " . $mainRelation . " " . quote_smart($literalSearchTerm);
                  break;
          }

          $searchSubArray1[] = array("_boolean" => "",
                                     "_query"   => $whereClausePart);
      }

      // --------------------------------

      else // '$sruQuery' was empty -> return all records:
      {
          $searchSubArray1[] = array("_boolean" => "",
                                     "_query"   => "serial RLIKE " . quote_smart(".+"));
      }

      // --------------------------------

      if (!empty($searchSubArray1))
          $searchArray[] = array("_boolean" => "",
                                 "_query"   => $searchSubArray1);

      return $searchArray;
  }
  216. // -------------------------------------------------------------------------------------------------------------------
  217. // Add a metadata element to the given object:
  218. // As an example, the function call 'addMetaElement($object, "dc", "title", array("lang" => "en"), "this is a title")'
  219. // would add '<dc:title lang="en">this is a title</dc:title>' as a new branch to the given '$object'.
  220. //
  221. // TODO: expand function so that it can be also used for formats other than XML (e.g. HTML)
  // Adds one or more '<namespace:elementName>' branches holding '$elementContent' to '$object'.
  //
  // Parameters:
  //   $object                - object that receives the new branch(es) via 'addNewBranch()'
  //   $namespace             - namespace prefix (e.g. "dc"); if empty, the element name is used as is
  //   $elementName           - element name without namespace (e.g. "title", "identifier", "subject")
  //   $elementAttributeArray - attribute name => attribute value pairs that are set on every added element
  //   $elementContent        - element content; preprocessed depending on '$elementName' & '$elementType'
  //   $elementType           - qualifies the content (e.g. "doi", "pmid", "arxiv", "isbn", "issn", "openurl",
  //                            "url", "citekey", "citation", "series", "file"); for '$elementName="type"'
  //                            it is supposed to hold the thesis type from the refbase 'thesis' field
  //   $format                - output format (not evaluated inside this function)
  //
  // Returns: 'true' if at least the attempt to add an element was made, 'false' if the element name
  //          or the element content was empty (in which case nothing is added).
  //
  // Elements 'creator', 'contributor', 'subject' and 'language' are split into multiple values, each
  // of which is added as its own element.
  function addMetaElement(&$object, $namespace, $elementName, $elementAttributeArray, $elementContent, $elementType = "", $format = "xml")
  {
      // nothing to add without an element name and some content:
      if (empty($elementName) OR empty($elementContent))
          return false;

      // Fixed prefixes that are prepended to the element content, keyed by element name & element type:
      // NOTE: the prefixes 'url:', 'citekey:', 'citation:', 'series:' and 'file:' are non-standard;
      //       the ISBN or ISSN could also be output as a value URI within a 'dcterms:isPartOf'
      //       relation property, e.g.: '<dcterms:isPartOf>urn:ISSN:0740-8188</dcterms:isPartOf>'
      $contentPrefixes = array(
          "identifier" => array("doi"      => "info:doi/",
                                "isbn"     => "urn:ISBN:",
                                "issn"     => "urn:ISSN:",
                                "url"      => "url:",
                                "citekey"  => "citekey:",
                                "citation" => "citation:"),
          "source"     => array("series"   => "series:",
                                "issn"     => "urn:ISSN:"),
          "relation"   => array("url"      => "url:",
                                "file"     => "file:")
      );

      // Preprocess element contents (if necessary):

      // - 'creator', 'contributor':
      if (preg_match("/^(creator|contributor)$/", $elementName))
          $elementContent = getPersons($elementContent); // get an array of all creators (i.e. authors) or contributors (e.g. editors)

      // - 'identifier' (PMID):
      elseif ($elementName == "identifier" AND $elementType == "pmid")
      {
          // extract any PubMed ID from the given '$elementContent'; capturing the ID (instead of
          // replacing everything around it) also works for content that spans multiple lines;
          // content without a recognizable ID is passed on unchanged:
          if (preg_match('/PMID *: *(\d+)/i', $elementContent, $idMatch))
              $elementContent = $idMatch[1];

          $elementContent = "info:pmid/" . $elementContent;
      }

      // - 'identifier' (arXiv):
      elseif ($elementName == "identifier" AND $elementType == "arxiv")
      {
          // extract any arXiv ID from the given '$elementContent' (the ID ends at the first
          // whitespace character or semicolon); see notes for PMID above:
          if (preg_match('/arXiv *: *([^\s;]+)/i', $elementContent, $idMatch))
              $elementContent = $idMatch[1];

          $elementContent = "info:arxiv/" . $elementContent;
      }

      // - 'identifier' (OpenURL):
      elseif ($elementName == "identifier" AND $elementType == "openurl")
      {
          if (!preg_match("/^openurl:/", $elementContent))
              $elementContent = "openurl:" . $elementContent; // use "openurl:" prefix if doesn't already exist in the given OpenURL
      }

      // - 'type':
      elseif ($elementName == "type")
      {
          if (preg_match("/^((Simple|oai)?[- _]?(dc|Dublin[- _]?Core)[- _]?(terms)?)$/i", $namespace))
          {
              // Map refbase types to the corresponding eprint/resource types suggested for Simple
              // Dublin Core (<http://eprints-uk.rdn.ac.uk/project/docs/simpledc-guidelines/#type>):
              $dcTypesArray = mapDCTypes();

              if (!empty($elementType)) // a given thesis type always maps to the DC type for theses
                  $elementContent = $dcTypesArray["Thesis"];
              elseif (isset($dcTypesArray[$elementContent]))
                  $elementContent = $dcTypesArray[$elementContent];
          }
      }

      // - 'identifier', 'source', 'relation' with a fixed prefix:
      elseif (is_string($elementName) AND is_string($elementType) AND isset($contentPrefixes[$elementName][$elementType]))
          $elementContent = $contentPrefixes[$elementName][$elementType] . $elementContent;

      // - 'subject':
      if ($elementName == "subject")
          $elementContent = preg_split("/\s*;\s*/", $elementContent, -1, PREG_SPLIT_NO_EMPTY); // get an array of all keywords

      // - 'language':
      //   TODO: convert to ISO notation (i.e. "en" instead of "English", etc)
      //         see <http://www.loc.gov/standards/iso639-2/php/code_list.php>
      if ($elementName == "language")
          $elementContent = preg_split("/\s*[;,]\s*/", $elementContent, -1, PREG_SPLIT_NO_EMPTY); // get an array of all languages

      // Prefix element name with given namespace:
      if (!empty($namespace))
          $elementName = $namespace . ":" . $elementName;

      // Add metadata element(s) to the given object:
      if (is_array($elementContent)) // add each array item as a new element:
      {
          foreach ($elementContent as $singleElement)
              addNewBranch($object, $elementName, $elementAttributeArray, $singleElement);
      }
      else // add string in '$elementContent' as a new element:
          addNewBranch($object, $elementName, $elementAttributeArray, $elementContent);

      return true;
  }
  344. // --------------------------------------------------------------------
  345. // Split a string of person names (authors/editors) into an array:
  // Returns the individual person names contained in '$personString' as an array.
  //
  // Parameters:
  //   $personString           - string of person names (authors/editors)
  //   $standardizePersonNames - if 'true', the string is first passed through 'reArrangeAuthorContents()'
  //                             (defined in 'include.inc.php') in an attempt to adhere to the recommendations
  //                             given at <http://eprints-uk.rdn.ac.uk/project/docs/simpledc-guidelines/#creator>
  //   $betweenNamesDelim      - pattern matching the delimiter between persons; search pattern delimiters
  //                             ('/.../') are added if missing
  //   $nameGivenDelim         - pattern matching the delimiter between family name and given name(s)/initials
  //   $newBetweenGivensDelim  - delimiter that is put between multiple initials of one person when standardizing
  //
  // Empty items are dropped from the returned array.
  function getPersons($personString, $standardizePersonNames = true, $betweenNamesDelim = "/ *; */", $nameGivenDelim = "/ *, */", $newBetweenGivensDelim = ".")
  {
      if ($standardizePersonNames)
      {
          // Re-order the contents of the author field; the numbers refer to the parameter
          // positions of function 'reArrangeAuthorContents()':
          $personString = reArrangeAuthorContents(
              $personString,          //  1. input:  contents of the author field
              true,                   //  2. input:  family name comes first (within one author) in the source string
              $betweenNamesDelim,     //  3. input:  pattern describing old delimiter that separates different authors
              "; ",                   //  4. output: new delimiter between authors (all authors except the last one)
              "; ",                   //  5. output: new delimiter that separates the last author from all other authors
              $nameGivenDelim,        //  6. input:  pattern describing old delimiter between author name & initials
              ", ",                   //  7. output: new delimiter between name & initials (first author)
              ", ",                   //  8. output: new delimiter between name & initials (all other authors)
              $newBetweenGivensDelim, //  9. output: new delimiter that separates multiple initials
              false,                  // 10. output: initials go *after* the name (first author)
              false,                  // 11. output: initials go *after* the name (all other authors)
              true,                   // 12. output: shorten full given name(s) to initial(s)
              "",                     // 13. output: author count threshold; empty = return all authors
              "",                     // 14. output: number of authors kept above the threshold; not applicable
              "",                     // 15. output: string appended to a shortened author list; not applicable
              false                   // 16. output: don't HTML encode higher ASCII chars
          );

          // the re-arranged string uses '; ' between persons, whatever the input delimiter was:
          $betweenNamesDelim = "/\s*;\s*/";
      }

      // add search pattern delimiters unless the pattern already carries them:
      $splitPattern = preg_match('#^/.*/$#', $betweenNamesDelim)
                      ? $betweenNamesDelim
                      : "/" . $betweenNamesDelim . "/";

      // get a list of all authors/editors:
      return preg_split($splitPattern, $personString, -1, PREG_SPLIT_NO_EMPTY);
  }
  399. // -------------------------------------------------------------------------------------------------------------------
  400. // Map CQL indexes to refbase field names:
  // Returns the lookup table that maps CQL index identifiers to refbase field names.
  //
  // The table consists of two parts (in this order):
  //   1. indexes of a public context set ("context_set.index_name" => "refbase field name")
  //   2. indexes without a public context set, where the refbase field name itself serves as index
  //      name and thus maps to itself
  //
  // TODO: - add support for the OAI indexes 'oai.identifier' and 'oai.datestamp'
  //       - the CQL indexes 'creationDate' and 'lastModificationDate' contain both date & time info
  //         so this needs to be parsed into two refbase fields (which isn't done yet!)
  //       - if no context set & index name are given in the query, we should search the user's
  //         preferred list of "main fields" by default! (cql.serverChoice)
  function mapCQLIndexes()
  {
      // "CQL context_set.index_name" => "refbase field name"
      $contextSetIndexes = array(
          "dc.creator"                    => "author",
          "dc.title"                      => "title",
          "dc.date"                       => "year",
          "dc.language"                   => "language",
          "dc.description"                => "abstract",
          "dc.contributor"                => "editor",
          "dc.subject"                    => "keywords",
          "dc.format"                     => "medium",
          "dc.type"                       => "type",
          "dc.publisher"                  => "publisher",
          "dc.coverage"                   => "area",

          // not mapped: "bath.name" (=> "author"), "bath.topicalSubject" (=> "keywords")
          "bath.isbn"                     => "isbn",
          "bath.issn"                     => "issn",
          "bath.corporateName"            => "corporate_author",
          "bath.conferenceName"           => "conference",
          "bath.notes"                    => "notes",

          "rec.identifier"                => "serial",
          "rec.creationDate"              => "created_date-created_time", // date & time aren't parsed into two fields yet
          "rec.creationAgentName"         => "created_by",
          "rec.lastModificationDate"      => "modified_date-modified_time", // date & time aren't parsed into two fields yet
          "rec.lastModificationAgentName" => "modified_by",

          "bib.citekey"                   => "cite_key",

          // not mapped: "oai.datestamp" (=> "modified_date-modified_time", same as 'rec.lastModificationDate')
          "oai.identifier"                => "serial",

          // TODO: the special index 'main_fields' should resolve to 'cql.serverChoice', and that, in turn, should
          //       resolve to the user's preferred list of "main fields"; alternatively, function 'parseCQL()' could
          //       map 'main_fields' to the user's preferred list of "main fields" -- and 'cql.serverChoice' would
          //       just resolve to a single field (as specified here)
          "cql.serverChoice"              => "keywords"
      );

      // for indexes that have no public context set we simply accept refbase field names:
      $refbaseFieldNames = array(
          "main_fields", // NOTE: the special index 'main_fields' currently only works for OpenSearch search suggestions, otherwise we'll fall back to 'cql.serverChoice'
          "author", "title", "year", "publication", "abbrev_journal", "volume", "issue", "pages",
          "address", "corporate_author", "keywords", "abstract", "publisher", "place", "editor",
          "language", "summary_language", "orig_title", "series_editor", "series_title",
          "abbrev_series_title", "series_volume", "series_issue", "edition", "issn", "isbn", "medium",
          "area", "expedition", "conference", "notes", "approved", "location", "call_number", "serial",
          "type", "thesis", "file", "url", "doi", "contribution_id", "online_publication",
          "online_citation",
          "created_date-created_time", // date & time aren't parsed into two fields yet
          "created_by",
          "modified_date-modified_time", // date & time aren't parsed into two fields yet
          "modified_by", "orig_record",
          // user-specific fields:
          // - in case of 'sru.php', querying for user-specific fields requires that the 'x-...authenticationToken' is given in the SRU query
          // - for 'opensearch.php', querying of user-specific fields will only work with a user being logged in
          "marked", "copy", "selected", "user_keys", "user_notes", "user_file", "user_groups", "related",
          "cite_key" // currently, only the user-specific 'cite_key' field can be queried by every user using 'sru.php'
      );

      // every plain field name maps to itself; none of these keys collides with a 'set.index' key:
      return $contextSetIndexes + array_combine($refbaseFieldNames, $refbaseFieldNames);
  }
  498. // -------------------------------------------------------------------------------------------------------------------
  499. // Map SRU/W diagnostic numbers to their corresponding messages:
  500. // Spec: <http://www.loc.gov/standards/sru/specs/diagnostics.html>,
  501. // <http://www.loc.gov/standards/sru/resources/diagnostics-list.html>
  // Returns the lookup table "SRU/W diagnostic number" => "diagnostic message".
  // Only the diagnostics listed below are available; the comments name the kind of information
  // that the diagnostics list expects as "details" for a diagnostic.
  function mapSRWDiagnostics()
  {
      // general diagnostics:
      $generalDiagnostics = array(
          1 => "General system error", // Details: Debugging information (traceback)
          2 => "System temporarily unavailable",
          3 => "Authentication error",
          4 => "Unsupported operation",
          5 => "Unsupported version", // Details: Highest version supported
          6 => "Unsupported parameter value", // Details: Name of parameter
          7 => "Mandatory parameter not supplied", // Details: Name of missing parameter
          8 => "Unsupported Parameter" // Details: Name of the unsupported parameter
      );

      // diagnostics relating to the query:
      $queryDiagnostics = array(
          10 => "Query syntax error",
          15 => "Unsupported context set", // Details: URI or short name of context set
          16 => "Unsupported index", // Details: Name of index
          24 => "Unsupported combination of relation and term",
          36 => "Term in invalid format for index or relation",
          39 => "Proximity not supported"
      );

      // diagnostics relating to result sets:
      $resultSetDiagnostics = array(
          50 => "Result sets not supported"
      );

      // diagnostics relating to records:
      $recordDiagnostics = array(
          61 => "First record position out of range",
          64 => "Record temporarily unavailable",
          65 => "Record does not exist",
          66 => "Unknown schema for retrieval", // Details: Schema URI or short name (of the unsupported one)
          67 => "Record not available in this schema", // Details: Schema URI or short name
          68 => "Not authorised to send record",
          69 => "Not authorised to send record in this schema",
          70 => "Record too large to send", // Details: Maximum record size
          71 => "Unsupported record packing",
          72 => "XPath retrieval unsupported"
      );

      // diagnostics relating to sorting & stylesheets:
      $presentationDiagnostics = array(
          80  => "Sort not supported",
          110 => "Stylesheets not supported"
      );

      // the array union keeps the numeric keys (and their order) intact:
      return $generalDiagnostics + $queryDiagnostics + $resultSetDiagnostics + $recordDiagnostics + $presentationDiagnostics;
  }
  534. // -------------------------------------------------------------------------------------------------------------------
  535. // Map refbase types to the corresponding eprint/resource types suggested for Simple Dublin Core[1]:
  536. // [1]: <http://eprints-uk.rdn.ac.uk/project/docs/simpledc-guidelines/#type>
  537. // for mappings marked with (*), the above article doesn't offer a type that sufficiently matches the refbase type
  // Returns the lookup table "refbase type" => "eprint/resource type suggested for Simple Dublin Core"
  // (see <http://eprints-uk.rdn.ac.uk/project/docs/simpledc-guidelines/#type>).
  //
  // Mappings marked with (*) simply reuse the refbase type name since the above guidelines don't
  // offer a type that sufficiently matches the refbase type.
  function mapDCTypes()
  {
      return array(
          "Journal Article"    => "JournalArticle",
          "Abstract"           => "Abstract", // (*)
          "Book Chapter"       => "BookChapter",
          "Book Whole"         => "Book",
          "Conference Article" => "ConferencePaper",
          "Conference Volume"  => "ConferenceProceedings",
          "Journal"            => "Journal", // (*)
          "Magazine Article"   => "MagazineArticle", // (*)
          "Manual"             => "Manual", // (*)
          "Manuscript"         => "Preprint",
          "Map"                => "Map", // (*)
          "Miscellaneous"      => "Other",
          "Newspaper Article"  => "NewsArticle",
          "Patent"             => "Patent", // (*)
          "Report"             => "TechnicalReport",
          "Software"           => "Software", // (*)

          // suggested types without a refbase counterpart:
          // "ConferencePoster", "InCollection", "OnlineJournalArticle"

          // since refbase currently doesn't use a 'Thesis' type, this has to be dealt with in the calling function:
          "Thesis"             => "Thesis"
      );
  }
  563. // --------------------------------------------------------------------
  564. ?>