You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

987 lines
44 KiB

  1. <?php
  2. // Project: Web Reference Database (refbase) <http://www.refbase.net>
  3. // Copyright: Matthias Steffens <mailto:refbase@extracts.de> and the file's
  4. // original author(s).
  5. //
  6. // This code is distributed in the hope that it will be useful,
  7. // but WITHOUT ANY WARRANTY. Please see the GNU General Public
  8. // License for more details.
  9. //
  10. // File: ./includes/modsxml.inc.php
  11. // Repository: $HeadURL: file:///svn/p/refbase/code/branches/bleeding-edge/includes/modsxml.inc.php $
  12. // Author(s): Richard Karnesky <mailto:karnesky@gmail.com>
  13. //
  14. // Created: 02-Oct-04, 12:00
  15. // Modified: $Date: 2017-04-13 02:00:18 +0000 (Thu, 13 Apr 2017) $
  16. // $Author: karnesky $
  17. // $Revision: 1416 $
  18. // This include file contains functions that'll export records to MODS XML.
  19. // Requires ActiveLink PHP XML Package, which is available under the GPL from:
  20. // <http://www.active-link.com/software/>
  21. // Incorporate some include files:
  22. include_once 'includes/transtab_refbase_unicode.inc.php'; // include refbase markup -> Unicode search & replace patterns
  23. // Import the ActiveLink Packages
  24. require_once("classes/include.php");
  25. import("org.active-link.xml.XML");
  26. import("org.active-link.xml.XMLDocument");
  27. // For more on MODS, see:
  28. // <http://www.loc.gov/standards/mods/>
  29. // <http://www.scripps.edu/~cdputnam/software/bibutils/>
  30. // TODO:
  31. // Stuff in '// NOTE' comments
  32. // There's a lot of overlap in the portions that depend on types. I plan
  33. // on refactoring this, so that they can make calls to the same function.
  34. // I don't know what to do with some fields
  35. // See <http://www.loc.gov/standards/mods/v3/mods-3-0-outline.html>
  36. // - Require clever parsing
  37. // - address (?name->affiliation?)
  38. // - medium (?typeOfResource?)
  39. // - Don't know how refbase users use these
  40. // - area (could be either topic or geographic, so we do nothing)
  41. // - expedition
  42. // --------------------------------------------------------------------
  43. // Generates relatedItem branch for series
  /**
   * Builds the MODS <relatedItem type="series"> branch for a record's series data.
   *
   * Only non-empty arguments contribute to the branch; the branch itself is
   * always created and returned (callers decide whether to call this at all).
   *
   * @param string $series_editor       series editor name(s); a trailing "(ed)"/"(eds)" marker is stripped
   * @param string $series_title        full series title
   * @param string $abbrev_series_title abbreviated series title
   * @param string $series_volume       series volume
   * @param string $series_issue        series issue
   * @return XMLBranch the "relatedItem" branch
   */
  function serialBranch($series_editor, $series_title, $abbrev_series_title,
                        $series_volume, $series_issue) {
    // defined in 'transtab_unicode_charset.inc.php' and 'transtab_latin1_charset.inc.php'
    global $alnum, $alpha, $cntrl, $dash, $digit, $graph, $lower, $print, $punct,
           $space, $upper, $word, $patternModifiers;

    $seriesBranch = new XMLBranch("relatedItem");
    $seriesBranch->setTagAttribute("type", "series");

    // full series title
    if (!empty($series_title)) {
      $seriesBranch->setTagContent(encodeXMLField('series_title', $series_title), "relatedItem/titleInfo/title");
    }

    // abbreviated series title goes into its own <titleInfo type="abbreviated">
    if (!empty($abbrev_series_title)) {
      $abbrevTitleInfo = new XMLBranch("titleInfo");
      $abbrevTitleInfo->setTagAttribute("type", "abbreviated");
      $abbrevTitleInfo->setTagContent(encodeXMLField('abbrev_series_title', $abbrev_series_title), "titleInfo/title");
      $seriesBranch->addXMLBranch($abbrevTitleInfo);
    }

    // series editor(s)
    if (!empty($series_editor)) {
      $editorNames = $series_editor;

      // drop an "(ed)"/"(eds)" marker if the field ends with one
      if (preg_match("/ *\(eds?\)$/", $editorNames)) {
        $editorNames = preg_replace("/[ \r\n]*\(eds?\)/i", "", $editorNames);
      }

      $editorBranches = separateNames("series_editor", "/\s*;\s*/", "/\s*,\s*/",
                                      "/(?<=^|[$word])[^-$word]+|(?<=^|[$upper])(?=$|[$upper])/$patternModifiers",
                                      $editorNames, "personal", "editor");

      foreach ($editorBranches as $editorBranch) {
        $seriesBranch->addXMLBranch($editorBranch);
      }
    }

    // volume & issue: each non-empty value becomes a <detail type="..."><number> inside one <part>
    $partDetails = array(
      'volume' => array('series_volume', $series_volume),
      'issue'  => array('series_issue', $series_issue)
    );
    $partBranch = null; // created lazily, only if at least one detail exists

    foreach ($partDetails as $detailType => $detailSpec) {
      list($fieldName, $fieldValue) = $detailSpec;

      if (empty($fieldValue)) {
        continue;
      }

      if ($partBranch === null) {
        $partBranch = new XMLBranch("part");
      }

      $detailBranch = new XMLBranch("detail");
      $detailBranch->setTagContent(encodeXMLField($fieldName, $fieldValue), "detail/number");
      $detailBranch->setTagAttribute("type", $detailType);
      $partBranch->addXMLBranch($detailBranch);
    }

    if ($partBranch !== null) {
      $seriesBranch->addXMLBranch($partBranch);
    }

    return $seriesBranch;
  }
  90. // --------------------------------------------------------------------
  91. // Separates people's names and then those names into their functional parts:
  92. // {{Family1,{Given1-1,Given1-2}},{Family2,{Given2}}}
  93. // Adds these to an array of XMLBranches.
  /**
   * Splits a string of people's names into MODS <name> branches.
   *
   * Each name is split into a family part and (optionally) several given
   * parts; every resulting <name> branch also receives a
   * <role><roleTerm authority="marcrelator" type="text"> element.
   *
   * @param string $rowFieldName       refbase field name, passed on to encodeXMLField()
   * @param string $betweenNamesDelim  regex (with delimiters) separating individual names
   * @param string $nameGivenDelim     regex (with delimiters) separating family name from given names
   * @param string $betweenGivensDelim regex (with delimiters) separating the given names/initials
   * @param string $names              the raw names string
   * @param string $type               value for the <name> "type" attribute (e.g. "personal")
   * @param string $role               text for the role term (e.g. "author", "editor")
   * @return array list of XMLBranch objects, one per name (empty if $names could not be split)
   */
  function separateNames($rowFieldName, $betweenNamesDelim, $nameGivenDelim,
                         $betweenGivensDelim, $names, $type, $role) {
    // defined in 'transtab_unicode_charset.inc.php' and 'transtab_latin1_charset.inc.php'
    global $alnum, $alpha, $cntrl, $dash, $digit, $graph, $lower, $print, $punct,
           $space, $upper, $word, $patternModifiers;

    // The branches are collected in their own array so that the returned list
    // contains only XMLBranch objects and not the raw name strings produced by
    // preg_split() below.
    $nameBranches = array();

    $nameStrings = preg_split($betweenNamesDelim, $names); // get a list of all authors

    if ($nameStrings === false) // the split pattern failed -> nothing to export
      return $nameBranches;

    foreach ($nameStrings as $singleName) {
      $nameBranch = new XMLBranch("name");
      $nameBranch->setTagAttribute("type", $type);

      // separate the family name from the given name(s), if a delimiter is present
      if (preg_match($nameGivenDelim, $singleName))
        list($singleNameFamily, $singleNameGivens) = preg_split($nameGivenDelim,
                                                                $singleName);
      else {
        $singleNameFamily = $singleName;
        $singleNameGivens = "";
      }

      $nameFamilyBranch = new XMLBranch("namePart");
      $nameFamilyBranch->setTagAttribute("type", "family");
      $nameFamilyBranch->setTagContent(encodeXMLField($rowFieldName, $singleNameFamily));
      $nameBranch->addXMLBranch($nameFamilyBranch);

      if (!empty($singleNameGivens)) {
        // before splitting given names into their parts, we remove any non-word chars
        // between initials/forenames that are connected with a hyphen (which ensures
        // that they are kept together and that the hyphen is maintained):
        $singleNameGivens = preg_replace("/(?<=[$word])[^-$word]*([$dash])[^-$word]*(?=[$upper])/$patternModifiers",
                                         "\\1", $singleNameGivens);

        $singleNameGivenArray = preg_split($betweenGivensDelim, $singleNameGivens,
                                           -1, PREG_SPLIT_NO_EMPTY);

        // one <namePart type="given"> per given name/initial
        foreach ($singleNameGivenArray as $singleNameGiven) {
          $nameGivenBranch = new XMLBranch("namePart");
          $nameGivenBranch->setTagAttribute("type", "given");
          $nameGivenBranch->setTagContent(encodeXMLField($rowFieldName, $singleNameGiven));
          $nameBranch->addXMLBranch($nameGivenBranch);
        }
      }

      // role of this person
      $nameBranch->setTagContent(encodeXMLField('name_role', $role), "name/role/roleTerm");
      $nameBranch->setTagAttribute("authority", "marcrelator",
                                   "name/role/roleTerm");
      $nameBranch->setTagAttribute("type", "text", "name/role/roleTerm");

      $nameBranches[] = $nameBranch;
    }

    return $nameBranches;
  }
  138. // --------------------------------------------------------------------
  /**
   * Exports every row of a query result as one MODS XML collection.
   *
   * Side effects: resets the globals '$citeKeysArray' and
   * '$fieldSpecificSearchReplaceActionsArray' before the records are generated.
   *
   * @param mixed $result result set that rows are fetched from via mysqli_fetch_array()
   * @return string the serialized <modsCollection> document
   */
  function modsCollection($result) {
    global $contentTypeCharset; // these variables are defined in 'ini.inc.php'
    global $convertExportDataToUTF8;
    global $citeKeysArray; // '$citeKeysArray' is made globally available from
                           // within this function

    // The array '$transtab_refbase_unicode' contains search & replace patterns
    // for conversion from refbase markup to Unicode entities.
    global $transtab_refbase_unicode; // defined in 'transtab_refbase_unicode.inc.php'
    global $fieldSpecificSearchReplaceActionsArray;

    // array of cite keys (used to ensure uniqueness of cite keys among all
    // exported records)
    $citeKeysArray = array();

    // Field-specific search & replace 'actions' which get applied to all refbase
    // fields listed in the corresponding 'fields' element. An empty array means
    // that no search & replace actions are performed.
    // Note that the search patterns MUST include the leading & trailing slashes --
    // which is done to allow for mode modifiers such as 'imsxU'.
    $fieldSpecificSearchReplaceActionsArray = array();

    if ($convertExportDataToUTF8 == "yes") {
      $fieldSpecificSearchReplaceActionsArray[] = array(
        'fields'  => array("title", "publication", "abbrev_journal", "address", "keywords", "abstract", "orig_title", "series_title", "abbrev_series_title", "notes"),
        'actions' => $transtab_refbase_unicode
      );
    }

    // Individual records are objects; build the list of exported records first:
    $exportedRecords = array();

    while ($row = @ mysqli_fetch_array($result)) {
      $recordXML = modsRecord($row); // export the current record as MODS XML

      if (!empty($recordXML)) { // skip records that produced no output
        $exportedRecords[] = $recordXML;
      }
    }

    // The document is declared as UTF-8 if the export data get converted to UTF-8,
    // otherwise the database's own content type charset is used:
    $document = new XMLDocument();
    $forceUTF8 = (($convertExportDataToUTF8 == "yes") && ($contentTypeCharset != "UTF-8"));
    $document->setEncoding($forceUTF8 ? "UTF-8" : $contentTypeCharset);

    // Wrap all records in a <modsCollection> root element:
    $collectionXML = new XML("modsCollection");
    $collectionXML->setTagAttribute("xmlns", "http://www.loc.gov/mods/v3");

    foreach ($exportedRecords as $recordXML) {
      $collectionXML->addXMLasBranch($recordXML);
    }

    $document->setXML($collectionXML);

    // collections of records are returned as a string
    return $document->getXMLString();
  }
  184. // --------------------------------------------------------------------
  185. // Returns an XML object (mods) of a single record
  186. function modsRecord($row) {
  187. global $databaseBaseURL; // these variables are defined in 'ini.inc.php'
  188. global $contentTypeCharset;
  189. global $fileVisibility;
  190. global $fileVisibilityException;
  191. global $filesBaseURL;
  192. global $convertExportDataToUTF8;
  193. // defined in 'transtab_unicode_charset.inc.php' and 'transtab_latin1_charset.inc.php'
  194. global $alnum, $alpha, $cntrl, $dash, $digit, $graph, $lower, $print, $punct,
  195. $space, $upper, $word, $patternModifiers;
  196. $exportPrivate = True; // This will be a global variable or will be used
  197. // when modsRow is called and will determine if we
  198. // export user-specific data
  199. $exportRecordURL = True; // Specifies whether an attribution string containing
  200. // the URL to the refbase database record (and the last
  201. // modification date) shall be written to the notes branch.
  202. // Note that this string is required by the "-A|--append"
  203. // feature of the 'refbase' command line client
  204. // convert this record's modified date/time info to UNIX time stamp format:
  205. // => "date('D, j M Y H:i:s O')", e.g. "Sat, 15 Jul 2006 22:24:16 +0200"
  206. // function 'generateRFC2822TimeStamp()' is defined in 'include.inc.php'
  207. $currentDateTimeStamp = generateRFC2822TimeStamp($row['modified_date'], $row['modified_time']);
  208. // --- BEGIN TYPE * ---
  209. // |
  210. // | These apply to everything
  211. // this is a stupid hack that maps the names of the '$row' array keys to those used
  212. // by the '$formVars' array (which is required by function 'generateCiteKey()')
  213. // (eventually, the '$formVars' array should use the MySQL field names as names for its array keys)
  214. $formVars = buildFormVarsArray($row); // function 'buildFormVarsArray()' is defined in 'include.inc.php'
  215. // generate or extract the cite key for this record
  216. // (note that charset conversion can only be done *after* the cite key has been generated,
  217. // otherwise cite key generation will produce garbled text!)
  218. $citeKey = generateCiteKey($formVars); // function 'generateCiteKey()' is defined in 'include.inc.php'
  219. // Create an XML object for a single record.
  220. $record = new XML("mods");
  221. $record->setTagAttribute("version", "3.2");
  222. if (!empty($citeKey))
  223. $record->setTagAttribute("ID", $citeKey);
  224. // titleInfo
  225. // Regular Title
  226. if (!empty($row['title']))
  227. $record->setTagContent(encodeXMLField('title', $row['title']), "mods/titleInfo/title");
  228. // Translated Title
  229. // NOTE: This field is excluded by the default cite SELECT method
  230. if (!empty($row['orig_title'])) {
  231. $orig_title = new XMLBranch("titleInfo");
  232. $orig_title->setTagAttribute("type", "translated");
  233. $orig_title->setTagContent(encodeXMLField('orig_title', $row['orig_title']), "titleInfo/title");
  234. $record->addXMLBranch($orig_title);
  235. }
  236. // name
  237. // author
  238. if (!empty($row['author'])) {
  239. if (preg_match("/ *\(eds?\)$/", $row['author'])) {
  240. $author = preg_replace("/[ \r\n]*\(eds?\)/i", "", $row['author']);
  241. $nameArray = separateNames("author", "/\s*;\s*/", "/\s*,\s*/",
  242. "/(?<=^|[$word])[^-$word]+|(?<=^|[$upper])(?=$|[$upper])/$patternModifiers",
  243. $author, "personal", "editor");
  244. }
  245. else if ($row['type'] == "Map") {
  246. $nameArray = separateNames("author", "/\s*;\s*/", "/\s*,\s*/",
  247. "/(?<=^|[$word])[^-$word]+|(?<=^|[$upper])(?=$|[$upper])/$patternModifiers",
  248. $row['author'], "personal", "cartographer");
  249. }
  250. else {
  251. $nameArray = separateNames("author", "/\s*;\s*/", "/\s*,\s*/",
  252. "/(?<=^|[$word])[^-$word]+|(?<=^|[$upper])(?=$|[$upper])/$patternModifiers",
  253. $row['author'], "personal", "author");
  254. }
  255. foreach ($nameArray as $singleName) {
  256. $record->addXMLBranch($singleName);
  257. }
  258. }
  259. // originInfo
  260. if ((!empty($row['year'])) || (!empty($row['publisher'])) ||
  261. (!empty($row['place']))) {
  262. $origin = new XMLBranch("originInfo");
  263. // dateIssued
  264. if (!empty($row['year']))
  265. $origin->setTagContent(encodeXMLField('year', $row['year']), "originInfo/dateIssued");
  266. // Book Chapters and Journal Articles only have a dateIssued
  267. // (editions, places, and publishers are associated with the host)
  268. if (!preg_match("/^(Book Chapter|Journal Article)$/", $row['type'])) {
  269. // publisher
  270. if (!empty($row['publisher']))
  271. $origin->setTagContent(encodeXMLField('publisher', $row['publisher']), "originInfo/publisher");
  272. // place
  273. if (!empty($row['place'])) {
  274. $origin->setTagContent(encodeXMLField('place', $row['place']), "originInfo/place/placeTerm");
  275. $origin->setTagAttribute("type", "text",
  276. "originInfo/place/placeTerm");
  277. }
  278. // edition
  279. if (!empty($row['edition']))
  280. $origin->setTagContent(encodeXMLField('edition', $row['edition']), "originInfo/edition");
  281. }
  282. if ($origin->hasBranch())
  283. $record->addXMLBranch($origin);
  284. }
  285. // language
  286. if (!empty($row['language']))
  287. $record->setTagContent(encodeXMLField('language', $row['language']), "mods/language");
  288. // abstract
  289. // NOTE: This field is excluded by the default cite SELECT method
  290. if (!empty($row['abstract'])) {
  291. $abstract = new XMLBranch("abstract");
  292. $abstract->setTagContent(encodeXMLField('abstract', $row['abstract']));
  293. if (!empty($row['summary_language'])) {
  294. $abstract->setTagAttribute("lang", encodeXMLField('summary_language', $row['summary_language']));
  295. }
  296. $record->addXMLBranch($abstract);
  297. }
  298. // subject
  299. // keywords
  300. if (!empty($row['keywords'])) {
  301. $subjectArray = array();
  302. $subjectArray = preg_split("/\s*;\s*/", $row['keywords']); // "unrelated" keywords
  303. foreach ($subjectArray as $singleSubject) {
  304. $subjectBranch = new XMLBranch("subject");
  305. $topicArray = array();
  306. $topicArray = preg_split("/\s*,\s*/", $singleSubject); // "related" keywords
  307. foreach ($topicArray as $singleTopic) {
  308. $topicBranch = new XMLBranch("topic");
  309. $topicBranch->setTagContent(encodeXMLField('keywords', $singleTopic));
  310. $subjectBranch->addXMLBranch($topicBranch);
  311. }
  312. $record->addXMLBranch($subjectBranch);
  313. }
  314. }
  315. // user_keys
  316. // NOTE: a copy of the above. Needs to be a separate function later.
  317. if ((!empty($row['user_keys'])) && $exportPrivate) {
  318. $subjectArray = array();
  319. $subjectArray = preg_split("/\s*;\s*/", $row['user_keys']); // "unrelated" user_keys
  320. foreach ($subjectArray as $singleSubject) {
  321. $subjectBranch = new XMLBranch("subject");
  322. $topicArray = array();
  323. $topicArray = preg_split("/\s*,\s*/", $singleSubject); // "related" user_keys
  324. foreach ($topicArray as $singleTopic) {
  325. $topicBranch = new XMLBranch("topic");
  326. $topicBranch->setTagContent(encodeXMLField('user_keys', $singleTopic));
  327. $subjectBranch->addXMLBranch($topicBranch);
  328. }
  329. $record->addXMLBranch($subjectBranch);
  330. }
  331. }
  332. // user_groups
  333. // NOTE: a copy of the above. Needs to be a separate function later.
  334. if ((!empty($row['user_groups'])) && $exportPrivate) {
  335. $subjectArray = array();
  336. $subjectArray = preg_split("/\s*;\s*/", $row['user_groups']); // "unrelated" user_groups
  337. foreach ($subjectArray as $singleSubject) {
  338. $subjectBranch = new XMLBranch("subject");
  339. $topicArray = array();
  340. $topicArray = preg_split("/\s*,\s*/", $singleSubject); // "related" user_groups
  341. foreach ($topicArray as $singleTopic) {
  342. $topicBranch = new XMLBranch("topic");
  343. $topicBranch->setTagContent(encodeXMLField('user_groups', $singleTopic));
  344. $subjectBranch->addXMLBranch($topicBranch);
  345. }
  346. $record->addXMLBranch($subjectBranch);
  347. }
  348. }
  349. // notes
  350. if (!empty($row['notes']))
  351. $record->setTagContent(encodeXMLField('notes', $row['notes']), "mods/note");
  352. // user_notes
  353. if ((!empty($row['user_notes'])) && $exportPrivate) // replaces any generic notes
  354. $record->setTagContent(encodeXMLField('user_notes', $row['user_notes']), "mods/note");
  355. // refbase attribution string
  356. if ($exportRecordURL) {
  357. $attributionBranch = new XMLBranch("note");
  358. $attributionBranch->setTagContent("exported from refbase ("
  359. . $databaseBaseURL . "show.php?record=" . $row['serial']
  360. . "), last updated on " . $currentDateTimeStamp);
  361. $record->addXMLBranch($attributionBranch);
  362. }
  363. // typeOfResource
  364. // maps are 'cartographic', software is 'software, multimedia',
  365. // and everything else is 'text'
  366. $type = new XMLBranch("typeOfResource");
  367. if ($row['type'] == "Map") {
  368. $type->setTagContent("cartographic");
  369. }
  370. else if ($row['type'] == "Software") {
  371. $type->setTagContent("software, multimedia");
  372. }
  373. else {
  374. $type->setTagContent("text");
  375. }
  376. if ($row['type'] == "Manuscript") {
  377. $type->setTagAttribute("manuscript", "yes");
  378. }
  379. $record->addXMLBranch($type);
  380. // location
  381. // Physical Location
  382. // NOTE: This field is excluded by the default cite SELECT method
  383. // This should also be parsed later
  384. if (!empty($row['location'])) {
  385. $location = new XMLBranch("location");
  386. $locationArray = array();
  387. $locationArray = preg_split("/\s*;\s*/", $row['location']);
  388. foreach ($locationArray as $singleLocation) {
  389. $locationBranch = new XMLBranch("physicalLocation");
  390. $locationBranch->setTagContent(encodeXMLField('location', $singleLocation));
  391. $location->addXMLBranch($locationBranch);
  392. }
  393. $record->addXMLBranch($location);
  394. }
  395. // URL (also an identifier, see below)
  396. // NOTE: This field is excluded by the default cite SELECT method
  397. if (!empty($row['url'])) {
  398. $location = new XMLBranch("location");
  399. $location->setTagContent(encodeXMLField('url', $row['url']), "location/url");
  400. $record->addXMLBranch($location);
  401. }
  402. // Include a link to any corresponding FILE if one of the following conditions is met:
  403. // - the variable '$fileVisibility' (defined in 'ini.inc.php') is set to 'everyone'
  404. // - the variable '$fileVisibility' is set to 'login' AND the user is logged in
  405. // - the variable '$fileVisibility' is set to 'user-specific' AND the 'user_permissions' session variable contains 'allow_download'
  406. // - the array variable '$fileVisibilityException' (defined in 'ini.inc.php') contains a pattern (in array element 1) that matches the contents of the field given (in array element 0)
  407. if ($fileVisibility == "everyone" OR ($fileVisibility == "login" AND isset($_SESSION['loginEmail'])) OR ($fileVisibility == "user-specific" AND (isset($_SESSION['user_permissions']) AND preg_match("/allow_download/", $_SESSION['user_permissions']))) OR (!empty($fileVisibilityException) AND preg_match($fileVisibilityException[1], $row[$fileVisibilityException[0]])))
  408. {
  409. // file
  410. // Note that when converting MODS to Endnote or RIS, Bibutils will include the above
  411. // URL (if given), otherwise it'll take the URL from the 'file' field. I.e. for
  412. // Endnote or RIS, the URL to the PDF is only included if no regular URL is available.
  413. if (!empty($row['file'])) {
  414. $location = new XMLBranch("location");
  415. if (preg_match('#^(https?|ftp|file)://#i', $row['file'])) { // if the 'file' field contains a full URL (starting with "http://", "https://", "ftp://", or "file://")
  416. $URLprefix = ""; // we don't alter the URL given in the 'file' field
  417. }
  418. else { // if the 'file' field contains only a partial path (like 'polarbiol/10240001.pdf') or just a file name (like '10240001.pdf')
  419. // use the base URL of the standard files directory as prefix:
  420. if (preg_match('#^/#', $filesBaseURL)) // absolute path -> file dir is located outside of refbase root dir
  421. $URLprefix = 'http://' . $_SERVER['HTTP_HOST'] . $filesBaseURL;
  422. else // relative path -> file dir is located within refbase root dir
  423. $URLprefix = $databaseBaseURL . $filesBaseURL;
  424. }
  425. $location->setTagContent(encodeXMLField('file', $URLprefix . $row['file']), "location/url");
  426. $location->setTagAttribute("displayLabel", "Electronic full text", "location/url");
  427. // the 'access' attribute requires MODS v3.2 or greater:
  428. $location->setTagAttribute("access", "raw object", "location/url");
  429. $record->addXMLBranch($location);
  430. }
  431. }
  432. // identifier
  433. // url
  434. if (!empty($row['url'])) {
  435. $identifier = new XMLBranch("identifier");
  436. $identifier->setTagContent(encodeXMLField('url', $row['url']));
  437. $identifier->setTagAttribute("type", "uri");
  438. $record->addXMLBranch($identifier);
  439. }
  440. // doi
  441. if (!empty($row['doi'])) {
  442. $identifier = new XMLBranch("identifier");
  443. $identifier->setTagContent(encodeXMLField('doi', $row['doi']));
  444. $identifier->setTagAttribute("type", "doi");
  445. $record->addXMLBranch($identifier);
  446. }
  447. // pubmed
  448. // NOTE: Until refbase stores PubMed & arXiv IDs in a better way,
  449. // we extract them from the 'notes' field
  450. if (preg_match("/PMID *: *\d+/i", $row['notes'])) {
  451. $identifier = new XMLBranch("identifier");
  452. $identifier->setTagContent(preg_replace("/.*?PMID *: *(\d+).*/i", "\\1", $row['notes']));
  453. $identifier->setTagAttribute("type", "pubmed");
  454. $record->addXMLBranch($identifier);
  455. }
  456. // arxiv
  457. // NOTE: see note for pubmed
  458. if (preg_match("/arXiv *: *[^ ;]+/i", $row['notes'])) {
  459. $identifier = new XMLBranch("identifier");
  460. $identifier->setTagContent(preg_replace("/.*?arXiv *: *([^ ;]+).*/i", "\\1", $row['notes']));
  461. $identifier->setTagAttribute("type", "arxiv");
  462. $record->addXMLBranch($identifier);
  463. }
  464. // cite_key
  465. if (!empty($citeKey)) {
  466. $identifier = new XMLBranch("identifier");
  467. $identifier->setTagContent(encodeXMLField('cite_key', $citeKey));
  468. $identifier->setTagAttribute("type", "citekey");
  469. $record->addXMLBranch($identifier);
  470. }
  471. // local--CALL NUMBER
  472. // NOTE: This should really be parsed!
  473. if (!empty($row['call_number'])) {
  474. $identifierArray = array();
  475. $identifierArray = preg_split("/\s*;\s*/", $row['call_number']);
  476. foreach ($identifierArray as $singleIdentifier) {
  477. if (!preg_match("/@\s*$/", $singleIdentifier)) {
  478. $identifierBranch = new XMLBranch("identifier");
  479. $identifierBranch->setTagContent(encodeXMLField('call_number', $singleIdentifier));
  480. $identifierBranch->setTagAttribute("type", "local");
  481. $record->addXMLBranch($identifierBranch);
  482. }
  483. }
  484. }
  485. // --- END TYPE * ---
  486. // -----------------------------------------
  487. // --- BEGIN TYPE != ABSTRACT || BOOK CHAPTER || CONFERENCE ARTICLE || JOURNAL ARTICLE || MAGAZINE ARTICLE || NEWSPAPER ARTICLE ---
  488. // |
  489. // | BOOK WHOLE, CONFERENCE VOLUME, JOURNAL, MANUAL, MANUSCRIPT, MAP, MISCELLANEOUS, PATENT,
  490. // | REPORT, and SOFTWARE have some info as a branch off the root, whereas ABSTRACT, BOOK CHAPTER,
  491. // | CONFERENCE ARTICLE, JOURNAL ARTICLE, MAGAZINE ARTICLE and NEWSPAPER ARTICLE place it in the relatedItem branch.
  492. if (!preg_match("/^(Abstract|Book Chapter|Conference Article|Journal Article|Magazine Article|Newspaper Article)$/", $row['type'])) {
  493. // name
  494. // editor
  495. if (!empty($row['editor'])) {
  496. $editor=$row['editor'];
  497. $author=$row['author'];
  498. if (preg_match("/ *\(eds?\)$/", $editor))
  499. $editor = preg_replace("/[ \r\n]*\(eds?\)/i", "", $editor);
  500. if (preg_match("/ *\(eds?\)$/", $author))
  501. $author = preg_replace("/[ \r\n]*\(eds?\)/i", "", $author);
  502. if ($editor != $author) {
  503. $nameArray = separateNames("editor", "/\s*;\s*/", "/\s*,\s*/",
  504. "/(?<=^|[$word])[^-$word]+|(?<=^|[$upper])(?=$|[$upper])/$patternModifiers",
  505. $editor, "personal", "editor");
  506. foreach ($nameArray as $singleName)
  507. $record->addXMLBranch($singleName);
  508. }
  509. }
  510. // corporate
  511. // (we treat a 'corporate_author' similar to how Bibutils converts the BibTeX
  512. // 'organization' field to MODS XML, i.e., we add a separate name element with
  513. // a 'type="corporate"' attribute and an 'author' role (or a 'degree grantor'
  514. // role in case of theses))
  515. if (!empty($row['corporate_author'])) {
  516. $nameBranch = new XMLBranch("name");
  517. $nameBranch->setTagAttribute("type", "corporate");
  518. $nameBranch->setTagContent(encodeXMLField('corporate_author', $row['corporate_author']), "name/namePart");
  519. if (empty($row['thesis']))
  520. $nameBranch->setTagContent("author", "name/role/roleTerm");
  521. else // thesis
  522. $nameBranch->setTagContent("degree grantor", "name/role/roleTerm");
  523. $nameBranch->setTagAttribute("authority", "marcrelator", "name/role/roleTerm");
  524. $nameBranch->setTagAttribute("type", "text", "name/role/roleTerm");
  525. $record->addXMLBranch($nameBranch);
  526. }
  527. // conference
  528. if (!empty($row['conference'])) {
  529. $nameBranch = new XMLBranch("name");
  530. $nameBranch->setTagAttribute("type", "conference");
  531. $nameBranch->setTagContent(encodeXMLField('conference', $row['conference']), "name/namePart");
  532. $record->addXMLBranch($nameBranch);
  533. }
  534. // genre
  535. // type
  536. // NOTE: Is there a better MARC genre[1] for 'manuscript?'
  537. // [1]<http://www.loc.gov/marc/sourcecode/genre/genrelist.html>
  538. $genremarc = new XMLBranch("genre");
  539. $genre = new XMLBranch("genre");
  540. // NOTE: According to the MARC "Source Codes for Genre"[1]
  541. // the MARC authority should be 'marcgt', not 'marc'.
  542. // [1]<http://www.loc.gov/marc/sourcecode/genre/genresource.html>
  543. $genremarc->setTagAttribute("authority", "marcgt");
  544. if (empty($row['thesis'])) { // theses will get their own genre (see below)
  545. if ($row['type'] == "Book Whole") {
  546. $record->setTagContent("monographic",
  547. "mods/originInfo/issuance");
  548. $genremarc->setTagContent("book");
  549. }
  550. else if ($row['type'] == "Conference Volume") {
  551. $genremarc->setTagContent("conference publication");
  552. }
  553. else if ($row['type'] == "Journal") {
  554. $genremarc->setTagContent("periodical");
  555. $genre->setTagContent("academic journal");
  556. }
  557. else if ($row['type'] == "Manual") { // should we set '<issuance>monographic' here (and for the ones below)?
  558. $genremarc->setTagContent("instruction");
  559. $genre->setTagContent("manual");
  560. }
  561. else if ($row['type'] == "Manuscript") {
  562. $genremarc->setTagContent("loose-leaf");
  563. $genre->setTagContent("manuscript");
  564. }
  565. else if ($row['type'] == "Map") {
  566. $genremarc->setTagContent("map");
  567. }
  568. else if ($row['type'] == "Miscellaneous") {
  569. $genre->setTagContent("miscellaneous");
  570. }
  571. else if ($row['type'] == "Patent") {
  572. $genremarc->setTagContent("patent");
  573. }
  574. else if ($row['type'] == "Report") {
  575. $genremarc->setTagContent("technical report");
  576. $genre->setTagContent("report");
  577. }
  578. else if ($row['type'] == "Software") {
  579. // $genremarc->setTagContent("programmed text"); // would this be correct?
  580. $genre->setTagContent("software");
  581. }
  582. else if (!empty($row['type'])) { // catch-all: don't use a MARC genre
  583. $genre->setTagContent(encodeXMLField('type', $row['type']));
  584. }
  585. if ($genremarc->hasLeaf())
  586. $record->addXMLBranch($genremarc);
  587. if ($genre->hasLeaf())
  588. $record->addXMLBranch($genre);
  589. }
  590. // thesis
  591. else { // if (!empty($row['thesis']))
  592. $record->setTagContent("monographic",
  593. "mods/originInfo/issuance");
  594. $thesismarc = new XMLBranch("genre");
  595. $thesis = new XMLBranch("genre");
  596. $thesismarc->setTagContent("thesis");
  597. $thesismarc->setTagAttribute("authority", "marcgt");
  598. // tweak thesis names so that Bibutils will recognize them:
  599. if ($row['thesis'] == "Master's thesis")
  600. $row['thesis'] = "Masters thesis";
  601. $thesis->setTagContent(encodeXMLField('thesis', $row['thesis']));
  602. $record->addXMLBranch($thesismarc);
  603. $record->addXMLBranch($thesis);
  604. }
  605. // physicalDescription
  606. // pages
  607. if (!empty($row['pages'])) {
  608. $description = new XMLBranch("physicalDescription");
  609. $pages = new XMLBranch("extent");
  610. $pages->setTagAttribute("unit", "pages");
  611. if (preg_match("/[0-9] *- *[0-9]/", $row['pages'])) { // if a page range
  612. // split the page range into start and end pages
  613. list($pagestart, $pageend) = preg_split('/\s*[-]\s*/', $row['pages']);
  614. if ($pagestart < $pageend) { // extents MUST span multiple pages
  615. $pages->setTagContent(encodeXMLField('pages', $pagestart), "extent/start");
  616. $pages->setTagContent(encodeXMLField('pages', $pageend), "extent/end");
  617. }
  618. else {
  619. $pages->setTagContent(encodeXMLField('pages', $row['pages']));
  620. }
  621. }
  622. else if (preg_match("/^\d\d*\s*pp?.?$/", $row['pages'])) {
  623. list($pagetotal) = preg_split('/\s*pp?/', $row['pages']);
  624. $pages->setTagContent(encodeXMLField('pages', $pagetotal), "extent/total");
  625. }
  626. else {
  627. $pages->setTagContent(encodeXMLField('pages', $row['pages']));
  628. }
  629. $description->addXMLBranch($pages);
  630. $record->addXMLBranch($description);
  631. }
  632. // identifier
  633. // isbn
  634. if (!empty($row['isbn'])) {
  635. $identifier = new XMLBranch("identifier");
  636. $identifier->setTagContent(encodeXMLField('isbn', $row['isbn']));
  637. $identifier->setTagAttribute("type", "isbn");
  638. $record->addXMLBranch($identifier);
  639. }
  640. // issn
  641. if (!empty($row['issn'])) {
  642. $identifier = new XMLBranch("identifier");
  643. $identifier->setTagContent(encodeXMLField('issn', $row['issn']));
  644. $identifier->setTagAttribute("type", "issn");
  645. $record->addXMLBranch($identifier);
  646. }
  647. // series
  648. if ((!empty($row['series_editor'])) || (!empty($row['series_title'])) ||
  649. (!empty($row['abbrev_series_title'])) ||
  650. (!empty($row['series_volume'])) || (!empty($row['series_issue']))) {
  651. $record->addXMLBranch(serialBranch($row['series_editor'],
  652. $row['series_title'],
  653. $row['abbrev_series_title'],
  654. $row['series_volume'],
  655. $row['series_issue']));
  656. }
  657. }
  658. // --- END TYPE != ABSTRACT || BOOK CHAPTER || CONFERENCE ARTICLE || JOURNAL ARTICLE || MAGAZINE ARTICLE || NEWSPAPER ARTICLE ---
  659. // -----------------------------------------
  660. // --- BEGIN TYPE == ABSTRACT || BOOK CHAPTER || CONFERENCE ARTICLE || JOURNAL ARTICLE || MAGAZINE ARTICLE || NEWSPAPER ARTICLE ---
  661. // |
  662. // | NOTE: These are currently the only types that have publication,
  663. // | abbrev_journal, volume, and issue added.
  664. // | A lot of info goes into the relatedItem branch.
  665. else { // if (preg_match("/^(Abstract|Book Chapter|Conference Article|Journal Article|Magazine Article|Newspaper Article)$/", $row['type']))
  666. // relatedItem
  667. $related = new XMLBranch("relatedItem");
  668. $related->setTagAttribute("type", "host");
  669. // title (Publication)
  670. if (!empty($row['publication']))
  671. $related->setTagContent(encodeXMLField('publication', $row['publication']),
  672. "relatedItem/titleInfo/title");
  673. // title (Abbreviated Journal)
  674. if (!empty($row['abbrev_journal'])) {
  675. $titleabbrev = NEW XMLBranch("titleInfo");
  676. $titleabbrev->setTagAttribute("type", "abbreviated");
  677. $titleabbrev->setTagContent(encodeXMLField('abbrev_journal', $row['abbrev_journal']), "titleInfo/title");
  678. $related->addXMLBranch($titleabbrev);
  679. }
  680. // name
  681. // editor
  682. if (!empty($row['editor'])) {
  683. $editor=$row['editor'];
  684. if (preg_match("/ *\(eds?\)$/", $editor))
  685. $editor = preg_replace("/[ \r\n]*\(eds?\)/i", "", $editor);
  686. $nameArray = separateNames("editor", "/\s*;\s*/", "/\s*,\s*/",
  687. "/(?<=^|[$word])[^-$word]+|(?<=^|[$upper])(?=$|[$upper])/$patternModifiers",
  688. $editor, "personal", "editor");
  689. foreach ($nameArray as $singleName)
  690. $related->addXMLBranch($singleName);
  691. }
  692. // corporate
  693. // NOTE: a copy of the code for 'corporate_author' above.
  694. // Needs to be a separate function later.
  695. if (!empty($row['corporate_author'])) {
  696. $nameBranch = new XMLBranch("name");
  697. $nameBranch->setTagAttribute("type", "corporate");
  698. $nameBranch->setTagContent(encodeXMLField('corporate_author', $row['corporate_author']), "name/namePart");
  699. if (empty($row['thesis']))
  700. $nameBranch->setTagContent("author", "name/role/roleTerm");
  701. else // thesis
  702. $nameBranch->setTagContent("degree grantor", "name/role/roleTerm");
  703. $nameBranch->setTagAttribute("authority", "marcrelator", "name/role/roleTerm");
  704. $nameBranch->setTagAttribute("type", "text", "name/role/roleTerm");
  705. $related->addXMLBranch($nameBranch);
  706. }
  707. // conference
  708. // NOTE: a copy of the code for 'conference' above.
  709. // Needs to be a separate function later.
  710. if (!empty($row['conference'])) {
  711. $nameBranch = new XMLBranch("name");
  712. $nameBranch->setTagAttribute("type", "conference");
  713. $nameBranch->setTagContent(encodeXMLField('conference', $row['conference']), "name/namePart");
  714. $related->addXMLBranch($nameBranch);
  715. }
  716. // originInfo
  717. $relorigin = new XMLBranch("originInfo");
  718. // dateIssued
  719. if (!empty($row['year']))
  720. $relorigin->setTagContent(encodeXMLField('year', $row['year']), "originInfo/dateIssued");
  721. // publisher
  722. if (!empty($row['publisher']))
  723. $relorigin->setTagContent(encodeXMLField('publisher', $row['publisher']), "originInfo/publisher");
  724. // place
  725. if (!empty($row['place'])) {
  726. $relorigin->setTagContent(encodeXMLField('place', $row['place']), "originInfo/place/placeTerm");
  727. $relorigin->setTagAttribute("type", "text",
  728. "originInfo/place/placeTerm");
  729. }
  730. // edition
  731. if (!empty($row['edition']))
  732. $relorigin->setTagContent(encodeXMLField('edition', $row['edition']), "originInfo/edition");
  733. if ($relorigin->hasBranch())
  734. $related->addXMLBranch($relorigin);
  735. // genre (and originInfo/issuance)
  736. if (empty($row['thesis'])) { // theses will get their own genre (see below)
  737. if (preg_match("/^(Journal Article|Magazine Article)$/", $row['type'])) {
  738. $related->setTagContent("continuing",
  739. "relatedItem/originInfo/issuance");
  740. $genremarc = new XMLBranch("genre");
  741. $genre = new XMLBranch("genre");
  742. $genremarc->setTagContent("periodical");
  743. $genremarc->setTagAttribute("authority", "marcgt");
  744. if ($row['type'] == "Magazine Article")
  745. $genre->setTagContent("magazine");
  746. else
  747. $genre->setTagContent("academic journal");
  748. $related->addXMLBranch($genremarc);
  749. $related->addXMLBranch($genre);
  750. }
  751. else if ($row['type'] == "Abstract") {
  752. $record->setTagContent("abstract or summary", "mods/genre");
  753. $record->setTagAttribute("authority", "marcgt", "mods/genre");
  754. }
  755. else if ($row['type'] == "Conference Article") {
  756. $related->setTagContent("conference publication", "relatedItem/genre");
  757. $related->setTagAttribute("authority", "marcgt", "relatedItem/genre");
  758. }
  759. else if ($row['type'] == "Newspaper Article") {
  760. $related->setTagContent("continuing",
  761. "relatedItem/originInfo/issuance");
  762. $related->setTagContent("newspaper", "relatedItem/genre");
  763. $related->setTagAttribute("authority", "marcgt", "relatedItem/genre");
  764. }
  765. else { // if ($row['type'] == "Book Chapter")
  766. $related->setTagContent("monographic",
  767. "relatedItem/originInfo/issuance");
  768. $related->setTagContent("book", "relatedItem/genre");
  769. $related->setTagAttribute("authority", "marcgt", "relatedItem/genre");
  770. }
  771. }
  772. // thesis
  773. else { // if (!empty($row['thesis']))
  774. $thesismarc = new XMLBranch("genre");
  775. $thesis = new XMLBranch("genre");
  776. $thesismarc->setTagContent("thesis");
  777. $thesismarc->setTagAttribute("authority", "marcgt");
  778. // tweak thesis names so that Bibutils will recognize them:
  779. if ($row['thesis'] == "Master's thesis")
  780. $row['thesis'] = "Masters thesis";
  781. $thesis->setTagContent(encodeXMLField('thesis', $row['thesis']));
  782. $related->addXMLBranch($thesismarc);
  783. $related->addXMLBranch($thesis);
  784. }
  785. if ((!empty($row['year'])) || (!empty($row['volume'])) ||
  786. (!empty($row['issue'])) || (!empty($row['pages']))) {
  787. $part = new XMLBranch("part");
  788. if (!empty($row['year']))
  789. $part->setTagContent(encodeXMLField('year', $row['year']), "date");
  790. if (!empty($row['volume'])) {
  791. $detailvolume = new XMLBranch("detail");
  792. $detailvolume->setTagContent(encodeXMLField('volume', $row['volume']), "detail/number");
  793. $detailvolume->setTagAttribute("type", "volume");
  794. $part->addXMLBranch($detailvolume);
  795. }
  796. if (!empty($row['issue'])) {
  797. $detailnumber = new XMLBranch("detail");
  798. $detailnumber->setTagContent(encodeXMLField('issue', $row['issue']), "detail/number");
  799. $detailnumber->setTagAttribute("type", "issue");
  800. $part->addXMLBranch($detailnumber);
  801. }
  802. if (!empty($row['pages'])) {
  803. if (preg_match("/[0-9] *- *[0-9]/", $row['pages'])) { // if a page range
  804. // split the page range into start and end pages
  805. list($pagestart, $pageend) = preg_split('/\s*[-]\s*/', $row['pages']);
  806. if ($pagestart < $pageend) { // extents MUST span multiple pages
  807. $pages = new XMLBranch("extent");
  808. $pages->setTagContent(encodeXMLField('pages', $pagestart), "extent/start");
  809. $pages->setTagContent(encodeXMLField('pages', $pageend), "extent/end");
  810. $pages->setTagAttribute("unit", "page");
  811. }
  812. else {
  813. $pages = new XMLBranch("detail");
  814. if ($pagestart == $pageend) // single-page item
  815. $pages->setTagContent(encodeXMLField('pages', $pagestart), "detail/number");
  816. else
  817. $pages->setTagContent(encodeXMLField('pages', $row['pages']), "detail/number");
  818. $pages->setTagAttribute("type", "page");
  819. }
  820. }
  821. else {
  822. $pages = new XMLBranch("detail");
  823. $pages->setTagContent(encodeXMLField('pages', $row['pages']), "detail/number");
  824. $pages->setTagAttribute("type", "page");
  825. }
  826. $part->addXMLBranch($pages);
  827. }
  828. $related->addXMLBranch($part);
  829. }
  830. // identifier
  831. // isbn
  832. if (!empty($row['isbn'])) {
  833. $identifier = new XMLBranch("identifier");
  834. $identifier->setTagContent(encodeXMLField('isbn', $row['isbn']));
  835. $identifier->setTagAttribute("type", "isbn");
  836. $related->addXMLBranch($identifier);
  837. }
  838. // issn
  839. if (!empty($row['issn'])) {
  840. $identifier = new XMLBranch("identifier");
  841. $identifier->setTagContent(encodeXMLField('issn', $row['issn']));
  842. $identifier->setTagAttribute("type", "issn");
  843. $related->addXMLBranch($identifier);
  844. }
  845. // series
  846. if ((!empty($row['series_editor'])) || (!empty($row['series_title'])) ||
  847. (!empty($row['abbrev_series_title'])) ||
  848. (!empty($row['series_volume'])) || (!empty($row['series_issue']))) {
  849. $related->addXMLBranch(serialBranch($row['series_editor'],
  850. $row['series_title'],
  851. $row['abbrev_series_title'],
  852. $row['series_volume'],
  853. $row['series_issue']));
  854. }
  855. $record->addXMLBranch($related);
  856. }
  857. // --- END TYPE == ABSTRACT || BOOK CHAPTER || CONFERENCE ARTICLE || JOURNAL ARTICLE || MAGAZINE ARTICLE || NEWSPAPER ARTICLE ---
  858. return $record;
  859. }
  860. // --------------------------------------------------------------------
  861. // Encode special chars, perform charset conversions and apply any
  862. // field-specific search & replace actions:
  863. function encodeXMLField($fieldName, $fieldValue)
  864. {
  865. global $fieldSpecificSearchReplaceActionsArray; // defined in function 'modsCollection()'
  866. // function 'encodeField()' is defined in 'include.inc.php'
  867. $encodedFieldValue = encodeField($fieldName, $fieldValue, $fieldSpecificSearchReplaceActionsArray, array(), true, "XML");
  868. return $encodedFieldValue;
  869. }
  870. ?>