You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

717 lines
38 KiB

  1. <?php
  2. // Project: Web Reference Database (refbase) <http://www.refbase.net>
  3. // Copyright: Matthias Steffens <mailto:refbase@extracts.de> and the file's
  4. // original author(s).
  5. //
  6. // This code is distributed in the hope that it will be useful,
  7. // but WITHOUT ANY WARRANTY. Please see the GNU General Public
  8. // License for more details.
  9. //
  10. // File: ./import_modify.php
  11. // Repository: $HeadURL: file:///svn/p/refbase/code/branches/bleeding-edge/import_modify.php $
  12. // Author(s): Matthias Steffens <mailto:refbase@extracts.de>
  13. //
  14. // Created: 17-Feb-06, 20:57
  15. // Modified: $Date: 2012-02-28 23:23:23 +0000 (Tue, 28 Feb 2012) $
  16. // $Author: msteffens $
  17. // $Revision: 1343 $
  18. // This php script accepts input from 'import.php' and will process records exported from Endnote, Reference Manager (RIS), BibTeX, ISI Web of Science,
  19. // Pubmed, CSA or Copac. In case of a single record, the script will call 'record.php' with all provided fields pre-filled. The user can then verify
  20. // the data, add or modify any details as necessary and add the record to the database. Multiple records will be imported directly.
  21. // TODO: I18n
  22. // Incorporate some include files:
  23. include 'initialize/db.inc.php'; // 'db.inc.php' is included to hide username and password
  24. include 'includes/include.inc.php'; // include common functions
  25. include 'includes/execute.inc.php'; // include functions that deal with execution of shell commands
  26. include 'includes/import.inc.php'; // include common import functions
  27. include 'initialize/ini.inc.php'; // include common variables
  28. // --------------------------------------------------------------------
  // START A SESSION:
  // call the 'start_session()' function (from 'include.inc.php') which will also read out available session variables:
  start_session(true);

  // --------------------------------------------------------------------

  // Initialize preferred display language:
  // (note that 'locales.inc.php' has to be included *after* the call to the 'start_session()' function)
  include 'includes/locales.inc.php'; // include the locales

  // --------------------------------------------------------------------

  // Clear any errors that might have been found previously:
  $errors = array();

  // Make sure that '$formVars' is always a defined array:
  // (without this, a request that carries no parameters at all would leave '$formVars' undefined,
  //  although it is read via 'isset()' and handed to 'saveSessionVariable()' further down)
  if (!isset($formVars))
      $formVars = array();

  // Write the (POST or GET) form variables into an array:
  foreach ($_REQUEST as $varname => $value)
  {
      // remove slashes from parameter values if 'magic_quotes_gpc = On':
      $formVars[$varname] = stripSlashesIfMagicQuotes($value); // function 'stripSlashesIfMagicQuotes()' is defined in 'include.inc.php'
  }
  45. // --------------------------------------------------------------------
  // Determine the ID of the client that sent this request:
  // the ID identifies queries coming from the refbase command line clients ("cli-refbase-1.0.1",
  // "cli-refbase_import-1.0") or from a bookmarklet (e.g., "jsb-refbase-1.0.0"); it is empty if no
  // 'client' parameter was given
  $client = isset($formVars['client']) ? $formVars['client'] : "";

  // Data sent via a bookmarklet (client ID starts with "jsb") do not come from the regular import
  // form, so the corresponding form values are set directly:
  if (preg_match("/^jsb/i", $client))
  {
      $formVars['formType'] = "import";
      $formVars['importRecordsRadio'] = "all";
      $formVars['importRecords'] = "1";
      $formVars['showSource'] = "1";
  }
  // Set the page we redirect to on error:
  // NOTE: For 'import_modify.php' the referrer is hardcoded to 'import.php'. Function 'start_session()'
  //       prefers a referrer that was saved in a session variable, which may lead back to the wrong page
  //       if the user used the back button of his browser. This happens e.g. if:
  //         1. the user imports, say, ID 'arXiv:cond-mat/0703452' which gets loaded into the 'record.php' form
  //         2. the user uses his browser's back button to switch back to the 'import.php' form
  //         3. the user attempts to import 'arXiv:cond-mat/070345' (which is an incorrect arXiv ID)
  //       With a referrer taken from the session variable, step 3 would redirect back to 'record.php'
  //       (instead of 'import.php'). The alternative would be to save '$_SERVER['HTTP_REFERER']' to the
  //       'referer' session variable explicitly (via 'saveSessionVariable("referer", $referer)').
  $referer = "import.php"; // on error, we'll (by default) redirect to the import form

  // First of all, make sure that the user is logged in:
  if (!isset($_SESSION['loginEmail']))
  {
      // ask the user to login first, then he'll get directed back to the calling page (normally, 'import.php'):
      header(sprintf("Location: user_login.php?referer=%s", rawurlencode($referer)));

      exit; // >>> !EXIT! <<<
  }

  // Next, make sure that the (logged in) user is allowed to import any record into the database:
  if (isset($_SESSION['user_permissions']))
  {
      // the 'user_permissions' session variable must contain either 'allow_import' or 'allow_batch_import':
      if (!preg_match("/allow_import|allow_batch_import/", $_SESSION['user_permissions']))
      {
          // return an appropriate error message:
          $HeaderString = returnMsg($loc["NoPermission"] . $loc["NoPermission_ForImport"] . "!", "warning", "strong", "HeaderString"); // function 'returnMsg()' is defined in 'include.inc.php'

          // command line clients ("cli...") get no redirect, everyone else is sent back to the main page:
          if (!preg_match("/^cli/i", $client))
          {
              header("Location: index.php");
          }

          exit; // >>> !EXIT! <<<
      }
  }
  89. // --------------------------------------------------------------------
  // EXTRACT FORM VARIABLES:

  // Note: The '$formVars' array could be used directly below, but each value is copied into its own
  //       variable to enhance readability. (The 'extract()' function is deliberately avoided since it
  //       may expose yet another security hole.)

  // The form used by the user (defaults to the main import form):
  $formType = isset($formVars['formType']) ? $formVars['formType'] : "import";

  // Main import form: the source text containing the bibliographic record(s)
  // (data from any successfully uploaded file will override data pasted into the 'sourceText' field)
  $sourceText = isset($formVars['sourceText']) ? $formVars['sourceText'] : "";

  // "Import IDs" form (which imports records from PubMed ID, arXiv ID, DOI or OpenURL): the entered IDs
  $sourceIDs = isset($formVars['sourceIDs']) ? $formVars['sourceIDs'] : "";

  // Bookmarklet: the URL containing the posted data
  $sourceURL = isset($formVars['sourceURL']) ? $formVars['sourceURL'] : "";

  // Whether we're supposed to display the original source data:
  $showSource = isset($formVars['showSource']) ? $formVars['showSource'] : "";

  // Which records to import:
  // by default (i.e., without the 'allow_batch_import' permission) the user may import only one record
  // at a time, in which case we always import the very first record
  $importRecordsRadio = "only";
  $importRecords = "1";

  if (isset($_SESSION['user_permissions']) && preg_match("/allow_batch_import/", $_SESSION['user_permissions']))
  {
      // import all records ('all') or just particular ones ('only'):
      $importRecordsRadio = isset($formVars['importRecordsRadio']) ? $formVars['importRecordsRadio'] : "";

      // the record numbers of those records that shall be imported:
      // examples of recognized formats: '1-5' imports the first five records; '1 3 7' will import records 1, 3 and 7;
      // '1-3 5-7 9' will import records 1, 2, 3, 5, 6, 7 and 9
      // (note that the first three records could be labelled e.g. as 'Record 12 of 52', 'Record 30 of 112' and
      // 'Record 202 of 533' but they must be referred to as records '1-3' in the 'importRecords' form)
      $importRecords = isset($formVars['importRecords']) ? $formVars['importRecords'] : "";
  }

  // Whether we're supposed to skip records with unrecognized data format:
  $skipBadRecords = isset($formVars['skipBadRecords']) ? $formVars['skipBadRecords'] : "";
  // Check if a file was uploaded:
  // (note that to have file uploads work, HTTP file uploads must be allowed within your 'php.ini' configuration file
  //  by setting the 'file_uploads' parameter to 'On'!)

  // 'getUploadInfo()' returns an associative array containing the following information about the file:
  //   name     - original name of file on client
  //   type     - MIME type of file
  //   tmp_name - name of temporary file on server
  //   error    - holds an error number >0 if something went wrong, otherwise 0 (may not be present in old PHP versions)
  //   size     - size of file in bytes

  // depending on what happened on upload, they will contain the following values (PHP 4.1 and above):
  //              no file upload   upload exceeds 'upload_max_filesize'   successful upload
  //              --------------   ------------------------------------   -----------------
  //   name       ""               [name]                                 [name]
  //   type       ""               ""                                     [type]
  //   tmp_name   "" OR "none"     ""                                     [tmp_name]
  //   error      4                1                                      0
  //   size       0                0                                      [size]
  $uploadFile = getUploadInfo("uploadFile"); // function 'getUploadInfo()' is defined in 'include.inc.php'

  $tmpFilePath = "";

  // Validate the 'uploadFile' field:
  // TODO: Move code that validates file uploads into its own function (and merge with related code from 'modify.php')
  if (!empty($uploadFile) && !empty($uploadFile["name"])) // if the user attempted to upload a file
  {
      // 'is_uploaded_file()' returns 'true' only if the file indicated by '$uploadFile["tmp_name"]' was uploaded via
      // HTTP POST. This helps to ensure that a malicious user hasn't tried to trick the script into working on files
      // upon which it should not be working - for instance, /etc/passwd.
      // NOTE: An empty 'tmp_name' never passes this check, so the "maximum upload file size exceeded" case
      //       is handled by the error code switch below (error code 1).
      if (is_uploaded_file($uploadFile["tmp_name"]))
      {
          // prevent access to the system's 'passwd' file (this should already be prevented by 'is_uploaded_file()' but anyhow):
          if (preg_match("/^passwd$/i", $uploadFile["name"])) // file name must not be 'passwd'
              $errors["uploadFile"] = "This file name is not allowed!";

          // check for invalid file name extensions (adjust the regex pattern if you want more relaxed file name validation):
          elseif (preg_match("/\.(exe|com|bat|zip|php|phps|php3|cgi)$/i", $uploadFile["name"])) // file name must not end with .exe, .com, .bat, .zip, .php, .phps, .php3 or .cgi
              $errors["uploadFile"] = "You cannot upload this type of file!";

          else
              $tmpFilePath = $uploadFile["tmp_name"];
      }
      else
      {
          // the 'error' element may be missing; treat a missing code like code 0 (which is reported as a generic problem):
          $uploadErrorCode = isset($uploadFile["error"]) ? $uploadFile["error"] : 0;

          switch ($uploadErrorCode)
          {
              case 1: // uploaded file exceeds the 'upload_max_filesize' directive in 'php.ini'
                  $maxFileSize = ini_get("upload_max_filesize");
                  $fileError = "File size must not be greater than " . $maxFileSize . ":";
                  $errors["uploadFile"] = $fileError; // inform the user that the maximum upload file size was exceeded
                  break;

              case 2: // uploaded file exceeds the MAX_FILE_SIZE directive that was specified in the html form (refbase doesn't currently specify MAX_FILE_SIZE but anyhow...)
                  $errors["uploadFile"] = "The file you are trying to upload is too big.";
                  break;

              case 3: // uploaded file was only partially uploaded
                  $errors["uploadFile"] = "The file you are trying to upload was only partially uploaded.";
                  break;

              case 4: // no file was uploaded
                  $errors["uploadFile"] = "You must select a file for upload.";
                  break;

              case 6: // no temporary folder available
                  $errors["uploadFile"] = "Missing a temporary folder.";
                  break;

              case 0: // no error reported although the file didn't pass 'is_uploaded_file()'; possible file attack!
              default: // any other error code
                  $errors["uploadFile"] = "There was a problem with your upload.";
                  break;
          }
      }
  }

  if (!empty($uploadFile) && !empty($tmpFilePath)) // if there was a file uploaded successfully
  {
      // Get file contents:
      $fileData = readFromFile($tmpFilePath); // function 'readFromFile()' is defined in 'execute.inc.php'

      // Data from any successfully uploaded file will override data pasted into the 'sourceText' text entry field:
      if (!empty($fileData))
          $sourceText = $fileData;
  }
  231. // --------------------------------------------------------------------
  // PRE-PROCESS DATA INPUT:

  // In case of a latin1-based database, attempt to convert UTF-8 data to refbase markup & latin1:
  // NOTE: For a latin1-based database, data pasted into the 'sourceText' text entry field will always be returned in
  //       ISO-8859-1 encoding (see the notes on function 'decodeHTML()' below). However, data that were received via a
  //       file upload (or from a client such as Bookends) will have the encoding of the original file (which may be UTF-8).
  if ($contentTypeCharset == "ISO-8859-1") // '$contentTypeCharset' is defined in 'ini.inc.php'
  {
      if (detectCharacterEncoding($sourceText) == "UTF-8") // function 'detectCharacterEncoding()' is defined in 'include.inc.php'
      {
          $sourceText = convertToCharacterEncoding("ISO-8859-1", "TRANSLIT", $sourceText, "UTF-8"); // function 'convertToCharacterEncoding()' is defined in 'include.inc.php'
      }
  }

  // Decode any HTML entities remaining in the source text:
  // NOTE: - Web browsers send back form data in the same encoding as the page containing the form. So if a user imports
  //         UTF-8 data (via the 'sourceText' text entry form) into a latin1-based database, non-latin1 characters will be
  //         encoded by the browser as HTML entities (e.g., the greek delta character would be represented as '&#948;' in
  //         the source text). Therefore, function 'decodeHTML()' is used to convert any remaining HTML entities first to
  //         UTF-8, then convert Unicode entities to refbase markup (if possible), and finally transform all Unicode
  //         characters that can't be successfully converted to their ASCII equivalents.
  //       - Alternatively, it might be easier to always use UTF-8 as page encoding for 'import.php' so that we'll always
  //         receive UTF-8 encoded data, then use function 'detectCharacterEncoding()' to detect the actual character
  //         encoding of the given source text, and convert to refbase markup/latin1 if needed.
  //
  // TODO: - this conversion causes invalid XML when importing MODS XML that contains encoded angle brackets
  //         (e.g. '<title>Harbours &lt;dt.&gt;</title>'); to work around this issue, the next statement needs to be
  //         commented out; for a real fix, the decoding of characters needs to be adapted based on the '$sourceFormat'
  //         (which, ATM, is only identified further down below)
  $sourceText = decodeHTML($contentTypeCharset, $sourceText); // function 'decodeHTML()' is defined in 'include.inc.php'
  // Process record number input:

  /**
   * Expands a record number specification (as entered into the 'importRecords' form field) into
   * the list of record numbers it denotes.
   *
   * The input is split on everything but digits and hyphens. Each resulting element is handled as follows:
   *   - 'N-M'    expands to all numbers from N to M (for incorrect input such as '3-5-9' only the first
   *              two numbers are used and anything else is ignored)
   *   - 'N', 'N-' or '-N' contribute the single number N
   *   - elements without any digit (such as '-') are ignored
   *
   * All record numbers are returned as integers, so that e.g. '03' and '3' denote the same record.
   *
   * NOTE: The size of a range is not limited here; record numbers are checked against the number of
   *       available records only later, during validation.
   *
   * @param mixed $importRecords  the raw specification, e.g. '1-3 5-7 9'
   * @return array  the unique record numbers in order of first appearance (keys are those kept by
   *                'array_unique()' and thus not necessarily consecutive); empty if nothing was specified
   */
  function extractImportRecordNumbers($importRecords)
  {
      $recordNumbers = array();

      // nothing (or something that isn't a plain value, like an array parameter) was specified:
      if (empty($importRecords) OR !is_scalar($importRecords))
          return $recordNumbers;

      // split input string on all but digits or the hyphen ("-") character:
      // (the 'PREG_SPLIT_NO_EMPTY' flag causes only non-empty pieces to be returned)
      $elements = preg_split("/[^0-9-]+/", (string) $importRecords, -1, PREG_SPLIT_NO_EMPTY); // this keeps only elements such as '1', '3-5', '3-5-9' or '3-'

      foreach ($elements as $element)
      {
          if (preg_match("/(\d+)-(\d+)/", $element, $rangeBounds)) // a range of record numbers (such as '1-5')
          {
              // append all record numbers from start number to end number:
              foreach (range((int) $rangeBounds[1], (int) $rangeBounds[2]) as $recordNumber)
                  $recordNumbers[] = $recordNumber;
          }
          elseif (preg_match("/\d+/", $element, $singleNumber)) // just a single record number (possibly with stray hyphens, like '3-')
          {
              $recordNumbers[] = (int) $singleNumber[0];
          }
          // else: the element consists of hyphens only and denotes no record
      }

      // remove any duplicate record number(s):
      return array_unique($recordNumbers);
  }

  // the array which holds all the record numbers that shall be imported:
  // (validation will throw up an error if we're supposed to import only particular records but no record numbers were specified)
  $importRecordNumbersArray = extractImportRecordNumbers($importRecords);
  279. // --------------------------------------------------------------------
  // IDENTIFY SOURCE FORMAT:

  // start out with an empty format so that '$sourceFormat' is defined even if '$formType'
  // is neither "import" nor "importID":
  $sourceFormat = "";

  // if the source text originated from the main 'import' form provided by 'import.php':
  if ($formType == "import")
      // attempt to identify the format of the input text:
      $sourceFormat = identifySourceFormat($sourceText); // function 'identifySourceFormat()' is defined in 'import.inc.php'

  // else if source text originated from the "Import IDs" form (which imports records from PubMed ID, arXiv ID, DOI or OpenURL):
  elseif ($formType == "importID")
      $sourceFormat = identifySourceID($sourceIDs); // function 'identifySourceID()' is defined in 'import.inc.php'
  288. // --------------------------------------------------------------------
  // FETCH DATA FROM URL:

  // In case of import via ID, the source data are fetched from the service that belongs to the identified ID format:
  // TODO: Modify the code so that '$sourceIDs' can contain a mixture of any supported IDs.
  if (($formType == "importID") && !empty($sourceIDs) && !empty($sourceFormat))
  {
      // - PubMed IDs:
      if (preg_match("/^Pubmed (Medline|XML)$/i", $sourceFormat) && preg_match("/[0-9]/", $sourceIDs))
      {
          // Split on any whitespace between PubMed IDs:
          $idArray = preg_split("/\s+/", $sourceIDs, -1, PREG_SPLIT_NO_EMPTY);

          // Fetch source data from PubMed.gov for all given PubMed IDs:
          list($errors, $sourceText) = fetchDataFromPubMed($idArray, $sourceFormat); // function 'fetchDataFromPubMed()' is defined in 'import.inc.php'
      }

      // - arXiv IDs:
      elseif (preg_match("/^arXiv XML$/i", $sourceFormat) && preg_match("#(arXiv:|http://arxiv\.org/abs/)?([\w.-]+/\d{7}|\d{4}\.\d{4,})(v\d+)?#i", $sourceIDs))
      {
          // Remove any "arXiv:" or "http://arxiv.org/abs/" prefixes from the ID string:
          $sourceIDs = preg_replace("#(?<=^|\s)(arXiv:|http://arxiv\.org/abs/)#", "", $sourceIDs);

          // Split on any whitespace between arXiv IDs:
          $idArray = preg_split("/\s+/", $sourceIDs, -1, PREG_SPLIT_NO_EMPTY);

          // Fetch source data from arXiv.org for all given arXiv IDs:
          // NOTE: In case of function 'fetchDataFromArXiv()', variable '$sourceText' contains the SimplePie object with the
          //       parsed Atom XML feed (and not just text); handling of any encoding issues is therefore done within
          //       function 'arxivToRefbase()'
          // TODO: This is inconsistent with the behaviour of the other 'fetchData*()' functions and we should do something about it!
          list($errors, $sourceText) = fetchDataFromArXiv($idArray, $sourceFormat); // function 'fetchDataFromArXiv()' is defined in 'import.inc.php'
      }

      // - DOIs/OpenURLs:
      // TODO: - to support OpenURL context objects from COinS or Atom XML, we need to decode ampersand characters
      //         ('&amp;' -> '&'), and allow for OpenURLs that don't start with '?' or '&'
      elseif (preg_match("/^CrossRef XML$/i", $sourceFormat) && (preg_match("#(?<=^|\s)(doi:|http://dx\.doi\.org/)?10\.\d{4}/\S+?(?=$|\s)#i", $sourceIDs) || preg_match("#(?<=^|\s)(openurl:|http://.+?(?=\?))?.*?(?<=[?&])ctx_ver=Z39\.88-2004(?=&|$).*?(?=$|\s)#i", $sourceIDs)))
      {
          // Remove any prefixes (like "doi:", "openurl:", "http://dx.doi.org/" or "http://...?") from the ID string:
          $sourceIDs = preg_replace("#(?<=^|\s)(doi:|http://dx\.doi\.org/)#", "", $sourceIDs);
          $sourceIDs = preg_replace("#(?<=^|\s)(openurl:|http://.+?(?=\?))#", "", $sourceIDs);

          // Split on any whitespace between DOIs/OpenURLs:
          $idArray = preg_split("/\s+/", $sourceIDs, -1, PREG_SPLIT_NO_EMPTY);

          // If the ID string contains a DOI, try to retrieve information from PubMed.gov before querying CrossRef.org:
          // (note that this call also replaces '$idArray')
          // TODO: Test with $sourceIDs containing a mixture of DOIs and OpenURLs, as well as with $sourceIDs containing
          //       DOIs for articles listed in PubMed AND NOT listed in PubMed!
          if (preg_match("#10\.\d{4}/\S+?(?=$|\s)#i", $sourceIDs))
          {
              list($errors, $sourceText, $idArray) = fetchDOIsFromPubMed($idArray); // function 'fetchDOIsFromPubMed()' is defined in 'import.inc.php'
          }

          if (empty($idArray))
          {
              // no IDs are left for CrossRef, so the source data are handled as PubMed Medline data:
              // NOTE(review): presumably 'fetchDOIsFromPubMed()' returns only those IDs that it could not resolve - confirm
              $sourceFormat = "Pubmed Medline";
          }
          else
          {
              // Fetch record metadata from CrossRef.org for all given DOIs/OpenURLs:
              list($errors, $sourceText) = fetchDataFromCrossRef($idArray, $sourceFormat); // function 'fetchDataFromCrossRef()' is defined in 'import.inc.php'

              // In case of a latin1-based database, attempt to convert UTF-8 data to refbase markup & latin1:
              if ($contentTypeCharset == "ISO-8859-1")
              {
                  if (detectCharacterEncoding($sourceText) == "UTF-8")
                      $sourceText = convertToCharacterEncoding("ISO-8859-1", "TRANSLIT", $sourceText, "UTF-8");
              }
          }
      }
  }
  345. // --------------------------------------------------------------------
  // PARSE SOURCE TEXT:

  // Start out with empty parse results:
  // these defaults stay in effect if no source data or format were given, and also if the importer for the
  // identified format is not available (so that the validation code below never reads undefined variables)
  $importDataArray = array();
  $recordsCount = 0;
  $importRecordNumbersRecognizedFormatArray = array();
  $importRecordNumbersNotRecognizedFormatArray = array();

  if (!empty($sourceText) AND !empty($sourceFormat))
  {
      // fetch the path/name of the import format file that's associated with the import format given in '$sourceFormat':
      $importFormatFile = getFormatFile($sourceFormat, "import"); // function 'getFormatFile()' is defined in 'include.inc.php'

      if (!empty($importFormatFile))
      {
          // Get all cite keys specified by the current user and build an array of uniquified cite keys ('$citeKeysArray')
          // which is used to ensure uniqueness of generated cite keys among all imported records as well as the user's existing records:
          $userCiteKeysArray = getUserCiteKeys($loginUserID); // '$loginUserID' is provided as session variable on login; function 'getUserCiteKeys()' is defined in 'include.inc.php'

          // Get all user options for the current user (which is required by function 'generateCiteKey()'
          // that, in turn, is called below & from within the 'addRecords()' function):
          $userOptionsArray = getUserOptions($loginUserID); // function 'getUserOptions()' is defined in 'include.inc.php'

          // Include the found import format file *once*:
          include_once "import/" . $importFormatFile;

          // Parse records from the specified import format:
          // function 'importRecords()' is defined in the import format file given in '$importFormatFile' (which, in turn,
          // must reside in the 'import' directory of the refbase root directory)
          // NOTE: for arXiv IDs, '$sourceText' holds the SimplePie object returned by 'fetchDataFromArXiv()' instead of text
          // NOTE: this replaces the whole '$errors' array with the one returned by 'importRecords()'
          list($importDataArray, $recordsCount, $importRecordNumbersRecognizedFormatArray, $importRecordNumbersNotRecognizedFormatArray, $errors) = importRecords($sourceText, $importRecordsRadio, $importRecordNumbersArray);
      }
      else
          $errors["sourceText"] = "Sorry, but the $sourceFormat importer is currently not available!";
  }
  376. // --------------------------------------------------------------------
  // VALIDATE DATA FIELDS:

  // For each parsed record, function 'validateRecords()' (in 'import.inc.php') will assign errors to '$errors["sourceText"]'.
  // In case of the "Import IDs" form, these error messages are moved over to '$errors["sourceIDs"]':
  if (($formType == "importID") && isset($errors["sourceText"]))
  {
      $errors["sourceIDs"] = $errors["sourceText"];
      unset($errors["sourceText"]);
  }

  // Only the first of the following checks that fails is reported:

  // - main import form: some source text must be given
  if (($formType == "import") && empty($sourceText))
  {
      $errors["sourceText"] = "Source data missing!";
  }

  // - "Import IDs" form: unless an ID error was already filed, some recognized IDs must be given
  elseif (($formType == "importID") && !isset($errors["sourceIDs"]) && (empty($sourceIDs) || empty($sourceFormat)))
  {
      $errors["sourceIDs"] = "You must specify at least one valid ID!";
  }

  // - the format of the given source data must be among the recognized formats
  elseif (empty($sourceFormat))
  {
      $errors["sourceText"] = "Unrecognized data format!";
  }

  // - if we're supposed to import only particular records, the 'importRecords' text entry field must name
  //   records that are actually available in the input data
  elseif ($importRecordsRadio == "only")
  {
      if (!empty($importRecords) && preg_match("/[0-9]/", $importRecords)) // some record numbers were given
      {
          // construct an array of available record numbers:
          // NOTE(review): 'range(1, 0)' counts down and yields '1' and '0', so with zero records a request
          //               for record 1 passes this check - confirm whether that is intended
          $availableRecordNumbersArray = range(1, $recordsCount);

          // get all record numbers to import which are NOT available in the source data:
          $importRecordNumbersNotAvailableArray = array_diff($importRecordNumbersArray, $availableRecordNumbersArray);

          // (just FYI: 'array_diff($importRecordNumbersArray, $importRecordNumbersNotAvailableArray)' would get all
          //  record numbers to import which ARE actually available in the source data)

          if (!empty($importRecordNumbersNotAvailableArray)) // the user did request to import some record(s) that don't exist in the source data
          {
              $errors["importRecords"] = ($recordsCount == 1)
                  ? "Only one record available! You can only use record number '1'."
                  : "Only {$recordsCount} records available! You can only use record numbers '1-{$recordsCount}'.";
          }
      }
      else // partial import requested but no record numbers given
      {
          $errors["importRecords"] = "Record number(s) missing!";
      }
  }

  // File the 'badRecords' status:
  // NOTE: validation of individual records is done within the import functions and the '$errors' array is modified
  //       within these functions if any records of unrecognized format are found
  if (empty($sourceText)) // no source text available
  {
      $errors["badRecords"] = "all";
  }
  elseif (empty($importRecordNumbersRecognizedFormatArray)) // none of the records to import had a recognized format
  {
      // this additional error element indicates whether the 'Skip records with unrecognized data format' checkbox
      // shall be displayed or not:
      $errors["badRecords"] = "all";

      // a message is given only if the user attempted to import more than one record; for a single record (which is
      // also the case if '$importRecords' is empty) we insert an empty 'skipBadRecords' element so that 'import.php'
      // does the right thing:
      $errors["skipBadRecords"] = (!empty($sourceFormat) && (count($importRecordNumbersNotRecognizedFormatArray) > 1))
          ? "Sorry, but all of the specified records were of unrecognized data format!"
          : "";
  }
  elseif (!empty($importRecordNumbersNotRecognizedFormatArray)) // some records had a recognized format but some were NOT recognized
  {
      $errors["badRecords"] = "some"; // see the note on the checkbox above
      $errors["skipBadRecords"] = "Skip records with unrecognized data format";
  }
  440. // --------------------------------------------------------------------
  // Check if there were any validation errors:
  if (count($errors) > 0)
  {
      // the 'badRecords' element is only present if some (or all) records had an unrecognized format,
      // so it must not be read unconditionally:
      $badRecordsStatus = isset($errors["badRecords"]) ? $errors["badRecords"] : "";

      // we ignore errors regarding records with unrecognized format if:
      // - at least some of the specified records had a valid data format and
      // - the user did mark the 'Skip records with unrecognized data format' checkbox
      if (!(($badRecordsStatus == "some") AND ($skipBadRecords == "1")))
      {
          // ...otherwise we'll present the error message(s):
          if (preg_match("/^be/i", $client)) // if the query originated from a Bookends upload request ("be-bookends_import-1.0")
          {
              // Include errors in redirection request:
              // (the parameters are joined with "&" so that the query string doesn't start with a stray "&")
              $errorQueryParts = array();

              foreach ($errors as $varname => $value)
                  $errorQueryParts[] = rawurlencode($varname) . "=" . rawurlencode($value);

              header("Location: " . $referer . "?" . implode("&", $errorQueryParts));
          }
          elseif (preg_match("/^cli/i", $client)) // if the query originated from a command line client such as the refbase CLI clients ("cli-refbase-1.1", "cli-refbase_import-1.0")
          {
              echo "There were validation errors regarding the data you submitted:\n\n";

              // the info about records with unrecognized format is printed last, and in a form that suits the command line:
              if (($badRecordsStatus == "all") && (!empty($errors["skipBadRecords"])))
                  $skipBadInfo = $errors["skipBadRecords"] . "\n\n";
              elseif ($badRecordsStatus == "some")
                  $skipBadInfo = "Use '--skipbad=1' to skip records with unrecognized data format.\n\n";
              else
                  $skipBadInfo = "";

              unset($errors["badRecords"]);
              unset($errors["skipBadRecords"]);

              foreach ($errors as $varname => $value)
              {
                  $value = preg_replace("/<br>/i", "\n ", $value); // convert HTML line breaks for plain text output
                  echo $varname . ": " . $value . "\n\n";
              }

              echo $skipBadInfo;
          }
          else
          {
              // Write back session variables:
              saveSessionVariable("errors", $errors); // function 'saveSessionVariable()' is defined in 'include.inc.php'
              saveSessionVariable("formVars", $formVars);

              // Redirect the browser back to the import form:
              header("Location: " . $referer);
          }

          exit; // >>> !EXIT! <<<
      }
  }
  487. // --------------------------------------------------------------------
  // If we made it here, then the data is considered valid!

  // IMPORT RECORDS:
  // ('$importedRecordsArray' will hold the serial numbers of all newly imported records)
  $importedRecordsArray = array();

  // Records are written to the database right away unless exactly one record of recognized format
  // was submitted by something other than a command line ("cli...") or Bookends ("be...") client:
  if ((count($importRecordNumbersRecognizedFormatArray) != 1) || preg_match("/^(cli|be)/i", $client))
  {
      // Add all records to the database (i.e., for each record, add a row entry to MySQL table 'refs'):
      $importedRecordsArray = addRecords($importDataArray); // function 'addRecords()' is defined in 'include.inc.php'
  }
  else // a single record gets handed over to 'record.php' so that the user can verify it before it's added:
  {
      // If the record data contain a call number but no specific cite key, the call number is copied
      // to the user-specific 'cite_key' field (so that the original call number/cite key is retained
      // as cite key upon export); note that, depending on the user's settings, the cite key may still
      // get modified or regenerated by function 'generateCiteKey()' below
      $hasCallNumber = !empty($importDataArray['records'][0]['call_number']);
      $lacksCiteKey = empty($importDataArray['records'][0]['cite_key']);

      if ($hasCallNumber && $lacksCiteKey)
      {
          $importDataArray['records'][0]['cite_key'] = $importDataArray['records'][0]['call_number'];
      }

      // Function 'generateCiteKey()' requires the array key names used by the '$formVars' array, so
      // the keys of the record array are mapped to these names first
      // (eventually, the '$formVars' array should use the MySQL field names as names for its array keys)
      $parsedRecordFormVars = buildFormVarsArray($importDataArray['records'][0]); // function 'buildFormVarsArray()' is defined in 'include.inc.php'

      // Generate or modify (e.g. uniquify) the cite key for this record:
      $importDataArray['records'][0]['cite_key'] = generateCiteKey($parsedRecordFormVars); // function 'generateCiteKey()' is defined in 'include.inc.php'

      // Save import data to session variable:
      // NOTE: Passing the import data via a session variable allows to retain large param/value strings
      //       that would exceed the maximum string limit for GET requests. This works around a limitation
      //       in Internet Explorer which has a maximum URL length of 2,083 characters & a maximum path
      //       length of 2,048 characters. More info: <http://support.microsoft.com/kb/208427/EN-US/>
      saveSessionVariable("importData", $importDataArray['records'][0]);

      // RELOCATE TO IMPORT PAGE:
      // call 'record.php' and load the form fields with the data of the current record
      header("Location: record.php?recordAction=add&mode=import&importSource=generic");

      exit; // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> !EXIT! <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
  }
  522. // --------------------------------------------------------------------
  // DISPLAY RESULTS
  // Depending on whether any records were imported, the script either redirects to 'show.php'
  // (optionally after sending an email announcement) or reports that nothing was imported.
  if (!empty($importedRecordsArray)) // if some records were successfully imported
  {
      $importedRecordsCount = count($importedRecordsArray);

      // build string of record serial numbers (to be used with the 'records' query parameter):
      $recordSerialsQueryString = $importedRecordsArray[0]; // add first serial number

      for ($i = 1; $i < $importedRecordsCount; $i++) // for the second to the last serial number...
      {
          // implode consecutive serial numbers into a range (e.g. transform "150,151,152" into "150-152"):
          if ($importedRecordsArray[$i] == ($importedRecordsArray[$i - 1] + 1)) // if this number is consecutive to the previous one
          {
              // a trailing "-" indicates that a range has been started already
              if (!preg_match("/-$/", $recordSerialsQueryString))
                  $recordSerialsQueryString .= "-"; // start range

              if ($i == ($importedRecordsCount - 1)) // if this is the last item in the array
                  $recordSerialsQueryString .= $importedRecordsArray[$i]; // end range
          }
          else // this number is NOT consecutive to the previous one
          {
              if (preg_match("/-$/", $recordSerialsQueryString))
                  $recordSerialsQueryString .= $importedRecordsArray[$i - 1]; // end any previous range

              $recordSerialsQueryString .= "," . $importedRecordsArray[$i]; // append this number using a comma as a delimiter
          }
      }

      // Send EMAIL announcement:
      if ($sendEmailAnnouncements == "yes")
      {
          // variables '$sendEmailAnnouncements', '$mailingListEmail', '$officialDatabaseName' and '$databaseBaseURL' are specified in 'ini.inc.php';
          // '$loginFirstName' and '$loginLastName' are provided as session variables by the 'start_session()' function in 'include.inc.php'

          // send a notification email to the mailing list email address given in '$mailingListEmail':
          $emailRecipient = "Literature Database Announcement List <" . $mailingListEmail . ">";

          if ($importedRecordsCount == 1)
          {
              $emailSubject = "New record added to the " . $officialDatabaseName;
              $emailBodyIntro = "One record has been added to the " . $officialDatabaseName . ":";
              $detailsURL = $databaseBaseURL . "show.php?record=" . $importedRecordsArray[0];
          }
          else // $importedRecordsCount > 1
          {
              $emailSubject = "New records added to the " . $officialDatabaseName;
              $emailBodyIntro = $importedRecordsCount . " records have been added to the " . $officialDatabaseName . ":";
              $detailsURL = $databaseBaseURL . "show.php?records=" . $recordSerialsQueryString;
          }

          $emailBody = $emailBodyIntro
                     . "\n\n added by: " . $loginFirstName . " " . $loginLastName
                     . "\n details: " . $detailsURL
                     . "\n";

          sendEmail($emailRecipient, $emailSubject, $emailBody); // function 'sendEmail()' is defined in 'include.inc.php'
      }

      if ($importedRecordsCount == 1)
          $headerMessage = $importedRecordsCount . " " . $loc["RecordSuccessfullyImported"] . ":";
      else // $importedRecordsCount > 1
          $headerMessage = $importedRecordsCount . " " . $loc["RecordsSuccessfullyImported"] . ":";

      // DISPLAY all newly added records:
      // ('$client' is URL-encoded just like the header message so that reserved characters such as
      //  '&' or '#' within it cannot alter the query string of the redirect URL)
      header("Location: show.php?records=" . $recordSerialsQueryString . "&headerMsg=" . rawurlencode($headerMessage) . "&client=" . rawurlencode($client));
  }
  else // nothing imported
  {
      if (preg_match("/^cli/i", $client)) // if the query originated from a command line client such as the refbase CLI clients ("cli-refbase-1.1", "cli-refbase_import-1.0")
      {
          echo "No records imported!\n\n";
      }
      else
      {
          // we'll file again this additional error element here so that the 'errors' session variable isn't empty causing 'import.php' to re-load the form data that were submitted by the user
          $errors["badRecords"] = "all";

          // return an appropriate error message:
          $HeaderString = returnMsg($loc["NoRecordsImported"] . "!", "warning", "strong", "HeaderString"); // function 'returnMsg()' is defined in 'include.inc.php'

          // Write back session variables:
          saveSessionVariable("errors", $errors);
          saveSessionVariable("formVars", $formVars);

          header("Location: " . $referer); // redirect to the calling page (normally, 'import.php')
      }
  }
  596. // --------------------------------------------------------------------
  597. ?>