You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

158 lines
7.5 KiB

  1. <?php
  2. // Project: Web Reference Database (refbase) <http://www.refbase.net>
  3. // Copyright: Matthias Steffens <mailto:refbase@extracts.de> and the file's
  4. // original author(s).
  5. //
  6. // This code is distributed in the hope that it will be useful,
  7. // but WITHOUT ANY WARRANTY. Please see the GNU General Public
  8. // License for more details.
  9. //
  10. // File: ./includes/transtab_refbase_unicode.inc.php
  11. // Repository: $HeadURL: file:///svn/p/refbase/code/branches/bleeding-edge/includes/transtab_refbase_unicode.inc.php $
  12. // Author(s): Matthias Steffens <mailto:refbase@extracts.de>
  13. //
  14. // Created: 02-Jun-06, 01:41
  15. // Modified: $Date: 2008-07-30 14:50:42 +0000 (Wed, 30 Jul 2008) $
  16. // $Author: msteffens $
  17. // $Revision: 1183 $
  // Search & replace patterns (and helper functions) for converting refbase markup to Unicode characters.
  //
  // Each array key is a perl-style regular expression that includes its leading & trailing slashes
  // (plus any pattern modifiers); each value is the replacement for that pattern. The entry order is
  // significant (see the note on the underline/italic patterns below), so keep it when editing.
  //
  // NOTE(review): the two patterns carrying the "e" (PREG_REPLACE_EVAL) modifier have their replacement
  // string evaluated as PHP code. That modifier was deprecated in PHP 5.5.0 and removed in PHP 7.0.0.
  // Replacing it (e.g. with preg_replace_callback()) also requires changing the code that applies
  // this table, which lives outside this file, so the patterns are deliberately left as they are.

  global $patternModifiers; // defined in 'transtab_unicode_charset.inc.php' and 'transtab_latin1_charset.inc.php'

  $transtab_refbase_unicode = array(

      // ----- fontshape markup: currently NOT converted (uncomment to strip fontshape markup from exported text)
      // "/__(?!_)(.+?)__/" => '\\1', // the pattern for underline (__...__) must come before the one for italic (_..._)
      // "/_(.+?)_/" => '\\1',
      // "/\\*\\*(.+?)\\*\\*/" => '\\1',

      // ----- super- & subscript markup: delegated to the functions defined at the end of this file
      "/\\[super:(.+?)\\]/ie" => "superScriptToUnicode('\\1')", // converts superscript text to appropriate Unicode entities
      "/\\[sub:(.+?)\\]/ie"   => "subScriptToUnicode('\\1')",   // converts subscript text to appropriate Unicode entities

      // ----- symbols
      "/\\[permil\\]/"   => '‰', // <U2030> (per mille sign)
      "/\\[infinity\\]/" => '∞', // <U221E> (infinity)

      // ----- lowercase Greek letters
      "/\\[alpha\\]/"   => 'α',
      "/\\[beta\\]/"    => 'β',
      "/\\[gamma\\]/"   => 'γ',
      "/\\[delta\\]/"   => 'δ',
      "/\\[epsilon\\]/" => 'ε',
      "/\\[zeta\\]/"    => 'ζ',
      "/\\[eta\\]/"     => 'η',
      "/\\[theta\\]/"   => 'θ',
      "/\\[iota\\]/"    => 'ι',
      "/\\[kappa\\]/"   => 'κ',
      "/\\[lambda\\]/"  => 'λ',
      "/\\[mu\\]/"      => 'μ',
      "/\\[nu\\]/"      => 'ν',
      "/\\[xi\\]/"      => 'ξ',
      "/\\[omicron\\]/" => 'ο',
      "/\\[pi\\]/"      => 'π',
      "/\\[rho\\]/"     => 'ρ',
      "/\\[sigmaf\\]/"  => 'ς',
      "/\\[sigma\\]/"   => 'σ',
      "/\\[tau\\]/"     => 'τ',
      "/\\[upsilon\\]/" => 'υ',
      "/\\[phi\\]/"     => 'φ',
      "/\\[chi\\]/"     => 'χ',
      "/\\[psi\\]/"     => 'ψ',
      "/\\[omega\\]/"   => 'ω',

      // ----- uppercase Greek letters
      "/\\[Alpha\\]/"   => 'Α',
      "/\\[Beta\\]/"    => 'Β',
      "/\\[Gamma\\]/"   => 'Γ',
      "/\\[Delta\\]/"   => 'Δ',
      "/\\[Epsilon\\]/" => 'Ε',
      "/\\[Zeta\\]/"    => 'Ζ',
      "/\\[Eta\\]/"     => 'Η',
      "/\\[Theta\\]/"   => 'Θ',
      "/\\[Iota\\]/"    => 'Ι',
      "/\\[Kappa\\]/"   => 'Κ',
      "/\\[Lambda\\]/"  => 'Λ',
      "/\\[Mu\\]/"      => 'Μ',
      "/\\[Nu\\]/"      => 'Ν',
      "/\\[Xi\\]/"      => 'Ξ',
      "/\\[Omicron\\]/" => 'Ο',
      "/\\[Pi\\]/"      => 'Π',
      "/\\[Rho\\]/"     => 'Ρ',
      "/\\[Sigma\\]/"   => 'Σ',
      "/\\[Tau\\]/"     => 'Τ',
      "/\\[Upsilon\\]/" => 'Υ',
      "/\\[Phi\\]/"     => 'Φ',
      "/\\[Chi\\]/"     => 'Χ',
      "/\\[Psi\\]/"     => 'Ψ',
      "/\\[Omega\\]/"   => 'Ω',

      // ----- typographic punctuation
      "/\"(.+?)\"/" => '“\\1”', // <U201C>...<U201D> (left and right double quotation marks)
      "/ +- +/"     => ' – ',   // <U2013> (endash)

      // Note that for UTF-8 based systems, '$patternModifiers' contains the "u" (PCRE_UTF8) pattern modifier
      // which causes PHP/PCRE to treat pattern strings as UTF-8 (otherwise this conversion pattern would
      // garble UTF-8 characters such as "Ö")
      "/–/$patternModifiers" => '–' // <U2013> (endash)
  );
  // Search & replace actions that map plain characters to their Unicode superscript counterparts.
  // Keys are perl-style regular expressions (including delimiters), values are the replacements.
  // The entries are ordered: the single-character conversions come first, the catch-all pattern last.
  global $patternModifiers; // defined in 'transtab_unicode_charset.inc.php' and 'transtab_latin1_charset.inc.php'

  $unicodeSuperScriptSearchReplaceActionsArray = array(
      "/1/"   => '¹', // <U00B9> (superscript one)
      "/2/"   => '²', // <U00B2> (superscript two)
      "/3/"   => '³', // <U00B3> (superscript three)
      "/4/"   => '⁴', // <U2074> (superscript four)
      "/5/"   => '⁵', // <U2075> (superscript five)
      "/6/"   => '⁶', // <U2076> (superscript six)
      "/7/"   => '⁷', // <U2077> (superscript seven)
      "/8/"   => '⁸', // <U2078> (superscript eight)
      "/9/"   => '⁹', // <U2079> (superscript nine)
      "/0/"   => '⁰', // <U2070> (superscript zero)
      "/\\+/" => '⁺', // <U207A> (superscript plus sign)
      "/-/"   => '⁻', // <U207B> (superscript minus)
      "/=/"   => '⁼', // <U207C> (superscript equals sign)
      "/\\(/" => '⁽', // <U207D> (superscript left parenthesis)
      "/\\)/" => '⁾', // <U207E> (superscript right parenthesis)
      "/n/"   => 'ⁿ', // <U207F> (superscript latin small letter n)

      // Keep superscript markup in place for any text that has no matching superscript entity in Unicode.
      // The character class below consists of multi-byte UTF-8 characters, so the pattern needs
      // '$patternModifiers' (which contains the "u" (PCRE_UTF8) modifier on UTF-8 based systems):
      // without it PCRE compares single bytes and would cut other UTF-8 characters in half whenever they
      // share a byte with one of the listed characters (e.g. "β" = CE B2 vs. "²" = C2 B2).
      "/([^¹²³⁴⁵⁶⁷⁸⁹⁰⁺⁻⁼⁽⁾ⁿ]+)/$patternModifiers" => '[super:\\1]'
  );
  // Search & replace actions that map plain characters to their Unicode subscript counterparts.
  // Keys are perl-style regular expressions (including delimiters), values are the replacements.
  // The entries are ordered: the single-character conversions come first, the catch-all pattern last.
  global $patternModifiers; // defined in 'transtab_unicode_charset.inc.php' and 'transtab_latin1_charset.inc.php'

  $unicodeSubScriptSearchReplaceActionsArray = array(
      "/1/"   => '₁', // <U2081> (subscript one)
      "/2/"   => '₂', // <U2082> (subscript two)
      "/3/"   => '₃', // <U2083> (subscript three)
      "/4/"   => '₄', // <U2084> (subscript four)
      "/5/"   => '₅', // <U2085> (subscript five)
      "/6/"   => '₆', // <U2086> (subscript six)
      "/7/"   => '₇', // <U2087> (subscript seven)
      "/8/"   => '₈', // <U2088> (subscript eight)
      "/9/"   => '₉', // <U2089> (subscript nine)
      "/0/"   => '₀', // <U2080> (subscript zero)
      "/\\+/" => '₊', // <U208A> (subscript plus sign)
      "/-/"   => '₋', // <U208B> (subscript minus)
      "/=/"   => '₌', // <U208C> (subscript equals sign)
      "/\\(/" => '₍', // <U208D> (subscript left parenthesis)
      "/\\)/" => '₎', // <U208E> (subscript right parenthesis)

      // Keep subscript markup in place for any text that has no matching subscript entity in Unicode.
      // The character class below consists of multi-byte UTF-8 characters, so the pattern needs
      // '$patternModifiers' (which contains the "u" (PCRE_UTF8) modifier on UTF-8 based systems):
      // without it PCRE compares single bytes and would cut other UTF-8 characters in half whenever they
      // share a byte with one of the listed characters (e.g. "É" = C3 89 vs. "₉" = E2 82 89).
      "/([^₁₂₃₄₅₆₇₈₉₀₊₋₌₍₎]+)/$patternModifiers" => '[sub:\\1]'
  );
  121. // --------------------------------------------------------------------
  /**
   * Converts superscript text to appropriate Unicode entities.
   *
   * Runs the given text through the search & replace actions held in the global
   * '$unicodeSuperScriptSearchReplaceActionsArray'.
   *
   * @param string $sourceString the text that was marked up as superscript
   * @return mixed whatever 'searchReplaceText()' returns for the given text
   */
  function superScriptToUnicode($sourceString)
  {
      global $unicodeSuperScriptSearchReplaceActionsArray;

      // function 'searchReplaceText()' is defined in 'include.inc.php'
      return searchReplaceText($unicodeSuperScriptSearchReplaceActionsArray, $sourceString, true);
  }
  129. // --------------------------------------------------------------------
  /**
   * Converts subscript text to appropriate Unicode entities.
   *
   * Runs the given text through the search & replace actions held in the global
   * '$unicodeSubScriptSearchReplaceActionsArray'.
   *
   * @param string $sourceString the text that was marked up as subscript
   * @return mixed whatever 'searchReplaceText()' returns for the given text
   */
  function subScriptToUnicode($sourceString)
  {
      global $unicodeSubScriptSearchReplaceActionsArray;

      // function 'searchReplaceText()' is defined in 'include.inc.php'
      return searchReplaceText($unicodeSubScriptSearchReplaceActionsArray, $sourceString, true);
  }
  137. ?>