You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

148 lines
7.0 KiB

  1. <?php
  2. // Project: Web Reference Database (refbase) <http://www.refbase.net>
  3. // Copyright: Matthias Steffens <mailto:refbase@extracts.de> and the file's
  4. // original author(s).
  5. //
  6. // This code is distributed in the hope that it will be useful,
  7. // but WITHOUT ANY WARRANTY. Please see the GNU General Public
  8. // License for more details.
  9. //
  10. // File: ./includes/transtab_unicode_refbase.inc.php
  11. // Repository: $HeadURL$
  12. // Author(s): Matthias Steffens <mailto:refbase@extracts.de>
  13. //
  14. // Created: 11-Jun-08, 13:00
  15. // Modified: $Date: 2008-06-19 17:56:34 +0000 (Thu, 19 Jun 2008) $
  16. // $Author$
  17. // $Revision: 1146 $
  18. // Search & replace patterns and functions for conversion from Unicode entities to refbase markup.
  19. // Search & replace patterns must be specified as perl-style regular expression and search patterns must include the leading & trailing slashes.
  // Translation table: Unicode characters => refbase markup.
  // Every key is a complete perl-style regular expression (leading & trailing slash included),
  // every value is the replacement that gets used for text matching that expression.
  $transtab_unicode_refbase = array(

      // ----- miscellaneous symbols -----
      '/‰|‰/' => "[permil]",   // first alternative: <U2030> (per mille sign); the origin of the second alternative is not documented
      '/∞/'   => "[infinity]", // <U221E> (infinity)

      // ----- lowercase Greek letters -----
      '/α/' => "[alpha]",
      '/β/' => "[beta]",
      '/γ/' => "[gamma]",
      '/δ/' => "[delta]",
      '/ε/' => "[epsilon]",
      '/ζ/' => "[zeta]",
      '/η/' => "[eta]",
      '/θ/' => "[theta]",
      '/ι/' => "[iota]",
      '/κ/' => "[kappa]",
      '/λ/' => "[lambda]",
      '/μ/' => "[mu]",
      '/ν/' => "[nu]",
      '/ξ/' => "[xi]",
      '/ο/' => "[omicron]",
      '/π/' => "[pi]",
      '/ρ/' => "[rho]",
      '/ς/' => "[sigmaf]",
      '/σ/' => "[sigma]",
      '/τ/' => "[tau]",
      '/υ/' => "[upsilon]",
      '/φ/' => "[phi]",
      '/χ/' => "[chi]",
      '/ψ/' => "[psi]",
      '/ω/' => "[omega]",

      // ----- uppercase Greek letters -----
      '/Α/' => "[Alpha]",
      '/Β/' => "[Beta]",
      '/Γ/' => "[Gamma]",
      '/Δ/' => "[Delta]",
      '/Ε/' => "[Epsilon]",
      '/Ζ/' => "[Zeta]",
      '/Η/' => "[Eta]",
      '/Θ/' => "[Theta]",
      '/Ι/' => "[Iota]",
      '/Κ/' => "[Kappa]",
      '/Λ/' => "[Lambda]",
      '/Μ/' => "[Mu]",
      '/Ν/' => "[Nu]",
      '/Ξ/' => "[Xi]",
      '/Ο/' => "[Omicron]",
      '/Π/' => "[Pi]",
      '/Ρ/' => "[Rho]",
      '/Σ/' => "[Sigma]",
      '/Τ/' => "[Tau]",
      '/Υ/' => "[Upsilon]",
      '/Φ/' => "[Phi]",
      '/Χ/' => "[Chi]",
      '/Ψ/' => "[Psi]",
      '/Ω/' => "[Omega]",

      // ----- superscript & subscript runs -----
      // These two patterns capture one or more consecutive superscript (or subscript) characters.
      // Their replacement is a PHP expression (note the 'e' pattern modifier) which passes the
      // captured run to 'unicodeSuperScriptToRefbase()' or 'unicodeSubScriptToRefbase()'.
      //
      // The characters are listed as alternatives on purpose: putting these multi-byte characters
      // into a character class (like [¹²³⁴⁵⁶⁷⁸⁹⁰⁺⁻⁼⁽⁾ⁿ] or [₁₂₃₄₅₆₇₈₉₀₊₋₌₍₎]) may cause the
      // single-byte parts of these characters to be matched and replaced as well!
      //
      // NOTE(review): the 'e' modifier was deprecated in PHP 5.5 and removed in PHP 7.0 -- these two
      // entries presumably need the code that applies this table to move to 'preg_replace_callback()';
      // confirm against the consumer of this array.
      "/((?:¹|²|³|⁴|⁵|⁶|⁷|⁸|⁹|⁰|⁺|⁻|⁼|⁽|⁾|ⁿ)+)/ie" => "unicodeSuperScriptToRefbase('\\1')",
      "/((?:₁|₂|₃|₄|₅|₆|₇|₈|₉|₀|₊|₋|₌|₍|₎)+)/ie"   => "unicodeSubScriptToRefbase('\\1')",

  );
  // Maps each Unicode superscript character (given as a delimited regular expression)
  // to the plain character that is used for it inside refbase superscript markup.
  $unicodeSuperScriptSearchReplaceActionsArray = array(

      // digits
      '/¹/' => "1", // <U00B9> (superscript one)
      '/²/' => "2", // <U00B2> (superscript two)
      '/³/' => "3", // <U00B3> (superscript three)
      '/⁴/' => "4", // <U2074> (superscript four)
      '/⁵/' => "5", // <U2075> (superscript five)
      '/⁶/' => "6", // <U2076> (superscript six)
      '/⁷/' => "7", // <U2077> (superscript seven)
      '/⁸/' => "8", // <U2078> (superscript eight)
      '/⁹/' => "9", // <U2079> (superscript nine)
      '/⁰/' => "0", // <U2070> (superscript zero)

      // operators & parentheses
      '/⁺/' => "+", // <U207A> (superscript plus sign)
      '/⁻/' => "-", // <U207B> (superscript minus)
      '/⁼/' => "=", // <U207C> (superscript equals sign)
      '/⁽/' => "(", // <U207D> (superscript left parenthesis)
      '/⁾/' => ")", // <U207E> (superscript right parenthesis)

      // letters
      '/ⁿ/' => "n", // <U207F> (superscript latin small letter n)

  );
  // Maps each Unicode subscript character (given as a delimited regular expression)
  // to the plain character that is used for it inside refbase subscript markup.
  $unicodeSubScriptSearchReplaceActionsArray = array(

      // digits
      '/₁/' => "1", // <U2081> (subscript one)
      '/₂/' => "2", // <U2082> (subscript two)
      '/₃/' => "3", // <U2083> (subscript three)
      '/₄/' => "4", // <U2084> (subscript four)
      '/₅/' => "5", // <U2085> (subscript five)
      '/₆/' => "6", // <U2086> (subscript six)
      '/₇/' => "7", // <U2087> (subscript seven)
      '/₈/' => "8", // <U2088> (subscript eight)
      '/₉/' => "9", // <U2089> (subscript nine)
      '/₀/' => "0", // <U2080> (subscript zero)

      // operators & parentheses
      '/₊/' => "+", // <U208A> (subscript plus sign)
      '/₋/' => "-", // <U208B> (subscript minus)
      '/₌/' => "=", // <U208C> (subscript equals sign)
      '/₍/' => "(", // <U208D> (subscript left parenthesis)
      '/₎/' => ")", // <U208E> (subscript right parenthesis)

  );
  112. // --------------------------------------------------------------------
  // Builds refbase superscript markup from a run of Unicode superscript characters:
  // the text in '$sourceString' is run through the replacements listed in the global
  // '$unicodeSuperScriptSearchReplaceActionsArray' and the outcome is wrapped in "[super:" ... "]".
  function unicodeSuperScriptToRefbase($sourceString)
  {
      global $unicodeSuperScriptSearchReplaceActionsArray;

      // function 'searchReplaceText()' is defined in 'include.inc.php'
      // (the 'true' argument presumably indicates that the search patterns already carry their slash delimiters -- confirm there)
      $plainCharacters = searchReplaceText(
          $unicodeSuperScriptSearchReplaceActionsArray,
          $sourceString,
          true
      );

      return "[super:" . $plainCharacters . "]";
  }
  120. // --------------------------------------------------------------------
  // Builds refbase subscript markup from a run of Unicode subscript characters:
  // the text in '$sourceString' is run through the replacements listed in the global
  // '$unicodeSubScriptSearchReplaceActionsArray' and the outcome is wrapped in "[sub:" ... "]".
  function unicodeSubScriptToRefbase($sourceString)
  {
      global $unicodeSubScriptSearchReplaceActionsArray;

      // function 'searchReplaceText()' is defined in 'include.inc.php'
      // (the 'true' argument presumably indicates that the search patterns already carry their slash delimiters -- confirm there)
      $plainCharacters = searchReplaceText(
          $unicodeSubScriptSearchReplaceActionsArray,
          $sourceString,
          true
      );

      return "[sub:" . $plainCharacters . "]";
  }
  128. ?>