You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

415 lines
8.4 KiB

  1. <?php
  2. // Project: Web Reference Database (refbase) <http://www.refbase.net>
  3. // Copyright: Matthias Steffens <mailto:refbase@extracts.de> and the file's
  4. // original author(s).
  5. //
  6. // This code is distributed in the hope that it will be useful,
  7. // but WITHOUT ANY WARRANTY. Please see the GNU General Public
  8. // License for more details.
  9. //
  10. // File: ./includes/transtab_latin1_ascii.inc.php
  11. // Repository: $HeadURL: file:///svn/p/refbase/code/branches/bleeding-edge/includes/transtab_latin1_ascii.inc.php $
  12. // Author(s): Matthias Steffens <mailto:refbase@extracts.de>
  13. //
  14. // Created: 24-Aug-05, 20:11
  15. // Modified: $Date: 2007-02-17 01:10:14 +0000 (Sat, 17 Feb 2007) $
  16. // $Author: msteffens $
  17. // $Revision: 894 $
  18. // This is a transliteration table for a best-effort conversion from ISO-8859-1 to ASCII. It contains a list of substitution strings for 'ISO-8859-1 West European' characters,
  19. // comparable to the fallback notations that people use commonly in email and on typewriters to represent unavailable characters. Adopted from 'transtab' by Markus Kuhn
  20. // (transtab.utf v1.8 2000-10-12 11:01:28+01 mgk25 Exp); see <http://www.cl.cam.ac.uk/~mgk25/unicode.html> for more info about Unicode and transtab.
  // $transtab_latin1_ascii maps single ISO-8859-1 (Latin-1) characters to their
  // best-effort ASCII replacement strings. The array has the "from => to" shape
  // accepted by replacement functions such as strtr().
  //
  // Every non-ASCII key is written as a "\xNN" byte escape, where NN is the code
  // point given in the <U00NN> comment that follows the entry (in ISO-8859-1 the
  // byte value of a character equals its code point). This keeps the source pure
  // ASCII, so the keys cannot be lost or altered when the file is re-encoded or
  // copied through a tool that strips non-ASCII bytes. If the keys degrade to
  // empty strings, all entries collide on the single key "" and only the last
  // one survives, because later duplicates overwrite earlier ones in a PHP
  // array literal.
  //
  // Layout of each entry: the comment above names the character; the comment
  // below lists the source code point followed by the substitution candidate(s)
  // in transtab notation. A trailing `// "x"` comment on an entry appears to
  // record a shorter alternative replacement that is not in use.
  $transtab_latin1_ascii = array(
      // APOSTROPHE
      "'" => "'",
      // <U0027> <U2019>
      // GRAVE ACCENT
      "`" => "'",
      // <U0060> <U201B>;<U2018>
      // NO-BREAK SPACE
      "\xA0" => " ",
      // <U00A0> <U0020>
      // INVERTED EXCLAMATION MARK
      "\xA1" => "!",
      // <U00A1> <U0021>
      // CENT SIGN
      "\xA2" => "c",
      // <U00A2> <U0063>
      // POUND SIGN
      "\xA3" => "GBP",
      // <U00A3> "<U0047><U0042><U0050>"
      // YEN SIGN
      "\xA5" => "Y",
      // <U00A5> <U0059>
      // BROKEN BAR
      "\xA6" => "|",
      // <U00A6> <U007C>
      // SECTION SIGN
      "\xA7" => "S",
      // <U00A7> <U0053>
      // DIAERESIS
      "\xA8" => "\"",
      // <U00A8> <U0022>
      // COPYRIGHT SIGN
      "\xA9" => "(c)", // "c"
      // <U00A9> "<U0028><U0063><U0029>";<U0063>
      // FEMININE ORDINAL INDICATOR
      "\xAA" => "a",
      // <U00AA> <U0061>
      // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
      "\xAB" => "<<",
      // <U00AB> "<U003C><U003C>"
      // NOT SIGN
      "\xAC" => "-",
      // <U00AC> <U002D>
      // SOFT HYPHEN
      "\xAD" => "-",
      // <U00AD> <U002D>
      // REGISTERED SIGN
      "\xAE" => "(R)",
      // <U00AE> "<U0028><U0052><U0029>"
      // MACRON
      "\xAF" => "-",
      // <U00AF> <U002D>
      // DEGREE SIGN
      "\xB0" => " ",
      // <U00B0> <U0020>
      // PLUS-MINUS SIGN
      "\xB1" => "+/-",
      // <U00B1> "<U002B><U002F><U002D>"
      // SUPERSCRIPT TWO
      "\xB2" => "^2", // "2"
      // <U00B2> "<U005E><U0032>";<U0032>
      // SUPERSCRIPT THREE
      "\xB3" => "^3", // "3"
      // <U00B3> "<U005E><U0033>";<U0033>
      // ACUTE ACCENT
      "\xB4" => "'",
      // <U00B4> <U0027>
      // MICRO SIGN
      "\xB5" => "u",
      // <U00B5> <U03BC>;<U0075>
      // PILCROW SIGN
      "\xB6" => "P",
      // <U00B6> <U0050>
      // MIDDLE DOT
      "\xB7" => ".",
      // <U00B7> <U002E>
      // CEDILLA
      "\xB8" => ",",
      // <U00B8> <U002C>
      // SUPERSCRIPT ONE
      "\xB9" => "^1", // "1"
      // <U00B9> "<U005E><U0031>";<U0031>
      // MASCULINE ORDINAL INDICATOR
      "\xBA" => "o",
      // <U00BA> <U006F>
      // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
      "\xBB" => ">>",
      // <U00BB> "<U003E><U003E>"
      // VULGAR FRACTION ONE QUARTER
      "\xBC" => " 1/4",
      // <U00BC> "<U0020><U0031><U002F><U0034>"
      // VULGAR FRACTION ONE HALF
      "\xBD" => " 1/2",
      // <U00BD> "<U0020><U0031><U002F><U0032>"
      // VULGAR FRACTION THREE QUARTERS
      "\xBE" => " 3/4",
      // <U00BE> "<U0020><U0033><U002F><U0034>"
      // INVERTED QUESTION MARK
      "\xBF" => "?",
      // <U00BF> <U003F>
      // LATIN CAPITAL LETTER A WITH GRAVE
      "\xC0" => "A",
      // <U00C0> <U0041>
      // LATIN CAPITAL LETTER A WITH ACUTE
      "\xC1" => "A",
      // <U00C1> <U0041>
      // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
      "\xC2" => "A",
      // <U00C2> <U0041>
      // LATIN CAPITAL LETTER A WITH TILDE
      "\xC3" => "A",
      // <U00C3> <U0041>
      // LATIN CAPITAL LETTER A WITH DIAERESIS
      "\xC4" => "Ae", // "A"
      // <U00C4> "<U0041><U0065>";<U0041>
      // LATIN CAPITAL LETTER A WITH RING ABOVE
      "\xC5" => "Aa", // "A"
      // <U00C5> "<U0041><U0061>";<U0041>
      // LATIN CAPITAL LETTER AE
      "\xC6" => "AE", // "A"
      // <U00C6> "<U0041><U0045>";<U0041>
      // LATIN CAPITAL LETTER C WITH CEDILLA
      "\xC7" => "C",
      // <U00C7> <U0043>
      // LATIN CAPITAL LETTER E WITH GRAVE
      "\xC8" => "E",
      // <U00C8> <U0045>
      // LATIN CAPITAL LETTER E WITH ACUTE
      "\xC9" => "E",
      // <U00C9> <U0045>
      // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
      "\xCA" => "E",
      // <U00CA> <U0045>
      // LATIN CAPITAL LETTER E WITH DIAERESIS
      "\xCB" => "E",
      // <U00CB> <U0045>
      // LATIN CAPITAL LETTER I WITH GRAVE
      "\xCC" => "I",
      // <U00CC> <U0049>
      // LATIN CAPITAL LETTER I WITH ACUTE
      "\xCD" => "I",
      // <U00CD> <U0049>
      // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
      "\xCE" => "I",
      // <U00CE> <U0049>
      // LATIN CAPITAL LETTER I WITH DIAERESIS
      "\xCF" => "I",
      // <U00CF> <U0049>
      // LATIN CAPITAL LETTER ETH
      "\xD0" => "D",
      // <U00D0> <U0044>
      // LATIN CAPITAL LETTER N WITH TILDE
      "\xD1" => "N",
      // <U00D1> <U004E>
      // LATIN CAPITAL LETTER O WITH GRAVE
      "\xD2" => "O",
      // <U00D2> <U004F>
      // LATIN CAPITAL LETTER O WITH ACUTE
      "\xD3" => "O",
      // <U00D3> <U004F>
      // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
      "\xD4" => "O",
      // <U00D4> <U004F>
      // LATIN CAPITAL LETTER O WITH TILDE
      "\xD5" => "O",
      // <U00D5> <U004F>
      // LATIN CAPITAL LETTER O WITH DIAERESIS
      "\xD6" => "Oe", // "O"
      // <U00D6> "<U004F><U0065>";<U004F>
      // MULTIPLICATION SIGN
      "\xD7" => "x",
      // <U00D7> <U0078>
      // LATIN CAPITAL LETTER O WITH STROKE
      "\xD8" => "O",
      // <U00D8> <U004F>
      // LATIN CAPITAL LETTER U WITH GRAVE
      "\xD9" => "U",
      // <U00D9> <U0055>
      // LATIN CAPITAL LETTER U WITH ACUTE
      "\xDA" => "U",
      // <U00DA> <U0055>
      // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
      "\xDB" => "U",
      // <U00DB> <U0055>
      // LATIN CAPITAL LETTER U WITH DIAERESIS
      "\xDC" => "Ue", // "U"
      // <U00DC> "<U0055><U0065>";<U0055>
      // LATIN CAPITAL LETTER Y WITH ACUTE
      "\xDD" => "Y",
      // <U00DD> <U0059>
      // LATIN CAPITAL LETTER THORN
      "\xDE" => "Th",
      // <U00DE> "<U0054><U0068>"
      // LATIN SMALL LETTER SHARP S
      "\xDF" => "ss",
      // <U00DF> "<U0073><U0073>";<U03B2>
      // LATIN SMALL LETTER A WITH GRAVE
      "\xE0" => "a",
      // <U00E0> <U0061>
      // LATIN SMALL LETTER A WITH ACUTE
      "\xE1" => "a",
      // <U00E1> <U0061>
      // LATIN SMALL LETTER A WITH CIRCUMFLEX
      "\xE2" => "a",
      // <U00E2> <U0061>
      // LATIN SMALL LETTER A WITH TILDE
      "\xE3" => "a",
      // <U00E3> <U0061>
      // LATIN SMALL LETTER A WITH DIAERESIS
      "\xE4" => "ae", // "a"
      // <U00E4> "<U0061><U0065>";<U0061>
      // LATIN SMALL LETTER A WITH RING ABOVE
      "\xE5" => "aa", // "a"
      // <U00E5> "<U0061><U0061>";<U0061>
      // LATIN SMALL LETTER AE
      "\xE6" => "ae", // "a"
      // <U00E6> "<U0061><U0065>";<U0061>
      // LATIN SMALL LETTER C WITH CEDILLA
      "\xE7" => "c",
      // <U00E7> <U0063>
      // LATIN SMALL LETTER E WITH GRAVE
      "\xE8" => "e",
      // <U00E8> <U0065>
      // LATIN SMALL LETTER E WITH ACUTE
      "\xE9" => "e",
      // <U00E9> <U0065>
      // LATIN SMALL LETTER E WITH CIRCUMFLEX
      "\xEA" => "e",
      // <U00EA> <U0065>
      // LATIN SMALL LETTER E WITH DIAERESIS
      "\xEB" => "e",
      // <U00EB> <U0065>
      // LATIN SMALL LETTER I WITH GRAVE
      "\xEC" => "i",
      // <U00EC> <U0069>
      // LATIN SMALL LETTER I WITH ACUTE
      "\xED" => "i",
      // <U00ED> <U0069>
      // LATIN SMALL LETTER I WITH CIRCUMFLEX
      "\xEE" => "i",
      // <U00EE> <U0069>
      // LATIN SMALL LETTER I WITH DIAERESIS
      "\xEF" => "i",
      // <U00EF> <U0069>
      // LATIN SMALL LETTER ETH
      "\xF0" => "d",
      // <U00F0> <U0064>
      // LATIN SMALL LETTER N WITH TILDE
      "\xF1" => "n",
      // <U00F1> <U006E>
      // LATIN SMALL LETTER O WITH GRAVE
      "\xF2" => "o",
      // <U00F2> <U006F>
      // LATIN SMALL LETTER O WITH ACUTE
      "\xF3" => "o",
      // <U00F3> <U006F>
      // LATIN SMALL LETTER O WITH CIRCUMFLEX
      "\xF4" => "o",
      // <U00F4> <U006F>
      // LATIN SMALL LETTER O WITH TILDE
      "\xF5" => "o",
      // <U00F5> <U006F>
      // LATIN SMALL LETTER O WITH DIAERESIS
      "\xF6" => "oe", // "o"
      // <U00F6> "<U006F><U0065>";<U006F>
      // DIVISION SIGN
      "\xF7" => ":",
      // <U00F7> <U003A>
      // LATIN SMALL LETTER O WITH STROKE
      "\xF8" => "o",
      // <U00F8> <U006F>
      // LATIN SMALL LETTER U WITH GRAVE
      "\xF9" => "u",
      // <U00F9> <U0075>
      // LATIN SMALL LETTER U WITH ACUTE
      "\xFA" => "u",
      // <U00FA> <U0075>
      // LATIN SMALL LETTER U WITH CIRCUMFLEX
      "\xFB" => "u",
      // <U00FB> <U0075>
      // LATIN SMALL LETTER U WITH DIAERESIS
      "\xFC" => "ue", // "u"
      // <U00FC> "<U0075><U0065>";<U0075>
      // LATIN SMALL LETTER Y WITH ACUTE
      "\xFD" => "y",
      // <U00FD> <U0079>
      // LATIN SMALL LETTER THORN
      "\xFE" => "th",
      // <U00FE> "<U0074><U0068>"
      // LATIN SMALL LETTER Y WITH DIAERESIS
      "\xFF" => "y"
      // <U00FF> <U0079>
  );
  314. ?>