You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

158 lines
7.5 KiB

<?php
// Project: Web Reference Database (refbase) <http://www.refbase.net>
// Copyright: Matthias Steffens <mailto:refbase@extracts.de> and the file's
// original author(s).
//
// This code is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY. Please see the GNU General Public
// License for more details.
//
// File: ./includes/transtab_refbase_unicode.inc.php
// Repository: $HeadURL: file:///svn/p/refbase/code/branches/bleeding-edge/includes/transtab_refbase_unicode.inc.php $
// Author(s): Matthias Steffens <mailto:refbase@extracts.de>
//
// Created: 02-Jun-06, 01:41
// Modified: $Date: 2008-07-30 14:50:42 +0000 (Wed, 30 Jul 2008) $
// $Author: msteffens $
// $Revision: 1183 $
// Search & replace patterns and functions for conversion from refbase markup to Unicode entities.
// Search & replace patterns must be specified as Perl-style regular expressions, and search patterns must include the leading & trailing slashes.
// '$patternModifiers' is interpolated into the en dash search pattern at the end of the table below.
// (a 'global' statement at file level only has an effect when this file gets included from within a function)
global $patternModifiers; // defined in 'transtab_unicode_charset.inc.php' and 'transtab_latin1_charset.inc.php'
// Conversion table from refbase markup to Unicode characters:
// each key is a complete perl-style search pattern (delimiters & modifiers included), each value is the matching replacement.
$transtab_refbase_unicode = array(
// "/__(?!_)(.+?)__/" => '\\1', // the pattern for underline (__...__) must come before the one for italic (_..._)
// "/_(.+?)_/" => '\\1', // fontshape markup is currently NOT converted (uncomment to strip fontshape markup from exported text)
// "/\\*\\*(.+?)\\*\\*/" => '\\1',
// Super- & subscript markup: the "i" modifier makes the 'super:'/'sub:' keywords case insensitive, and the "e" modifier
// causes the replacement string to be evaluated as PHP code, i.e. the captured text gets passed to the named function.
// NOTE(review): the "e" (PREG_REPLACE_EVAL) modifier was deprecated in PHP 5.5.0 and removed in PHP 7.0.0; on newer PHP
// versions these two entries would have to be handled via 'preg_replace_callback()' by the code that applies this table.
"/\\[super:(.+?)\\]/ie" => "superScriptToUnicode('\\1')", // function 'superScriptToUnicode()' will convert superscript text to appropriate Unicode entities
"/\\[sub:(.+?)\\]/ie" => "subScriptToUnicode('\\1')", // function 'subScriptToUnicode()' will convert subscript text to appropriate Unicode entities
// Named symbols (these patterns carry no "i" modifier, so the names are matched case sensitively):
"/\\[permil\\]/" => '‰', // <U2030> (per mille sign)
"/\\[infinity\\]/" => '∞', // <U221E> (infinity)
// Lowercase greek letters:
"/\\[alpha\\]/" => 'α',
"/\\[beta\\]/" => 'β',
"/\\[gamma\\]/" => 'γ',
"/\\[delta\\]/" => 'δ',
"/\\[epsilon\\]/" => 'ε',
"/\\[zeta\\]/" => 'ζ',
"/\\[eta\\]/" => 'η',
"/\\[theta\\]/" => 'θ',
"/\\[iota\\]/" => 'ι',
"/\\[kappa\\]/" => 'κ',
"/\\[lambda\\]/" => 'λ',
"/\\[mu\\]/" => 'μ',
"/\\[nu\\]/" => 'ν',
"/\\[xi\\]/" => 'ξ',
"/\\[omicron\\]/" => 'ο',
"/\\[pi\\]/" => 'π',
"/\\[rho\\]/" => 'ρ',
"/\\[sigmaf\\]/" => 'ς', // final form of the lowercase sigma
"/\\[sigma\\]/" => 'σ',
"/\\[tau\\]/" => 'τ',
"/\\[upsilon\\]/" => 'υ',
"/\\[phi\\]/" => 'φ',
"/\\[chi\\]/" => 'χ',
"/\\[psi\\]/" => 'ψ',
"/\\[omega\\]/" => 'ω',
// Uppercase greek letters (distinguished from the lowercase ones only by the capitalized first letter of the name):
"/\\[Alpha\\]/" => 'Α',
"/\\[Beta\\]/" => 'Β',
"/\\[Gamma\\]/" => 'Γ',
"/\\[Delta\\]/" => 'Δ',
"/\\[Epsilon\\]/" => 'Ε',
"/\\[Zeta\\]/" => 'Ζ',
"/\\[Eta\\]/" => 'Η',
"/\\[Theta\\]/" => 'Θ',
"/\\[Iota\\]/" => 'Ι',
"/\\[Kappa\\]/" => 'Κ',
"/\\[Lambda\\]/" => 'Λ',
"/\\[Mu\\]/" => 'Μ',
"/\\[Nu\\]/" => 'Ν',
"/\\[Xi\\]/" => 'Ξ',
"/\\[Omicron\\]/" => 'Ο',
"/\\[Pi\\]/" => 'Π',
"/\\[Rho\\]/" => 'Ρ',
"/\\[Sigma\\]/" => 'Σ',
"/\\[Tau\\]/" => 'Τ',
"/\\[Upsilon\\]/" => 'Υ',
"/\\[Phi\\]/" => 'Φ',
"/\\[Chi\\]/" => 'Χ',
"/\\[Psi\\]/" => 'Ψ',
"/\\[Omega\\]/" => 'Ω',
// Typographic replacements:
// text enclosed by straight double quotes gets wrapped in curly quotes (non-greedy, so each quoted span is handled on its own)
"/\"(.+?)\"/" => '“\\1”', // <U201C>...<U201D> (left and right double quotation marks)
// a hyphen with one or more spaces on both sides becomes an en dash surrounded by single spaces
"/ +- +/" => ' – ', // <U2013> (endash)
// NOTE(review): as displayed here, the search pattern and the replacement of the next entry contain the same character;
// presumably the search pattern originally held a differently encoded dash -- TODO confirm against the repository copy
"/–/$patternModifiers" => '–' // <U2013> (endash)
// Note that for UTF-8 based systems, '$patternModifiers' contains the "u" (PCRE_UTF8) pattern modifier which causes PHP/PCRE
// to treat pattern strings as UTF-8 (otherwise this conversion pattern would garble UTF-8 characters such as "Ö")
);
// Search & replace actions used by function 'superScriptToUnicode()':
// every key is a search pattern for a single character, the value is the corresponding Unicode superscript character.
// The final catch-all entry relies on the single-character entries having been applied before it, so it must stay last
// (this assumes that the actions get applied in array order -- verify against function 'searchReplaceText()').
$unicodeSuperScriptSearchReplaceActionsArray = array(
"/1/" => '¹', // <U00B9> (superscript one)
"/2/" => '²', // <U00B2> (superscript two)
"/3/" => '³', // <U00B3> (superscript three)
"/4/" => '⁴', // <U2074> (superscript four)
"/5/" => '⁵', // <U2075> (superscript five)
"/6/" => '⁶', // <U2076> (superscript six)
"/7/" => '⁷', // <U2077> (superscript seven)
"/8/" => '⁸', // <U2078> (superscript eight)
"/9/" => '⁹', // <U2079> (superscript nine)
"/0/" => '⁰', // <U2070> (superscript zero)
"/\\+/" => '⁺', // <U207A> (superscript plus sign)
"/-/" => '⁻', // <U207B> (superscript minus)
"/=/" => '⁼', // <U207C> (superscript equals sign)
"/\\(/" => '⁽', // <U207D> (superscript left parenthesis)
"/\\)/" => '⁾', // <U207E> (superscript right parenthesis)
"/n/" => 'ⁿ', // <U207F> (superscript latin small letter n)
// Any run of characters that is none of the superscript characters listed above gets wrapped in '[super:...]' again.
// NOTE(review): this pattern has no "u" modifier, so PCRE will presumably treat the multi-byte characters within the
// character class as individual bytes -- confirm that this behaves as intended for non-ASCII input
"/([^¹²³⁴⁵⁶⁷⁸⁹⁰⁺⁻⁼⁽⁾ⁿ]+)/" => '[super:\\1]' // keep superscript markup in place for any text that has no matching superscript entity in Unicode
);
// Search & replace actions used by function 'subScriptToUnicode()':
// every key is a search pattern for a single character, the value is the corresponding Unicode subscript character.
// The final catch-all entry relies on the single-character entries having been applied before it, so it must stay last
// (this assumes that the actions get applied in array order -- verify against function 'searchReplaceText()').
$unicodeSubScriptSearchReplaceActionsArray = array(
"/1/" => '₁', // <U2081> (subscript one)
"/2/" => '₂', // <U2082> (subscript two)
"/3/" => '₃', // <U2083> (subscript three)
"/4/" => '₄', // <U2084> (subscript four)
"/5/" => '₅', // <U2085> (subscript five)
"/6/" => '₆', // <U2086> (subscript six)
"/7/" => '₇', // <U2087> (subscript seven)
"/8/" => '₈', // <U2088> (subscript eight)
"/9/" => '₉', // <U2089> (subscript nine)
"/0/" => '₀', // <U2080> (subscript zero)
"/\\+/" => '₊', // <U208A> (subscript plus sign)
"/-/" => '₋', // <U208B> (subscript minus)
"/=/" => '₌', // <U208C> (subscript equals sign)
"/\\(/" => '₍', // <U208D> (subscript left parenthesis)
"/\\)/" => '₎', // <U208E> (subscript right parenthesis)
// Any run of characters that is none of the subscript characters listed above gets wrapped in '[sub:...]' again.
// NOTE(review): this pattern has no "u" modifier, so PCRE will presumably treat the multi-byte characters within the
// character class as individual bytes -- confirm that this behaves as intended for non-ASCII input
"/([^₁₂₃₄₅₆₇₈₉₀₊₋₌₍₎]+)/" => '[sub:\\1]' // keep subscript markup in place for any text that has no matching subscript entity in Unicode
);
// --------------------------------------------------------------------
// Converts superscript text to appropriate Unicode entities:
function superScriptToUnicode($sourceString)
{
	// The superscript conversion table is defined at file level, so it has to be pulled into function scope:
	global $unicodeSuperScriptSearchReplaceActionsArray;

	// Hand the text & the conversion table over to function 'searchReplaceText()' (defined in 'include.inc.php')
	// and pass its result straight back to the caller. The third argument presumably indicates that the search
	// patterns already include their delimiters -- confirm against 'include.inc.php'.
	return searchReplaceText(
		$unicodeSuperScriptSearchReplaceActionsArray,
		$sourceString,
		true
	);
}
// --------------------------------------------------------------------
// Converts subscript text to appropriate Unicode entities:
function subScriptToUnicode($sourceString)
{
	// The subscript conversion table is defined at file level, so it has to be pulled into function scope:
	global $unicodeSubScriptSearchReplaceActionsArray;

	// Hand the text & the conversion table over to function 'searchReplaceText()' (defined in 'include.inc.php')
	// and pass its result straight back to the caller. The third argument presumably indicates that the search
	// patterns already include their delimiters -- confirm against 'include.inc.php'.
	return searchReplaceText(
		$unicodeSubScriptSearchReplaceActionsArray,
		$sourceString,
		true
	);
}
?>