<?php
/*
  CZECH TYPO

  This function replaces some widely used symbol combinations with the
  corresponding typographic characters for the Czech language:

    ...  =>  …
    --   =>  –
    ---  =>  —
    ""   =>  „“

  It will not do any replacements between '<' and '>' characters or
  inside PRE and CODE containers.

  Usage:
    $result = czechtypo($text);

  Want an English version? Just convert it in your favorite shell
  like this :-)

    sed -e 's/czech/english/;s/CZECH/ENGLISH/;' \
        -e 's/ldquo/rdquo/;s/bdquo/ldquo/;16,21D' \
        czechtypo.php > englishtypo.php

  NOTE(review): the recipe above refers to entity names (ldquo/bdquo) and
  to line numbers of the originally distributed file; the replacement
  literals in this file are plain UTF-8 characters, so verify the recipe
  against this file before relying on it.

  2007, Radek Brich (radek at brich.org)
  public domain, no rights reserved
*/

/**
 * Read one token from $text starting at byte offset $i.
 *
 * A token is one of:
 *   - an HTML tag: everything from '<' up to and including the next '>'
 *     (or up to the end of the text when the tag is never closed),
 *   - a run of consecutive dashes ('-'),
 *   - a run of consecutive dots ('.'),
 *   - any other single byte.
 *
 * The function works byte-wise; multi-byte characters are returned one byte
 * per token, which is harmless as long as the caller concatenates the
 * tokens back in order.
 *
 * @param string $text Text being scanned.
 * @param int    $i    Current offset; advanced past the returned token.
 * @return string The token, or '' when $i is at or past the end of $text.
 */
function readtoken($text, &$i)
{
    $length = strlen($text);
    if ($i >= $length) {
        return '';
    }

    $start = $i;
    $first = $text[$i];

    if ($first == '<') {
        // Tag: consume through the closing '>' or, failing that, to the end.
        $close = strpos($text, '>', $i);
        $i = ($close === false) ? $length : $close + 1;
    } elseif ($first == '-' || $first == '.') {
        // Run of identical dashes or dots.
        $i += strspn($text, $first, $i);
    } else {
        // Any other byte is a token on its own.
        $i++;
    }

    return substr($text, $start, $i - $start);
}

/**
 * Replace quotes, ellipses and dashes with proper typographic characters
 * for the Czech language.
 *
 *   '--'  => '–'     '---' => '—'     '...' => '…'
 *   '"'   => alternately '„' (opening) and '“' (closing)
 *
 * Only runs of exactly two or three dashes and exactly three dots are
 * replaced; longer runs are copied unchanged. Nothing is replaced inside
 * HTML tags (between '<' and '>') or inside PRE and CODE containers.
 *
 * PRE/CODE containers are tracked with a nesting depth so that, for
 * example, a closing </code> inside a <pre> does not re-enable replacement
 * for the rest of the <pre>. Only real <pre>/<code> opening and closing
 * tags are recognised: tags that merely start with the same letters
 * (<present>) or contain "/pre" or "/code" in an attribute do not affect
 * the state.
 *
 * @param string $text HTML or plain text to process.
 * @return string The text with typographic replacements applied.
 */
function czechtypo($text)
{
    $depth = 0;        // number of currently open PRE/CODE containers
    $quot = false;     // true while inside an opened „ quote
    $i = 0;
    $output = '';

    while (($token = readtoken($text, $i)) !== '') {
        if ($token[0] == '<') {
            // Tags are always copied verbatim; they only influence the
            // PRE/CODE nesting depth.
            if (preg_match('~^<(/?)(?:pre|code)(?=[\s/>]|$)~i', $token, $m)) {
                if ($m[1] === '') {
                    $depth++;
                } elseif ($depth > 0) {
                    // Stray closing tags must not push the depth negative.
                    $depth--;
                }
            }
            $output .= $token;
            continue;
        }

        if ($depth > 0) {
            // Inside PRE or CODE: leave the content untouched.
            $output .= $token;
            continue;
        }

        if ($token === '--') {
            $output .= '–';
        } elseif ($token === '---') {
            $output .= '—';
        } elseif ($token === '...') {
            $output .= '…';
        } elseif ($token === '"') {
            $output .= $quot ? '“' : '„';
            $quot = !$quot;
        } else {
            $output .= $token;
        }
    }

    return $output;
}