<?php
/*
CZECH TYPO

This function replaces some widely used symbol combinations
with the corresponding typographic entities for the Czech language:
... => &hellip;
--  => &ndash;
--- => &mdash;
""  => &bdquo;&ldquo;
It does not make any replacements between '<' and '>' characters
or inside PRE and CODE containers.

Usage:
$result = czechtypo($text);

Want an English version? Just convert it in your favorite shell
like this :-)
   sed -e 's/czech/english/;s/CZECH/ENGLISH/;' \
   -e 's/ldquo/rdquo/;s/bdquo/ldquo/;16,21D' \
   czechtypo.php > englishtypo.php

2007, Radek Brich (radek at brich.org)
public domain, no rights reserved
*/


// Read the next token from $text, starting at offset $i, and move $i
// just past it.  A token is one of:
//   - an HTML tag: everything from '<' up to and including the next
//     '>' (or up to the end of the text when no '>' follows),
//   - a run of consecutive dashes,
//   - a run of consecutive dots,
//   - any other single character.
// Returns the token, or an empty string when $i is already at the
// end of the text.
function readtoken($text, &$i)
{
	$length = strlen($text);
	if ($i >= $length)
		return '';

	// the first character decides what kind of token this is
	$first = $text[$i++];
	$token = '' . $first;

	if ($first == '<')
	{
		// tag: take everything up to and including the closing '>'
		while ($i < $length)
		{
			$ch = $text[$i++];
			$token .= $ch;
			if ($ch == '>')
				break;
		}
	}
	else
	if ($first == '-' || $first == '.')
	{
		// dashes or dots: extend the run while the same character
		// repeats; the first different character is left unread
		while ($i < $length && $text[$i] == $first)
			$token .= $text[$i++];
	}

	return $token;
}

// Replace quotes, ellipses and dashes with the proper typographic
// entities for the Czech language and return the converted text.
//
// The text is split into tokens by readtoken(), so a whole HTML tag
// arrives as a single token and is always copied verbatim.
// Outside PRE and CODE containers:
//   a run of exactly two dashes   => &ndash;
//   a run of exactly three dashes => &mdash;
//   a run of exactly three dots   => &hellip;
//   a double quote                => alternately the opening and the
//                                    closing quote entity
// Dash or dot runs of any other length are left untouched.
//
// PRE and CODE containers are recognised by tag *name* (not by a
// substring search over the whole tag) and are counted separately:
//   - a tag that merely contains "/pre" or "/code" in an attribute,
//     e.g. <a href="/code/x">, does not end the protected region,
//   - look-alike names such as <preview> do not start one,
//   - closing an inner container, as in <pre><code>..</code>..</pre>,
//     keeps the rest of the outer container protected.
function czechtypo($text)
{
	// number of currently open containers, per tag name
	$open = array('pre' => 0, 'code' => 0);
	// false: the next '"' opens a quotation, true: it closes one
	$quot = false;
	$i = 0;
	$output = '';
	while (($token = readtoken($text, $i)) !== '')
	{
		if ($open['pre'] > 0 || $open['code'] > 0)
		{
			// inside PRE or CODE -- copy everything verbatim
			$output .= $token;
		}
		else
		if ($token === '--')
			$output .= '&ndash;';
		else
		if ($token === '---')
			$output .= '&mdash;';
		else
		if ($token === '...')
			$output .= '&hellip;';
		else
		if ($token === '"')
		{
			if ($quot)
				$output .= '&ldquo;';
			else
				$output .= '&bdquo;';
			$quot = !$quot;
		}
		else
			$output .= $token;

		// Track PRE/CODE nesting.  The pattern is anchored at the
		// start of the token and the name must not be followed by
		// another name character, so only real <pre>, </pre>,
		// <code> and </code> tags (with or without attributes)
		// are counted.
		if (preg_match('/^<(\/?)(pre|code)(?![\w:-])/i', $token, $m))
		{
			$name = strtolower($m[2]);
			if ($m[1] === '')
				$open[$name]++;
			else
			if ($open[$name] > 0)
				// a stray closing tag with nothing open is ignored
				$open[$name]--;
		}
	}
	return $output;
}

?>