accept-to-gettext.inc 6.82 KiB
<?php
/*
 * accept-to-gettext.inc -- convert information in 'Accept-*' headers to
 * gettext language identifiers.
 * Copyright (c) 2003, Wouter Verhelst <wouter@debian.org>
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 * Usage:
 *  $locale=al2gt(<array of supported languages/charsets in gettext syntax>,
 *                <MIME type of document>);
 *  setlocale('LC_ALL', $locale); // or 'LC_MESSAGES', or whatever...
 * Example:
 *  $langs=array('nl_BE.ISO-8859-15','nl_BE.UTF-8','en_US.UTF-8','en_GB.UTF-8');
 *  $locale=al2gt($langs, 'text/html');
 *  setlocale('LC_ALL', $locale);
 * Note that this will send out header information (to be
 * RFC2616-compliant), so it must be called before anything is sent to
 * the user.
 * Assumptions made:
 * * Charset encodings are written the same way as the Accept-Charset
 *   HTTP header specifies them (RFC2616), except that they're parsed
 *   case-insensitive.
 * * Country codes and language codes are the same in both gettext and
 *   the Accept-Language syntax (except for the case differences, which
 *   are dealt with easily). If not, some input may be ignored.
 * * The provided gettext-strings are fully qualified; i.e., no "en_US";
 *   always "en_US.ISO-8859-15" or "en_US.UTF-8", or whichever has been
 *   used. "en.ISO-8859-15" is OK, though.
 * * The language is more important than the charset; i.e., if the
 *   following is given:
 *   Accept-Language: nl-be, nl;q=0.8, en-us;q=0.5, en;q=0.3
 *   Accept-Charset: ISO-8859-15, utf-8;q=0.5
 *   And the supplied parameter contains (amongst others) nl_BE.UTF-8
 *   and nl.ISO-8859-15, then nl_BE.UTF-8 will be picked.
 * $Log: accept-to-gettext.inc,v $
 * Revision 1.1.1.1  2003/11/19 19:31:15  wouter
 * * moved to new CVS repo after death of the old
 * * Fixed code to apply a default to both Accept-Charset and
 *   Accept-Language if none of those headers are supplied; patch from
 *   Dominic Chambers <dominic@encasa.com>
 * Revision 1.2  2003/08/14 10:23:59  wouter
 * Removed little error in Content-Type header syntaxis.
/* not really important, this one; perhaps I could've put it inline with
 * the rest. */
function find_match($curlscore,$curcscore,$curgtlang,$langval,$charval,
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
$gtlang) { if($curlscore < $langval) { $curlscore=$langval; $curcscore=$charval; $curgtlang=$gtlang; } else if ($curlscore == $langval) { if($curcscore < $charval) { $curcscore=$charval; $curgtlang=$gtlang; } } return array($curlscore, $curcscore, $curgtlang); } function al2gt($gettextlangs, $mime) { /* Check if ACCEPT_LANGUAGE isset */ if(!isset($_SERVER["HTTP_ACCEPT_LANGUAGE"])){ $_SERVER["HTTP_ACCEPT_LANGUAGE"] = ""; } if(!isset($_SERVER["HTTP_ACCEPT_CHARSET"])){ $_SERVER["HTTP_ACCEPT_CHARSET"] = ""; } /* default to "everything is acceptable", as RFC2616 specifies */ $acceptLang=(($_SERVER["HTTP_ACCEPT_LANGUAGE"] == '') ? '*' : $_SERVER["HTTP_ACCEPT_LANGUAGE"]); $acceptChar=(($_SERVER["HTTP_ACCEPT_CHARSET"] == '') ? 'ISO-8859-1,utf-8;q=0.7,*;q=0.7' : $_SERVER["HTTP_ACCEPT_CHARSET"]); $alparts=@preg_split("/,/",$acceptLang); $acparts=@preg_split("/,/",$acceptChar); /* Parse the contents of the Accept-Language header.*/ foreach($alparts as $part) { $part=trim($part); if(preg_match("/;/", $part)) { $lang=@preg_split("/;/",$part); $score=@preg_split("/=/",$lang[1]); $alscores[$lang[0]]=$score[1]; } else { $alscores[$part]=1; } } /* Do the same for the Accept-Charset header. */ /* RFC2616: ``If no "*" is present in an Accept-Charset field, then * all character sets not explicitly mentioned get a quality value of * 0, except for ISO-8859-1, which gets a quality value of 1 if not * explicitly mentioned.'' * * Making it 2 for the time being, so that we * can distinguish between "not specified" and "specified as 1" later * on. */ $acscores["ISO-8859-1"]=2; foreach($acparts as $part) { $part=trim($part); if(preg_match("/;/", $part)) { $cs=@preg_split("/;/",$part); $score=@preg_split("/=/",$cs[1]); $acscores[strtoupper($cs[0])]=$score[1]; } else { $acscores[strtoupper($part)]=1; } } if($acscores["ISO-8859-1"]==2) { $acscores["ISO-8859-1"]=(isset($acscores["*"])?$acscores["*"]:1);
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
} /* * Loop through the available languages/encodings, and pick the one * with the highest score, excluding the ones with a charset the user * did not include. */ $curlscore=0; $curcscore=0; $curgtlang=NULL; foreach($gettextlangs as $gtlang) { $tmp1=preg_replace("/\_/","-",$gtlang); $tmp2=@preg_split("/\./",$tmp1); $allang=strtolower($tmp2[0]); $gtcs=strtoupper($tmp2[1]); $noct=@preg_split("/-/",$allang); if(!isset($alscores["*"])){ $alscores["*"] = ""; } if(!isset($alscores[$allang])){ $alscores[$allang] = ""; } if(!isset($alscores[$noct[0]])){ $alscores[$noct[0]] = ""; } if(!isset($acscores[$gtcs])){ $acscores[$gtcs] = ""; } $testvals=array( array($alscores[$allang], $acscores[$gtcs]), array($alscores[$noct[0]], $acscores[$gtcs]), array($alscores[$allang], $acscores["*"]), array($alscores[$noct[0]], $acscores["*"]), array($alscores["*"], $acscores[$gtcs]), array($alscores["*"], $acscores["*"])); $found=FALSE; foreach($testvals as $tval) { if(!$found && isset($tval[0]) && isset($tval[1])) { $arr=find_match($curlscore, $curcscore, $curgtlang, $tval[0], $tval[1], $gtlang); $curlscore=$arr[0]; $curcscore=$arr[1]; $curgtlang=$arr[2]; $found=TRUE; } } } /* We must re-parse the gettext-string now, since we may have found it * through a "*" qualifier.*/ $gtparts=@preg_split("/\./",$curgtlang); $tmp=strtolower($gtparts[0]); $lang=preg_replace("/\_/", "-", $tmp); if (!headers_sent()){ header("Content-Language: $lang"); if(isset($gtparts[1])){ $charset=$gtparts[1]; header("Content-Type: $mime; charset=$charset"); } } return $curgtlang; }
211212213
// vim:tabstop=2:expandtab:shiftwidth=2:filetype=php:syntax:ruler: ?>