Code: Select all
/**
 * Stops the script with a diagnostic dump unless $got is identical to $expected.
 *
 * The comparison is strict (type and value). On a mismatch the message, the
 * expected value and the actual value are passed to var_dump() and the script
 * terminates with exit status 1. On a match the function simply returns.
 *
 * @param mixed $expected Value the caller expects.
 * @param mixed $got      Value that was actually produced.
 * @param mixed $message  Optional context dumped on failure (callers pass a string or an array).
 * @return void
 */
function assert_equals($expected, $got, $message = ""){
    // Guard clause: identical values mean there is nothing to report.
    if($expected === $got){
        return;
    }

    var_dump($message, "EXPECTED:", $expected, "GOT:", $got);
    exit(1);
}
/**
 * Runs one locale/regex scenario and aborts the script if it does not behave as expected.
 *
 * First switches LC_CTYPE to $locale and asserts that setlocale() returned
 * exactly that name. Then runs preg_match() and asserts that its return value
 * is identical to $expected_result. The failure context includes the subject
 * both raw and hex-encoded.
 *
 * @param string $locale          Locale name passed to setlocale(LC_CTYPE, ...).
 * @param string $regex           PCRE pattern, delimiters and modifiers included.
 * @param string $string          Subject string to match against.
 * @param mixed  $expected_result Value preg_match() is expected to return (callers pass 0 or 1).
 * @return void
 */
function test($locale, $regex, $string, $expected_result){
    $active_locale = setlocale(LC_CTYPE, $locale);
    assert_equals($locale, $active_locale, "setlocale");

    $match_result = preg_match($regex, $string);
    // bin2hex() makes the exact subject bytes visible in the failure dump.
    $failure_context = array($locale, $regex, $string, bin2hex($string));
    assert_equals($expected_result, $match_result, $failure_context);
}
// The same character, "é", in two different byte encodings.
$iso_8859_string = "\xe9"; // é as a single ISO-8859-1 byte
$utf8_string = "\xc3\xa9"; // é as a two-byte UTF-8 sequence
$match_one_w = '/^\w$/';
$match_one_letter_u = '/^\pL$/u';

// Each row is: locale, pattern, subject, expected preg_match() result.
// Rows run in order; the first unmet expectation aborts the script.
$cases = array(
    // C locale: \w is expected to match neither encoding.
    array("C", $match_one_w, $iso_8859_string, 0),
    array("C", $match_one_w, $utf8_string, 0),

    // fr_FR locale: \w is expected to match only the ISO-8859-1 encoded byte.
    array("fr_FR", $match_one_w, $iso_8859_string, 1),
    array("fr_FR", $match_one_w, $utf8_string, 0),

    // fr_FR.UTF8 locale: no match expected for the ISO-8859-1 byte, and none
    // for the UTF-8 string either (although a match might have been expected).
    array("fr_FR.UTF8", $match_one_w, $iso_8859_string, 0),
    array("fr_FR.UTF8", $match_one_w, $utf8_string, 0),

    // Unicode-mode pattern: expected to match the UTF-8 string under every locale tried.
    array("C", $match_one_letter_u, $utf8_string, 1),
    array("fr_FR", $match_one_letter_u, $utf8_string, 1),
    array("fr_FR.UTF8", $match_one_letter_u, $utf8_string, 1),
);

foreach ($cases as $case) {
    test($case[0], $case[1], $case[2], $case[3]);
}