Only unescape valid UTF-8 sequences in MediaWiki page sections

i18n
Vitaliy Filippov 2016-09-01 20:57:51 +03:00
parent 13f6cca220
commit 55bee6cdcc
1 changed file with 8 additions and 2 deletions

View File

@ -422,9 +422,15 @@ sub unquote_wiki_url
my $anchor = '';
if ($article =~ s/#(.*)$//so)
{
my $a;
$anchor = $1;
# decode MediaWiki page section name
$anchor =~ tr/./%/;
# decode MediaWiki page section name (only correct UTF8 sequences)
$anchor =~ s/(
\.[0-7][A-F0-9]|
\.[CD][A-F0-9]\.[89AB][A-F0-9]|
\.E[A-F0-9](?:\.[89AB][A-F0-9]){2}|
\.F[0-7](?:\.[89AB][A-F0-9]){3}
)/($a = $1), ($a =~ tr!.!\%!), ($a)/gesx;
$anchor = url_decode($anchor);
$anchor =~ tr/_/ /;
}