Do not require HTML::Scrubber to correctly strip unsafe tags

hinted-selects
Vitaliy Filippov 2014-08-20 18:12:20 +04:00
parent 6c03a67bc7
commit c005274130
3 changed files with 29 additions and 87 deletions

View File

@ -258,19 +258,6 @@ sub OPTIONAL_MODULES {
version => 0, version => 0,
feature => ['jsonrpc', 'xmlrpc'], feature => ['jsonrpc', 'xmlrpc'],
}, },
{
# We need the 'utf8_mode' method of HTML::Parser, for HTML::Scrubber.
package => 'HTML-Parser',
module => 'HTML::Parser',
version => '3.40',
feature => ['html_desc'],
},
{
package => 'HTML-Scrubber',
module => 'HTML::Scrubber',
version => 0,
feature => ['html_desc'],
},
# Inbound Email # Inbound Email
{ {

View File

@ -160,80 +160,36 @@ sub html_quote {
return $var; return $var;
} }
# Rebuild one whitelisted HTML tag, keeping only attributes considered safe.
#
# Parameters:
#   $tag   - tag name as matched by the caller, optionally prefixed with '/'
#            for a closing tag; it is lower-cased before being emitted.
#   $attrs - raw attribute text following the tag name, or undef when the
#            tag had no attribute part.
#
# Returns the rewritten tag as a string. Kept attributes are emitted in a
# fixed order (name, id, class, style, title, then href/cite) with their
# values exactly as written in the input, including any quotes.
#
# NOTE(review): 'style' is passed through unchanged; whether arbitrary
# inline CSS is acceptable here should be confirmed.
sub _skip_attrs
{
    my ($tag, $attrs) = @_;
    $tag = lc $tag;
    # Closing tags never carry attributes.
    return "<$tag>" if $tag =~ m!^/!;
    # Avoid "uninitialized value" warnings for attribute-less tags.
    $attrs = '' unless defined $attrs;
    # Remember an XHTML-style self-closing slash so it can be re-emitted.
    my $enclosed = $attrs =~ m!/$! ? ' /' : '';
    # Collect name=value pairs. Names are lower-cased so that HREF= and href=
    # are treated alike; when a name repeats, the last occurrence wins.
    my %given;
    while ($attrs =~ /([^\s=]+)=([^\s=\'\"]+|\"[^\"]*\"|\'[^\']*\')/gs)
    {
        $given{lc $1} = $2;
    }
    # Generic attributes allowed on every whitelisted tag.
    my @keep = grep { defined $given{$_} } qw(name id class style title);
    # Tag-specific attribute holding a URL.
    my %link_attr_of = (a => 'href', blockquote => 'cite', q => 'cite');
    my $link_attr = $link_attr_of{$tag};
    if (defined $link_attr && defined $given{$link_attr} &&
        _is_safe_link($given{$link_attr}))
    {
        push @keep, $link_attr;
    }
    return "<$tag" . join('', map { " $_=$given{$_}" } @keep) . $enclosed . '>';
}

# Decide whether a raw href/cite attribute value may be kept.
#
# The value is accepted when it starts with an allow-listed scheme, or when
# it is a relative reference whose first segment (everything before the
# first '/', '?' or '#') contains neither ':' nor '&'. Rejecting '&' there
# blocks character references that would decode into a scheme, such as
# "java&#115;cript:" or "javascript&colon;".
sub _is_safe_link
{
    my ($value) = @_;
    # Drop the surrounding quotes kept by the attribute parser.
    $value =~ s/\A(["'])(.*)\1\z/$2/s;
    # Leading whitespace and control characters must not hide the scheme.
    $value =~ s/\A[\x00-\x20]+//;
    return 1 if $value =~ m{
        \A (?: afs | cid | ftp | gopher | https? | irc | mailto | mid
             | news | nntp | prospero | telnet | wais ) :
    }xi;
    my ($head) = $value =~ m{\A([^/?\#]*)};
    return $head =~ /[:&]/ ? 0 : 1;
}
sub html_light_quote { sub html_light_quote {
my ($text) = @_; my ($text) = @_;
# List of allowed HTML elements having no attributes. # List of allowed HTML elements having no attributes.
my @allow = qw(b strong em i u p br abbr acronym ins del cite code var my @allow = qw(
dfn samp kbd big small sub sup tt dd dt dl ul li ol a b big blockquote strong em i u p br abbr acronym ins del cite code var
fieldset legend); dfn samp kbd q small span sub sup tt dd dt dl ul li ol fieldset legend
);
if (!Bugzilla->feature('html_desc')) { my $safe = join('|', @allow);
my $safe = join('|', @allow); $text =~ s{(<(/?(?:$safe))(\s+(?:[^>"']+|"[^"]*"|'[^']*')*)?>)|(<)|(>)}{($1 ? _skip_attrs($2, $3) : ($4 ? '&lt;' : '&gt;'))}egiso;
my $chr = chr(1); return $text;
# First, escape safe elements.
$text =~ s#<($safe)>#$chr$1$chr#go;
$text =~ s#</($safe)>#$chr/$1$chr#go;
# Now filter < and >.
$text =~ s#<#&lt;#g;
$text =~ s#>#&gt;#g;
# Restore safe elements.
$text =~ s#$chr/($safe)$chr#</$1>#go;
$text =~ s#$chr($safe)$chr#<$1>#go;
return $text;
}
else {
# We can be less restrictive. We can accept elements with attributes.
push(@allow, qw(a blockquote q span));
# Allowed protocols.
my $safe_protocols = join('|', SAFE_PROTOCOLS);
my $protocol_regexp = qr{(^(?:$safe_protocols):|^[^:]+$)}i;
# Deny all elements and attributes unless explicitly authorized.
my @default = (0 => {
id => 1,
name => 1,
class => 1,
'*' => 0, # Reject all other attributes.
}
);
# Specific rules for allowed elements. If no specific rule is set
# for a given element, then the default is used.
my @rules = (a => {
href => $protocol_regexp,
title => 1,
id => 1,
name => 1,
class => 1,
'*' => 0, # Reject all other attributes.
},
blockquote => {
cite => $protocol_regexp,
id => 1,
name => 1,
class => 1,
'*' => 0, # Reject all other attributes.
},
'q' => {
cite => $protocol_regexp,
id => 1,
name => 1,
class => 1,
'*' => 0, # Reject all other attributes.
},
);
my $scrubber = HTML::Scrubber->new(default => \@default,
allow => \@allow,
rules => \@rules,
comment => 0,
process => 0);
return $scrubber->scrub($text);
}
} }
sub email_filter { sub email_filter {
@ -1150,8 +1106,8 @@ deleted.
=item C<html_light_quote($val)> =item C<html_light_quote($val)>
Returns a string where only explicitly allowed HTML elements and attributes Returns a string where only explicitly allowed HTML elements and attributes
are kept. All HTML elements and attributes not being in the whitelist are either are kept. All HTML elements not being in the whitelist are escaped; all HTML
escaped (if HTML::Scrubber is not installed) or removed. attributes not being in the whitelist are removed.
=item C<url_quote($val)> =item C<url_quote($val)>

View File

@ -53,7 +53,6 @@ END
feature_auth_ldap => 'LDAP Authentication', feature_auth_ldap => 'LDAP Authentication',
feature_auth_radius => 'RADIUS Authentication', feature_auth_radius => 'RADIUS Authentication',
feature_graphical_reports => 'Graphical Reports', feature_graphical_reports => 'Graphical Reports',
feature_html_desc => 'More HTML in Product/Group Descriptions',
feature_inbound_email => 'Inbound Email', feature_inbound_email => 'Inbound Email',
feature_jobqueue => 'Mail Queueing', feature_jobqueue => 'Mail Queueing',
feature_jsonrpc => 'JSON-RPC Interface', feature_jsonrpc => 'JSON-RPC Interface',