Do not require HTML::Scrubber to correctly strip unsafe tags
parent
6c03a67bc7
commit
c005274130
|
@ -258,19 +258,6 @@ sub OPTIONAL_MODULES {
|
|||
version => 0,
|
||||
feature => ['jsonrpc', 'xmlrpc'],
|
||||
},
|
||||
{
|
||||
# We need the 'utf8_mode' method of HTML::Parser, for HTML::Scrubber.
|
||||
package => 'HTML-Parser',
|
||||
module => 'HTML::Parser',
|
||||
version => '3.40',
|
||||
feature => ['html_desc'],
|
||||
},
|
||||
{
|
||||
package => 'HTML-Scrubber',
|
||||
module => 'HTML::Scrubber',
|
||||
version => 0,
|
||||
feature => ['html_desc'],
|
||||
},
|
||||
|
||||
# Inbound Email
|
||||
{
|
||||
|
|
102
Bugzilla/Util.pm
102
Bugzilla/Util.pm
|
@ -160,80 +160,36 @@ sub html_quote {
|
|||
return $var;
|
||||
}
|
||||
|
||||
# Rebuild one whitelisted HTML tag, keeping only attributes considered safe.
#
# Parameters:
#   $tag   - the element name as found in the text, with a leading "/" for a
#            closing tag (e.g. "a", "/B"). It is lower-cased in the output.
#   $attrs - the raw text between the element name and the closing ">".
#            May be undef when the tag carries no attribute text at all.
#
# Returns the rebuilt tag as a string. Only name, id, class, style and title
# are kept for every element, plus href on <a> and cite on <blockquote>/<q>
# when the URL does not start with a script-capable scheme. Attribute values
# are emitted exactly as they were written (including their quotes).
# Attributes are emitted in alphabetical order so the output is deterministic.
#
# NOTE(review): "style" is kept as-is; CSS-based script injection in legacy
# browsers is not filtered here. The scheme check is a blacklist and does not
# decode character references (e.g. "&#106;avascript:") -- confirm whether a
# protocol whitelist should be applied instead.
sub _skip_attrs
{
    my ($tag, $attrs) = @_;
    $tag = lc $tag;

    # Closing tags never carry attributes.
    return "<$tag>" if $tag =~ m{^/};

    # The attribute capture is undefined for tags such as "<b>".
    $attrs = '' unless defined $attrs;

    # Preserve XHTML-style self-closing tags ("<br />").
    my $enclosed = ($attrs =~ m{/$}) ? ' /' : '';

    # Parse name=value pairs; values may be bare, double- or single-quoted.
    # Names are lower-cased so that "HREF" and "href" are treated alike.
    # If an attribute is repeated, the last occurrence wins.
    my %given;
    while ($attrs =~ /([^\s=]+)=([^\s=\'\"]+|\"[^\"]*\"|\'[^\']*\')/gs)
    {
        $given{lc $1} = $2;
    }

    my %kept;
    for my $name (qw(name id class style title))
    {
        # Test existence rather than truth so that a value of "0" survives.
        $kept{$name} = $given{$name} if exists $given{$name};
    }

    # Elements that are allowed one URL-valued attribute.
    my %url_attr_for = (a => 'href', blockquote => 'cite', q => 'cite');
    my $url_attr = $url_attr_for{$tag};
    if (defined $url_attr && exists $given{$url_attr})
    {
        # Check a normalized copy: drop the opening quote and any whitespace
        # or control characters, which would otherwise let " javascript:" or
        # "java\tscript:" slip past the scheme test.
        my $url = $given{$url_attr};
        $url =~ s/^[\"\']//;
        $url =~ s/[\x00-\x20]+//g;
        $kept{$url_attr} = $given{$url_attr}
            unless $url =~ /^(?:javascript|vbscript:|data:)/i;
    }

    return "<$tag"
         . join('', map { " $_=$kept{$_}" } sort keys %kept)
         . $enclosed . '>';
}
|
||||
|
||||
# Escape HTML in $text while letting a small whitelist of elements through.
#
# Parameters:
#   $text - the string to filter.
#
# Returns the filtered string:
#   * tags whose element name is in the whitelist below are kept, with their
#     attributes filtered by _skip_attrs();
#   * every other "<" and ">" is replaced by "&lt;" and "&gt;", so elements
#     that are not whitelisted are shown as text rather than interpreted.
#
# Element names are matched case-insensitively. No optional module is needed:
# the filtering is done with a single substitution.
sub html_light_quote {
    my ($text) = @_;

    # Elements that are allowed through; anything else is escaped.
    my @allow = qw(
        a b big blockquote strong em i u p br abbr acronym ins del cite code var
        dfn samp kbd q small span sub sup tt dd dt dl ul li ol fieldset legend
    );
    my $safe = join('|', @allow);

    # Capture groups:
    #   $1 - a complete whitelisted tag
    #   $2 - its element name, with a leading "/" for a closing tag
    #   $3 - its raw attribute text (undefined when there is none)
    #   $4 - a stray "<"
    #   $5 - a stray ">"
    $text =~ s{
        ( < ( /? (?:$safe) ) ( \s+ (?: [^>"']+ | "[^"]*" | '[^']*' )* )? > )
        | (<)
        | (>)
    }{ $1 ? _skip_attrs($2, $3) : ($4 ? '&lt;' : '&gt;') }egisox;

    return $text;
}
|
||||
|
||||
sub email_filter {
|
||||
|
@ -1150,8 +1106,8 @@ deleted.
|
|||
=item C<html_light_quote($val)>
|
||||
|
||||
Returns a string where only explicitly allowed HTML elements and attributes
|
||||
are kept. All HTML elements and attributes not being in the whitelist are either
|
||||
escaped (if HTML::Scrubber is not installed) or removed.
|
||||
are kept. All HTML elements not being in the whitelist are escaped; all HTML
|
||||
attributes not being in the whitelist are removed.
|
||||
|
||||
=item C<url_quote($val)>
|
||||
|
||||
|
|
|
@ -53,7 +53,6 @@ END
|
|||
feature_auth_ldap => 'LDAP Authentication',
|
||||
feature_auth_radius => 'RADIUS Authentication',
|
||||
feature_graphical_reports => 'Graphical Reports',
|
||||
feature_html_desc => 'More HTML in Product/Group Descriptions',
|
||||
feature_inbound_email => 'Inbound Email',
|
||||
feature_jobqueue => 'Mail Queueing',
|
||||
feature_jsonrpc => 'JSON-RPC Interface',
|
||||
|
|
Loading…
Reference in New Issue