Bug 819432: Execute queries in two steps to improve performance

r=dkl a=LpSolit
4.4
Frédéric Buclin 2013-01-16 19:10:07 +01:00
parent 5fc1fc3ac7
commit 129c03ff64
8 changed files with 218 additions and 47 deletions

View File

@ -30,9 +30,10 @@ use Data::Dumper;
use Date::Format;
use Date::Parse;
use Scalar::Util qw(blessed);
use List::MoreUtils qw(all part uniq);
use List::MoreUtils qw(all firstidx part uniq);
use POSIX qw(INT_MAX);
use Storable qw(dclone);
use Time::HiRes qw(gettimeofday tv_interval);
# Description Of Boolean Charts
# -----------------------------
@ -682,7 +683,70 @@ sub new {
# Public Accessors #
####################
sub sql {
sub data {
my $self = shift;
return $self->{data} if $self->{data};
my $dbh = Bugzilla->dbh;
# If all fields belong to the 'bugs' table, there is no need to split
# the original query into two pieces. Else we override the 'fields'
# argument to first get bug IDs based on the search criteria defined
# by the caller, and the desired fields are collected in the 2nd query.
my @orig_fields = $self->_input_columns;
my $all_in_bugs_table = 1;
foreach my $field (@orig_fields) {
next if $self->COLUMNS->{$field}->{name} =~ /^bugs\.\w+$/;
$self->{fields} = ['bug_id'];
$all_in_bugs_table = 0;
last;
}
my $start_time = [gettimeofday()];
my $sql = $self->_sql;
# Do we just want bug IDs to pass to the 2nd query or all the data immediately?
my $func = $all_in_bugs_table ? 'selectall_arrayref' : 'selectcol_arrayref';
my $bug_ids = $dbh->$func($sql);
my @extra_data = ({sql => $sql, time => tv_interval($start_time)});
# Restore the original 'fields' argument, just in case.
$self->{fields} = \@orig_fields unless $all_in_bugs_table;
# If there are no bugs found, or all fields are in the 'bugs' table,
# there is no need for another query.
if (!scalar @$bug_ids || $all_in_bugs_table) {
$self->{data} = $bug_ids;
return wantarray ? ($self->{data}, \@extra_data) : $self->{data};
}
# Make sure the bug_id will be returned. If not, append it to the list.
my $pos = firstidx { $_ eq 'bug_id' } @orig_fields;
if ($pos < 0) {
push(@orig_fields, 'bug_id');
$pos = $#orig_fields;
}
# Now create a query with the buglist above as the single criteria
# and the fields that the caller wants. No need to redo security checks;
# the list has already been validated above.
my $search = $self->new('fields' => \@orig_fields,
'params' => {bug_id => $bug_ids, bug_id_type => 'anyexact'},
'sharer' => $self->_sharer_id,
'user' => $self->_user,
'allow_unlimited' => 1,
'_no_security_check' => 1);
$start_time = [gettimeofday()];
$sql = $search->_sql;
my $unsorted_data = $dbh->selectall_arrayref($sql);
push(@extra_data, {sql => $sql, time => tv_interval($start_time)});
# Let's sort the data. We didn't do it in the query itself because
# we already know in which order to sort bugs thanks to the first query,
# and this avoids additional table joins in the SQL query.
my %data = map { $_->[$pos] => $_ } @$unsorted_data;
$self->{data} = [map { $data{$_} } @$bug_ids];
return wantarray ? ($self->{data}, \@extra_data) : $self->{data};
}
sub _sql {
my ($self) = @_;
return $self->{sql} if $self->{sql};
my $dbh = Bugzilla->dbh;
@ -716,7 +780,7 @@ sub search_description {
# Make sure that the description has actually been generated if
# people are asking for the whole thing.
else {
$self->sql;
$self->_sql;
}
return $self->{'search_description'};
}
@ -1112,6 +1176,7 @@ sub _standard_joins {
my ($self) = @_;
my $user = $self->_user;
my @joins;
return () if $self->{_no_security_check};
my $security_join = {
table => 'bug_group_map',
@ -1188,6 +1253,7 @@ sub _translate_join {
# group security.
sub _standard_where {
my ($self) = @_;
return ('1=1') if $self->{_no_security_check};
# If replication lags badly between the shadow db and the main DB,
# it's possible for bugs to show up in searches before their group
# controls are properly set. To prevent this, when initially creating
@ -2949,3 +3015,109 @@ sub _translate_old_column {
}
1;
__END__
=head1 NAME
Bugzilla::Search - Provides methods to run queries against bugs.
=head1 SYNOPSIS
use Bugzilla::Search;
my $search = new Bugzilla::Search({'fields' => \@fields,
'params' => \%search_criteria,
'sharer' => $sharer_id,
'user' => $user_obj,
'allow_unlimited' => 1});
my $data = $search->data;
my ($data, $extra_data) = $search->data;
=head1 DESCRIPTION
Search.pm represents a search object. It's the single way to collect
data about bugs in a secure way. The list of bugs matching criteria
defined by the caller are filtered based on the user privileges.
=head1 METHODS
=head2 new
=over
=item B<Description>
Create a Bugzilla::Search object.
=item B<Params>
=over
=item C<fields>
An arrayref representing the bug attributes for which data is desired.
Legal attributes are listed in the fielddefs DB table. At least one field
must be defined, typically the 'bug_id' field.
=item C<params>
A hashref representing search criteria. Each key => value pair represents
a search criteria, where the key is the search field and the value is the
value for this field. At least one search criteria must be defined if the
'search_allow_no_criteria' parameter is turned off, else an error is thrown.
=item C<sharer>
When a saved search is shared by a user, this is his user ID.
=item C<user>
A L<Bugzilla::User> object representing the user to whom the data is addressed.
All security checks are done based on this user object, so it's not safe
to share results of the query with other users as not all users have the
same privileges or have the same role for all bugs in the list. If this
parameter is not defined, then the currently logged in user is taken into
account. If no user is logged in, then only public bugs will be returned.
=item C<allow_unlimited>
If set to a true value, the number of bugs retrieved by the query is not
limited.
=back
=item B<Returns>
A L<Bugzilla::Search> object.
=back
=head2 data
=over
=item B<Description>
Returns bugs matching search criteria passed to C<new()>.
=item B<Params>
None
=item B<Returns>
In scalar context, this method returns a reference to a list of bugs.
Each item of the list represents a bug, which is itself a reference to
a list where each item represents a bug attribute, in the same order as
specified in the C<fields> parameter of C<new()>.
In list context, this methods also returns a reference to a list containing
references to hashes. For each hash, two keys are defined: C<sql> contains
the SQL query which has been executed, and C<time> contains the time spent
to execute the SQL query, in seconds. There can be either a single hash, or
two hashes if two SQL queries have been executed sequentially to get all the
required data.
=back

View File

@ -30,7 +30,6 @@ use Bugzilla::Status;
use Bugzilla::Token;
use Date::Parse;
use Time::HiRes qw(gettimeofday tv_interval);
my $cgi = Bugzilla->cgi;
my $dbh = Bugzilla->dbh;
@ -682,8 +681,7 @@ my $search = new Bugzilla::Search('fields' => \@selectcolumns,
'params' => scalar $params->Vars,
'order' => \@order_columns,
'sharer' => $sharer_id);
my $query = $search->sql;
$vars->{'search_description'} = $search->search_description;
$order = join(',', $search->order);
if (scalar @{$search->invalid_order_columns}) {
@ -703,18 +701,6 @@ $params->delete('limit') if $vars->{'default_limited'};
# Query Execution
################################################################################
if ($cgi->param('debug')) {
$vars->{'debug'} = 1;
$vars->{'query'} = $query;
# Explains are limited to admins because you could use them to figure
# out how many hidden bugs are in a particular product (by doing
# searches and looking at the number of rows the explain says it's
# examining).
if ($user->in_group('admin')) {
$vars->{'query_explain'} = $dbh->bz_explain($query);
}
}
# Time to use server push to display an interim message to the user until
# the query completes and we can display the bug list.
if ($serverpush) {
@ -747,11 +733,25 @@ $::SIG{TERM} = 'DEFAULT';
$::SIG{PIPE} = 'DEFAULT';
# Execute the query.
my $start_time = [gettimeofday()];
my $buglist_sth = $dbh->prepare($query);
$buglist_sth->execute();
$vars->{query_time} = tv_interval($start_time);
my ($data, $extra_data) = $search->data;
$vars->{'search_description'} = $search->search_description;
if ($cgi->param('debug')) {
$vars->{'debug'} = 1;
$vars->{'queries'} = $extra_data;
my $query_time = 0;
$query_time += $_->{'time'} foreach @$extra_data;
$vars->{'query_time'} = $query_time;
# Explains are limited to admins because you could use them to figure
# out how many hidden bugs are in a particular product (by doing
# searches and looking at the number of rows the explain says it's
# examining).
if ($user->in_group('admin')) {
foreach my $query (@$extra_data) {
$query->{explain} = $dbh->bz_explain($query->{sql});
}
}
}
################################################################################
# Results Retrieval
@ -783,14 +783,14 @@ my @bugidlist;
my @bugs; # the list of records
while (my @row = $buglist_sth->fetchrow_array()) {
foreach my $row (@$data) {
my $bug = {}; # a record
# Slurp the row of data into the record.
# The second from last column in the record is the number of groups
# to which the bug is restricted.
foreach my $column (@selectcolumns) {
$bug->{$column} = shift @row;
$bug->{$column} = shift @$row;
}
# Process certain values further (i.e. date format conversion).

View File

@ -484,8 +484,7 @@ sub CollectSeriesData {
'fields' => ["bug_id"],
'allow_unlimited' => 1,
'user' => $user);
my $sql = $search->sql;
$data = $shadow_dbh->selectall_arrayref($sql);
$data = $search->data;
};
if (!$@) {

View File

@ -163,13 +163,12 @@ my $search = new Bugzilla::Search(
params => scalar $params->Vars,
allow_unlimited => 1,
);
my $query = $search->sql;
$::SIG{TERM} = 'DEFAULT';
$::SIG{PIPE} = 'DEFAULT';
my $dbh = Bugzilla->switch_to_shadow_db();
my $results = $dbh->selectall_arrayref($query);
Bugzilla->switch_to_shadow_db();
my ($results, $extra_data) = $search->data;
# We have a hash of hashes for the data itself, and a hash to hold the
# row/col/table names.
@ -256,7 +255,7 @@ if ($formatparam eq "bar") {
$vars->{'width'} = $width;
$vars->{'height'} = $height;
$vars->{'query'} = $query;
$vars->{'queries'} = $extra_data;
$vars->{'saved_report_id'} = $cgi->param('saved_report_id');
$vars->{'debug'} = $cgi->param('debug');

View File

@ -72,10 +72,13 @@
[% IF debug %]
<div class="bz_query_debug">
<p>[% query FILTER html %]</p>
<p>Execution time: [% query_time FILTER html %] seconds</p>
[% IF query_explain.defined %]
<pre>[% query_explain FILTER html %]</pre>
<p>Total execution time: [% query_time FILTER html %] seconds</p>
[% FOREACH query = queries %]
<p>[% query.sql FILTER html %]</p>
<p>Execution time: [% query.time FILTER html %] seconds</p>
[% IF query.explain %]
<pre>[% query.explain FILTER html %]</pre>
[% END %]
[% END %]
</div>
[% END %]

View File

@ -61,7 +61,9 @@
%]
[% IF debug %]
<p>[% query FILTER html %]</p>
[% FOREACH query = queries %]
<p>[% query.sql FILTER html %]</p>
[% END %]
[% END %]
<div align="center">

View File

@ -453,7 +453,7 @@ sub run_queries {
'order' => \@orderstrings
);
# If a query fails for whatever reason, it shouldn't kill the script.
my $sqlquery = eval { $search->sql };
my $data = eval { $search->data };
if ($@) {
print STDERR get_text('whine_query_failed', { query_name => $thisquery->{'name'},
author => $args->{'author'},
@ -461,15 +461,12 @@ sub run_queries {
next;
}
$sth = $dbh->prepare($sqlquery);
$sth->execute;
while (my @row = $sth->fetchrow_array) {
foreach my $row (@$data) {
my $bug = {};
for my $field (@searchfields) {
my $fieldname = $field;
$fieldname =~ s/^bugs\.//; # No need for bugs.whatever
$bug->{$fieldname} = shift @row;
$bug->{$fieldname} = shift @$row;
}
if ($thisquery->{'onemailperbug'}) {

View File

@ -530,13 +530,13 @@ sub do_tests {
my $sql;
TODO: {
local $TODO = $search_broken if $search_broken;
lives_ok { $sql = $search->sql } "$name: generate SQL";
lives_ok { $sql = $search->_sql } "$name: generate SQL";
}
my $results;
SKIP: {
skip "Can't run SQL without any SQL", 1 if !defined $sql;
$results = $self->_test_sql($sql);
$results = $self->_test_sql($search);
}
$self->_test_content($results, $sql);
@ -553,12 +553,11 @@ sub _test_search_object_creation {
}
sub _test_sql {
my ($self, $sql) = @_;
my $dbh = Bugzilla->dbh;
my ($self, $search) = @_;
my $name = $self->name;
my $results;
lives_ok { $results = $dbh->selectall_arrayref($sql) } "$name: Run SQL Query"
or diag($sql);
lives_ok { $results = $search->data } "$name: Run SQL Query"
or diag($search->_sql);
return $results;
}