Bug 142119 - Sphinx: indexing code, searching code to be done
git-svn-id: svn://svn.office.custis.ru/3rdparty/bugzilla.org/trunk@1886 6955db30-a419-402b-8a0d-67ecbb4d7f56
master
parent
d55b895c2f
commit
b9653028c0
10
Bugzilla.pm
10
Bugzilla.pm
|
@ -610,6 +610,16 @@ sub dbh {
|
|||
return $class->request_cache->{dbh};
|
||||
}
|
||||
|
||||
sub dbh_sphinx
{
    # Class method: return the per-request Sphinx handle, creating it on
    # first use via Bugzilla::DB::connect_sphinx().
    my $class = shift;
    my $cache = $class->request_cache;

    # Test with `exists` rather than truthiness: whatever connect_sphinx()
    # returned the first time -- including undef -- is stored and reused,
    # so the connection attempt happens at most once per request cache.
    $cache->{dbh_sphinx} = Bugzilla::DB::connect_sphinx()
        unless exists $cache->{dbh_sphinx};

    return $cache->{dbh_sphinx};
}
|
||||
|
||||
sub dbh_main {
|
||||
my $class = shift;
|
||||
$class->request_cache->{dbh_main} ||= Bugzilla::DB::connect_main();
|
||||
|
|
|
@ -1197,7 +1197,6 @@ sub _extract_multi_selects {
|
|||
# Should be called any time you update short_desc or change a comment.
|
||||
sub _sync_fulltext
|
||||
{
|
||||
use utf8;
|
||||
my ($self, $new_bug) = @_;
|
||||
my $dbh = Bugzilla->dbh;
|
||||
my ($short_desc) = $dbh->selectrow_array(
|
||||
|
@ -1214,21 +1213,34 @@ sub _sync_fulltext
|
|||
$nopriv = join "\n", @$nopriv;
|
||||
$priv = join "\n", @$priv;
|
||||
my $row = [ $short_desc, $nopriv, $priv ];
|
||||
$_ = $dbh->quote_fulltext($_) for @$row;
|
||||
## O_o Don't know how can it be tainted here, sometimes it was. Checking if it goes away.
|
||||
#trick_taint($row);
|
||||
# Determine if we are using Sphinx or MySQL fulltext search
|
||||
my ($sph, $id_field);
|
||||
my $index = Bugzilla->localconfig->{sphinx_index};
|
||||
if ($index)
|
||||
{
|
||||
$sph = Bugzilla->dbh_sphinx;
|
||||
$id_field = 'id';
|
||||
$_ = $sph->quote($_) for @$row;
|
||||
}
|
||||
else
|
||||
{
|
||||
$index = 'bugs_fulltext';
|
||||
$sph = $dbh;
|
||||
$id_field = 'bug_id';
|
||||
$_ = $dbh->quote_fulltext($_) for @$row;
|
||||
}
|
||||
my $sql;
|
||||
if ($new_bug)
|
||||
{
|
||||
$sql = "INSERT INTO bugs_fulltext (bug_id, short_desc, comments, comments_private)".
|
||||
$sql = "INSERT INTO $index ($id_field, short_desc, comments, comments_private)".
|
||||
" VALUES (".join(',', $self->id, @$row).")";
|
||||
}
|
||||
else
|
||||
{
|
||||
$sql = "UPDATE bugs_fulltext SET short_desc=$row->[0],".
|
||||
" comments=$row->[1], comments_private=$row->[2] WHERE bug_id=".$self->id;
|
||||
$sql = "UPDATE $index SET short_desc=$row->[0],".
|
||||
" comments=$row->[1], comments_private=$row->[2] WHERE $id_field=".$self->id;
|
||||
}
|
||||
return $dbh->do($sql);
|
||||
return $sph->do($sql);
|
||||
}
|
||||
|
||||
# This is the correct way to delete bugs from the DB.
|
||||
|
|
|
@ -96,6 +96,33 @@ sub connect_main {
|
|||
$lc->{db_sock}, $lc->{db_user}, $lc->{db_pass});
|
||||
}
|
||||
|
||||
# Open a connection to the Sphinx search daemon through its MySQL-protocol
# listener, using the sphinx_* settings from Bugzilla->localconfig.
#
# Returns undef when sphinx_index is empty (Sphinx support disabled),
# otherwise a connected DBI handle.
# Dies, naming the DSN and the DBI error, if the connection cannot be made.
sub connect_sphinx
{
    my $lc = Bugzilla->localconfig;

    # Sphinx support is optional. An explicit undef (not a bare return) is
    # kept so the result is the same single value in every calling context.
    return undef if !$lc->{sphinx_index};

    my $host = $lc->{sphinx_host};
    my $port = $lc->{sphinx_port};
    my $sock = $lc->{sphinx_sock};

    # NOTE(review): "database=none" looks like a placeholder because the
    # mysql DSN wants a database name -- confirm Sphinx ignores it.
    my $dsn = "dbi:mysql:host=$host;database=none";
    $dsn .= ";port=$port" if $port;
    $dsn .= ";mysql_socket=$sock" if $sock;

    my $sphinx = DBI->connect($dsn, 'nobody', 'nobody', {
        mysql_enable_utf8 => 1,
        # Needs to be explicitly specified for command-line processes.
        mysql_auto_reconnect => 1,
    });

    # DBI->connect returns undef on failure unless RaiseError is set.
    # Report the real cause here instead of failing on the next line with
    # "Can't call method "do" on an undefined value".
    if (!$sphinx)
    {
        die "Can't connect to Sphinx ($dsn): " . ($DBI::errstr || 'unknown error');
    }

    $sphinx->do("SET NAMES utf8");

    return $sphinx;
}
|
||||
|
||||
sub _connect {
|
||||
my ($driver, $host, $dbname, $port, $sock, $user, $pass) = @_;
|
||||
|
||||
|
|
|
@ -3286,8 +3286,17 @@ sub _populate_bugs_fulltext
|
|||
my $bug_ids = shift;
|
||||
$bug_ids = undef if $bug_ids && !@$bug_ids;
|
||||
my $dbh = Bugzilla->dbh;
|
||||
my ($table, $limit1, $id) = ('bugs_fulltext', $dbh->sql_limit(1), 'bug_id');
|
||||
my $fulltext = $dbh->selectrow_array("SELECT $id FROM $table $limit1");
|
||||
# These vary between different fulltext search engines (MySQL, Sphinx)
|
||||
my ($table, $limit1, $id_field, $quote, $sph) = ('bugs_fulltext', $dbh->sql_limit(1), 'bug_id', 'quote_fulltext', $dbh);
|
||||
if (Bugzilla->localconfig->{sphinx_index})
|
||||
{
|
||||
$sph = Bugzilla->dbh_sphinx;
|
||||
$limit1 = 'LIMIT 1';
|
||||
$table = Bugzilla->localconfig->{sphinx_index};
|
||||
$id_field = 'id';
|
||||
$quote = 'quote';
|
||||
}
|
||||
my $fulltext = $sph->selectrow_array("SELECT $id_field FROM $table $limit1");
|
||||
my ($datasize, $time) = (0, time);
|
||||
my ($lastdata, $lasttime) = ($datasize, $time);
|
||||
# We only populate the table if it's empty or if we've been given a
|
||||
|
@ -3298,52 +3307,66 @@ sub _populate_bugs_fulltext
|
|||
$bug_ids ||= $dbh->selectcol_arrayref("SELECT bug_id FROM bugs");
|
||||
return if !$bug_ids;
|
||||
|
||||
# There could be tons of bugs, so we'll use 256-bug portions
|
||||
# There could be tons of bugs, so we use 256-bug portions,
|
||||
# and limit single query to 4MB
|
||||
print "Populating full-text index... (this can take a long time.)\n";
|
||||
my ($portion, $done, $total) = (256, 0, scalar @$bug_ids);
|
||||
my ($short, $all, $nopriv, $wh, $rows);
|
||||
while (my @ids = splice @$bug_ids, 0, $portion)
|
||||
my ($portion, $done, $max_packet, $total) = (256, 0, 4*1024*1024, scalar @$bug_ids);
|
||||
my ($short, $all, $nopriv, $wh);
|
||||
|
||||
# Count lengths in bytes
|
||||
use bytes;
|
||||
my $rows = {};
|
||||
while (@$bug_ids || %$rows)
|
||||
{
|
||||
$rows = {};
|
||||
$wh = "bug_id IN (" . join(",", ("?") x @ids) . ")";
|
||||
($short) = $dbh->selectall_arrayref(
|
||||
"SELECT bug_id, short_desc FROM bugs WHERE $wh", undef, @ids
|
||||
);
|
||||
$all = $dbh->selectall_arrayref(
|
||||
"SELECT bug_id, thetext, isprivate FROM longdescs WHERE $wh",
|
||||
undef, @ids
|
||||
);
|
||||
# Local block with 'use bytes' for counting data size in MB
|
||||
if (scalar keys %$rows < $portion)
|
||||
{
|
||||
use bytes;
|
||||
# Read more data
|
||||
my @ids = splice @$bug_ids, 0, $portion;
|
||||
$wh = "bug_id IN (" . join(",", ("?") x @ids) . ")";
|
||||
# Get bug titles
|
||||
($short) = $dbh->selectall_arrayref(
|
||||
"SELECT bug_id, short_desc FROM bugs WHERE $wh", undef, @ids
|
||||
);
|
||||
for (@$short)
|
||||
{
|
||||
$rows->{$_->[0]} = [ $_->[1], '', '' ];
|
||||
$datasize += length $_->[1];
|
||||
}
|
||||
# Comments divide into non-private and private
|
||||
# Get comments; they can be private and non-private
|
||||
$all = $dbh->selectall_arrayref(
|
||||
"SELECT bug_id, thetext, isprivate FROM longdescs WHERE $wh",
|
||||
undef, @ids
|
||||
);
|
||||
for (@$all)
|
||||
{
|
||||
$rows->{$_->[0]}->[$_->[2] ? 2 : 1] .= $_->[1] . "\n";
|
||||
$datasize += length($_->[1])+1;
|
||||
}
|
||||
}
|
||||
my $query = "INSERT INTO $table ($id_field, short_desc, comments, comments_private) VALUES ";
|
||||
my $len = 0;
|
||||
my @ids;
|
||||
for (keys %$rows)
|
||||
{
|
||||
Encode::_utf8_off($_) for @{$rows->{$_}};
|
||||
for (@{$rows->{$_}})
|
||||
{
|
||||
Encode::_utf8_off($_);
|
||||
$datasize += length $_;
|
||||
}
|
||||
my $s = "($_, ".join(', ', map { $sph->$quote($_) } @{$rows->{$_}})."), ";
|
||||
if ($len + length $s >= $max_packet)
|
||||
{
|
||||
last;
|
||||
}
|
||||
delete $rows->{$_};
|
||||
push @ids, $_;
|
||||
$query .= $s;
|
||||
$len += length $s;
|
||||
}
|
||||
for (keys %$rows)
|
||||
{
|
||||
# CustIS Bug 46221 - Snowball stemmers in Bugzilla fulltext search
|
||||
$rows->{$_} = join ', ', map { $dbh->quote_fulltext($_) } @{$rows->{$_}};
|
||||
}
|
||||
$dbh->do("DELETE FROM $table WHERE $id IN (".join(',', @ids).')');
|
||||
$dbh->do(
|
||||
"INSERT INTO $table ($id, short_desc, comments, comments_private) VALUES ".
|
||||
join(", ", map { "($_, $rows->{$_})" } @ids)
|
||||
);
|
||||
substr $query, -2, 2, '';
|
||||
$sph->do("DELETE FROM $table WHERE $id_field IN (".join(',', @ids).')');
|
||||
$sph->do($query);
|
||||
$done += @ids;
|
||||
print "\r$done / $total, ".sprintf("%.2f MB, %d KB/s", $datasize/1048576, ($datasize-$lastdata)/1024/(time-$lasttime));
|
||||
print "\r$done / $total, ";
|
||||
printf("%.2f MB, %d KB/s", $datasize/1048576, ($datasize-$lastdata)/1024/(time-$lasttime));
|
||||
print " ...";
|
||||
($lastdata, $lasttime) = ($datasize, time);
|
||||
}
|
||||
|
|
|
@ -150,6 +150,26 @@ EOT
|
|||
# want that.
|
||||
EOT
|
||||
},
|
||||
{
|
||||
name => 'sphinx_host',
|
||||
default => 'localhost',
|
||||
desc => "# The DNS name of the host that the Sphinx server runs on.\n"
|
||||
},
|
||||
{
|
||||
name => 'sphinx_index',
|
||||
default => '',
|
||||
desc => "# The name of Sphinx Search index to use for fulltext search ('' to disable Sphinx support)\n"
|
||||
},
|
||||
{
|
||||
name => 'sphinx_port',
|
||||
default => 0,
|
||||
desc => "# Sphinx port (listening MySQL protocol)\n",
|
||||
},
|
||||
{
|
||||
name => 'sphinx_sock',
|
||||
default => '',
|
||||
desc => "# Sphinx UNIX socket (listening MySQL protocol)\n",
|
||||
},
|
||||
{
|
||||
name => 'db_check',
|
||||
default => 1,
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
# Add this to your sphinx.conf to use Sphinx search
|
||||
|
||||
index bugs
|
||||
{
|
||||
type = rt
|
||||
path = /var/lib/sphinxsearch/data/bugs
|
||||
rt_field = short_desc
|
||||
rt_field = comments
|
||||
rt_field = comments_private
|
||||
docinfo = extern
|
||||
enable_star = 1
|
||||
charset_type = utf-8
|
||||
charset_table = 0..9, A..Z->a..z, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F
|
||||
blend_chars = _, -, &, +, @, $
|
||||
morphology = stem_enru
|
||||
min_word_len = 2
|
||||
}
|
Loading…
Reference in New Issue