From 3e58bcd54f064c48e8fc0e9d3267f5b1c620f375 Mon Sep 17 00:00:00 2001 From: vfilippov Date: Tue, 15 Sep 2009 12:33:27 +0000 Subject: [PATCH] Bug 46221 Relevance + AND by default git-svn-id: svn://svn.office.custis.ru/3rdparty/bugzilla.org/trunk@427 6955db30-a419-402b-8a0d-67ecbb4d7f56 --- Bugzilla/DB/Mysql.pm | 31 +++++++++++++++++++------------ Bugzilla/Util.pm | 14 +++++++++++--- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/Bugzilla/DB/Mysql.pm b/Bugzilla/DB/Mysql.pm index 20f0035ee..8b2b3a3ad 100644 --- a/Bugzilla/DB/Mysql.pm +++ b/Bugzilla/DB/Mysql.pm @@ -165,22 +165,29 @@ sub sql_string_concat { sub sql_fulltext_search { my ($self, $column, $text) = @_; - # Add the boolean mode modifier if the search string contains - # boolean operators at the start or end of a word. - my $mode = ''; - if ($text =~ /(?:^|\W)[+\-<>~"()]/ || $text =~ /[()"*](?:$|\W)/) { - $mode = 'IN BOOLEAN MODE'; - - # quote un-quoted compound words - my @words = quotewords('[\s()]+', 'delimiters', $text); - foreach my $word (@words) { + # quote un-quoted compound words + my @words = quotewords('[\s()]+', 'delimiters', $text); + if ($text =~ /(?:^|\W)[+\-<>~"()]/ || $text =~ /[()"*](?:$|\W)/) + { + # already a boolean mode search + foreach my $word (@words) + { # match words that have non-word chars in the middle of them - if ($word =~ /\w\W+\w/ && $word !~ m/"/) { + if ($word =~ /\w\W+\w/ && $word !~ /\"/) + { $word = '"' . $word . '"'; } } - $text = join('', @words); } + else + { + # make search a boolean mode search + for (@words) + { + $_ = "+$_*" if /\w$/; + } + } + $text = join '', @words; # quote the text for use in the MATCH AGAINST expression $text = $self->quote($text); @@ -188,7 +195,7 @@ sub sql_fulltext_search { # untaint the text, since it's safe to use now that we've quoted it trick_taint($text); - return "MATCH($column) AGAINST($text $mode)"; + return ("MATCH($column) AGAINST($text IN BOOLEAN MODE)", "MATCH($column) AGAINST($text)"); } sub sql_istring { diff --git a/Bugzilla/Util.pm b/Bugzilla/Util.pm index db576621d..8a707f899 100644 --- a/Bugzilla/Util.pm +++ b/Bugzilla/Util.pm @@ -676,6 +676,7 @@ sub disable_utf8 { sub stem_text { my ($text, $allow_verbatim) = @_; + Encode::_utf8_on($text) if Bugzilla->params->{utf8}; $text = [ split /(?<=\w)(?=\W)|(?<=\W)(?=\w)/, $text ]; my $q = 0; for (@$text) @@ -684,10 +685,17 @@ sub stem_text { $_ = Lingua::Stem::RuUTF8::stem_word($_) unless $q; } - elsif ($allow_verbatim) + else { - # If $allow_verbatim is TRUE then text in "double quotes" doesn't stem - $q = ($q + tr/\"/\"/) % 2; + if ($allow_verbatim) + { + # If $allow_verbatim is TRUE then text in "double quotes" doesn't stem + $q = ($q + tr/\"/\"/) % 2; + } + if (!/\s$/so) + { + $_ .= ' '; + } } } return join '', @$text;