Bug 82651 - Store contents for snippet retrieval in MySQL

git-svn-id: svn://svn.office.custis.ru/3rdparty/viewvc.org/trunk@1398 6955db30-a419-402b-8a0d-67ecbb4d7f56
remotes/github/custis
vfilippov 2011-09-29 18:08:16 +00:00 committed by Vitaliy Filippov
parent 1b75ada880
commit 80ccb26b20
4 changed files with 70 additions and 41 deletions

View File

@ -40,7 +40,7 @@ CREATE TABLE branches (
branch varchar(64) binary DEFAULT '' NOT NULL,
PRIMARY KEY (id),
UNIQUE branch (branch)
) TYPE=MyISAM;
) TYPE=InnoDB;
DROP TABLE IF EXISTS checkins;
CREATE TABLE checkins (
@ -65,7 +65,7 @@ CREATE TABLE checkins (
KEY fileid (fileid),
KEY branchid (branchid),
KEY descid (descid)
) TYPE=MyISAM;
) TYPE=InnoDB;
DROP TABLE IF EXISTS descs;
CREATE TABLE descs (
@ -83,7 +83,7 @@ CREATE TABLE dirs (
dir varchar(255) binary DEFAULT '' NOT NULL,
PRIMARY KEY (id),
UNIQUE dir (dir)
) TYPE=MyISAM;
) TYPE=InnoDB;
DROP TABLE IF EXISTS files;
CREATE TABLE files (
@ -91,7 +91,7 @@ CREATE TABLE files (
file varchar(255) binary DEFAULT '' NOT NULL,
PRIMARY KEY (id),
UNIQUE file (file)
) TYPE=MyISAM;
) TYPE=InnoDB;
DROP TABLE IF EXISTS people;
CREATE TABLE people (
@ -99,7 +99,7 @@ CREATE TABLE people (
who varchar(128) binary DEFAULT '' NOT NULL,
PRIMARY KEY (id),
UNIQUE who (who)
) TYPE=MyISAM;
) TYPE=InnoDB;
DROP TABLE IF EXISTS repositories;
CREATE TABLE repositories (
@ -107,7 +107,7 @@ CREATE TABLE repositories (
repository varchar(64) binary DEFAULT '' NOT NULL,
PRIMARY KEY (id),
UNIQUE repository (repository)
) TYPE=MyISAM;
) TYPE=InnoDB;
DROP TABLE IF EXISTS tags;
CREATE TABLE tags (
@ -121,6 +121,12 @@ CREATE TABLE tags (
KEY dirid (dirid),
KEY fileid (fileid),
KEY branchid (branchid)
) TYPE=InnoDB;
DROP TABLE IF EXISTS contents;
CREATE TABLE contents (
id int NOT NULL PRIMARY KEY,
content MEDIUMTEXT NOT NULL DEFAULT ''
) TYPE=MyISAM;
"""
@ -135,7 +141,7 @@ CREATE TABLE branches (
branch varchar(64) binary DEFAULT '' NOT NULL,
PRIMARY KEY (id),
UNIQUE branch (branch)
) TYPE=MyISAM;
) TYPE=InnoDB;
DROP TABLE IF EXISTS commits;
CREATE TABLE commits (
@ -160,7 +166,7 @@ CREATE TABLE commits (
KEY fileid (fileid),
KEY branchid (branchid),
KEY descid (descid)
) TYPE=MyISAM;
) TYPE=InnoDB;
DROP TABLE IF EXISTS descs;
CREATE TABLE descs (
@ -168,7 +174,8 @@ CREATE TABLE descs (
description text,
hash bigint(20) DEFAULT '0' NOT NULL,
PRIMARY KEY (id),
KEY hash (hash)
KEY hash (hash),
FULLTEXT KEY description (description)
) TYPE=MyISAM;
DROP TABLE IF EXISTS dirs;
@ -177,7 +184,7 @@ CREATE TABLE dirs (
dir varchar(255) binary DEFAULT '' NOT NULL,
PRIMARY KEY (id),
UNIQUE dir (dir)
) TYPE=MyISAM;
) TYPE=InnoDB;
DROP TABLE IF EXISTS files;
CREATE TABLE files (
@ -185,7 +192,7 @@ CREATE TABLE files (
file varchar(255) binary DEFAULT '' NOT NULL,
PRIMARY KEY (id),
UNIQUE file (file)
) TYPE=MyISAM;
) TYPE=InnoDB;
DROP TABLE IF EXISTS people;
CREATE TABLE people (
@ -193,7 +200,7 @@ CREATE TABLE people (
who varchar(128) binary DEFAULT '' NOT NULL,
PRIMARY KEY (id),
UNIQUE who (who)
) TYPE=MyISAM;
) TYPE=InnoDB;
DROP TABLE IF EXISTS repositories;
CREATE TABLE repositories (
@ -201,7 +208,7 @@ CREATE TABLE repositories (
repository varchar(64) binary DEFAULT '' NOT NULL,
PRIMARY KEY (id),
UNIQUE repository (repository)
) TYPE=MyISAM;
) TYPE=InnoDB;
DROP TABLE IF EXISTS tags;
CREATE TABLE tags (
@ -215,7 +222,7 @@ CREATE TABLE tags (
KEY dirid (dirid),
KEY fileid (fileid),
KEY branchid (branchid)
) TYPE=MyISAM;
) TYPE=InnoDB;
DROP TABLE IF EXISTS metadata;
CREATE TABLE metadata (
@ -223,8 +230,14 @@ CREATE TABLE metadata (
value text,
PRIMARY KEY (name),
UNIQUE name (name)
) TYPE=MyISAM;
) TYPE=InnoDB;
INSERT INTO metadata (name, value) VALUES ('version', '1');
DROP TABLE IF EXISTS contents;
CREATE TABLE contents (
id int NOT NULL PRIMARY KEY,
content MEDIUMTEXT NOT NULL DEFAULT ''
) TYPE=MyISAM;
"""
BONSAI_COMPAT="""

View File

@ -634,6 +634,9 @@ enabled = 0
# Set to 1 to enable indexing of file contents using Sphinx and Tika
index_content = 0
# Maximum size of text file content stored in MySQL for snippet display.
# 0 = unlimited (the built-in default); the example value below is 4 MB.
#content_max_size = 4194304
# Database hostname, port, and socket
#host = localhost
#port = 3306
@ -661,7 +664,6 @@ index_content = 0
# Requires Sphinx >= 0.9.9 with a real-time updatable SphinxQL index.
# Index must be created in sphinx.conf by hand and have the following fields:
# rt_field = content
# rt_attr_string = content
# rt_attr_string = mimetype
# rt_attr_timestamp = ci_when
# rt_attr_uint = whoid

View File

@ -325,6 +325,7 @@ class Config:
self.cvsdb.enabled = 0
self.cvsdb.index_content = 0
self.cvsdb.content_max_size = 0
self.cvsdb.host = ''
self.cvsdb.port = 3306
self.cvsdb.socket = ''

View File

@ -40,7 +40,7 @@ error = "cvsdb error"
class CheckinDatabase:
def __init__(self, host, port, socket, user, passwd, database, row_limit, min_relevance, cfg,
authorizer = None, index_content = 0, sphinx_host = None, sphinx_port = None,
sphinx_socket = None, sphinx_index = None):
sphinx_socket = None, sphinx_index = None, content_max_size = 0):
self.cfg = cfg
self._host = host
@ -56,6 +56,7 @@ class CheckinDatabase:
# Sphinx settings
self.index_content = index_content
self.content_max_size = content_max_size
self.sphinx_host = sphinx_host
self.sphinx_port = sphinx_port
self.sphinx_socket = sphinx_socket
@ -373,9 +374,6 @@ class CheckinDatabase:
if self.index_content:
sphcur = self.sphinx.cursor()
content = commit.GetContent()
# Sphinx has 4 MB text field limit
if len(content) >= 4*1024*1024:
content = content[0:4*1024*1024]
props['ci_when'] = str(int(commit.GetTime() or 0))
if len(content):
props['content'] = content
@ -392,6 +390,12 @@ class CheckinDatabase:
','.join('%s' for i in props)+')',
tuple(props[i] for i in props)
)
# Sphinx (at least 2.0.1) still caches all string attributes inside RAM,
# so we'll store them in MySQL (used only for snippet display)
# Limit content size:
if self.content_max_size and len(content) >= self.content_max_size:
content = content[0:self.content_max_size]
cursor.execute('INSERT INTO contents SET id=%s, content=%s', (commit_id, content))
except Exception, e:
print ("Error adding commit: '"+str(e)+"'\nValues were:\n"+
"\n".join(i+'='+str(props[i]) for i in props))
@ -477,7 +481,7 @@ class CheckinDatabase:
elif self._row_limit:
limit = "LIMIT %s" % (str(self._row_limit))
fields = "id `id`, WEIGHT() `relevance`, `content`, `mimetype`"
fields = "id `id`, WEIGHT() `relevance`, `mimetype`"
return "SELECT %s FROM %s %s %s %s" % (fields, self.sphinx_index, conditions, order_by, limit)
@ -611,11 +615,12 @@ class CheckinDatabase:
if len(query.content_query) and self.sphinx:
# Use Sphinx to search on document content
sql = self.CreateSphinxQueryString(query)
cursor = self.sphinx.cursor()
cursor.execute(sql)
sphinx_rows = list((str(docid), rel, content, mimetype) for docid, rel, content, mimetype in cursor)
cursor = self.db.cursor()
sphcur = self.sphinx.cursor()
sphcur.execute(sql)
sphinx_rows = list((str(docid), rel, mimetype) for docid, rel, mimetype in sphcur)
if len(sphinx_rows):
# Fetch snippets
# FIXME remove hardcode
snippet_options = {
'around': 15,
'limit': 200,
@ -627,26 +632,32 @@ class CheckinDatabase:
snippets = {}
bm_html = cgi.escape(snippet_options['before_match'])
am_html = cgi.escape(snippet_options['after_match'])
for docid, rel, content, mimetype in sphinx_rows:
cursor.execute(
'CALL SNIPPETS(%s, %s, %s'+''.join(', %s AS '+i for i in snippet_options)+')',
(content, self.sphinx_index, query.content_query) + tuple(snippet_options.values())
)
s, = cursor.fetchone()
s = cgi.escape(s)
if re.match(preformatted_mime, mimetype):
s = s.replace('\n', '<br />')
s = s.replace(bm_html, snippet_options['before_match'])
s = s.replace(am_html, snippet_options['after_match'])
snippets[docid] = s
# Fetch all fields from MySQL
sql = self.CreateIdQueryString((docid for (docid, _, _, _) in sphinx_rows))
cursor = self.db.cursor()
# Build snippets using Sphinx (content is stored in MySQL)
for docid, rel, mimetype in sphinx_rows:
cursor.execute('SELECT content FROM contents WHERE id=%s', (docid, ))
s = cursor.fetchone()
if s:
s = s[0]
sphcur.execute(
'CALL SNIPPETS(%s, %s, %s'+''.join(', %s AS '+i for i in snippet_options)+')',
(s, self.sphinx_index, query.content_query) + tuple(snippet_options.values())
)
s, = sphcur.fetchone()
s = cgi.escape(s)
if re.match(preformatted_mime, mimetype):
s = s.replace('\n', '<br />')
s = s.replace(bm_html, snippet_options['before_match'])
s = s.replace(am_html, snippet_options['after_match'])
snippets[docid] = s
else:
snippets[docid] = ''
# Fetch commit attributes from MySQL
sql = self.CreateIdQueryString((docid for (docid, _, _) in sphinx_rows))
cursor.execute(sql)
byid = {}
for row in cursor:
byid[str(row[0])] = row
rows = list(byid[docid] + (rel, snippets[docid]) for (docid, rel, _, _) in sphinx_rows if docid in byid)
rows = list(byid[docid] + (rel, snippets[docid]) for (docid, rel, _) in sphinx_rows if docid in byid)
else:
rows = []
else:
@ -751,6 +762,7 @@ class CheckinDatabase:
self.sql_purge('branches', 'id', 'branchid', checkins_table)
self.sql_purge('descs', 'id', 'descid', checkins_table)
self.sql_purge('people', 'id', 'whoid', checkins_table)
self.sql_purge('contents', 'id', 'id', checkins_table)
# Reset all internal id caches. We could be choosier here,
# but let's just be as safe as possible.
@ -1096,6 +1108,7 @@ def ConnectDatabase(cfg, authorizer=None, readonly=0):
sphinx_port = int(cfg.cvsdb.sphinx_port),
sphinx_socket = cfg.cvsdb.sphinx_socket,
sphinx_index = cfg.cvsdb.sphinx_index,
content_max_size = cfg.cvsdb.content_max_size,
cfg = cfg,
)
db.Connect()