Bug 82651 - Store contents for snippet retrieval in MySQL
git-svn-id: svn://svn.office.custis.ru/3rdparty/viewvc.org/trunk@1398 6955db30-a419-402b-8a0d-67ecbb4d7f56
remotes/github/custis
parent
1b75ada880
commit
80ccb26b20
|
@ -40,7 +40,7 @@ CREATE TABLE branches (
|
|||
branch varchar(64) binary DEFAULT '' NOT NULL,
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE branch (branch)
|
||||
) TYPE=MyISAM;
|
||||
) TYPE=InnoDB;
|
||||
|
||||
DROP TABLE IF EXISTS checkins;
|
||||
CREATE TABLE checkins (
|
||||
|
@ -65,7 +65,7 @@ CREATE TABLE checkins (
|
|||
KEY fileid (fileid),
|
||||
KEY branchid (branchid),
|
||||
KEY descid (descid)
|
||||
) TYPE=MyISAM;
|
||||
) TYPE=InnoDB;
|
||||
|
||||
DROP TABLE IF EXISTS descs;
|
||||
CREATE TABLE descs (
|
||||
|
@ -83,7 +83,7 @@ CREATE TABLE dirs (
|
|||
dir varchar(255) binary DEFAULT '' NOT NULL,
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE dir (dir)
|
||||
) TYPE=MyISAM;
|
||||
) TYPE=InnoDB;
|
||||
|
||||
DROP TABLE IF EXISTS files;
|
||||
CREATE TABLE files (
|
||||
|
@ -91,7 +91,7 @@ CREATE TABLE files (
|
|||
file varchar(255) binary DEFAULT '' NOT NULL,
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE file (file)
|
||||
) TYPE=MyISAM;
|
||||
) TYPE=InnoDB;
|
||||
|
||||
DROP TABLE IF EXISTS people;
|
||||
CREATE TABLE people (
|
||||
|
@ -99,7 +99,7 @@ CREATE TABLE people (
|
|||
who varchar(128) binary DEFAULT '' NOT NULL,
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE who (who)
|
||||
) TYPE=MyISAM;
|
||||
) TYPE=InnoDB;
|
||||
|
||||
DROP TABLE IF EXISTS repositories;
|
||||
CREATE TABLE repositories (
|
||||
|
@ -107,7 +107,7 @@ CREATE TABLE repositories (
|
|||
repository varchar(64) binary DEFAULT '' NOT NULL,
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE repository (repository)
|
||||
) TYPE=MyISAM;
|
||||
) TYPE=InnoDB;
|
||||
|
||||
DROP TABLE IF EXISTS tags;
|
||||
CREATE TABLE tags (
|
||||
|
@ -121,6 +121,12 @@ CREATE TABLE tags (
|
|||
KEY dirid (dirid),
|
||||
KEY fileid (fileid),
|
||||
KEY branchid (branchid)
|
||||
) TYPE=InnoDB;
|
||||
|
||||
DROP TABLE IF EXISTS contents;
|
||||
CREATE TABLE contents (
|
||||
id int NOT NULL PRIMARY KEY,
|
||||
content MEDIUMTEXT NOT NULL DEFAULT ''
|
||||
) TYPE=MyISAM;
|
||||
"""
|
||||
|
||||
|
@ -135,7 +141,7 @@ CREATE TABLE branches (
|
|||
branch varchar(64) binary DEFAULT '' NOT NULL,
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE branch (branch)
|
||||
) TYPE=MyISAM;
|
||||
) TYPE=InnoDB;
|
||||
|
||||
DROP TABLE IF EXISTS commits;
|
||||
CREATE TABLE commits (
|
||||
|
@ -160,7 +166,7 @@ CREATE TABLE commits (
|
|||
KEY fileid (fileid),
|
||||
KEY branchid (branchid),
|
||||
KEY descid (descid)
|
||||
) TYPE=MyISAM;
|
||||
) TYPE=InnoDB;
|
||||
|
||||
DROP TABLE IF EXISTS descs;
|
||||
CREATE TABLE descs (
|
||||
|
@ -168,7 +174,8 @@ CREATE TABLE descs (
|
|||
description text,
|
||||
hash bigint(20) DEFAULT '0' NOT NULL,
|
||||
PRIMARY KEY (id),
|
||||
KEY hash (hash)
|
||||
KEY hash (hash),
|
||||
FULLTEXT KEY description (description)
|
||||
) TYPE=MyISAM;
|
||||
|
||||
DROP TABLE IF EXISTS dirs;
|
||||
|
@ -177,7 +184,7 @@ CREATE TABLE dirs (
|
|||
dir varchar(255) binary DEFAULT '' NOT NULL,
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE dir (dir)
|
||||
) TYPE=MyISAM;
|
||||
) TYPE=InnoDB;
|
||||
|
||||
DROP TABLE IF EXISTS files;
|
||||
CREATE TABLE files (
|
||||
|
@ -185,7 +192,7 @@ CREATE TABLE files (
|
|||
file varchar(255) binary DEFAULT '' NOT NULL,
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE file (file)
|
||||
) TYPE=MyISAM;
|
||||
) TYPE=InnoDB;
|
||||
|
||||
DROP TABLE IF EXISTS people;
|
||||
CREATE TABLE people (
|
||||
|
@ -193,7 +200,7 @@ CREATE TABLE people (
|
|||
who varchar(128) binary DEFAULT '' NOT NULL,
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE who (who)
|
||||
) TYPE=MyISAM;
|
||||
) TYPE=InnoDB;
|
||||
|
||||
DROP TABLE IF EXISTS repositories;
|
||||
CREATE TABLE repositories (
|
||||
|
@ -201,7 +208,7 @@ CREATE TABLE repositories (
|
|||
repository varchar(64) binary DEFAULT '' NOT NULL,
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE repository (repository)
|
||||
) TYPE=MyISAM;
|
||||
) TYPE=InnoDB;
|
||||
|
||||
DROP TABLE IF EXISTS tags;
|
||||
CREATE TABLE tags (
|
||||
|
@ -215,7 +222,7 @@ CREATE TABLE tags (
|
|||
KEY dirid (dirid),
|
||||
KEY fileid (fileid),
|
||||
KEY branchid (branchid)
|
||||
) TYPE=MyISAM;
|
||||
) TYPE=InnoDB;
|
||||
|
||||
DROP TABLE IF EXISTS metadata;
|
||||
CREATE TABLE metadata (
|
||||
|
@ -223,8 +230,14 @@ CREATE TABLE metadata (
|
|||
value text,
|
||||
PRIMARY KEY (name),
|
||||
UNIQUE name (name)
|
||||
) TYPE=MyISAM;
|
||||
) TYPE=InnoDB;
|
||||
INSERT INTO metadata (name, value) VALUES ('version', '1');
|
||||
|
||||
DROP TABLE IF EXISTS contents;
|
||||
CREATE TABLE contents (
|
||||
id int NOT NULL PRIMARY KEY,
|
||||
content MEDIUMTEXT NOT NULL DEFAULT ''
|
||||
) TYPE=MyISAM;
|
||||
"""
|
||||
|
||||
BONSAI_COMPAT="""
|
||||
|
|
|
@ -634,6 +634,9 @@ enabled = 0
|
|||
# Set to 1 to enable indexing of file contents using Sphinx and Tika
|
||||
index_content = 0
|
||||
|
||||
# Limit the size of text file content stored for snippet display
# (0 = unlimited, which is the built-in default; the example below is 4 MB)
|
||||
#content_max_size = 4194304
|
||||
|
||||
# Database hostname, port, and socket
|
||||
#host = localhost
|
||||
#port = 3306
|
||||
|
@ -661,7 +664,6 @@ index_content = 0
|
|||
# Requires Sphinx >= 0.9.9 with a real-time updatable SphinxQL index.
|
||||
# Index must be created in sphinx.conf by hand and have the following fields:
|
||||
# rt_field = content
|
||||
# rt_attr_string = content
|
||||
# rt_attr_string = mimetype
|
||||
# rt_attr_timestamp = ci_when
|
||||
# rt_attr_uint = whoid
|
||||
|
|
|
@ -325,6 +325,7 @@ class Config:
|
|||
|
||||
self.cvsdb.enabled = 0
|
||||
self.cvsdb.index_content = 0
|
||||
self.cvsdb.content_max_size = 0
|
||||
self.cvsdb.host = ''
|
||||
self.cvsdb.port = 3306
|
||||
self.cvsdb.socket = ''
|
||||
|
|
63
lib/cvsdb.py
63
lib/cvsdb.py
|
@ -40,7 +40,7 @@ error = "cvsdb error"
|
|||
class CheckinDatabase:
|
||||
def __init__(self, host, port, socket, user, passwd, database, row_limit, min_relevance, cfg,
|
||||
authorizer = None, index_content = 0, sphinx_host = None, sphinx_port = None,
|
||||
sphinx_socket = None, sphinx_index = None):
|
||||
sphinx_socket = None, sphinx_index = None, content_max_size = 0):
|
||||
self.cfg = cfg
|
||||
|
||||
self._host = host
|
||||
|
@ -56,6 +56,7 @@ class CheckinDatabase:
|
|||
|
||||
# Sphinx settings
|
||||
self.index_content = index_content
|
||||
self.content_max_size = content_max_size
|
||||
self.sphinx_host = sphinx_host
|
||||
self.sphinx_port = sphinx_port
|
||||
self.sphinx_socket = sphinx_socket
|
||||
|
@ -373,9 +374,6 @@ class CheckinDatabase:
|
|||
if self.index_content:
|
||||
sphcur = self.sphinx.cursor()
|
||||
content = commit.GetContent()
|
||||
# Sphinx has 4 MB text field limit
|
||||
if len(content) >= 4*1024*1024:
|
||||
content = content[0:4*1024*1024]
|
||||
props['ci_when'] = str(int(commit.GetTime() or 0))
|
||||
if len(content):
|
||||
props['content'] = content
|
||||
|
@ -392,6 +390,12 @@ class CheckinDatabase:
|
|||
','.join('%s' for i in props)+')',
|
||||
tuple(props[i] for i in props)
|
||||
)
|
||||
# Sphinx (at least 2.0.1) still caches all string attributes inside RAM,
|
||||
# so we'll store them in MySQL (used only for snippet display)
|
||||
# Limit content size:
|
||||
if self.content_max_size and len(content) >= self.content_max_size:
|
||||
content = content[0:self.content_max_size]
|
||||
cursor.execute('INSERT INTO contents SET id=%s, content=%s', (commit_id, content))
|
||||
except Exception, e:
|
||||
print ("Error adding commit: '"+str(e)+"'\nValues were:\n"+
|
||||
"\n".join(i+'='+str(props[i]) for i in props))
|
||||
|
@ -477,7 +481,7 @@ class CheckinDatabase:
|
|||
elif self._row_limit:
|
||||
limit = "LIMIT %s" % (str(self._row_limit))
|
||||
|
||||
fields = "id `id`, WEIGHT() `relevance`, `content`, `mimetype`"
|
||||
fields = "id `id`, WEIGHT() `relevance`, `mimetype`"
|
||||
|
||||
return "SELECT %s FROM %s %s %s %s" % (fields, self.sphinx_index, conditions, order_by, limit)
|
||||
|
||||
|
@ -611,11 +615,12 @@ class CheckinDatabase:
|
|||
if len(query.content_query) and self.sphinx:
|
||||
# Use Sphinx to search on document content
|
||||
sql = self.CreateSphinxQueryString(query)
|
||||
cursor = self.sphinx.cursor()
|
||||
cursor.execute(sql)
|
||||
sphinx_rows = list((str(docid), rel, content, mimetype) for docid, rel, content, mimetype in cursor)
|
||||
cursor = self.db.cursor()
|
||||
sphcur = self.sphinx.cursor()
|
||||
sphcur.execute(sql)
|
||||
sphinx_rows = list((str(docid), rel, mimetype) for docid, rel, mimetype in sphcur)
|
||||
if len(sphinx_rows):
|
||||
# Fetch snippets
|
||||
# FIXME remove hardcode
|
||||
snippet_options = {
|
||||
'around': 15,
|
||||
'limit': 200,
|
||||
|
@ -627,26 +632,32 @@ class CheckinDatabase:
|
|||
snippets = {}
|
||||
bm_html = cgi.escape(snippet_options['before_match'])
|
||||
am_html = cgi.escape(snippet_options['after_match'])
|
||||
for docid, rel, content, mimetype in sphinx_rows:
|
||||
cursor.execute(
|
||||
'CALL SNIPPETS(%s, %s, %s'+''.join(', %s AS '+i for i in snippet_options)+')',
|
||||
(content, self.sphinx_index, query.content_query) + tuple(snippet_options.values())
|
||||
)
|
||||
s, = cursor.fetchone()
|
||||
s = cgi.escape(s)
|
||||
if re.match(preformatted_mime, mimetype):
|
||||
s = s.replace('\n', '<br />')
|
||||
s = s.replace(bm_html, snippet_options['before_match'])
|
||||
s = s.replace(am_html, snippet_options['after_match'])
|
||||
snippets[docid] = s
|
||||
# Fetch all fields from MySQL
|
||||
sql = self.CreateIdQueryString((docid for (docid, _, _, _) in sphinx_rows))
|
||||
cursor = self.db.cursor()
|
||||
# Build snippets using Sphinx (content is stored in MySQL)
|
||||
for docid, rel, mimetype in sphinx_rows:
|
||||
cursor.execute('SELECT content FROM contents WHERE id=%s', (docid, ))
|
||||
s = cursor.fetchone()
|
||||
if s:
|
||||
s = s[0]
|
||||
sphcur.execute(
|
||||
'CALL SNIPPETS(%s, %s, %s'+''.join(', %s AS '+i for i in snippet_options)+')',
|
||||
(s, self.sphinx_index, query.content_query) + tuple(snippet_options.values())
|
||||
)
|
||||
s, = sphcur.fetchone()
|
||||
s = cgi.escape(s)
|
||||
if re.match(preformatted_mime, mimetype):
|
||||
s = s.replace('\n', '<br />')
|
||||
s = s.replace(bm_html, snippet_options['before_match'])
|
||||
s = s.replace(am_html, snippet_options['after_match'])
|
||||
snippets[docid] = s
|
||||
else:
|
||||
snippets[docid] = ''
|
||||
# Fetch commit attributes from MySQL
|
||||
sql = self.CreateIdQueryString((docid for (docid, _, _) in sphinx_rows))
|
||||
cursor.execute(sql)
|
||||
byid = {}
|
||||
for row in cursor:
|
||||
byid[str(row[0])] = row
|
||||
rows = list(byid[docid] + (rel, snippets[docid]) for (docid, rel, _, _) in sphinx_rows if docid in byid)
|
||||
rows = list(byid[docid] + (rel, snippets[docid]) for (docid, rel, _) in sphinx_rows if docid in byid)
|
||||
else:
|
||||
rows = []
|
||||
else:
|
||||
|
@ -751,6 +762,7 @@ class CheckinDatabase:
|
|||
self.sql_purge('branches', 'id', 'branchid', checkins_table)
|
||||
self.sql_purge('descs', 'id', 'descid', checkins_table)
|
||||
self.sql_purge('people', 'id', 'whoid', checkins_table)
|
||||
self.sql_purge('contents', 'id', 'id', checkins_table)
|
||||
|
||||
# Reset all internal id caches. We could be choosier here,
|
||||
# but let's just be as safe as possible.
|
||||
|
@ -1096,6 +1108,7 @@ def ConnectDatabase(cfg, authorizer=None, readonly=0):
|
|||
sphinx_port = int(cfg.cvsdb.sphinx_port),
|
||||
sphinx_socket = cfg.cvsdb.sphinx_socket,
|
||||
sphinx_index = cfg.cvsdb.sphinx_index,
|
||||
content_max_size = cfg.cvsdb.content_max_size,
|
||||
cfg = cfg,
|
||||
)
|
||||
db.Connect()
|
||||
|
|
Loading…
Reference in New Issue