Bug 82651 - Store contents for snippet retrieval in MySQL
git-svn-id: svn://svn.office.custis.ru/3rdparty/viewvc.org/trunk@1398 6955db30-a419-402b-8a0d-67ecbb4d7f56
remotes/github/custis
parent
1b75ada880
commit
80ccb26b20
|
@ -40,7 +40,7 @@ CREATE TABLE branches (
|
||||||
branch varchar(64) binary DEFAULT '' NOT NULL,
|
branch varchar(64) binary DEFAULT '' NOT NULL,
|
||||||
PRIMARY KEY (id),
|
PRIMARY KEY (id),
|
||||||
UNIQUE branch (branch)
|
UNIQUE branch (branch)
|
||||||
) TYPE=MyISAM;
|
) TYPE=InnoDB;
|
||||||
|
|
||||||
DROP TABLE IF EXISTS checkins;
|
DROP TABLE IF EXISTS checkins;
|
||||||
CREATE TABLE checkins (
|
CREATE TABLE checkins (
|
||||||
|
@ -65,7 +65,7 @@ CREATE TABLE checkins (
|
||||||
KEY fileid (fileid),
|
KEY fileid (fileid),
|
||||||
KEY branchid (branchid),
|
KEY branchid (branchid),
|
||||||
KEY descid (descid)
|
KEY descid (descid)
|
||||||
) TYPE=MyISAM;
|
) TYPE=InnoDB;
|
||||||
|
|
||||||
DROP TABLE IF EXISTS descs;
|
DROP TABLE IF EXISTS descs;
|
||||||
CREATE TABLE descs (
|
CREATE TABLE descs (
|
||||||
|
@ -83,7 +83,7 @@ CREATE TABLE dirs (
|
||||||
dir varchar(255) binary DEFAULT '' NOT NULL,
|
dir varchar(255) binary DEFAULT '' NOT NULL,
|
||||||
PRIMARY KEY (id),
|
PRIMARY KEY (id),
|
||||||
UNIQUE dir (dir)
|
UNIQUE dir (dir)
|
||||||
) TYPE=MyISAM;
|
) TYPE=InnoDB;
|
||||||
|
|
||||||
DROP TABLE IF EXISTS files;
|
DROP TABLE IF EXISTS files;
|
||||||
CREATE TABLE files (
|
CREATE TABLE files (
|
||||||
|
@ -91,7 +91,7 @@ CREATE TABLE files (
|
||||||
file varchar(255) binary DEFAULT '' NOT NULL,
|
file varchar(255) binary DEFAULT '' NOT NULL,
|
||||||
PRIMARY KEY (id),
|
PRIMARY KEY (id),
|
||||||
UNIQUE file (file)
|
UNIQUE file (file)
|
||||||
) TYPE=MyISAM;
|
) TYPE=InnoDB;
|
||||||
|
|
||||||
DROP TABLE IF EXISTS people;
|
DROP TABLE IF EXISTS people;
|
||||||
CREATE TABLE people (
|
CREATE TABLE people (
|
||||||
|
@ -99,7 +99,7 @@ CREATE TABLE people (
|
||||||
who varchar(128) binary DEFAULT '' NOT NULL,
|
who varchar(128) binary DEFAULT '' NOT NULL,
|
||||||
PRIMARY KEY (id),
|
PRIMARY KEY (id),
|
||||||
UNIQUE who (who)
|
UNIQUE who (who)
|
||||||
) TYPE=MyISAM;
|
) TYPE=InnoDB;
|
||||||
|
|
||||||
DROP TABLE IF EXISTS repositories;
|
DROP TABLE IF EXISTS repositories;
|
||||||
CREATE TABLE repositories (
|
CREATE TABLE repositories (
|
||||||
|
@ -107,7 +107,7 @@ CREATE TABLE repositories (
|
||||||
repository varchar(64) binary DEFAULT '' NOT NULL,
|
repository varchar(64) binary DEFAULT '' NOT NULL,
|
||||||
PRIMARY KEY (id),
|
PRIMARY KEY (id),
|
||||||
UNIQUE repository (repository)
|
UNIQUE repository (repository)
|
||||||
) TYPE=MyISAM;
|
) TYPE=InnoDB;
|
||||||
|
|
||||||
DROP TABLE IF EXISTS tags;
|
DROP TABLE IF EXISTS tags;
|
||||||
CREATE TABLE tags (
|
CREATE TABLE tags (
|
||||||
|
@ -121,6 +121,12 @@ CREATE TABLE tags (
|
||||||
KEY dirid (dirid),
|
KEY dirid (dirid),
|
||||||
KEY fileid (fileid),
|
KEY fileid (fileid),
|
||||||
KEY branchid (branchid)
|
KEY branchid (branchid)
|
||||||
|
) TYPE=InnoDB;
|
||||||
|
|
||||||
|
DROP TABLE IF EXISTS contents;
|
||||||
|
CREATE TABLE contents (
|
||||||
|
id int NOT NULL PRIMARY KEY,
|
||||||
|
content MEDIUMTEXT NOT NULL DEFAULT ''
|
||||||
) TYPE=MyISAM;
|
) TYPE=MyISAM;
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@ -135,7 +141,7 @@ CREATE TABLE branches (
|
||||||
branch varchar(64) binary DEFAULT '' NOT NULL,
|
branch varchar(64) binary DEFAULT '' NOT NULL,
|
||||||
PRIMARY KEY (id),
|
PRIMARY KEY (id),
|
||||||
UNIQUE branch (branch)
|
UNIQUE branch (branch)
|
||||||
) TYPE=MyISAM;
|
) TYPE=InnoDB;
|
||||||
|
|
||||||
DROP TABLE IF EXISTS commits;
|
DROP TABLE IF EXISTS commits;
|
||||||
CREATE TABLE commits (
|
CREATE TABLE commits (
|
||||||
|
@ -160,7 +166,7 @@ CREATE TABLE commits (
|
||||||
KEY fileid (fileid),
|
KEY fileid (fileid),
|
||||||
KEY branchid (branchid),
|
KEY branchid (branchid),
|
||||||
KEY descid (descid)
|
KEY descid (descid)
|
||||||
) TYPE=MyISAM;
|
) TYPE=InnoDB;
|
||||||
|
|
||||||
DROP TABLE IF EXISTS descs;
|
DROP TABLE IF EXISTS descs;
|
||||||
CREATE TABLE descs (
|
CREATE TABLE descs (
|
||||||
|
@ -168,7 +174,8 @@ CREATE TABLE descs (
|
||||||
description text,
|
description text,
|
||||||
hash bigint(20) DEFAULT '0' NOT NULL,
|
hash bigint(20) DEFAULT '0' NOT NULL,
|
||||||
PRIMARY KEY (id),
|
PRIMARY KEY (id),
|
||||||
KEY hash (hash)
|
KEY hash (hash),
|
||||||
|
FULLTEXT KEY description (description)
|
||||||
) TYPE=MyISAM;
|
) TYPE=MyISAM;
|
||||||
|
|
||||||
DROP TABLE IF EXISTS dirs;
|
DROP TABLE IF EXISTS dirs;
|
||||||
|
@ -177,7 +184,7 @@ CREATE TABLE dirs (
|
||||||
dir varchar(255) binary DEFAULT '' NOT NULL,
|
dir varchar(255) binary DEFAULT '' NOT NULL,
|
||||||
PRIMARY KEY (id),
|
PRIMARY KEY (id),
|
||||||
UNIQUE dir (dir)
|
UNIQUE dir (dir)
|
||||||
) TYPE=MyISAM;
|
) TYPE=InnoDB;
|
||||||
|
|
||||||
DROP TABLE IF EXISTS files;
|
DROP TABLE IF EXISTS files;
|
||||||
CREATE TABLE files (
|
CREATE TABLE files (
|
||||||
|
@ -185,7 +192,7 @@ CREATE TABLE files (
|
||||||
file varchar(255) binary DEFAULT '' NOT NULL,
|
file varchar(255) binary DEFAULT '' NOT NULL,
|
||||||
PRIMARY KEY (id),
|
PRIMARY KEY (id),
|
||||||
UNIQUE file (file)
|
UNIQUE file (file)
|
||||||
) TYPE=MyISAM;
|
) TYPE=InnoDB;
|
||||||
|
|
||||||
DROP TABLE IF EXISTS people;
|
DROP TABLE IF EXISTS people;
|
||||||
CREATE TABLE people (
|
CREATE TABLE people (
|
||||||
|
@ -193,7 +200,7 @@ CREATE TABLE people (
|
||||||
who varchar(128) binary DEFAULT '' NOT NULL,
|
who varchar(128) binary DEFAULT '' NOT NULL,
|
||||||
PRIMARY KEY (id),
|
PRIMARY KEY (id),
|
||||||
UNIQUE who (who)
|
UNIQUE who (who)
|
||||||
) TYPE=MyISAM;
|
) TYPE=InnoDB;
|
||||||
|
|
||||||
DROP TABLE IF EXISTS repositories;
|
DROP TABLE IF EXISTS repositories;
|
||||||
CREATE TABLE repositories (
|
CREATE TABLE repositories (
|
||||||
|
@ -201,7 +208,7 @@ CREATE TABLE repositories (
|
||||||
repository varchar(64) binary DEFAULT '' NOT NULL,
|
repository varchar(64) binary DEFAULT '' NOT NULL,
|
||||||
PRIMARY KEY (id),
|
PRIMARY KEY (id),
|
||||||
UNIQUE repository (repository)
|
UNIQUE repository (repository)
|
||||||
) TYPE=MyISAM;
|
) TYPE=InnoDB;
|
||||||
|
|
||||||
DROP TABLE IF EXISTS tags;
|
DROP TABLE IF EXISTS tags;
|
||||||
CREATE TABLE tags (
|
CREATE TABLE tags (
|
||||||
|
@ -215,7 +222,7 @@ CREATE TABLE tags (
|
||||||
KEY dirid (dirid),
|
KEY dirid (dirid),
|
||||||
KEY fileid (fileid),
|
KEY fileid (fileid),
|
||||||
KEY branchid (branchid)
|
KEY branchid (branchid)
|
||||||
) TYPE=MyISAM;
|
) TYPE=InnoDB;
|
||||||
|
|
||||||
DROP TABLE IF EXISTS metadata;
|
DROP TABLE IF EXISTS metadata;
|
||||||
CREATE TABLE metadata (
|
CREATE TABLE metadata (
|
||||||
|
@ -223,8 +230,14 @@ CREATE TABLE metadata (
|
||||||
value text,
|
value text,
|
||||||
PRIMARY KEY (name),
|
PRIMARY KEY (name),
|
||||||
UNIQUE name (name)
|
UNIQUE name (name)
|
||||||
) TYPE=MyISAM;
|
) TYPE=InnoDB;
|
||||||
INSERT INTO metadata (name, value) VALUES ('version', '1');
|
INSERT INTO metadata (name, value) VALUES ('version', '1');
|
||||||
|
|
||||||
|
DROP TABLE IF EXISTS contents;
|
||||||
|
CREATE TABLE contents (
|
||||||
|
id int NOT NULL PRIMARY KEY,
|
||||||
|
content MEDIUMTEXT NOT NULL DEFAULT ''
|
||||||
|
) TYPE=MyISAM;
|
||||||
"""
|
"""
|
||||||
|
|
||||||
BONSAI_COMPAT="""
|
BONSAI_COMPAT="""
|
||||||
|
|
|
@ -634,6 +634,9 @@ enabled = 0
|
||||||
# Set to 1 to enable indexing of file contents using Sphinx and Tika
|
# Set to 1 to enable indexing of file contents using Sphinx and Tika
|
||||||
index_content = 0
|
index_content = 0
|
||||||
|
|
||||||
|
# Maximum size, in bytes, of text file content stored for snippets (0 = unlimited, the built-in default; 4194304 = 4 MB)
|
||||||
|
#content_max_size = 4194304
|
||||||
|
|
||||||
# Database hostname, port, and socket
|
# Database hostname, port, and socket
|
||||||
#host = localhost
|
#host = localhost
|
||||||
#port = 3306
|
#port = 3306
|
||||||
|
@ -661,7 +664,6 @@ index_content = 0
|
||||||
# Requires Sphinx >= 0.9.9 with a real-time updatable SphinxQL index.
|
# Requires Sphinx >= 0.9.9 with a real-time updatable SphinxQL index.
|
||||||
# Index must be created in sphinx.conf by hand and have the following fields:
|
# Index must be created in sphinx.conf by hand and have the following fields:
|
||||||
# rt_field = content
|
# rt_field = content
|
||||||
# rt_attr_string = content
|
|
||||||
# rt_attr_string = mimetype
|
# rt_attr_string = mimetype
|
||||||
# rt_attr_timestamp = ci_when
|
# rt_attr_timestamp = ci_when
|
||||||
# rt_attr_uint = whoid
|
# rt_attr_uint = whoid
|
||||||
|
|
|
@ -325,6 +325,7 @@ class Config:
|
||||||
|
|
||||||
self.cvsdb.enabled = 0
|
self.cvsdb.enabled = 0
|
||||||
self.cvsdb.index_content = 0
|
self.cvsdb.index_content = 0
|
||||||
|
self.cvsdb.content_max_size = 0
|
||||||
self.cvsdb.host = ''
|
self.cvsdb.host = ''
|
||||||
self.cvsdb.port = 3306
|
self.cvsdb.port = 3306
|
||||||
self.cvsdb.socket = ''
|
self.cvsdb.socket = ''
|
||||||
|
|
63
lib/cvsdb.py
63
lib/cvsdb.py
|
@ -40,7 +40,7 @@ error = "cvsdb error"
|
||||||
class CheckinDatabase:
|
class CheckinDatabase:
|
||||||
def __init__(self, host, port, socket, user, passwd, database, row_limit, min_relevance, cfg,
|
def __init__(self, host, port, socket, user, passwd, database, row_limit, min_relevance, cfg,
|
||||||
authorizer = None, index_content = 0, sphinx_host = None, sphinx_port = None,
|
authorizer = None, index_content = 0, sphinx_host = None, sphinx_port = None,
|
||||||
sphinx_socket = None, sphinx_index = None):
|
sphinx_socket = None, sphinx_index = None, content_max_size = 0):
|
||||||
self.cfg = cfg
|
self.cfg = cfg
|
||||||
|
|
||||||
self._host = host
|
self._host = host
|
||||||
|
@ -56,6 +56,7 @@ class CheckinDatabase:
|
||||||
|
|
||||||
# Sphinx settings
|
# Sphinx settings
|
||||||
self.index_content = index_content
|
self.index_content = index_content
|
||||||
|
self.content_max_size = content_max_size
|
||||||
self.sphinx_host = sphinx_host
|
self.sphinx_host = sphinx_host
|
||||||
self.sphinx_port = sphinx_port
|
self.sphinx_port = sphinx_port
|
||||||
self.sphinx_socket = sphinx_socket
|
self.sphinx_socket = sphinx_socket
|
||||||
|
@ -373,9 +374,6 @@ class CheckinDatabase:
|
||||||
if self.index_content:
|
if self.index_content:
|
||||||
sphcur = self.sphinx.cursor()
|
sphcur = self.sphinx.cursor()
|
||||||
content = commit.GetContent()
|
content = commit.GetContent()
|
||||||
# Sphinx has 4 MB text field limit
|
|
||||||
if len(content) >= 4*1024*1024:
|
|
||||||
content = content[0:4*1024*1024]
|
|
||||||
props['ci_when'] = str(int(commit.GetTime() or 0))
|
props['ci_when'] = str(int(commit.GetTime() or 0))
|
||||||
if len(content):
|
if len(content):
|
||||||
props['content'] = content
|
props['content'] = content
|
||||||
|
@ -392,6 +390,12 @@ class CheckinDatabase:
|
||||||
','.join('%s' for i in props)+')',
|
','.join('%s' for i in props)+')',
|
||||||
tuple(props[i] for i in props)
|
tuple(props[i] for i in props)
|
||||||
)
|
)
|
||||||
|
# Sphinx (at least 2.0.1) still caches all string attributes inside RAM,
|
||||||
|
# so we'll store them in MySQL (used only for snippet display)
|
||||||
|
# Limit content size:
|
||||||
|
if self.content_max_size and len(content) >= self.content_max_size:
|
||||||
|
content = content[0:self.content_max_size]
|
||||||
|
cursor.execute('INSERT INTO contents SET id=%s, content=%s', (commit_id, content))
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
print ("Error adding commit: '"+str(e)+"'\nValues were:\n"+
|
print ("Error adding commit: '"+str(e)+"'\nValues were:\n"+
|
||||||
"\n".join(i+'='+str(props[i]) for i in props))
|
"\n".join(i+'='+str(props[i]) for i in props))
|
||||||
|
@ -477,7 +481,7 @@ class CheckinDatabase:
|
||||||
elif self._row_limit:
|
elif self._row_limit:
|
||||||
limit = "LIMIT %s" % (str(self._row_limit))
|
limit = "LIMIT %s" % (str(self._row_limit))
|
||||||
|
|
||||||
fields = "id `id`, WEIGHT() `relevance`, `content`, `mimetype`"
|
fields = "id `id`, WEIGHT() `relevance`, `mimetype`"
|
||||||
|
|
||||||
return "SELECT %s FROM %s %s %s %s" % (fields, self.sphinx_index, conditions, order_by, limit)
|
return "SELECT %s FROM %s %s %s %s" % (fields, self.sphinx_index, conditions, order_by, limit)
|
||||||
|
|
||||||
|
@ -611,11 +615,12 @@ class CheckinDatabase:
|
||||||
if len(query.content_query) and self.sphinx:
|
if len(query.content_query) and self.sphinx:
|
||||||
# Use Sphinx to search on document content
|
# Use Sphinx to search on document content
|
||||||
sql = self.CreateSphinxQueryString(query)
|
sql = self.CreateSphinxQueryString(query)
|
||||||
cursor = self.sphinx.cursor()
|
cursor = self.db.cursor()
|
||||||
cursor.execute(sql)
|
sphcur = self.sphinx.cursor()
|
||||||
sphinx_rows = list((str(docid), rel, content, mimetype) for docid, rel, content, mimetype in cursor)
|
sphcur.execute(sql)
|
||||||
|
sphinx_rows = list((str(docid), rel, mimetype) for docid, rel, mimetype in sphcur)
|
||||||
if len(sphinx_rows):
|
if len(sphinx_rows):
|
||||||
# Fetch snippets
|
# FIXME: make these hardcoded snippet options configurable
|
||||||
snippet_options = {
|
snippet_options = {
|
||||||
'around': 15,
|
'around': 15,
|
||||||
'limit': 200,
|
'limit': 200,
|
||||||
|
@ -627,26 +632,32 @@ class CheckinDatabase:
|
||||||
snippets = {}
|
snippets = {}
|
||||||
bm_html = cgi.escape(snippet_options['before_match'])
|
bm_html = cgi.escape(snippet_options['before_match'])
|
||||||
am_html = cgi.escape(snippet_options['after_match'])
|
am_html = cgi.escape(snippet_options['after_match'])
|
||||||
for docid, rel, content, mimetype in sphinx_rows:
|
# Build snippets using Sphinx (content is stored in MySQL)
|
||||||
cursor.execute(
|
for docid, rel, mimetype in sphinx_rows:
|
||||||
'CALL SNIPPETS(%s, %s, %s'+''.join(', %s AS '+i for i in snippet_options)+')',
|
cursor.execute('SELECT content FROM contents WHERE id=%s', (docid, ))
|
||||||
(content, self.sphinx_index, query.content_query) + tuple(snippet_options.values())
|
s = cursor.fetchone()
|
||||||
)
|
if s:
|
||||||
s, = cursor.fetchone()
|
s = s[0]
|
||||||
s = cgi.escape(s)
|
sphcur.execute(
|
||||||
if re.match(preformatted_mime, mimetype):
|
'CALL SNIPPETS(%s, %s, %s'+''.join(', %s AS '+i for i in snippet_options)+')',
|
||||||
s = s.replace('\n', '<br />')
|
(s, self.sphinx_index, query.content_query) + tuple(snippet_options.values())
|
||||||
s = s.replace(bm_html, snippet_options['before_match'])
|
)
|
||||||
s = s.replace(am_html, snippet_options['after_match'])
|
s, = sphcur.fetchone()
|
||||||
snippets[docid] = s
|
s = cgi.escape(s)
|
||||||
# Fetch all fields from MySQL
|
if re.match(preformatted_mime, mimetype):
|
||||||
sql = self.CreateIdQueryString((docid for (docid, _, _, _) in sphinx_rows))
|
s = s.replace('\n', '<br />')
|
||||||
cursor = self.db.cursor()
|
s = s.replace(bm_html, snippet_options['before_match'])
|
||||||
|
s = s.replace(am_html, snippet_options['after_match'])
|
||||||
|
snippets[docid] = s
|
||||||
|
else:
|
||||||
|
snippets[docid] = ''
|
||||||
|
# Fetch commit attributes from MySQL
|
||||||
|
sql = self.CreateIdQueryString((docid for (docid, _, _) in sphinx_rows))
|
||||||
cursor.execute(sql)
|
cursor.execute(sql)
|
||||||
byid = {}
|
byid = {}
|
||||||
for row in cursor:
|
for row in cursor:
|
||||||
byid[str(row[0])] = row
|
byid[str(row[0])] = row
|
||||||
rows = list(byid[docid] + (rel, snippets[docid]) for (docid, rel, _, _) in sphinx_rows if docid in byid)
|
rows = list(byid[docid] + (rel, snippets[docid]) for (docid, rel, _) in sphinx_rows if docid in byid)
|
||||||
else:
|
else:
|
||||||
rows = []
|
rows = []
|
||||||
else:
|
else:
|
||||||
|
@ -751,6 +762,7 @@ class CheckinDatabase:
|
||||||
self.sql_purge('branches', 'id', 'branchid', checkins_table)
|
self.sql_purge('branches', 'id', 'branchid', checkins_table)
|
||||||
self.sql_purge('descs', 'id', 'descid', checkins_table)
|
self.sql_purge('descs', 'id', 'descid', checkins_table)
|
||||||
self.sql_purge('people', 'id', 'whoid', checkins_table)
|
self.sql_purge('people', 'id', 'whoid', checkins_table)
|
||||||
|
self.sql_purge('contents', 'id', 'id', checkins_table)
|
||||||
|
|
||||||
# Reset all internal id caches. We could be choosier here,
|
# Reset all internal id caches. We could be choosier here,
|
||||||
# but let's just be as safe as possible.
|
# but let's just be as safe as possible.
|
||||||
|
@ -1096,6 +1108,7 @@ def ConnectDatabase(cfg, authorizer=None, readonly=0):
|
||||||
sphinx_port = int(cfg.cvsdb.sphinx_port),
|
sphinx_port = int(cfg.cvsdb.sphinx_port),
|
||||||
sphinx_socket = cfg.cvsdb.sphinx_socket,
|
sphinx_socket = cfg.cvsdb.sphinx_socket,
|
||||||
sphinx_index = cfg.cvsdb.sphinx_index,
|
sphinx_index = cfg.cvsdb.sphinx_index,
|
||||||
|
content_max_size = cfg.cvsdb.content_max_size,
|
||||||
cfg = cfg,
|
cfg = cfg,
|
||||||
)
|
)
|
||||||
db.Connect()
|
db.Connect()
|
||||||
|
|
Loading…
Reference in New Issue