diff --git a/conf/viewvc.conf.dist b/conf/viewvc.conf.dist
index 95ce922e..114b2e73 100644
--- a/conf/viewvc.conf.dist
+++ b/conf/viewvc.conf.dist
@@ -634,9 +634,13 @@ enabled = 0
# Set to 1 to enable indexing of file contents using Sphinx and Tika
index_content = 0
-# Set to limit stored text file content size (4 MB default, 0 = unlimited, -1 = don't store content, index only)
+# Set to limit stored text file content size (max. 4MB - Sphinx limit)
#content_max_size = 4194304
+# Do store indexed content for snippet display?
+# Do display snippets when searching on content?
+enable_snippets = 1
+
# Database hostname, port, and socket
#host = localhost
#port = 3306
diff --git a/lib/config.py b/lib/config.py
index ab2aff1f..b663ad23 100644
--- a/lib/config.py
+++ b/lib/config.py
@@ -325,6 +325,7 @@ class Config:
self.cvsdb.enabled = 0
self.cvsdb.index_content = 0
+ self.cvsdb.enable_snippets = 1
self.cvsdb.content_max_size = 0
self.cvsdb.host = ''
self.cvsdb.port = 3306
diff --git a/lib/cvsdb.py b/lib/cvsdb.py
index d19c4efe..8f944079 100644
--- a/lib/cvsdb.py
+++ b/lib/cvsdb.py
@@ -40,7 +40,7 @@ error = "cvsdb error"
class CheckinDatabase:
def __init__(self, host, port, socket, user, passwd, database, row_limit, min_relevance, cfg,
authorizer = None, index_content = 0, sphinx_host = None, sphinx_port = None,
- sphinx_socket = None, sphinx_index = None, content_max_size = 0):
+ sphinx_socket = None, sphinx_index = None, content_max_size = 0, enable_snippets = 1):
self.cfg = cfg
self._host = host
@@ -56,6 +56,9 @@ class CheckinDatabase:
# Sphinx settings
self.index_content = index_content
+ if content_max_size > 4*1024*1024 or content_max_size <= 0:
+ content_max_size = 4*1024*1024
+ self.enable_snippets = enable_snippets
self.content_max_size = content_max_size
self.sphinx_host = sphinx_host
self.sphinx_port = sphinx_port
@@ -376,7 +379,9 @@ class CheckinDatabase:
content = commit.GetContent()
props['ci_when'] = str(int(commit.GetTime() or 0))
if len(content):
- props['content'] = content
+ # Maximum field size limit for Sphinx is 4MB
+ if len(content) > self.content_max_size:
+ props['content'] = content[0:self.content_max_size]
# Now, stored MIME type is only needed while searching
# It is guessed again when the file is displayed
props['mimetype'] = commit.GetMimeType()
@@ -392,10 +397,7 @@ class CheckinDatabase:
)
# Sphinx (at least 2.0.1) still caches all string attributes inside RAM,
# so we'll store them in MySQL (used only for snippet display)
- if self.content_max_size >= 0:
- # Limit content size:
- if self.content_max_size != 0 and len(content) >= self.content_max_size:
- content = content[0:self.content_max_size]
+ if self.enable_snippets:
cursor.execute('INSERT INTO contents SET id=%s, content=%s', (commit_id, content))
except Exception, e:
print ("Error adding commit: '"+str(e)+"'\nValues were:\n"+
@@ -612,6 +614,39 @@ class CheckinDatabase:
return self.authorizer.check_path_access(rootname, path_parts, vclib.FILE, rev)
return True
+ def fetch_snippets(self, query, sphinx_rows, cursor, sphinx_cursor):
+ snippets = {}
+ if self.enable_snippets:
+ # FIXME remove hardcode
+ snippet_options = {
+ 'around': 15,
+ 'limit': 200,
+ 'before_match': '',
+ 'after_match': '',
+ 'chunk_separator': ' ...\n',
+ }
+ preformatted_mime = 'text/(?!html|xml).*'
+ bm_html = cgi.escape(snippet_options['before_match'])
+ am_html = cgi.escape(snippet_options['after_match'])
+ # Build snippets using Sphinx (content is stored in MySQL)
+ for docid, rel, mimetype in sphinx_rows:
+ cursor.execute('SELECT content FROM contents WHERE id=%s', (docid, ))
+ s = cursor.fetchone()
+ if s:
+ s = s[0]
+ sphinx_cursor.execute(
+ 'CALL SNIPPETS(%s, %s, %s'+''.join(', %s AS '+i for i in snippet_options)+')',
+ (s, self.sphinx_index, query.content_query) + tuple(snippet_options.values())
+ )
+ s, = sphinx_cursor.fetchone()
+ s = cgi.escape(s)
+ if re.match(preformatted_mime, mimetype):
+ s = s.replace('\n', '
')
+ s = s.replace(bm_html, snippet_options['before_match'])
+ s = s.replace(am_html, snippet_options['after_match'])
+ snippets[docid] = s
+ return snippets
+
def RunQuery(self, query):
if len(query.content_query) and self.sphinx:
# Use Sphinx to search on document content
@@ -621,44 +656,21 @@ class CheckinDatabase:
sphcur.execute(sql)
sphinx_rows = list((str(docid), rel, mimetype) for docid, rel, mimetype in sphcur)
if len(sphinx_rows):
- # FIXME remove hardcode
- snippet_options = {
- 'around': 15,
- 'limit': 200,
- 'before_match': '',
- 'after_match': '',
- 'chunk_separator': ' ...\n',
- }
- preformatted_mime = 'text/(?!html|xml).*'
- snippets = {}
- bm_html = cgi.escape(snippet_options['before_match'])
- am_html = cgi.escape(snippet_options['after_match'])
- # Build snippets using Sphinx (content is stored in MySQL)
- for docid, rel, mimetype in sphinx_rows:
- cursor.execute('SELECT content FROM contents WHERE id=%s', (docid, ))
- s = cursor.fetchone()
- if s:
- s = s[0]
- sphcur.execute(
- 'CALL SNIPPETS(%s, %s, %s'+''.join(', %s AS '+i for i in snippet_options)+')',
- (s, self.sphinx_index, query.content_query) + tuple(snippet_options.values())
- )
- s, = sphcur.fetchone()
- s = cgi.escape(s)
- if re.match(preformatted_mime, mimetype):
- s = s.replace('\n', '
')
- s = s.replace(bm_html, snippet_options['before_match'])
- s = s.replace(am_html, snippet_options['after_match'])
- snippets[docid] = s
- else:
- snippets[docid] = ''
+ snippets = self.fetch_snippets(query, sphinx_rows, cursor, sphcur)
# Fetch commit attributes from MySQL
sql = self.CreateIdQueryString((docid for (docid, _, _) in sphinx_rows))
cursor.execute(sql)
byid = {}
for row in cursor:
byid[str(row[0])] = row
- rows = list(byid[docid] + (rel, snippets[docid]) for (docid, rel, _) in sphinx_rows if docid in byid)
+ nrows = []
+ for docid, rel, _ in sphinx_rows:
+ if docid in byid:
+ if docid in snippets:
+ nrows.append(byid[docid] + (rel, snippets[docid]))
+ else:
+ nrows.append(byid[docid] + (rel, ''))
+ rows = nrows
else:
rows = []
else:
@@ -1105,6 +1117,7 @@ def ConnectDatabase(cfg, authorizer=None, readonly=0):
min_relevance = cfg.cvsdb.fulltext_min_relevance,
authorizer = authorizer,
index_content = cfg.cvsdb.index_content,
+ enable_snippets = cfg.cvsdb.enable_snippets,
sphinx_host = cfg.cvsdb.sphinx_host,
sphinx_port = int(cfg.cvsdb.sphinx_port),
sphinx_socket = cfg.cvsdb.sphinx_socket,