Add an enable_snippets option to the cvsdb / Sphinx content search.

Summary of changes
  * conf/viewvc.conf.dist, lib/config.py: new cvsdb option enable_snippets
    (default 1).
  * lib/cvsdb.py:
    - CheckinDatabase.__init__ accepts enable_snippets and clamps
      content_max_size: any value <= 0 or above 4 MB becomes 4 MB.
    - The code that adds a commit truncates the indexed 'content' property
      to content_max_size, and inserts into the MySQL "contents" table only
      when enable_snippets is set.
    - Snippet building moves out of RunQuery into the new fetch_snippets
      method, which returns an empty dict when enable_snippets is off.
    - ConnectDatabase passes cfg.cvsdb.enable_snippets through.

Review notes (differences from the patch as submitted)
  * FIX: the submitted patch assigned props['content'] only when the content
    was longer than content_max_size, so content within the limit was never
    put into props at all.  The slice is now assigned unconditionally; for
    content that already fits it is a no-op.
  * RunQuery builds the result rows with snippets.get(docid, '') instead of
    an explicit loop with two append branches; the resulting rows are the
    same.
  * fetch_snippets gained a docstring; hunk headers were recomputed for the
    changed line counts.
  * NOTE(review): the content inserted into "contents" is no longer truncated
    by content_max_size (the removed lines did that) -- confirm this is
    intended.
  * NOTE(review): this patch was recovered from a copy whose whitespace had
    been collapsed.  Indentation (and therefore block nesting) and the
    position of blank context lines were reconstructed from the hunk line
    counts -- verify against the tree before applying.
  * NOTE(review): HTML literals appear to have been stripped from that copy.
    'before_match' / 'after_match' are left empty exactly as received, and
    the replacement for '\n' is presumed to be '<br />' -- confirm both
    against the repository.
  * The post-image blob ids on the "index" lines are those of the original
    submission.

diff --git a/conf/viewvc.conf.dist b/conf/viewvc.conf.dist
index 95ce922e..114b2e73 100644
--- a/conf/viewvc.conf.dist
+++ b/conf/viewvc.conf.dist
@@ -634,9 +634,13 @@ enabled = 0
 # Set to 1 to enable indexing of file contents using Sphinx and Tika
 index_content = 0
 
-# Set to limit stored text file content size (4 MB default, 0 = unlimited, -1 = don't store content, index only)
+# Set to limit stored text file content size (max. 4MB - Sphinx limit)
 #content_max_size = 4194304
 
+# Set to 1 to store indexed content for snippet display and to show
+# snippets when searching on content; set to 0 to disable both
+enable_snippets = 1
+
 # Database hostname, port, and socket
 #host = localhost
 #port = 3306
diff --git a/lib/config.py b/lib/config.py
index ab2aff1f..b663ad23 100644
--- a/lib/config.py
+++ b/lib/config.py
@@ -325,6 +325,7 @@ class Config:
 
     self.cvsdb.enabled = 0
     self.cvsdb.index_content = 0
+    self.cvsdb.enable_snippets = 1
     self.cvsdb.content_max_size = 0
     self.cvsdb.host = ''
     self.cvsdb.port = 3306
diff --git a/lib/cvsdb.py b/lib/cvsdb.py
index d19c4efe..8f944079 100644
--- a/lib/cvsdb.py
+++ b/lib/cvsdb.py
@@ -40,7 +40,7 @@ error = "cvsdb error"
 class CheckinDatabase:
     def __init__(self, host, port, socket, user, passwd, database, row_limit, min_relevance, cfg,
                  authorizer = None, index_content = 0, sphinx_host = None, sphinx_port = None,
-                 sphinx_socket = None, sphinx_index = None, content_max_size = 0):
+                 sphinx_socket = None, sphinx_index = None, content_max_size = 0, enable_snippets = 1):
         self.cfg = cfg
 
         self._host = host
@@ -56,6 +56,9 @@ class CheckinDatabase:
 
         # Sphinx settings
         self.index_content = index_content
+        if content_max_size > 4*1024*1024 or content_max_size <= 0:
+            content_max_size = 4*1024*1024
+        self.enable_snippets = enable_snippets
         self.content_max_size = content_max_size
         self.sphinx_host = sphinx_host
         self.sphinx_port = sphinx_port
@@ -376,7 +379,9 @@ class CheckinDatabase:
             content = commit.GetContent()
             props['ci_when'] = str(int(commit.GetTime() or 0))
             if len(content):
-                props['content'] = content
+                # Maximum field size limit for Sphinx is 4MB; slicing is a no-op
+                # for content that already fits, so the field is always set
+                props['content'] = content[0:self.content_max_size]
                 # Now, stored MIME type is only needed while searching
                 # It is guessed again when the file is displayed
                 props['mimetype'] = commit.GetMimeType()
@@ -392,10 +397,7 @@ class CheckinDatabase:
                 )
                 # Sphinx (at least 2.0.1) still caches all string attributes inside RAM,
                 # so we'll store them in MySQL (used only for snippet display)
-                if self.content_max_size >= 0:
-                    # Limit content size:
-                    if self.content_max_size != 0 and len(content) >= self.content_max_size:
-                        content = content[0:self.content_max_size]
+                if self.enable_snippets:
                     cursor.execute('INSERT INTO contents SET id=%s, content=%s', (commit_id, content))
         except Exception, e:
             print ("Error adding commit: '"+str(e)+"'\nValues were:\n"+
@@ -612,6 +614,44 @@ class CheckinDatabase:
             return self.authorizer.check_path_access(rootname, path_parts, vclib.FILE, rev)
         return True
 
+    def fetch_snippets(self, query, sphinx_rows, cursor, sphinx_cursor):
+        """Build search-result snippets for the rows returned by Sphinx.
+
+        Returns a dict mapping document id to an HTML-escaped snippet; it is
+        empty when enable_snippets is off, and documents without a row in
+        the contents table are left out."""
+        snippets = {}
+        if self.enable_snippets:
+            # FIXME remove hardcode
+            snippet_options = {
+                'around': 15,
+                'limit': 200,
+                'before_match': '',
+                'after_match': '',
+                'chunk_separator': ' ...\n',
+            }
+            preformatted_mime = 'text/(?!html|xml).*'
+            bm_html = cgi.escape(snippet_options['before_match'])
+            am_html = cgi.escape(snippet_options['after_match'])
+            # Build snippets using Sphinx (content is stored in MySQL)
+            for docid, rel, mimetype in sphinx_rows:
+                cursor.execute('SELECT content FROM contents WHERE id=%s', (docid, ))
+                s = cursor.fetchone()
+                if s:
+                    s = s[0]
+                    sphinx_cursor.execute(
+                        'CALL SNIPPETS(%s, %s, %s'+''.join(', %s AS '+i for i in snippet_options)+')',
+                        (s, self.sphinx_index, query.content_query) + tuple(snippet_options.values())
+                    )
+                    s, = sphinx_cursor.fetchone()
+                    s = cgi.escape(s)
+                    if re.match(preformatted_mime, mimetype):
+                        s = s.replace('\n', '<br />')
+                    s = s.replace(bm_html, snippet_options['before_match'])
+                    s = s.replace(am_html, snippet_options['after_match'])
+                    snippets[docid] = s
+        return snippets
+
     def RunQuery(self, query):
         if len(query.content_query) and self.sphinx:
             # Use Sphinx to search on document content
@@ -621,44 +661,18 @@ class CheckinDatabase:
             sphcur.execute(sql)
             sphinx_rows = list((str(docid), rel, mimetype) for docid, rel, mimetype in sphcur)
             if len(sphinx_rows):
-                # FIXME remove hardcode
-                snippet_options = {
-                    'around': 15,
-                    'limit': 200,
-                    'before_match': '',
-                    'after_match': '',
-                    'chunk_separator': ' ...\n',
-                }
-                preformatted_mime = 'text/(?!html|xml).*'
-                snippets = {}
-                bm_html = cgi.escape(snippet_options['before_match'])
-                am_html = cgi.escape(snippet_options['after_match'])
-                # Build snippets using Sphinx (content is stored in MySQL)
-                for docid, rel, mimetype in sphinx_rows:
-                    cursor.execute('SELECT content FROM contents WHERE id=%s', (docid, ))
-                    s = cursor.fetchone()
-                    if s:
-                        s = s[0]
-                        sphcur.execute(
-                            'CALL SNIPPETS(%s, %s, %s'+''.join(', %s AS '+i for i in snippet_options)+')',
-                            (s, self.sphinx_index, query.content_query) + tuple(snippet_options.values())
-                        )
-                        s, = sphcur.fetchone()
-                        s = cgi.escape(s)
-                        if re.match(preformatted_mime, mimetype):
-                            s = s.replace('\n', '<br />')
-                        s = s.replace(bm_html, snippet_options['before_match'])
-                        s = s.replace(am_html, snippet_options['after_match'])
-                        snippets[docid] = s
-                    else:
-                        snippets[docid] = ''
+                snippets = self.fetch_snippets(query, sphinx_rows, cursor, sphcur)
                 # Fetch commit attributes from MySQL
                 sql = self.CreateIdQueryString((docid for (docid, _, _) in sphinx_rows))
                 cursor.execute(sql)
                 byid = {}
                 for row in cursor:
                     byid[str(row[0])] = row
-                rows = list(byid[docid] + (rel, snippets[docid]) for (docid, rel, _) in sphinx_rows if docid in byid)
+                # Rows without a snippet get an empty string
+                rows = [
+                    byid[docid] + (rel, snippets.get(docid, ''))
+                    for (docid, rel, _) in sphinx_rows if docid in byid
+                ]
             else:
                 rows = []
         else:
@@ -1105,6 +1119,7 @@ def ConnectDatabase(cfg, authorizer=None, readonly=0):
         min_relevance = cfg.cvsdb.fulltext_min_relevance,
         authorizer = authorizer,
         index_content = cfg.cvsdb.index_content,
+        enable_snippets = cfg.cvsdb.enable_snippets,
         sphinx_host = cfg.cvsdb.sphinx_host,
         sphinx_port = int(cfg.cvsdb.sphinx_port),
         sphinx_socket = cfg.cvsdb.sphinx_socket,