Bug 82651 - Content size limit (4MB Sphinx), enable snippets setting
git-svn-id: svn://svn.office.custis.ru/3rdparty/viewvc.org/trunk@1400 6955db30-a419-402b-8a0d-67ecbb4d7f56remotes/github/custis
parent
a564b02d18
commit
72934bf6cd
|
@ -634,9 +634,13 @@ enabled = 0
|
|||
# Set to 1 to enable indexing of file contents using Sphinx and Tika
|
||||
index_content = 0
|
||||
|
||||
# Set to limit stored text file content size (4 MB default, 0 = unlimited, -1 = don't store content, index only)
|
||||
# Set to limit stored text file content size (max. 4MB - Sphinx limit)
|
||||
#content_max_size = 4194304
|
||||
|
||||
# Do store indexed content for snippet display?
|
||||
# Do display snippets when searching on content?
|
||||
enable_snippets = 1
|
||||
|
||||
# Database hostname, port, and socket
|
||||
#host = localhost
|
||||
#port = 3306
|
||||
|
|
|
@ -325,6 +325,7 @@ class Config:
|
|||
|
||||
self.cvsdb.enabled = 0
|
||||
self.cvsdb.index_content = 0
|
||||
self.cvsdb.enable_snippets = 1
|
||||
self.cvsdb.content_max_size = 0
|
||||
self.cvsdb.host = ''
|
||||
self.cvsdb.port = 3306
|
||||
|
|
89
lib/cvsdb.py
89
lib/cvsdb.py
|
@ -40,7 +40,7 @@ error = "cvsdb error"
|
|||
class CheckinDatabase:
|
||||
def __init__(self, host, port, socket, user, passwd, database, row_limit, min_relevance, cfg,
|
||||
authorizer = None, index_content = 0, sphinx_host = None, sphinx_port = None,
|
||||
sphinx_socket = None, sphinx_index = None, content_max_size = 0):
|
||||
sphinx_socket = None, sphinx_index = None, content_max_size = 0, enable_snippets = 1):
|
||||
self.cfg = cfg
|
||||
|
||||
self._host = host
|
||||
|
@ -56,6 +56,9 @@ class CheckinDatabase:
|
|||
|
||||
# Sphinx settings
|
||||
self.index_content = index_content
|
||||
if content_max_size > 4*1024*1024 or content_max_size <= 0:
|
||||
content_max_size = 4*1024*1024
|
||||
self.enable_snippets = enable_snippets
|
||||
self.content_max_size = content_max_size
|
||||
self.sphinx_host = sphinx_host
|
||||
self.sphinx_port = sphinx_port
|
||||
|
@ -376,7 +379,9 @@ class CheckinDatabase:
|
|||
content = commit.GetContent()
|
||||
props['ci_when'] = str(int(commit.GetTime() or 0))
|
||||
if len(content):
|
||||
props['content'] = content
|
||||
# Maximum field size limit for Sphinx is 4MB
|
||||
if len(content) > self.content_max_size:
|
||||
props['content'] = content[0:self.content_max_size]
|
||||
# Now, stored MIME type is only needed while searching
|
||||
# It is guessed again when the file is displayed
|
||||
props['mimetype'] = commit.GetMimeType()
|
||||
|
@ -392,10 +397,7 @@ class CheckinDatabase:
|
|||
)
|
||||
# Sphinx (at least 2.0.1) still caches all string attributes inside RAM,
|
||||
# so we'll store them in MySQL (used only for snippet display)
|
||||
if self.content_max_size >= 0:
|
||||
# Limit content size:
|
||||
if self.content_max_size != 0 and len(content) >= self.content_max_size:
|
||||
content = content[0:self.content_max_size]
|
||||
if self.enable_snippets:
|
||||
cursor.execute('INSERT INTO contents SET id=%s, content=%s', (commit_id, content))
|
||||
except Exception, e:
|
||||
print ("Error adding commit: '"+str(e)+"'\nValues were:\n"+
|
||||
|
@ -612,6 +614,39 @@ class CheckinDatabase:
|
|||
return self.authorizer.check_path_access(rootname, path_parts, vclib.FILE, rev)
|
||||
return True
|
||||
|
||||
def fetch_snippets(self, query, sphinx_rows, cursor, sphinx_cursor):
|
||||
snippets = {}
|
||||
if self.enable_snippets:
|
||||
# FIXME remove hardcode
|
||||
snippet_options = {
|
||||
'around': 15,
|
||||
'limit': 200,
|
||||
'before_match': '<span style="color:red">',
|
||||
'after_match': '</span>',
|
||||
'chunk_separator': ' ...\n',
|
||||
}
|
||||
preformatted_mime = 'text/(?!html|xml).*'
|
||||
bm_html = cgi.escape(snippet_options['before_match'])
|
||||
am_html = cgi.escape(snippet_options['after_match'])
|
||||
# Build snippets using Sphinx (content is stored in MySQL)
|
||||
for docid, rel, mimetype in sphinx_rows:
|
||||
cursor.execute('SELECT content FROM contents WHERE id=%s', (docid, ))
|
||||
s = cursor.fetchone()
|
||||
if s:
|
||||
s = s[0]
|
||||
sphinx_cursor.execute(
|
||||
'CALL SNIPPETS(%s, %s, %s'+''.join(', %s AS '+i for i in snippet_options)+')',
|
||||
(s, self.sphinx_index, query.content_query) + tuple(snippet_options.values())
|
||||
)
|
||||
s, = sphinx_cursor.fetchone()
|
||||
s = cgi.escape(s)
|
||||
if re.match(preformatted_mime, mimetype):
|
||||
s = s.replace('\n', '<br />')
|
||||
s = s.replace(bm_html, snippet_options['before_match'])
|
||||
s = s.replace(am_html, snippet_options['after_match'])
|
||||
snippets[docid] = s
|
||||
return snippets
|
||||
|
||||
def RunQuery(self, query):
|
||||
if len(query.content_query) and self.sphinx:
|
||||
# Use Sphinx to search on document content
|
||||
|
@ -621,44 +656,21 @@ class CheckinDatabase:
|
|||
sphcur.execute(sql)
|
||||
sphinx_rows = list((str(docid), rel, mimetype) for docid, rel, mimetype in sphcur)
|
||||
if len(sphinx_rows):
|
||||
# FIXME remove hardcode
|
||||
snippet_options = {
|
||||
'around': 15,
|
||||
'limit': 200,
|
||||
'before_match': '<span style="color:red">',
|
||||
'after_match': '</span>',
|
||||
'chunk_separator': ' ...\n',
|
||||
}
|
||||
preformatted_mime = 'text/(?!html|xml).*'
|
||||
snippets = {}
|
||||
bm_html = cgi.escape(snippet_options['before_match'])
|
||||
am_html = cgi.escape(snippet_options['after_match'])
|
||||
# Build snippets using Sphinx (content is stored in MySQL)
|
||||
for docid, rel, mimetype in sphinx_rows:
|
||||
cursor.execute('SELECT content FROM contents WHERE id=%s', (docid, ))
|
||||
s = cursor.fetchone()
|
||||
if s:
|
||||
s = s[0]
|
||||
sphcur.execute(
|
||||
'CALL SNIPPETS(%s, %s, %s'+''.join(', %s AS '+i for i in snippet_options)+')',
|
||||
(s, self.sphinx_index, query.content_query) + tuple(snippet_options.values())
|
||||
)
|
||||
s, = sphcur.fetchone()
|
||||
s = cgi.escape(s)
|
||||
if re.match(preformatted_mime, mimetype):
|
||||
s = s.replace('\n', '<br />')
|
||||
s = s.replace(bm_html, snippet_options['before_match'])
|
||||
s = s.replace(am_html, snippet_options['after_match'])
|
||||
snippets[docid] = s
|
||||
else:
|
||||
snippets[docid] = ''
|
||||
snippets = self.fetch_snippets(query, sphinx_rows, cursor, sphcur)
|
||||
# Fetch commit attributes from MySQL
|
||||
sql = self.CreateIdQueryString((docid for (docid, _, _) in sphinx_rows))
|
||||
cursor.execute(sql)
|
||||
byid = {}
|
||||
for row in cursor:
|
||||
byid[str(row[0])] = row
|
||||
rows = list(byid[docid] + (rel, snippets[docid]) for (docid, rel, _) in sphinx_rows if docid in byid)
|
||||
nrows = []
|
||||
for docid, rel, _ in sphinx_rows:
|
||||
if docid in byid:
|
||||
if docid in snippets:
|
||||
nrows.append(byid[docid] + (rel, snippets[docid]))
|
||||
else:
|
||||
nrows.append(byid[docid] + (rel, ''))
|
||||
rows = nrows
|
||||
else:
|
||||
rows = []
|
||||
else:
|
||||
|
@ -1105,6 +1117,7 @@ def ConnectDatabase(cfg, authorizer=None, readonly=0):
|
|||
min_relevance = cfg.cvsdb.fulltext_min_relevance,
|
||||
authorizer = authorizer,
|
||||
index_content = cfg.cvsdb.index_content,
|
||||
enable_snippets = cfg.cvsdb.enable_snippets,
|
||||
sphinx_host = cfg.cvsdb.sphinx_host,
|
||||
sphinx_port = int(cfg.cvsdb.sphinx_port),
|
||||
sphinx_socket = cfg.cvsdb.sphinx_socket,
|
||||
|
|
Loading…
Reference in New Issue