Bug 82651

git-svn-id: svn://svn.office.custis.ru/3rdparty/viewvc.org/trunk@1452 6955db30-a419-402b-8a0d-67ecbb4d7f56
remotes/github/custis
vfilippov 2011-10-27 14:02:57 +00:00 committed by Vitaliy Filippov
parent 24b979d9e2
commit 0bd2b940f9
2 changed files with 22 additions and 4 deletions

View File

@@ -322,10 +322,24 @@ class SvnRev:
os.path.basename(change.path),
diffobj.tempfile2
)
# Read and guess charset by ourselves for text files
if mime and mime.startswith('text/') or (mime.startswith('application/') and mime.endswith('xml')):
try:
fd = open(diffobj.tempfile2, 'rb')
content = fd.read()
fd.close()
except: pass
# Guess charset
if content:
content, charset = repo.guesser.guess_charset(content)
if charset:
content = content.encode('utf-8')
if repo.verbose:
print 'Guessed %s for %s' % (charset, change.path)
elif repo.verbose:
print 'Failed to guess charset for %s, not indexing' % (change.path, )
# Try to extract content using Tika from binary documents
# Do not index contents of text files - it can be easily retrieved later
if (mime and not mime.startswith('text/') and not
(mime.startswith('application/') and mime.endswith('xml'))):
elif repo.tika_client:
content = repo.tika_client.get_text(diffobj.tempfile2, mime, change.path)
self.changes.append((path, action, plus, minus, content, mime))

View File

@@ -416,7 +416,11 @@ class CheckinDatabase:
)
# Sphinx (at least 2.0.1) still caches all string attributes
# inside RAM, so we'll store contents in MySQL
if self.enable_snippets:
# Do not store contents of text files - it can be easily retrieved later
mime = props['mimetype']
if (self.enable_snippets and not (mime and
(mime.startswith('text/') or
mime.startswith('application/') and mime.endswith('xml')))):
cursor.execute('INSERT INTO contents SET id=%s, content=%s', (commit_id, content))
except Exception, e:
print ("Error adding commit: '"+str(e)+"'\nValues were:\n"+