From 0bd2b940f9f9c7dd8446492f548cad7ca261b6bc Mon Sep 17 00:00:00 2001 From: vfilippov Date: Thu, 27 Oct 2011 14:02:57 +0000 Subject: [PATCH] Bug 82651 git-svn-id: svn://svn.office.custis.ru/3rdparty/viewvc.org/trunk@1452 6955db30-a419-402b-8a0d-67ecbb4d7f56 --- bin/svndbadmin | 20 +++++++++++++++++--- lib/cvsdb.py | 6 +++++- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/bin/svndbadmin b/bin/svndbadmin index 3300c205..8f6a77b9 100755 --- a/bin/svndbadmin +++ b/bin/svndbadmin @@ -322,10 +322,24 @@ class SvnRev: os.path.basename(change.path), diffobj.tempfile2 ) + # Read and guess charset by ourselves for text files + if mime and mime.startswith('text/') or (mime.startswith('application/') and mime.endswith('xml')): + try: + fd = open(diffobj.tempfile2, 'rb') + content = fd.read() + fd.close() + except: pass + # Guess charset + if content: + content, charset = repo.guesser.guess_charset(content) + if charset: + content = content.encode('utf-8') + if repo.verbose: + print 'Guessed %s for %s' % (charset, change.path) + elif repo.verbose: + print 'Failed to guess charset for %s, not indexing' % (change.path, ) # Try to extract content using Tika from binary documents - # Do not index contents of text files - it can be easily retrieved later - if (mime and not mime.startswith('text/') and not - (mime.startswith('application/') and mime.endswith('xml'))): + elif repo.tika_client: content = repo.tika_client.get_text(diffobj.tempfile2, mime, change.path) self.changes.append((path, action, plus, minus, content, mime)) diff --git a/lib/cvsdb.py b/lib/cvsdb.py index 0afa5495..c6c8294a 100644 --- a/lib/cvsdb.py +++ b/lib/cvsdb.py @@ -416,7 +416,11 @@ class CheckinDatabase: ) # Sphinx (at least 2.0.1) still caches all string attributes # inside RAM, so we'll store contents in MySQL - if self.enable_snippets: + # Do not store contents of text files - it can be easily retrieved later + mime = props['mimetype'] + if (self.enable_snippets and not (mime and + (mime.startswith('text/') or + mime.startswith('application/') and mime.endswith('xml')))): cursor.execute('INSERT INTO contents SET id=%s, content=%s', (commit_id, content)) except Exception, e: print ("Error adding commit: '"+str(e)+"'\nValues were:\n"+