From 639d1c25dbea344d4ae1d2341e1e66afcdc99b1d Mon Sep 17 00:00:00 2001 From: vfilippov Date: Tue, 27 Sep 2011 16:34:42 +0000 Subject: [PATCH] Bug 82651 - Tika&Sphinx&chardet content indexing (done!) git-svn-id: svn://svn.office.custis.ru/3rdparty/viewvc.org/trunk@1389 6955db30-a419-402b-8a0d-67ecbb4d7f56 --- lib/viewvcmagic.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/viewvcmagic.py b/lib/viewvcmagic.py index 5f8b3ea8..62edab93 100644 --- a/lib/viewvcmagic.py +++ b/lib/viewvcmagic.py @@ -46,7 +46,14 @@ class ContentMagic: charset = chardet.detect(content) if charset and charset['encoding']: charset = charset['encoding'] - content = content.decode(charset) + if charset == 'MacCyrillic': + # Silly MacCyr, try cp1251 + try: + content = content.decode('windows-1251') + charset = 'windows-1251' + except: content = content.decode(charset) + else: + content = content.decode(charset) except: charset = None else: # Try UTF-8