Bug 82651 - Tika&Sphinx&chardet content indexing (done!)
git-svn-id: svn://svn.office.custis.ru/3rdparty/viewvc.org/trunk@1389 6955db30-a419-402b-8a0d-67ecbb4d7f56
remotes/github/custis
parent
e363cf19b1
commit
639d1c25db
|
@ -46,7 +46,14 @@ class ContentMagic:
|
|||
charset = chardet.detect(content)
|
||||
if charset and charset['encoding']:
|
||||
charset = charset['encoding']
|
||||
content = content.decode(charset)
|
||||
if charset == 'MacCyrillic':
|
||||
# Silly MacCyr, try cp1251
|
||||
try:
|
||||
content = content.decode('windows-1251')
|
||||
charset = 'windows-1251'
|
||||
except: content = content.decode(charset)
|
||||
else:
|
||||
content = content.decode(charset)
|
||||
except: charset = None
|
||||
else:
|
||||
# Try UTF-8
|
||||
|
|
Loading…
Reference in New Issue