mirror of
https://github.com/Schrolli91/BOSWatch.git
synced 2026-01-02 23:00:09 +01:00
76 lines
1.9 KiB
Python
76 lines
1.9 KiB
Python
#!/usr/bin/python
|
|
# -*- coding: UTF-8 -*-
|
|
#
|
|
|
|
"""
|
|
Small helper for converting strings to UTF-8
|
|
|
|
@author: Jens Herrmann
|
|
"""
|
|
|
|
import logging
|
|
|
|
|
|
def convertToUTF8(string = ""):
    """
    Returns given string in UTF-8

    A byte string that already is valid UTF-8 is handed back unchanged.
    Any other byte string is decoded with the first fallback codec that
    accepts it and then re-encoded as UTF-8. Already decoded text is
    encoded as UTF-8 directly.

    @type    string: String
    @param   string: String to convert to UTF-8

    @return:    string in UTF-8, "" if the given string is empty or None
    @exception: Exception if converting to UTF-8 failed
    """

    # nothing to do if string is empty
    if not string:
        return ""

    # Already decoded text (unicode on Python 2, str on Python 3) carries no
    # byte encoding that could be guessed; it only has to be encoded.
    if not isinstance(string, (bytes, bytearray)):
        return string.encode('UTF-8')

    try:
        # check given string is already UTF-8
        string.decode('UTF-8', 'strict')
    except UnicodeDecodeError:
        # string contains non-UTF-8 character
        logging.debug("string contains non-UTF-8 characters: %s", string)
    else:
        # Hand back the input itself, not the decoded object, so both code
        # paths return UTF-8 encoded data.
        return string

    # try to find out encoding:
    # NOTE: latin_1 accepts every byte value, so the codecs listed after it
    # are never tried. The order is kept to preserve the guessing behaviour.
    encodings = ('windows-1250', 'windows-1252', 'latin_1', 'cp850', 'cp852', 'iso8859_2', 'iso8859_15', 'mac_latin2', 'mac_roman')
    for enc in encodings:
        try:
            decoded = string.decode(enc)
            logging.debug("string was encoded in: %s", enc)
            break
        except Exception:
            # only give up after the last candidate has failed as well
            if enc == encodings[-1]:
                logging.warning("no encoding found")
                logging.debug("no encoding found", exc_info=True)
                # no fixing possible, raise exception
                raise

    # string is decoded now, encode it to UTF-8
    try:
        utf8String = decoded.encode('UTF-8')
    except Exception:
        logging.warning("encoding to UTF-8 failed")
        logging.debug("encoding to UTF-8 failed", exc_info=True)
        # no fixing possible, raise exception
        raise

    logging.debug("string converting succeeded: %s", utf8String)
    return utf8String