BOSWatch/includes/helper/stringConverter.py

170 lines
4.3 KiB
Python
Raw Permalink Normal View History

#!/usr/bin/python
# -*- coding: utf-8 -*-
#
"""
Little helper for converting strings
@author: Jens Herrmann
"""
import logging
#
# local helper function to decode a string...
#
def decodeString(inputString = ""):
    """
    Returns given string as unicode

    The candidate encodings are tried in a fixed order and the first one
    that decodes the input without an error wins. Input that is already
    unicode text is returned unchanged.

    @type    inputString: String
    @param   inputString: String to convert to unicode

    @return:    string in unicode
    @exception: Exception if converting to unicode failed
    """
    decodedString = ""
    logging.debug("call decodeString('%s')", inputString)

    # Already decoded text has nothing left to decode. Calling .decode() on
    # it would force an implicit ASCII encode first (Python 2), which fails
    # for every non-ASCII character and for every candidate encoding.
    if isinstance(inputString, type(u"")):
        logging.debug("-- unicode")
        return inputString

    # try to find out encoding:
    # NOTE(review): 'latin_1' maps every byte value, so the entries listed
    # after it look unreachable for byte-string input - confirm before
    # reordering or pruning the list.
    encodings = ('utf-8', 'windows-1250', 'windows-1252', 'latin_1', 'cp850', 'cp852', 'iso8859_2', 'iso8859_15', 'mac_latin2', 'mac_roman')
    for enc in encodings:
        try:
            decodedString = inputString.decode(enc)
        except Exception:
            # only the failure of the last candidate is fatal; any earlier
            # failure just moves on to the next encoding
            if enc == encodings[-1]:
                logging.warning("no encoding found")
                logging.debug("no encoding found", exc_info=True)
                # no fixing possible, raise exception
                raise
        else:
            logging.debug("-- string was encoded in: %s", enc)
            break

    return decodedString
def convertToUnicode(inputString = ""):
    """
    Returns given string as unicode

    An empty input yields an empty string. Input that int() parses to a
    non-zero value, and input that already is unicode, is handed back
    unchanged. Everything else is passed to decodeString().

    @type    inputString: String
    @param   inputString: String to convert to unicode

    @return:    string in unicode
    @exception: Exception if converting to unicode failed
    """
    logging.debug("call convertToUnicode('%s')", inputString)

    # empty input: nothing to convert
    if len(inputString) == 0:
        return ""

    # 1. integer-like input is returned as it came in
    #    (a value of zero is not treated as integer here and is decoded below)
    try:
        isNumber = bool(int(inputString))
    except ValueError:
        # not a number, that is fine
        isNumber = False
    if isNumber:
        logging.debug("-- integer")
        return inputString

    # 2. unicode input needs no decoding
    if isinstance(inputString, unicode):
        logging.debug("-- unicode")
        return inputString

    # 3. everything else has to be decoded
    try:
        return decodeString(inputString)
    except:
        logging.warning("decoding string failed")
        logging.debug("encoding string failed", exc_info=True)
        # nothing more we can do here, pass the error on to the caller
        raise
def convertToUTF8(inputString = ""):
    """
    Returns given string in UTF-8

    Empty input yields an empty string. Input that int() parses to a
    non-zero value is returned unchanged. Unicode text is encoded to
    UTF-8. A byte string that already is valid UTF-8 is returned
    unchanged; any other byte string is decoded with decodeString() and
    then encoded to UTF-8.

    @type    inputString: String
    @param   inputString: String to convert to UTF-8

    @return:    string in UTF-8
    @exception: Exception if converting to UTF-8 failed
    """
    uft8String = ""
    logging.debug("call convertToUTF8('%s')", inputString)

    # nothing to do if inputString is empty
    if len(inputString) == 0:
        return uft8String

    try:
        # 1. check if integer
        try:
            if int(inputString):
                logging.debug("-- integer")
                # ... then return it
                return inputString
        except ValueError:
            # ... no integer is okay...
            pass

        # 2. check if inputString is unicode...
        if isinstance(inputString, type(u"")):
            logging.debug("-- unicode")
            # ... then return it as UTF-8. The input itself has to be
            # encoded: no decoded copy exists yet at this point.
            return inputString.encode('UTF-8')

        # 3. check if given inputString is already UTF-8...
        inputString.decode('UTF-8', 'strict')
        # ... no UnicodeDecodeError exception, inputString is UTF-8
        logging.debug("-- UTF-8")
        return inputString

    except UnicodeDecodeError:
        # inputString contains non-UTF-8 characters
        logging.debug("string contains non-UTF-8 characters: %s", inputString)

        try:
            # try to find the real encoding and decode
            decodedString = decodeString(inputString)
        except Exception:
            logging.warning("decoding string failed")
            logging.debug("decoding string failed", exc_info=True)
            # no fixing possible, raise exception
            raise

        # inputString should now be decoded...
        try:
            # encode decodedString to UTF-8
            uft8String = decodedString.encode('UTF-8')
        except Exception:
            logging.warning("encoding to UTF-8 failed")
            logging.debug("encoding to UTF-8 failed", exc_info=True)
            # no fixing possible, raise exception
            raise

        # Now we must have an UTF-8 string, check it:
        try:
            uft8String.decode('UTF-8', 'strict')
            logging.debug("string converting succeeded: %s", uft8String)
        except Exception:
            logging.warning("converting to UTF-8 failed")
            logging.debug("converting to UTF-8 failed", exc_info=True)
            # no fixing possible, raise exception
            raise

    except Exception:
        # any other failure while inspecting the input
        # (errors raised inside the handler above do not end up here)
        logging.warning("error checking given string")
        logging.debug("error checking given string", exc_info=True)
        # no fixing possible, raise exception
        raise

    return uft8String