add conversion of the csv field "description" to utf-8

to make sure that only utf-8 characters are processed
This commit is contained in:
JHCD 2015-07-13 19:43:47 +02:00
parent 1c78f9ad78
commit e829778db2
2 changed files with 21 additions and 2 deletions

View file

@ -7,4 +7,4 @@ ric,description
#
# !!! DO NOT delete the first line !!!
#
1234567,"POCSAG testdata"
1234567,"POCSAG testdata üöäß"

Can't render this file because it has a wrong number of fields in line 2.

View file

@ -15,6 +15,7 @@ import csv # for loading the description files
from includes import globals # Global variables
##
#
# Local function will load the csv-file
@ -36,7 +37,25 @@ def loadCSV(typ, idField):
logging.debug(row)
# only import rows with an integer as id
if row[idField].isdigit() == True:
resultList[row[idField]] = row['description']
# check if string contains non-utf8 characters
description = ""
try:
description = row['description'].decode('UTF-8', 'strict')
except UnicodeDecodeError:
# line contains non-utf8 character
logging.debug("row contains non-utf8 characters: %s", row['description'])
# try to find out codec:
encodings = ('windows-1250', 'windows-1252', 'iso-8859-1', 'iso-8859-15')
for enc in encodings:
try:
description = f.decode(enc)
break
except Exception:
pass
# encode in UTF-8
description = description.encode('UTF-8')
pass
resultList[row[idField]] = description
logging.debug("-- loading csv finished")
except:
logging.error("loading csvList for typ: %s failed", typ)