mirror of
https://github.com/Schrolli91/BOSWatch.git
synced 2026-01-20 15:20:16 +01:00
add conversion of csv-field "description" to utf-8
to make sure that only utf-8 characters are processed
This commit is contained in:
parent
1c78f9ad78
commit
e829778db2
|
|
@ -7,4 +7,4 @@ ric,description
|
|||
#
|
||||
# !!! DO NOT delete the first line !!!
|
||||
#
|
||||
1234567,"POCSAG testdata"
|
||||
1234567,"POCSAG testdata üöäß"
|
||||
|
|
|
|||
|
Can't render this file because it has a wrong number of fields in line 2.
|
|
|
@ -15,6 +15,7 @@ import csv # for loading the description files
|
|||
|
||||
from includes import globals # Global variables
|
||||
|
||||
|
||||
##
|
||||
#
|
||||
# Local function will load the csv-file
|
||||
|
|
@ -36,7 +37,25 @@ def loadCSV(typ, idField):
|
|||
logging.debug(row)
|
||||
# only import rows with an integer as id
|
||||
if row[idField].isdigit() == True:
|
||||
resultList[row[idField]] = row['description']
|
||||
# check if string contains non-utf8 characters
|
||||
description = ""
|
||||
try:
|
||||
description = row['description'].decode('UTF-8', 'strict')
|
||||
except UnicodeDecodeError:
|
||||
# line contains non-utf8 character
|
||||
logging.debug("row contains non-utf8 characters: %s", row['description'])
|
||||
# try to find out codec:
|
||||
encodings = ('windows-1250', 'windows-1252', 'iso-8859-1', 'iso-8859-15')
|
||||
for enc in encodings:
|
||||
try:
|
||||
description = f.decode(enc)
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
# encode in UTF-8
|
||||
description = description.encode('UTF-8')
|
||||
pass
|
||||
resultList[row[idField]] = description
|
||||
logging.debug("-- loading csv finished")
|
||||
except:
|
||||
logging.error("loading csvList for typ: %s failed", typ)
|
||||
|
|
|
|||
Loading…
Reference in a new issue