From e829778db2f88d116b0f6246222cd09beec4dc28 Mon Sep 17 00:00:00 2001 From: JHCD Date: Mon, 13 Jul 2015 19:43:47 +0200 Subject: [PATCH] add convert csv-field "description" to utf-8 to make sure, that only utf-8 chars are in process --- csv/poc.csv | 2 +- includes/descriptionList.py | 21 ++++++++++++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/csv/poc.csv b/csv/poc.csv index 44ddacb..c7c7893 100644 --- a/csv/poc.csv +++ b/csv/poc.csv @@ -7,4 +7,4 @@ ric,description # # !!! DO NOT delete the first line !!! # -1234567,"POCSAG testdata" +1234567,"POCSAG testdata üöäß" diff --git a/includes/descriptionList.py b/includes/descriptionList.py index 4d46279..3e7cb7f 100644 --- a/includes/descriptionList.py +++ b/includes/descriptionList.py @@ -15,6 +15,7 @@ import csv # for loading the description files from includes import globals # Global variables + ## # # Local function will load the csv-file @@ -36,7 +37,25 @@ def loadCSV(typ, idField): logging.debug(row) # only import rows with an integer as id if row[idField].isdigit() == True: - resultList[row[idField]] = row['description'] + # check if string contains non-utf8 characters + description = "" + try: + description = row['description'].decode('UTF-8', 'strict') + except UnicodeDecodeError: + # line contains non-utf8 character + logging.debug("row contains non-utf8 characters: %s", row['description']) + # try to find out codec: + encodings = ('windows-1250', 'windows-1252', 'iso-8859-1', 'iso-8859-15') + for enc in encodings: + try: + description = row['description'].decode(enc) + break + except Exception: + pass + # encode in UTF-8 + description = description.encode('UTF-8') + pass + resultList[row[idField]] = description logging.debug("-- loading csv finished") except: logging.error("loading csvList for typ: %s failed", typ)