From e829778db2f88d116b0f6246222cd09beec4dc28 Mon Sep 17 00:00:00 2001
From: JHCD <github@jhc.de>
Date: Mon, 13 Jul 2015 19:43:47 +0200
Subject: [PATCH] convert csv field "description" to UTF-8 to make sure
 that only UTF-8 characters are processed

---
 csv/poc.csv                 |  2 +-
 includes/descriptionList.py | 21 ++++++++++++++++++++-
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/csv/poc.csv b/csv/poc.csv
index 44ddacb..c7c7893 100644
--- a/csv/poc.csv
+++ b/csv/poc.csv
@@ -7,4 +7,4 @@ ric,description
 #
 # !!! DO NOT delete the first line !!!
 #
-1234567,"POCSAG testdata"
+1234567,"POCSAG testdata üöäß"
diff --git a/includes/descriptionList.py b/includes/descriptionList.py
index 4d46279..3e7cb7f 100644
--- a/includes/descriptionList.py
+++ b/includes/descriptionList.py
@@ -15,6 +15,7 @@ import csv # for loading the description files
 
 from includes import globals  # Global variables
 
+
 ##
 #
 # Local function will load the csv-file
@@ -36,7 +37,25 @@ def loadCSV(typ, idField):
 				logging.debug(row)
 				# only import rows with an integer as id
 				if row[idField].isdigit() == True:
-					resultList[row[idField]] = row['description']
+					# make sure the description is valid text: try strict UTF-8 first
+					description = ""
+					try:
+						description = row['description'].decode('UTF-8', 'strict')
+					except UnicodeDecodeError:
+						# line contains non-utf8 character
+						logging.debug("row contains non-utf8 characters: %s", row['description'])
+						# guess the codec: use the first candidate that decodes the raw field
+						encodings = ('windows-1250', 'windows-1252', 'iso-8859-1', 'iso-8859-15')
+						for enc in encodings:
+							try:
+								description = row['description'].decode(enc)
+								break
+							except UnicodeDecodeError:
+								continue
+						# re-encode the recovered text as UTF-8
+						description = description.encode('UTF-8')
+						# NOTE(review): this branch stores UTF-8 bytes, the branch above a decoded object - confirm callers accept both
+					resultList[row[idField]] = description
 		logging.debug("-- loading csv finished")
 	except:
 		logging.error("loading csvList for typ: %s failed", typ)