From 296c32a8daded6ed47fb7bad2561fa731d566280 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Thu, 4 Mar 2021 13:33:58 -0500 Subject: [PATCH] decodetree: Open files with encoding='utf-8' When decodetree.py was added in commit 568ae7efae7, QEMU was using Python 2 which happily reads UTF-8 files in text mode. Python 3 requires either a UTF-8 locale or an explicit encoding passed to open(). Now that Python 3 is required, explicitly specify UTF-8 encoding when opening decodetree source files. To avoid further problems with the user locale, also explicitly specify UTF-8 encoding for the generated C files. Make it explicit that both input and output are plain text by using the 't' mode. This fixes: $ /usr/bin/python3 scripts/decodetree.py test.decode Traceback (most recent call last): File "scripts/decodetree.py", line 1397, in <module> main() File "scripts/decodetree.py", line 1308, in main parse_file(f, toppat) File "scripts/decodetree.py", line 994, in parse_file for line in f: File "/usr/lib/python3.6/encodings/ascii.py", line 26, in decode return codecs.ascii_decode(input, self.errors)[0] UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 80: ordinal not in range(128) Backports 4cacecaaa2bbf8af0967bd3eee43297fada475a9 --- qemu/scripts/decodetree.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/qemu/scripts/decodetree.py b/qemu/scripts/decodetree.py index 966068ea..4637b633 100644 --- a/qemu/scripts/decodetree.py +++ b/qemu/scripts/decodetree.py @@ -4,7 +4,7 @@ # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either -# version 2 of the License, or (at your option) any later version. +# version 2.1 of the License, or (at your option) any later version. 
# # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -20,6 +20,7 @@ # See the syntax and semantics in docs/devel/decodetree.rst. # +import io import os import re import sys @@ -94,7 +95,7 @@ def str_indent(c): def str_fields(fields): - """Return a string uniquely identifing FIELDS""" + """Return a string uniquely identifying FIELDS""" r = '' for n in sorted(fields.keys()): r += '_' + n @@ -814,7 +815,7 @@ def parse_generic(lineno, parent_pat, name, toks): arg = None fmt = None for t in toks: - # '&Foo' gives a format an explcit argument set. + # '&Foo' gives a format an explicit argument set. if re.fullmatch(re_arg_ident, t): tt = t[1:] if arg: @@ -903,7 +904,7 @@ def parse_generic(lineno, parent_pat, name, toks): elif not (is_format and width == 0) and width != insnwidth: error(lineno, 'definition has {0} bits'.format(width)) - # Do not check for fields overlaping fields; one valid usage + # Do not check for fields overlapping fields; one valid usage # is to be able to duplicate fields via import. fieldmask = 0 for f in flds.values(): @@ -1304,7 +1305,7 @@ def main(): for filename in args: input_file = filename - f = open(filename, 'r') + f = open(filename, 'rt', encoding='utf-8') parse_file(f, toppat) f.close() @@ -1324,9 +1325,11 @@ def main(): prop_size(stree) if output_file: - output_fd = open(output_file, 'w') + output_fd = open(output_file, 'wt', encoding='utf-8') else: - output_fd = sys.stdout + output_fd = io.TextIOWrapper(sys.stdout.buffer, + encoding=sys.stdout.encoding, + errors="ignore") output_autogen() for n in sorted(arguments.keys()):