diff --git a/doc/contributing/source/coding-style.rst b/doc/contributing/source/coding-style.rst index dbc23d640..449eec853 100644 --- a/doc/contributing/source/coding-style.rst +++ b/doc/contributing/source/coding-style.rst @@ -157,6 +157,7 @@ The script performs multiple style checks. By default, the script runs the follo * Check if there are no trailing whitespace. Always checked. * Check if there are no tabs. Respects clang-format guards. * Check if source code files use SPDX licenses rather than GPL license text. Respects clang-format guards. +* Check if files have the correct encoding (UTF-8). Always checked. The process returns a zero exit code if all files adhere to these rules. If there are files that do not comply with the rules, the process returns a non-zero @@ -170,6 +171,7 @@ flags: * ``--no-whitespace`` * ``--no-tabs`` * ``--no-licenses`` +* ``--no-encoding`` Additional information about the formatting issues detected by the script can be enabled by adding the ``-v, --verbose`` flag. diff --git a/utils/check-style-clang-format.py b/utils/check-style-clang-format.py index 3b6b4af8f..9d1bf1ffd 100755 --- a/utils/check-style-clang-format.py +++ b/utils/check-style-clang-format.py @@ -16,6 +16,7 @@ the ".clang-format" file. This script performs the following checks / fixes: - Check / trim trailing whitespace. Always checked. - Check / replace tabs with spaces. Respects clang-format guards. - Check / fix SPDX licenses rather than GPL text. Respects clang-format guards. +- Check file encoding. Always checked. This script can be applied to all text files in a given path or to individual files. 
def check_encoding_file(
    filename: str,
    fix: bool,
    verbose: bool,
) -> Tuple[str, bool, List[str]]:
    """
    Check / fix the encoding of a file.

    The file is read as raw bytes and strictly decoded with FILE_ENCODING.
    When fixing, the invalid bytes are replaced by the Unicode replacement
    character (U+FFFD) and the file is rewritten in FILE_ENCODING, keeping the
    original line endings untouched.

    @param filename Name of the file to be checked.
    @param fix Whether to fix (True) or just check (False) the encoding of the file.
    @param verbose Show the lines that are not compliant with the style.
    @return Tuple [Filename,
                   Whether the file is compliant with the style (before the check),
                   Verbose information].
    """

    verbose_infos: List[str] = []
    is_file_compliant = True

    with open(filename, "rb") as f:
        file_data = f.read()

    # Check if file has correct encoding
    try:
        file_data.decode(FILE_ENCODING)

    except UnicodeDecodeError as decode_error:
        is_file_compliant = False

        # Lossy decoding is only needed for non-compliant files
        file_lines = file_data.decode(FILE_ENCODING, errors="replace").splitlines(keepends=True)

        if verbose:
            # UnicodeDecodeError.start is a *byte* offset into file_data, whereas
            # file_lines holds decoded text. Convert it to a character offset by
            # decoding the bytes that precede the first invalid byte; otherwise any
            # multi-byte character before the error shifts the reported position.
            bad_char_start_index = len(
                file_data[: decode_error.start].decode(FILE_ENCODING, errors="replace")
            )
            n_chars_file_read = 0

            # Find line and column with bad encoding
            for line_number, line in enumerate(file_lines):
                n_chars_line = len(line)

                if bad_char_start_index < n_chars_file_read + n_chars_line:
                    bad_char_column = bad_char_start_index - n_chars_file_read

                    verbose_infos.extend(
                        [
                            f"{filename}:{line_number + 1}:{bad_char_column + 1}: error: bad {FILE_ENCODING} encoding",
                            f" {line.rstrip()}",
                            f" {'':{bad_char_column}}^",
                        ]
                    )

                    break

                n_chars_file_read += n_chars_line

        # Fix file encoding
        if fix:
            # newline="" disables newline translation on write: the lines were read
            # in binary mode and still carry their original terminators, so they
            # must be written back verbatim (e.g. "\r\n" must not become "\r\r\n").
            with open(filename, "w", encoding=FILE_ENCODING, newline="") as f:
                f.writelines(file_lines)

    return (filename, is_file_compliant, verbose_infos)
" "The script checks the formatting of the files using clang-format and" " other coding style rules manually (see script arguments). " - "All checks respect clang-format guards, except trailing whitespace, which is always checked. " + "All checks respect clang-format guards, except trailing whitespace and file encoding," + " which are always checked. " 'When used in "check mode" (default), the script runs all checks in all files. ' "If it detects non-formatted files, they will be printed and this process exits with a non-zero code. " 'When used in "fix mode", this script automatically fixes the files and exits with 0 code.' @@ -808,6 +879,12 @@ if __name__ == "__main__": help="Do not check / fix code formatting (respects clang-format guards)", ) + parser.add_argument( + "--no-encoding", + action="store_true", + help=f"Do not check / fix file encoding ({FILE_ENCODING})", + ) + parser.add_argument( "--fix", action="store_true", @@ -840,14 +917,15 @@ if __name__ == "__main__": "tabs": not args.no_tabs, "license": not args.no_licenses, "formatting": not args.no_formatting, + "encoding": not args.no_encoding, }, fix=args.fix, verbose=args.verbose, n_jobs=args.jobs, ) - except Exception as e: - print("ERROR:", e) + except Exception as ex: + print("ERROR:", ex) sys.exit(1) if not all_checks_successful: