ZHBPythonTool/convert_cs_to_utf8.py
张宏博 450edbc9bb init
2025-12-19 16:20:53 +08:00

201 lines
6.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
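Batch-convert every .cs file in a chosen folder (including its subfolders) to UTF-8 without BOM; a single file can also be converted.
Usage:
1) Run with no arguments: the path configured in DEFAULT_TARGET_PATH (defined below) is used.
2) Pass a command-line argument: python convert_cs_to_utf8.py "<target folder path or single file path>"
- Folder given: every .cs file inside it is processed recursively;
- File given: only that file is processed (any extension).
Notes:
- All .cs files are processed recursively (when the argument is a folder);
- Several common encodings are tried automatically when decoding, and the result is written back as UTF-8 without BOM;
- Files that are already UTF-8 without BOM are skipped;
- Files that are UTF-8 with BOM have the BOM removed and are written back as UTF-8 without BOM.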
"""
from __future__ import annotations
import os
import sys
from typing import Iterable, Tuple
# Optional: graphical directory selection (intended for Windows).
# Any failure while importing tkinter simply disables the GUI features.
try:
    import tkinter as tk
    from tkinter import filedialog, messagebox
except Exception:
    _TK_AVAILABLE = False
else:
    _TK_AVAILABLE = True
# Encodings attempted when decoding a file, listed in priority order.
# NOTE(review): order matters - Python's BOM-less UTF-16 codecs accept most
# even-length byte strings, so they can match before the legacy encodings
# further down are ever reached.
CANDIDATE_ENCODINGS: Tuple[str, ...] = (
    # Common UTF family first
    "utf-8",
    "utf-8-sig",
    "utf-16",
    "utf-16-le",
    "utf-16-be",
    # Common Chinese / East Asian encodings
    "gb18030",
    "gbk",
    "big5",
    "shift_jis",
    # Common Western encoding
    "cp1252",
)
# Default path: used when no command-line argument is provided.
# NOTE(review): this is a machine-specific absolute Windows path - confirm or
# override it via the command line before running on another machine.
DEFAULT_TARGET_PATH = r"e:\WorkSpace\MeowMent_New\Assets\GameMain\Scripts\Hotfix\Entity\LevelItem.cs"
def read_bytes(path: str) -> bytes:
    """Return the complete content of the file at ``path`` as raw bytes."""
    with open(path, "rb") as stream:
        content = stream.read()
    return content
def detect_and_decode(data: bytes, encodings: Iterable[str] | None = None) -> Tuple[str, str]:
    """Decode ``data`` by trying several encodings; return ``(encoding, text)``.

    Strategy:
    - A UTF-8 BOM selects ``utf-8-sig`` so the BOM is stripped.
    - A UTF-16 BOM selects ``utf-16`` (the codec consumes the BOM and picks
      the byte order from it).
    - Otherwise the candidates are tried in order and the first one that
      decodes wins. BOM-less UTF-16 candidates are only attempted when the
      data contains a NUL byte: without that guard the UTF-16 codecs accept
      almost any even-length byte string, so e.g. a GBK file would be
      "decoded" into garbage before the legacy encodings were ever tried.

    Args:
        data: Raw file content.
        encodings: Candidate encoding names in priority order. Defaults to
            the module-level ``CANDIDATE_ENCODINGS``.

    Returns:
        A tuple of the encoding name that succeeded and the decoded text.

    Raises:
        UnicodeDecodeError: If no candidate can decode ``data`` (the last
            decoding error is re-raised).
        LookupError: If the last failing candidate is an unknown codec name.
    """
    # UTF-8 BOM: decode with utf-8-sig so the BOM is removed from the text.
    if data.startswith(b"\xef\xbb\xbf"):
        return "utf-8-sig", data.decode("utf-8-sig")

    last_err: Exception | None = None

    # UTF-16 BOM (little- or big-endian): the generic codec handles both.
    if data.startswith((b"\xff\xfe", b"\xfe\xff")):
        try:
            return "utf-16", data.decode("utf-16")
        except UnicodeDecodeError as exc:
            # e.g. truncated data; fall through to the regular candidates.
            last_err = exc

    if encodings is None:
        encodings = CANDIDATE_ENCODINGS

    # UTF-16 encodes every ASCII character (including line breaks) with a NUL
    # byte, so text files without any NUL byte are not plausibly UTF-16.
    may_be_utf16 = b"\x00" in data

    for enc in encodings:
        normalized = enc.lower().replace("-", "").replace("_", "")
        if normalized.startswith("utf16") and not may_be_utf16:
            continue
        try:
            return enc, data.decode(enc)
        except (UnicodeError, LookupError) as exc:
            last_err = exc

    # Every candidate failed (or was skipped).
    if last_err is not None:
        raise last_err
    raise UnicodeDecodeError("unknown", data, 0, 1, "cannot detect encoding")
def is_utf8_without_bom(data: bytes) -> bool:
    """Roughly check whether ``data`` is UTF-8 encoded and carries no BOM.

    Returns ``False`` when the data starts with the UTF-8 BOM or cannot be
    decoded as UTF-8, ``True`` otherwise.
    """
    has_bom = data[:3] == b"\xef\xbb\xbf"
    if has_bom:
        return False
    try:
        data.decode("utf-8")
    except UnicodeDecodeError:
        return False
    return True
def iter_cs_files(root: str) -> Iterable[str]:
    """Yield the path of every ``.cs`` file under ``root``, recursively.

    The extension check is case-insensitive; each yielded path is the
    directory reported by ``os.walk`` joined with the file name.
    """
    for folder, _subdirs, names in os.walk(root):
        yield from (
            os.path.join(folder, entry)
            for entry in names
            if entry.lower().endswith(".cs")
        )
def convert_file_to_utf8(path: str) -> Tuple[bool, str | None]:
    """Convert a single file to UTF-8 without BOM, in place.

    Args:
        path: Path of the file to convert.

    Returns:
        ``(modified, error)``: ``modified`` is True when the file was
        rewritten; ``error`` is a non-empty message when processing failed,
        otherwise None. A file that is already UTF-8 without BOM is left
        untouched and reported as ``(False, None)``.
    """
    try:
        raw = read_bytes(path)
        # Already UTF-8 without BOM -> nothing to do.
        if is_utf8_without_bom(raw):
            return False, None
        # Try to decode with the known candidate encodings.
        _source_encoding, text = detect_and_decode(raw)
        # Encode BEFORE opening the file for writing: opening truncates the
        # file immediately, so a late encoding failure would otherwise leave
        # it empty.
        encoded = text.encode("utf-8")
        # Binary write: no BOM is added and line endings stay exactly as decoded.
        with open(path, "wb") as f:
            f.write(encoded)
        return True, None
    except Exception as e:
        # Per-file boundary of a batch run: report the failure instead of
        # aborting. Fall back to repr() so the message is never empty, which
        # callers would mistake for "no error".
        return False, str(e) or repr(e)
def choose_directory_gui() -> str | None:
    """Ask the user to pick a root folder through a graphical dialog.

    Returns:
        The selected directory path, or None when tkinter is unavailable,
        no display can be opened, or the user cancels the dialog.
    """
    if not _TK_AVAILABLE:
        return None
    try:
        root = tk.Tk()
    except tk.TclError:
        # tkinter imported fine but there is no usable display.
        return None
    try:
        # Hide the empty root window; only the dialog should be visible.
        root.withdraw()
        root.update()
        path = filedialog.askdirectory(title="选择需要转换的根文件夹")
    finally:
        # Always release the Tk root, even if the dialog raised.
        root.destroy()
    # The dialog yields an empty value when cancelled.
    return path or None
def main(argv: list[str]) -> int:
    """Convert the requested file or directory tree and print a summary.

    Args:
        argv: Full argument vector including the program name. ``argv[1]``,
            when present and non-blank, is the file or directory to process;
            otherwise ``DEFAULT_TARGET_PATH`` is used.

    Returns:
        0 when every file was processed without error, 1 when the target
        path is neither a directory nor a file, 2 when at least one file
        failed.
    """
    # 1) Prefer the command-line argument; 2) otherwise use the default path.
    cli_target = argv[1].strip() if len(argv) >= 2 else ""
    target_path = cli_target or DEFAULT_TARGET_PATH

    # Collect the files to process.
    files_to_process: list[str] = []
    if os.path.isdir(target_path):
        # Directory: only .cs files are processed.
        files_to_process = list(iter_cs_files(target_path))
        if not files_to_process:
            print(f"目录下未找到 .cs 文件:{target_path}")
    elif os.path.isfile(target_path):
        # Single file: any extension is accepted.
        files_to_process = [target_path]
    else:
        print(f"提供的路径无效:{target_path}")
        return 1

    changed = 0
    skipped = 0
    error_list: list[tuple[str, str]] = []

    print(f"开始处理: {target_path}")
    for file_path in files_to_process:
        modified, err = convert_file_to_utf8(file_path)
        # Compare against None: an empty error message is still an error.
        if err is not None:
            error_list.append((file_path, err))
            print(f"[错误] {file_path}: {err}")
        elif modified:
            changed += 1
            print(f"[已转换] {file_path}")
        else:
            skipped += 1

    total = len(files_to_process)
    errors = len(error_list)

    print("\n处理完成:")
    print(f" 总计 .cs 文件: {total}")
    print(f" 已转换为 UTF-8无 BOM: {changed}")
    print(f" 已跳过(本就是 UTF-8 无 BOM: {skipped}")
    print(f" 发生错误: {errors}")

    # When started without arguments (e.g. by double-click) and tkinter is
    # available, also show the summary in a dialog. This is best-effort: a
    # failure here must not change the exit code.
    if _TK_AVAILABLE and (len(argv) == 1):
        msg = (f"总计: {total}\n"
               f"转换: {changed}\n"
               f"跳过: {skipped}\n"
               f"错误: {errors}")
        try:
            # Create and hide an explicit root; otherwise Tk opens an empty
            # default window next to the message box.
            root = tk.Tk()
            root.withdraw()
            try:
                messagebox.showinfo("转换完成", msg)
            finally:
                root.destroy()
        except Exception as exc:
            print(f"[提示] 无法显示汇总弹窗: {exc}", file=sys.stderr)

    # Repeat the error list at the very end so it is easy to find.
    if error_list:
        print("\n错误详情:")
        for failed_path, message in error_list:
            print(f"- {failed_path}: {message}")

    return 0 if errors == 0 else 2
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv))