201 lines
6.1 KiB
Python
201 lines
6.1 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
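Batch-convert every .cs file under a selected folder (including sub-folders)
to UTF-8 without a BOM. A single file can also be converted.

Usage:
1) Run with no arguments: the path stored in DEFAULT_TARGET_PATH is used;
2) Pass a path on the command line:
   python convert_cs_to_utf8.py "<target folder path or single file path>"
   - folder: every .cs file inside it is processed recursively;
   - file: only that file is processed (any extension is accepted).

Notes:
- All .cs files are processed recursively (when a folder is given);
- Several common encodings are tried for decoding, and the result is always
  written back as UTF-8 (no BOM);
- Files that are already UTF-8 without a BOM are skipped;
- A file that is UTF-8 with a BOM has the BOM removed and is written back as
  UTF-8 (no BOM).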
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
import os
|
||
import sys
|
||
from typing import Iterable, Tuple
|
||
|
||
# Optional GUI support (folder picker / summary popup). tkinter may be missing
# from the interpreter, so a failed import only clears the availability flag.
try:
    import tkinter as tk
    from tkinter import filedialog, messagebox
except Exception:
    _TK_AVAILABLE = False
else:
    _TK_AVAILABLE = True
|
||
|
||
|
||
# Encodings attempted when decoding a file. They are tried in the order listed
# and the first one that decodes without error is used, so order matters.
CANDIDATE_ENCODINGS: Tuple[str, ...] = (
    # Unicode transformation formats first.
    "utf-8",
    "utf-8-sig",
    "utf-16",
    "utf-16-le",
    "utf-16-be",
    # Common Chinese / East-Asian legacy encodings.
    "gb18030",
    "gbk",
    "big5",
    "shift_jis",
    # Common Western encoding.
    "cp1252",
)
|
||
|
||
# Default target: used by main() when no command-line argument is supplied.
# NOTE(review): this is an absolute path on a specific Windows drive,
# presumably a developer-local location -- confirm before running elsewhere.
DEFAULT_TARGET_PATH = r"e:\WorkSpace\MeowMent_New\Assets\GameMain\Scripts\Hotfix\Entity\LevelItem.cs"
|
||
|
||
|
||
def read_bytes(path: str) -> bytes:
    """Return the complete contents of the file at *path* as raw bytes."""
    with open(path, mode="rb") as stream:
        payload = stream.read()
    return payload
|
||
|
||
|
||
def detect_and_decode(data: bytes) -> Tuple[str, str]:
    """Decode *data* by trying known encodings; return ``(encoding, text)``.

    Strategy:
    - A UTF-8 BOM selects ``utf-8-sig`` so the BOM is stripped.
    - A UTF-32 or UTF-16 BOM selects the matching BOM-aware codec.
    - Otherwise every entry of ``CANDIDATE_ENCODINGS`` is tried in order and
      the first one that decodes without error wins.  BOM-less UTF-16/UTF-32
      candidates are only attempted when the data contains a NUL byte: the
      UTF-16 codecs accept nearly any even-length byte string, so without
      this guard an even-length GBK/Big5/Shift-JIS file would be "decoded"
      as UTF-16 mojibake before the correct legacy codec is ever reached.

    Args:
        data: Raw file contents.

    Returns:
        A tuple of the codec name that succeeded and the decoded text.

    Raises:
        Exception: The error from the last failed candidate when none
            succeeds (normally a ``UnicodeDecodeError``), or a
            ``UnicodeDecodeError`` if no candidate could be attempted.
    """
    # UTF-8 BOM: decode with utf-8-sig so the BOM does not end up in the text.
    if data.startswith(b"\xef\xbb\xbf"):
        return "utf-8-sig", data.decode("utf-8-sig")

    # UTF-32 is checked before UTF-16 because the UTF-32-LE BOM starts with
    # the same two bytes as the UTF-16-LE BOM.
    bom_codecs = (
        (b"\xff\xfe\x00\x00", "utf-32"),
        (b"\x00\x00\xfe\xff", "utf-32"),
        (b"\xff\xfe", "utf-16"),
        (b"\xfe\xff", "utf-16"),
    )
    for bom, codec in bom_codecs:
        if data.startswith(bom):
            try:
                return codec, data.decode(codec)
            except UnicodeDecodeError:
                # Looked like a BOM but the payload is not valid for that
                # codec; keep looking.
                continue

    # Any ASCII character (including a newline) encoded as UTF-16/UTF-32
    # produces a NUL byte, whereas legacy text encodings never contain one.
    nul_present = b"\x00" in data

    last_err: Exception | None = None
    for enc in CANDIDATE_ENCODINGS:
        family = enc.lower().replace("_", "").replace("-", "")
        if family.startswith(("utf16", "utf32")) and not nul_present:
            continue
        try:
            return enc, data.decode(enc)
        except (UnicodeError, LookupError) as exc:
            # LookupError covers an unknown codec name in the candidate list.
            last_err = exc

    # Every candidate failed (or was skipped).
    if last_err is not None:
        raise last_err
    raise UnicodeDecodeError("unknown", data, 0, 1, "cannot detect encoding")
|
||
|
||
|
||
def is_utf8_without_bom(data: bytes) -> bool:
    """Roughly check whether *data* is UTF-8 text that has no leading BOM.

    Returns False when the data starts with the UTF-8 BOM or cannot be
    decoded as UTF-8, and True otherwise.
    """
    utf8_bom = b"\xef\xbb\xbf"
    if data[:3] == utf8_bom:
        return False
    try:
        data.decode("utf-8")
    except UnicodeDecodeError:
        return False
    else:
        return True
|
||
|
||
|
||
def iter_cs_files(root: str) -> Iterable[str]:
    """Yield the path of every ``.cs`` file found under *root*, recursively.

    The extension match is case-insensitive. Paths are built by joining the
    directory reported by ``os.walk`` with the file name.
    """
    for folder, _subdirs, entries in os.walk(root):
        yield from (
            os.path.join(folder, entry)
            for entry in entries
            if entry.lower().endswith(".cs")
        )
|
||
|
||
|
||
def convert_file_to_utf8(path: str) -> Tuple[bool, str | None]:
    """Rewrite the file at *path* in place as UTF-8 without a BOM.

    Returns:
        ``(changed, error)``. ``changed`` is True when the file was rewritten.
        ``error`` is None on success, otherwise the message of the exception
        that was raised (in which case ``changed`` is False).
    """
    try:
        original = read_bytes(path)
        if not is_utf8_without_bom(original):
            _source_encoding, decoded = detect_and_decode(original)
            # newline="" turns off newline translation, so the line endings
            # present in the decoded text are written back unchanged.
            with open(path, "w", encoding="utf-8", newline="") as out:
                out.write(decoded)
            return True, None
        # Already UTF-8 without a BOM: nothing to do.
        return False, None
    except Exception as exc:
        return False, str(exc)
|
||
|
||
|
||
def choose_directory_gui() -> str | None:
    """Ask the user to pick a folder through a Tk directory dialog.

    Returns:
        The selected path, or None when tkinter is unavailable or the dialog
        yields no path.
    """
    if not _TK_AVAILABLE:
        return None

    root = tk.Tk()
    try:
        # Hide the empty root window so only the dialog itself is shown.
        root.withdraw()
        root.update()
        path = filedialog.askdirectory(title="选择需要转换的根文件夹")
    finally:
        # Always tear the root down, even if the dialog raised.
        root.destroy()
    return path or None
|
||
|
||
|
||
def main(argv: list[str]) -> int:
    """Convert the requested file or folder to UTF-8 and print a summary.

    Args:
        argv: Full argument vector with the program name first. When
            ``argv[1]`` is present and not blank it is the file or folder to
            process; otherwise ``DEFAULT_TARGET_PATH`` is used.

    Returns:
        0 when every file was processed without error, 1 when the target is
        neither an existing file nor an existing directory, and 2 when at
        least one file failed.
    """
    # 1) Prefer the command-line argument; 2) otherwise use the default path.
    cli_target = argv[1].strip() if len(argv) >= 2 else ""
    target_path = cli_target or DEFAULT_TARGET_PATH

    # Collect the files to process.
    if os.path.isdir(target_path):
        # Directory: only .cs files are processed.
        files_to_process = list(iter_cs_files(target_path))
        if not files_to_process:
            print(f"目录下未找到 .cs 文件:{target_path}")
    elif os.path.isfile(target_path):
        # Single file: any extension is accepted.
        files_to_process = [target_path]
    else:
        print(f"提供的路径无效:{target_path}")
        return 1

    changed = 0
    skipped = 0
    error_list: list[tuple[str, str]] = []

    print(f"开始处理: {target_path}")
    for file_path in files_to_process:
        modified, err = convert_file_to_utf8(file_path)
        # Compare against None rather than truthiness: an exception whose
        # message is the empty string is still a failure, not a skip.
        if err is not None:
            error_list.append((file_path, err))
            print(f"[错误] {file_path}: {err}")
        elif modified:
            changed += 1
            print(f"[已转换] {file_path}")
        else:
            skipped += 1

    total = len(files_to_process)
    errors = len(error_list)

    print("\n处理完成:")
    print(f" 总计 .cs 文件: {total}")
    print(f" 已转换为 UTF-8(无 BOM): {changed}")
    print(f" 已跳过(本就是 UTF-8 无 BOM): {skipped}")
    print(f" 发生错误: {errors}")

    # When started without arguments and tkinter is importable, also show the
    # summary in a popup. This is best-effort: a failure to show the dialog
    # must not change the exit status.
    if _TK_AVAILABLE and (len(argv) == 1):
        try:
            msg = (f"总计: {total}\n"
                   f"转换: {changed}\n"
                   f"跳过: {skipped}\n"
                   f"错误: {errors}")
            messagebox.showinfo("转换完成", msg)
        except Exception:
            pass

    # Repeat the failures at the end so they are easy to find.
    if error_list:
        print("\n错误详情:")
        for p, e in error_list:
            print(f"- {p}: {e}")

    return 0 if errors == 0 else 2
|
||
|
||
|
||
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv))
|