53 lines
1.9 KiB
Python
53 lines
1.9 KiB
Python
import os
|
|
import json
|
|
import openpyxl
|
|
|
|
current_dir = os.getcwd()
|
|
# 读取配置文件
|
|
with open(os.path.join(current_dir, 'tool/cfg/cfg_xlsx.json'), 'r', encoding='utf-8-sig') as f:
|
|
cfg = json.load(f)
|
|
|
|
file_list = cfg['file_list']
|
|
target_dir = cfg['target_dir']
|
|
source_dir = cfg['source_dir']
|
|
fields_to_remove = cfg.get('fields_to_remove', [])
|
|
|
|
# 确保目标目录存在
|
|
os.makedirs(target_dir, exist_ok=True)
|
|
|
|
# 遍历文件列表并转换文件
|
|
for file_cfg in file_list:
|
|
source_file_path = os.path.join(current_dir, source_dir, file_cfg["in_file"])
|
|
target_file_path = os.path.join(current_dir, target_dir, file_cfg["out_file"])
|
|
sheet_name = file_cfg["sheet_name"]
|
|
|
|
# 读取XLSX文件
|
|
workbook = openpyxl.load_workbook(source_file_path)
|
|
sheet = workbook[sheet_name] if sheet_name else workbook.active
|
|
|
|
# 获取字段名
|
|
fieldnames = [cell.value for cell in sheet[1]]
|
|
|
|
# 读取数据并去掉第一行
|
|
rows = list(sheet.iter_rows(values_only=True))[2:]
|
|
|
|
# 以第一列的值作为索引,并移除第一列
|
|
indexed_data = {}
|
|
for row in rows:
|
|
row_dict = {fieldnames[i]: row[i] for i in range(len(fieldnames))}
|
|
index = row_dict.pop(fieldnames[0]) # 获取第一列的值作为索引并移除第一列
|
|
|
|
# 仅保留需要的字段
|
|
row_dict = {field: row_dict[field] for field in file_cfg['fields'] if field in row_dict}
|
|
for key, value in row_dict.items():
|
|
try:
|
|
row_dict[key] = json.loads(value)
|
|
except (json.JSONDecodeError, TypeError):
|
|
pass # 如果解析失败,则保持原值
|
|
indexed_data[index] = row_dict
|
|
|
|
# 将JSON写入目标文件
|
|
with open(target_file_path, 'w', encoding='utf-8-sig') as json_file:
|
|
json.dump(indexed_data, json_file, ensure_ascii=False, indent=4)
|
|
|
|
print(f"Converted: {source_file_path} to {target_file_path}") |