pet_home_server/tool/cfg/tool_cfg.py
2024-11-21 19:36:12 +08:00

53 lines
1.9 KiB
Python

import os
import json
import openpyxl
current_dir = os.getcwd()
# 读取配置文件
with open(os.path.join(current_dir, 'tool/cfg/cfg_xlsx.json'), 'r', encoding='utf-8') as f:
cfg = json.load(f)
file_list = cfg['file_list']
target_dir = cfg['target_dir']
source_dir = cfg['source_dir']
fields_to_remove = cfg.get('fields_to_remove', [])
# 确保目标目录存在
os.makedirs(target_dir, exist_ok=True)
# 遍历文件列表并转换文件
for file_cfg in file_list:
source_file_path = os.path.join(current_dir, source_dir, file_cfg["in_file"])
target_file_path = os.path.join(current_dir, target_dir, file_cfg["out_file"])
sheet_name = file_cfg["sheet_name"]
# 读取XLSX文件
workbook = openpyxl.load_workbook(source_file_path)
sheet = workbook[sheet_name] if sheet_name else workbook.active
# 获取字段名
fieldnames = [cell.value for cell in sheet[1]]
# 读取数据并去掉第一行
rows = list(sheet.iter_rows(values_only=True))[2:]
# 以第一列的值作为索引,并移除第一列
indexed_data = {}
for row in rows:
row_dict = {fieldnames[i]: row[i] for i in range(len(fieldnames))}
index = row_dict.pop(fieldnames[0]) # 获取第一列的值作为索引并移除第一列
# 仅保留需要的字段
row_dict = {field: row_dict[field] for field in file_cfg['fields'] if field in row_dict}
for key, value in row_dict.items():
try:
row_dict[key] = json.loads(value)
except (json.JSONDecodeError, TypeError):
pass # 如果解析失败,则保持原值
indexed_data[index] = row_dict
# 将JSON写入目标文件
with open(target_file_path, 'w', encoding='utf-8') as json_file:
json.dump(indexed_data, json_file, ensure_ascii=False, indent=4)
print(f"Converted: {source_file_path} to {target_file_path}")