"""Convert configured XLSX sheets into key-indexed JSON files.

The tool reads ``tool/cfg/cfg_xlsx.json`` (relative to the current working
directory). The config provides ``file_list``, ``source_dir``, ``target_dir``
and an optional ``fields_to_remove`` list. Every ``file_list`` entry provides
``in_file``, ``out_file``, ``sheet_name``, ``key`` and ``fields``.
"""
import json
import os

# Location of the tool configuration, relative to the working directory.
CONFIG_PATH = 'tool/cfg/cfg_xlsx.json'

# Number of leading sheet rows that are not data. Row 1 holds the field
# names; row 2 is skipped as well.
# NOTE(review): row 2 is presumably a description/type row - confirm against
# the actual sheets.
HEADER_ROW_COUNT = 2


def decode_cell(value):
    """Return *value* parsed as JSON when possible, otherwise unchanged.

    Cells that are not valid JSON text (plain strings, numbers, ``None``,
    ...) are kept exactly as they were read from the sheet.
    """
    try:
        return json.loads(value)
    except (json.JSONDecodeError, TypeError):
        return value


def build_record(fieldnames, row, key, fields, fields_to_remove=()):
    """Turn one sheet row into an ``(index, record)`` pair.

    Args:
        fieldnames: Column names taken from the header row.
        row: Cell values of the row, in column order.
        key: Name of the column whose value becomes the index.
        fields: Names of the columns to keep, in output order.
        fields_to_remove: Names of columns to drop even when listed in
            *fields*.

    Returns:
        A tuple ``(index, record)`` where *record* maps each kept field to
        its JSON-decoded cell value.

    Raises:
        KeyError: If *key* is not one of *fieldnames*.
    """
    cells = dict(zip(fieldnames, row))
    # The key column becomes the index and is never part of the record.
    index = cells.pop(key)
    excluded = set(fields_to_remove)
    record = {
        field: decode_cell(cells[field])
        for field in fields
        if field in cells and field not in excluded
    }
    return index, record


def index_rows(fieldnames, rows, key, fields, fields_to_remove=()):
    """Build the ``{index: record}`` mapping for all data rows.

    Completely empty rows are skipped: spreadsheets frequently report blank
    trailing rows, which would otherwise collapse into a single ``null``
    entry. When two rows share an index the later row wins.
    """
    indexed = {}
    for row in rows:
        if all(cell is None for cell in row):
            continue
        index, record = build_record(
            fieldnames, row, key, fields, fields_to_remove
        )
        indexed[index] = record
    return indexed


def convert_file(current_dir, source_dir, target_dir, file_cfg,
                 fields_to_remove=()):
    """Convert one configured XLSX sheet into a JSON file.

    Args:
        current_dir: Base directory that the other paths are joined onto.
        source_dir: Directory holding the input workbook.
        target_dir: Directory receiving the JSON output.
        file_cfg: One ``file_list`` entry from the configuration.
        fields_to_remove: Field names dropped from every record.

    Returns:
        A tuple ``(source_file_path, target_file_path)``.
    """
    # Imported here so the pure row-shaping helpers above stay importable
    # without the spreadsheet dependency.
    import openpyxl

    source_file_path = os.path.join(current_dir, source_dir, file_cfg["in_file"])
    target_file_path = os.path.join(current_dir, target_dir, file_cfg["out_file"])
    sheet_name = file_cfg["sheet_name"]

    workbook = openpyxl.load_workbook(source_file_path)
    try:
        # An empty/missing sheet name selects the workbook's active sheet.
        sheet = workbook[sheet_name] if sheet_name else workbook.active
        fieldnames = [cell.value for cell in sheet[1]]
        data_rows = sheet.iter_rows(
            min_row=HEADER_ROW_COUNT + 1, values_only=True
        )
        indexed_data = index_rows(
            fieldnames,
            data_rows,
            file_cfg["key"],
            file_cfg['fields'],
            fields_to_remove,
        )
    finally:
        workbook.close()

    # "out_file" may itself contain sub-directories; make sure they exist.
    os.makedirs(os.path.dirname(target_file_path), exist_ok=True)
    with open(target_file_path, 'w', encoding='utf-8') as json_file:
        json.dump(indexed_data, json_file, ensure_ascii=False, indent=4)

    return source_file_path, target_file_path


def main():
    """Read the tool configuration and convert every listed file."""
    current_dir = os.getcwd()

    with open(os.path.join(current_dir, CONFIG_PATH), 'r', encoding='utf-8') as f:
        cfg = json.load(f)

    file_list = cfg['file_list']
    target_dir = cfg['target_dir']
    source_dir = cfg['source_dir']
    fields_to_remove = cfg.get('fields_to_remove', [])

    # Make sure the target directory exists even when nothing is converted.
    os.makedirs(os.path.join(current_dir, target_dir), exist_ok=True)

    for file_cfg in file_list:
        source_file_path, target_file_path = convert_file(
            current_dir, source_dir, target_dir, file_cfg, fields_to_remove
        )
        print(f"Converted: {source_file_path} to {target_file_path}")


if __name__ == "__main__":
    main()