import os
import json
import openpyxl

current_dir = os.getcwd()
# 读取配置文件
with open(os.path.join(current_dir, 'tool/cfg/cfg_xlsx.json'), 'r', encoding='utf-8') as f:
    cfg = json.load(f)

file_list = cfg['file_list']
target_dir = cfg['target_dir']
source_dir = cfg['source_dir']
fields_to_remove = cfg.get('fields_to_remove', [])

# 确保目标目录存在
os.makedirs(target_dir, exist_ok=True)

# 遍历文件列表并转换文件
for file_cfg in file_list:
    source_file_path = os.path.join(current_dir, source_dir, file_cfg["in_file"])
    target_file_path = os.path.join(current_dir, target_dir, file_cfg["out_file"])
    sheet_name = file_cfg["sheet_name"]

    # 读取XLSX文件
    workbook = openpyxl.load_workbook(source_file_path)
    sheet = workbook[sheet_name] if sheet_name else workbook.active
    
    # 获取字段名
    fieldnames = [cell.value for cell in sheet[1]]
    
    # 读取数据并去掉第一行
    rows = list(sheet.iter_rows(values_only=True))[2:]
    
    # 以第一列的值作为索引，并移除第一列
    indexed_data = {}
    for row in rows:
        row_dict = {fieldnames[i]: row[i] for i in range(len(fieldnames))}
        index = row_dict.pop(file_cfg["key"])  # 获取第一列的值作为索引并移除第一列
        
        # 仅保留需要的字段
        row_dict = {field: row_dict[field] for field in file_cfg['fields'] if field in row_dict}
        for key, value in row_dict.items():
            try:
                row_dict[key] = json.loads(value)
            except (json.JSONDecodeError, TypeError):
                pass  # 如果解析失败，则保持原值
        indexed_data[index] = row_dict
    
    # 将JSON写入目标文件
    with open(target_file_path, 'w', encoding='utf-8') as json_file:
        json.dump(indexed_data, json_file, ensure_ascii=False, indent=4)
    
    print(f"Converted: {source_file_path} to {target_file_path}")