import json

import jsonlines


def gen_det_label(input_label, out_label) -> None:
    """Convert a JSON Lines table annotation file into a detection label file.

    Every record read from ``input_label`` produces exactly one line in
    ``out_label``: the record's ``filename``, a tab character, and a JSON
    list with one ``{"transcription": ..., "points": ...}`` object per entry
    of ``record['html']['cells']``.

    Args:
        input_label: Path of the JSON Lines annotation file to read.
        out_label: Path of the label file to write. It is opened in ``'w'``
            mode (existing content is replaced) and encoded as UTF-8.

    Raises:
        KeyError: If a record lacks ``filename`` / ``html`` / ``cells`` or a
            cell lacks ``tokens`` / ``bbox``.
    """
    # The output file is opened first, matching the original order, so it is
    # created/truncated even when the input cannot be opened.
    with open(out_label, 'w', encoding='UTF-8') as out_file, \
            jsonlines.open(input_label, 'r') as rfd:
        for data in rfd:
            img_path = data['filename']
            label = [
                {
                    # Tokens are concatenated with no separator.
                    "transcription": "".join(cell['tokens']),
                    # Only the first element of the cell's 'bbox' is kept.
                    # NOTE(review): presumably 'bbox' is a list of boxes and
                    # the first one is the cell outline - confirm against the
                    # dataset schema.
                    "points": cell['bbox'][0],
                }
                for cell in data['html']['cells']
            ]
            # ensure_ascii=False keeps non-ASCII text readable in the output.
            out_file.write(
                img_path + '\t' + json.dumps(label, ensure_ascii=False) + '\n'
            )