123456789101112131415161718 |
- import json
- import jsonlines
def gen_det_label(input_label, out_label):
    """Convert a JSON-lines table annotation file into a detection label file.

    Each record read from ``input_label`` is expected to provide
    ``record['filename']`` and ``record['html']['cells']``; every cell is
    expected to provide ``cell['tokens']`` and ``cell['bbox']``.  For each
    record, one line is written to ``out_label``::

        <filename>\\t<JSON list of {"transcription": ..., "points": ...}>\\n

    Args:
        input_label: Path of the JSON-lines annotation file to read.
        out_label: Path of the label file to create; it is opened in ``'w'``
            mode, so an existing file is overwritten.

    Raises:
        KeyError: If a record or cell lacks one of the keys listed above
            (assuming records and cells are dicts).
    """
    # Both files are held open for the whole conversion, so records are
    # streamed one at a time instead of being loaded up front.
    with open(out_label, 'w', encoding='UTF-8') as out_file, \
            jsonlines.open(input_label, 'r') as reader:
        for record in reader:
            img_path = record['filename']
            label = [
                {
                    # Tokens are concatenated with no separator.
                    "transcription": ''.join(cell['tokens']),
                    # Only the first element of 'bbox' is kept.
                    # NOTE(review): presumably 'bbox' is a list of boxes or
                    # polygons here; if it is a flat [x0, y0, x1, y1] list
                    # this stores just x0 -- confirm against the dataset.
                    "points": cell['bbox'][0],
                }
                for cell in record['html']['cells']
            ]
            # ensure_ascii=False writes non-ASCII text as-is rather than as
            # \uXXXX escapes.
            out_file.write(
                img_path + '\t' + json.dumps(label, ensure_ascii=False) + '\n'
            )
|