import json

from utils.es import ElasticsearchOperations


def clean_law_document(ops: ElasticsearchOperations) -> None:
    """Load the regulations JSON file and remove each article's chunks from Elasticsearch."""
    with open(r"D:\work\03\regulations.json", "r", encoding="utf-8") as f:
        json_data = json.load(f)
    print(">>> finished loading document")

    if json_data:
        total = len(json_data)
        for index, item in enumerate(json_data, start=1):
            title = item["meta_data"]["ArticleTitle"]
            # Delete the previously indexed chunks for this article.
            ops.del_document(item["article_text"], title)
            print(title, f"processed {index}/{total}")


# Usage example
if __name__ == "__main__":
    es_ops = ElasticsearchOperations()
    # Drop the whole index; ignore "already exists" (400) and "not found" (404) errors.
    es_ops.es.indices.delete(index="text_chunks", ignore=[400, 404])
    print(">>> finished delete index")
    # clean_law_document(es_ops)
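
# For reference: the script assumes utils.es.ElasticsearchOperations exposes roughly
# the interface sketched below (names inferred from the calls above; this is an
# illustrative sketch, not the actual implementation in utils/es.py):
#
#     class ElasticsearchOperations:
#         def __init__(self, host: str = "http://localhost:9200"):
#             self.es = Elasticsearch(host)
#
#         def del_document(self, text: str, title: str) -> None:
#             """Delete indexed chunks matching the given article text and title."""
#             ...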