12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970 |
- import pandas as pd
- from sqlalchemy.orm import Session
- from service.kg_node_service import KGNodeService
- from service.kg_prop_service import KGPropService
- import logging
- from utils.vectorizer import Vectorizer
- logger = logging.getLogger(__name__)
class ExcelImporter:
    """Load entity/property rows from an Excel sheet into the knowledge graph.

    The sheet is read without a header row. Column 0 of each row is used as
    the entity (node) name and column 1, when present, as the value of a
    single property whose name is supplied by the caller.
    """

    def __init__(self, db: Session):
        """Build the node and property services on top of the given session."""
        self.node_service = KGNodeService(db)
        self.prop_service = KGPropService(db)

    @staticmethod
    def _cell_text(value) -> str:
        """Return a cell value as stripped text, mapping missing cells to ''.

        pandas represents empty Excel cells as NaN (or None/NaT); calling
        ``str()`` on those yields the literal text 'nan', which would then be
        imported as a real entity name or property value.
        """
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return ''
        return str(value).strip()

    def import_from_excel(self, file_path: str, category: str, prop_name: str):
        """Import every row of the Excel file at *file_path*.

        For each row with a non-empty entity name, the node identified by
        (name, *category*) is looked up and created if absent. If the row also
        carries a non-empty property value and the node has no property named
        *prop_name* yet, that property is created. Existing properties are
        left untouched.

        Args:
            file_path: Path of the Excel workbook to read.
            category: Category assigned to (and used to look up) each node.
            prop_name: Name of the property stored from column 1.

        Returns:
            True when the whole file was processed.

        Raises:
            ValueError: If reading the file or any service call fails; the
                original exception is attached as ``__cause__``.
        """
        try:
            # Read the sheet without treating the first row as a header.
            df = pd.read_excel(file_path, header=None)

            for _, row in df.iterrows():
                entity_name = self._cell_text(row[0])
                prop_value = self._cell_text(row[1]) if len(row) > 1 else ''

                # Rows without an entity name (blank or missing cell) are skipped.
                if not entity_name:
                    continue

                # Look the node up first so repeated imports do not duplicate it.
                node = self.node_service.get_node_by_name_category(entity_name, category)
                if not node:
                    node_data = {
                        'name': entity_name,
                        'category': category,
                        'version': 'xysy',
                        'embedding': Vectorizer.get_embedding(entity_name),
                        # NOTE(review): meaning of status 0 is defined by the
                        # node service/model - confirm.
                        'status': 0,
                    }
                    node = self.node_service.create_node(node_data)

                if not prop_value:
                    continue

                # The node service may hand back either a dict or an object.
                node_id = node['id'] if isinstance(node, dict) else node.id

                # Only create the property when the node does not have it yet.
                if self.prop_service.get_prop_by_ref_id(node_id, prop_name):
                    continue

                prop_data = {
                    'ref_id': node_id,
                    # NOTE(review): the numeric 'category' and 'type' codes are
                    # defined by the property service/model - confirm.
                    'category': 1,
                    'prop_name': prop_name,
                    'prop_value': prop_value,
                    'type': 1,
                }
                self.prop_service.create_prop(prop_data)

            return True
        except Exception as e:
            logger.error(f"导入Excel数据失败: {str(e)}")
            # Chain the cause so the original traceback is preserved for callers.
            raise ValueError(f"导入失败: {str(e)}") from e
if __name__ == "__main__":
    # Ad-hoc entry point: import one workbook using a session taken from the
    # project's get_db() provider.
    from db.session import get_db

    excel_path = "C:\\Users\\17664\\Desktop\\入院主诊断-诊疗计划.xlsx"
    node_category = "疾病"
    target_prop = "intramural_treatment_plan"

    # get_db() is consumed with next(), so only its first yielded value is used.
    session = next(get_db())
    ExcelImporter(session).import_from_excel(excel_path, node_category, target_prop)
|