# excel_importer.py
  1. import pandas as pd
  2. from sqlalchemy.orm import Session
  3. from service.kg_node_service import KGNodeService
  4. from service.kg_prop_service import KGPropService
  5. import logging
  6. from utils.vectorizer import Vectorizer
  7. logger = logging.getLogger(__name__)
  8. class ExcelImporter:
  9. def __init__(self, db: Session):
  10. self.node_service = KGNodeService(db)
  11. self.prop_service = KGPropService(db)
  12. def import_from_excel(self, file_path: str, category: str, prop_name: str):
  13. try:
  14. # 读取Excel文件
  15. df = pd.read_excel(file_path, header=None)
  16. # 遍历每一行数据
  17. for _, row in df.iterrows():
  18. entity_name = str(row[0]).strip()
  19. prop_value = str(row[1]).strip() if len(row) > 1 else ''
  20. if not entity_name:
  21. continue
  22. # 检查节点是否存在
  23. node = self.node_service.get_node_by_name_category(entity_name, category)
  24. if not node:
  25. # 创建新节点
  26. node_data = {
  27. 'name': entity_name,
  28. 'category': category,
  29. 'version': 'xysy',
  30. 'embedding': Vectorizer.get_embedding(entity_name),
  31. 'status': 0
  32. }
  33. node = self.node_service.create_node(node_data)
  34. # 创建属性
  35. if prop_value:
  36. node_id = node['id'] if isinstance(node, dict) else node.id
  37. prop = self.prop_service.get_prop_by_ref_id(node_id, prop_name)
  38. if not prop:
  39. prop_data = {
  40. 'ref_id': node_id,
  41. 'category': 1,
  42. 'prop_name': prop_name,
  43. 'prop_value': prop_value,
  44. 'type': 1
  45. }
  46. self.prop_service.create_prop(prop_data)
  47. return True
  48. except Exception as e:
  49. logger.error(f"导入Excel数据失败: {str(e)}")
  50. raise ValueError(f"导入失败: {str(e)}")
  51. if __name__ == "__main__":
  52. file_path = "C:\\Users\\17664\\Desktop\\入院主诊断-诊疗计划.xlsx"
  53. category = "疾病"
  54. prop_name = "intramural_treatment_plan"
  55. from db.session import get_db
  56. db = next(get_db())
  57. importer = ExcelImporter(db)
  58. importer.import_from_excel(file_path, category, prop_name)