neo4j.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124
  1. from py2neo import Graph, Node, Relationship, Transaction, RelationshipMatcher, NodeMatcher
  2. from config.site import NEO4J_HOST,NEO4J_PASSWORD,NEO4J_USER
  def get_neo4j_db():
      """Open a connection to the Neo4j database.

      The connection target is ``NEO4J_HOST`` and the auth pair is
      ``(NEO4J_USER, NEO4J_PASSWORD)``, all imported from ``config.site``.

      Returns:
          The py2neo ``Graph`` built from those settings.
      """
      credentials = (NEO4J_USER, NEO4J_PASSWORD)
      return Graph(NEO4J_HOST, auth=credentials)
  def trim(s, c):
      """Strip every leading and trailing occurrence of character ``c`` from ``s``.

      Args:
          s: The string to clean. ``None`` and ``''`` both yield ``''``.
          c: The single character to remove from both ends of ``s``.

      Returns:
          ``s`` without its leading and trailing runs of ``c``. When ``c`` is
          not exactly one character, ``s`` is returned unchanged.
      """
      if s is None or s == '':
          return ''
      # Stripping is defined for exactly one character: anything else (empty,
      # multi-character, non-string) never matches and leaves ``s`` untouched.
      if not isinstance(c, str) or len(c) != 1:
          return s
      # str.strip removes the runs on both sides in a single pass instead of
      # re-slicing (and copying) the string once per removed character.
      return s.strip(c)
  def trimall(part):
      """Clean a raw text fragment by trimming punctuation from both ends.

      The fragment is passed through ``trim`` once per character, in a fixed
      order. Afterwards every remaining backtick is removed and every remaining
      double quote is prefixed with a backslash.

      Args:
          part: The raw fragment to clean.

      Returns:
          The cleaned string.
      """
      # The order is significant: each pass strips one character only, so a
      # later pass can act on what an earlier one exposed. That is why the
      # single quote appears twice.
      edge_chars = ("\n", ".", " ", "'", "`", ")", "(", "'", "_", "\"", "-", "[", "]")
      for edge_char in edge_chars:
          part = trim(part, edge_char)
      without_backticks = part.replace("`", "")
      return without_backticks.replace("\"", "\\\"")
  def get_all_entities_of_ent_typ(graph, ent_typ):
      """Collect the ``name`` property of every node matched for ``ent_typ``.

      Args:
          graph: The py2neo graph to query.
          ent_typ: Passed as the positional argument of ``NodeMatcher.match``
              (py2neo treats it as a node label).

      Returns:
          A list with the ``name`` value of each matched node.
      """
      matched_nodes = NodeMatcher(graph).match(ent_typ)
      return [node['name'] for node in matched_nodes]
  # Insert triples into Neo4j.
  def triples2neo4j(graph, triples, one2many=False, many2one=False):
      """Insert relation triples into the graph, skipping disallowed duplicates.

      Each triple has the shape
      ``((head_name, head_type), (tail_name, tail_type), relation)``.
      An entity counts as existing when a node with that type as label and
      that ``name`` property is already stored.

      Args:
          graph: The py2neo graph to write to.
          triples: Iterable of triples in the shape described above.
          one2many: When false, an existing head that already has an outgoing
              ``relation`` is not linked to a new tail.
          many2one: When false, an existing tail that already has an incoming
              ``relation`` is not linked to a new head.

      Returns:
          None. The outcome of each triple is reported with ``print``.
      """
      # Build the matchers once; they only wrap the graph handle.
      node_matcher = NodeMatcher(graph)
      rel_matcher = RelationshipMatcher(graph)

      for ent_1, ent_2, rel in triples:
          head, head_typ = ent_1
          tail, tail_typ = ent_2

          head_node = Node(head_typ, name=head)
          # A self-referencing triple must share one node object, otherwise
          # two separate nodes with the same name would be created.
          is_self_loop = head_typ == tail_typ and head == tail
          tail_node = head_node if is_self_loop else Node(tail_typ, name=tail)

          # Targeted lookups: ask only for the node in question instead of
          # downloading every node of the type to test membership.
          head_exists = node_matcher.match(head_typ, name=head).first() is not None
          tail_exists = node_matcher.match(tail_typ, name=tail).first() is not None

          # Bind the local node objects to the stored nodes they correspond to.
          if head_exists:
              graph.merge(head_node, head_typ, "name")
          if tail_exists:
              graph.merge(tail_node, tail_typ, "name")

          if head_exists and tail_exists:
              # Both entities exist: only the relationship itself can be new.
              if rel_matcher.match((head_node, tail_node), r_type=rel).first() is not None:
                  print(f'三元组 ({head} ,{tail} ,{rel}) 已存在于图谱中,插入失败!')
                  continue
          elif head_exists:
              # Only the head exists: refuse a second tail unless one2many.
              if not one2many and rel_matcher.match((head_node, None), r_type=rel).first() is not None:
                  print(f'头实体 {head} 已存在关系 {rel} 对应的三元组 ({head} ,{tail} ,{rel}),插入失败!')
                  continue
              graph.create(tail_node)
          elif tail_exists:
              # Only the tail exists: refuse a second head unless many2one.
              if not many2one and rel_matcher.match((None, tail_node), r_type=rel).first() is not None:
                  print(f'尾实体 {tail} 已存在关系 {rel} 对应的三元组 ({head} ,{tail} ,{rel}),插入失败!')
                  continue
              graph.create(head_node)
          else:
              # Neither entity exists yet.
              graph.create(head_node)
              if tail_node is not head_node:
                  graph.create(tail_node)

          graph.create(Relationship(head_node, rel, tail_node))
          print(f'三元组 ({head} ,{tail} ,{rel}) 插入成功!')
  88. #triples = []
  89. # #(['李沐','Per'], ['CMU', 'Sch'], '毕业于'),
  90. # #(['李沐', 'Per'], ['沐神的小迷弟', 'Per'], '迷弟'),
  91. # (['李沐','Per'], ['中国', 'Cou'], '出生于'),
  92. # (['李沐','Per'], ['亚马逊', 'Com'], '就职于'),
  93. # (['沐神的小迷弟', 'Per'], ['西安交通大学', 'Sch'], '就读于'),
  94. # (['李沐','Per'], ['上海交通大学', 'Sch'], '毕业于'),
  95. # (['李沐','Per'], ['百度', 'Com'], '就职于'),
  96. # ]
  97. # for line in lines:
  98. # line.strip()
  99. # if line.find("[]") >=0:
  100. # continue
  101. # #print(line)
  102. # parts = line.split(",")
  103. # ent1 = trimall(parts[0])
  104. # rela = trimall(parts[1])
  105. # ent2 = trimall(parts[2])
  106. # if ent1 == ent2:
  107. # continue
  108. # if ent1 is None or ent2 is None or rela is None:
  109. # continue
  110. # else:
  111. # triples.append(([ent1, 'Basic'],[ent2, 'Basic'], rela))
  112. # triples2neo4j(graph, triples, one2many=False, many2one=False)
  113. # print("数据插入成功")