123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124 |
- from py2neo import Graph, Node, Relationship, Transaction, RelationshipMatcher, NodeMatcher
- from config.site import NEO4J_HOST,NEO4J_PASSWORD,NEO4J_USER
def get_neo4j_db():
    """Create and return the py2neo ``Graph`` handle for the Neo4j database.

    The server address and the credentials come from the ``config.site``
    constants ``NEO4J_HOST``, ``NEO4J_USER`` and ``NEO4J_PASSWORD``.

    Returns:
        The ``Graph`` object built from those settings.
    """
    credentials = (NEO4J_USER, NEO4J_PASSWORD)
    return Graph(NEO4J_HOST, auth=credentials)
def trim(s, c):
    """Strip every leading and trailing occurrence of character ``c`` from ``s``.

    Args:
        s: The string to clean. ``None`` and ``''`` both yield ``''``.
        c: The single character to remove from both ends of ``s``.

    Returns:
        ``s`` without any leading or trailing run of ``c``. When ``c`` is not
        exactly one character long, ``s`` is returned unchanged.
    """
    if s is None or s == '':
        return ''
    # The function has always compared one character at a time, so a
    # multi-character (or non-string) ``c`` never matched anything. Keep that
    # contract explicit: ``str.strip`` would otherwise treat a longer ``c`` as
    # a *set* of characters, and ``None`` as "strip whitespace".
    if not isinstance(c, str) or len(c) != 1:
        return s
    return s.strip(c)
def trimall(part):
    """Strip surrounding punctuation from ``part`` and escape double quotes.

    The characters below are stripped from both ends of ``part`` one after
    another with ``trim``. The passes run in a fixed order, so a character
    that only becomes outermost after a later pass is not removed unless it
    appears again further down the sequence (which is why the single quote
    is listed twice).

    After stripping, every remaining backtick is removed and every remaining
    double quote is prefixed with a backslash.

    Args:
        part: The raw text fragment to clean.

    Returns:
        The cleaned fragment.
    """
    strip_sequence = (
        "\n", ".", " ", "'", "`", ")", "(", "'", "_", "\"", "-", "[", "]",
    )
    for edge_char in strip_sequence:
        part = trim(part, edge_char)

    without_backticks = part.replace("`", "")
    return without_backticks.replace("\"", "\\\"")
def get_all_entities_of_ent_typ(graph, ent_typ):
    """Return the ``name`` property of every node labelled ``ent_typ``.

    Args:
        graph: The py2neo graph to query.
        ent_typ: The node label to match.

    Returns:
        A list with one ``name`` value per matched node.
    """
    matched_nodes = NodeMatcher(graph).match(ent_typ)
    return [node['name'] for node in matched_nodes]
-
def triples2neo4j(graph, triples, one2many=False, many2one=False):
    """Insert ``(head, tail, relation)`` triples into a Neo4j graph.

    Each triple is ``((head_name, head_label), (tail_name, tail_label),
    rel_type)``. Nodes are identified by their label together with their
    ``name`` property. For every triple one of four cases applies:

    * Both nodes exist: the relationship is created unless an identical
      one (same endpoints, same type) is already present.
    * Only the head exists: the tail node and the relationship are created,
      unless the head already has an outgoing ``rel_type`` relationship and
      ``one2many`` is falsy.
    * Only the tail exists: the head node and the relationship are created,
      unless the tail already has an incoming ``rel_type`` relationship and
      ``many2one`` is falsy.
    * Neither exists: both nodes and the relationship are created.

    The outcome of every triple is reported with ``print``.

    Args:
        graph: The py2neo graph to write to.
        triples: Iterable of triples in the shape described above.
        one2many: Allow one head to hold the same relation type to several
            tails.
        many2one: Allow several heads to hold the same relation type to one
            tail.
    """
    node_matcher = NodeMatcher(graph)
    rel_matcher = RelationshipMatcher(graph)

    def _node_exists(label, name):
        # Let the database filter on label + name instead of downloading
        # every node of that label for each triple.
        return node_matcher.match(label, name=name).first() is not None

    def _has_relationship(start, end, rel_type):
        # ``None`` as an endpoint acts as a wildcard for that side.
        return rel_matcher.match((start, end), r_type=rel_type).first() is not None

    for (head, head_typ), (tail, tail_typ), rel in triples:
        head_node = Node(head_typ, name=head)
        tail_node = Node(tail_typ, name=tail)
        head_exists = _node_exists(head_typ, head)
        tail_exists = _node_exists(tail_typ, tail)

        # Merging on (label, "name") binds the local Node object to the node
        # already stored in the graph, so it can be used for matching and as
        # a relationship endpoint.
        if head_exists:
            graph.merge(head_node, head_typ, "name")
        if tail_exists:
            graph.merge(tail_node, tail_typ, "name")

        if head_exists and tail_exists:
            if _has_relationship(head_node, tail_node, rel):
                print(f'三元组 ({head} ,{tail} ,{rel}) 已存在于图谱中,插入失败!')
                continue
        elif head_exists:
            if not one2many and _has_relationship(head_node, None, rel):
                print(f'头实体 {head} 已存在关系 {rel} 对应的三元组 ({head} ,{tail} ,{rel}),插入失败!')
                continue
            graph.create(tail_node)
        elif tail_exists:
            if not many2one and _has_relationship(None, tail_node, rel):
                print(f'尾实体 {tail} 已存在关系 {rel} 对应的三元组 ({head} ,{tail} ,{rel}),插入失败!')
                continue
            graph.create(head_node)
        else:
            graph.create(head_node)
            graph.create(tail_node)

        graph.create(Relationship(head_node, rel, tail_node))
        print(f'三元组 ({head} ,{tail} ,{rel}) 插入成功!')
-
- #triples = []
- # #(['李沐','Per'], ['CMU', 'Sch'], '毕业于'),
- # #(['李沐', 'Per'], ['沐神的小迷弟', 'Per'], '迷弟'),
- # (['李沐','Per'], ['中国', 'Cou'], '出生于'),
- # (['李沐','Per'], ['亚马逊', 'Com'], '就职于'),
- # (['沐神的小迷弟', 'Per'], ['西安交通大学', 'Sch'], '就读于'),
- # (['李沐','Per'], ['上海交通大学', 'Sch'], '毕业于'),
- # (['李沐','Per'], ['百度', 'Com'], '就职于'),
- # ]
- # for line in lines:
- # line.strip()
- # if line.find("[]") >=0:
- # continue
- # #print(line)
- # parts = line.split(",")
- # ent1 = trimall(parts[0])
- # rela = trimall(parts[1])
- # ent2 = trimall(parts[2])
- # if ent1 == ent2:
- # continue
- # if ent1 is None or ent2 is None or rela is None:
- # continue
- # else:
- # triples.append(([ent1, 'Basic'],[ent2, 'Basic'], rela))
-
- # triples2neo4j(graph, triples, one2many=False, many2one=False)
- # print("数据插入成功")
|