community_report.py
'''
Generate community-detection reports from dumped knowledge-graph data.
'''
import sys,os
current_path = os.getcwd()
sys.path.append(current_path)
import networkx as nx
import leidenalg
import igraph as ig
#import matplotlib.pyplot as plt
import json
from datetime import datetime
from collections import Counter
# Community-detection resolution: larger -> fewer communities,
# smaller -> more communities.
#RESOLUTION = 0.07
# Whether community reports include each node's full attribute list
REPORT_INCLUDE_DETAILS = False
# #Cached graph-data path; data is produced by dump_graph_data.py
# CACHED_DATA_PATH = f"{current_path}\\web\\cached_data"
# #Output directory for the final per-community reports
REPORT_PATH = f"{current_path}\\web\\cached_data\\report"
# Density threshold below which a community is flagged as sparsely connected
DENSITY = 0.52
  23. # def load_entity_data():
  24. # print("load entity data")
  25. # with open(f"{CACHED_DATA_PATH}\\entities_med.json", "r", encoding="utf-8") as f:
  26. # entities = json.load(f)
  27. # return entities
  28. # def load_relation_data(g):
  29. # for i in range(30):
  30. # if os.path.exists(f"{CACHED_DATA_PATH}\\relationship_med_{i}.json"):
  31. # print("load entity data", f"{CACHED_DATA_PATH}\\relationship_med_{i}.json")
  32. # with open(f"{CACHED_DATA_PATH}\\relationship_med_{i}.json", "r", encoding="utf-8") as f:
  33. # relations = json.load(f)
  34. # for item in relations:
  35. # g.add_edge(item[0], item[1], weight=1, **item[2])
  36. # def generate_enterprise_network():
  37. # G = nx.Graph()
  38. # ent_data = load_entity_data()
  39. # print("load entities completed")
  40. # for data in ent_data:
  41. # G.add_node(data[0], **data[1])
  42. # print("load entities into graph completed")
  43. # rel_data = load_relation_data(G)
  44. # print("load relation completed")
  45. # return G
  46. # def detect_communities(G):
  47. # """使用Leiden算法进行社区检测"""
  48. # # 转换networkx图到igraph格式
  49. # print("convert to igraph")
  50. # ig_graph = ig.Graph.from_networkx(G)
  51. # # 执行Leiden算法
  52. # partition = leidenalg.find_partition(
  53. # ig_graph,
  54. # leidenalg.CPMVertexPartition,
  55. # resolution_parameter=RESOLUTION,
  56. # n_iterations=2
  57. # )
  58. # # 将社区标签添加到原始图
  59. # for i, node in enumerate(G.nodes()):
  60. # G.nodes[node]['community'] = partition.membership[i]
  61. # print("convert to igraph finished")
  62. # return G, partition
  63. def generate_report(G, partition):
  64. """生成结构化分析报告"""
  65. report = []
  66. # 报告头信息
  67. report.append(f"# 疾病图谱关系社区分析报告\n")
  68. report.append(f"**生成时间**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
  69. report.append(f"**检测算法**: Leiden Algorithm\n")
  70. report.append(f"**算法参数**:\n")
  71. report.append(f"- 分辨率参数: {partition.resolution_parameter:.3f}\n")
  72. # report.append(f"- 迭代次数: {partition.n_iterations}\n")
  73. report.append(f"**社区数量**: {len(set(partition.membership))}\n")
  74. report.append(f"**模块度(Q)**: {partition.quality():.4f}\n")
  75. print("generate_report header finished")
  76. report.append("\n## 社区结构分析\n")
  77. print("generate_report community structure started")
  78. communities = {}
  79. for node in G.nodes(data=True):
  80. comm = node[1]['community']
  81. if comm not in communities:
  82. communities[comm] = []
  83. if 'type' not in node[1]:
  84. node[1]['type'] = '未知'
  85. if 'description' not in node[1]:
  86. node[1]['description'] = '未见描述'
  87. communities[comm].append({
  88. 'name': node[0],
  89. **node[1]
  90. })
  91. print("generate_report community structure finished")
  92. for comm_id, members in communities.items():
  93. print("community ", comm_id, "size: ", len(members))
  94. com_report = []
  95. com_report.append(f"### 第{comm_id+1}号社区报告 ")
  96. #com_report.append(f"**社区规模**: {len(members)} 个节点\n")
  97. # 行业类型分布
  98. type_dist = Counter([m['type'] for m in members])
  99. com_report.append(f"**类型分布**:")
  100. for industry, count in type_dist.most_common():
  101. com_report.append(f"- {industry}: {count} 个 ({count/len(members):.0%})")
  102. com_report.append("\n```json")
  103. obj_list = []
  104. names = []
  105. for member in members:
  106. obj = {}
  107. obj["name"] = member['name']
  108. obj["type"] = member['type']
  109. obj_list.append(obj)
  110. #com_report.append(f"'name':'{member['name']}','type':'{member['type']}'")
  111. names.append(member['name'])
  112. if REPORT_INCLUDE_DETAILS == False:
  113. continue
  114. for k in member.keys():
  115. if k not in ['name', 'type', 'description', 'community']:
  116. value = member[k]
  117. com_report.append(f"\t- {value}")
  118. com_report.append(json.dumps(obj_list, ensure_ascii=False, indent=4))
  119. com_report.append("```")
  120. com_report.append("\n**成员节点关系**:\n")
  121. for member in members:
  122. entities, relations = graph_helper.neighbor_search(member['name'], 1)
  123. com_report.append(f"- {member['name']} ({member['type']})")
  124. com_report.append(f"\t- 相关节点")
  125. for entity in entities:
  126. if entity['name'] in names:
  127. com_report.append(f"\t\t- {entity['name']} ({entity['type']})")
  128. com_report.append(f"\t- 相关关系")
  129. for relation in relations:
  130. if relation['src_name'] in names or relation['dest_name'] in names:
  131. com_report.append(f"\t\t- {relation['src_name']}-({relation['type']})->{relation['dest_name']}")
  132. # 计算社区内部连接密度
  133. subgraph = G.subgraph([m['name'] for m in members])
  134. density = nx.density(subgraph)
  135. com_report.append(f"\n**内部连接密度**: {density:.2f}\n")
  136. if density < DENSITY:
  137. com_report.append("**社区内部连接相对稀疏**\n")
  138. else:
  139. with open(f"{REPORT_PATH}\community_{comm_id}.md", "w", encoding="utf-8") as f:
  140. f.write("\n".join(com_report))
  141. print(f"社区 {comm_id+1} 报告文件大小:{len(''.join(com_report).encode('utf-8'))} 字节") # 添加文件生成验证
  142. # 可视化图表
  143. report.append("\n## 可视化分析\n")
  144. return "\n".join(report)
  145. if __name__ == "__main__":
  146. try:
  147. from libs.graph_helper import GraphHelper
  148. graph_helper = GraphHelper()
  149. G = graph_helper.graph
  150. print("graph loaded")
  151. # 生成企业关系网络
  152. # 执行社区检测
  153. G, partition = graph_helper.detect_communities()
  154. # 生成分析报告
  155. report = generate_report(G, partition)
  156. with open('community_report.md', 'w', encoding='utf-8') as f:
  157. f.write(report)
  158. print(f"报告文件大小:{len(report.encode('utf-8'))} 字节") # 添加文件生成验证
  159. print("社区分析报告已生成:community_report.md")
  160. except Exception as e:
  161. print(f"运行时错误:{str(e)}")
  162. raise e