# community_report.py
  1. """
  2. 社区报告生成模块
  3. 本模块用于从dump的图谱数据生成社区算法报告
  4. 主要功能:
  5. 1. 生成社区分析报告
  6. 2. 计算社区内部连接密度
  7. 3. 生成可视化分析报告
  8. """
  9. import sys,os
  10. current_path = os.getcwd()
  11. sys.path.append(current_path)
  12. import networkx as nx
  13. import leidenalg
  14. import igraph as ig
  15. #import matplotlib.pyplot as plt
  16. import json
  17. from datetime import datetime
  18. from collections import Counter
  19. #社区报告的分辨率,数字越大,社区数量越少,数字越小,社区数量越多
  20. #RESOLUTION = 0.07
  21. #社区报告中是否包括节点的属性列表
  22. REPORT_INCLUDE_DETAILS = False
  23. # #图谱数据的缓存路径,数据从dump_graph_data.py生成
  24. # CACHED_DATA_PATH = f"{current_path}\\web\\cached_data"
  25. # #最终社区报告的输出路径
  26. REPORT_PATH = f"{current_path}\\web\\cached_data\\report"
  27. DENSITY = 0.52
  28. # def load_entity_data():
  29. # print("load entity data")
  30. # with open(f"{CACHED_DATA_PATH}\\entities_med.json", "r", encoding="utf-8") as f:
  31. # entities = json.load(f)
  32. # return entities
  33. # def load_relation_data(g):
  34. # for i in range(30):
  35. # if os.path.exists(f"{CACHED_DATA_PATH}\\relationship_med_{i}.json"):
  36. # print("load entity data", f"{CACHED_DATA_PATH}\\relationship_med_{i}.json")
  37. # with open(f"{CACHED_DATA_PATH}\\relationship_med_{i}.json", "r", encoding="utf-8") as f:
  38. # relations = json.load(f)
  39. # for item in relations:
  40. # g.add_edge(item[0], item[1], weight=1, **item[2])
  41. # def generate_enterprise_network():
  42. # G = nx.Graph()
  43. # ent_data = load_entity_data()
  44. # print("load entities completed")
  45. # for data in ent_data:
  46. # G.add_node(data[0], **data[1])
  47. # print("load entities into graph completed")
  48. # rel_data = load_relation_data(G)
  49. # print("load relation completed")
  50. # return G
  51. # def detect_communities(G):
  52. # """使用Leiden算法进行社区检测"""
  53. # # 转换networkx图到igraph格式
  54. # print("convert to igraph")
  55. # ig_graph = ig.Graph.from_networkx(G)
  56. # # 执行Leiden算法
  57. # partition = leidenalg.find_partition(
  58. # ig_graph,
  59. # leidenalg.CPMVertexPartition,
  60. # resolution_parameter=RESOLUTION,
  61. # n_iterations=2
  62. # )
  63. # # 将社区标签添加到原始图
  64. # for i, node in enumerate(G.nodes()):
  65. # G.nodes[node]['community'] = partition.membership[i]
  66. # print("convert to igraph finished")
  67. # return G, partition
  68. def generate_report(G, partition):
  69. """
  70. 生成结构化分析报告
  71. 参数:
  72. G: NetworkX图对象,包含节点和边的信息
  73. partition: Leiden算法返回的社区划分结果
  74. 返回:
  75. str: 生成的分析报告内容
  76. """
  77. report = []
  78. # 报告头信息
  79. report.append(f"# 疾病图谱关系社区分析报告\n")
  80. report.append(f"**生成时间**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
  81. report.append(f"**检测算法**: Leiden Algorithm\n")
  82. report.append(f"**算法参数**:\n")
  83. report.append(f"- 分辨率参数: {partition.resolution_parameter:.3f}\n")
  84. # report.append(f"- 迭代次数: {partition.n_iterations}\n")
  85. report.append(f"**社区数量**: {len(set(partition.membership))}\n")
  86. report.append(f"**模块度(Q)**: {partition.quality():.4f}\n")
  87. print("generate_report header finished")
  88. report.append("\n## 社区结构分析\n")
  89. print("generate_report community structure started")
  90. communities = {}
  91. for node in G.nodes(data=True):
  92. comm = node[1]['community']
  93. if comm not in communities:
  94. communities[comm] = []
  95. if 'type' not in node[1]:
  96. node[1]['type'] = '未知'
  97. if 'description' not in node[1]:
  98. node[1]['description'] = '未见描述'
  99. communities[comm].append({
  100. 'name': node[0],
  101. **node[1]
  102. })
  103. print("generate_report community structure finished")
  104. for comm_id, members in communities.items():
  105. print("community ", comm_id, "size: ", len(members))
  106. com_report = []
  107. com_report.append(f"### 第{comm_id+1}号社区报告 ")
  108. #com_report.append(f"**社区规模**: {len(members)} 个节点\n")
  109. # 行业类型分布
  110. type_dist = Counter([m['type'] for m in members])
  111. com_report.append(f"**类型分布**:")
  112. for industry, count in type_dist.most_common():
  113. com_report.append(f"- {industry}: {count} 个 ({count/len(members):.0%})")
  114. com_report.append("\n**成员节点**:")
  115. member_names = ''
  116. member_count = 0
  117. for member in members:
  118. if member_count < 8:
  119. member_names += member['name'] + '_'
  120. member_count += 1
  121. com_report.append(f"- {member['name']} ({member['type']})")
  122. if REPORT_INCLUDE_DETAILS == False:
  123. continue
  124. for k in member.keys():
  125. if k not in ['name', 'type', 'description', 'community']:
  126. value = member[k]
  127. com_report.append(f"\t- {value}")
  128. com_report.append("\n**成员节点关系**:\n")
  129. for member in members:
  130. entities, relations = graph_helper.neighbor_search(member['name'], 1)
  131. com_report.append(f"- {member['name']} ({member['type']})")
  132. com_report.append(f"\t- 相关节点")
  133. for entity in entities:
  134. com_report.append(f"\t\t- {entity['id']} ({entity['type']})")
  135. com_report.append(f"\t- 相关关系")
  136. for relation in relations:
  137. com_report.append(f"\t\t- {relation['src_name']}-({relation['type']})->{relation['dest_name']}")
  138. # 计算社区内部连接密度
  139. subgraph = G.subgraph([m['name'] for m in members])
  140. density = nx.density(subgraph)
  141. com_report.append(f"\n**内部连接密度**: {density:.2f}\n")
  142. if density < DENSITY:
  143. com_report.append("**社区内部连接相对稀疏**\n")
  144. else:
  145. with open(f"{REPORT_PATH}\社区_{member_names}{comm_id}.md", "w", encoding="utf-8") as f:
  146. f.write("\n".join(com_report))
  147. print(f"社区 {comm_id+1} 报告文件大小:{len(''.join(com_report).encode('utf-8'))} 字节") # 添加文件生成验证
  148. # 可视化图表
  149. report.append("\n## 可视化分析\n")
  150. return "\n".join(report)
  151. if __name__ == "__main__":
  152. try:
  153. from graph_helper import GraphHelper
  154. graph_helper = GraphHelper()
  155. G = graph_helper.graph
  156. print("graph loaded")
  157. # 生成企业关系网络
  158. # 执行社区检测
  159. G, partition = graph_helper.detect_communities()
  160. # 生成分析报告
  161. report = generate_report(G, partition)
  162. with open('community_report.md', 'w', encoding='utf-8') as f:
  163. f.write(report)
  164. print(f"报告文件大小:{len(report.encode('utf-8'))} 字节") # 添加文件生成验证
  165. print("社区分析报告已生成:community_report.md")
  166. except Exception as e:
  167. print(f"运行时错误:{str(e)}")
  168. raise e