# community_report.py

  1. """
  2. 社区报告生成模块
  3. 本模块用于从dump的图谱数据生成社区算法报告
  4. 主要功能:
  5. 1. 生成社区分析报告
  6. 2. 计算社区内部连接密度
  7. 3. 生成可视化分析报告
  8. """
import sys, os

current_path = os.getcwd()
sys.path.append(current_path)

import networkx as nx
import leidenalg
import igraph as ig
# import matplotlib.pyplot as plt
import json
from datetime import datetime
from collections import Counter

# Resolution for community detection: the larger the value, the fewer communities;
# the smaller the value, the more communities.
# RESOLUTION = 0.07

# Whether the community report includes each node's attribute list.
REPORT_INCLUDE_DETAILS = False

# Cache path for the graph data produced by dump_graph_data.py.
# CACHED_DATA_PATH = f"{current_path}\\web\\cached_data"

# Output path for the final community reports.
REPORT_PATH = f"{current_path}\\web\\cached_data\\report"

DENSITY = 0.52
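# Note on the threshold above: nx.density(g) for an undirected graph with n nodes and
# m edges is 2*m / (n*(n-1)), i.e. the fraction of possible edges actually present.
# Communities whose internal density falls below DENSITY are flagged as sparse and do
# not get a per-community report file (see generate_report below).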
# def load_entity_data():
#     print("load entity data")
#     with open(f"{CACHED_DATA_PATH}\\entities_med.json", "r", encoding="utf-8") as f:
#         entities = json.load(f)
#         return entities
#
# def load_relation_data(g):
#     for i in range(30):
#         if os.path.exists(f"{CACHED_DATA_PATH}\\relationship_med_{i}.json"):
#             print("load relation data", f"{CACHED_DATA_PATH}\\relationship_med_{i}.json")
#             with open(f"{CACHED_DATA_PATH}\\relationship_med_{i}.json", "r", encoding="utf-8") as f:
#                 relations = json.load(f)
#                 for item in relations:
#                     g.add_edge(item[0], item[1], weight=1, **item[2])
#
# def generate_enterprise_network():
#     G = nx.Graph()
#     ent_data = load_entity_data()
#     print("load entities completed")
#     for data in ent_data:
#         G.add_node(data[0], **data[1])
#     print("load entities into graph completed")
#     rel_data = load_relation_data(G)
#     print("load relation completed")
#     return G
#
# def detect_communities(G):
#     """Detect communities with the Leiden algorithm."""
#     # Convert the networkx graph to igraph format.
#     print("convert to igraph")
#     ig_graph = ig.Graph.from_networkx(G)
#     # Run the Leiden algorithm.
#     partition = leidenalg.find_partition(
#         ig_graph,
#         leidenalg.CPMVertexPartition,
#         resolution_parameter=RESOLUTION,
#         n_iterations=2
#     )
#     # Write each node's community label back onto the original graph.
#     for i, node in enumerate(G.nodes()):
#         G.nodes[node]['community'] = partition.membership[i]
#     print("convert to igraph finished")
#     return G, partition
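# NOTE: live community detection is delegated to GraphHelper.detect_communities() in the
# __main__ block below; the commented-out Leiden version above is kept for reference.
# The GraphHelper interface assumed here is inferred from its usage in this file:
#   graph_helper.graph                         -> networkx Graph built from the dumped data
#   graph_helper.detect_communities()          -> (G, partition); every node of G gets a
#                                                 'community' attribute, and partition
#                                                 exposes resolution_parameter, membership
#                                                 and quality()
#   graph_helper.neighbor_search(name, depth)  -> (entities, relations); entities carry
#                                                 'id'/'type', relations carry
#                                                 'src_name'/'type'/'dest_name'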
def generate_report(G, partition):
    """
    Generate a structured analysis report.

    Parameters:
        G: NetworkX graph object containing the node and edge data.
        partition: community partition returned by the Leiden algorithm.

    Returns:
        str: the generated report content.
    """
    report = []

    # Report header.
    report.append("# 疾病图谱关系社区分析报告\n")
    report.append(f"**生成时间**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
    report.append("**检测算法**: Leiden Algorithm\n")
    report.append("**算法参数**:\n")
    report.append(f"- 分辨率参数: {partition.resolution_parameter:.3f}\n")
    # report.append(f"- 迭代次数: {partition.n_iterations}\n")
    report.append(f"**社区数量**: {len(set(partition.membership))}\n")
    report.append(f"**模块度(Q)**: {partition.quality():.4f}\n")
    print("generate_report header finished")

    report.append("\n## 社区结构分析\n")
    print("generate_report community structure started")

    # Group nodes by their community label, filling in defaults for missing type/description.
    communities = {}
    for node in G.nodes(data=True):
        comm = node[1]['community']
        if comm not in communities:
            communities[comm] = []
        if 'type' not in node[1]:
            node[1]['type'] = '未知'
        if 'description' not in node[1]:
            node[1]['description'] = '未见描述'
        communities[comm].append({
            'name': node[0],
            **node[1]
        })
    print("generate_report community structure finished")
    for comm_id, members in communities.items():
        print("community ", comm_id, "size: ", len(members))
        com_report = []
        com_report.append(f"### 第{comm_id+1}号社区报告 ")
        # com_report.append(f"**社区规模**: {len(members)} 个节点\n")

        # Node type distribution.
        type_dist = Counter([m['type'] for m in members])
        com_report.append("**类型分布**:")
        for industry, count in type_dist.most_common():
            com_report.append(f"- {industry}: {count} 个 ({count/len(members):.0%})")

        com_report.append("\n**成员节点**:")
        member_names = ''
        member_count = 0
        for member in members:
            if member_count < 8:
                # Strip characters from member['name'] that are not allowed in file names.
                member_name = member['name']
                for ch in '\\/:*?"<>|':
                    member_name = member_name.replace(ch, '')
                member_names += member_name + '_'
                member_count += 1
            com_report.append(f"- {member['name']} ({member['type']})")
            if not REPORT_INCLUDE_DETAILS:
                continue
            for k in member.keys():
                if k not in ['name', 'type', 'description', 'community']:
                    value = member[k]
                    com_report.append(f"\t- {value}")
        com_report.append("\n**成员节点关系**:\n")
        for member in members:
            # graph_helper is the module-level GraphHelper instance created in __main__.
            entities, relations = graph_helper.neighbor_search(member['name'], 1)
            com_report.append(f"- {member['name']} ({member['type']})")
            com_report.append("\t- 相关节点")
            for entity in entities:
                com_report.append(f"\t\t- {entity['id']} ({entity['type']})")
            com_report.append("\t- 相关关系")
            for relation in relations:
                com_report.append(f"\t\t- {relation['src_name']}-({relation['type']})->{relation['dest_name']}")
        # Compute the community's internal connection density.
        subgraph = G.subgraph([m['name'] for m in members])
        density = nx.density(subgraph)
        com_report.append(f"\n**内部连接密度**: {density:.2f}\n")
        if density < DENSITY:
            com_report.append("**社区内部连接相对稀疏**\n")
        else:
            # Only communities that reach the density threshold get their own report file,
            # named from the first (at most 8) member names plus the community id.
            with open(f"{REPORT_PATH}\\{member_names}{comm_id}.md", "w", encoding="utf-8") as f:
                f.write("\n".join(com_report))
            print(f"社区 {comm_id+1} 报告文件大小:{len(''.join(com_report).encode('utf-8'))} 字节")  # Verify the file was generated.

    # Visualization charts.
    report.append("\n## 可视化分析\n")
    return "\n".join(report)
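# For orientation, the output structure (reconstructed from the appends above):
#   community_report.md         -> report header (生成时间 / 检测算法 / 算法参数 / 社区数量 /
#                                  模块度) plus the "## 社区结构分析" and "## 可视化分析" headings
#   REPORT_PATH\<names><id>.md  -> one file per sufficiently dense community, containing
#                                  "### 第N号社区报告", 类型分布, 成员节点, 成员节点关系 and
#                                  内部连接密度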
if __name__ == "__main__":
    try:
        from graph_helper import GraphHelper

        graph_helper = GraphHelper()
        G = graph_helper.graph
        print("graph loaded")
        # The relation network is loaded via GraphHelper above.
        # Run community detection.
        G, partition = graph_helper.detect_communities()
        # Generate the analysis report.
        report = generate_report(G, partition)
        with open('community_report.md', 'w', encoding='utf-8') as f:
            f.write(report)
        print(f"报告文件大小:{len(report.encode('utf-8'))} 字节")  # Verify the file was generated.
        print("社区分析报告已生成:community_report.md")
    except Exception as e:
        print(f"运行时错误:{str(e)}")
        raise e