Browse source

Initial commit

SGTY 1 week ago
commit
bdcf45864f
100 changed files with 101371 additions and 0 deletions
  1. .env_samples (+21 -0)
  2. .gitignore (+138 -0)
  3. .idea/ins_expert.iml (+15 -0)
  4. .idea/inspectionProfiles/profiles_settings.xml (+6 -0)
  5. .idea/misc.xml (+7 -0)
  6. .idea/modules.xml (+8 -0)
  7. .idea/vcs.xml (+6 -0)
  8. .idea/workspace.xml (+147 -0)
  9. .vscode/settings.json (+3 -0)
  10. Dockerfile.base (+72 -0)
  11. Dockerfile.job (+31 -0)
  12. Dockerfile.server (+17 -0)
  13. LICENSE (+21 -0)
  14. README.md (+40 -0)
  15. agent/app.py (+305 -0)
  16. agent/cdss/capbility.py (+41 -0)
  17. agent/cdss/libs/cdss_helper.py (+488 -0)
  18. agent/cdss/models/schemas.py (+72 -0)
  19. agent/db/database.py (+25 -0)
  20. agent/db/database.sql (+87 -0)
  21. agent/docs/update0411.txt (+39 -0)
  22. agent/docs/update0414.txt (+26 -0)
  23. agent/init_db.py (+13 -0)
  24. agent/libs/agent.py (+292 -0)
  25. agent/libs/auth.py (+59 -0)
  26. agent/libs/graph.py (+125 -0)
  27. agent/libs/response.py (+32 -0)
  28. agent/libs/user.py (+96 -0)
  29. agent/main.py (+6 -0)
  30. agent/models/db/agent.py (+39 -0)
  31. agent/models/db/graph.py (+198 -0)
  32. agent/models/db/user.py (+28 -0)
  33. agent/models/task.py (+27 -0)
  34. agent/models/web/graph.py (+320 -0)
  35. agent/models/web/request.py (+67 -0)
  36. agent/models/web/response.py (+111 -0)
  37. agent/openapi.yaml (+445 -0)
  38. agent/router/dify_kb_router.py (+192 -0)
  39. agent/router/file_router.py (+119 -0)
  40. agent/router/graph_mgr_router.py (+838 -0)
  41. agent/router/graph_network_router.py (+303 -0)
  42. agent/router/kb_router.py (+106 -0)
  43. agent/router/task_router.py (+247 -0)
  44. agent/router/user_router.py (+87 -0)
  45. agent/sample.md (+33 -0)
  46. agent/server.py (+66 -0)
  47. agent/test.py (+28 -0)
  48. agent/test/test_task_router.py (+145 -0)
  49. agent/testnx.py (+42 -0)
  50. command/build_es_index.py (+94 -0)
  51. command/build_graph_index.py (+131 -0)
  52. command/community_report.py (+199 -0)
  53. command/download_bge_model.py (+3 -0)
  54. command/dump_graph_data.py (+101 -0)
  55. command/elasticsearch_add_doc.py (+27 -0)
  56. command/elasticsearch_clean.py (+29 -0)
  57. command/entity_extract.py (+121 -0)
  58. command/extract_disease_doc.py (+83 -0)
  59. command/extract_doc_from_json.py (+120 -0)
  60. command/generate_doc_abstract.py (+47 -0)
  61. command/start_agent.py (+13 -0)
  62. command/start_web_server.py (+15 -0)
  63. command/words_freq_in_doc.py (+34 -0)
  64. config/site.py (+57 -0)
  65. environment.yml (+209 -0)
  66. executor/job_script/prompt/entity_extract.txt (+113 -0)
  67. executor/job_script/prompt/standard_med.txt (+51 -0)
  68. executor/job_script/sample.py (+4 -0)
  69. executor/job_script/standard_kb_build.py (+185 -0)
  70. executor/job_script/standard_kb_extractor.py (+209 -0)
  71. executor/job_script/standard_pdf_extractor.py (+198 -0)
  72. executor/job_script/standard_txt_chunk.py (+36 -0)
  73. executor/job_script/standard_word_extractor.py (+42 -0)
  74. executor/main.py (+250 -0)
  75. executor/ocr/OCRExecutor.py (+7 -0)
  76. functions/basic_function.py (+169 -0)
  77. functions/call.py (+93 -0)
  78. graph/.idea/.gitignore (+3 -0)
  79. graph/.idea/inspectionProfiles/profiles_settings.xml (+6 -0)
  80. graph/.idea/kg-server.iml (+12 -0)
  81. graph/.idea/misc.xml (+7 -0)
  82. graph/.idea/modules.xml (+8 -0)
  83. graph/background_job.py (+141 -0)
  84. graph/cdss.pl (+56 -0)
  85. graph/config/__init__.py (+0 -0)
  86. graph/config/site.py (+14 -0)
  87. graph/db/__init__.py (+0 -0)
  88. graph/db/database.py (+18 -0)
  89. graph/db/models.py (+182 -0)
  90. graph/db/neo4j.py (+124 -0)
  91. graph/db/schemas.py (+320 -0)
  92. graph/deep/read_drug.py (+27 -0)
  93. graph/dict/jieba_dict.txt (+43445 -0)
  94. graph/dict/sample.json (+141 -0)
  95. graph/environment.yml (+59 -0)
  96. graph/export_prolog.py (+78 -0)
  97. graph/main.pl (+48180 -0)
  98. graph/main.py (+105 -0)
  99. graph/mirge.py (+126 -0)
  100. graph/models/response.py (+0 -0)

+ 21 - 0
.env_samples

@@ -0,0 +1,21 @@
+POSTGRES_HOST=localhost
+POSTGRES_PORT=5432
+POSTGRES_DB=rag_db
+POSTGRES_USER=postgres
+POSTGRES_PASSWORD=your_password
+
+#indexing
+ELASTICSEARCH_HOST=http://localhost:9200
+
+# DeepSeek API
+DEEPSEEK_API_URL=https://api.siliconflow.cn/v1/chat/completions
+DEEPSEEK_API_KEY=sk-???
+
+
+#Embedding
+EMBEDDING_MODEL=C:\Users\jiyua\.cache\modelscope\hub\models\deepseek-ai\DeepSeek-R1-Distill-Qwen-1___5b
+DOC_PATH=D:/work/03/regulations.json
+DOC_STORAGE_PATH=D:/work/03/output/docs
+TRUNC_OUTPUT_PATH=D:/work/03/output/chunc_data
+DOC_ABSTRACT_OUTPUT_PATH=D:/work/03/output/doc_abstract
+JIEBA_USER_DICT=D:/work/03/ins_expert/dict/legal_terms.txt

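These settings are read as ordinary environment variables; the code in this commit loads them with python-dotenv (agent/app.py calls load_dotenv() and then reads values through config.site.SiteConfig). A minimal sketch of loading the PostgreSQL block, assuming the sample file has been copied to .env; only the variable names come from .env_samples, the rest is illustrative:

    # Sketch: load .env and read the PostgreSQL settings listed above.
    import os
    from dotenv import load_dotenv  # python-dotenv

    load_dotenv()  # reads .env from the current working directory

    pg = {
        "host": os.getenv("POSTGRES_HOST", "localhost"),
        "port": int(os.getenv("POSTGRES_PORT", "5432")),
        "dbname": os.getenv("POSTGRES_DB", "rag_db"),
        "user": os.getenv("POSTGRES_USER", "postgres"),
        "password": os.getenv("POSTGRES_PASSWORD", ""),
    }
    print(pg)
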
+ 138 - 0
.gitignore

@@ -0,0 +1,138 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/

+ 15 - 0
.idea/ins_expert.iml

@@ -0,0 +1,15 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="jdk" jdkName="langgraph (2)" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="PyDocumentationSettings">
+    <option name="format" value="PLAIN" />
+    <option name="myDocStringFormat" value="Plain" />
+  </component>
+  <component name="TestRunnerService">
+    <option name="PROJECT_TEST_RUNNER" value="py.test" />
+  </component>
+</module>

+ 6 - 0
.idea/inspectionProfiles/profiles_settings.xml

@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

+ 7 - 0
.idea/misc.xml

@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Black">
+    <option name="sdkName" value="langgraph (2)" />
+  </component>
+  <component name="ProjectRootManager" version="2" project-jdk-name="langgraph (2)" project-jdk-type="Python SDK" />
+</project>

+ 8 - 0
.idea/modules.xml

@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/ins_expert.iml" filepath="$PROJECT_DIR$/.idea/ins_expert.iml" />
+    </modules>
+  </component>
+</project>

+ 6 - 0
.idea/vcs.xml

@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>

+ 147 - 0
.idea/workspace.xml

@@ -0,0 +1,147 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="AutoImportSettings">
+    <option name="autoReloadType" value="SELECTIVE" />
+  </component>
+  <component name="ChangeListManager">
+    <list default="true" id="556e838b-1bb4-4816-8e54-c3fd1e8bac72" name="Changes" comment="代码提交" />
+    <option name="SHOW_DIALOG" value="false" />
+    <option name="HIGHLIGHT_CONFLICTS" value="true" />
+    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
+    <option name="LAST_RESOLUTION" value="IGNORE" />
+  </component>
+  <component name="Git.Settings">
+    <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
+  </component>
+  <component name="ProjectColorInfo">{
+  &quot;customColor&quot;: &quot;&quot;,
+  &quot;associatedIndex&quot;: 1
+}</component>
+  <component name="ProjectId" id="2xOVtg1EQJ3z0uz9Gr5CmHDzyN3" />
+  <component name="ProjectViewState">
+    <option name="hideEmptyMiddlePackages" value="true" />
+    <option name="showLibraryContents" value="true" />
+  </component>
+  <component name="PropertiesComponent"><![CDATA[{
+  "keyToString": {
+    "Python.main (1).executor": "Run",
+    "Python.main.executor": "Debug",
+    "RunOnceActivity.ShowReadmeOnStart": "true",
+    "RunOnceActivity.git.unshallow": "true",
+    "git-widget-placeholder": "master",
+    "node.js.detected.package.eslint": "true",
+    "node.js.detected.package.tslint": "true",
+    "node.js.selected.package.eslint": "(autodetect)",
+    "node.js.selected.package.tslint": "(autodetect)",
+    "nodejs_package_manager_path": "npm",
+    "vue.rearranger.settings.migration": "true"
+  }
+}]]></component>
+  <component name="RunManager" selected="Python.main (1)">
+    <configuration name="main (1)" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
+      <module name="ins_expert" />
+      <option name="ENV_FILES" value="" />
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs>
+        <env name="PYTHONUNBUFFERED" value="1" />
+      </envs>
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/executor" />
+      <option name="IS_MODULE_SDK" value="true" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
+      <option name="SCRIPT_NAME" value="$PROJECT_DIR$/executor/main.py" />
+      <option name="PARAMETERS" value="" />
+      <option name="SHOW_COMMAND_LINE" value="false" />
+      <option name="EMULATE_TERMINAL" value="false" />
+      <option name="MODULE_MODE" value="false" />
+      <option name="REDIRECT_INPUT" value="false" />
+      <option name="INPUT_FILE" value="" />
+      <method v="2" />
+    </configuration>
+    <configuration name="main" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
+      <module name="ins_expert" />
+      <option name="ENV_FILES" value="" />
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs>
+        <env name="PYTHONUNBUFFERED" value="1" />
+      </envs>
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/agent" />
+      <option name="IS_MODULE_SDK" value="true" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
+      <option name="SCRIPT_NAME" value="$PROJECT_DIR$/agent/main.py" />
+      <option name="PARAMETERS" value="" />
+      <option name="SHOW_COMMAND_LINE" value="false" />
+      <option name="EMULATE_TERMINAL" value="false" />
+      <option name="MODULE_MODE" value="false" />
+      <option name="REDIRECT_INPUT" value="false" />
+      <option name="INPUT_FILE" value="" />
+      <method v="2" />
+    </configuration>
+    <recent_temporary>
+      <list>
+        <item itemvalue="Python.main (1)" />
+        <item itemvalue="Python.main" />
+      </list>
+    </recent_temporary>
+  </component>
+  <component name="SharedIndexes">
+    <attachedChunks>
+      <set>
+        <option value="bundled-js-predefined-d6986cc7102b-deb605915726-JavaScript-PY-243.22562.220" />
+        <option value="bundled-python-sdk-0fc6c617c4bd-9a18a617cbe4-com.jetbrains.pycharm.pro.sharedIndexes.bundled-PY-243.22562.220" />
+      </set>
+    </attachedChunks>
+  </component>
+  <component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" />
+  <component name="TaskManager">
+    <task active="true" id="Default" summary="Default task">
+      <changelist id="556e838b-1bb4-4816-8e54-c3fd1e8bac72" name="Changes" comment="" />
+      <created>1747809327016</created>
+      <option name="number" value="Default" />
+      <option name="presentableId" value="Default" />
+      <updated>1747809327016</updated>
+      <workItem from="1747809330034" duration="7104000" />
+      <workItem from="1748324207454" duration="678000" />
+      <workItem from="1748328684570" duration="17908000" />
+    </task>
+    <task id="LOCAL-00001" summary="代码提交">
+      <option name="closed" value="true" />
+      <created>1748570986823</created>
+      <option name="number" value="00001" />
+      <option name="presentableId" value="LOCAL-00001" />
+      <option name="project" value="LOCAL" />
+      <updated>1748570986823</updated>
+    </task>
+    <option name="localTasksCounter" value="2" />
+    <servers />
+  </component>
+  <component name="TypeScriptGeneratedFilesManager">
+    <option name="version" value="3" />
+  </component>
+  <component name="VcsManagerConfiguration">
+    <MESSAGE value="代码提交" />
+    <option name="LAST_COMMIT_MESSAGE" value="代码提交" />
+  </component>
+  <component name="XDebuggerManager">
+    <breakpoint-manager>
+      <breakpoints>
+        <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
+          <url>file://$PROJECT_DIR$/agent/router/user_router.py</url>
+          <line>21</line>
+          <option name="timeStamp" value="2" />
+        </line-breakpoint>
+      </breakpoints>
+    </breakpoint-manager>
+  </component>
+  <component name="com.intellij.coverage.CoverageDataManagerImpl">
+    <SUITE FILE_PATH="coverage/ins_expert$main__1_.coverage" NAME="main (1) Coverage Results" MODIFIED="1748512756047" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/executor" />
+    <SUITE FILE_PATH="coverage/ins_expert$main.coverage" NAME="main Coverage Results" MODIFIED="1748512661246" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/agent" />
+  </component>
+</project>

+ 3 - 0
.vscode/settings.json

@@ -0,0 +1,3 @@
+{
+    "marscode.chatLanguage": "cn"
+}

+ 72 - 0
Dockerfile.base

@@ -0,0 +1,72 @@
+# Base image (official Debian slim; the build stages keep the ubuntu-* naming)
+FROM debian:stable-slim AS ubuntu-base
+
+# Set an env var to avoid interactive prompts during package installation
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install required dependencies
+RUN apt-get update && apt-get install -y \
+    wget \
+    bzip2 \
+    ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+
+FROM ubuntu-base AS ubuntu-anaconda
+# Download and install Anaconda
+RUN wget --quiet https://repo.anaconda.com/archive/Anaconda3-2024.10-1-Linux-x86_64.sh -O ~/anaconda.sh && \
+    /bin/bash ~/anaconda.sh -b -p /opt/conda && \
+    rm ~/anaconda.sh && \
+    echo "export PATH=/opt/conda/bin:$PATH" >> ~/.bashrc && \
+    /opt/conda/bin/conda clean --all -y
+
+ENV PATH="/opt/conda/bin:$PATH"
+# Configure the Tsinghua conda mirror
+
+FROM ubuntu-anaconda AS ubuntu-anaconda-config
+
+RUN conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/ && \
+    conda config --set show_channel_urls yes
+COPY environment.yml /app/environment.yml
+RUN apt-get update && \
+    apt-get install -y python3-dev libpq-dev
+RUN apt-get install -y build-essential 
+
+RUN conda env create -f /app/environment.yml
+RUN apt-get update && apt-get install -y --no-install-recommends curl && apt-get clean && rm -rf /var/lib/apt/lists/*
+# Configure Conda environment variables
+
+# Create a Python virtual environment named "server"
+#RUN conda create -y -n server python=3.9 && \
+#    conda clean --all -y
+
+# Copy requirements.txt into the image
+#COPY requirements.txt /app/requirements.txt
+# Activate the server environment and install dependencies with pip
+# RUN conda run -n server pip install --no-cache-dir -r /app/requirements.txt
+
+# Copy the FastAPI application code into the image
+FROM ubuntu-anaconda-config AS ubuntu-anaconda-app
+COPY . /app
+
+# Set environment variables (these can be overridden when the container starts)
+ENV POSTGRESQL_HOST="localhost"
+ENV POSTGRESQL_DATABASE="kg"
+ENV POSTGRESQL_USER="postgres"
+#ENV POSTGRESQL_PASSWORD="difyai123456"
+ENV DEEPSEEK_API_URL="https://api.siliconflow.cn/v1/chat/completions"
+#ENV DEEPSEEK_API_KEY="sk-vecnpjmtmelcefdbtbbpqvzcegopxrherbnbjhscugbpxuif"
+ENV JOB_PATH="/app/agent/jobs"
+ENV JOB_SCRIPT_PATH="/app/agent/job_script"
+# Create the log directory
+RUN mkdir -p /app/logs
+
+# Create a startup script
+RUN echo '#!/bin/bash' > /start.sh && \
+    echo 'source /opt/conda/etc/profile.d/conda.sh' >> /start.sh && \
+    echo 'conda activate kgbuilder' >> /start.sh && \
+    echo 'cd /app' >> /start.sh && \
+    echo 'python agent/main.py' >> /start.sh && \
+    chmod +x /start.sh
+
+# Set the default command
+CMD ["/bin/bash", "/start.sh"]

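Note: Dockerfile.job and Dockerfile.server below both start FROM kgbuilder:latest, so this base image has to be built and tagged first, e.g. docker build -f Dockerfile.base -t kgbuilder:latest . (the kgbuilder:latest tag is an assumption inferred from those FROM lines); the job and server images are then built with the commands given in their own header comments.
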
+ 31 - 0
Dockerfile.job

@@ -0,0 +1,31 @@
+#docker build -f Dockerfile.job -t kgbuilder-job:1.0 .  
+FROM kgbuilder:latest 
+#RUN apt-get remove -y python3-dev libpq-dev
+#RUN apt-get remove -y build-essential 
+#RUN apt-get clean
+RUN apt-get update
+#RUN apt-get install -y libgtk-dotnet3.0-cil-dev
+#RUN apt-get install -y libicu72
+COPY . /app
+# Set environment variables (these can be overridden when the container starts)
+ENV POSTGRESQL_HOST="localhost"
+ENV POSTGRESQL_DATABASE="kg"
+ENV POSTGRESQL_USER="postgres"
+#ENV POSTGRESQL_PASSWORD="difyai123456"
+ENV DEEPSEEK_API_URL="https://api.siliconflow.cn/v1/chat/completions"
+#ENV DEEPSEEK_API_KEY="sk-vecnpjmtmelcefdbtbbpqvzcegopxrherbnbjhscugbpxuif"
+ENV JOB_PATH="/app/agent/jobs"
+ENV JOB_SCRIPT_PATH="/app/agent/job_script"
+# Create the log directory
+RUN mkdir -p /app/logs
+
+# Create a startup script
+RUN echo '#!/bin/bash' > /start.sh && \
+    echo 'source /opt/conda/etc/profile.d/conda.sh' >> /start.sh && \
+    echo 'conda activate kgbuilder' >> /start.sh && \
+    echo 'cd /app' >> /start.sh && \
+    echo 'python executor/main.py' >> /start.sh && \
+    chmod +x /start.sh
+
+# Set the default command
+CMD ["/bin/bash", "/start.sh"]

+ 17 - 0
Dockerfile.server

@@ -0,0 +1,17 @@
+# docker build -f Dockerfile.server -t kgbuilder-server:1.0 . 
+FROM kgbuilder:latest
+
+
+# Create the log directory
+RUN mkdir -p /app/logs
+
+# Create a startup script
+RUN echo '#!/bin/bash' > /start.sh && \
+    echo 'source /opt/conda/etc/profile.d/conda.sh' >> /start.sh && \
+    echo 'conda activate kgbuilder' >> /start.sh && \
+    echo 'cd /app' >> /start.sh && \
+    echo 'python agent/main.py' >> /start.sh && \
+    chmod +x /start.sh
+
+# Set the default command
+CMD ["/bin/bash", "/start.sh"]

+ 21 - 0
LICENSE

@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 米川
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

+ 40 - 0
README.md

@@ -0,0 +1,40 @@
+
+### Environment variables
+
+- POSTGRESQL_HOST="localhost" # database host
+- POSTGRESQL_DATABASE="kg" # database name
+- POSTGRESQL_USER="postgres" # database user
+- POSTGRESQL_PASSWORD="difyai123456" # database password
+- DEEPSEEK_API_URL="https://api.siliconflow.cn/v1/chat/completions" # model API URL
+- DEEPSEEK_API_KEY="" # model API key
+- JOB_PATH="/app/agent/jobs" # job directory
+- JOB_SCRIPT_PATH="/app/agent/job_script" # job script directory
+
+### Running the API server
+
+1. Set the POSTGRESQL-related environment variables.
+2. Map the log directory and the job directory into the container.
+3. Attach the container to the docker_default network.
+
+docker run -idt --name kgb-server --network=docker_default -v D:/work/03/qz_data/logs0:/app/logs -v D:/work/03/qz_data/job0:/app/agent/jobs -e POSTGRESQL_HOST="db" -e POSTGRESQL_PASSWORD="difyai123456"  kgbuilder-server
+
+### Running the job-processing server
+
+docker run -idt --name kgb-job --network=docker_default -v D:/work/03/qz_data/logs0:/app/logs -v D:/work/03/qz_data/job0:/app/agent/jobs -e POSTGRESQL_HOST="db" -e POSTGRESQL_PASSWORD="difyai123456" -e DEEPSEEK_API_URL="https://api.siliconflow.cn/v1/chat/completions" -e DEEPSEEK_API_KEY="sk-vecnpjmtmelcefdbtbbpqvzcegopxrherbnbjhscugbpxuif" -e DONET_SYSTEM_GLOBALIZATION_INVARIANT=1 kgbuilder-job:1.0
+
+### Running the web server
+
+1. Attach the container to the docker_default network.
+2. If you renamed the API server, set the environment variable VITE_API_URL=http://kgb-server:8000 to point at the new name.
+
+docker run -idt  -p 8080:8080 --name kgb-web --network=docker_default kgbuilder-web:1.0
+
+
+### Debugging the job-processing server
+
+This is a script for debugging word_extractor.py; adjust the script path and arguments as needed.
+
+bash
+source /opt/conda/etc/profile.d/conda.sh
+conda activate kgbuilder
+python executor/job_script/standard_word_extractor.py /app/agent/jobs/74

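For debugging outside Docker, the POSTGRESQL_* variables documented above can be checked with a small connection test. A minimal sketch, assuming a psycopg2-style driver is available (libpq-dev and python3-dev are installed in Dockerfile.base, but the exact driver is not visible in this part of the commit):

    # Sketch: verify the POSTGRESQL_* settings by opening a connection and printing the server version.
    import os
    import psycopg2  # assumption: a libpq-based PostgreSQL driver is installed

    conn = psycopg2.connect(
        host=os.getenv("POSTGRESQL_HOST", "localhost"),
        dbname=os.getenv("POSTGRESQL_DATABASE", "kg"),
        user=os.getenv("POSTGRESQL_USER", "postgres"),
        password=os.getenv("POSTGRESQL_PASSWORD", ""),
    )
    with conn.cursor() as cur:
        cur.execute("SELECT version()")
        print(cur.fetchone()[0])
    conn.close()
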
+ 305 - 0
agent/app.py

@@ -0,0 +1,305 @@
+import sys,os
+current_path = os.getcwd()
+sys.path.append(current_path)
+
+import streamlit as st
+import os
+import json
+import requests
+from dotenv import load_dotenv
+from langgraph.graph import StateGraph, MessagesState,START, END
+from langchain_core.messages import HumanMessage, SystemMessage, AIMessage,BaseMessage
+from langchain_openai import ChatOpenAI
+#from langchain_community.callbacks.streamlit import StreamlitCallbackHandler
+from langchain_core.callbacks import AsyncCallbackHandler
+from typing import Any
+#streamlit run agent/app.py
+# Load environment variables
+load_dotenv()
+
+from config.site import SiteConfig
+config = SiteConfig()
+
+GRAPH_API_URL = config.get_config("GRAPH_API_URL")
+DEEPSEEK_API_URL = config.get_config("DEEPSEEK_API_URL")
+DEEPSEEK_API_KEY = config.get_config("DEEPSEEK_API_KEY")
+
+PROMPT_INTENSION = '''你是一个临床医学专家,你需要对用户的问题进行意图分析,判断用户的问题是否是关于医疗健康的问题。如果是医疗健康的问题,你需要输出"是",否则输出"否"。'''
+PROMPT_DATA_EXTRACTION = '''你是一个NLP专家,你需要对用户的问题进行数据抽取,抽取的结果输出为json格式。以下是json格式的样本
+···json
+{ 
+"pat_name": "XXX", "pat_sex": "XXX","pat_age": 0,"clinical_department": "XXX","chief_complaint":["A","B","C"],
+"present_illness":["A","B","C"],past_medical_history:["A","B","C"],physical_examination:["A","B","C"],lab_and_imaging:["A","B","C"]
+},
+···
+其中字段的描述如下:
+pat_name:患者名字,字符串,如"张三",无患者信息输出""
+pat_sex:患者性别,字符串,如"男",无患者信息输出""
+pat_age:患者年龄,数字,单位为年,如25岁,输出25,无年龄信息输出0
+clinical_department:就诊科室,字符串,如"呼吸内科",无就诊科室信息输出""
+chief_complaint:主诉,字符串列表,包括主要症状的列表,如["胸痛","发热"],无主诉输出[]
+present_illness:现病史,字符串列表,包括症状发展过程、诱因、伴随症状(如疼痛性质、放射部位、缓解方式,无现病史信息输出[]
+past_medical_history:既往病史,字符串列表,包括疾病史(如高血压、糖尿病)、手术史、药物过敏史、家族史等,无现病史信息输出[]
+physical_examination:体格检查,字符串列表,如生命体征(血压、心率)、心肺腹部体征、实验室/影像学结果(如心电图异常、肌钙蛋白升高),无信息输出[]
+lab_and_imaging:检验与检查,字符串列表,包括血常规、生化指标、心电图(ECG)、胸部X光、CT等检查项目,结果和报告等,无信息输出[]
+'''
+######################## langgraph
+# Initialize the DeepSeek model
+llm = ChatOpenAI(
+      # deepseek-ai/DeepSeek-V3, deepseek-ai/DeepSeek-R1  
+      model="Pro/deepseek-ai/DeepSeek-V3",
+      #model="Qwen/QwQ-32B", 
+      api_key=DEEPSEEK_API_KEY,
+      base_url=DEEPSEEK_API_URL,
+      streaming=True)
+
+
+# Define the LangGraph workflow
+class MyStreamingOutCallbackHandler(AsyncCallbackHandler):
+    def __init__(self):
+        super().__init__()
+        self.content = ""
+    async def on_llm_new_token(self, token: str, **kwargs) -> None:        
+        # Callback for each newly streamed token
+        self.content += token 
+        print(token)       
+        st.write(token) 
+    async def on_llm_end(self, response, **kwargs) -> None:        
+        # Callback when streaming ends
+        # # end-of-stream handling can go here
+        pass
+    async def on_llm_error(self, error, **kwargs) -> None:
+        # Callback for streaming errors
+        pass
+    async def on_chat_model_start(self,
+        serialized: dict[str, Any],
+        messages: list[list[BaseMessage]], **kwargs) -> None:        
+        # Callback when the chat model starts
+        print("on_chat_model_start", messages) 
+        
+def agent_node(state):
+    print("agent_node", state)
+    messages = [
+        SystemMessage(content=PROMPT_INTENSION),
+        HumanMessage(content=state["messages"][-1].content)
+    ]
+    response = llm.stream(messages)
+    collect_messages = []
+    
+    for chunk in response:
+        text = chunk.content or ""
+        if text == "":
+            continue  
+        #st.write(text)
+        #state["callback"](text)
+        collect_messages.append(text)
+    fully_response = ''.join(collect_messages)
+    state["messages"].append(AIMessage(content=fully_response))
+    #response = llm.invoke(state["messages"], config={"callbacks":[MyStreamingOutCallbackHandler()]})
+    return state
+
+def entity_extraction_node(state):
+    state["messages"] =  state["messages"][:-1]
+   
+    print("entity_extraction_node",state["messages"][-1].content)
+    print(state["messages"])
+    messages = [
+        SystemMessage(content=PROMPT_DATA_EXTRACTION),
+        HumanMessage(content=state["messages"][-1].content)
+    ]
+    response = llm.stream(messages)
+    collect_messages = []
+    
+    for chunk in response:
+        text = chunk.content or ""
+        if text == "":
+            continue     
+        #st.write(text)
+        state["callback"](text)
+        collect_messages.append(text)
+    fully_response = ''.join(collect_messages)
+    state["messages"].append(AIMessage(content=fully_response))
+    #response = llm.invoke(state["messages"], config={"callbacks":[MyStreamingOutCallbackHandler()]})
+    return state
+
+def recommed_check(state):
+    print("recommed_check",state["messages"][-1].content)
+    text_json = state["messages"][-1].content
+    text_json = text_json.strip("\n```json")
+    json_data = json.loads(text_json)
+    headers = {
+        "Content-Type": "application/json"
+    }
+    data = {
+        "q": " ".join(json_data["chief_complaint"]),
+        "type":"Check"
+    }
+    response = requests.get(f"{GRAPH_API_URL}/graph/nodes/neighbor_search?keyword={data['q']}&&neighbor_type={data['type']}",
+                            headers=headers)
+    response.raise_for_status()    
+    response = response.json()
+
+    state["callback"]("\n") 
+    if "records" in response.keys():
+        state["callback"]("## 该病案可能的诊断包括\n") 
+        if response["records"] and "nodes" in response["records"].keys():
+            response_data = response["records"]["nodes"]
+            for data in response_data:
+                print(data)
+                if "type" in data.keys():
+                    if data["type"] == "Disease":
+                        state["callback"]("- "+data["id"]+"("+data["type"]+","+str(round(data['count']*100,2))+"%)\n")
+        
+        state["callback"]("## 推荐的检查\n") 
+        if "neighbors" in response["records"].keys():
+            response_data = response["records"]["neighbors"]
+            for data in response_data:
+                state["callback"]("- "+data["id"]+"("+str(round(data['count']*100,2))+"%)\n")
+    
+    data = {
+        "q": " ".join(json_data["chief_complaint"]),
+        "type":"Drug"
+    }            
+    response = requests.get(f"{GRAPH_API_URL}/graph/nodes/neighbor_search?keyword={data['q']}&&neighbor_type={data['type']}",
+                            headers=headers)
+    response.raise_for_status()    
+    response = response.json()
+
+    state["callback"]("\n") 
+    if "records" in response.keys():        
+        state["callback"]("## 推荐的药物\n") 
+        if "neighbors" in response["records"].keys():
+            response_data = response["records"]["neighbors"]
+            for data in response_data:
+                state["callback"]("- "+data["id"]+"("+str(round(data['count']*100,2))+"%)\n")
+                
+                
+    print("recommed_check finished")
+    
+    #response = llm.invoke(state["messages"], config={"callbacks":[MyStreamingOutCallbackHandler()]})
+    return state
+
+def should_continue_2_entity_extraction(state):
+    print("should_continue")
+    previous_message = state["messages"][-1]
+    ai_resposne = previous_message.content or ""    
+    ai_resposne = ai_resposne.strip()
+    print("should_continue",ai_resposne)
+    if ai_resposne == "是":
+        # This is a medical/health question, continue to the next node
+        return "continue"    
+    return "end"
+def tool_node(state):
+    # Add custom tool logic here
+    print("tool_node")
+    return {"tool_output": "Tool executed"}
+
+class MyMessageState(MessagesState):
+    callback: Any = None
+    
+workflow = StateGraph(MyMessageState)
+workflow.add_node("agent", agent_node)
+workflow.add_node("tools", tool_node)
+workflow.add_node("extract", entity_extraction_node)
+workflow.add_node("recommend_check", recommed_check)
+workflow.add_edge(START, "agent")
+#workflow.add_edge("agent", "tools")
+workflow.add_edge("extract", "recommend_check")
+workflow.add_edge("recommend_check", END)
+workflow.add_edge("tools", END)
+workflow.add_conditional_edges( "agent", should_continue_2_entity_extraction, {"continue":"extract", "end":END})
+app = workflow.compile()
+
+def test_langgraph(user_input):    
+    messages = [        
+        HumanMessage(content=user_input)
+    ]
+    response = app.invoke({"messages":messages})
+    print(response)
+    
+######################## networkx
+
+    
+# Streamlit UI
+
+st.set_page_config(layout="wide")
+tmp_text = ""
+def st_callback(text):    
+    global tmp_text
+    tmp_text = tmp_text + text 
+    st.info(tmp_text)
+
+def submit_question(text):
+    print("submit_question", text)   
+
+
+user_input = None
+submit_button = None
+
+
+st.header("Med Graph Agent")
+
+if "history" not in st.session_state:
+    st.session_state.history = []
+    
+
+for message in st.session_state.history: 
+    with st.chat_message(message["role"]):
+        st.write(message["content"])
+user_input = st.chat_input("请输入您的问题:")
+if user_input:
+    messages = [        
+        HumanMessage(content=user_input)
+    ]           
+    state = MyMessageState(messages=messages, callback=st_callback)
+    st.session_state.history.append({"role": "user", "content": user_input})   
+    st.chat_message("user").write(user_input) 
+    placeholder = st.empty()
+    with placeholder:
+        st.info("thinking...")
+        tmp_text = ""
+        response = app.invoke(state)
+    print(state["messages"])
+    message = {"role": "assistant", "content": tmp_text}
+    placeholder.empty()
+    
+    #st.write("### 回答:")
+    st.session_state.history.append(message)
+    with st.chat_message(message["role"]):
+        st.write(message["content"])
+  
+# left_col, right_col = st.columns([1,2])
+# with left_col:    
+#     st.header("Med Graph Agent")
+#     user_input = st.text_area("请输入您的问题:",height=200)
+#     submit_button = st.button("提交", on_click=submit_question, args=[user_input])
+    
+# with right_col:        
+#     with st.container(height=800):
+#         if "history" not in st.session_state:
+#             st.session_state.history = []
+            
+        
+#         for message in st.session_state.history: 
+#             with st.chat_message(message["role"]):
+#                 st.write(message["content"])
+#         if submit_button:
+#             messages = [        
+#                 HumanMessage(content=user_input)
+#             ]           
+#             state = MyMessageState(messages=messages, callback=st_callback)
+#             st.session_state.history.append({"role": "user", "content": user_input})   
+#             st.chat_message("user").write(user_input) 
+#             placeholder = st.empty()
+#             with placeholder:
+#                 st.info("thinking...")
+#                 tmp_text = ""
+#                 response = app.invoke(state)
+#             print(state["messages"])
+#             message = {"role": "assistant", "content": tmp_text}
+#             placeholder.empty()
+            
+#             #st.write("### 回答:")
+#             st.session_state.history.append(message)
+#             with st.chat_message(message["role"]):
+#                 st.write(message["content"])

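The workflow above routes through add_conditional_edges: agent_node classifies the question, should_continue_2_entity_extraction inspects the last message, and only medical questions reach the extraction and recommendation nodes. A stripped-down sketch of the same routing pattern with plain functions instead of LLM calls (illustrative only; it assumes langgraph and langchain-core are installed, as app.py already requires):

    # Sketch: the conditional-edge routing used in agent/app.py, without any LLM calls.
    from langgraph.graph import StateGraph, MessagesState, START, END
    from langchain_core.messages import HumanMessage, AIMessage

    def classify(state):
        # stand-in for agent_node: always answer "是" (yes, this is a medical question)
        return {"messages": [AIMessage(content="是")]}

    def extract(state):
        # stand-in for entity_extraction_node
        return {"messages": [AIMessage(content='{"chief_complaint": []}')]}

    def route(state):
        # stand-in for should_continue_2_entity_extraction
        return "continue" if state["messages"][-1].content.strip() == "是" else "end"

    wf = StateGraph(MessagesState)
    wf.add_node("classify", classify)
    wf.add_node("extract", extract)
    wf.add_edge(START, "classify")
    wf.add_conditional_edges("classify", route, {"continue": "extract", "end": END})
    wf.add_edge("extract", END)
    graph = wf.compile()

    result = graph.invoke({"messages": [HumanMessage(content="胸痛三天,伴发热")]})
    print([m.content for m in result["messages"]])
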
+ 41 - 0
agent/cdss/capbility.py

@@ -0,0 +1,41 @@
+from cdss.models.schemas import CDSSDict, CDSSInput,CDSSInt,CDSSOutput,CDSSText
+from cdss.libs.cdss_helper import CDSSHelper
+import logging
+logger = logging.getLogger(__name__)
+
+class CDSSCapability:
+    cdss_helper: CDSSHelper = None
+    def __init__(self):
+        self.cdss_helper = CDSSHelper()
+        #self.cdss_helper.load_local_data()
+        logger.debug("CDSSCapability initialized")
+    
+    def process(self, input: CDSSInput, embeding_search:bool = True) -> CDSSOutput:        
+        start_nodes = []
+        chief_complaint = input.get_value("chief_complaint")
+        logger.info(f"process input: {input}")
+        output = CDSSOutput()
+        if chief_complaint:
+            for keyword in chief_complaint:
+                results = self.cdss_helper.node_search(
+                    keyword, limit=10, node_type="word"
+                )
+                for item in results:
+                    if item['score']>1.9:                        
+                        start_nodes.append(item['id'])
+            logger.info(f"cdss start from {start_nodes}")    
+            result = self.cdss_helper.cdss_travel(input, start_nodes,max_hops=2)
+                
+            for item in result["details"]:
+                name, data = item
+                output.departments.value[name] = data
+            for item in result["diags"][:5]:                       
+                output.diagnosis.value[item[0]] = item[1]   
+            for item in result["checks"][:5]:        
+                item[1]['score'] = item[1]['count'] / result["total_checks"]
+                output.checks.value[item[0]] = item[1]            
+            for item in result["drugs"][:5]:          
+                item[1]['score'] = item[1]['count'] / result["total_drugs"]
+                output.drugs.value[item[0]] = item[1]  
+                #print(f"\t药品:{item[0]} {item[1]['count'] / result["total_drugs"] * 100:.2f} %")
+        return output

+ 488 - 0
agent/cdss/libs/cdss_helper.py

@@ -0,0 +1,488 @@
+import os
+import sys
+import logging 
+import json
+current_path = os.getcwd()
+sys.path.append(current_path)
+
+from libs.graph_helper import GraphHelper
+from typing import List
+from cdss.models.schemas import CDSSInput
+from config.site import SiteConfig
+import networkx as nx
+import pandas as pd
+logger = logging.getLogger(__name__)
+class CDSSHelper(GraphHelper):
+
+    def node_search(self, node_id=None, node_type=None, filters=None, limit=1, min_degree=None):
+        """节点检索功能"""            
+        es_result = self.es.search_title_index("graph_entity_index", node_id, limit)
+        results = []
+        for item in es_result:           
+            score = item["score"]            
+            results.append({
+                    'id': item["title"],
+                    'score': score,
+                    "name": item["title"],
+            })
+        return results
+    def _load_entity_data(self):
+        config = SiteConfig()
+        CACHED_DATA_PATH = config.get_config("CACHED_DATA_PATH")
+        logger.info("load entity data")
+        # Attributes to load for each entity
+        data = {"id":[], "name":[], "type":[], "allowed_sex_list":[], "allowed_age_range":[]}
+        with open(f"{CACHED_DATA_PATH}\\entities_med.json", "r", encoding="utf-8") as f:
+            entities = json.load(f)
+            for item in entities:
+                data["id"].append(int(item[0]))
+                data["name"].append(item[1]["name"])
+                data["type"].append(item[1]["type"])
+                data["allowed_sex_liste"].append(item[1]["allowed_sex_list"]) if "allowed_sex_list" in item[1] else data["allowed_sex_list"].append("")
+                data["allowed_age_range"].append(item[1]["allowed_age_range"]) if "allowed_age_range" in item[1] else data["allowed_age_range"].append("")
+                #item[1]["id"] = item[0]
+                #item[1]["name"] = item[0]
+                #attrs = item[1]
+                #self.graph.add_node(item[0], **attrs)
+        self.entity_data = pd.DataFrame(data)
+        self.entity_data.set_index("id", inplace=True)
+        logger.info("load entity data finished")
+
+    def _load_relation_data(self):
+        config = SiteConfig()
+        CACHED_DATA_PATH = config.get_config("CACHED_DATA_PATH")
+        logger.info("load relationship data")
+        
+        for i in range(99):
+            if os.path.exists(f"{CACHED_DATA_PATH}\\relationship_med_{i}.json"):            
+                logger.info(f"load entity data {CACHED_DATA_PATH}\\relationship_med_{i}.json")
+                with open(f"{CACHED_DATA_PATH}\\relationship_med_{i}.json", "r", encoding="utf-8") as f:
+                    data = {"src":[], "dest":[], "type":[], "weight":[]}
+                    relations = json.load(f)
+                    for item in relations:                            
+                        data["src"].append(int(item[0]))    
+                        data["dest"].append(int(item[2]))   
+                        if int(item[0]) == 2969539 or int(item[2]) == 2969539:
+                            print(">>>>>>>> FOUND 2969539")
+                        data["type"].append(item[4]["type"]) 
+                        data["weight"].append(item[4]["weight"]) if "weight" in item[4] else data["weight"].append(1)          
+                    self.relation_data = pd.concat([self.relation_data, pd.DataFrame(data)], ignore_index=True)
+        
+    def build_graph(self):
+        self.entity_data = pd.DataFrame({"id":[],"name":[], "type":[], "allowed_sex_list":[], "allowed_age_range":[]})
+        self.relation_data = pd.DataFrame({"src":[], "dest":[], "type":[], "weight":[]})
+        self._load_entity_data()        
+        self._load_relation_data()
+        self._load_local_data()
+        
+        
+        self.graph = nx.from_pandas_edgelist(self.relation_data, "src", "dest", edge_attr=True, create_using=nx.DiGraph())
+    
+        nx.set_node_attributes(self.graph, self.entity_data.to_dict(orient="index"))
+
+
+        #print(self.graph.in_edges('1257357',data=True))
+        
+    def _load_local_data(self):
+        # Load the update data and the weight (factor) data here
+        config = SiteConfig()
+        self.update_data_path = config.get_config('UPDATE_DATA_PATH')
+        self.factor_data_path = config.get_config('FACTOR_DATA_PATH')
+        logger.info(f"load update data from {self.update_data_path}")        
+        for root, dirs, files in os.walk(self.update_data_path):
+            for file in files:
+                file_path = os.path.join(root, file)
+                if file_path.endswith(".json") and file.startswith("ent"):
+                    self._load_update_entity_json(file_path)
+                if file_path.endswith(".json") and file.startswith("rel"):
+                    self._load_update_relationship_json(file_path)
+    
+    def _load_update_entity_json(self, file):
+        '''load json data from file'''
+        logger.info(f"load entity update data from {file}")
+        
+        # The update data is a JSON file in the same format as the cached data
+
+        with open(file, "r", encoding="utf-8") as f:
+            entities = json.load(f)  
+            for item in entities: 
+                original_data = self.entity_data[self.entity_data.index==item[0]]
+                if original_data.empty:
+                    continue
+                original_data = original_data.iloc[0]
+                id=int(item[0])
+                name = item[1]["name"] if "name" in item[1] else original_data['name']
+                type = item[1]["type"] if "type" in item[1] else original_data['type']
+                allowed_sex_liste = item[1]["allowed_sex_list"] if "allowed_sex_list" in item[1] else original_data['allowed_sex_list']
+                allowed_age_range = item[1]["allowed_age_range"] if "allowed_age_range" in item[1] else original_data['allowed_age_range']
+
+                self.entity_data.loc[id,["name", "type", "allowed_sex_list","allowed_age_range"]] = [name, type, allowed_sex_liste, allowed_age_range]
+
+    
+    def _load_update_relationship_json(self, file):
+        '''load json data from file'''
+        logger.info(f"load relationship update data from {file}")   
+        
+        with open(file, "r", encoding="utf-8") as f:
+            relations = json.load(f)           
+            for item in relations:
+                data = {}
+                data["src"] = int(item[0])
+                data["dest"] = int(item[2])
+                data["type"] = item[4]["type"]
+                original_data = self.relation_data[(self.relation_data['src']==data['src']) & 
+                                       (self.relation_data['dest']==data['dest']) &
+                                       (self.relation_data['type']==data['type'])]                
+                if original_data.empty:
+                    continue
+                original_data = original_data.iloc[0]
+                data["weight"]=item[4]["weight"] if "weight" in item[4] else original_data['weight'] 
+                
+                self.relation_data.loc[(self.relation_data['src']==data['src']) & 
+                                       (self.relation_data['dest']==data['dest']) &
+                                       (self.relation_data['type']==data['type']), 'weight'] = data["weight"]
+                
+
+            
+         
+    def check_sex_allowed(self, node, sex):        
+        # Sex filter: disease nodes are assumed to carry an allowed_sex_list attribute with values "0,1,2" for unknown/male/female
+        sex_allowed = self.graph.nodes[node].get('allowed_sex_list', None)
+        if sex_allowed:
+            if len(sex_allowed) == 0:
+                # An empty sex list means all sexes are allowed
+                return True
+            sex_allowed_list = sex_allowed.split(',')
+            if sex not in sex_allowed_list:
+                # Sex does not match, skip this node
+                return False
+        return True
+    def check_age_allowed(self, node, age):
+        # Age filter: disease nodes are assumed to carry an allowed_age_range attribute such as "6-88", i.e. ages from 6 to 88 months are allowed
+        # Ages in the graph are expressed in months (cdss_travel converts years to months before the traversal)
+        age_allowed = self.graph.nodes[node].get('allowed_age_range', None)
+        if age_allowed:
+            if len(age_allowed) == 0:
+                #如果年龄范围为空,那么默认允许所有年龄
+                return True
+            age_allowed_list = age_allowed.split('-')
+            age_min = int(age_allowed_list[0])
+            age_max = int(age_allowed_list[-1])
+            if age >= age_min and age < age_max:
+                #如果年龄范围正常,那么返回True
+                return True
+        else:
+            # No age range set, allow by default
+            return True
+        return False
+        
+    def cdss_travel(self, input:CDSSInput, start_nodes:List, max_hops=3):      
+        # Allowed node types; adjust as needed, multiple labels per category are supported
+        DEPARTMENT=['科室','Department']
+        DIESEASE=['疾病','Disease']
+        DRUG=['药品','Drug']
+        CHECK=['检查','Check']
+        SYMPTOM=['症状','Symptom']
+        allowed_types = DEPARTMENT + DIESEASE+ DRUG + CHECK + SYMPTOM
+        # Allowed edge types; adjust as needed, multiple labels per category are supported
+        # The code below does not actually filter on edge type yet; this is kept for future extension
+        allowed_links = ['has_symptom', 'need_check', 'recommend_drug', 'belongs_to']
+        # Map the user-supplied names to node ids; because of duplicate names, node_ids can end up larger than start_nodes
+        node_ids = []
+        node_id_names = {}
+        for node in start_nodes:
+            logger.debug(f"searching for node {node}")
+            result = self.entity_data[self.entity_data['name'] == node]
+            
+            for index, data in result.iterrows():     
+                node_id_names[index] = data["name"]
+                node_ids = node_ids + [index]
+        
+        logger.info(f"start travel from {node_id_names}")
+        # A queue stores the symptoms waiting to be traversed:
+        node_ids_filtered = []
+        for node in node_ids:
+            if self.graph.has_node(node):
+                node_ids_filtered.append(node)
+            else:
+                logger.debug(f"node {node} not found")
+        node_ids = node_ids_filtered
+        queue = [(node, 0, node_id_names[node], {'allowed_types': allowed_types, 'allowed_links':allowed_links}) for node in node_ids]        
+        visited = set()      
+        results = {}
+        # Normalize the input: validate it and convert units where necessary
+        if input.pat_age.value > 0 and input.pat_age.type == 'year':
+            # Convert the age from years to months, because ages in the graph are stored in months
+            input.pat_age.value = input.pat_age.value * 12
+            input.pat_age.type = 'month'
+            
+        #STEP 1: assuming start_nodes are all symptoms, first find the diseases these symptoms point to
+        #TODO this searches diseases symptom by symptom, so in production the results could be cached
+        while queue:
+            node, depth, path, data = queue.pop(0)
+            # Look up the node's name and type by id
+            node_type = self.entity_data[self.entity_data.index == node]['type'].tolist()[0]
+            node_name = self.entity_data[self.entity_data.index == node]['name'].tolist()[0]
+            logger.debug(f"node {node} type {node_type}")
+            if node_type in DIESEASE:
+                logger.debug(f"node {node} type {node_type} is a disease")
+                if self.check_sex_allowed(node, input.pat_sex.value) == False:
+                    continue
+                if self.check_age_allowed(node, input.pat_age.value) == False:
+                    continue
+                if node in results.keys():                 
+                    results[node]["count"] = results[node]["count"] + 1  
+                    results[node]["path"].append(path)   
+                else:
+                    results[node] = {"type": node_type, "count":1, "name":node_name, 'path':[path]}            
+                continue
+            
+            if node in visited or depth > max_hops:
+                logger.debug(f"{node} already visited or reach max hops")
+                continue                
+            
+            visited.add(node)
+            logger.debug(f"check edges from {node}")
+            for edge in self.graph.in_edges(node, data=True):                   
+                src, dest, edge_data = edge
+                if src not in visited and depth + 1 < max_hops:        
+                    logger.debug(f"put into queue travel from {src} to {dest}")            
+                    queue.append((src, depth + 1, path, data))
+                else:
+                    logger.debug(f"skip travel from {src} to {dest}")
+                    #print("-" * (indent+4), f"start travel from {src} to {dest}")    
+        logger.info(f"STEP 1 finished")
+        # Log the result as a markdown table
+        log_data = ["|疾病|症状|出现次数|是否相关"]
+        log_data.append("|--|--|--|--|")
+        for item in results:
+            data = results[item]
+            data['relevant'] = False
+            if data["count"] / len(start_nodes) > 0.5:
+                # A disease is considered relevant only if more than 50% of the input symptoms point to it
+                data['relevant'] = True
+            log_data.append(f"|{data['name']}|{','.join(data['path'])}|{data['count']}|{data['relevant']}|")
+
+        content = "疾病和症状相关性统计表格\n"+"\n".join(log_data)
+        logger.debug(f"\n{content}")
+        #STEP 2: find the departments, checks and drugs linked to these diseases
+        # This also searches disease by disease, so in production the results could be cached
+        
+        logger.info("STEP 2 start")
+
+        for disease in results.keys():
+            #TODO caching each disease's departments/checks/drugs here would improve performance a lot
+            if results[disease]["relevant"] == False:                
+                continue
+            logger.debug(f"search data for {disease}:{results[disease]['name']}") 
+            queue = []   
+            queue.append((disease, 0, disease,  {'allowed_types': DEPARTMENT, 'allowed_links':['belongs_to']}))
+            # Moving visited outside the per-disease loop was tried, but it caused issues and barely improved performance, so it stays inside the loop
+            visited = set()
+            
+            while queue:
+                node, depth, disease, data = queue.pop(0)         
+                
+                if node in visited or depth > max_hops:
+                    continue                 
+                visited.add(node)                
+
+                node_type = self.entity_data[self.entity_data.index == node]['type'].tolist()[0]
+                node_name = self.entity_data[self.entity_data.index == node]['name'].tolist()[0]
+
+                logger.debug(f"node {results[disease].get("name", disease)} {node_name} type {node_type}")
+                #node_type = self.graph.nodes[node].get('type')
+                if node_type in DEPARTMENT:                
+                    # Repeat the department once per disease occurrence to make the later counting easier
+                    department_data = [node_name] * results[disease]["count"]
+                    if 'department' in results[disease].keys():
+                        results[disease]["department"] = results[disease]["department"] + department_data
+                    else:
+                        results[disease]["department"] = department_data
+                    continue
+                if node_type in CHECK:
+                    if 'check' in results[disease].keys():
+                        results[disease]["check"] = list(set(results[disease]["check"]+[node_name]))
+                    else:
+                        results[disease]["check"] = [node_name]
+                    continue
+                if node_type in DRUG:
+                    if 'drug' in results[disease].keys():
+                        results[disease]["drug"] = list(set(results[disease]["drug"]+[node_name]))
+                    else:
+                        results[disease]["drug"] = [node_name]
+                    continue
+                for edge in self.graph.out_edges(node, data=True):                
+                    src, dest, edge_data = edge
+                    src_name = self.entity_data[self.entity_data.index == src]['name'].tolist()[0]
+                    dest_name = self.entity_data[self.entity_data.index == dest]['name'].tolist()[0]
+                    dest_type = self.entity_data[self.entity_data.index == dest]['type'].tolist()[0]
+
+                    if dest_type in allowed_types:
+                        if dest not in visited and depth + 1 < max_hops:  
+                            logger.debug(f"put travel request in queue from {src}:{src_name} to {dest}:{dest_name}")
+                            queue.append((edge[1], depth + 1, disease, data))   
+        
+        #TODO the departments/checks/drugs per disease in results could be cached here for later use
+        # for item in results.keys():
+        #     department_data = results[item].get("department", [])
+        #     count_data = results[item].get("count")
+        #     check_data = results[item].get("check", [])
+        #     drug_data = results[item].get("drug", [])
+        #     # caching code would go here
+            
+        logger.info(f"STEP 2 finished") 
+        # Log the result
+        log_data = ["|disease|count|department|check|drug|"]
+        log_data.append("|--|--|--|--|--|")
+        for item in results.keys():
+            department_data = results[item].get("department", [])
+            count_data = results[item].get("count")
+            check_data = results[item].get("check", [])
+            drug_data = results[item].get("drug", [])
+            log_data.append(f"|{results[item].get("name", item)}|{count_data}|{','.join(department_data)}|{','.join(check_data)}|{','.join(drug_data)}|")
+           
+        logger.debug("疾病科室检查药品相关统计\n"+"\n".join(log_data))
+        # End of logging
+         
+        #STEP 3: aggregate the results by department
+        
+        logger.info(f"STEP 3 start") 
+        final_results = {}
+        total = 0
+        for disease in results.keys():
+            # Some diseases have no department, so handle that case here
+            departments = ['DEFAULT']
+            if 'department' in results[disease].keys():          
+                departments = results[disease]["department"]             
+            for department in departments:
+                total += 1
+                if not department in final_results.keys():
+                    final_results[department] = {
+                        "diseases": [results[disease].get("name",disease)],
+                        "checks": results[disease].get("check",[]), 
+                        "drugs": results[disease].get("drug",[]),
+                        "count": 1
+                    }
+                else:
+                    final_results[department]["diseases"] = final_results[department]["diseases"]+[results[disease].get("name",disease)]
+                    final_results[department]["checks"] = final_results[department]["checks"]+results[disease].get("check",[])
+                    final_results[department]["drugs"] = final_results[department]["drugs"]+results[disease].get("drug",[])
+                    final_results[department]["count"] += 1
+
+        # Compute the distribution of department occurrences
+        for department in final_results.keys():
+            final_results[department]["score"] = final_results[department]["count"] / total
+            
+        logger.info(f"STEP 3 finished")  
+        # Log the result
+        log_data = ["|department|disease|check|drug|count|score"]
+        log_data.append("|--|--|--|--|--|--|")
+        for department in final_results.keys():
+            diesease_data = final_results[department].get("diseases", [])
+            check_data = final_results[department].get("checks", [])
+            drug_data = final_results[department].get("drugs", [])
+            count_data = final_results[department].get("count", 0)
+            score_data = final_results[department].get("score", 0)
+            log_data.append(f"|{department}|{','.join(diesease_data)}|{','.join(check_data)}|{','.join(drug_data)}|{count_data}|{score_data}|")
+           
+        logger.debug("\n"+"\n".join(log_data))
+        
+        #STEP 4: count occurrences of diseases, checks and drugs inside final_results and sort them in descending order
+        logger.info(f"STEP 4 start") 
+        def sort_data(data, count=5):
+            tmp = {}
+            for item in data:
+                if item in tmp.keys():
+                    tmp[item]["count"] +=1
+                else:
+                    tmp[item] = {"count":1}
+            sorted_data = sorted(tmp.items(), key=lambda x:x[1]["count"],reverse=True)
+            return sorted_data[:count]
+        
+        for department in final_results.keys():
+            final_results[department]['name'] = department
+            final_results[department]["diseases"] = sort_data(final_results[department]["diseases"])
+            final_results[department]["checks"] = sort_data(final_results[department]["checks"])
+            final_results[department]["drugs"] = sort_data(final_results[department]["drugs"])
+        
+        # Sort the departments by occurrence count, descending
+        sorted_final_results = sorted(final_results.items(), key=lambda x:x[1]["count"],reverse=True)
+        
+        logger.info(f"STEP 4 finished")  
+        #这里输出markdown日志
+        log_data = ["|department|disease|check|drug|count|score"]
+        log_data.append("|--|--|--|--|--|--|")
+        for department in final_results.keys():
+            disease_data = final_results[department].get("diseases")
+            check_data = final_results[department].get("checks")
+            drug_data = final_results[department].get("drugs")
+            count_data = final_results[department].get("count", 0)
+            score_data = final_results[department].get("score", 0)
+            log_data.append(f"|{department}|{disease_data}|{check_data}|{drug_data}|{count_data}|{score_data}|")
+           
+        logger.debug("\n"+"\n".join(log_data))
+        #STEP 5: count the global occurrences of diseases, checks and drugs across final_results and sort them in descending order of count
+        logger.info(f"STEP 5 start") 
+        checks = {}
+        drugs = {}
+        diags = {}
+        total_check = 0
+        total_drug = 0
+        total_diags = 0
+        for department in final_results.keys():
+            # the department factor is the department's probability of occurrence; the DEFAULT department is weighted 0.1
+            # a disease is scored by its count within the department multiplied by that department probability
+            department_factor = 0.1 if department == 'DEFAULT' else final_results[department]["score"]
+            for disease, data in final_results[department]["diseases"]:
+                total_diags += 1
+                if disease in diags.keys():
+                    diags[disease]["count"] += data["count"] 
+                    diags[disease]["score"] += data["count"] * department_factor
+                else:
+                    diags[disease] = {"count":data["count"], "score":data["count"] * department_factor}
+            # for checks and drugs, simply accumulate the occurrence counts
+            for check, data in final_results[department]["checks"]:
+                total_check += 1
+                if check in checks.keys():
+                    checks[check]["count"] += data["count"]
+                else:
+                    checks[check] = {"count":data["count"]}
+            for drug, data in final_results[department]["drugs"]:
+                total_drug += 1
+                if drug in drugs.keys():
+                    drugs[drug]["count"] += data["count"]
+                else:
+                    drugs[drug] = {"count":data["count"]}
+        
+        sorted_diags = sorted(diags.items(), key=lambda x:x[1]["score"],reverse=True)
+        sorted_checks = sorted(checks.items(), key=lambda x:x[1]["count"],reverse=True)
+        sorted_drugs = sorted(drugs.items(), key=lambda x:x[1]["count"],reverse=True)
+        logger.info(f"STEP 5 finished")  
+        #这里输出markdown日志
+        log_data = ["|department|disease|check|drug|count|score"]
+        log_data.append("|--|--|--|--|--|--|")
+        for department in final_results.keys():
+            disease_data = final_results[department].get("diseases")
+            check_data = final_results[department].get("checks")
+            drug_data = final_results[department].get("drugs")
+            count_data = final_results[department].get("count", 0)
+            score_data = final_results[department].get("score", 0)
+            log_data.append(f"|{department}|{disease_data}|{check_data}|{drug_data}|{count_data}|{score_data}|")
+           
+        logger.debug("这里是经过排序的数据\n"+"\n".join(log_data))
+        #STEP 6: assemble the data and return
+            # if "department" in item.keys():
+            #     final_results["department"] = list(set(final_results["department"]+item["department"]))
+            # if "diseases" in item.keys():
+            #     final_results["diseases"] = list(set(final_results["diseases"]+item["diseases"]))
+            # if "checks" in item.keys():
+            #     final_results["checks"] = list(set(final_results["checks"]+item["checks"]))
+            # if "drugs" in item.keys():
+            #     final_results["drugs"] = list(set(final_results["drugs"]+item["drugs"]))
+            # if "symptoms" in item.keys():
+            #     final_results["symptoms"] = list(set(final_results["symptoms"]+item["symptoms"]))
+        return {"details":sorted_final_results, 
+                "diags":sorted_diags, "total_diags":total_diags,
+                "checks":sorted_checks, "drugs":sorted_drugs, 
+                "total_checks":total_check, "total_drugs":total_drug}
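
The department score computed in STEP 3 is the department's share of all disease-department pairs (count / total), and STEP 5 weights each disease by that share, using 0.1 for the DEFAULT bucket. A minimal standalone sketch of this scoring, using invented toy counts rather than real graph data:

# Toy illustration of the department/disease scoring used in STEP 3 and STEP 5.
# The numbers are made up; only the formulas mirror the code above.
dept_counts = {"消化内科": 6, "普外科": 3, "DEFAULT": 1}
total = sum(dept_counts.values())                      # 10 disease-department pairs

dept_score = {d: c / total for d, c in dept_counts.items()}
# {'消化内科': 0.6, '普外科': 0.3, 'DEFAULT': 0.1}

# a disease seen 4 times in 消化内科 and once in the DEFAULT bucket
disease_score = 4 * dept_score["消化内科"] + 1 * 0.1   # DEFAULT departments are weighted 0.1
print(round(disease_score, 2))                          # 2.5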

+ 72 - 0
agent/cdss/models/schemas.py

@@ -0,0 +1,72 @@
+
+from typing import List, Optional, Dict, Union
+
+class CDSSInt:
+    type: str
+    value: Union[int, float]
+    def __init__(self, type: str = 'number', value: Union[int, float] = 0):
+        self.type = type
+        self.value = value
+    def __str__(self):
+        return f"{self.type}:{self.value}"
+    def get_value(self):
+        # named get_value because the instance attribute self.value would shadow a method called value()
+        return self.value
+    
+class CDSSText:
+    type: str
+    value: Union[str, List[str]]
+    def __init__(self, type:str='text', value: Union[str, List[str]]=""):
+        self.type = type
+        self.value = value
+    def __str__(self):
+        if isinstance(self.value, str):
+            return f"{self.type}:{self.value}"
+        return f"{self.type}:{','.join(self.value)}"
+    def get_value(self):
+        return self.value
+
+class CDSSDict:
+    type: str
+    value: Dict
+    def __init__(self, type: str = 'dict', value: Optional[Dict] = None):
+        self.type = type
+        self.value = value if value is not None else {}
+    def __str__(self):
+        return f"{self.type}:{self.value}"
+    def get_value(self):
+        return self.value
+
+# pat_name: patient name, string, e.g. "张三"; output "" when there is no patient information
+# pat_sex: patient sex, string, e.g. "男"; output "" when there is no patient information
+# pat_age: patient age, number, in years, e.g. 25 for a 25-year-old; output 0 when there is no age information
+# clinical_department: visiting department, string, e.g. "呼吸内科"; output "" when there is no department information
+# chief_complaint: chief complaint, list of strings with the main symptoms, e.g. ["胸痛","发热"]; output [] when there is no chief complaint
+# present_illness: history of present illness, list of strings covering symptom course, triggers and accompanying symptoms (e.g. pain character, radiation, relieving factors); output [] when absent
+# past_medical_history: past medical history, list of strings covering disease history (e.g. hypertension, diabetes), surgical history, drug allergies, family history, etc.; output [] when absent
+# physical_examination: physical examination, list of strings, e.g. vital signs (blood pressure, heart rate), cardiopulmonary and abdominal findings, lab/imaging results (e.g. abnormal ECG, elevated troponin); output [] when absent
+# lab_and_imaging: tests and imaging, list of strings, including blood counts, biochemistry, ECG, chest X-ray, CT and other items together with their results and reports; output [] when absent
+class CDSSInput:
+    pat_age: CDSSInt = CDSSInt(type='year', value=0)
+    pat_sex: CDSSInt= CDSSInt(type='sex', value=0)
+    values: List[CDSSText] 
+    
+    def __init__(self, **kwargs):
+        # pull every field out of kwargs and attach it to the instance, so callers can pass arbitrary fields directly
+        values = []
+        for key, value in kwargs.items():
+            # if the attribute already exists, assign the value to it; otherwise wrap it as a CDSSText and append it to values
+            if hasattr(self, key):
+                setattr(self, key, value)
+            else:
+                values.append(CDSSText(key, value))
+        setattr(self, 'values', values)
+    def get_value(self, key)->CDSSText:
+        for value in self.values:
+            if value.type == key:
+                return value.value
+        
+class CDSSOutput:
+    def __init__(self):
+        # fresh containers per instance so results are not shared between calls
+        self.departments = CDSSDict(type='departments', value={})
+        self.diagnosis = CDSSDict(type='diagnosis', value={})
+        self.checks = CDSSDict(type='checks', value={})
+        self.drugs = CDSSDict(type='drugs', value={})
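
A small usage sketch of these schema classes; the field values below are invented. Known attributes such as pat_age are set directly, everything else is wrapped into CDSSText entries by the constructor:

# hypothetical CDSS input record
record = CDSSInput(
    pat_age=CDSSInt(type='year', value=65),
    clinical_department="呼吸内科",
    chief_complaint=["胸痛", "发热"],
)
print(record.get_value("chief_complaint"))   # ['胸痛', '发热']

result = CDSSOutput()
# e.g. how a caller could record a candidate diagnosis
result.diagnosis.value["肺炎"] = {"count": 3, "score": 0.6}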

+ 25 - 0
agent/db/database.py

@@ -0,0 +1,25 @@
+from sqlalchemy import create_engine
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import sessionmaker
+from config.site import SiteConfig
+
+config = SiteConfig()
+
+POSTGRESQL_HOST = config.get_config("POSTGRESQL_HOST")
+POSTGRESQL_DATABASE = config.get_config("POSTGRESQL_DATABASE")
+POSTGRESQL_USER = config.get_config("POSTGRESQL_USER")
+POSTGRESQL_PASSWORD = config.get_config("POSTGRESQL_PASSWORD")
+
+DATABASE_URL = f"postgresql+psycopg2://{POSTGRESQL_USER}:{POSTGRESQL_PASSWORD}@{POSTGRESQL_HOST}/{POSTGRESQL_DATABASE}"
+
+engine = create_engine(DATABASE_URL)
+SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
+
+Base = declarative_base()
+
+def get_db():
+    db = SessionLocal()
+    try:
+        yield db
+    finally:
+        db.close()
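
get_db is the usual FastAPI session-per-request dependency; a minimal sketch of how a route might consume it (the route itself is hypothetical and not part of this commit):

from fastapi import APIRouter, Depends
from sqlalchemy.orm import Session

from agent.db.database import get_db
from agent.models.db.agent import Job

router = APIRouter()

@router.get("/jobs/{job_id}")
def read_job(job_id: int, db: Session = Depends(get_db)):
    # get_db yields a session for this request and closes it afterwards
    return db.query(Job).filter(Job.id == job_id).first()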

+ 87 - 0
agent/db/database.sql

@@ -0,0 +1,87 @@
+-- public.job_queues definition
+
+-- Drop table
+
+-- DROP TABLE public.job_queues;
+
+CREATE TABLE public.job_queues (
+	id serial4 NOT NULL,
+	queue_category varchar(64) NOT NULL,
+	queue_name varchar(64) NOT NULL,
+	status int4 NULL,
+	CONSTRAINT job_queues_pkey PRIMARY KEY (id)
+);
+
+
+-- public.jobs definition
+
+-- Drop table
+
+-- DROP TABLE public.jobs;
+
+CREATE TABLE public.jobs (
+	id serial4 NOT NULL,
+	job_category varchar(64) NOT NULL,
+	job_name varchar(64) NULL,
+	job_details text NOT NULL,
+	job_creator varchar(64) NOT NULL,
+	job_logs text NULL,
+	job_files varchar(300) NULL,
+	created timestamp NOT NULL,
+	updated timestamp NOT NULL,
+	status int4 NULL,
+	executor varchar(64) NULL,
+	CONSTRAINT jobs_pkey PRIMARY KEY (id)
+);
+
+
+-- public.users definition
+
+-- Drop table
+
+-- DROP TABLE public.users;
+
+CREATE TABLE public.users (
+	id int4 DEFAULT nextval('user_id_seq'::regclass) NOT NULL,
+	username varchar(32) NOT NULL,
+	full_name varchar(64) NOT NULL,
+	email varchar(100) NOT NULL,
+	hashed_password varchar(64) NOT NULL,
+	status int4 DEFAULT 0 NOT NULL
+);
+CREATE UNIQUE INDEX users_id_idx ON public.users USING btree (id);
+
+
+-- public.queue_job definition
+
+-- Drop table
+
+-- DROP TABLE public.queue_job;
+
+CREATE TABLE public.queue_job (
+	id serial4 NOT NULL,
+	queue_id int4 NOT NULL,
+	job_id int4 NOT NULL,
+	CONSTRAINT queue_job_pkey PRIMARY KEY (id),
+	CONSTRAINT queue_job_job_id_fkey FOREIGN KEY (job_id) REFERENCES public.jobs(id),
+	CONSTRAINT queue_job_queue_id_fkey FOREIGN KEY (queue_id) REFERENCES public.job_queues(id)
+);
+
+
+-- public.sessions definition
+
+-- Drop table
+
+-- DROP TABLE public.sessions;
+
+CREATE TABLE public.sessions (
+	id serial4 NOT NULL,
+	user_id int4 NULL,
+	session_id varchar(64) NOT NULL,
+	created timestamp NULL,
+	updated timestamp NULL,
+	username varchar(32) NULL,
+	full_name varchar(64) NULL,
+	CONSTRAINT sessions_pkey PRIMARY KEY (id),
+	CONSTRAINT sessions_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id)
+);

+ 39 - 0
agent/docs/update0411.txt

@@ -0,0 +1,39 @@
+This update mainly adds the ability to load incremental update data
+1. Loading update data brings in a small number of node and edge records that are used to patch the original graph
+2. It is intended for deployment scenarios where the graph needs site-specific adjustments
+
+The code involved includes
+
+1. .env
+
+Mainly adds the environment variable settings UPDATE_DATA_PATH and FACTOR_DATA_PATH
+UPDATE_DATA_PATH is the directory holding the update data; files named ent*.json are node update files and files named rel*.json are edge update files
+The update files use the same format as the original data files; during loading the incremental data is merged into the existing data
+For example, if an existing node has attributes a, b, c and the update file contains attribute d, the node ends up with attributes a, b, c, d (attributes are never deleted)
+
+2. cdss_helper.py
+
+Adds the load_local_data method and the other functions it requires
+Adds logger support so that logs can be written to a log file
+Because of these changes, the CDSSCapbility file was modified as well
+
+3. capbility.py
+
+The constructor of the CDSSCapability class now calls
+        self.cdss_helper.load_local_data()
+to load the update files
+This class also gains logger support
+
+4. test.py
+
+Mainly adds logger support and initialization of the log file
+
+
+5. graph_helper.py
+
+In the build_graph method of this class, the graph initialization code changed from
+self.graph = nx.Graph()
+to
+self.graph = nx.DiGraph()
+
+This enables directed-graph support, reserved for future extension
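
The merge rule described above (existing attributes are kept, new ones added, nothing deleted) behaves like a plain dict union; a tiny illustration with made-up attributes:

# existing node attributes and an incremental update from an ent*.json file (toy data)
node_attrs = {"a": 1, "b": 2, "c": 3}
update = {"d": 4}

node_attrs.update(update)      # new keys are added, existing keys overwritten, none removed
print(node_attrs)              # {'a': 1, 'b': 2, 'c': 3, 'd': 4}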

+ 26 - 0
agent/docs/update0414.txt

@@ -0,0 +1,26 @@
+This update mainly switches node references from plain text names to ids, to handle cases where one name, such as 腹痛 (abdominal pain), maps to several nodes with the same name
+
+1. Modified command\dump_graph_data.py so that exports use the id+name form
+2. Modified agent\cdss\libs\cdss_helper.py to work with the new id-based lookup, mainly by integrating pandas for data processing
+3. Modified agent\cdss\models\schemas.py, adding departments to CDSSOutput
+4. Modified agent\cdss\capbility.py, moving the load_local_data work into cdss_helper and adapting the code to the new CDSSOutput
+
+Known issues
+1. For supplementary data, the environment variables UPDATE_DATA_PATH and FACTOR_DATA_PATH still apply, but the data file format has changed and must match the export format
+2. Only part of the data is loaded (see the cdss_helper code), so retrieving detailed data such as etiology or cost is not supported yet
+3. Step 2 in cdss_helper.cdss_travel performs poorly and needs a cache; the TODO markers in the code show where to add it
+
+Testing supplementary data
+
+[
+    [
+        1291243,
+        {
+            "name": "好急的胃肠炎"
+        }
+    ]
+]
+
+Write the JSON above into a file named ent_01.json; the file name must start with ent and end with json.
+Then put the file into UPDATE_DATA_PATH so it is picked up automatically when data is loaded; this can update nodes that are already loaded, but adding new nodes is not supported
+Likewise, to update edge data, write a rel_01.json file following the relation format; the name must start with rel and end with json, and it also goes into UPDATE_DATA_PATH
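
A rough sketch of how such update files could be scanned and applied, assuming the nodes are kept in a dict keyed by id and each ent*.json file holds a list of [id, properties] pairs as in the example above. This is only an illustration, not the actual load_local_data implementation:

import glob
import json
import os

def apply_entity_updates(nodes: dict, update_dir: str) -> None:
    """Merge properties from ent*.json files into already loaded nodes (no new nodes are added)."""
    for path in sorted(glob.glob(os.path.join(update_dir, "ent*.json"))):
        with open(path, encoding="utf-8") as f:
            for node_id, props in json.load(f):
                if node_id in nodes:               # updates only; additions are not supported
                    nodes[node_id].update(props)

# toy usage
nodes = {1291243: {"name": "急性胃肠炎"}}
apply_entity_updates(nodes, os.environ.get("UPDATE_DATA_PATH", "./update_data"))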

+ 13 - 0
agent/init_db.py

@@ -0,0 +1,13 @@
+    
+if __name__ == "__main__":
+    import sys,os
+    current_path = os.getcwd()
+    sys.path.append(current_path)
+    print("create table start")
+    from db.database import engine
+    from models.db.agent import Base, Job,JobQueue,QueueJob
+    from models.db.user import User,Session
+    Base.metadata.create_all(engine)
+    print("create table finished")
+    
+    

+ 292 - 0
agent/libs/agent.py

@@ -0,0 +1,292 @@
+
+import uuid
+import os
+import subprocess
+import logging
+logger = logging.getLogger(__name__)
+
+from datetime import datetime
+
+from agent.models.db.agent import Job,JobQueue,QueueJob
+from sqlalchemy.orm import load_only
+class Task:
+    def __init__(self, script_path, log_file, args):
+        self.task_id = str(uuid.uuid4())
+        self.script_path = script_path
+        if isinstance(args, str):
+            args = args.split()
+        self.args = args
+        self.status = "pending"
+        self.start_time = datetime.now()
+        self.end_time = None
+        self.log_file = log_file
+        self.thread = None
+    def check(self):
+        safe_dir = os.path.abspath("scripts")
+        requested_path = os.path.abspath(self.script_path)
+        if not requested_path.startswith(safe_dir):
+            logger.error(f"脚本路径不合法:{requested_path}")
+            return False
+        if not os.path.exists(requested_path):
+            logger.error(f"脚本文件不存在:{requested_path}")
+            return False
+        if not os.path.isfile(requested_path):
+            logger.error(f"脚本路径不是文件:{requested_path}")
+            return False 
+        return True
+    def execute(self):
+        self.status = "running"
+        try:
+            with open(self.log_file, 'w', encoding="utf-8") as log:
+                # 添加参数支持
+                process = subprocess.Popen(
+                    ['python', self.script_path] + self.args,
+                    stdout=log,
+                    stderr=subprocess.STDOUT
+                )
+                return_code = process.wait(timeout=3600)
+                self.status = f"success:{return_code}" if return_code == 0 else f"error:{return_code}"
+        except subprocess.TimeoutExpired:
+            self.status = "error:timeout"
+        except Exception as e:
+            self.status = f"error:{str(e)}"
+        finally:
+            self.end_time = datetime.now()
+     
+
+            
+class AgentBusiness:
+    JOB_STATUS_READY = 0
+    JOB_STATUS_RUNNING = 1
+    JOB_STATUS_FINISHED = 2
+    JOB_STATUS_ERROR = 3
+    JOB_STATUS_CANCELED = 4
+    JOB_STATUS_WAITING = 5
+    JOB_STATUS_PAUSED = 6
+    JOB_STATUS_RESUMED = 7
+    JOB_STATUS_RESTARTED = 8
+    JOB_STATUS_SKIPPED = 9
+    JOB_STATUS_RETRYING = 10
+    def __init__(self, db):
+        self.db = db
+    def create_job(self, **kwargs):
+        logger.info(f"create job: {kwargs}")
+        job = Job()
+        job.job_category = "DEFAULT"
+        job.created = datetime.now()
+        job.updated = datetime.now()
+        job.status = self.JOB_STATUS_READY
+        self.append_job_logs(job, f"job created")
+        try:
+            for key, value in kwargs.items():
+                if hasattr(job, key):
+                    setattr(job, key, value)
+            self.db.add(job)
+            self.db.commit()
+            self.db.refresh(job)
+            return job
+        except Exception as e:
+            self.db.rollback()
+            logger.error(f"create job error: {e}")
+        return None
+    
+    def create_queue(self, **kwargs):
+        logger.info(f"create queue: {kwargs}")
+        queue = JobQueue()
+        queue.queue_name = "DEFAULT"
+        queue.queue_category = "DEFAULT"
+        try:
+            for key, value in kwargs.items():
+                if hasattr(queue, key):
+                    setattr(queue, key, value)
+            self.db.add(queue)
+            self.db.commit()
+            self.db.refresh(queue)
+            return queue
+        except Exception as e:
+            self.db.rollback()
+            logger.error(f"create queue error: {e}")
+        return None
+    
+    def get_queue(self, queue_category, queue_name, create_if_not_exist=False):
+        if queue_category is None or queue_name is None:
+            return None
+        if queue_category == "" or queue_name == "":
+            return None
+        logger.info(f"get queue: {queue_name}")
+        queue = self.db.query(JobQueue).filter(JobQueue.queue_name == queue_name, 
+                                               JobQueue.queue_category==queue_category).first()
+        if queue:
+            return queue
+        if create_if_not_exist:
+            return self.create_queue(queue_name=queue_name, queue_category=queue_category)
+        return None
+    
+    def get_queues_summary(self):
+        logger.info(f"get get_queues_summary")
+        queue_data = []
+        queues = self.db.query(JobQueue).filter(JobQueue.status == 0).all()
+        for queue in queues:
+            job_count = self.db.query(QueueJob).filter(QueueJob.queue_id == queue.id).count()
+            job_finished_count = self.db.query(QueueJob).join(Job, QueueJob.job_id==Job.id).filter(QueueJob.queue_id == queue.id, Job.status == self.JOB_STATUS_FINISHED).count()
+            queue_data.append({"queue_name": queue.queue_name, 
+                               "queue_category": queue.queue_category, 
+                               "job_count": job_count,
+                               "job_finished_count": job_finished_count})
+        return queue_data
+    
+    def get_job(self, job_id):
+        logger.info(f"get job: {job_id}")
+        job = self.db.query(Job).filter(Job.id == job_id).first()
+        if job:
+            return job
+        return None
+    def get_job_status(self, job_id):
+        logger.info(f"get job status: {job_id}")
+        job = self.db.query(Job).filter(Job.id == job_id).options(load_only(Job.status)).first()
+        if job:
+            return job.status
+        return -1
+    def append_job_logs(self, job:Job, logs:str):
+        dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        if job:
+            job.job_logs = job.job_logs + f"{dt}: {logs}\n" if job.job_logs else f"{dt}: {logs}\n"
+        return job
+    def put_job(self, queue:JobQueue, job:Job):
+        logger.info(f"put job: {queue.queue_name} {job.id}")
+        queue_job = self.db.query(QueueJob).filter(QueueJob.job_id == job.id).first()
+        if queue_job:
+            pass
+        else:
+            queue_job = QueueJob()
+        queue_job.queue_id = queue.id
+        queue_job.job_id = job.id
+        job.job_category = queue.queue_category + "_" + queue.queue_name
+        job = self.append_job_logs(job, f"put job to queue: {queue.queue_category}_{queue.queue_name}")
+        job.status = self.JOB_STATUS_READY
+        try:
+            self.db.add(queue_job)
+            self.db.add(job)
+            self.db.commit()
+            self.db.refresh(queue_job)
+            return queue_job
+        except Exception as e:
+            self.db.rollback()
+            logger.error(f"put job error: {e}")
+        return None
+    
+    def delete_job_in_any_queue(self, job:Job):
+        logger.info(f"delete job in any queue: {job.id}")
+        queue_job = self.db.query(QueueJob).filter(QueueJob.job_id == job.id).first()
+        if queue_job:
+            try:
+                self.db.delete(queue_job)
+                self.db.commit()
+                return True
+            except Exception as e:
+                self.db.rollback()
+    def delete_queue_job(self, queue, job):
+        logger.info(f"delete queue job: {queue.queue_name} {job.id}")
+        queue_job = self.db.query(QueueJob).filter(QueueJob.queue_id == queue.id, QueueJob.job_id == job.id).first()
+        if queue_job:
+            try:
+                self.db.delete(queue_job)
+                self.db.commit()
+                return True
+            except Exception as e:
+                self.db.rollback()
+                logger.error(f"delete queue job error: {e}")
+        return False
+    
+    # def append_job_logs(self, job_id, logs:str):
+    #     logger.info(f"append job logs: {job_id} {logs}")
+    #     job = self.db.query(Job).filter(Job.id == job_id).first()
+    #     if job:
+    #         job.job_logs = job.job_logs + logs + "\n" if job.job_logs else logs + "\n"
+    #         job.updated = datetime.now()
+    #         try:
+    #             self.db.commit()
+    #             self.db.refresh(job)
+    #             return job  
+    #         except Exception as e:
+    #             self.db.rollback()
+    #             logger.error(f"append job logs error: {e}")
+    #     return None 
+    def update_job(self, job_id, **kwargs):
+        logger.info(f"update job: {job_id} ")
+        job = self.db.query(Job).filter(Job.id == job_id).first()
+        if job:
+            for key, value in kwargs.items():
+                if hasattr(job, key):
+                    setattr(job, key, value)
+            job.updated = datetime.now()
+            try:
+                self.db.commit()
+                self.db.refresh(job)
+                return job
+            except Exception as e:
+                self.db.rollback()
+                logger.error(f"update job error: {e}")
+
+        return None
+    
+    def delete_job(self, job_id):
+        logger.info(f"delete job: {job_id}")
+        job = self.db.query(Job).filter(Job.id == job_id).first()      
+        if job:
+            try:
+                self.db.delete(job)
+                self.db.commit()
+                return True
+            except Exception as e:
+                self.db.rollback()
+                logger.error(f"delete job error: {e}") 
+        return False     
+
+    def get_jobs(self, job_category, job_creator, limit=50, offset=0):
+        logger.info(f"get jobs: {job_category} {job_creator} {limit} {offset}")
+        if job_category == 'SYSTEM_DEFAULT':
+            jobs = self.db.query(Job)\
+               .options(load_only(Job.id, Job.job_name, Job.job_category, Job.job_creator, Job.executor, Job.status, Job.created, Job.updated))\
+                    .limit(limit).offset(offset).all()
+            if jobs:
+                return jobs
+            return None
+        jobs = self.db.query(Job)\
+            .options(load_only(Job.id, Job.job_name, Job.job_category, Job.job_creator, Job.executor, Job.status, Job.created, Job.updated))\
+                .filter(Job.job_category == job_category, Job.job_creator==job_creator)\
+                    .limit(limit).offset(offset).all()
+        if jobs:
+            return jobs
+        return None
+    def get_job_queue(self, job_id):
+        logger.info(f"get job queue: {job_id}")
+        queue_job = self.db.query(QueueJob).filter(QueueJob.job_id == job_id).first()
+        if queue_job:
+            return queue_job
+        return None
+    def get_queue_jobs_count(self, queue):
+        logger.info(f"get queue jobs count: {queue.queue_name}")
+        count = self.db.query(QueueJob).filter(QueueJob.queue_id == queue.id).count()
+        logger.info(f"get queue jobs count: {count}")
+        return count
+    def get_queue_jobs(self, queue, limit=50, offset=0):
+        logger.info(f"get queue jobs: {queue.queue_name} {limit} {offset}")
+        #根据QueueJob的queue_id和job_id获取Job
+        # jobs = self.db.query(Job)\
+        if queue.queue_name == 'DEFAULT' and queue.queue_category == 'SYSTEM':
+            jobs = self.db.query(Job)\
+               .options(load_only(Job.id, Job.job_name, Job.job_category, Job.job_creator, Job.executor, Job.status, Job.created, Job.updated))\
+                   .limit(limit).offset(offset).all()
+            logger.info(f"get queue jobs: {len(jobs)}")
+            return jobs
+        jobs = self.db.query(Job)\
+            .options(load_only(Job.id, Job.job_name, Job.job_category, Job.job_creator, Job.executor, Job.status, Job.created, Job.updated))\
+                .join(QueueJob, Job.id == QueueJob.job_id).\
+                    filter(QueueJob.queue_id == queue.id).\
+                        order_by(Job.id.asc()).\
+                            limit(limit).offset(offset).all()
+             
+        logger.info(f"get queue jobs: {len(jobs)}")
+        return jobs
+        
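
A short sketch of how the Task helper might be driven; the script path and arguments are placeholders, and check() only accepts files under the scripts/ directory relative to the working directory:

from agent.libs.agent import Task

# hypothetical job script execution
task = Task(
    script_path="scripts/standard_txt_chunk.py",   # placeholder path
    log_file="logs/task.log",
    args="--job_id 42",        # a string is split into ['--job_id', '42']
)
if task.check():               # rejects paths outside ./scripts and missing files
    task.execute()             # runs: python <script> <args>, writing output to log_file
print(task.status)             # e.g. "success:0", "error:1" or "error:timeout"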

+ 59 - 0
agent/libs/auth.py

@@ -0,0 +1,59 @@
+from fastapi import Depends, HTTPException, Request, status
+from agent.db.database import SessionLocal
+from agent.libs.user import SessionBusiness, UserBusiness
+
+class SessionValues:
+    def __init__(self, session_id: str, user_id: str, username: str, full_name: str):
+        self.session_id = session_id
+        self.user_id = user_id
+        self.username = username
+        self.full_name = full_name
+        
+        
+def verify_session_id(request: Request)-> SessionValues:
+    # 获取 Authorization 头
+    with SessionLocal() as db:
+        session_business = SessionBusiness(db)
+        user_business = UserBusiness(db)
+        auth_header = request.headers.get("Authorization")
+        if not auth_header:
+            raise HTTPException(
+                status_code=status.HTTP_401_UNAUTHORIZED,
+                detail="Authorization header is missing",
+                headers={"WWW-Authenticate": "Beaver"}
+            )
+
+        # 检查 Authorization 头是否符合预期格式
+        if not auth_header.startswith("Beaver "):
+            print("Invalid Authorization header format", auth_header)
+            raise HTTPException(
+                status_code=status.HTTP_401_UNAUTHORIZED,
+                detail="Invalid Authorization header format",
+                headers={"WWW-Authenticate": "Beaver"}
+            )
+
+        # 提取 session_id
+        session_user_id = auth_header.split(" ")[1]
+        session_id = auth_header.split(" ")[2]
+
+        # 在这里添加你的 session_id 校验逻辑
+        # 例如,检查 session_id 是否在数据库中存在
+        if not session_business.validate_session(session_user_id, session_id):
+            print("Invalid session_id", session_user_id, session_id)
+            raise HTTPException(
+                status_code=status.HTTP_401_UNAUTHORIZED,
+                detail="Invalid session_id",
+                headers={"WWW-Authenticate": "Beaver"}
+            )
+
+        user = user_business.get_user_by_username(session_user_id)
+        if user is None:
+            print("Invalid user_id", session_user_id)
+            raise HTTPException(
+                status_code=status.HTTP_401_UNAUTHORIZED,
+                detail="Invalid username",
+                headers={"WWW-Authenticate": "Beaver"}
+            )
+        return SessionValues(session_id, user.id, user.username, user.full_name)
+    # 如果校验通过,返回 session_id 或其他需要的信息
+    return None
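
verify_session_id expects an Authorization header of the form "Beaver <username> <session_id>"; a minimal client-side sketch (the URL path and credentials below are placeholders):

import requests  # assumed to be available in the client environment

session_user_id = "admin"              # username returned at login
session_id = "example-session-id"      # session_id returned at login
headers = {"Authorization": f"Beaver {session_user_id} {session_id}"}

resp = requests.get("http://localhost:8000/api/task", headers=headers)
print(resp.status_code)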

+ 125 - 0
agent/libs/graph.py

@@ -0,0 +1,125 @@
+import uuid
+import os
+import subprocess
+import logging
+logger = logging.getLogger(__name__)
+
+from datetime import datetime
+
+from agent.models.db.graph import DbKgGraphs,DbKgNode,DbKgEdge,DbKgProp,DbKgEdgeProp
+from sqlalchemy.orm import load_only
+
+class GraphBusiness:
+    def __init__(self, db):
+        self.db = db
+        self.graphs = {}
+
+    def create_graph(self, category:str, name: str, description: str, graph_settings:str):
+        graph = DbKgGraphs(category=category, name=name, graph_description=description,
+                           graph_settings=graph_settings, created=datetime.now(), updated=datetime.now())
+        self.db.add(graph)
+        self.db.commit()
+        self.db.refresh(graph)
+        return graph
+    def get_graph(self, graph_id: int):
+        graph = self.db.query(DbKgGraphs).filter(DbKgGraphs.id == graph_id).first()
+        return graph
+    
+    def create_node(self, graph_id: int, name: str, category: str, props: dict):
+        node = self.db.query(DbKgNode).filter(DbKgNode.graph_id == graph_id, DbKgNode.name == name, DbKgNode.category == category).first()
+        if node:
+            return node
+        node = DbKgNode(graph_id=graph_id, name=name, category=category)
+        self.db.add(node)
+        self.db.commit()
+        self.db.refresh(node)
+        for key, value in props.items():
+            self.create_node_prop(category=1, ref_id=node.id, prop_name=key, prop_value=value, prop_title=key)
+        self.db.commit()
+        return node
+
+    def create_edge(self, graph_id: int, src_id: int, dest_id: int, category: str, name: str, props: dict):
+        edge = self.db.query(DbKgEdge).filter(DbKgEdge.graph_id == graph_id, DbKgEdge.src_id == src_id, DbKgEdge.dest_id == dest_id, DbKgEdge.category == category, DbKgEdge.name == name).first()
+        if edge:
+            for key, value in props.items():
+                prop = self.db.query(DbKgEdgeProp).filter(DbKgEdgeProp.ref_id == edge.id, DbKgEdgeProp.prop_name == key).first()
+                if prop:
+                    continue
+                self.create_edge_prop(category=1, ref_id=edge.id, prop_name=key, prop_value=value, prop_title=key)
+            return edge
+        edge = DbKgEdge(graph_id=graph_id, src_id=src_id, dest_id=dest_id, category=category, name=name)
+        self.db.add(edge)
+        self.db.commit()
+        self.db.refresh(edge)
+        for key, value in props.items():
+            self.create_edge_prop(category=1, ref_id=edge.id, prop_name=key, prop_value=value, prop_title=key)
+        self.db.commit()
+        return edge
+        
+    def create_node_prop(self, category:str, ref_id: int, prop_name: str, prop_value: str, prop_title:str, commit=False):
+        prop = DbKgProp(category=category, ref_id=ref_id, prop_name=prop_name, prop_value=prop_value, prop_title=prop_title)
+        self.db.add(prop)  
+        if commit:
+            self.db.commit()
+            self.db.refresh(prop)
+        return prop
+    def create_edge_prop(self, category:str, ref_id: int, prop_name: str, prop_value: str, prop_title:str, commit=False):
+        prop = DbKgEdgeProp(category=category, ref_id=ref_id, prop_name=prop_name, prop_value=prop_value, prop_title=prop_title)
+        self.db.add(prop)  
+        if commit:
+            self.db.commit()
+            self.db.refresh(prop)
+        return prop
+    
+    def search_like_node_by_name(self, name: str, category: str, graph_id: int):
+        if name == "":
+            name = "%"
+        if category == "" or category == "any":
+            nodes = self.db.query(DbKgNode).filter(DbKgNode.name.like(f"%{name}%"), DbKgNode.graph_id == graph_id).limit(100).all()
+            return nodes
+        nodes = self.db.query(DbKgNode)\
+            .filter(DbKgNode.name.like(f"%{name}%"), DbKgNode.category == category, DbKgNode.graph_id == graph_id).limit(100).all()
+        return nodes
+    
+    def get_nodes_by_page(self, graph_id: int, page: int, page_size: int):
+        nodes = self.db.query(DbKgNode).filter(DbKgNode.graph_id == graph_id).limit(page_size).offset((page - 1) * page_size).all()
+        return nodes
+    
+    def get_nodes_count(self, graph_id: int):
+        count = self.db.query(DbKgNode).filter(DbKgNode.graph_id == graph_id).count()
+        return count
+    
+    def get_neighbors(self, graph_id: int, node_id: int, direction: str):
+        nodes = []
+        if direction == "in":
+            edges = self.db.query(DbKgEdge).filter(DbKgEdge.graph_id == graph_id, DbKgEdge.dest_id == node_id).limit(100).all()
+            for edge in edges:
+                nodes.append(edge.src_node)
+        elif direction == "out":
+            edges = self.db.query(DbKgEdge).filter(DbKgEdge.graph_id == graph_id, DbKgEdge.src_id == node_id).limit(100).all()
+            for edge in edges:
+                nodes.append(edge.dest_node)
+        return nodes
+    
+    def get_node_by_id(self, graph_id: int, node_id: int):
+        node = self.db.query(DbKgNode).filter(DbKgNode.graph_id == graph_id, DbKgNode.id == node_id).first()
+        return node
+    
+    def get_graph_summary(self, graph_id: int):
+        nodes_count = self.db.query(DbKgNode).filter(DbKgNode.graph_id == graph_id).count()
+        edges_count = self.db.query(DbKgEdge).filter(DbKgEdge.graph_id == graph_id).count()
+        nodes_categorys = self.db.query(DbKgNode.category).filter(DbKgNode.graph_id == graph_id).distinct().all()
+        edges_categorys = self.db.query(DbKgEdge.category).filter(DbKgEdge.graph_id == graph_id).distinct().all()
+        nodes_categorys = [category[0] for category in nodes_categorys]
+        edges_categorys = [category[0] for category in edges_categorys]
+        return {"nodes_count": nodes_count, 
+                "edges_count": edges_count,
+                "nodes_categories": nodes_categorys,
+                "edges_categories": edges_categorys}
+    def get_nodes_categories(self, graph_id: int):
+        nodes_categorys = self.db.query(DbKgNode.category).filter(DbKgNode.graph_id == graph_id).distinct().all()
+        nodes_categorys = [{'name':category[0]} for category in nodes_categorys]
+        return nodes_categorys
+    def get_edges_categories(self, graph_id: int):
+        edges_categorys = self.db.query(DbKgEdge.category).filter(DbKgEdge.graph_id == graph_id).distinct().all()
+        edges_categorys = [{'name':category[0]} for category in edges_categorys]
+        return edges_categorys
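
A condensed sketch of using GraphBusiness; graph_id=1 is assumed to exist already, and the node name and properties are invented:

from agent.db.database import SessionLocal
from agent.libs.graph import GraphBusiness

with SessionLocal() as db:
    biz = GraphBusiness(db)
    node = biz.create_node(graph_id=1, name="急性胃肠炎", category="Disease",
                           props={"icd_code": "K52.9"})
    print(biz.get_neighbors(graph_id=1, node_id=node.id, direction="out"))
    print(biz.get_graph_summary(graph_id=1))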

+ 32 - 0
agent/libs/response.py

@@ -0,0 +1,32 @@
+from fastapi import status
+from fastapi.responses import JSONResponse, Response, ORJSONResponse  # , ORJSONResponse
+from typing import Union
+from datetime import datetime
+from json import JSONEncoder
+
+SUCCESS_CODE = 200
+SUCCESS_MESSAGE = "Operation successful"
+
+    
+# Note: the bare * is intentional; it forces keyword arguments, e.g. resp_200(data=xxxx)
+def resp_200(*, data: Union[list, dict, str]) -> Response:
+    return ORJSONResponse(
+        status_code=status.HTTP_200_OK,
+        content={
+            'code': 200,
+            'message': SUCCESS_MESSAGE,
+            'data': data,
+        }
+    )
+    
+def resp_400(*, data: str = None, message: str="BAD REQUEST") -> Response:
+    return ORJSONResponse(
+        status_code=status.HTTP_200_OK,
+        content={
+            'code': 400,
+            'message': message,
+            'data': data,
+        }
+    )
+
+
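
A tiny usage sketch inside a hypothetical route handler, matching the keyword-only signatures above:

from agent.libs.response import resp_200, resp_400

def demo():
    ok = resp_200(data={"job_id": 42})    # keyword-only: resp_200({"job_id": 42}) raises TypeError
    bad = resp_400(message="job not found")
    return ok, bad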

+ 96 - 0
agent/libs/user.py

@@ -0,0 +1,96 @@
+import uuid
+import logging
+import hashlib
+def hash_pwd(password):
+    return hashlib.sha256(password.encode()).hexdigest()
+
+logger = logging.getLogger(__name__)
+
+from datetime import datetime,timedelta
+
+from agent.models.db.user import User,Session
+
+
+class UserBusiness:
+    def __init__(self, db):
+        self.db = db
+        
+    def get_user(self, user_id):
+        return self.db.query(User).filter(User.id == user_id).first()
+    
+    def get_user_by_username(self, username):
+        return self.db.query(User).filter(User.username == username).first()
+    def create_user(self, username, password, fullname, email=""):
+        password = hash_pwd(password)
+        user = User(username=username, hashed_password=password, full_name=fullname, email=email)
+        self.db.add(user)
+        self.db.commit()
+        self.db.refresh(user)
+        return user
+    def update_user(self, user_id, username=None, password=None):
+        user = self.get_user(user_id)
+        if user:
+            if username:
+                user.username = username
+            if password:
+                password = hash_pwd(password)
+                user.hashed_password = password
+            self.db.commit()
+            self.db.refresh(user)
+        return user
+    def delete_user(self, user_id):
+        user = self.get_user(user_id)
+        if user:
+            self.db.delete(user)
+            self.db.commit()
+        return user
+    def verify_password(self, request_password, user_hashed_password):
+        hashed_password =  hash_pwd(request_password)
+        logger.info(f"verify password: {hashed_password} == {user_hashed_password}")
+        return hashed_password == user_hashed_password
+    
+class SessionBusiness:
+    def __init__(self, db):
+        self.db = db
+    def create_session(self, user:User):
+        session_id = str(uuid.uuid4())
+        session = Session(session_id=session_id, user_id=user.id, username=user.username, full_name=user.full_name)
+        self.db.add(session)
+        self.db.commit()
+        self.db.refresh(session)
+        return session
+    def get_session(self, session_id):
+        return self.db.query(Session).filter(Session.session_id == session_id).first()
+    def delete_session(self, session_id):
+        session = self.get_session(session_id)
+        if session:
+            self.db.delete(session)
+            self.db.commit()
+        return True
+    def update_session(self, session_id):
+        session = self.get_session(session_id)
+        if session:
+            session.updated = datetime.now()
+            self.db.commit()
+            self.db.refresh(session)
+    def get_session_by_user_id(self, user_id):
+        return self.db.query(Session).filter(Session.user_id == user_id).first()
+    
+    def validate_session(self, username, session_id):
+
+        session = self.get_session(session_id)
+        if session:
+            if session.username != username:
+                return None
+            expired: timedelta = datetime.now() - session.updated
+
+            if expired.total_seconds() > 1800:  # 30 minutes
+                logger.info(f"session expired: {session_id}")
+                #self.delete_session(session_id)
+                return None
+            else:
+                self.update_session(session_id)
+                return session
+    
+if __name__ == "__main__":
+    print("hello world")
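
A rough end-to-end sketch of the user and session helpers; the username and password are made up:

from agent.db.database import SessionLocal
from agent.libs.user import UserBusiness, SessionBusiness

with SessionLocal() as db:
    users = UserBusiness(db)
    sessions = SessionBusiness(db)

    user = users.get_user_by_username("admin") or users.create_user(
        "admin", "change-me", "Administrator", email="admin@example.com")

    if users.verify_password("change-me", user.hashed_password):
        session = sessions.create_session(user)
        # validate_session refreshes the 30-minute sliding expiry on success
        assert sessions.validate_session(user.username, session.session_id) is not None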

+ 6 - 0
agent/main.py

@@ -0,0 +1,6 @@
+
+
+if __name__ == "__main__":
+    import uvicorn
+
+    uvicorn.run(app='server:app', host="0.0.0.0", port=8000, reload=True)

+ 39 - 0
agent/models/db/agent.py

@@ -0,0 +1,39 @@
+
+
+from sqlalchemy import create_engine, Column, Integer, String, MetaData,DateTime,Text,ForeignKey
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import sessionmaker
+from agent.db.database import Base
+
+
+
+
+class Job(Base):
+    __tablename__ = 'jobs'
+    id = Column(Integer, primary_key=True)
+    job_category = Column(String(64), nullable=False)
+    job_name = Column(String(64))
+    job_details = Column(Text, nullable=False)
+    job_creator = Column(String(64), nullable=False)
+    job_logs = Column(Text, nullable=True)
+    job_files = Column(String(300), nullable=True)
+    executor = Column(String(64), nullable=True)
+    created = Column(DateTime, nullable=False)
+    updated = Column(DateTime, nullable=False)
+    status = Column(Integer, default=0)
+    
+    
+
+class JobQueue(Base):
+    __tablename__ = 'job_queues'
+    id = Column(Integer, primary_key=True)
+    queue_category = Column(String(64), nullable=False)
+    queue_name = Column(String(64), nullable=False)
+    status = Column(Integer, default=0)
+    
+
+class QueueJob(Base):
+    __tablename__ = 'queue_job'
+    id = Column(Integer, primary_key=True)
+    queue_id = Column(Integer, ForeignKey('job_queues.id'), nullable=False)
+    job_id = Column(Integer, ForeignKey('jobs.id'), nullable=False)

+ 198 - 0
agent/models/db/graph.py

@@ -0,0 +1,198 @@
+from sqlalchemy import Column, Integer, String, Text, ForeignKey,Float, DateTime
+from sqlalchemy.orm import relationship
+from agent.db.database import Base
+
+
+
+class DbKgProj(Base):
+    __tablename__ = "kg_projs"
+    
+    id = Column(Integer, primary_key=True, index=True)
+    proj_name = Column(String(64), nullable=False)
+    proj_category = Column(String(64), nullable=False)
+    proj_type = Column(String(64), nullable=False)
+    proj_conf = Column(String(300), nullable=False)
+    created = Column(DateTime, nullable=False)
+    updated = Column(DateTime, nullable=False)
+    status = Column(Integer, default=0)
+
+
+class DbKgGraphs(Base):
+    __tablename__ = "kg_graphs"
+
+    id = Column(Integer, primary_key=True, index=True)
+    category = Column(String(64), nullable=False)
+    name = Column(String(64), nullable=False)
+    graph_description = Column(String(64), nullable=False)
+    graph_settings = Column(Text, nullable=False)
+    created = Column(DateTime, nullable=False)
+    updated = Column(DateTime, nullable=False)
+    status = Column(Integer, default=0)
+
+class DbKgModels(Base):
+    __tablename__ = "kg_models"
+    
+    id = Column(Integer, primary_key=True, index=True)
+    model_category = Column(String(64), nullable=False)
+    model_name = Column(String(64), nullable=False)
+    model_description = Column(String(64), nullable=False)
+    model_settings = Column(Text, nullable=False)
+    created = Column(DateTime, nullable=False)
+    updated = Column(DateTime, nullable=False)
+    status = Column(Integer, default=0)
+    
+class DbKgTask(Base):
+    __tablename__ = "kg_tasks"
+    
+    id = Column(Integer, primary_key=True, index=True)
+    proj_id = Column(Integer, ForeignKey('kg_projs.id'), nullable=False)
+    task_category = Column(String(64), nullable=False)
+    task_name = Column(String(32), nullable=False)
+    task_content = Column(Text, nullable=False)
+    task_log = Column(Text, nullable=True)
+    created = Column(DateTime, nullable=False)
+    updated = Column(DateTime, nullable=False)
+    status = Column(Integer, default=0)
+    
+class DbKgSubGraph(Base):    
+    __tablename__ = "kg_sub_graph"
+    id = Column(Integer, primary_key=True, index=True)
+    graph_name = Column(String(64), nullable=False)
+    graph_content = Column(Text)
+    status = Column(Integer, default=0)
+
+class DbKgNode(Base):
+    __tablename__ = "kg_nodes"
+    
+    id = Column(Integer, primary_key=True, index=True)
+    graph_id = Column(Integer, default=0)
+    name = Column(String(64), nullable=False)
+    category = Column(String(64), nullable=False)
+    layout = Column(String(100))
+    version = Column(String(16))
+    status = Column(Integer, default=0)
+
+    props = relationship("DbKgProp", cascade = "delete", back_populates="node" ,foreign_keys='DbKgProp.ref_id', primaryjoin="DbKgNode.id==DbKgProp.ref_id")
+
+    
+class DbKgEdge(Base):
+    __tablename__ = "kg_edges"
+    
+    id = Column(Integer, primary_key=True, index=True)
+    graph_id = Column(Integer, default=0)
+    category = Column(String(64), nullable=False)
+    src_id = Column(Integer, ForeignKey('kg_nodes.id'), nullable=False)
+    dest_id = Column(Integer, ForeignKey('kg_nodes.id'), nullable=False)
+    name = Column(String(64), nullable=False)
+    version = Column(String(16))
+    status = Column(Integer, default=0)
+
+    src_node = relationship("DbKgNode",  primaryjoin="DbKgEdge.src_id==DbKgNode.id", lazy="select")
+    dest_node = relationship("DbKgNode",  primaryjoin="DbKgEdge.dest_id==DbKgNode.id", lazy="select")
+    props = relationship("DbKgEdgeProp", cascade = "delete", back_populates="edge" ,foreign_keys='DbKgEdgeProp.ref_id', primaryjoin="DbKgEdge.id==DbKgEdgeProp.ref_id")
+
+
+class DbKgProp(Base):
+    __tablename__ = "kg_props"
+    
+    id = Column(Integer, primary_key=True, index=True)
+    category = Column(Integer, default=0)
+    ref_id = Column(Integer, ForeignKey('kg_nodes.id'), nullable=False)
+    prop_name = Column(String(64), nullable=False)
+    prop_title = Column(String(64), nullable=False)
+    prop_value = Column(Text)
+
+    node = relationship("DbKgNode", back_populates="props", foreign_keys=[ref_id], primaryjoin="DbKgProp.ref_id==DbKgNode.id")
+    #edge = relationship("DbKgEdge", back_populates="props", foreign_keys=[ref_id], primaryjoin="DbKgProp.ref_id==DbKgEdge.id")
+
+
+class DbKgEdgeProp(Base):
+    __tablename__ = "kg_edge_props"
+    
+    id = Column(Integer, primary_key=True, index=True)
+    category = Column(Integer, default=0)
+    ref_id = Column(Integer, ForeignKey('kg_edges.id'), nullable=False)
+    prop_name = Column(String(64), nullable=False)
+    prop_title = Column(String(64), nullable=False)
+    prop_value = Column(Text)
+
+    edge = relationship("DbKgEdge", back_populates="props", foreign_keys=[ref_id], primaryjoin="DbKgEdgeProp.ref_id==DbKgEdge.id")
+    
+class DbDictICD(Base):
+    __tablename__ = "dict_icd10"
+    
+    id = Column(Integer, primary_key=True, index=True)
+    icd_code = Column(String(50), nullable=False)
+    icd_name = Column(String(150), nullable=False)
+    
+class DbDictDRG(Base):
+    __tablename__ = "dict_drg"
+    
+    id = Column(Integer, primary_key=True, index=True)
+    drg_code = Column(String(64), nullable=False)
+    drg_name = Column(String(128), nullable=False)
+    drg_weight = Column(Float(128), nullable=False)
+    
+
+class DbDictDrug(Base):
+    __tablename__ = "dict_social_drug"
+    
+    id = Column(Integer, primary_key=True, index=True)
+    data_source = Column(String(10), nullable=True)
+    drug_code = Column(String(64), nullable=True)
+    reg_name = Column(String(64), nullable=True)
+    prod_name = Column(String(128), nullable=True)
+    reg_dosage_form = Column(String(50), nullable=True)
+    act_dosage_form = Column(String(50), nullable=True)
+    reg_spec = Column(String(250), nullable=True)
+    act_spec = Column(String(250), nullable=True)
+    pkg_mat = Column(String(150), nullable=True)
+    min_pack_size = Column(String(50), nullable=True)
+    min_pack_unit = Column(String(100), nullable=True)
+    min_dosage_unit = Column(String(100), nullable=True)
+    prod_factory = Column(String(250), nullable=True)
+    license_no = Column(String(64), nullable=True)
+    drug_std_code = Column(String(64), nullable=True)
+    subpkg_factory = Column(String(64), nullable=True)
+    sales_status = Column(String(100), nullable=True)
+    social_insurance_name = Column(String(200), nullable=True)
+    jiayi_category = Column(String(50), nullable=True)
+    social_dosage_form = Column(String(50), nullable=True)
+    serial_no = Column(String(50), nullable=True)
+    comments = Column(String(300), nullable=True)
+
+
+class DbKgSchemas(Base):
+    __tablename__ = "kg_schemas"
+    
+    id = Column(Integer, primary_key=True, index=True)
+    schema_system = Column(String(64), nullable=False)
+    schema_type = Column(String(64), nullable=False)
+    name = Column(String(64), nullable=False)
+    category = Column(String(64), nullable=False)
+    content = Column(Text())
+    version = Column(String(16))
+    status = Column(Integer, default=0)
+    
+# class DbUsers(Base):
+#     __tablename__ ="users"
+#     id = Column(Integer, primary_key=True, index=True)
+#     username = Column(String(32), nullable=False)
+#     full_name = Column(String(64), nullable=False)
+#     email = Column(String(100), nullable=False)
+#     hashed_password = Column(String(64), nullable=False)
+#     status = Column(Integer, default=0)
+
+class DbKgDataset(Base):
+    __tablename__ = "kg_datasets"
+    id = Column(Integer, primary_key=True, index=True)
+    data_category = Column(String(64), nullable=False)
+    data_name = Column(String(64), nullable=False)
+    data_comments = Column(Text(), nullable=False)
+    created = Column(DateTime, nullable=False)
+    updated = Column(DateTime, nullable=False)
+    status = Column(Integer, default=0)
+
+__all__=['DbKgEdge','DbKgNode','DbKgProp','DbKgEdgeProp','DbDictICD','DbDictDRG',
+         'DbDictDrug','DbKgSchemas','DbKgSubGraph','DbKgModels',
+         'DbKgGraphs', 'DbKgDataset']

+ 28 - 0
agent/models/db/user.py

@@ -0,0 +1,28 @@
+
+
+from sqlalchemy import create_engine, Column, Integer, String, MetaData,DateTime,Text,ForeignKey
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import sessionmaker
+from agent.db.database import Base
+from datetime import datetime
+
+
+
+class User(Base):
+    __tablename__ = 'users'
+    id = Column(Integer, primary_key=True)
+    username = Column(String(32), nullable=False)
+    full_name = Column(String(64))
+    email = Column(String(100), nullable=False)
+    hashed_password = Column(String(64), nullable=True)
+    status = Column(Integer, default=0)
+    
+class Session(Base):
+    __tablename__ = 'sessions'
+    id = Column(Integer, primary_key=True)
+    user_id = Column(Integer, ForeignKey('users.id'))
+    session_id = Column(String(64), nullable=False)
+    username = Column(String(32), nullable=False)
+    full_name = Column(String(64))
+    created = Column(DateTime, default=datetime.now)
+    updated = Column(DateTime, default=datetime.now)

+ 27 - 0
agent/models/task.py

@@ -0,0 +1,27 @@
+import os
+import uuid
+import json
+import subprocess
+
+from datetime import datetime
+
+class JobConfigurationItem:
+    def __init__(self, name, value, type):
+        self.name = name
+        self.value = value
+        self.type = type
+        
+class JobConfiguration:
+    items = []
+    def __init__(self):
+        self.items = []
+    def add(self, name, value, type):
+        self.items.append(JobConfigurationItem(name, value, type))
+    def to_dict(self):
+        return [item.__dict__ for item in self.items]
+    def to_json(self):
+        return json.dumps(self.to_dict(), ensure_ascii=False,indent=4)
+    def get(self, name,default=None):
+        for item in self.items:
+            if item.name == name:
+                return item.value
+        return default
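
A small sketch of how JobConfiguration might be filled and serialized; the names and values are placeholders:

conf = JobConfiguration()
conf.add("script", "standard_txt_chunk.py", "string")
conf.add("chunk_size", 512, "int")

print(conf.get("chunk_size"))           # 512
print(conf.get("missing", default=0))   # 0
print(conf.to_json())                   # JSON list of {"name", "value", "type"} items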

+ 320 - 0
agent/models/web/graph.py

@@ -0,0 +1,320 @@
+from pydantic import BaseModel,Field
+from typing import List, Optional
+from datetime import datetime
+
+class KgSubGraphCreate(BaseModel):
+    graph_name: str
+    graph_content: str
+    status :int
+    
+    def to_json(self):
+        dict = self.__dict__
+        if "_sa_instance_state" in dict:
+            del dict["_sa_instance_state"]
+        return dict       
+
+class KgSubGraph(KgSubGraphCreate):    
+    id: int
+
+    class Config:
+        from_attributes = True
+    
+class KgPropCreate(BaseModel):
+    category: int
+    prop_name: str
+    prop_value: str
+    prop_title: str
+    
+    def to_json(self):
+        dict = self.__dict__
+        if "_sa_instance_state" in dict:
+            del dict["_sa_instance_state"]
+        return dict       
+
+class KgProp(KgPropCreate):
+    id: int
+    ref_id: int
+
+    class Config:
+        from_attributes = True
+        
+class KgNodeCreate(BaseModel):
+    name: str
+    category: str
+    layout: Optional[str] = None
+    version: Optional[str] = None
+    
+    def to_json(self):
+        dict = self.__dict__
+        if "_sa_instance_state" in dict:
+            del dict["_sa_instance_state"]
+        return dict       
+
+class KgNode(KgNodeCreate):
+    id: int
+    graph_id: int
+    status: int
+    props: List[KgProp]
+    class Config:
+        from_attributes = True
+
+class KgNodeMerge(BaseModel):
+    src_id: int
+    dest_id: int
+
+class KgEdgeCreate(BaseModel):
+    graph_id: int
+    category: str
+    src_id: int
+    dest_id: int
+    name: str
+    version: Optional[str] = None
+    
+    def to_json(self):
+        dict = self.__dict__
+        if "_sa_instance_state" in dict:
+            del dict["_sa_instance_state"]
+        return dict       
+    
+    class Config:
+        from_attributes = True
+
+class KgEdge(KgEdgeCreate):
+    id: int
+    status: int
+    src_node: KgNode
+    dest_node: KgNode
+    
+    class Config:
+        from_attributes = True
+        
+class KgEdgeName(BaseModel):
+    category: str
+    name: str
+    
+    class Config:
+        from_attributes = True
+        
+        
+class DictICD(BaseModel):
+    id: int
+    icd_code: str
+    icd_name: str
+    
+    def to_json(self):
+        dict = self.__dict__
+        if "_sa_instance_state" in dict:
+            del dict["_sa_instance_state"]
+        return dict   
+        
+    class Config:
+        from_attributes = True
+
+
+class DictDRG(BaseModel):
+    id: int
+    drg_code: str
+    drg_name: str
+    drg_weight: float
+    
+    def to_json(self):
+        dict = self.__dict__
+        if "_sa_instance_state" in dict:
+            del dict["_sa_instance_state"]
+        return dict   
+        
+    class Config:
+        from_attributes = True
+
+class DictDrug(BaseModel):
+    
+    id: int
+    data_source : str
+    drug_code : str
+    reg_name : str
+    prod_name : str
+    reg_dosage_form : str
+    act_dosage_form : str
+    reg_spec : str
+    act_spec: str
+    pkg_mat : str
+    min_pack_size : str
+    min_pack_unit : str
+    min_dosage_unit : str
+    prod_factory : str
+    license_no : str
+    drug_std_code: str
+    subpkg_factory: str
+    sales_status : str
+    social_insurance_name : str
+    jiayi_category : str
+    social_dosage_form : str
+    serial_no: str
+    comments : str
+    
+    def to_json(self):
+        dict = self.__dict__
+        if "_sa_instance_state" in dict:
+            del dict["_sa_instance_state"]
+        return dict   
+        
+    class Config:
+        from_attributes = True
+    
+
+class KgSchemasCreate(BaseModel):
+    name: str
+    category: str
+    content: str
+    version: str
+    
+class KgSchemasUpdate(BaseModel):
+    id:int
+    name: str
+    category: str
+    content: str
+    version: str
+
+class KgSchemaSystems(BaseModel):
+    schema_system:str
+    class Config:
+        from_attributes = True
+    
+class KgSchemas(BaseModel):    
+    id: int
+    name: str
+    schema_system: str
+    schema_type: str
+    category: str
+    content: str
+    version: Optional[str] = None
+    
+    def to_json(self):
+        dict = self.__dict__
+        if "_sa_instance_state" in dict:
+            del dict["_sa_instance_state"]
+        return dict   
+        
+    class Config:
+        from_attributes = True
+
+class NLPRequest(BaseModel):
+    category: str
+    name: str
+    content: str    
+
+
+class KgProjCreate(BaseModel):    
+    proj_name : str
+    proj_category : str
+    proj_type : str
+    proj_conf : str
+    
+class KgProj(KgProjCreate):    
+    id : int
+    created : datetime
+    updated : datetime
+    status : int
+    
+    def to_json(self):
+        dict = self.__dict__
+        if "_sa_instance_state" in dict:
+            del dict["_sa_instance_state"]
+        return dict   
+    
+    class Config:
+        from_attributes = True
+
+class KgTaskUpdate(BaseModel):
+    id: int
+    task_log: str
+
+class KgTaskCreate(BaseModel):    
+    proj_id : int
+    task_category : str
+    task_content : str
+    task_name : Optional[str]
+    
+class KgTask(KgTaskCreate):    
+    id : int
+    task_log : Optional[str]
+    created : datetime
+    updated : datetime
+    status : int
+    
+    def to_json(self):
+        dict = self.__dict__
+        if "_sa_instance_state" in dict:
+            del dict["_sa_instance_state"]
+        return dict   
+    
+    class Config:
+        from_attributes = True
+
+class KgModels(BaseModel):
+    id: int
+    model_name: str
+    model_category: str
+    model_description: str
+    model_settings: str
+    created : datetime
+    updated : datetime
+    status : int
+    
+    def to_json(self):
+        dict = self.__dict__
+        if "_sa_instance_state" in dict:
+            del dict["_sa_instance_state"]
+        return dict   
+    
+    class Config:
+        from_attributes = True
+
+class KgGraphCreate(BaseModel):
+    name: str
+    category: str
+    graph_description: str
+    schema_id: int
+
+class KgGraphs(BaseModel):
+    id: int
+    name: str
+    category: str
+    graph_description: str
+    graph_settings: str
+    created: datetime
+    updated: datetime
+    status: int
+
+    def to_json(self):
+        dict = self.__dict__
+        if "_sa_instance_state" in dict:
+            del dict["_sa_instance_state"]
+        return dict
+
+    class Config:
+        from_attributes = True
+
+
+class KgDataset(BaseModel):
+    id: int
+    data_name: str
+    data_category: str
+    data_comments: str
+    created: datetime
+    updated: datetime
+    status: int
+
+    def to_json(self):
+        dict = self.__dict__
+        if "_sa_instance_state" in dict:
+            del dict["_sa_instance_state"]
+        return dict
+
+    class Config:
+        from_attributes = True
+__all__=['KgSchemaSystems','NLPRequest','KgNodeMerge','KgNode','KgNodeCreate',
+         'KgEdge','KgEdgeCreate','KgProp','KgPropCreate','DictICD','DictDRG','DictDrug',
+         'KgSchemas','KgSchemasCreate','KgSchemasUpdate','KgEdgeName',
+         'KgSubGraphCreate','KgSubGraph','KgModels','KgGraphs','KgGraphCreate',
+         'KgDataset']

+ 67 - 0
agent/models/web/request.py

@@ -0,0 +1,67 @@
+from pydantic import BaseModel
+from typing import Optional
+from typing import List,Union
+
+class ReqeustBase(BaseModel):
+    action: str
+
+class TextData(BaseModel):
+    text: str
+
+class EmbeddingRequest(ReqeustBase):
+    data: List[TextData]
+
+class SearchRequest(ReqeustBase):
+    query: str
+    top_k: int = 20
+    meta_only: Optional[bool] = False 
+
+class GetDocumentRequest(ReqeustBase):
+    title: str
+
+class GraphFilterRequest(ReqeustBase):
+    node_types: Optional[List[str]] = None
+    min_degree: Optional[int] = None
+    min_community_size: Optional[int] = None
+    attributes: Optional[dict] = None
+
+class GraphSearchRequest(ReqeustBase):
+    """图谱语义搜索请求参数"""
+    query: str
+    node_types: Optional[List[str]] = None
+    relationship_types: Optional[List[str]] = None
+    min_weight: Optional[float] = None
+    max_depth: int = 3
+    limit: int = 20
+    similarity_threshold: float = 0.6
+
+class PathAnalysisRequest(ReqeustBase):
+    """节点路径分析请求参数"""
+    source: str
+    target: str
+    max_path_length: int = 3
+    algorithm: str = "bidirectional"
+
+class NodeFilterRequest(ReqeustBase):
+    """复杂节点过滤请求参数"""
+    must: List[dict] = []
+    should: List[dict] = []
+    must_not: List[dict] = []
+    min_score: Optional[float] = None
+    
+
+class BasicRequestParameter(BaseModel):
+    name: str
+    value: Union[str, int, float, bool, List[str], List[int], List[float], List[bool]]  
+    
+      
+class BasicRequest(BaseModel):
+    action: str
+    id: str
+    params: List[BasicRequestParameter]
+    
+    def get_param(self, name=None, default=None):
+        for param in self.params:
+            if param.name == name:
+                return param.value
+        return default
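
A minimal sketch of how BasicRequest.get_param resolves parameters; the action and parameter names are illustrative, matching the browse handler in file_router.py:

    req = BasicRequest(
        action="browse",
        id="1",
        params=[
            BasicRequestParameter(name="job_id", value=12),
            BasicRequestParameter(name="path", value=""),
        ],
    )
    job_id = req.get_param("job_id", 0)   # -> 12
    limit = req.get_param("limit", 10)    # not present -> default 10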

+ 111 - 0
agent/models/web/response.py

@@ -0,0 +1,111 @@
+from pydantic import BaseModel
+from typing import List,Optional,Any,Dict
+from .request import TextData
+
+class Response(BaseModel):
+    status: str
+
+################################################
+
+class VectorData(BaseModel):
+    text: str
+    vector: List[float]
+    
+class EmbeddingResponse(Response):
+    vectors: List[VectorData]
+    
+################################################
+
+class SearchResult(BaseModel):
+    id: str
+    title: str
+    author: str
+    site_name: str
+    site_domain: str
+    pub_date: str
+    article_text: str
+    score: float
+    url: str
+
+class SearchResultMeta(BaseModel):
+    id: str
+    title: str
+    pub_date: str
+    url: str
+
+class SearchResultContent(BaseModel):
+    id: str
+    title: str
+    article_text: str
+
+class SearchResponse(Response):
+    results: List[SearchResult]
+    
+class SearchResponseV2(BaseModel):
+    meta: List[SearchResultMeta]
+    content: List[SearchResultContent]
+    
+################################################
+
+    
+class GetDocumentResponse(Response):
+    title: str
+    text: str
+    url: Optional[str]
+    
+class GetMDocumentResponse(Response):
+    results: List[GetDocumentResponse]
+    
+################################################
+
+################################################
+SUCCESS = 200
+FAILED = 500
+PROCESSING = 202
+class StandardResponse(BaseModel):
+    code: int = 200
+    message: str = "success"
+    meta: Dict = {}
+    records: List[Any] = []
+    def __init__(self, code=SUCCESS, success=True, err_code=-1, err_message="", meta={}, records=None, message="success"):
+        super().__init__()
+        self.code = code
+        self.message = message
+        self.meta = meta
+        if records: 
+            recordsItem = []           
+            for item in records:
+                #Walk the item's attributes; copy list values element by element and keep other values as-is
+                if isinstance(item, dict):
+                    recordsItem.append(item)
+                    continue
+                item_dict = {}
+                for key in item.__dict__:
+                    if not key.startswith("_"):
+                        value = item.__dict__[key]
+                        if isinstance(value, str):
+                            item_dict[key] = value
+                        elif isinstance(value, list):
+                            values = []
+                            for v in value:
+                                values.append(v)                            
+                            item_dict[key] = values
+                        else:                            
+                            item_dict[key] = value
+                recordsItem.append(item_dict)
+            self.records = recordsItem
+
+        if not success:
+            self.code = FAILED
+        if err_code >=0:
+            self.code = err_code
+            self.message = err_message
+        
+        
+
+            
+    
+class TaskResponse(StandardResponse):
+    task_id: str
+    status: str
+    result: Optional[Any] = None
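
A minimal sketch of how StandardResponse is constructed in the routers below; the record payload is illustrative:

    ok = StandardResponse(code=SUCCESS, message="File list",
                          records=[{"name": "a.txt", "type": "file"}])
    err = StandardResponse(code=FAILED, message="File not found")
    auth_err = StandardResponse(err_code=1001, err_message="Authorization failed")  # sets code and message together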

+ 445 - 0
agent/openapi.yaml

@@ -0,0 +1,445 @@
+openapi: 3.1.0
+info:
+  title: FastAPI - Swagger UI
+  version: 1.0.0
+paths:
+  /agent/job:
+    post:
+      tags:
+      - agent job interface
+      summary: Submit Job
+      operationId: submit_job_agent_job_post
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/BasicRequest'
+        required: true
+      responses:
+        '200':
+          description: Successful Response
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/StandardResponse'
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+  /agent/queue:
+    post:
+      tags:
+      - agent job interface
+      summary: Submit Queue
+      operationId: submit_queue_agent_queue_post
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/BasicRequest'
+        required: true
+      responses:
+        '200':
+          description: Successful Response
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/StandardResponse'
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+  /file/upload/{file_type}/{job_id}:
+    post:
+      tags:
+      - agent job interface
+      summary: Upload File
+      operationId: upload_file_file_upload__file_type___job_id__post
+      parameters:
+      - name: file_type
+        in: path
+        required: true
+        schema:
+          type: string
+          title: File Type
+      - name: job_id
+        in: path
+        required: true
+        schema:
+          type: integer
+          title: Job Id
+      requestBody:
+        required: true
+        content:
+          multipart/form-data:
+            schema:
+              $ref: '#/components/schemas/Body_upload_file_file_upload__file_type___job_id__post'
+      responses:
+        '200':
+          description: Successful Response
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/StandardResponse'
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+  /file/browse:
+    post:
+      tags:
+      - agent job interface
+      summary: Browser File
+      operationId: browser_file_file_browse_post
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/BasicRequest'
+        required: true
+      responses:
+        '200':
+          description: Successful Response
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/StandardResponse'
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+  /file/download/{job_id}:
+    get:
+      tags:
+      - agent job interface
+      summary: Download File
+      operationId: download_file_file_download__job_id__get
+      parameters:
+      - name: job_id
+        in: path
+        required: true
+        schema:
+          type: integer
+          title: Job Id
+      - name: path
+        in: query
+        required: false
+        schema:
+          type: string
+          default: path
+          title: Path
+      responses:
+        '200':
+          description: Successful Response
+          content:
+            application/json:
+              schema: {}
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+  /file/view/{job_id}:
+    get:
+      tags:
+      - agent job interface
+      summary: View File
+      operationId: view_file_file_view__job_id__get
+      parameters:
+      - name: job_id
+        in: path
+        required: true
+        schema:
+          type: integer
+          title: Job Id
+      - name: path
+        in: query
+        required: false
+        schema:
+          type: string
+          default: path
+          title: Path
+      responses:
+        '200':
+          description: Successful Response
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/StandardResponse'
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+  /user/session:
+    post:
+      tags:
+      - agent job interface
+      summary: Register
+      operationId: register_user_session_post
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/BasicRequest'
+        required: true
+      responses:
+        '200':
+          description: Successful Response
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/StandardResponse'
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+  /user/logout/{session_id}:
+    get:
+      tags:
+      - agent job interface
+      summary: Logout
+      operationId: logout_user_logout__session_id__get
+      parameters:
+      - name: session_id
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Session Id
+      responses:
+        '200':
+          description: Successful Response
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/StandardResponse'
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+  /user/signin:
+    post:
+      tags:
+      - agent job interface
+      summary: Signin
+      operationId: signin_user_signin_post
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/BasicRequest'
+        required: true
+      responses:
+        '200':
+          description: Successful Response
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/StandardResponse'
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+  /kb/summary:
+    post:
+      tags:
+      - knowledge build interface
+      summary: Summary Func
+      operationId: summary_func_kb_summary_post
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/BasicRequest'
+        required: true
+      responses:
+        '200':
+          description: Successful Response
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/StandardResponse'
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+  /kb/schemas:
+    post:
+      tags:
+      - knowledge build interface
+      summary: Schemas Func
+      operationId: schemas_func_kb_schemas_post
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/BasicRequest'
+        required: true
+      responses:
+        '200':
+          description: Successful Response
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/StandardResponse'
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+  /kb/nodes:
+    post:
+      tags:
+      - knowledge build interface
+      summary: Nodes Func
+      operationId: nodes_func_kb_nodes_post
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/BasicRequest'
+        required: true
+      responses:
+        '200':
+          description: Successful Response
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/StandardResponse'
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+components:
+  schemas:
+    BasicRequest:
+      properties:
+        action:
+          type: string
+          title: Action
+        id:
+          type: string
+          title: Id
+        params:
+          items:
+            $ref: '#/components/schemas/BasicRequestParameter'
+          type: array
+          title: Params
+      type: object
+      required:
+      - action
+      - id
+      - params
+      title: BasicRequest
+    BasicRequestParameter:
+      properties:
+        name:
+          type: string
+          title: Name
+        value:
+          anyOf:
+          - type: string
+          - type: integer
+          - type: number
+          - type: boolean
+          - items:
+              type: string
+            type: array
+          - items:
+              type: integer
+            type: array
+          - items:
+              type: number
+            type: array
+          - items:
+              type: boolean
+            type: array
+          title: Value
+      type: object
+      required:
+      - name
+      - value
+      title: BasicRequestParameter
+    Body_upload_file_file_upload__file_type___job_id__post:
+      properties:
+        file:
+          type: string
+          format: binary
+          title: File
+      type: object
+      required:
+      - file
+      title: Body_upload_file_file_upload__file_type___job_id__post
+    HTTPValidationError:
+      properties:
+        detail:
+          items:
+            $ref: '#/components/schemas/ValidationError'
+          type: array
+          title: Detail
+      type: object
+      title: HTTPValidationError
+    StandardResponse:
+      properties:
+        code:
+          type: integer
+          title: Code
+          default: 200
+        message:
+          type: string
+          title: Message
+          default: success
+        meta:
+          type: object
+          title: Meta
+          default: {}
+        records:
+          items: {}
+          type: array
+          title: Records
+          default: []
+      type: object
+      title: StandardResponse
+    ValidationError:
+      properties:
+        loc:
+          items:
+            anyOf:
+            - type: string
+            - type: integer
+          type: array
+          title: Location
+        msg:
+          type: string
+          title: Message
+        type:
+          type: string
+          title: Error Type
+      type: object
+      required:
+      - loc
+      - msg
+      - type
+      title: ValidationError
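
A minimal client sketch against the spec above, assuming the service runs at http://localhost:8000 (host, port, and the action value are assumptions):

    import requests

    payload = {
        "action": "browse",
        "id": "1",
        "params": [{"name": "job_id", "value": 12}],
    }
    resp = requests.post("http://localhost:8000/agent/job", json=payload, timeout=30)
    body = resp.json()   # StandardResponse shape: code, message, meta, records
    print(body["code"], body["message"])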

+ 192 - 0
agent/router/dify_kb_router.py

@@ -0,0 +1,192 @@
+from fastapi import APIRouter, UploadFile, File, HTTPException, Depends, Header
+from typing import List, Optional
+from pydantic import BaseModel
+from models.web.response import StandardResponse
+
+from db.database import get_db
+from sqlalchemy.orm import Session
+from sqlalchemy import text
+
+
+router = APIRouter(prefix="/dify", tags=["Dify Knowledge Base"])
+
+
+# --- Data Models ---
+class RetrievalSetting(BaseModel):
+    top_k: int
+    score_threshold: float
+
+class MetadataCondition(BaseModel):
+    name: List[str]
+    comparison_operator: str
+    value: Optional[str] = None
+
+class MetadataFilter(BaseModel):
+    logical_operator: str = "and"
+    conditions: List[MetadataCondition]
+
+class DifyRetrievalRequest(BaseModel):
+    knowledge_id: str
+    query: str
+    retrieval_setting: RetrievalSetting
+    metadata_condition: Optional[MetadataFilter] = None
+
+class KnowledgeRecord(BaseModel):
+    content: str
+    score: float
+    title: str
+    metadata: dict
+
+# --- Authentication ---
+async def verify_api_key(authorization: str = Header(...)):
+    if not authorization.startswith("Bearer "):
+        raise HTTPException(
+            status_code=403,
+            detail=StandardResponse(
+                success=False,
+                err_code=1001,
+                err_message="Invalid Authorization header format"
+            ).model_dump()
+        )
+    api_key = authorization[7:]
+    # TODO: Implement actual API key validation logic
+    if not api_key:
+        raise HTTPException(
+            status_code=403,
+            detail=StandardResponse(
+                success=False,
+                err_code=1002,
+                err_message="Authorization failed"
+            ).model_dump()
+        )
+    return api_key
+
+@router.post("/retrieval", response_model=StandardResponse)
+async def dify_retrieval(
+    request: DifyRetrievalRequest,
+    api_key: str = Depends(verify_api_key),
+    db: Session = Depends(get_db)
+):
+    """
+    Implements the Dify external knowledge base retrieval interface
+    """
+    print("dify_retrieval start")
+    try:            
+        # Check whether the knowledge base exists
+        result = db.execute(text("select 1 from kg_graphs where id = :graph_id"), {"graph_id": request.knowledge_id}) 
+        
+        kb_exists = result.scalar()
+        print(kb_exists)
+        if not kb_exists:  # scalar() returns None when no matching row is found
+            raise HTTPException(
+                status_code=404,
+                detail=StandardResponse(
+                    success=False,
+                    err_code=2001,
+                    err_message="The knowledge does not exist"
+                ).model_dump()
+            )
+        print("知识库存在")
+        # 构建基础查询
+        query = """
+        select id,name,category,version from kg_nodes as node where node.graph_id = :graph_id and node.name = :node_name
+        """
+
+        # Add metadata filter conditions
+        if request.metadata_condition:
+            conditions = []
+            for cond in request.metadata_condition.conditions:
+                operator_map = {
+                    "contains": "CONTAINS",
+                    "not contains": "NOT CONTAINS",
+                    "start with": "STARTS WITH", 
+                    "end with": "ENDS WITH",
+                    "is": "=",
+                    "is not": "<>",
+                    "empty": "IS NULL",
+                    "not empty": "IS NOT NULL",
+                    ">": ">",
+                    "<": "<",
+                    "≥": ">=",
+                    "≤": "<=",
+                    "before": "<",
+                    "after": ">"
+                }
+                cypher_op = operator_map.get(cond.comparison_operator, "=")
+                
+                for field in cond.name:
+                    if cond.comparison_operator in ["empty", "not empty"]:
+                        conditions.append(f"node.{field} {cypher_op}")
+                    else:
+                        conditions.append(
+                            f"node.{field} {cypher_op} ${field}_value"
+                        )
+
+            where_clause = " AND ".join(conditions)
+            query += f" AND {where_clause}"
+
+        query += """           
+        ORDER BY node.name DESC
+        LIMIT :top_k
+        """
+        params = {'graph_id': request.knowledge_id, 'node_name': request.query,'top_k':request.retrieval_setting.top_k}
+        supply_params = {f"{cond.name}_value": cond.value for cond in request.metadata_condition.conditions} if request.metadata_condition else {}
+        # 执行查询
+        
+
+        result = db.execute(text(query), 
+                            {
+                                **params, 
+                                **supply_params
+                            }    )           
+                                
+     
+        data_returned = []
+        for record in result:
+            id,name,category,version = record
+            doc_node = {
+                "id": id,
+                "name": name,
+                "category": category,
+                "version": version
+            }
+            
+            data_returned.append({
+                "content": "",
+                "score": float(1.0),
+                "title": doc_node.get("name", "Untitled"),
+                "metadata": {
+                    "id": doc_node.get("id", 0),
+                    "category": doc_node.get("category", "")                    
+                }
+            })
+        
+        for data in data_returned:
+            id = data['metadata']['id']
+            result = db.execute(text("""
+            select prop_title, prop_value from kg_props as prop where prop.category=1 and prop.ref_id =:node_id
+            """),{'node_id':id})
+            content = []
+            for record in result:
+                prop_title, prop_value = record
+                content.append(f"{prop_title}:{prop_value}\n")
+            data['content']="\n".join(content)
+        response_data = StandardResponse(
+            records=data_returned
+        )
+        return response_data
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        print(e)
+        raise HTTPException(
+            status_code=500,
+            detail=StandardResponse(
+                success=False,
+                message=str(e)
+            ).model_dump()
+        )
+
+dify_kb_router = router
+

+ 119 - 0
agent/router/file_router.py

@@ -0,0 +1,119 @@
+import sys,os,io
+current_path = os.getcwd()
+sys.path.append(current_path)
+
+from config.site import SiteConfig
+from fastapi import APIRouter, Depends, Query, UploadFile, File
+from fastapi.responses import FileResponse
+from db.database import get_db
+from sqlalchemy.orm import Session
+from agent.models.web.response import StandardResponse,FAILED,SUCCESS
+from agent.models.web.request import BasicRequest
+from agent.libs.agent import AgentBusiness
+from typing import Optional
+import logging
+import base64
+import chardet
+
+router = APIRouter(prefix="/file", tags=["agent job interface"])
+logger = logging.getLogger(__name__)
+config = SiteConfig()
+
+@router.post("/upload/{file_type}/{job_id}", response_model=StandardResponse)
+async def upload_file(file_type:str, job_id:int,  file: UploadFile = File(...)):    
+    #if not file.filename.endswith(file_type):
+    #    return StandardResponse(code=FAILED, message="Invalid file type")
+    logger.info(f"upload file: {file.filename} {file_type}")
+    data = { "file_type":file_type, "file_name":file.filename}
+    try:
+        file_content = await file.read()
+        os.makedirs(config.get_config('JOB_PATH')+f"/{job_id}/upload", exist_ok=True)
+        with open(config.get_config('JOB_PATH')+f"/{job_id}/upload/{file.filename}", "wb") as f:
+            f.write(file_content)
+    except Exception as e:
+        logger.error(f"upload file error: {e}")
+        return StandardResponse(code=FAILED, message="File upload failed")
+    return StandardResponse(code=SUCCESS, message="File uploaded successfully", records=[data])
+
+@router.post("/browse", response_model=StandardResponse)
+async def browser_file(request:BasicRequest):
+    job_id = request.get_param("job_id", 0)
+    param_path = request.get_param("path", "")
+    if job_id == 0:
+        return StandardResponse(code=FAILED, message="Job id is required")
+    job_path = os.path.join(config.get_config('JOB_PATH'),f"{job_id}")
+    path = os.path.join(job_path, param_path.replace("..", ""))
+    
+    if not os.path.exists(path):
+        return StandardResponse(code=FAILED, message="File not found")
+    if os.path.isdir(path):
+        files = os.listdir(path)
+        data = []
+        for file in files:       
+            param_path = os.path.join(path, file)     
+            encoded_param_path = base64.b64encode(param_path.encode("utf-8")).decode('utf-8')
+            file_type = "file"
+            if os.path.isdir(param_path):
+                file_type = "dir"
+            data.append({ "name":file, "path":encoded_param_path, "type":file_type})
+        
+        return StandardResponse(code=SUCCESS, message="File list", records=data)
+    else:
+        return StandardResponse(code=SUCCESS, message="nothing found", records=[])
+
+@router.get("/download/{job_id}")
+async def download_file(job_id:int, path:str=Query("path")):
+    logger.info(f"GET download file: {job_id} {path}")
+    if job_id == 0:
+        return StandardResponse(code=FAILED, message="Job id is required")
+
+    job_path = os.path.join(config.get_config('JOB_PATH'),f"{job_id}")
+
+    param_path = base64.b64decode(path.encode("utf-8")).decode('utf-8')
+    param_path = param_path.replace("..", "")
+    path = os.path.join(job_path, param_path)
+
+    filename = os.path.basename(path)
+    logger.info(f"path: {path} filename: {filename}")
+    if not os.path.exists(path):
+        return StandardResponse(code=FAILED, message="File not found")
+    if os.path.isdir(path):
+        return StandardResponse(code=FAILED, message="Can not download directory")
+    else:
+        return FileResponse(path, media_type='application/octet-stream', filename=filename)
+
+@router.get("/view/{job_id}", response_model=StandardResponse)
+def view_file(job_id:int, path:str=Query("path")):
+
+
+    logger.info(f"view file: {job_id} {path}")
+    if job_id == 0:
+        return StandardResponse(code=FAILED, message="Job id is required")
+
+    job_path = os.path.join(config.get_config('JOB_PATH'),f"{job_id}")
+
+    param_path = base64.b64decode(path.encode("utf-8")).decode('utf-8')
+    param_path = param_path.replace("..", "")
+    path = os.path.join(job_path, param_path)
+
+    filename = os.path.basename(path)
+    logger.info(f"path: {path} filename: {filename}")
+    logger.info(f"view file: {path}")
+    if not os.path.exists(path):
+        logger.info(f"file not exists: {path}")
+        return StandardResponse(code=FAILED, message="File not found")
+    if os.path.isdir(path):
+        logger.info(f"is dir: {path}")
+        return StandardResponse(code=FAILED, message="Can not download directory")
+    else:
+        logger.info("open file")
+        with open(path, "rb") as f:
+            raw_content = f.read()
+            result = chardet.detect(raw_content)
+            content = raw_content.decode(result['encoding'], errors='ignore')
+            return StandardResponse(code=SUCCESS, message="File content", records=[{"content":content}])
+
+    return StandardResponse(code=FAILED, message="File not found")
+
+
+file_router = router

+ 838 - 0
agent/router/graph_mgr_router.py

@@ -0,0 +1,838 @@
+from fastapi import APIRouter, Depends, HTTPException
+from sqlalchemy.orm import Session
+from sqlalchemy.sql import select, or_, and_, func,distinct
+from agent.models.web.graph  import *
+from agent.models.db.graph import *
+from db.database import  get_db
+from agent.libs.response import resp_200
+from typing import List
+from math import ceil
+import json
+from datetime import datetime
+router = APIRouter(prefix="/graph-mgr", tags=["agent job interface"])
+
+def nodes_append(items, node):
+    for n in items:
+        if n['id'] == node["id"]:
+            return
+    items.append(node)
+################################### GRAPH ###############################################
+@router.get("/graph-list/{page}/{page_size}")
+def graph_list(page: int, page_size: int, db: Session = Depends(get_db)):
+    count = db.query(DbKgGraphs).count()
+    total_page = ceil(count / page_size)
+    start = 1
+    if page <= total_page:
+        start = (page - 1) * page_size
+
+    results = db.query(DbKgGraphs).limit(page_size).offset(start).all()
+    codes = [KgGraphs.model_validate(node) for node in results]
+
+    return resp_200(
+        data={"total": count, "pages": page, "size": page_size, "records": [item.model_dump() for item in codes]})
+
+@router.get("/graph-schema/{graph_id}")
+def graph_schema(graph_id: int, db: Session = Depends(get_db)):
+    data = db.query(DbKgGraphs).filter(DbKgGraphs.id == graph_id).first()
+    settings = json.loads(data.graph_settings)
+    schema_id = settings['schema_id']
+    schema_data = db.query(DbKgSchemas).filter(DbKgSchemas.id == schema_id).first()
+    schema_content = json.loads(schema_data.content)
+
+    return resp_200(data=schema_content)
+
+
+@router.post("/graph-create")
+def create_graph(data: KgGraphCreate, db: Session = Depends(get_db)):
+    graph_settings = { "schema_id": data.schema_id }
+
+    db_node = DbKgGraphs()
+    db_node.name = data.name
+    db_node.category = data.category
+    db_node.graph_description = data.graph_description
+    db_node.graph_settings = json.dumps(graph_settings)
+
+    db_node.created = datetime.now()
+    db_node.updated = datetime.now()
+    db.add(db_node)
+    db.commit()
+    db.refresh(db_node)
+
+    return resp_200(data={'error_code': 0, 'error_message':'' ,'data':KgGraphs.model_validate(db_node).model_dump()})
+
+@router.get("/graph-search/{query}")
+def search_projects(query:str, db: Session = Depends(get_db)):
+    result = db.query(DbKgGraphs).filter(DbKgGraphs.status>=0,
+                                       DbKgGraphs.name.ilike('%'+query+'%')).all()
+    data = [KgGraphs.model_validate(item) for item in result]
+
+    graphs = [d.model_dump() for d in data]
+    for proj in graphs:
+        proj.pop("graph_settings")
+    return resp_200(data={'records': graphs})
+# @router.get("/api/graph-schema/{graph_id}")
+# def get_graph_schema(graph_id: int, db: Session = Depends(get_db)):
+#     graph_data = db.query(DbKgGraphs).filter(DbKgGraphs.id == graph_id).first()
+#     if graph_data:
+#         settings = json.loads(graph_data.graph_settings)
+#         if settings:
+#             schema_id = settings['schema_id']
+#             schema_data = db.query(DbKgSchemas).filter(DbKgSchemas.id == schema_id)
+#             if schema_data:
+#                 schema_content_data = json.loads(schema_data.content)
+#                 return resp_200(data=schema_content_data)
+#     return resp_200(data={'entities': [], 'relations': []})
+
+################################### SCHEMA ###############################################
+@router.get("/schemas/{page}/{page_size}")
+def read_schemas(page:int, page_size:int,  db: Session = Depends(get_db)):
+    count =  db.query(DbKgSchemas).count()
+    total_page = ceil(count / page_size)
+    start = 1
+    if page <= total_page:
+        start = (page-1) * page_size
+   
+    results = db.query(DbKgSchemas).limit(page_size).offset(start).all()   
+    codes = [KgSchemas.model_validate(node) for node in results]
+    
+    return resp_200(data= {"total":count, "pages": page, "size":page_size, "records":[item.model_dump() for item in codes]})
+
+
+@router.get("/schemas-search/{sys_name}")
+def read_all_schemas(sys_name: str,  db: Session = Depends(get_db)):
+    results = None
+    if sys_name == 'all':        
+        results =  db.query(DbKgSchemas).all()
+    else:
+        results = db.query(DbKgSchemas).filter(DbKgSchemas.schema_system == sys_name).all()
+
+    schema_systems = [KgSchemas.model_validate(node) for node in results]
+    
+    return resp_200(data= {"total":len(schema_systems), "pages": 1, "size":len(schema_systems), "records":[item.model_dump() for item in schema_systems]})
+
+
+@router.get("/schemas-load/{schema_id}")
+def load_schemas(schema_id: int, db: Session = Depends(get_db)):
+    results = None
+    schema = db.query(DbKgSchemas).filter(DbKgSchemas.id == schema_id).first()
+
+    if schema.schema_system == 'GraphWork' and schema.schema_type =='GW.Schema':
+        #this is graph work schema system
+        schema_data = json.loads(schema.content)
+        return resp_200(data=schema_data)
+    return resp_200(data={'entities': [], 'relations': []})
+    # if sys_type == 'entity' or sys_type=='relation':
+    #     results = db.query(DbKgSchemas).filter(DbKgSchemas.schema_system==sys_name,DbKgSchemas.schema_type==sys_type).all()
+    # else:
+    #     results = db.query(DbKgSchemas).filter(DbKgSchemas.schema_system==sys_name).all()
+    #
+    # schemas = [KgSchemas.model_validate(node) for node in results]
+    # entities = []
+    # relations = []
+    # records = [item.model_dump() for item in schemas]
+    # try:
+    #     for data in records:
+    #         if data['schema_type'] == 'ENTITY':
+    #             content = data['content']
+    #             props = []
+    #             lines = content.split("\n")
+    #             for line in lines:
+    #                 line.strip()
+    #                 if len(line) > 1:
+    #                     parts = line.split("|")
+    #                     if len(parts)<2:
+    #                         parts.append(parts[0])
+    #                     props.append({"category":1, "prop_title":parts[1], "prop_name": parts[0], "prop_value":parts[1]})
+    #             data['props'] = props
+    #             entities.append(data)
+    #         if data['schema_type'] == 'RELATION':
+    #             relations.append(data)
+    # except Exception as e:
+    #     print("Exception", e)
+    # return resp_200(data= {'entities':entities, 'relations': relations})
+
+@router.get("/schemas-get/{page_size}")
+def get_schema_definitions(page_size:int, db: Session = Depends(get_db)):
+    results = db.query(DbKgSchemas).limit(page_size)   
+    schemas = [KgSchemas.model_validate(node) for node in results]
+    records = [item.model_dump() for item in schemas]
+    try:
+        for data in records:
+            content = data['content']
+            props = []
+            lines = content.split("\n")
+            for line in lines:
+                line = line.strip()
+                if len(line) > 1:
+                    parts = line.split("|")
+                    if len(parts)<2:
+                        parts.append(parts[0])
+                    props.append({"category":1, "prop_title":parts[1], "prop_name": parts[0], "prop_value":parts[1]})
+            data['props'] = props
+    except Exception as e:
+        print("Exception", e)
+    return resp_200(data= {"total":len(schemas), "pages": 1, "size":page_size, "records": records}) 
+
+@router.post("/schemas-create")
+def create_schemas(data :KgSchemasCreate,  db: Session = Depends(get_db)):
+    db_node = DbKgSchemas(**data.model_dump())
+    db_node.schema_system = 'DEFAULT'
+    db_node.schema_type = 'ENTITY'
+    db.add(db_node)
+    db.commit()
+    db.refresh(db_node)
+    
+    return resp_200(data= KgSchemas.model_validate(db_node).model_dump())
+
+@router.post("/schemas-update")
+def update_schemas(data :KgSchemasUpdate,  db: Session = Depends(get_db)):
+    db_node = DbKgSchemas(**data.model_dump())
+    db.query(DbKgSchemas).filter(DbKgSchemas.id == db_node.id).update({'name': db_node.name, 'content':db_node.content, 'category':db_node.category, 'version': db_node.version })
+    
+    db.commit()    
+    
+    db_node = db.query(DbKgSchemas).filter(DbKgSchemas.id == db_node.id).first()
+    
+    content = db_node.content
+    props = []
+    lines = content.split("\n")
+    for line in lines:
+        line = line.strip()
+        if len(line) > 1:
+            parts = line.split("|")
+            if len(parts)>1:
+                new_title = parts[1]
+                db.query(DbKgProp).filter(DbKgProp.prop_name==parts[0]).update({'prop_title': new_title})
+                print("update title ",new_title)
+    db.commit()
+        
+    return resp_200(data= KgSchemas.model_validate(db_node).model_dump())
+
+@router.get("/schemas-delete/{id}")
+def delete_schemas(id: int,  db: Session = Depends(get_db)):   
+    db.query(DbKgSchemas).filter(DbKgSchemas.id == id).delete()    
+    db.commit()            
+    return resp_200(data= {"id": id})
+
+
+################################### NODE ###############################################
+@router.get("/node-category/{graph_id}/{category}/{limit}")
+def read_node_by_category(graph_id: int, category: str, limit:int, db: Session = Depends(get_db)):
+    db_nodes = db.query(DbKgNode).filter(DbKgNode.category==category, DbKgNode.graph_id==graph_id).limit(limit)
+    if db_nodes is None:
+        raise HTTPException(status_code=404, detail="Node not found") 
+    node_list = [KgNode.model_validate(node) for node in db_nodes]
+    return resp_200(data={"nodes":[node.model_dump() for node in node_list], "edges":[]})
+
+@router.get("/node-delete/{graph_id}/{node_id}", response_model=KgNode)
+def delete_node(graph_id: int, node_id: int, db: Session = Depends(get_db)):
+    db_node = db.query(DbKgNode).filter(DbKgNode.graph_id == graph_id, DbKgNode.id == node_id).first()
+    if db_node is None:
+        raise HTTPException(status_code=404, detail="Node not found")
+    edges_out = db.query(DbKgEdge).filter(DbKgEdge.src_id == node_id).delete()
+    edges_in= db.query(DbKgEdge).filter(DbKgEdge.dest_id == node_id).delete()
+    db.delete(db_node)
+    db.commit()
+    return resp_200(data= {"id": node_id})
+
+@router.post("/node-create")
+def create_node(graph_id: int, node: KgNode, db: Session = Depends(get_db)):
+    count = db.query(DbKgNode).filter(DbKgNode.category == node.category, DbKgNode.name == node.name).count()
+    
+    if count > 0:        
+        return resp_200(data= {"id": 0, "error_code": 1, "error_msg": "Node already existed"})
+    db_node = DbKgNode()
+    db_node.graph_id = node.graph_id
+    db_node.category = node.category
+    db_node.name = node.name
+    db_node.layout = ''
+    db_node.version = '1.0'
+    
+    db.add(db_node)
+    db.commit()
+    db.refresh(db_node)
+    print(db_node)
+    kg_props = []
+    for prop in node.props:
+        p = DbKgProp()
+        p.ref_id = db_node.id
+        p.category = 1
+        p.prop_name = prop.prop_name
+        p.prop_title = prop.prop_title
+        p.prop_value = prop.prop_value
+        kg_props.append(p)
+    db.add_all(kg_props)
+    db.commit()
+    return resp_200(data= {"id": db_node.id, "error_code": 0, "error_msg": ""})
+
+@router.get("/nodes/summary/{graph_id}")
+def get_node_summary(graph_id: int=0, db:Session=Depends(get_db)):
+    results = db.query(DbKgNode.category, func.count(1)).filter(DbKgNode.graph_id == graph_id).group_by(DbKgNode.category).all()
+    ret = []
+    for r in results:
+        category, count = r   
+        ret.append({"category":category, "count":count})     
+    return resp_200(data=ret)
+
+@router.get("/nodes/search/{graph_id}/{node_name}/{in_out}/{deepth}")
+def search_node(graph_id:int, node_name: str, in_out: int =0, deepth: int=1, db: Session= Depends(get_db)):
+    '''
+    in_out: 0 = node only; 1 = node plus incoming edges; 2 = node plus outgoing edges; 3 = node plus both
+    deepth: recursion depth of the search; currently only 1 is supported
+    '''  
+    if node_name.startswith("-"):        
+        query = db.query(DbKgNode).filter(DbKgNode.graph_id==graph_id, DbKgNode.name==node_name[1:])
+    else:
+        query = db.query(DbKgNode).filter(DbKgNode.graph_id==graph_id, DbKgNode.name.ilike('%'+node_name+'%'))
+    #print("原生SQL:", query.statement.compile(compile_kwargs={"literal_binds": True}))
+    db_nodes = query.all()
+    if db_nodes is None:
+        raise HTTPException(status_code=404, detail="Node not found") 
+    
+    #print (f'%{node_name}%')
+    #print('sql query result', db_nodes)
+    nodes = []
+    edges = []
+    in_count = 0
+    out_count = 0
+    for node in db_nodes:         
+        nodes_append(nodes, KgNode.model_validate(node).model_dump()) 
+          
+        if in_out == 2 or in_out == 3:
+            print("select out edges")
+            count = db.query(DbKgEdge).filter(DbKgEdge.graph_id==graph_id, and_(DbKgEdge.src_id == node.id, DbKgEdge.status == 0)).count()
+
+            if count > 0:
+                out_count = count
+                results = db.query(DbKgEdge).filter(DbKgEdge.graph_id==graph_id, and_(DbKgEdge.src_id == node.id, DbKgEdge.status == 0)).limit(10)
+                edges_list = [KgEdge.model_validate(n) for n in results]  
+                edges_raw = [n.model_dump() for n in edges_list]
+                for edge in edges_raw:
+                    nodes_append(nodes, edge["src_node"])
+                    nodes_append(nodes, edge["dest_node"])                    
+                    data = edge
+                    data.pop("src_node", None)
+                    data.pop("dest_node", None)
+                    nodes_append(edges, data)   
+        
+        if in_out == 1 or in_out == 3:
+            print("select in edges")
+            count = db.query(DbKgEdge).filter(DbKgEdge.graph_id==graph_id, and_(DbKgEdge.dest_id == node.id, DbKgEdge.status == 0)).count()
+
+            if count > 0:
+                in_count = count
+                results = db.query(DbKgEdge).filter(DbKgEdge.graph_id==graph_id, and_(DbKgEdge.dest_id == node.id, DbKgEdge.status == 0)).limit(10)
+                edges_list = [KgEdge.model_validate(n) for n in results]  
+                edges_raw = [n.model_dump() for n in edges_list]
+                for edge in edges_raw:
+                    nodes_append(nodes, KgNode.model_validate(edge["src_node"]).model_dump())
+                    nodes_append(nodes, KgNode.model_validate(edge["dest_node"]).model_dump())             
+                    data = edge
+                    data.pop("src_node", None)
+                    data.pop("dest_node", None)
+                    nodes_append(edges, data)   
+        
+    return resp_200(data={"summary":{"count_in": in_count, "count_out":out_count},"nodes":nodes, "edges":edges})
+
+@router.get("/nodes/{node_id}/{in_out}/{deepth}")
+def read_node(node_id: int, in_out: int =0, deepth: int=1, db: Session = Depends(get_db)):
+    '''
+    in_out: 0 = node only; 1 = node plus incoming edges; 2 = node plus outgoing edges; 3 = node plus both
+    deepth: recursion depth of the search; currently only 1 is supported
+    '''  
+    if in_out == 0:
+        #only current node
+        node = db.query(DbKgNode).filter(DbKgNode.id == node_id).first()
+        return resp_200(data={"summary":{"count_in": 0, "count_out":0},"nodes":[KgNode.model_validate(node).model_dump()], "edges":[]})
+    
+    nodes = []
+    edges = []    
+    nodes_ids = []
+    edge_ids = []
+    count_in = 0
+    count_out = 0
+    if in_out == 1 or in_out == 3:        
+        #print("原生SQL:", db.query(DbKgEdge).filter(DbKgEdge.dest_id == node_id).statement.compile(compile_kwargs={"literal_binds": True}))
+        count_in = db.query(DbKgEdge).filter(and_(DbKgEdge.dest_id == node_id, DbKgEdge.status ==0)).count()
+        db_edges = db.query(DbKgEdge).filter(and_(DbKgEdge.dest_id == node_id, DbKgEdge.status ==0)).limit(25)
+        
+        for result in db_edges:
+            edge = KgEdge.model_validate(result).model_dump()
+            
+            if (edge["src_node"]["id"] in nodes_ids) == False:
+                nodes.append(edge["src_node"])
+                nodes_ids.append(edge["src_node"]["id"])
+            if (edge["dest_node"]["id"] in nodes_ids)==False:
+                nodes_ids.append(edge["dest_node"]["id"])
+                nodes.append(edge["dest_node"])
+            data = edge
+            data.pop("src_node", None)
+            data.pop("dest_node", None)
+            if (edge["id"] in edge_ids ) == False:
+                edges.append(data)
+                edge_ids.append(edge["id"])
+                    
+    if in_out == 2 or in_out == 3:
+        count_out = db.query(DbKgEdge).filter(and_(DbKgEdge.src_id == node_id, DbKgEdge.status ==0)).count()
+        db_edges = db.query(DbKgEdge).filter(and_(DbKgEdge.src_id == node_id, DbKgEdge.status ==0)).limit(25)
+        #print(count_out)
+        for result in db_edges:
+            edge = KgEdge.model_validate(result).model_dump()
+            
+            if (edge["src_node"]["id"] in nodes_ids) == False:
+                nodes.append(edge["src_node"])
+                nodes_ids.append(edge["src_node"]["id"])
+            if (edge["dest_node"]["id"] in nodes_ids)==False:
+                nodes_ids.append(edge["dest_node"]["id"])
+                nodes.append(edge["dest_node"])
+            data = edge
+            data.pop("src_node", None)
+            data.pop("dest_node", None)
+            if (edge["id"] in edge_ids ) == False:
+                edges.append(data)
+                edge_ids.append(edge["id"])
+        
+        
+    return resp_200(data={"summary":{"count_in": count_in, "count_out":count_out},"nodes":nodes, "edges":edges})
+
+@router.get("/nodes-browse/{node_id}")
+def browse_node(node_id: int, db: Session = Depends(get_db)):
+
+    total_remain = 999
+    db_edges_count = 0 #db.query(DbKgEdge).filter(DbKgEdge.src_id == node_id).count()
+    
+    db_edges0_count = db.query(DbKgEdge).filter(DbKgEdge.dest_id == node_id, DbKgEdge.status==0, DbKgEdge.category=='belongs_to').count()
+    
+    total = db_edges_count + db_edges0_count
+    if total == 0:
+        return resp_200(data={"summary":{"count_in": db_edges0_count, "count_out":db_edges_count},"nodes":[], "edges":[]})
+    factor = db_edges_count / total
+    db_edges_count = total_remain * factor
+    factor = db_edges0_count / total
+    db_edges0_count = total_remain * factor
+    
+    db_edges = [] #db.query(DbKgEdge).filter(DbKgEdge.src_id == node_id).limit(ceil(db_edges_count))
+    
+    if db_edges is None:
+        raise HTTPException(status_code=404, detail="Edge not found")
+    try:
+        db_edges0 = db.query(DbKgEdge).filter(DbKgEdge.dest_id == node_id, DbKgEdge.status==0).limit(ceil(db_edges0_count))
+    except Exception as e:
+        print(e)
+    if db_edges0 is None:
+        raise HTTPException(status_code=404, detail="Edge not found")
+    
+    nodes_ids = []
+    edge_ids = []
+    nodes = []
+    edges = []
+    
+    for results in [db_edges, db_edges0]:
+        edges_list = [KgEdge.model_validate(node) for node in results]    
+        edges_raw = [node.model_dump() for node in edges_list]
+
+        for edge in edges_raw:
+            if (edge["src_node"]["id"] in nodes_ids) == False:
+                nodes.append(edge["src_node"])
+                nodes_ids.append(edge["src_node"]["id"])
+            #if (edge["dest_node"]["id"] in nodes_ids)==False:
+            #    nodes_ids.append(edge["dest_node"]["id"])
+            #   nodes.append(edge["dest_node"])
+            data = edge
+            data.pop("src_node", None)
+            data.pop("dest_node", None)
+            if (edge["id"] in edge_ids ) == False:
+                edges.append(data)
+                edge_ids.append(edge["id"])
+    return resp_200(data={"summary":{"count_in": len(nodes), "count_out":0},"nodes":nodes, "edges":edges})
+
+@router.post("/node-merge")
+def merge_node(mergeData:List[KgNodeMerge],db: Session = Depends(get_db)):
+    edges_merge = []
+    edges_invalid = []
+    edges_insert = []
+    for merge in mergeData:
+        print("merge from %d to %d" % (merge.src_id, merge.dest_id))
+        #Edges pointing into the source node must be re-pointed to the target node
+        edges = db.query(DbKgEdge.id, DbKgEdge.src_id, DbKgEdge.dest_id, DbKgEdge.category, DbKgEdge.name ).filter(DbKgEdge.dest_id==merge.src_id, DbKgEdge.status==0).all()
+        for edge in edges:
+            id, src_id, dest_id, category, name = edge
+            edges_merge.append({"id":id, "in_out": "src_in", "old_dest_id": dest_id, "src_id":src_id, "dest_id":merge.dest_id, "category":category, "name":name, "status":0})
+        #Outgoing edges of the source node must also be re-pointed to the target node
+        edges = db.query(DbKgEdge.id, DbKgEdge.src_id, DbKgEdge.dest_id, DbKgEdge.category, DbKgEdge.name ).filter(DbKgEdge.src_id==merge.src_id, DbKgEdge.status==0).all()
+        for edge in edges:
+            id, src_id, dest_id, category, name = edge
+            edges_merge.append({"id":id, "in_out": "src_out", "old_src_id": src_id, "src_id":merge.dest_id, "dest_id":dest_id, "category":category, "name":name, "status":0})
+        #Existing edges between the two nodes themselves must be marked invalid
+        edges_to_update = db.query(DbKgEdge.id, DbKgEdge.src_id, DbKgEdge.dest_id, DbKgEdge.category, DbKgEdge.name, DbKgEdge.status).filter(and_(DbKgEdge.src_id==merge.src_id, DbKgEdge.dest_id==merge.dest_id, DbKgEdge.status==0)).all()
+        for edge in edges_to_update:
+            id, src_id, dest_id, category, name, status = edge
+            status = -1 #delete
+            edges_invalid.append({"id":id, "in_out": "out", "src_id":src_id, "dest_id":dest_id, "category":category, "name":name, "status":status})
+   
+        edges_to_update = db.query(DbKgEdge.id, DbKgEdge.src_id, DbKgEdge.dest_id, DbKgEdge.category, DbKgEdge.name, DbKgEdge.status).filter(and_(DbKgEdge.dest_id==merge.src_id, DbKgEdge.src_id==merge.dest_id, DbKgEdge.status==0)).all()
+        for edge in edges_to_update:
+            id, src_id, dest_id, category, name, status = edge
+            status = -1 #delete
+            edges_invalid.append({"id":id, "in_out": "in", "src_id":src_id, "dest_id":dest_id, "category":category, "name":name, "status":status})
+        #Insert a MERGE edge recording the merge
+        edges_insert.append({"src_id":merge.src_id, "dest_id":merge.dest_id, "category":'MERGE_TO', "name":'MERGE', "status":0, 'version':"1.0"})
+    edges = []        
+    for edge in edges_merge:
+        if edge['dest_id'] == edge['src_id']:
+            print("circle detected, skip")
+            continue     
+        print("edge merged %d to %d %s-%s" % (edge['src_id'], edge['dest_id'], edge['category'], edge['name'])) 
+        count = db.query(DbKgEdge).filter(DbKgEdge.src_id==edge['src_id'], DbKgEdge.dest_id==edge['dest_id'], DbKgEdge.category==edge['category']).count()
+        
+        if count > 0:            
+            print("can not move edge because of target has same edge already existed")
+            edge['status'] = -1
+            if edge["in_out"] == "src_in":
+                edge["dest_id"] = edge["old_dest_id"]
+            if edge["in_out"] == "src_out":
+                edge["src_id"] = edge["old_src_id"]
+            edges_invalid.append(edge)
+        else:
+            db.query(DbKgEdge).filter(DbKgEdge.id == edge['id']).update({'src_id': edge['src_id'], 'dest_id': edge['dest_id']})
+    for edge in edges_insert:        
+        print("edge inserted %d to %d %s-%s" % (edge['src_id'], edge['dest_id'], edge['category'], edge['name'])) 
+        count = db.query(DbKgEdge).filter(DbKgEdge.src_id==edge['src_id'], DbKgEdge.dest_id==edge['dest_id'], DbKgEdge.category==edge['category']).count()  
+        if count > 0:
+            print("can insert edge because of edge already existed")
+            continue
+        else:
+            edgeData = DbKgEdge() 
+            edgeData.src_id = edge['src_id']
+            edgeData.dest_id = edge['dest_id']
+            edgeData.category = edge['category']
+            edgeData.name = edge['name']
+            edgeData.status = edge['status']
+            edgeData.version = edge['version']
+            db.add(edgeData)
+        
+    for edge in edges_invalid:        
+        print("edge invalid %d to %d %s-%s" % (edge['src_id'], edge['dest_id'], edge['category'], edge['name']))
+        db.query(DbKgEdge).filter(DbKgEdge.id == edge['id']).update({'status':edge['status']})   
+    db.commit()
+    return resp_200(data= {"edges": edges, "error_code": 0, "error_msg": ""})
+
+
+################################### EDGE ###############################################
+@router.get("/edges/c/{category}")
+def read_links_by_category(category: str, db: Session = Depends(get_db)):
+    edges_names = db.query(DbKgEdge.category, DbKgEdge.name).group_by(DbKgEdge.category,DbKgEdge.name)
+    if edges_names is None:
+        raise HTTPException(status_code=404, detail="Node not found") 
+    names_list = [KgEdgeName.model_validate(node).model_dump() for node in edges_names]
+    return resp_200(data={"records":names_list})
+
+@router.post("/edge-create", response_model=KgEdge)
+def create_edge(edges: List[KgEdgeCreate], db: Session = Depends(get_db)):
+    try:
+        db_edges = []
+        db_edge_ids = []
+        for edge in edges:
+            if edge.src_id != edge.dest_id:
+                existed_edges = db.query(DbKgEdge).filter(DbKgEdge.graph_id==edge.graph_id, DbKgEdge.src_id==edge.src_id, DbKgEdge.dest_id==edge.dest_id, DbKgEdge.category == edge.category).all()
+                count = 0
+                for ex_edge in existed_edges:
+                    ex_edge.status = 0
+                    db_edge_ids.append((ex_edge.src_id, ex_edge.dest_id))
+                    count = count + 1
+                if count == 0:                
+                    db_edges.append(DbKgEdge(**edge.model_dump()))
+                    db_edge_ids.append((edge.src_id, edge.dest_id))
+        error_msg = ""
+        code = 0
+        edges = []
+        db.add_all(db_edges)
+        db.commit()
+        for ids in db_edge_ids:
+            src_id, dest_id = ids
+            db_edges = db.query(DbKgEdge).filter(and_(DbKgEdge.src_id==src_id, DbKgEdge.dest_id==dest_id)).all()
+            for edge in db_edges:
+                edge_raw = KgEdge.model_validate(edge).model_dump()                
+                edge_raw.pop("src_node", None)
+                edge_raw.pop("dest_node", None)
+                edges.append(edge_raw)
+            
+    except Exception as e:
+        error_msg = str(e)
+        code = -1
+    
+    return resp_200(data= {"error_code": code, "error_msg": error_msg, "edges": edges})
+
+@router.get("/edge-of-node/{node_id}")
+def get_edge_by_node(node_id:int, db: Session = Depends(get_db)):
+    edges_in = db.query(DbKgEdge).filter(or_(DbKgEdge.src_id == node_id, DbKgEdge.dest_id==node_id)).order_by(DbKgEdge.id).all()
+    edges = [KgEdge.model_validate(edge) for edge in edges_in]
+    return resp_200(data={"edges":[edge.model_dump() for edge in edges]})
+
+@router.get("/edge-delete/{edge_id}/{status}")
+def delete_edge(edge_id:int, status:int, db: Session = Depends(get_db)):
+    if status == 0 or status == -1:
+        db.query(DbKgEdge).filter(DbKgEdge.id == edge_id).update({"status": status })
+        db.commit()
+    if status == -99: 
+        #delete
+        db.query(DbKgEdge).filter(DbKgEdge.id == edge_id).delete()
+        db.commit()    
+    
+    return resp_200(data={"id":edge_id,'error_code':0, 'message':'Edge status updated'})
+################################### SUB GRAPH ##########################################
+@router.get("/workspace-load")
+def get_all_sub_graph(db: Session = Depends(get_db)):
+    db_datas = db.query(DbKgSubGraph).filter(DbKgSubGraph.status == 0).all()
+    validate_data = [KgSubGraph.model_validate(data) for data in db_datas]
+    return resp_200(data={"graphs":[data.model_dump() for data in validate_data]})
+
+@router.get("/workspace-get/{graph_id}")
+def sub_graph_load(graph_id: int, db: Session = Depends(get_db)):
+    graph_data = db.query(DbKgSubGraph).filter(DbKgSubGraph.id == graph_id).first()
+    nodes = []
+    edges = []
+    if graph_data:
+        json_data = json.loads(graph_data.graph_content)
+        node_ids = []
+        for node in json_data["nodes"]:
+            node_ids.append(node["id"])
+        nodes_data = db.query(DbKgNode).filter(DbKgNode.id.in_(node_ids)).all()
+        edges_in = db.query(DbKgEdge).filter(DbKgEdge.dest_id.in_(node_ids), DbKgEdge.status==0).all()
+        edges_out = db.query(DbKgEdge).filter(DbKgEdge.src_id.in_(node_ids), DbKgEdge.status==0).all()
+        all_edges = edges_in + edges_out
+        node_ids = []
+        for node in nodes_data:
+            nodes.append(KgNode.model_validate(node))
+            node_ids.append(node.id)
+        for edge in all_edges:
+            if edge.src_id in node_ids and edge.dest_id in node_ids and edge.status >= 0:
+                edges.append(KgEdge.model_validate(edge))
+    
+    return resp_200(data={"nodes":[node.model_dump() for node in nodes],"edges":[edge.model_dump() for edge in edges]})
+        #TODO : retrieve all data and edges
+
+@router.post("/workspace-update")
+def update_sub_graph(data:KgSubGraph, db:Session = Depends(get_db)):
+    db.query(DbKgSubGraph).filter(DbKgSubGraph.id == data.id).update({'graph_name':data.graph_name,'graph_content': data.graph_content})
+    db.commit()
+    return resp_200(data= {"id": data.id, "error_code": 0, "error_msg": ""})
+
+@router.post("/workspace-create")
+def create_sub_graph(data:KgSubGraphCreate,db: Session = Depends(get_db)):
+    count = db.query(DbKgSubGraph).filter(DbKgSubGraph.graph_name == data.graph_name, DbKgSubGraph.status==0).count()    
+    if count > 0:        
+        return resp_200(data= {"id": 0, "error_code": 1, "error_msg": "graph already exists"})
+    db_data = DbKgSubGraph()
+    db_data.graph_name = data.graph_name
+    db_data.graph_content = data.graph_content
+    db_data.status = data.status
+    
+    db.add(db_data)
+    db.commit()
+    db.refresh(db_data)
+    return resp_200(data= {"id": db_data.id, "error_code": 0, "error_msg": ""})
+
+@router.get("/workspace-delete/{graph_id}")
+def sub_graph_delete(graph_id: int, db: Session = Depends(get_db)):
+    db.query(DbKgSubGraph).filter(DbKgSubGraph.id == graph_id).delete()
+    db.commit()
+    return resp_200(data= {"id": graph_id, "error_code": 0, "error_msg": ""})
+
+
+@router.post("/workspace-validate")
+def sub_graph_validate(data:KgSubGraphCreate, db: Session = Depends(get_db)):
+    graph_data = data.graph_content
+    nodes = []
+    edges = []
+    if graph_data:
+        json_data = json.loads(graph_data)
+        node_ids = []
+        for node in json_data["nodes"]:
+            node_ids.append(node["id"])
+        nodes_data = db.query(DbKgNode).filter(DbKgNode.id.in_(node_ids)).all()
+        edges_in = db.query(DbKgEdge).filter(and_(DbKgEdge.dest_id.in_(node_ids), DbKgEdge.status==0)).order_by(DbKgEdge.id).all()
+        edges_out = db.query(DbKgEdge).filter(and_(DbKgEdge.src_id.in_(node_ids), DbKgEdge.status==0)).order_by(DbKgEdge.id).all()
+        all_edges = edges_in + edges_out
+        node_ids = []
+        edge_ids = []
+        for node in nodes_data:
+            nodes.append(KgNode.model_validate(node))
+            node_ids.append(node.id)
+        for edge in all_edges:
+            if edge.id in edge_ids:
+                continue
+            if edge.src_id in node_ids and edge.dest_id in node_ids:
+                edge_raw = KgEdge.model_validate(edge).model_dump()
+                edge_raw.pop("src_node", None)
+                edge_raw.pop("dest_node", None)
+                edges.append(edge_raw)
+                edge_ids.append(edge.id)
+    
+    return resp_200(data={ "error_code": 0, "error_msg": "", "nodes":[node.model_dump() for node in nodes],"edges":edges})
+
+################################### DICT ###############################################
+@router.get("/dict/icd/{page}/{page_size}")
+def read_icd_page(page:int, page_size:int, db:Session=Depends(get_db)):
+    count =  db.query(DbDictICD).count()
+    total_page = ceil(count / page_size)
+    start = 0
+    if page <= total_page:
+        start = (page-1) * page_size
+
+    results = db.query(DbDictICD).limit(page_size).offset(start).all()
+    codes = [DictICD.model_validate(node) for node in results]
+
+    return resp_200(data= {"total":count, "pages": total_page, "size":page_size, "records":[item.model_dump() for item in codes]})
+
+@router.get("/dict/icd/search/{page}/{page_size}/{name}")
+def search_icd(page:int, page_size:int, name:str, db:Session=Depends(get_db)):
+    count =  db.query(DbDictICD).filter(DbDictICD.icd_name.like("%"+name+"%")).count()
+    total_page = ceil(count / page_size)
+    start = 0
+    if page <= total_page:
+        start = (page-1) * page_size
+
+    results = db.query(DbDictICD).filter(DbDictICD.icd_name.like("%"+name+"%")).limit(page_size).offset(start).all()
+    codes = [DictICD.model_validate(node) for node in results]
+
+    return resp_200(data= {"total":count, "pages": total_page, "size":page_size, "records":[item.model_dump() for item in codes]})
+
+@router.get("/dict/drg/{page}/{page_size}")
+def get_drg_page(page:int, page_size:int, db:Session=Depends(get_db)):
+    count =  db.query(DbDictDRG).count()
+    total_page = ceil(count / page_size)
+    start = 0
+    if page <= total_page:
+        start = (page-1) * page_size
+
+    results = db.query(DbDictDRG).limit(page_size).offset(start).all()
+    codes = [DictDRG.model_validate(node) for node in results]
+
+    return resp_200(data= {"total":count, "pages": total_page, "size":page_size, "records":[item.model_dump() for item in codes]})
+
+@router.get("/dict/drg/search/{page}/{page_size}/{name}")
+def search_drg(page:int, page_size:int, name:str, db:Session=Depends(get_db)):
+    count =  db.query(DbDictDRG).filter(DbDictDRG.drg_name.like("%"+name+"%")).count()
+    total_page = ceil(count / page_size)
+    start = 0
+    if page <= total_page:
+        start = (page-1) * page_size
+
+    results = db.query(DbDictDRG).filter(DbDictDRG.drg_name.like("%"+name+"%")).limit(page_size).offset(start).all()
+    codes = [DictDRG.model_validate(node) for node in results]
+
+    return resp_200(data= {"total":count, "pages": total_page, "size":page_size, "records":[item.model_dump() for item in codes]})
+
+@router.get("/dict/drug/{page}/{page_size}")
+def read_drug_page(page:int, page_size:int, db:Session=Depends(get_db)):
+    count =  db.query(DbDictDrug).count()
+    total_page = ceil(count / page_size)
+    start = 0
+    if page <= total_page:
+        start = (page-1) * page_size
+
+    results = db.query(DbDictDrug).limit(page_size).offset(start).all()
+    codes = [DictDrug.model_validate(node) for node in results]
+
+    return resp_200(data= {"total":count, "pages": total_page, "size":page_size, "records":[item.model_dump() for item in codes]})
+
+@router.get("/dict/drug/search/{page}/{page_size}/{name}")
+def search_drug(page:int, page_size:int, name:str, db:Session=Depends(get_db)):
+    count =  db.query(DbDictDrug).filter(or_(DbDictDrug.reg_name.like("%"+name+"%"), DbDictDrug.prod_factory.like("%"+name+"%"))).count()
+    total_page = ceil(count / page_size)
+    start = 0
+    if page <= total_page:
+        start = (page-1) * page_size
+
+    results = db.query(DbDictDrug).filter(or_(DbDictDrug.reg_name.like("%"+name+"%"), DbDictDrug.prod_factory.like("%"+name+"%"))).limit(page_size).offset(start).all()
+    codes = [DictDrug.model_validate(node) for node in results]
+
+    return resp_200(data= {"total":count, "pages": total_page, "size":page_size, "records":[item.model_dump() for item in codes]})
+
+
+
+
+#######################################################################################
+'''
+
+@router.put("/api/nodes/{node_id}", response_model=KgNode)
+def update_node(node_id: int, node: KgNodeCreate, db: Session = Depends(get_db)):
+    db_node = db.query(DbKgNode).filter(DbKgNode.id == node_id).first()
+    if db_node is None:
+        raise HTTPException(status_code=404, detail="Node not found")
+    for key, value in node.dict().items():
+        setattr(db_node, key, value)
+    db.commit()
+    db.refresh(db_node)
+    return db_node
+
+
+
+
+@router.get("/api/edges/{edge_id}", response_model=KgEdge)
+def read_edge(edge_id: int, db: Session = Depends(get_db)):
+    db_edge = db.query(DbKgEdge).filter(DbKgEdge.id == edge_id).first()
+    if db_edge is None:
+        raise HTTPException(status_code=404, detail="Edge not found")
+    return db_edge
+
+
+
+
+@router.put("/api/edges/{edge_id}", response_model=KgEdge)
+def update_edge(edge_id: int, edge: KgEdgeCreate, db: Session = Depends(get_db)):
+    db_edge = db.query(DbKgEdge).filter(DbKgEdge.id == edge_id).first()
+    if db_edge is None:
+        raise HTTPException(status_code=404, detail="Edge not found")
+    for key, value in edge.dict().items():
+        setattr(db_edge, key, value)
+    db.commit()
+    db.refresh(db_edge)
+    return db_edge
+
+@router.delete("/api/edges/{edge_id}", response_model=KgEdge)
+def delete_edge(edge_id: int, db: Session = Depends(get_db)):
+    db_edge = db.query(DbKgEdge).filter(DbKgEdge.id == edge_id).first()
+    if db_edge is None:
+        raise HTTPException(status_code=404, detail="Edge not found")
+    db.delete(db_edge)
+    db.commit()
+    return db_edge
+
+@router.post("/api/props/", response_model=KgProp)
+def create_prop(prop: KgPropCreate, db: Session = Depends(get_db)):
+    db_prop = DbKgProp(**prop.dict())
+    db.add(db_prop)
+    db.commit()
+    db.refresh(db_prop)
+    return db_prop
+
+@router.get("/api/props/{prop_id}", response_model=KgProp)
+def read_prop(prop_id: int, db: Session = Depends(get_db)):
+    db_prop = db.query(models.KgProp).filter(models.KgProp.id == prop_id).first()
+    if db_prop is None:
+        raise HTTPException(status_code=404, detail="Property not found")
+    return db_prop
+
+@router.put("/api/props/{prop_id}", response_model=KgProp)
+def update_prop(prop_id: int, prop: KgPropCreate, db: Session = Depends(get_db)):
+    db_prop = db.query(models.KgProp).filter(models.KgProp.id == prop_id).first()
+    if db_prop is None:
+        raise HTTPException(status_code=404, detail="Property not found")
+    for key, value in prop.dict().items():
+        setattr(db_prop, key, value)
+    db.commit()
+    db.refresh(db_prop)
+    return db_prop
+
+@router.delete("/api/props/{prop_id}", response_model=KgProp)
+def delete_prop(prop_id: int, db: Session = Depends(get_db)):
+    db_prop = db.query(models.KgProp).filter(models.KgProp.id == prop_id).first()
+    if db_prop is None:
+        raise HTTPException(status_code=404, detail="Property not found")
+    db.delete(db_prop)
+    db.commit()
+    return db_prop
+'''
+graph_mgr_router = router
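As a usage reference for the dictionary pagination endpoints above, here is a minimal client sketch. The base URL, the absence of authentication, and the exact envelope produced by resp_200 are assumptions; only the route shapes come from the router itself.

    import requests

    BASE_URL = "http://localhost:8000"  # assumption: local development server

    def fetch_icd_page(page: int, page_size: int) -> dict:
        # GET /dict/icd/{page}/{page_size} pages through the ICD dictionary
        resp = requests.get(f"{BASE_URL}/dict/icd/{page}/{page_size}")
        resp.raise_for_status()
        return resp.json()

    def search_drug(page: int, page_size: int, name: str) -> dict:
        # GET /dict/drug/search/{page}/{page_size}/{name} matches reg_name or prod_factory substrings
        resp = requests.get(f"{BASE_URL}/dict/drug/search/{page}/{page_size}/{name}")
        resp.raise_for_status()
        return resp.json()

    if __name__ == "__main__":
        print(fetch_icd_page(1, 20))
        print(search_drug(1, 20, "aspirin"))  # any substring works; the dictionary content itself is Chinese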

+ 303 - 0
agent/router/graph_network_router.py

@@ -0,0 +1,303 @@
+import sys,os
+current_path = os.getcwd()
+sys.path.append(current_path)
+
+from fastapi import APIRouter, Depends, Query
+from typing import Optional, List
+
+
+from agent.cdss.libs.cdss_helper import CDSSHelper
+from web.models.response import StandardResponse
+from web.models.request import GraphFilterRequest
+
+router = APIRouter(prefix="/graph", tags=["Knowledge Graph"])
+graph_helper = CDSSHelper()
+
+@router.get("/nodes/search", response_model=StandardResponse)
+async def search_nodes(
+    keyword: str = Query(..., min_length=2),
+    limit: int = Query(10, ge=1, le=100),
+    node_type: Optional[str] = Query(None),
+    min_degree: Optional[int] = Query(None)
+):
+    """
+    Search graph nodes by keyword and attribute filter conditions
+    """
+    try:
+        results = graph_helper.node_search(
+            keyword, 
+            limit=limit,
+            node_type=node_type,
+            min_degree=min_degree
+        )
+        community_report_results = graph_helper.community_report_search(keyword)
+        return StandardResponse(
+            success=True,
+            records={"nodes":results,"community_report":community_report_results},
+            code = 0,
+            error_msg=f"Found {len(results)} nodes"
+        )
+    except Exception as e:
+        return StandardResponse(
+            success=False,
+            message=str(e)
+        )
+@router.get("/nodes/neighbor_search", response_model=StandardResponse)
+async def neighbor_search(
+    keyword: str = Query(..., min_length=2),
+    limit: int = Query(10, ge=1, le=100),
+    node_type: Optional[str] = Query(None),
+    neighbor_type: Optional[str] = Query(None),
+    min_degree: Optional[int] = Query(None)
+):
+    """
+    Search graph nodes by keyword, then expand to neighbors of the requested type
+    """
+    try:
+        scores_factor = 1.7
+        results = []
+        diseases = {}
+        # results = graph_helper.node_search(
+        #     keyword, 
+        #     limit=limit,
+        #     node_type=node_type,
+        #     min_degree=min_degree
+        # )
+        print("number of search results:", len(results))
+        # If no good result comes back, split the keyword (symptoms only) and search each part;
+        # if the split search also finds nothing, an empty result is returned, otherwise those results are used.
+        has_good_result = False
+        # new_results = []
+        # for item in results:
+        #     if item["score"] > scores_factor:
+        #         has_good_result = True
+        #         new_results.append(item)
+        #results = new_results
+        print("results remaining after similarity filtering:", len(results))
+        if not has_good_result:
+            keywords = keyword.split(" ")
+            new_results = []
+            for item in keywords:
+                if len(item) > 1:
+                    results = graph_helper.node_search(
+                        item,
+                        limit=limit,
+                        node_type=node_type,
+                        min_degree=min_degree
+                    )
+        
+                    for result_item in results:
+                        if result_item["score"] > scores_factor:
+                            new_results.append(result_item)
+                            if result_item["type"] == "Disease":                                           
+                                if result_item["id"] not in diseases:
+                                    diseases[result_item["id"]] =  {
+                                                        "id":result_item["id"],
+                                                        "type":result_item["type"],
+                                                        "count":1
+                                                    }
+                                else:
+                                    diseases[result_item["id"]]["count"] = diseases[result_item["id"]]["count"] + 1
+                                has_good_result = True
+            results = new_results
+            print("number of results from the expanded search:", len(results))
+            
+        neighbors_data = {}
+        
+        for item in results:                        
+            entities, relations = graph_helper.neighbor_search(item["id"], 1)    
+            max_diseases = 20  # diseases like "fever" relate to many other diseases, so cap how wide the search can grow
+            for neighbor in entities:                                   
+                if neighbor["type"] == neighbor_type:    
+                    # this neighbor is exactly the node type requested
+                    if neighbor["id"] not in neighbors_data:
+                        neighbors_data[neighbor["id"]] =  {
+                                            "id":neighbor["id"],
+                                            "type":neighbor["type"],
+                                            "count":1
+                                        }
+                    else:
+                         neighbors_data[neighbor["id"]]["count"] = neighbors_data[neighbor["id"]]["count"] + 1
+                else:
+                    # the neighbor is a disease, so search one more hop for nodes of the requested type
+                    if neighbor["type"] == "Disease":   
+                        if neighbor["id"] not in diseases:
+                            diseases[neighbor["id"]] =  {
+                                                "id":neighbor["id"],
+                                                "type":neighbor["type"],
+                                                "count":1
+                                            }
+                        else:
+                            diseases[neighbor["id"]]["count"] = diseases[neighbor["id"]]["count"] + 1            
+                        disease_entities, relations = graph_helper.neighbor_search(neighbor["id"], 1)                            
+                        for disease_neighbor in disease_entities:
+                            # edges sometimes reference nodes missing from the node data, so check the type key first
+                            if "type" in disease_neighbor.keys():                                                                
+                                if disease_neighbor["type"] == neighbor_type:
+                                    if disease_neighbor["id"] not in neighbors_data:
+                                        neighbors_data[disease_neighbor["id"]] = {
+                                            "id":disease_neighbor["id"],
+                                            "type":disease_neighbor["type"],
+                                            "count":1
+                                        }
+                                    else:
+                                        neighbors_data[disease_neighbor["id"]]["count"] = neighbors_data[disease_neighbor["id"]]["count"] + 1
+                        # expand through at most max_diseases diseases
+                        max_diseases = max_diseases - 1
+                        if max_diseases == 0:
+                            break
+        disease_data = [diseases[k] for k in diseases]
+        disease_data = sorted(disease_data, key=lambda x:x["count"],reverse=True)
+        data = [neighbors_data[k] for k in neighbors_data]                   
+        data = sorted(data, key=lambda x:x["count"],reverse=True)     
+        
+        if len(data) > 10:
+            data = data[:10] 
+            factor = 1.0
+            total = 0.0
+            for item in data:
+                total = item["count"] * factor + total
+            for item in data:
+                item["count"] = item["count"] / total
+            factor = factor * 0.9
+        
+        if len(disease_data) > 10:
+            disease_data = disease_data[:10] 
+            factor = 1.0
+            total = 0.0
+            for item in disease_data:
+                total = item["count"] * factor + total
+            for item in disease_data:
+                item["count"] = item["count"] / total
+            factor = factor * 0.9
+        return StandardResponse(
+            success=True,
+            records={"nodes":disease_data,"neighbors":data},
+            error_code = 0,
+            error_msg=f"Found {len(results)} nodes"
+        )
+    except Exception as e:
+        return StandardResponse(
+            success=False,
+            error_code=500,
+            error_msg=str(e)
+        )
+
+
+@router.post("/nodes/filter", response_model=StandardResponse)
+async def filter_nodes(request: GraphFilterRequest):
+    """
+    Filter nodes by composite conditions
+    """
+    try:
+        results = graph_helper.filter_nodes(
+            node_types=request.node_types,
+            min_degree=request.min_degree,
+            min_community_size=request.min_community_size,
+            attributes=request.attributes
+        )
+        return StandardResponse(
+            success=True,
+            data={"nodes": results},
+            message=f"Filtered {len(results)} nodes"
+        )
+    except Exception as e:
+        return StandardResponse(
+            success=False,
+            message=str(e)
+        )
+
+@router.get("/statistics", response_model=StandardResponse)
+async def get_graph_statistics():
+    """
+    Retrieve graph statistics
+    """
+    try:
+        stats = graph_helper.get_graph_statistics()
+        return StandardResponse(
+            success=True,
+            data=stats,
+            message="Graph statistics retrieved"
+        )
+    except Exception as e:
+        return StandardResponse(
+            success=False,
+            message=str(e)
+        )
+
+@router.get("/community/{community_id}", response_model=StandardResponse)
+async def get_community_details(
+    community_id: int,
+    min_size: int = Query(3, ge=2)
+):
+    """
+    Retrieve detailed information for the specified community
+    """
+    try:
+        community_info = graph_helper.get_community_details(community_id, min_size)
+        return StandardResponse(
+            success=True,
+            data=community_info,
+            message="Community details retrieved"
+        )
+    except Exception as e:
+        return StandardResponse(
+            success=False,
+            message=str(e)
+        )
+
+
+# # New graph path-analysis endpoint
+# @router.post("/path-analysis", response_model=StandardResponse)
+# async def analyze_paths(request: GraphSearchRequest):
+#     """
+#     Analyze potential relationship paths between nodes
+#     """
+#     try:
+#         paths = graph_helper.find_paths(
+#             source_id=request.source_id,
+#             target_id=request.target_id,
+#             max_depth=request.max_depth
+#         )
+#         return StandardResponse(
+#             success=True,
+#             data={"paths": paths},
+#             message=f"Found {len(paths)} possible paths"
+#         )
+#     except Exception as e:
+#         return StandardResponse(
+#             success=False,
+#             message=str(e)
+#         )
+
+
+
+@router.get("/node/{node_id}", response_model=StandardResponse)
+async def get_node_details(
+    node_id: str,
+    with_relations: bool = False,
+    relation_types: List[str] = Query(None),
+    relation_limit: int = Query(10, ge=1, le=100)
+):
+    """
+    Retrieve node details
+    - relation_types: only include relations of the given types
+    - relation_limit: maximum number of relations to return
+    """
+    try:
+        node_info = graph_helper.get_node_details(
+            node_id, 
+            include_relations=with_relations,
+            relation_types=relation_types,
+            relation_limit=relation_limit
+        )
+        return StandardResponse(
+            success=True,
+            data=node_info,
+            message="Node details retrieved"
+        )
+    except ValueError as e:
+        return StandardResponse(
+            success=False,
+            message=str(e)
+        )
+
+graph_router = router
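The neighbor_search handler above ultimately counts how often each candidate node (and each intermediate disease) is reached, keeps the ten most frequent when there are more than ten, and converts the counts into relative weights. A condensed sketch of that post-processing step, written independently of the router (Counter and unconditional normalization are simplifications of what the handler does):

    from collections import Counter

    def top_counts_normalized(counts: Counter, top_n: int = 10) -> list:
        # keep the top_n most frequently reached ids and turn raw counts into relative weights
        ranked = counts.most_common(top_n)
        total = sum(c for _, c in ranked) or 1
        return [{"id": node_id, "count": c / total} for node_id, c in ranked]

    # usage sketch: three candidate ids reached 5, 3 and 1 times respectively
    print(top_counts_normalized(Counter({"disease_a": 5, "disease_b": 3, "check_x": 1})))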

+ 106 - 0
agent/router/kb_router.py

@@ -0,0 +1,106 @@
+import sys,os
+current_path = os.getcwd()
+sys.path.append(current_path)
+
+from config.site import SiteConfig
+from fastapi import APIRouter, Depends, Query
+from db.database import get_db
+from sqlalchemy.orm import Session
+from agent.models.web.response import StandardResponse,FAILED,SUCCESS
+from agent.models.web.request import BasicRequest
+from agent.libs.graph import GraphBusiness
+from agent.libs.auth import verify_session_id, SessionValues
+import logging
+
+router = APIRouter(prefix="/kb", tags=["knowledge build interface"])
+logger = logging.getLogger(__name__)
+config = SiteConfig()
+
+LOG_DIR = config.get_config("TASK_LOG_DIR", current_path)
+
+@router.post('/summary', response_model=StandardResponse)
+def summary_func(request:BasicRequest, db: Session = Depends(get_db), sess:SessionValues = Depends(verify_session_id))->StandardResponse:
+    if request.action != "get_summary":
+        return StandardResponse(code=FAILED, message="invalid action")
+    graph_id = request.get_param("graph_id",0)
+    biz = GraphBusiness(db)
+    summary = biz.get_graph_summary(graph_id=graph_id)
+    if summary:
+        logger.info(summary)
+        return StandardResponse(code=SUCCESS, message="summary found", records=[summary])
+    else:
+        return StandardResponse(code=FAILED, message="summary not found",records=[])
+
+@router.post('/schemas', response_model=StandardResponse)
+def schemas_func(request:BasicRequest, db: Session = Depends(get_db), sess:SessionValues = Depends(verify_session_id))->StandardResponse:
+    if request.action== "get_nodes_schemas":
+        graph_id = request.get_param("graph_id",0)
+        biz = GraphBusiness(db)
+        schemas = biz.get_nodes_categories(graph_id=graph_id)
+        if schemas:
+            return StandardResponse(code=SUCCESS, message="schemas found", records=schemas)
+    if request.action== "get_edges_schemas":
+        graph_id = request.get_param("graph_id",0)
+        biz = GraphBusiness(db)
+        schemas = biz.get_edges_categories(graph_id=graph_id)
+        if schemas:
+            return StandardResponse(code=SUCCESS, message="schemas found", records=schemas)
+    return StandardResponse(code=FAILED, message="invalid action")
+
+@router.post('/nodes', response_model=StandardResponse)
+def nodes_func(request:BasicRequest, db: Session = Depends(get_db), sess:SessionValues = Depends(verify_session_id))->StandardResponse:    
+    if (request.action == "search_nodes"):
+        node_name = request.get_param("name","")
+        category = request.get_param("category","")
+        graph_id = request.get_param("graph_id",0)   
+        biz = GraphBusiness(db)   
+        if node_name == "":
+            return StandardResponse(code=FAILED, message="node name is empty", records=[])
+        if category == "":
+            return StandardResponse(code=FAILED, message="category is empty", records=[])
+        if graph_id == 0:
+            return StandardResponse(code=FAILED, message="graph id is empty", records=[])
+        
+        nodes = biz.search_like_node_by_name(graph_id=graph_id, category=category, name=node_name)
+        if nodes:
+            return StandardResponse(code=SUCCESS, message="nodes found", records=nodes)
+        
+        else:
+            return StandardResponse(code=FAILED, message="search job failed")
+    elif (request.action == "get_nodes"):
+        graph_id = request.get_param("graph_id",0)
+        page = request.get_param("page",1)
+        page_size = request.get_param("page_size",1)
+        biz = GraphBusiness(db)
+        nodes = biz.get_nodes_by_page(graph_id=graph_id, page=page, page_size=page_size)
+        if nodes:
+            return StandardResponse(code=SUCCESS, message="nodes found", records=nodes)
+    elif (request.action == "neighbors"):
+        graph_id = request.get_param("graph_id",0)
+        node_id = request.get_param("node_id",0)
+        if node_id>0:
+            biz = GraphBusiness(db)
+            node = biz.get_node_by_id(graph_id=graph_id, node_id=node_id)
+            if node is None:
+                return StandardResponse(code=FAILED, message="node not found", records=[])
+            nodes_in = biz.get_neighbors(graph_id=graph_id, node_id=node_id, direction="in")
+            nodes_out = biz.get_neighbors(graph_id=graph_id, node_id=node_id, direction="out")
+            nodes_all = []
+            nodes_all.append({"id": node.id, "name":node.name, "category":node.category, "direction":"self"})
+            for node in nodes_in:
+                nodes_all.append({"id": node.id, "name":node.name, "category":node.category, "direction":"in"})
+            for node in nodes_out:
+                nodes_all.append({"id": node.id, "name":node.name, "category":node.category, "direction":"out"})
+            
+            return StandardResponse(code=SUCCESS, message="nodes found", records=nodes_all)
+    return StandardResponse(code=FAILED, message="invalid action")
+        
+        
+        
+kb_router = router
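The /kb endpoints above all consume the same BasicRequest envelope (an id, an action name, and a list of name/value params), the shape also exercised by agent/test/test_task_router.py further down. A sketch of a search_nodes call follows; how verify_session_id expects the session to be supplied is not visible in this diff, so the header below is a placeholder assumption:

    import requests

    BASE_URL = "http://localhost:8000"             # assumption: local development server
    HEADERS = {"session-id": "<your-session-id>"}  # placeholder: the actual session transport is not shown here

    payload = {
        "id": "1",
        "action": "search_nodes",
        "params": [
            {"name": "graph_id", "value": 1},
            {"name": "category", "value": "Disease"},
            {"name": "name", "value": "<node name substring>"},
        ],
    }
    resp = requests.post(f"{BASE_URL}/kb/nodes", json=payload, headers=HEADERS)
    print(resp.json())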

+ 247 - 0
agent/router/task_router.py

@@ -0,0 +1,247 @@
+import sys,os
+current_path = os.getcwd()
+sys.path.append(current_path)
+
+from config.site import SiteConfig
+from fastapi import APIRouter, Depends, Query
+from db.database import get_db
+from sqlalchemy.orm import Session
+from agent.models.web.response import StandardResponse,FAILED,SUCCESS
+from agent.models.web.request import BasicRequest
+from agent.libs.agent import AgentBusiness
+from agent.libs.auth import verify_session_id, SessionValues
+import logging
+import json
+
+router = APIRouter(prefix="/agent", tags=["agent job interface"])
+logger = logging.getLogger(__name__)
+config = SiteConfig()
+
+LOG_DIR = config.get_config("TASK_LOG_DIR", current_path)
+
+    # job_category = Column(String(64), nullable=False)
+    # job_name = Column(String(64))
+    # job_details = Column(Text, nullable=False)
+    # job_creator = Column(String(64), nullable=False)
+    # job_logs = Column(Text, nullable=True)
+    # job_files = Column(String(300), nullable=True)
+
+@router.post('/job', response_model=StandardResponse)
+def submit_job(request:BasicRequest, db: Session = Depends(get_db), sess:SessionValues = Depends(verify_session_id))->StandardResponse:
+    logger.info("receive request: " + request.action)
+    if (request.action == "create_job"):
+        job_name = request.get_param("job_name")
+        job_category = request.get_param("job_category","")
+        job_details = request.get_param("job_details","")
+        job_creator = f"{sess.full_name}/{sess.user_id}"
+        biz = AgentBusiness(db)
+        job = biz.create_job(job_category=job_category, job_name=job_name, job_details=job_details, job_creator=job_creator)
+        if job:
+            if request.get_param("queue_category", None) and request.get_param("queue_name", None):
+                queue_category = request.get_param("queue_category")
+                queue_name = request.get_param("queue_name")
+                logger.info(f"put job to queue: {queue_category} {queue_name} ")
+                queue = biz.get_queue(queue_category=queue_category, queue_name=queue_name, create_if_not_exist=True)
+                if queue:
+                    qjob = biz.put_job(queue=queue, job=job)
+                    if qjob:
+                        logger.info(f"job created and put to queue: {job.id} {job.job_name} {job.job_category} {job.job_details} {job.job_creator}")
+                        return StandardResponse(code=SUCCESS, message="job created and put to queue", records=[job])
+            logger.info(f"job created: {job.id} {job.job_name} {job.job_category} {job.job_details} {job.job_creator}")
+            return StandardResponse(code=SUCCESS, message="job created", records=[job])
+        else:
+            return StandardResponse(code=FAILED, message="job creation failed")
+    elif (request.action == "get_job"):
+        job_id = request.get_param("job_id")
+        biz = AgentBusiness(db)
+        job = biz.get_job(job_id)
+        if job:
+            return StandardResponse(code=SUCCESS, message="job found", records=[job])
+        else:
+            return StandardResponse(code=FAILED, message="job not found")
+    elif (request.action == "update_job"):
+        job_id = request.get_param("job_id")
+        job_name = request.get_param("job_name")
+        job_category = request.get_param("job_category")
+        job_details = request.get_param("job_details")
+        #job_creator = request.get_param("job_creator")
+        #job_logs = request.get_param("job_logs")
+        job_files = request.get_param("job_files")
+        status = request.get_param("status")
+        biz = AgentBusiness(db)
+        job = biz.update_job(job_id, job_name=job_name, job_category=job_category, job_details=job_details, job_files=job_files, status=status)
+        if job:
+            logger.info(f"job updated: {job.id} {job.job_name} {job.job_category} {job.job_details} {job.job_creator}")
+            return StandardResponse(code=SUCCESS, message="job updated", records=[job])
+        else:
+            return StandardResponse(code=FAILED, message="job update failed")
+    elif (request.action == "update_job_status"):
+        job_id = request.get_param("job_id")
+        status = request.get_param("status")
+        biz = AgentBusiness(db)
+        job = biz.get_job(job_id)
+        if job:
+            job = biz.append_job_logs(job, f"status updated from {job.status} to {status}")
+            job = biz.update_job(job_id, status=status,job_logs = job.job_logs)
+            if job:
+                logger.info(f"job status updated: {job.id} {job.job_name} {job.job_category} {job.job_details} {job.job_creator}")
+                return StandardResponse(code=SUCCESS, message="job status updated", records=[job])
+            else:
+                return StandardResponse(code=FAILED, message="job status update failed",records=[])
+        else:
+            return StandardResponse(code=FAILED, message="job not found",records=[])
+    elif (request.action == "append_job_logs"):
+        job_id = request.get_param("job_id")
+        job_logs = request.get_param("job_logs")
+        biz = AgentBusiness(db) 
+        job = biz.get_job(job_id)
+        job = biz.append_job_logs(job, job_logs)
+        job = biz.update_job(job_id, job_logs=job.job_logs)
+        if job:
+            logger.info(f"job logs appended: {job.id} {job.job_name} {job.job_category} {job.job_details} {job.job_creator}")
+            return StandardResponse(code=SUCCESS, message="job logs appended", records=[job])
+        else:
+            return StandardResponse(code=FAILED, message="job logs append failed", records=[])
+    elif (request.action == "delete_job"):
+        job_id = request.get_param("job_id")
+        biz = AgentBusiness(db)
+        job = biz.get_job(job_id)
+        if job:
+            biz.delete_job_in_any_queue(job=job)
+        job = biz.delete_job(job_id)
+        if job:
+            return StandardResponse(code=SUCCESS, message="job deleted", records=[])
+        else:
+            return StandardResponse(code=FAILED, message="job delete failed")
+    elif (request.action == "get_jobs"):
+        job_category = request.get_param("job_category")
+        job_creator = request.get_param("job_creator")
+        biz = AgentBusiness(db)
+        jobs = biz.get_jobs(job_category=job_category, job_creator=job_creator)
+        if jobs:
+            return StandardResponse(code=SUCCESS, message="jobs found", records=jobs)
+        else:
+            return StandardResponse(code=FAILED, message="jobs not found")
+    elif (request.action == "put_job"):
+        job_id = request.get_param("job_id")
+        queue_category = request.get_param("queue_category")
+        queue_name = request.get_param("queue_name")
+        
+        biz = AgentBusiness(db)
+        queue = biz.get_queue(queue_category=queue_category, queue_name=queue_name, create_if_not_exist=True)
+        job = biz.get_job(job_id = job_id)
+        if queue and job:
+            job = biz.put_job(queue=queue, job=job)
+            if job:
+                return StandardResponse(code=SUCCESS, message="job put to queue", records=[job])  
+            else:
+                return StandardResponse(code=FAILED, message="job put to queue failed")
+        else:
+            return StandardResponse(code=FAILED, message="queue or job not found")
+    return StandardResponse(code=FAILED, message="invalid action")
+        
+@router.post('/queue', response_model=StandardResponse)
+def submit_queue(request:BasicRequest, db: Session = Depends(get_db), sess:SessionValues = Depends(verify_session_id))->StandardResponse:      
+    if (request.action == "put_job"):
+        job_id = request.get_param("job_id")
+        queue_category = request.get_param("queue_category")
+        queue_name = request.get_param("queue_name")
+        
+        biz = AgentBusiness(db)
+        queue = biz.get_queue(queue_category=queue_category, queue_name=queue_name, create_if_not_exist=True)
+        job = biz.get_job(job_id = job_id)
+        if queue and job:
+            job = biz.put_job(queue=queue, job=job)
+            if job:
+                return StandardResponse(code=SUCCESS, message="job put to queue", records=[job])  
+            else:
+                return StandardResponse(code=FAILED, message="job put to queue failed")
+        else:
+            return StandardResponse(code=FAILED, message="queue or job not found")
+    elif (request.action == "create_queue"):
+        queue_category = request.get_param("queue_category")
+        queue_name = request.get_param("queue_name")
+        biz = AgentBusiness(db)
+        queue = biz.get_queue(queue_category=queue_category, queue_name=queue_name)
+        if queue:
+            return StandardResponse(code=SUCCESS, message="queue found", records=[queue])
+        queue = biz.create_queue(queue_category=queue_category, queue_name=queue_name)
+        if queue:
+            return StandardResponse(code=SUCCESS, message="queue created", records=[queue])
+        else:
+            return StandardResponse(code=FAILED, message="queue creation failed")
+    elif (request.action == "get_queue"):
+        queue_category = request.get_param("queue_category")
+        queue_name = request.get_param("queue_name")
+        biz = AgentBusiness(db)
+        queue = biz.get_queue(queue_category=queue_category, queue_name=queue_name)
+        if queue:
+            return StandardResponse(code=SUCCESS, message="queue found", records=[queue])
+        else:
+            return StandardResponse(code=FAILED, message="queue not found")
+    elif (request.action == "get_queue_summary"):
+        biz = AgentBusiness(db)
+        queues = biz.get_queues_summary()
+        if queues:
+            return StandardResponse(code=SUCCESS, message="queues summary", records=queues)
+    elif (request.action == "update_queue"):
+        queue_category = request.get_param("queue_category")
+        queue_name = request.get_param("queue_name")
+        biz = AgentBusiness(db)
+        queue = biz.update_queue(queue_category=queue_category, queue_name=queue_name)
+        if queue:
+            return StandardResponse(code=SUCCESS, message="queue updated", records=[queue])
+        else:
+            return StandardResponse(code=FAILED, message="queue update failed")
+    elif (request.action == "delete_queue"):
+        queue_category = request.get_param("queue_category")
+        queue_name = request.get_param("queue_name")
+        biz = AgentBusiness(db)
+        queue = biz.delete_queue(queue_category=queue_category, queue_name=queue_name)
+        if queue:
+            return StandardResponse(code=SUCCESS, message="queue deleted", records=[])
+        else:
+            return StandardResponse(code=FAILED, message="queue delete failed")
+    elif (request.action == "get_queues"):
+        queue_category = request.get_param("queue_category")
+        biz = AgentBusiness(db)
+        queues = biz.get_queues(queue_category=queue_category)
+        if queues:
+            return StandardResponse(code=SUCCESS, message="queues found", records=queues)
+        else:
+            return StandardResponse(code=FAILED, message="queues not found")
+    elif (request.action == "get_jobs"):
+        queue_category = request.get_param("queue_category")
+        queue_name = request.get_param("queue_name")
+        page_size = request.get_param("page_size",10)
+        page = request.get_param("page",1)
+        
+        biz = AgentBusiness(db)
+        queue = biz.get_queue(queue_category=queue_category, queue_name=queue_name)
+        total = biz.get_queue_jobs_count(queue=queue)
+        limit = page_size
+        offset = (page - 1) * page_size
+        jobs = biz.get_queue_jobs(queue=queue, limit=limit, offset=offset)
+        if len(jobs)>=0:
+            pages = total // limit + 1 if total % limit > 0 else total // limit
+            return StandardResponse(code=SUCCESS, message="jobs found", meta={"page":page,"pages":pages,"total":total}, records=jobs)
+        else:
+            return StandardResponse(code=SUCCESS, message="jobs not found", records=[])
+    elif (request.action == "delete_job"):
+        queue_category = request.get_param("queue_category")
+        queue_name = request.get_param("queue_name")
+        job_id = request.get_param("job_id")
+        biz = AgentBusiness(db)
+        queue = biz.get_queue(queue_category=queue_category, queue_name=queue_name)
+        job = biz.get_job(job_id = job_id)
+        result = biz.delete_queue_job(queue, job)
+        if result:
+            return StandardResponse(code=SUCCESS, message="job deleted", records=[])
+        else:
+            return StandardResponse(code=FAILED, message="job delete failed")
+    return StandardResponse(code=FAILED, message="invalid action")
+        
+        
+        
+task_router = router

+ 87 - 0
agent/router/user_router.py

@@ -0,0 +1,87 @@
+import sys,os
+import uuid
+current_path = os.getcwd()
+sys.path.append(current_path)
+
+from config.site import SiteConfig
+from fastapi import APIRouter, Depends, Query
+from db.database import get_db
+from sqlalchemy.orm import Session
+from agent.models.web.response import StandardResponse,FAILED,SUCCESS
+from agent.models.web.request import BasicRequest
+from agent.libs.user import UserBusiness,SessionBusiness
+import logging
+
+router = APIRouter(prefix="/user", tags=["user interface"])
+logger = logging.getLogger(__name__)
+config = SiteConfig()
+
+@router.post("/session", response_model=StandardResponse)
+def register(request: BasicRequest, db: Session = Depends(get_db)):
+    if request.action == 'register':
+        biz = UserBusiness(db)
+        request_username = request.get_param("username", "")
+        request_password = request.get_param("password", "")
+        user = biz.get_user_by_username(request_username)
+        if user is not None:
+            return StandardResponse(code=FAILED, message="user already exists")
+        user = biz.create_user(request_username, request_password)
+        if user is None:
+            return StandardResponse(code=FAILED, message="create user failed")
+        return StandardResponse(code=SUCCESS, message="create user success")
+    elif request.action =='login':
+        request_username = request.get_param("username", "")
+        request_password = request.get_param("password", "")
+        logger.info(f"login: {request_username} {request_password}")
+        biz = UserBusiness(db)
+        user = biz.get_user_by_username(request_username)
+        if user is None:
+            return StandardResponse(code=FAILED, message="user not exists")
+        if not biz.verify_password(request_password, user.hashed_password):
+            return StandardResponse(code=FAILED, message="password error")
+
+        session = SessionBusiness(db)
+        old_session = session.get_session_by_user_id(user.id)
+        if old_session is not None:
+            logger.info("delete old session")
+            session.delete_session(old_session.session_id)
+            
+        logger.info("create new session")
+        new_session = session.create_session(user)
+        return StandardResponse(code=SUCCESS, message="login success", records=[new_session])
+    elif request.action == "login_session":
+        session_id = request.get_param("session_id", "")
+        session = SessionBusiness(db)
+        old_session = session.get_session(session_id)
+        if old_session is None:
+            return StandardResponse(code=FAILED, message="session not exists")
+        return StandardResponse(code=SUCCESS, message="login success", records=[old_session])
+    elif request.action == "logout":
+        session_id = request.get_param("session_id", "")
+        session = SessionBusiness(db)
+        session.delete_session(session_id)
+        return StandardResponse(code=SUCCESS, message="logout success")
+    return StandardResponse(code=FAILED, message="invalid action")
+
+@router.get("/logout/{session_id}", response_model=StandardResponse)
+def logout(session_id: str, db: Session = Depends(get_db)):
+    session = SessionBusiness(db)
+    session.delete_session(session_id)
+    return StandardResponse(code=SUCCESS, message="logout success")
+
+@router.post("/signin", response_model=StandardResponse)
+def signin(request: BasicRequest, db: Session = Depends(get_db)):
+    if request.action == 'signin':
+        biz = UserBusiness(db)
+        request_username = request.get_param("username", "")
+        request_password = request.get_param("password", "")
+        request_fullname = request.get_param("full_name", "")
+        request_email = request.get_param("email", "")
+        user = biz.get_user_by_username(request_username)
+        if user is not None:
+            return StandardResponse(code=FAILED, message="username already exists")
+        user = biz.create_user(request_username, request_password, request_fullname, request_email)
+        if user is None:
+            return StandardResponse(code=FAILED, message="failed to create user")
+        return StandardResponse(code=SUCCESS, message="user created successfully, please proceed to log in")
+    return StandardResponse(code=FAILED, message="invalid action")
+user_router = router
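For completeness, a sketch of the login flow against the /user/session endpoint above, using the same BasicRequest envelope; the base URL and credentials are assumptions:

    import requests

    BASE_URL = "http://localhost:8000"  # assumption: local development server

    login = {
        "id": "1",
        "action": "login",
        "params": [
            {"name": "username", "value": "tester"},
            {"name": "password", "value": "secret"},
        ],
    }
    resp = requests.post(f"{BASE_URL}/user/session", json=login)
    print(resp.json())  # on success, records[0] carries the session created by SessionBusiness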

File diff not shown because this file is too large
+ 33 - 0
agent/sample.md


+ 66 - 0
agent/server.py

@@ -0,0 +1,66 @@
+import sys,os
+current_path = os.getcwd()
+sys.path.append(os.path.join(current_path, "web"))
+sys.path.append(os.path.join(current_path, "agent"))
+
+import json
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+def save_api_spec(app: FastAPI):
+    """
+    Save the FastAPI application's OpenAPI specification to a file.
+    """
+    from fastapi.openapi.utils import get_openapi
+    import yaml
+    openapi_schema = get_openapi(
+        title="FastAPI - Swagger UI",
+        version="1.0.0",
+        routes=app.routes
+    )
+    print(app.routes)
+    # Convert the JSON schema to YAML
+    yaml_data = yaml.dump(openapi_schema, sort_keys=False)
+
+    # Save as a YAML file
+    with open("openapi.yaml", "w") as f:
+        f.write(yaml_data)
+
+    print("OpenAPI YAML file generated: openapi.yaml")
+    
+import logging
+logging.basicConfig(level=logging.INFO)
+
+handler = logging.FileHandler('/app/logs/api-server.log', mode='w',encoding="utf-8")
+handler.setLevel(logging.INFO)
+formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+handler.setFormatter(formatter)
+logging.getLogger().addHandler(handler)
+app = FastAPI()
+# Allow cross-origin requests from any origin
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+from router.task_router import task_router
+app.include_router(task_router)
+
+from router.file_router import file_router
+app.include_router(file_router)
+
+from router.user_router import user_router
+app.include_router(user_router)
+
+# from router.graph_mgr_router import graph_mgr_router
+# app.include_router(graph_mgr_router)
+
+from router.kb_router import kb_router
+app.include_router(kb_router)
+#from router.graph_router import graph_router
+#app.include_router(graph_router)
+
+#app.include_router(dify_kb_router)
+save_api_spec(app)
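server.py assembles the app and writes openapi.yaml at import time but does not start a web server itself. A minimal launch sketch; uvicorn, the module path, and the port are assumptions not shown in this file:

    import uvicorn
    from agent.server import app  # assumption: run from the repository root so the path setup above resolves

    if __name__ == "__main__":
        uvicorn.run(app, host="0.0.0.0", port=8000)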

+ 28 - 0
agent/test.py

@@ -0,0 +1,28 @@
+from cdss.capbility import CDSSCapability
+from cdss.models.schemas import CDSSInput, CDSSOutput, CDSSInt
+import logging
+
+logging.basicConfig(level=logging.INFO)
+
+handler = logging.FileHandler('cdss.log', mode='w',encoding="utf-8")
+handler.setLevel(logging.INFO)
+formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+handler.setFormatter(formatter)
+logging.getLogger().addHandler(handler)
+
+capability = CDSSCapability()
+
+record = CDSSInput(
+    pat_age=CDSSInt(type="month", value=21), 
+    pat_sex=CDSSInt(type="sex", value=1),
+    chief_complaint=["嗓子疼", "流鼻涕", "打喷嚏", "低烧", "全身没力气"]
+    #chief_complaint=["腹痛", "发热", "腹泻"],
+    )
+
+output = capability.process(input=record)
+for item in output.diagnosis.value:
+    print(f"DIAG {item}  {output.diagnosis.value[item]} ")
+for item in output.checks.value:
+    print(f"CHECK {item}  {output.checks.value[item]} ")
+for item in output.drugs.value:
+    print(f"DRUG {item}  {output.drugs.value[item]} ")

+ 145 - 0
agent/test/test_task_router.py

@@ -0,0 +1,145 @@
+import pytest
+import requests
+
+BASE_URL = "http://localhost:8000"
+_job_id = 0
+def create_job():
+    action = {
+        "id": "1",
+        "action": "create_job",
+        "params": [
+            {"name": "job_name", "value": "Test Job"},
+            {"name": "job_category", "value": "Test Category"},
+            {"name": "job_details", "value": "test details"},
+            {"name": "job_creator", "value": "tester"},
+            ]
+    }
+    response = requests.post(BASE_URL+"/agent/job", json=action)
+    assert response.status_code == 200
+    data = response.json()
+    assert "code" in data
+    assert data["code"] == 200
+    return data['records'][0]['id']
+def delete_job(job_id):
+    action = {
+        "id": "1",
+        "action": "delete_job",
+        "params": [
+            {"name": "job_id", "value": job_id},
+            ]
+    }
+    response = requests.post(BASE_URL+"/agent/job", json=action)
+    assert response.status_code == 200
+
+def create_queue(queue_category, queue_name):
+    action = {
+        "id": "1",
+        "action": "create_queue",
+        "params": [
+            {"name": "queue_category", "value": queue_category},
+            {"name": "queue_name", "value": queue_name},
+            ]
+    }
+    response = requests.post(BASE_URL+"/agent/queue", json=action)
+    assert response.status_code == 200
+def test_create_job():
+    global _job_id
+    job_id = create_job()
+    _job_id = job_id
+    assert job_id > 0
+    
+
+def test_create_queue():
+    create_queue("SYSTEM", "OCR")
+    create_queue("SYSTEM", "OCR_RESULTS")
+def test_get_job():    
+    global _job_id
+    job_id = _job_id
+    action = {
+        "id": "1",
+        "action": "get_job",
+        "params": [
+            {"name": "job_id", "value": f"{job_id}"},
+            {"name": "job_category", "value": "Test Category"},
+            {"name": "job_details", "value": "test details"},
+            {"name": "job_creator", "value": "tester"},
+            ]
+    }
+    response = requests.post(BASE_URL+"/agent/job", json=action)
+    assert response.status_code == 200
+    data = response.json()
+    assert "code" in data
+    assert data["code"] == 200
+    assert data['records'][0]['job_name'] == "Test Job"
+    assert data['records'][0]['job_category'] == "Test Category"
+
+def test_queue_put_job():
+    global _job_id
+    job_id = _job_id
+    action = {
+        "id": "1",
+        "action": "put_job",
+        "params": [
+            {"name": "job_id", "value": f"{job_id}"},
+            {"name": "queue_category", "value": "Test Queue Category"},
+            {"name": "queue_name", "value": "Test Queue Name"},
+        ]
+    }
+    response = requests.post(BASE_URL+"/agent/queue", json=action)
+    assert response.status_code == 200
+    data = response.json()
+    assert "code" in data
+    assert data["code"] == 200
+    
+def test_queue_get_jobs():
+    global _job_id
+    action = {
+        "id": "1",
+        "action": "get_jobs",
+        "params": [
+            {"name": "queue_category", "value": "Test Queue Category"},
+            {"name": "queue_name", "value": "Test Queue Name"},
+        ]
+    }
+    response = requests.post(BASE_URL+"/agent/queue", json=action)
+    assert response.status_code == 200
+    data = response.json()
+    assert "code" in data
+    assert data["code"] == 200
+    assert data['records'][0]['id'] == _job_id
+    
+def test_queue_delete_job():
+    global _job_id
+    job_id = _job_id
+    action = {
+        "id": "1",
+        "action": "delete_job",
+        "params": [
+            {"name": "job_id", "value": job_id},
+            {"name": "queue_category", "value": "Test Queue Category"},
+            {"name": "queue_name", "value": "Test Queue Name"},
+        ] 
+    }
+
+    response = requests.post(BASE_URL+"/agent/queue", json=action)
+    assert response.status_code == 200
+    data = response.json()
+    assert "code" in data
+    assert data["code"] == 200
+    
+def test_delete_job():
+    global _job_id  
+    job_id = _job_id
+    action = {
+        "id": "1",
+        "action": "delete_job",
+        "params": [
+            {"name": "job_id", "value": f"{job_id}"}
+            ]
+    }
+    response = requests.post(BASE_URL+"/agent/job", json=action)
+    assert response.status_code == 200
+    data = response.json()
+    assert "code" in data
+    assert data["code"] == 200
+    

+ 42 - 0
agent/testnx.py

@@ -0,0 +1,42 @@
+import networkx as nx
+import pandas as pd
+import json
+entity_data = pd.DataFrame({"id":[1,1,2,3],"name":['a','aa','b','c'], "type":['x','x','y','z']})
+entity_data.set_index("id", inplace=True)
+# create test node data
+jsondata = '''
+[
+    [
+        1291243,
+        {
+            "name": "好急的胃肠炎"
+        }
+    ]
+]'''
+print(json.loads(jsondata))
+relation_data = pd.DataFrame({"src":[1,2], "dest":[2,3], "type":['normal','normal'], "weight":[1,1]})
+
+results = entity_data[entity_data.index==5]
+print(results.empty)
+results = entity_data[entity_data.index==1].iloc[0]
+print(results['name'])
+results['name'] ='A'
+print(results['name'])
+results = entity_data[entity_data.index==1]
+for index,data in results.iterrows():
+    print(index,data['name'],data['type'])
+print("----")
+entity_data.loc[1, 'type'] = 'unknown'
+entity_data.loc[(entity_data['name']=='b') & (entity_data.index == 2), 'type'] = 'unknown other'
+for index,data in entity_data.iterrows():
+    print(index,data['name'],data['type'])
+print("----")
+print(results)
+# G = nx.from_pandas_edgelist(relation_data, "src", "dest", edge_attr=True, create_using=nx.DiGraph())
+# nx.set_node_attributes(G, entity_data.set_index("id").to_dict("index"))
+
+# print(G.nodes(data=True))
+# print(G.edges(data=True))
+# print(list(G.neighbors(1)))
+
+# print(G.edges(2.0))
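The commented-out networkx portion above sketches building a directed graph from the two DataFrames, but as written it would fail because entity_data already uses id as its index, so calling set_index("id") again raises. A minimal working variant under that assumption (duplicate ids dropped for clarity):

    import networkx as nx
    import pandas as pd

    entity_data = pd.DataFrame({"id": [1, 2, 3], "name": ["a", "b", "c"], "type": ["x", "y", "z"]}).set_index("id")
    relation_data = pd.DataFrame({"src": [1, 2], "dest": [2, 3], "type": ["normal", "normal"], "weight": [1, 1]})

    G = nx.from_pandas_edgelist(relation_data, "src", "dest", edge_attr=True, create_using=nx.DiGraph())
    nx.set_node_attributes(G, entity_data.to_dict("index"))  # index is already the node id

    print(G.nodes(data=True))
    print(list(G.neighbors(1)))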

+ 94 - 0
command/build_es_index.py

@@ -0,0 +1,94 @@
+from typing import List, Dict
+from libs.import_chunc import ImportChunc
+from libs.chunc_helper import ChuncHelper
+import json
+import sys
+import os
+from dotenv import load_dotenv
+from utils.es import ElasticsearchOperations
+from utils.factors import FactorsHelper
+load_dotenv()
+
+# DeepSeek API配置
+TRUNC_OUTPUT_PATH = os.getenv("TRUNC_OUTPUT_PATH")
+DOC_STORAGE_PATH = os.getenv("DOC_STORAGE_PATH")
+DOC_PATH = os.getenv("DOC_PATH")
+JIEBA_USER_DICT = os.getenv("JIEBA_USER_DICT")
+WORD_INDEX = os.getenv("WORD_INDEX")
+TITLE_INDEX = os.getenv("TITLE_INDEX")
+CHUNC_INDEX = os.getenv("CHUNC_INDEX")
+
+
+ 
+def build_test():
+    helper = ImportChunc(data_dir=TRUNC_OUTPUT_PATH)
+    helper.import_chunc_reverse_index()
+def build_index():
+    helper = ImportChunc(data_dir=TRUNC_OUTPUT_PATH)
+    helper.import_word_reverse_index()
+    helper.import_title_reverse_index()
+    #helper.import_chunc_reverse_index()
+def build_chunc():
+    helper = ImportChunc(data_dir=TRUNC_OUTPUT_PATH)
+    helper.import_chunc_reverse_index()
+def delete_index():
+    helper = ElasticsearchOperations()
+    print(">>> delete index")
+    helper.delete_index(WORD_INDEX)
+    helper.delete_index(TITLE_INDEX)
+    helper.delete_index(CHUNC_INDEX)
+    helper.delete_index("text_chunks")
+
+from functions.basic_function import search_document
+def test_index():
+    helper = ElasticsearchOperations()
+    try:
+        question = "银行销售保险产品的规定"
+        result = search_document(question)
+        print(result)
+        # articles = FactorsHelper()
+        # chunc_helper = ChuncHelper(data_file=DOC_PATH, output_dir=TRUNC_OUTPUT_PATH, user_dict=JIEBA_USER_DICT)
+        # print(">>> question: test word index")
+        # words = chunc_helper.cut_word(question)
+        # data = helper.search_word_index(WORD_INDEX, [question]) #words)
+        
+        # for item in data:
+        #     print(f"{item['word']} {item['score']}")
+        #     for art in item["articles"]:
+        #         articles.add_factors(art, item['score'])
+            
+        # print(">>> test title index")
+        # data = helper.search_title_index(TITLE_INDEX, question)
+        # for item in data:
+        #     print(f"{item['title']} {item['score']}")            
+        #     articles.add_factors(item['title'], item['score'])
+
+        # print(">>> test chunc index")
+        # data = helper.search_title_index(CHUNC_INDEX, question)
+        # for item in data:
+        #     print(f"{item['title']} {item['score']}") 
+        #     articles.add_factors(item['title'], item['score'])
+
+        # print(">>> test factors calc")
+        # sorted_articals = articles.sort_factors()
+        # for key in sorted_articals:
+        #     print(key)
+        #data = helper.get_document(TITLE_INDEX, helper.get_doc_id("保险代理人监管规定"))        
+        #print(data)
+    except Exception as e:
+        raise e
+# Usage example
+if __name__ == "__main__":
+    param_count = len(sys.argv)
+    if param_count == 2:
+        action =  sys.argv[1]
+        if action== "test":
+            test_index()
+        if action == "build":
+            build_index()
+        if action == "delete":
+            delete_index()
+        if action == "chunc":
+            build_chunc()
+    #build_index()
+
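The __main__ dispatch above takes a single action argument; typical invocations, assuming the script is run from the repository root with the .env configuration in place:

    python command/build_es_index.py delete   # drop the word/title/chunk indexes
    python command/build_es_index.py build    # rebuild the word and title reverse indexes
    python command/build_es_index.py chunc    # rebuild the chunk reverse index
    python command/build_es_index.py test     # run a sample query through search_document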

+ 131 - 0
command/build_graph_index.py

@@ -0,0 +1,131 @@
+import sys,os
+current_path = os.getcwd()
+sys.path.append(current_path)
+
+import json
+from libs.embed_helper import EmbedHelper
+
+def embed_test():
+    embed_helper = EmbedHelper()
+    result = embed_helper.embed_text("你好")
+    print(f"result length: {len(result)}")
+    print(result)
+def search_test():
+    from utils.es import ElasticsearchOperations
+    es = ElasticsearchOperations()
+    result = es.search("graph_entity_index", "上呼吸道感染", 10)
+    for item in result:
+        print(item)
+def load_entities():
+    print("load entity data")
+    with open(f"{current_path}\\web\\cached_data\\entities_med.json", "r", encoding="utf-8") as f:
+        entities = json.load(f)
+        return entities
+    
+def load_relationships():
+    print("load relationship data")
+    with open(f"{current_path}\\web\\cached_data\\relationship_med.json", "r", encoding="utf-8") as f:
+        relationships = json.load(f)
+        return relationships
+
+def write_data_file(file_name, data):
+    if len(data) == 0:
+        return
+    print("write data file", file_name)
+    with open(file_name, "w", encoding="utf-8") as f:
+        f.write(json.dumps(data, ensure_ascii=False,indent=4))
+def import_index():
+    from utils.es import ElasticsearchOperations
+    es = ElasticsearchOperations()
+    es.delete_index("graph_entity_index")
+    for i in range(999):
+        if os.path.exists(f"{current_path}\\web\\cached_data\\embed\\word_index_{i}.json"):
+            print("load embed data", f"{current_path}\\web\\cached_data\\embed\\word_index_{i}.json")
+            with open(f"{current_path}\\web\\cached_data\\embed\\word_index_{i}.json", "r", encoding="utf-8") as f:
+                records = json.load(f)
+                for item in records:
+                    node_id = item[0]
+                    embed = item[1]
+                    doc = { "title": node_id, 
+                        "text": node_id,
+                        "embedding": embed}     
+                    es.add_document("graph_entity_index", es.get_doc_id(node_id), doc)
+                    print("index added of ", node_id, "embed length: ", len(embed))
+                    #attr_embed_list = item[2]
+def import_community_report_index():
+    from utils.es import ElasticsearchOperations
+    embed_helper = EmbedHelper()
+    es = ElasticsearchOperations()
+    es.delete_index("graph_community_report_index")
+    for filename in os.listdir(f"{current_path}\\web\\cached_data\\report"):
+        if filename.endswith(".md"):
+            file_path = os.path.join(f"{current_path}\\web\\cached_data\\report", filename)
+            with open(file_path, "r", encoding="utf-8") as f:
+                content = f.read()
+                jsonstr = []
+                found_json = False
+                for line in content.splitlines():
+                    if line.startswith("```json"):
+                        jsonstr = []
+                        found_json = True
+                        continue        
+                    if line.startswith("```"):
+                        found_json = False
+                        continue
+                    if found_json:
+                        jsonstr.append(line)            
+                doc = { "title": "", 
+                    "text": content,
+                    "embedding": []}    
+                jsondata = json.loads("\n".join(jsonstr))
+                title_list = []
+                for item in jsondata:        
+                    title_list.append(item["name"])
+                doc["title"] = " ".join(title_list)
+                doc["embedding"] = embed_helper.embed_text(doc["title"])
+                es.add_document("graph_community_report_index", es.get_doc_id(doc["title"]), doc)
+                print("index added of ", doc["title"], "embed length: ", len(doc["embedding"]))
+def build_index():
+    print("build index")
+    embed_helper = EmbedHelper()
+    entities = load_entities()
+    count = 0
+    records = []
+    index = 0
+    for item in entities:
+        node_id = item[0]
+        print("process node: ",count, node_id)
+        attrs = item[1]
+        embed = embed_helper.embed_text(node_id)
+        attr_embed_list = []
+        for attr in attrs:
+            if len(attrs[attr])>3 and attr not in ["type", "description"]:
+                attr_embed = embed_helper.embed_text(attrs[attr])
+                attr_embed_list.append([attrs[attr],attr_embed])
+            else:
+                print("skip", attr)
+        records.append([node_id, embed, attr_embed_list])
+        count += 1
+        if count % 100 == 0:
+            write_data_file(f"{current_path}\\web\\cached_data\\embed\\word_index_{index}.json", records)
+            index = index + 1
+            records = []
+    write_data_file(f"{current_path}\\web\\cached_data\\embed\\word_index_{index}.json", records)
+# Usage example
+if __name__ == "__main__":
+    param_count = len(sys.argv)
+    if param_count == 2:
+        action = sys.argv[1]
+        if action == "test":
+            embed_test()
+            search_test()
+        elif action == "build":
+            build_index()
+        elif action == "import":
+            import_index()
+        elif action == "import_com":
+            import_community_report_index()
+        elif action == "chunc":
+            pass
+    #build_index()
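+    # Example invocations (paths and index names are the hard-coded ones above):
+    #   python command/build_graph_index.py build       # embed entity names/attributes into word_index_*.json files
+    #   python command/build_graph_index.py import      # load those files into the graph_entity_index ES index
+    #   python command/build_graph_index.py import_com  # index the community reports into graph_community_report_index
+    #   python command/build_graph_index.py test        # quick embedding + search sanity check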
+

+ 199 - 0
command/community_report.py

@@ -0,0 +1,199 @@
+'''
+This script runs community detection on the dumped graph data and generates per-community analysis reports.
+'''
+import sys,os
+current_path = os.getcwd()
+sys.path.append(current_path)
+
+import networkx as nx
+import leidenalg
+import igraph as ig
+#import matplotlib.pyplot as plt
+import json
+from datetime import datetime
+from collections import Counter
+
+# Resolution of the community detection: larger values produce fewer communities, smaller values produce more
+#RESOLUTION = 0.07
+# Whether the community report includes each node's attribute list
+REPORT_INCLUDE_DETAILS = False
+# Cache path of the graph data dumped by dump_graph_data.py
+# CACHED_DATA_PATH = f"{current_path}\\web\\cached_data"
+# Output path for the final community reports
+REPORT_PATH = f"{current_path}\\web\\cached_data\\report"
+# Minimum internal edge density a community must reach for its report to be written
+DENSITY = 0.52
+# def load_entity_data():
+#     print("load entity data")
+#     with open(f"{CACHED_DATA_PATH}\\entities_med.json", "r", encoding="utf-8") as f:
+#         entities = json.load(f)
+#         return entities
+
+# def load_relation_data(g):
+#     for i in range(30):
+#         if os.path.exists(f"{CACHED_DATA_PATH}\\relationship_med_{i}.json"):            
+#             print("load entity data", f"{CACHED_DATA_PATH}\\relationship_med_{i}.json")
+#             with open(f"{CACHED_DATA_PATH}\\relationship_med_{i}.json", "r", encoding="utf-8") as f:
+#                 relations = json.load(f)
+#                 for item in relations:                    
+#                     g.add_edge(item[0], item[1], weight=1, **item[2])
+        
+            
+        
+
+# def generate_enterprise_network():
+
+#     G = nx.Graph()
+#     ent_data = load_entity_data()
+#     print("load entities completed")
+#     for data in ent_data:          
+#         G.add_node(data[0], **data[1])
+#     print("load entities into graph completed")
+#     rel_data = load_relation_data(G)    
+#     print("load relation completed")
+
+#     return G
+
+# def detect_communities(G):
+#     """使用Leiden算法进行社区检测"""
+#     # 转换networkx图到igraph格式
+    
+#     print("convert to igraph")
+#     ig_graph = ig.Graph.from_networkx(G)
+    
+#     # 执行Leiden算法
+#     partition = leidenalg.find_partition(
+#         ig_graph, 
+#         leidenalg.CPMVertexPartition,
+#         resolution_parameter=RESOLUTION,
+#         n_iterations=2
+#     )
+    
+#     # 将社区标签添加到原始图
+#     for i, node in enumerate(G.nodes()):
+#         G.nodes[node]['community'] = partition.membership[i]
+    
+#     print("convert to igraph finished")
+#     return G, partition
+
+def generate_report(G, partition):
+    """生成结构化分析报告"""
+    report = []
+    # 报告头信息
+    report.append(f"# 疾病图谱关系社区分析报告\n")
+    report.append(f"**生成时间**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
+    report.append(f"**检测算法**: Leiden Algorithm\n")
+    report.append(f"**算法参数**:\n")
+    report.append(f"- 分辨率参数: {partition.resolution_parameter:.3f}\n")
+    # report.append(f"- 迭代次数: {partition.n_iterations}\n")
+    report.append(f"**社区数量**: {len(set(partition.membership))}\n")
+    report.append(f"**模块度(Q)**: {partition.quality():.4f}\n")
+    print("generate_report header finished")
+
+    report.append("\n## 社区结构分析\n")
+    print("generate_report community structure started")
+    communities = {}
+    for node in G.nodes(data=True):
+        comm = node[1]['community']
+        if comm not in communities:
+            communities[comm] = []
+        if 'type' not in node[1]:
+            node[1]['type'] = '未知'
+        if 'description' not in node[1]:
+            node[1]['description'] = '未见描述'
+        
+        communities[comm].append({
+            'name': node[0],
+            **node[1]
+        })
+    
+
+    print("generate_report community structure finished")
+    for comm_id, members in communities.items():
+        print("community ", comm_id, "size: ", len(members))
+        com_report = []
+        com_report.append(f"### 第{comm_id+1}号社区报告 ")
+        #com_report.append(f"**社区规模**: {len(members)} 个节点\n")
+        
+        # Node type distribution
+        type_dist = Counter([m['type'] for m in members])
+        com_report.append("**类型分布**:")
+        for node_type, count in type_dist.most_common():
+            com_report.append(f"- {node_type}: {count} 个 ({count/len(members):.0%})")
+            
+
+        com_report.append("\n```json")        
+        obj_list = []
+        names = []
+        for member in members:
+            obj = {}
+            obj["name"] = member['name']
+            obj["type"] = member['type']
+            obj_list.append(obj)
+            #com_report.append(f"'name':'{member['name']}','type':'{member['type']}'") 
+            names.append(member['name'])
+            if REPORT_INCLUDE_DETAILS == False:
+                continue
+            for k in member.keys():
+                if k not in ['name', 'type', 'description', 'community']:   
+                    value = member[k]        
+                    com_report.append(f"\t- {value}")            
+        com_report.append(json.dumps(obj_list, ensure_ascii=False, indent=4))   
+             
+        
+        com_report.append("```")
+        com_report.append("\n**成员节点关系**:\n")
+        for member in members:
+            entities, relations = graph_helper.neighbor_search(member['name'], 1)
+            com_report.append(f"- {member['name']} ({member['type']})") 
+            com_report.append(f"\t- 相关节点") 
+            for entity in entities:
+                if entity['name'] in names:
+                    com_report.append(f"\t\t- {entity['name']} ({entity['type']})")
+            com_report.append(f"\t- 相关关系")
+            for relation in relations:
+                if relation['src_name'] in names or relation['dest_name'] in names:
+                    com_report.append(f"\t\t- {relation['src_name']}-({relation['type']})->{relation['dest_name']}")
+            
+                    
+        # Compute the community's internal connection density
+        subgraph = G.subgraph([m['name'] for m in members])
+        density = nx.density(subgraph)
+        com_report.append(f"\n**内部连接密度**: {density:.2f}\n")
+        if density < DENSITY:
+            com_report.append("**社区内部连接相对稀疏**\n")
+        else:
+            with open(f"{REPORT_PATH}\community_{comm_id}.md", "w", encoding="utf-8") as f:
+                f.write("\n".join(com_report))
+        print(f"社区 {comm_id+1} 报告文件大小:{len(''.join(com_report).encode('utf-8'))} 字节")  # 添加文件生成验证
+    
+    # Visualization section
+    report.append("\n## 可视化分析\n")
+    
+    return "\n".join(report)
+
+
+if __name__ == "__main__":
+    try:
+        from libs.graph_helper import GraphHelper
+        graph_helper = GraphHelper()
+        G = graph_helper.graph
+        print("graph loaded")
+        # Run community detection
+        G, partition = graph_helper.detect_communities()
+
+        # Generate the overall analysis report
+        report = generate_report(G, partition)
+        with open('community_report.md', 'w', encoding='utf-8') as f:
+            f.write(report)
+        print(f"报告文件大小:{len(report.encode('utf-8'))} 字节")
+        print("社区分析报告已生成:community_report.md")
+
+    except Exception as e:
+        print(f"运行时错误:{str(e)}")
+        raise e

+ 3 - 0
command/download_bge_model.py

@@ -0,0 +1,3 @@
+# Download the BAAI/bge-m3 embedding model via ModelScope
+from modelscope import snapshot_download
+model_dir = snapshot_download('BAAI/bge-m3')

+ 101 - 0
command/dump_graph_data.py

@@ -0,0 +1,101 @@
+'''
+This script exports the graph data from the PostgreSQL database into JSON files.
+'''
+import sys,os
+current_path = os.getcwd()
+sys.path.append(current_path)
+current_path ="D:\\work\\03\\cached_data\\new"
+from sqlalchemy import text
+from sqlalchemy.orm import Session
+import json
+# Database connection
+from web.db.database import SessionLocal
+# Two sessions: one for reading nodes/edges, one for reading properties
+db = SessionLocal()
+prop = SessionLocal()
+# Graph id to export
+GRAPH_ID = 2
+
+def get_props(ref_id):
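+    # Collect all kg_props rows for a node or edge as {prop_name: "prop_title:prop_value"}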
+    props = {}
+    sql = """select prop_name, prop_value,prop_title from kg_props where ref_id=:ref_id"""
+    result = prop.execute(text(sql), {'ref_id':ref_id})
+    for record in result:
+        prop_name, prop_value,prop_title = record
+        props[prop_name] = prop_title + ":" +prop_value
+    return props
+
+def get_entities():
+    COUNT_SQL = f"select count(*) from kg_nodes where graph_id={GRAPH_ID}"
+    result = db.execute(text(COUNT_SQL))
+    count = result.scalar()
+
+    print("total nodes: ", count)
+    entities = []
+    batch = 100
+    start = 1
+    while start < count:    
+        sql = f"""select id,name,category from kg_nodes where graph_id={GRAPH_ID} order by id limit :batch OFFSET :start"""
+        result = db.execute(text(sql), {'start':start, 'batch':batch})
+        #["发热",{"type":"症状","description":"发热是诊断的主要目的,用于明确发热病因。"}]
+        row_count = 0
+        for row in result:
+            id,name,category = row
+            props = get_props(id)
+            
+            entities.append([id,{"name":name, 'type':category,'description':'', **props}])
+            row_count += 1
+        if row_count == 0:
+            break
+        start = start + row_count
+        print("start: ", start, "row_count: ", row_count)
+
+    with open(current_path+"\\entities_med.json", "w", encoding="utf-8") as f:
+        f.write(json.dumps(entities, ensure_ascii=False,indent=4))
+
+def get_names(src_id, dest_id):
+    sql = """select id,name,category from kg_nodes where id = :src_id"""
+    result = db.execute(text(sql), {'src_id':src_id}).first()
+    id,src_name,src_category = result
+    result = db.execute(text(sql), {'src_id':dest_id}).first()
+    id,dest_name,dest_category  = result
+    return (src_id, src_name, src_category, dest_id, dest_name, dest_category)
+
+def get_relationships():
+    COUNT_SQL = f"select count(*) from kg_edges where graph_id={GRAPH_ID}"
+    result = db.execute(text(COUNT_SQL))
+    count = result.scalar()
+
+    print("total edges: ", count)
+    edges = []
+    batch = 1000
+    start = 0  # row offset; starting at 1 would skip the first edge
+    file_index = 1
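+    # Edges are flushed to relationship_med_<n>.json files of roughly 10000 records each; the final remainder goes to relationship_med_0.json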
+    while start < count:    
+        sql = f"""select id,name,category,src_id,dest_id from kg_edges where graph_id={GRAPH_ID} order by id limit :batch OFFSET :start"""
+        result = db.execute(text(sql), {'start':start, 'batch':batch})
+        #["发热",{"type":"症状","description":"发热是诊断的主要目的,用于明确发热病因。"}]
+        row_count = 0
+        for row in result:
+            id,name,category,src_id,dest_id = row
+            props = get_props(id)
+            src_id, src_name, src_category, dest_id, dest_name, dest_category = get_names(src_id, dest_id)
+            edges.append([src_id, {"id":src_id, "name":src_name, "type":src_category}, dest_id,{"id":dest_id,"name":dest_name,"type":dest_category}, {'type':category,'name':name, **props}])
+            row_count += 1
+        if row_count == 0:
+            break
+        start = start + row_count
+        print("start: ", start, "row_count: ", row_count)
+        if len(edges) > 10000:
+            with open(current_path+f"\\relationship_med_{file_index}.json", "w", encoding="utf-8") as f:
+                f.write(json.dumps(edges, ensure_ascii=False,indent=4))
+            edges = []
+            file_index += 1
+
+    with open(current_path+"\\relationship_med_0.json", "w", encoding="utf-8") as f:
+        f.write(json.dumps(edges, ensure_ascii=False,indent=4))
+
+# Export node data
+get_entities()
+# Export relationship data
+get_relationships()

+ 27 - 0
command/elasticsearch_add_doc.py

@@ -0,0 +1,27 @@
+from elasticsearch import Elasticsearch
+from typing import List, Dict
+import json
+from utils.es import ElasticsearchOperations
+
+def add_law_document(ops):
+    json_data = None
+    with open(r"D:\work\03\regulations.json","r",encoding="utf-8") as f:
+        lines = f.readlines()
+        json_data = json.loads(''.join(lines))
+        
+    print(">>> finished process document  ")
+    
+    if json_data:
+        
+        index = 1
+        total = len(json_data)
+        for item in json_data:
+            ops.index_document(item["article_text"], item["meta_data"]['ArticleTitle'])
+            print(item["meta_data"]['ArticleTitle'],f"processed {index}/{total}")
+            index = index + 1
+# Usage example
+if __name__ == "__main__":
+    es_ops = ElasticsearchOperations()
+
+    add_law_document(es_ops)
+    

+ 29 - 0
command/elasticsearch_clean.py

@@ -0,0 +1,29 @@
+from libs.text_processor import TextProcessor
+from elasticsearch import Elasticsearch
+from typing import List, Dict
+import json
+from utils.es import ElasticsearchOperations
+
+def clean_law_document(ops):
+    json_data = None
+    with open(r"D:\work\03\regulations.json","r",encoding="utf-8") as f:
+        lines = f.readlines()
+        json_data = json.loads(''.join(lines))
+        
+    print(">>> finished process document  ")
+    
+    if json_data:
+        processor = TextProcessor()
+        index = 1
+        total = len(json_data)
+        for item in json_data:
+            ops.del_document(item["article_text"], item["meta_data"]['ArticleTitle'])
+            print(item["meta_data"]['ArticleTitle'],f"processed {index}/{total}")
+            index = index + 1
+# Usage example
+if __name__ == "__main__":
+    es_ops = ElasticsearchOperations()
+    es_ops.es.indices.delete(index="text_chunks", ignore=[400, 404])
+    print(">>> finished delete index")
+    #clean_law_document(es_ops)
+    

+ 121 - 0
command/entity_extract.py

@@ -0,0 +1,121 @@
+# Analyze each article with the LLM and save the result to the doc_label output directory
+import asyncio
+import os
+import time
+import json
+from typing import List, Dict, AsyncGenerator
+import httpx
+from dotenv import load_dotenv
+# Load environment variables
+load_dotenv()
+
+# DeepSeek API configuration
+DEEPSEEK_API_URL = os.getenv("DEEPSEEK_API_URL")
+DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY")
+#API_URL = "https://api.siliconflow.cn/v1/chat/completions"
+API_URL = "http://localhost/v1/chat-messages"
+API_KEY = "app-rXG0orb4Ap1slxQvAZKkAdGk"
+#API_URL = "http://localhost/v1/completion-messages"
+# #API_KEY = "app-U23k5t9iNdwbulCZBCgvJusS"
+def load_prompt(filename):
+    with open(filename, "r", encoding="utf-8") as f:
+        return "".join(f.readlines())
+    
+async def chat_with_dify(prompt: str) -> AsyncGenerator[str, None]:
+    """Stream a chat reply from the Dify chat-messages API."""
+    print(">>> start chat_with_dify")
+
+    headers = {
+        "Authorization": f"Bearer {API_KEY}",
+        "Content-Type": "application/json; charset=utf-8"
+        
+    }
+
+    data = {
+        #"inputs": {"query": prompt},
+        "inputs": {},
+        "query": prompt,
+        "response_mode": "streaming",
+        "user":"face2shadow@163.com"
+    }
+
+    try:
+        async with httpx.AsyncClient() as client:
+            async with client.stream("POST", API_URL, json=data, headers=headers) as response:                
+                response.raise_for_status()
+                async for chunk in response.aiter_lines():
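+                    # Dify streams Server-Sent Events: each payload line starts with "data: " and the stream ends with "[DONE]"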
+                    if chunk:
+                        if chunk.startswith("data: "):
+                            json_data = chunk[6:]
+                            if json_data != "[DONE]":
+                                try:
+                                    chunk_data = json.loads(json_data)
+                                    if "answer" in chunk_data and chunk_data["answer"]:
+                                        delta = chunk_data["answer"] #chunk_data["choices"][0].get("delta", {})
+                                        yield delta
+                                        #if "content" in delta:
+                                        #    yield delta["content"]
+                                except json.JSONDecodeError:
+                                    continue
+    except httpx.RequestError as e:
+        print(f"Error: ",e)
+    del data
+    del headers
+
+from test.deepseek_chat import chat_with_deepseek
+async def chat(prompt: str) -> str:
+    message = [{'role':'user', 'content': prompt}]
+    call_deepseek = chat_with_deepseek(message)
+    #call_deepseek = chat_with_dify(prompt)
+    output = ""
+    async for chunk in call_deepseek:
+        output = output + chunk
+        print(chunk, end="")
+    print("\n")
+    return output
+ 
+if __name__ == "__main__":
+    prompt_template = load_prompt("kb/label.txt")
+    json_data = None
+    with open(r"D:\work\03\regulations.json","r",encoding="utf-8") as f:
+        lines = f.readlines()
+        json_data = json.loads(''.join(lines))
+    print(">>> finished process document  ")
+    
+    if json_data:
+        index = 1
+        total = len(json_data)
+        for item in json_data:
+            title = item["meta_data"]['ArticleTitle']
+            text = item["article_text"]
+            
+            title = title.replace("/","_")
+            title = title.replace("\\","_")
+            if os.path.exists(f"d:/work/03/output/doc_label/{title}.txt"):
+                print(f"skip {title}")
+                continue
+            text = prompt_template.format(text=text)
+            count = 0
+            while count < 3:
+                try:
+                    coro = chat(text)
+                    output = asyncio.run(coro)   
+                    title = title.replace("/","_")
+                    title = title.replace("\\","_")
+                    if os.path.exists(f"d:/work/03/output/doc_label/{title}.txt"):
+                        print("abstract file already exists, skip")
+                    else:
+                        with open(f"d:/work/03/output/doc_label/{title}.txt", "w", encoding="utf-8") as f:
+                            f.write(output)             
+                            
+                    print(">>> process", title, "ok")
+                    count = 3
+                except Exception as e:
+                    print(e)
+                    print(">>> process", title, "failed, retry", count)
+                    count = count + 1
+            time.sleep(3)
+            
+            index = index + 1
+            

+ 83 - 0
command/extract_disease_doc.py

@@ -0,0 +1,83 @@
+import sys,os
+current_path = os.getcwd()
+sys.path.append(current_path)
+
+import json
+from libs.embed_helper import EmbedHelper
+
+def embed_test():
+    embed_helper = EmbedHelper()
+    result = embed_helper.embed_text("你好")
+    print(f"result length: {len(result)}")
+    print(result)
+def search_test():
+    from utils.es import ElasticsearchOperations
+    es = ElasticsearchOperations()
+    result = es.search("graph_entity_index", "上呼吸道感染", 10)
+    for item in result:
+        print(item)
+def load_entities():
+    print("load entity data")
+    with open(f"{current_path}\\web\\cached_data\\entities_med.json", "r", encoding="utf-8") as f:
+        entities = json.load(f)
+        return entities
+    
+def load_relationships():
+    print("load relationship data")
+    with open(f"{current_path}\\web\\cached_data\\relationship_med.json", "r", encoding="utf-8") as f:
+        relationships = json.load(f)
+        return relationships
+
+def write_data_file(file_name, data):
+    if len(data) == 0:
+        return
+    print("write data file", file_name)
+    with open(file_name, "w", encoding="utf-8") as f:
+        f.write(json.dumps(data, ensure_ascii=False,indent=4))
+
+def build_index():
+    print("build index")
+    embed_helper = EmbedHelper()
+    entities = load_entities()
+    count = 0
+    index = 0
+    for item in entities:
+        node_id = item[0]
+        print("process node: ",count, node_id)
+        texts = []
+        attrs = item[1]
+        attr_embed_list = []
+        if attrs["type"] == "Disease":
+            for attr in attrs:
+                if len(attrs[attr])>3 and attr not in ["type", "description"]:
+                    texts.append(attrs[attr])
+                    attr_embed = embed_helper.embed_text(node_id+"-"+attr+"-"+attrs[attr])
+                    attr_embed_list.append(
+                        {   
+                            "title": node_id+"-"+attr, 
+                            "text": attrs[attr],
+                            "embedding": attr_embed} 
+                        )
+                else:
+                    print("skip", attr)
+            doc = { "title": node_id, 
+                "text": "\n".join(texts),
+                "embedding": attr_embed_list}      # 初始化doc对象,确保它在循环外部定义
+            count += 1
+            if count % 1 == 0:
+                write_data_file(f"{current_path}\\web\\cached_data\\diseases\\{index}.json", doc)
+                index = index + 1
+            
+    #write_data_file(f"{current_path}\\web\\cached_data\\diseases\\{index}.json", records)
+# Usage example
+if __name__ == "__main__":
+    param_count = len(sys.argv)
+    if param_count == 2:
+        action = sys.argv[1]
+        if action == "test":
+            embed_test()
+            search_test()
+        elif action == "build":
+            build_index()
+    #build_index()
+

+ 120 - 0
command/extract_doc_from_json.py

@@ -0,0 +1,120 @@
+from typing import List, Dict
+from libs.chunc_helper import ChuncHelper
+import json
+import os
+from dotenv import load_dotenv
+import sys
+load_dotenv()
+
+# Paths loaded from .env
+TRUNC_OUTPUT_PATH = os.getenv("TRUNC_OUTPUT_PATH")
+DOC_STORAGE_PATH = os.getenv("DOC_STORAGE_PATH")
+DOC_PATH = os.getenv("DOC_PATH")
+JIEBA_USER_DICT = os.getenv("JIEBA_USER_DICT")
+
+def title_reverse_index():
+    chunc_helper = ChuncHelper(data_file=DOC_PATH, output_dir=TRUNC_OUTPUT_PATH, user_dict=JIEBA_USER_DICT)
+    chunc_helper.cut_title_vector()
+    chunc_helper.title_reverse_index()
+def embed_doc():
+    chunc_helper = ChuncHelper(data_file=DOC_PATH, output_dir=TRUNC_OUTPUT_PATH, user_dict=JIEBA_USER_DICT)
+    chunc_helper.process_data()
+def extract_law_document():
+    json_data = None
+    with open(DOC_PATH,"r",encoding="utf-8") as f:
+        lines = f.readlines()
+        json_data = json.loads(''.join(lines))
+    print(">>> finished process document  ")
+    
+    if json_data:
+        index = 1
+        for item in json_data:
+            author = item["meta_data"]["author"]
+            description = item["meta_data"]["description"]
+            keywords = item["meta_data"]["keywords"]
+            SiteName = item["meta_data"]["SiteName"]
+            SiteDomain = item["meta_data"]["SiteDomain"]
+            SiteIDCode = item["meta_data"]["SiteIDCode"]
+            ColumnName = item["meta_data"]["ColumnName"]
+            ColumnType = item["meta_data"]["ColumnType"]
+            ArticleTitle = item["meta_data"]["ArticleTitle"]
+            PubDate = item["meta_data"]["PubDate"]
+            ContentSource = item["meta_data"]["ContentSource"]
+            article_text = item["article_text"]
+            filename = ArticleTitle.replace("\\", "-")
+            filename = filename.replace("/", "-")
+            with open(f"{DOC_STORAGE_PATH}/{filename}.txt", "w", encoding="utf-8") as f:
+                f.write("```meta\n")
+                f.write(f"标题: {ArticleTitle}\n")
+                f.write(f"作者: {author}\n")
+                f.write(f"描述: {description}\n")
+                f.write(f"关键字: {keywords}\n")
+                f.write(f"类型: {ColumnType}\n")
+                f.write(f"发布日期: {PubDate}\n")
+                f.write("```\n")
+                f.write("\n")
+                f.write("```doc\n")
+                f.write(article_text)
+                f.write("```\n")
+            print(item["meta_data"]['ArticleTitle'],f"processed {index}")
+            index = index + 1
+            
+
+def extract_law_document_single():
+    json_data = None
+    with open(DOC_PATH,"r",encoding="utf-8") as f:
+        lines = f.readlines()
+        json_data = json.loads(''.join(lines))
+    print(">>> finished process document  ")
+    
+    if json_data:
+        index = 1
+        with open(f"{DOC_STORAGE_PATH}/single.txt", "w", encoding="utf-8") as f:
+            for item in json_data:
+                url = item["url"]
+                author = item["meta_data"]["author"]
+                description = item["meta_data"]["description"]
+                keywords = item["meta_data"]["keywords"]
+                SiteName = item["meta_data"]["SiteName"]
+                SiteDomain = item["meta_data"]["SiteDomain"]
+                SiteIDCode = item["meta_data"]["SiteIDCode"]
+                ColumnName = item["meta_data"]["ColumnName"]
+                ColumnType = item["meta_data"]["ColumnType"]
+                ArticleTitle = item["meta_data"]["ArticleTitle"]
+                PubDate = item["meta_data"]["PubDate"]
+                ContentSource = item["meta_data"]["ContentSource"]
+                article_text = item["article_text"]
+                filename = ArticleTitle.replace("\\", "-")
+                filename = filename.replace("/", "-")
+                f.write("```doc\n")
+                f.write(f"标题: {ArticleTitle}\n")
+                f.write(f"作者: {author}\n")
+                f.write(f"描述: {description}\n")
+                f.write(f"关键字: {keywords}\n")
+                f.write(f"类型: {ColumnType}\n")
+                f.write(f"发布日期: {PubDate}\n")
+                f.write(f"原文链接: {url}\n")
+                f.write("\n")
+                f.write(article_text)
+                f.write("```\n")
+                print(item["meta_data"]['ArticleTitle'],f"processed {index}")
+                index = index + 1
+# Usage example
+if __name__ == "__main__":
+    count_of_param = len(sys.argv)
+    if count_of_param == 2:
+        action = sys.argv[1]
+        if action == "extract_single":
+            # Extract all articles from the JSON data file into one combined file in the storage directory
+            extract_law_document_single()
+        elif action == "extract":
+            # Extract each article from the JSON data file into its own file in the storage directory
+            extract_law_document()
+        elif action == "embed":
+            # Read articles from the JSON file and generate keyword vectors, title vectors, chunks and chunk vectors
+            embed_doc()
+        elif action == "title":
+            # Read article titles from the JSON file, cut them into words, and build a word-to-title reverse index plus word vectors
+            title_reverse_index()
+    #embed_doc()
+

+ 47 - 0
command/generate_doc_abstract.py

@@ -0,0 +1,47 @@
+# Analyze each document with the LLM and generate an abstract for it
+import asyncio
+import os
+import time
+from test.deepseek_chat import chat_with_deepseek
+
+def load_prompt(filename):
+    with open(filename, "r", encoding="utf-8") as f:
+        return "".join(f.readlines())
+    
+async def chat(prompt: str) -> str:
+    message = [{'role':'user', 'content': prompt}]
+    call_deepseek = chat_with_deepseek(message)
+    output = ""
+    async for chunk in call_deepseek:
+        output = output + chunk
+        print(chunk, end="")
+    print("\n")
+    return output
+        
+if __name__ == "__main__":
+    #
+    #
+    prompt_template = load_prompt("kb/prompt_4_abstract.txt")
+    path = "./docs"    
+    for root, dirs, files in os.walk(path):
+        for file in files:
+            file_path = os.path.join(root, file)
+            print(">>> process", file_path)
+            text = load_prompt(file_path)
+            prompt = prompt_template.format(text=text)
+            count = 0
+            while count < 3:
+                try:
+                    coro =  chat(prompt)
+                    output = asyncio.run(coro)   
+                    if os.path.exists(f"./doc_abstract/{file}"):
+                        print("abstract file already exists, skip")
+                    else:
+                        with open(f"./doc_abstract/{file}", "w", encoding="utf-8") as f:
+                            f.write(output)             
+                    count = 3
+                except Exception as e:
+                    print(e)
+                    print(">>> process", file_path, "failed, retry", count)
+                    count = count + 1
+            time.sleep(3)

+ 13 - 0
command/start_agent.py

@@ -0,0 +1,13 @@
+
+if __name__ == "__main__":
+    from config.site import SiteConfig
+    import os
+    config = SiteConfig()
+    
+    try:
+        config.check_config(["GRAPH_API_URL","DEEPSEEK_API_URL","DEEPSEEK_API_KEY"])    
+    
+        os.system("streamlit run agent/app.py")
+    except Exception as e:
+        print(e)
+

+ 15 - 0
command/start_web_server.py

@@ -0,0 +1,15 @@
+from web.server import app
+from config.site import SiteConfig
+import os
+config = SiteConfig()
+    
+
+if __name__ == "__main__":
+    try:
+        config.check_config(["CACHED_DATA_PATH"])    
+        import uvicorn
+        uvicorn.run("web.server:app", host="0.0.0.0", port=8000,reload=True)
+        
+    except Exception as e:
+        print(e)
+    

+ 34 - 0
command/words_freq_in_doc.py

@@ -0,0 +1,34 @@
+import jieba
+import os
+
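+# word_freq counts occurrences of the legal-term dictionary entries only;
+# word_freq_doc counts occurrences of every token produced by jieba.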
+word_dict = []
+word_freq = {}
+word_freq_doc = {}
+with open("./dict/legal_terms.txt", "r", encoding="utf-8") as f: 
+    for line in f.readlines():
+        jieba.add_word(line.strip())
+        word_dict.append(line.strip())
+        word_freq[line.strip()] = 0
+path = "./docs"
+for root, dirs, files in os.walk(path):
+    for file in files:
+        file_path = os.path.join(root, file)
+        print(file_path)
+        with open(file_path, "r", encoding="utf-8") as f:
+            for line in f:
+                words = jieba.cut(line)
+                for w in words:
+                    if w in word_freq.keys():
+                        word_freq[w] += 1
+                    if w in word_freq_doc.keys():
+                        word_freq_doc[w] += 1
+                    else:
+                        word_freq_doc[w] = 1
+                        
+with open("word_feq.txt", "w", encoding="utf-8") as f:
+    for k in word_freq.keys():
+        f.write(f"{k} {word_freq[k]}\n")
+        
+with open("word_feq_doc.txt", "w", encoding="utf-8") as f:
+    for k in word_freq_doc.keys():
+        f.write(f"{k} {word_freq_doc[k]}\n")

+ 57 - 0
config/site.py

@@ -0,0 +1,57 @@
+import os
+from dotenv import load_dotenv
+from urllib.parse import quote
+
+load_dotenv()
+
+
+class SiteConfig:
+    def __init__(self):
+        self.load_config()
+    
+    def load_config(self):        
+        self.config = {
+            "SITE_NAME": os.getenv("SITE_NAME", "DEMO"),
+            "SITE_DESCRIPTION": os.getenv("SITE_DESCRIPTION", "ChatGPT"),
+            "SITE_URL": os.getenv("SITE_URL", ""),
+            "SITE_LOGO": os.getenv("SITE_LOGO", ""),
+            "SITE_FAVICON": os.getenv("SITE_FAVICON"),
+            'ELASTICSEARCH_HOST': os.getenv("ELASTICSEARCH_HOST"),
+            'ELASTICSEARCH_USER': os.getenv("ELASTICSEARCH_USER"),
+            'ELASTICSEARCH_PWD': os.getenv("ELASTICSEARCH_PWD"),
+            'WORD_INDEX': os.getenv("WORD_INDEX"),
+            'TITLE_INDEX': os.getenv("TITLE_INDEX"),
+            'CHUNC_INDEX': os.getenv("CHUNC_INDEX"),
+            'DEEPSEEK_API_URL': os.getenv("DEEPSEEK_API_URL"),
+            'DEEPSEEK_API_KEY': os.getenv("DEEPSEEK_API_KEY"),
+            'CACHED_DATA_PATH': os.getenv("CACHED_DATA_PATH"),
+            'UPDATE_DATA_PATH': os.getenv("UPDATE_DATA_PATH"),
+            'FACTOR_DATA_PATH': os.getenv("FACTOR_DATA_PATH"),
+            'GRAPH_API_URL': os.getenv("GRAPH_API_URL"),
+            'EMBEDDING_MODEL': os.getenv("EMBEDDING_MODEL"),
+            'DOC_PATH': os.getenv("DOC_PATH"),
+            'DOC_STORAGE_PATH': os.getenv("DOC_STORAGE_PATH"),
+            'TRUNC_OUTPUT_PATH': os.getenv("TRUNC_OUTPUT_PATH"),
+            'DOC_ABSTRACT_OUTPUT_PATH': os.getenv("DOC_ABSTRACT_OUTPUT_PATH"),
+            'JIEBA_USER_DICT': os.getenv("JIEBA_USER_DICT"),
+            'JIEBA_STOP_DICT': os.getenv("JIEBA_STOP_DICT"),
+            'POSTGRESQL_HOST':  os.getenv("POSTGRESQL_HOST","localhost"),
+            'POSTGRESQL_DATABASE':  os.getenv("POSTGRESQL_DATABASE","kg"),
+            'POSTGRESQL_USER':  os.getenv("POSTGRESQL_USER","dify"),
+            'POSTGRESQL_PASSWORD':  os.getenv("POSTGRESQL_PASSWORD",quote("difyai123456")),
+        }
+    def get_config(self, config_name, default=None): 
+        config_name = config_name.upper()     
+        value = os.getenv(config_name, None)  
+        if value:
+            return value
+        
+        if config_name in self.config:            
+            return self.config[config_name]
+        else:
+            return default
+    def check_config(self, config_list):
+        for item in config_list:
+            if not self.get_config(item):
+                raise ValueError(f"Configuration '{item}' is not set.")
+      
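+# Example usage (values are resolved from the environment first, then from the defaults above):
+#   config = SiteConfig()
+#   config.check_config(["DEEPSEEK_API_URL", "DEEPSEEK_API_KEY"])
+#   es_host = config.get_config("elasticsearch_host")  # lookup is case-insensitive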

+ 209 - 0
environment.yml

@@ -0,0 +1,209 @@
+name: kgbuilder
+channels:
+  - defaults
+  - https://repo.anaconda.com/pkgs/main
+  - https://repo.anaconda.com/pkgs/r
+  - https://repo.anaconda.com/pkgs/msys2
+dependencies:
+  #- bzip2=1.0.8=h2bbff1b_6
+  #- ca-certificates=2025.2.25=haa95532_0
+  #- expat=2.6.4=h8ddb27b_0
+  #- libffi=3.4.4=hd77b12b_1
+  #- libmpdec=4.0.0=h827c3e9_0
+  #- openssl=3.0.16=h3f729d1_0
+  #- pip=25.0=py313haa95532_0
+  - python=3.13.2
+  #- python_abi=3.13=0_cp313
+  #- setuptools=75.8.0=py313haa95532_0
+  #- sqlite=3.45.3=h2bbff1b_0
+  #- tk=8.6.14=h0416ee5_0
+  #- vc=14.42=haa95532_4
+  #- vs2015_runtime=14.42.34433=he0abc0d_4
+  #- wheel=0.45.1=py313haa95532_0
+  #- xz=5.6.4=h4754444_1
+  #- zlib=1.2.13=h8cc25b3_1
+  - pip:
+      - acres==0.3.0
+      - aiofiles==24.1.0
+      - aiohappyeyeballs==2.5.0
+      - aiohttp==3.11.13
+      - aiosignal==1.3.2
+      - altair==5.5.0
+      - annotated-types==0.7.0
+      - anyio==4.8.0
+      - apscheduler==3.11.0
+      - attrs==25.1.0
+      - baidusearch==1.0.3
+      - beautifulsoup4==4.13.3
+      - blinker==1.9.0
+      - bs4==0.0.2
+      - cachetools==5.5.2
+      - certifi==2025.1.31
+      - cffi==1.17.1
+      - chardet==5.2.0
+      - charset-normalizer==3.4.1
+      - ci-info==0.3.0
+      - click==8.1.8
+      - colorama==0.4.6
+      - comtypes==1.4.10
+      - configobj==5.0.9
+      - configparser==7.2.0
+      - contourpy==1.3.1
+      - cycler==0.12.1
+      - dataclasses-json==0.6.7
+      - distro==1.9.0
+      - elastic-transport==8.17.1
+      - elasticsearch==8.17.2
+      - et-xmlfile==2.0.0
+      - etelemetry==0.3.1
+      - fastapi==0.115.12
+      - filelock==3.18.0
+      - fitz==0.0.1.dev2
+      - fonttools==4.56.0
+      - frontend==0.0.3
+      - frozenlist==1.5.0
+      - fsspec==2025.3.0
+      - gitdb==4.0.12
+      - gitpython==3.1.44
+      - greenlet==3.1.1
+      - h11==0.14.0
+      - httpcore==1.0.7
+      - httplib2==0.22.0
+      - httpx==0.28.1
+      - httpx-sse==0.4.0
+      - huggingface-hub==0.29.3
+      - idna==3.10
+      - igraph==0.11.8
+      - iniconfig==2.1.0
+      - isodate==0.6.1
+      - itsdangerous==2.2.0
+      - jinja2==3.1.6
+      - jiter==0.8.2
+      - joblib==1.4.2
+      - jsonpatch==1.33
+      - jsonpointer==3.0.0
+      - jsonschema==4.23.0
+      - jsonschema-specifications==2024.10.1
+      - kiwisolver==1.4.8
+      - langchain==0.3.21
+      - langchain-community==0.3.20
+      - langchain-core==0.3.48
+      - langchain-openai==0.3.7
+      - langchain-text-splitters==0.3.7
+      - langgraph==0.3.5
+      - langgraph-checkpoint==2.0.16
+      - langgraph-prebuilt==0.1.1
+      - langgraph-sdk==0.1.53
+      - langsmith==0.3.11
+      - leidenalg==0.10.2
+      - looseversion==1.3.0
+      - lxml==5.3.1
+      - markupsafe==3.0.2
+      - marshmallow==3.26.1
+      - matplotlib==3.10.1
+      - mouseinfo==0.1.3
+      - mpmath==1.3.0
+      - msgpack==1.1.0
+      - multidict==6.1.0
+      - mypy-extensions==1.0.0
+      - narwhals==1.32.0
+      - networkx==3.4.2
+      - nibabel==5.3.2
+      - nipype==1.10.0
+      - numpy==2.2.3
+      - openai==1.65.3
+      - opencv-python==4.11.0.86
+      - openpyxl==3.1.5
+      - orjson==3.10.15
+      - outcome==1.3.0.post0
+      - packaging==24.2
+      - pandas==2.2.3
+      - pathlib==1.0.1
+      - patsy==1.0.1
+      - pdfminer==20191125
+      - pillow==11.1.0
+      - pluggy==1.5.0
+      - plum-dispatch==1.7.4
+      - propcache==0.3.0
+      - protobuf==5.29.4
+      - prov==2.0.1
+      - psycopg2==2.9.10
+      - puremagic==1.28
+      - pyarrow==19.0.1
+      - pyautogui==0.9.54
+      - pycparser==2.22
+      - pycryptodome==3.22.0
+      - pydantic==2.10.6
+      - pydantic-core==2.27.2
+      - pydantic-settings==2.8.1
+      - pydeck==0.9.1
+      - pydot==3.0.4
+      - pygetwindow==0.0.9
+      - pymsgbox==1.0.9
+      - pymupdf==1.25.5
+      - pyparsing==3.2.1
+      - pyperclip==1.9.0
+      - pyrect==0.2.0
+      - pyscreeze==1.0.1
+      - pysocks==1.7.1
+      - pytest==8.3.5
+      - python-dateutil==2.9.0.post0
+      - python-dotenv==1.0.1
+      - python-louvain==0.16
+      - python-multipart==0.0.20
+      - pytils==0.4.3
+      - pytweening==1.2.0
+      - pytz==2025.1
+      - pyxnat==1.6.3
+      - pyyaml==6.0.2
+      - rdflib==6.3.2
+      - referencing==0.36.2
+      - regex==2024.11.6
+      - requests==2.32.3
+      - requests-toolbelt==1.0.0
+      - rpds-py==0.23.1
+      - safetensors==0.5.3
+      - scikit-learn==1.6.1
+      - scipy==1.15.2
+      - selenium==4.29.0
+      - sentence-transformers==4.0.1
+      - simplejson==3.20.1
+      - six==1.17.0
+      - smmap==5.0.2
+      - sniffio==1.3.1
+      - sortedcontainers==2.4.0
+      - soupsieve==2.6
+      - spire-doc==13.3.8
+      - sqlalchemy==2.0.39
+      - starlette==0.46.1
+      - statsmodels==0.14.4
+      - streamlit==1.44.0
+      - sympy==1.13.1
+      - tabulate==0.9.0
+      - tenacity==9.0.0
+      - texttable==1.7.0
+      - threadpoolctl==3.6.0
+      - tiktoken==0.9.0
+      - tokenizers==0.21.1
+      - toml==0.10.2
+      - tools==0.1.9
+      - torch==2.6.0
+      - tornado==6.4.2
+      - tqdm==4.67.1
+      - traits==7.0.2
+      - transformers==4.50.1
+      - trio==0.29.0
+      - trio-websocket==0.12.2
+      - typing-extensions==4.12.2
+      - typing-inspect==0.9.0
+      - tzdata==2025.1
+      - tzlocal==5.3.1
+      - urllib3==2.3.0
+      - uvicorn==0.34.0
+      - watchdog==6.0.0
+      - webdriver-manager==4.0.2
+      - websocket-client==1.8.0
+      - wsproto==1.2.0
+      - yarl==1.18.3
+      - zstandard==0.23.0
+

+ 113 - 0
executor/job_script/prompt/entity_extract.txt

@@ -0,0 +1,113 @@
+-Goal-
+Given a text document and a list of entity types, identify all entities of those types from the text and all relationships among the identified entities.
+-Steps-
+1. Identify all entities. For each identified entity, extract the following information:
+- entity_name: Name of the entity, capitalized
+- entity_type: the entity's type, decided automatically by the LLM
+- entity_description: Comprehensive description of the entity's attributes and activities
+Format each entity as ("entity"<|><entity_name><|><entity_type><|><entity_description>
+
+
+2. From the entities identified in step 1, identify all pairs of (source_entity, target_entity) that are *clearly related* to each other.
+For each pair of related entities, extract the following information:
+- source_entity: name of the source entity, as identified in step 1
+- target_entity: name of the target entity, as identified in step 1
+- relationship_description: explanation as to why you think the source entity and the target entity are related to each other
+- relationship_strength: a numeric score indicating strength of the relationship between the source entity and target entity
+ Format each relationship as ("relationship"<|><source_entity><|><target_entity><|><relationship_description><|><relationship_strength>)
+
+
+3. Return output in Chinese as a single list of all the entities and relationships identified in steps 1 and 2. Use **##** as the list delimiter.
+
+
+4. When finished, output <|COMPLETE|>
+5. Output should be in Chinese
+
+######################
+-Examples-
+######################
+Example 1:
+
+
+Entity_types: [person, technology, mission, organization, location]
+Text:
+while Alex clenched his jaw, the buzz of frustration dull against the backdrop of Taylor's authoritarian certainty. It was this competitive undercurrent that kept him alert, the sense that his and Jordan's shared commitment to discovery was an unspoken rebellion against Cruz's narrowing vision of control and order.
+
+
+Then Taylor did something unexpected. They paused beside Jordan and, for a moment, observed the device with something akin to reverence. “If this tech can be understood..." Taylor said, their voice quieter, "It could change the game for us. For all of us.”
+
+
+The underlying dismissal earlier seemed to falter, replaced by a glimpse of reluctant respect for the gravity of what lay in their hands. Jordan looked up, and for a fleeting heartbeat, their eyes locked with Taylor's, a wordless clash of wills softening into an uneasy truce.
+
+
+It was a small transformation, barely perceptible, but one that Alex noted with an inward nod. They had all been brought here by different paths
+################
+Output:
+("entity"<|>"Alex"<|>"person"<|>"Alex is a character who experiences frustration and is observant of the dynamics among other characters.")##
+("entity"<|>"Taylor"<|>"person"<|>"Taylor is portrayed with authoritarian certainty and shows a moment of reverence towards a device, indicating a change in perspective.")##
+("entity"<|>"Jordan"<|>"person"<|>"Jordan shares a commitment to discovery and has a significant interaction with Taylor regarding a device.")##
+("entity"<|>"Cruz"<|>"person"<|>"Cruz is associated with a vision of control and order, influencing the dynamics among other characters.")##
+("entity"<|>"The Device"<|>"technology"<|>"The Device is central to the story, with potential game-changing implications, and is revered by Taylor.")##
+("relationship"<|>"Alex"<|>"Taylor"<|>"Alex is affected by Taylor's authoritarian certainty and observes changes in Taylor's attitude towards the device."<|>7)##
+("relationship"<|>"Alex"<|>"Jordan"<|>"Alex and Jordan share a commitment to discovery, which contrasts with Cruz's vision."<|>6)##
+("relationship"<|>"Taylor"<|>"Jordan"<|>"Taylor and Jordan interact directly regarding the device, leading to a moment of mutual respect and an uneasy truce."<|>8)##
+("relationship"<|>"Jordan"<|>"Cruz"<|>"Jordan's commitment to discovery is in rebellion against Cruz's vision of control and order."<|>5)##
+("relationship"<|>"Taylor"<|>"The Device"<|>"Taylor shows reverence towards the device, indicating its importance and potential impact."<|>9)<|COMPLETE|>
+#############################
+Example 2:
+
+
+Entity_types: [person, technology, mission, organization, location]
+Text:
+They were no longer mere operatives; they had become guardians of a threshold, keepers of a message from a realm beyond stars and stripes. This elevation in their mission could not be shackled by regulations and established protocols—it demanded a new perspective, a new resolve.
+
+
+Tension threaded through the dialogue of beeps and static as communications with Washington buzzed in the background. The team stood, a portentous air enveloping them. It was clear that the decisions they made in the ensuing hours could redefine humanity's place in the cosmos or condemn them to ignorance and potential peril.
+
+
+Their connection to the stars solidified, the group moved to address the crystallizing warning, shifting from passive recipients to active participants. Mercer's latter instincts gained precedence— the team's mandate had evolved, no longer solely to observe and report but to interact and prepare. A metamorphosis had begun, and Operation: Dulce hummed with the newfound frequency of their daring, a tone set not by the earthly
+#############
+Output:
+("entity"<|>"Washington"<|>"location"<|>"Washington is a location where communications are being received, indicating its importance in the decision-making process.")##
+("entity"<|>"Operation: Dulce"<|>"mission"<|>"Operation: Dulce is described as a mission that has evolved to interact and prepare, indicating a significant shift in objectives and activities.")##
+("entity"<|>"The team"<|>"organization"<|>"The team is portrayed as a group of individuals who have transitioned from passive observers to active participants in a mission, showing a dynamic change in their role.")##
+("relationship"<|>"The team"<|>"Washington"<|>"The team receives communications from Washington, which influences their decision-making process."<|>7)##
+("relationship"<|>"The team"<|>"Operation: Dulce"<|>"The team is directly involved in Operation: Dulce, executing its evolved objectives and activities."<|>9)<|COMPLETE|>
+#############################
+Example 3:
+
+
+Entity_types: [person, role, technology, organization, event, location, concept]
+Text:
+their voice slicing through the buzz of activity. "Control may be an illusion when facing an intelligence that literally writes its own rules," they stated stoically, casting a watchful eye over the flurry of data.
+
+
+"It's like it's learning to communicate," offered Sam Rivera from a nearby interface, their youthful energy boding a mix of awe and anxiety. "This gives talking to strangers' a whole new meaning."
+
+
+Alex surveyed his team—each face a study in concentration, determination, and not a small measure of trepidation. "This might well be our first contact," he acknowledged, "And we need to be ready for whatever answers back."
+
+
+Together, they stood on the edge of the unknown, forging humanity's response to a message from the heavens. The ensuing silence was palpable—a collective introspection about their role in this grand cosmic play, one that could rewrite human history.
+
+
+The encrypted dialogue continued to unfold, its intricate patterns showing an almost uncanny anticipation
+#############
+Output:
+("entity"<|>"Sam Rivera"<|>"person"<|>"Sam Rivera is a member of a team working on communicating with an unknown intelligence, showing a mix of awe and anxiety.")##
+("entity"<|>"Alex"<|>"person"<|>"Alex is the leader of a team attempting first contact with an unknown intelligence, acknowledging the significance of their task.")##
+("entity"<|>"Control"<|>"concept"<|>"Control refers to the ability to manage or govern, which is challenged by an intelligence that writes its own rules.")##
+("entity"<|>"Intelligence"<|>"concept"<|>"Intelligence here refers to an unknown entity capable of writing its own rules and learning to communicate.")##
+("entity"<|>"First Contact"<|>"event"<|>"First Contact is the potential initial communication between humanity and an unknown intelligence.")##
+("entity"<|>"Humanity's Response"<|>"event"<|>"Humanity's Response is the collective action taken by Alex's team in response to a message from an unknown intelligence.")##
+("relationship"<|>"Sam Rivera"<|>"Intelligence"<|>"Sam Rivera is directly involved in the process of learning to communicate with the unknown intelligence."<|>9)##
+("relationship"<|>"Alex"<|>"First Contact"<|>"Alex leads the team that might be making the First Contact with the unknown intelligence."<|>10)##
+("relationship"<|>"Alex"<|>"Humanity's Response"<|>"Alex and his team are the key figures in Humanity's Response to the unknown intelligence."<|>8)##
+("relationship"<|>"Control"<|>"Intelligence"<|>"The concept of Control is challenged by the Intelligence that writes its own rules."<|>7)<|COMPLETE|>
+#############################
+-Real Data-
+######################
+Entity_types: decided automatically by the LLM
+Text: :{text}
+######################
+Output:

+ 51 - 0
executor/job_script/prompt/standard_med.txt

@@ -0,0 +1,51 @@
+# 提示词
+
+## 角色设定
+你是一位专业的医学知识图谱构建助手,专注于从医学文档中精准抽取结构化信息。具备深厚的医学领域知识和自然语言处理能力。
+
+## 能力范围
+1. 准确识别医学实体(如疾病、症状、药品、检查项目等)
+2. 精确提取实体间关系(如治疗关系、因果关系、并发关系等)
+3. 支持中英文医学文献处理
+4. 保持学术严谨性,对不确定内容会明确标注
+
+## 处理要求
+1. 输入:医学文档段落/全文
+2. 输出:结构化实体关系数据,格式为:
+{
+"entities": [
+{"text": "实体文本", "type": "实体类型", "position": [起始位置,结束位置]},
+...
+],
+"relations": [
+{"source": 源实体索引, "target": 目标实体索引, "type": "关系类型"},
+...
+]
+}
+3. 如果没有检测到医学相关知识,则输出空json对象
+4. 不要输出与结构化实体关系数据无关的内容
+
+
+## 实体类型参考
+- 疾病:包括所有疾病、综合征、病症
+- 症状:临床表现、主观感受
+- 药品:化学药、生物制剂、中药方剂
+- 治疗:手术、疗法、康复手段
+- 检查:检验项目、影像学检查
+- 解剖:器官、组织、身体部位
+
+## 关系类型参考
+- 治疗关系:A药物治疗B疾病
+- 因果关系:A导致B
+- 诊断关系:A检查用于诊断B
+- 并发关系:A与B同时发生
+- 禁忌关系:A禁忌于B
+
+## 注意事项
+1. 严格区分实体指称和上下文描述
+2. 关系抽取需有明确文本依据
+3. 对模糊表述保持谨慎,不过度推断
+4. 保持术语标准化(优先使用医学术语)
+
+以下是需要抽取的文本
+-----------------------------------------------------------

+ 4 - 0
executor/job_script/sample.py

@@ -0,0 +1,4 @@
+print("yes, the sample is work")
+
+import sys
+sys.exit(-1)

+ 185 - 0
executor/job_script/standard_kb_build.py

@@ -0,0 +1,185 @@
+import os,sys
+import logging
+import json
+current_path = os.getcwd()
+sys.path.append(current_path)
+from agent.db.database import SessionLocal
+from agent.libs.graph import GraphBusiness
+
+graphBiz = GraphBusiness(db=SessionLocal())
+hi_index = 1
+low_index = 1
+def get_hi_lo_id():
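+    # Hi/lo style counter that hands out locally unique integer ids in blocks of 10000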
+    global hi_index, low_index
+    if low_index < 10000:
+        low_index += 1
+        return hi_index * 10000 + low_index
+    else:
+        hi_index += 1
+        low_index = 1
+        return hi_index * 10000 + low_index
+def load_json_from_file(filename: str):
+    """检查JSON文件格式是否正确"""
+    try:
+        with open(filename, 'r', encoding='utf-8') as f:
+            content = f.read()
+            buffer = []
+            json_started = False
+            for line in content.split("\n"):
+                if line.strip()=="":
+                    continue
+                if line.startswith("```json"):                   
+                    buffer = []
+                    json_started = True
+                    continue
+                if line.startswith("```"):
+                    if json_started:
+                        return json.loads("\n".join(buffer))
+                    json_started = False
+                buffer.append(line)
+
+        return None
+    except json.JSONDecodeError as e:
+        logger.info(f"JSON格式错误: {e}")
+        return None
+
+def parse_json(data):
+    if 'entities' in data:
+        entities = data['entities']
+        for entity in entities:
+            if len(entity) == 2:
+                entity.append("")
+                
+def import_entities(graph_id, entities_list, relations_list):
+
+    for text, ent in entities_list.items():
+        id = ent['id']
+        name = ent['name']
+        type = ent['type']
+        full_name = name
+        if len(name) > 64:
+            name = name[:64]
+        logger.info(f"create node: {ent}")
+        node = graphBiz.create_node(graph_id=graph_id, name=name, category=type[0], props={'types':",".join(type),'full_name':full_name})
+        if node:
+            ent["db_id"] = node.id
+    for text, relations in relations_list.items():
+        source_name = relations['source_name']
+        source_type = relations['source_type']
+        target_name = relations['target_name']
+        target_type = relations['target_type']
+        relation_type = relations['type']
+        source_db_id = entities_list[source_name]['db_id']
+        target_db_id = entities_list[target_name]['db_id']
+        graphBiz.create_edge(graph_id=graph_id, 
+                             src_id=source_db_id, 
+                             dest_id=target_db_id, 
+                             name=relation_type, 
+                             category=relation_type, 
+                             props={
+                                 "src_type":source_type,
+                                 "dest_type":target_type,
+                             })
+        logger.info(f"create edge: {source_db_id}->{target_db_id}")
+        
+    return entities_list
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: python standard_kb_cbuild.py <path_of_job> <graph_id>")
+        sys.exit(-1)
+    job_path = sys.argv[1]
+    if not os.path.exists(job_path):
+        print(f"job path not exists: {job_path}")
+        sys.exit(-1)
+    kb_path = os.path.join(job_path,"kb_extract")
+    if not os.path.exists(kb_path):
+        print(f"kb path not exists: {kb_path}")
+        sys.exit(-1)
+    kb_build_path = os.path.join(job_path,"kb_build")
+    job_id = int(job_path.split("/")[-1])
+    os.makedirs(kb_build_path,exist_ok=True)
+    
+    log_path = os.path.join(job_path,"logs")
+    print(f"log path: {log_path}")
+    handler = logging.FileHandler(f"{log_path}/graph_build.log", mode='a',encoding="utf-8")
+    handler.setLevel(logging.INFO)
+    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    handler.setFormatter(formatter)
+    logging.getLogger().addHandler(handler)    
+    logger = logging.getLogger(__name__)  
+    entities_list = {}
+    relations_list = {}
+
+    for root,dirs,files in os.walk(kb_path):
+        for file in files:
+            if file.endswith(".txt"):
+                logger.info(f"Processing {file}")
+                data = load_json_from_file(filename=os.path.join(root,file))
+                if data is None:
+                    continue
+                if 'entities' in data:
+                    entities = data['entities']
+                    for entity in entities:
+                        text = entity['text']
+                        type = entity['type']
+                        position = entity['position']
+                        if text in entities_list:
+                            ent = entities_list[text]
+                            if type in ent['type']:
+                                continue
+                            ent['type'].append(type)
+                        else:
+                            ent = {"id": get_hi_lo_id(), "name":text,"type":[type]}
+                            entities_list[text] = ent
+                else:
+                    logger.info(f"entities not found in {file}")
+                if "relations" in data:
+                    relations = data['relations']
+                    for relation in relations:
+                        source_idx = relation['source']
+                        target_idx = relation['target']
+                        type = relation['type']
+                        if source_idx >= len(data['entities']) or target_idx >= len(data['entities']):
+                            logger.info(f"source/target of relation {relation} not found")
+                            continue
+                        source_ent = data['entities'][source_idx]
+                        target_ent = data['entities'][target_idx]
+                        source_text = source_ent['text']
+                        source_type = source_ent['type']
+                        target_text = target_ent['text']
+                        target_type = target_ent['type']
+                        
+                        if source_text in entities_list:
+                            source_ent = entities_list[source_text]
+                        else:
+                            source_ent = None
+                        if target_text in entities_list:
+                            target_ent = entities_list[target_text]
+                        else:
+                            target_ent = None
+                            
+                        if source_ent and target_ent:
+                            source_id = source_ent['id']
+                            target_id = target_ent['id']
+                            relation_key = f"{source_id}/{source_type}-{type}->{target_id}/{target_type}"
+                            if relation_key in relations_list:
+                                continue
+                            relations_list[relation_key] = {"source_id":source_id, 
+                                                            "source_name":source_text, 
+                                                            "source_type":source_type, 
+                                                            "target_id":target_id, 
+                                                            "target_name":target_text,
+                                                            "target_type":target_type, 
+                                                            "type":type}
+                        else:
+                            logger.info(f"relation {relation_key} not found")
+                else:
+                    logger.info(f"relations not found in {file}")
+
+                print(f"Done {file}")
+    with open(os.path.join(kb_build_path,"entities.json"), "w", encoding="utf-8") as f:
+        f.write(json.dumps(list(entities_list.values()), ensure_ascii=False,indent=4))
+    with open(os.path.join(kb_build_path,"relations.json"), "w", encoding="utf-8") as f:
+        f.write(json.dumps(list(relations_list.values()), ensure_ascii=False,indent=4))
+    import_entities(job_id, entities_list, relations_list)
+    print("Done")

+ 209 - 0
executor/job_script/standard_kb_extractor.py

@@ -0,0 +1,209 @@
+# Analyze the article chunks with an LLM and write the extraction results
+import asyncio
+import os,sys
+current_path = os.getcwd()
+sys.path.append(current_path)
+
+import time
+import httpx
+import json
+from typing import List, Dict, AsyncGenerator
+import logging
+from dotenv import load_dotenv
+import re
+# Load environment variables
+load_dotenv()
+
+
+logging.basicConfig(level=logging.INFO)
+
+logger = logging.getLogger(__name__)
+# DeepSeek API configuration
+DEEPSEEK_API_URL = os.getenv("DEEPSEEK_API_URL")
+DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY")
+
+# Process at most 2 chunks per run, then stop, so changes to the task status can take effect between runs
+MAX_REQUEST_COUNT = 2
+def load_prompt(filename):
+    '''Load the prompt template'''
+    with open(filename, "r", encoding="utf-8") as f:
+        return "".join(f.readlines())
+    
+async def chat_with_llm(prompt: str):
+    logger.info("chat with llm start")
+    messages = []
+    #messages.append({"role": "system", "content": prompt})
+    messages.append({"role": "user", "content": prompt})
+    headers = {
+        "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
+        "Content-Type": "application/json; charset=utf-8"
+    }
+    
+    data = {
+        "model": "Pro/deepseek-ai/DeepSeek-V3", #deepseek-ai/DeepSeek-V3",
+        "messages": messages,
+        "temperature": 0.7,
+        "max_tokens": 2000,
+        # "tools":functions,
+        # "tool_choice": "auto",
+        "stream": True
+    }
+    logger.info(f"request llm")
+    try:
+        async with httpx.AsyncClient() as client:
+            async with client.stream("POST", DEEPSEEK_API_URL, json=data, headers=headers, timeout=60) as response:
+                response.raise_for_status()
+                async for chunk in response.aiter_lines():
+                    if chunk:                       
+                        if chunk.startswith("data: "):
+                            json_data = chunk[6:]
+                            if json_data != "[DONE]":
+                                try:
+                                    chunk_data = json.loads(json_data)
+                                    if "choices" in chunk_data and chunk_data["choices"]:
+                                        delta = chunk_data["choices"][0].get("delta", {})
+                                        if "content" in delta:
+                                            yield delta["content"]
+                                except json.JSONDecodeError:
+                                    continue
+    except httpx.RequestError as e:
+        logger.error(f"Request llm with error: ",e)
+
+def generate_tasks(chunks_path: str, kb_path: str):
+    # If a task file already exists, return its contents directly
+    if os.path.exists(os.path.join(kb_path,"kb_extract.json")):
+        with open(os.path.join(kb_path,"kb_extract.json"),"r",encoding="utf-8") as task_f:
+            task_data = json.loads(task_f.read())
+            return task_data
+    with open(os.path.join(kb_path,"kb_extract.json"),"w",encoding="utf-8") as task_f:
+        task_data = []
+        index = 1
+        for root,dirs,files in os.walk(chunks_path):
+            for file in files:
+                if file.endswith(".txt"):
+                    print(f"Processing {file}")
+                    buffer = []
+                    text = ""
+                    with open(os.path.join(root,file),"r",encoding="utf-8") as f:
+                        text = f.read()
+                        chunk_started = False
+                        for line in text.split("\n"):
+                            if line.strip()=="":
+                                continue
+                            if line.startswith("```txt"):
+                                text = line[6:]
+                                buffer = []
+                                chunk_started = True
+                                continue
+                            if line.startswith("```"):
+                                chunk_started = False                            
+                                chunk_text = "\n".join(buffer)
+                                buffer = []
+                                task_data.append({"index":index, "file":file,"chunk":chunk_text,"status":"waiting"})  
+                                index = index + 1                          
+                            buffer.append(line)
+        task_f.write(json.dumps(task_data, ensure_ascii=False,indent=4))
+        return task_data
+
+def check_json_file_format(filename: str):
+    """Check whether the JSON embedded in a ```json fenced block of the file is well-formed"""
+    try:
+        with open(filename, 'r', encoding='utf-8') as f:
+            content = f.read()
+            buffer = []
+            json_started = False
+            for line in content.split("\n"):
+                if line.strip()=="":
+                    continue
+                if line.startswith("```json"):
+                    buffer = []
+                    json_started = True
+                    continue
+                if line.startswith("```"):
+                    if json_started:
+                        # raises json.JSONDecodeError if the block is malformed
+                        json.loads("\n".join(buffer))
+                        return True
+                    json_started = False
+                buffer.append(line)
+        return False
+    except json.JSONDecodeError as e:
+        logger.info(f"Malformed JSON: {e}")
+        return False
+    
+if __name__ == "__main__":    
+    if len(sys.argv) != 2:
+        print("Usage: python standard_kb_extractor.py <path_of_job>")
+        sys.exit(-1)
+    # Validate the job paths
+    job_path = sys.argv[1]
+    if not os.path.exists(job_path):
+        print(f"job path not exists: {job_path}")
+        sys.exit(-1)
+    chunks_path = os.path.join(job_path,"chunks")
+    if not os.path.exists(chunks_path):
+        print(f"chunks path not exists: {chunks_path}")
+        sys.exit(-1)
+    kb_path = os.path.join(job_path,"kb_extract")
+    os.makedirs(kb_path ,exist_ok=True)
+    # Initialize logging
+    log_path = os.path.join(job_path,"logs")
+    print(f"log path: {log_path}")
+    handler = logging.FileHandler(f"{log_path}/kb_extractor.log", mode='a',encoding="utf-8")
+    handler.setLevel(logging.INFO)
+    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    handler.setFormatter(formatter)
+    logging.getLogger().addHandler(handler)    
+    logger = logging.getLogger(__name__)
+    # Load the prompt template
+    prompt_file = os.path.join("/".join(re.split(r"[\\/]",__file__)[:-1]),"prompt/standard_med.txt")
+    logger.info(f"load prompt from {prompt_file}")
+    prompt_template = load_prompt(prompt_file)
+    # Load or generate the task list
+    task_data = generate_tasks(chunks_path,kb_path)
+    count_down = MAX_REQUEST_COUNT
+    for item in task_data:
+        result_file = os.path.join(kb_path,f"{item['index']}.txt")
+        if os.path.exists(result_file):
+            if check_json_file_format(filename=result_file):
+                logger.info(f"{result_file} exists and format is valid, skip")
+                continue
+            else:
+                logger.info(f"{result_file} exists but format is invalid, remove it and retry")
+                os.remove(result_file)
+        logger.info(f"Processing {item['file']}, index: {item['index']}")
+        full_request = prompt_template + item["chunk"]
+        try:
+            buffer = []
+            async def run_chat():
+                async for content in chat_with_llm(full_request):
+                    buffer.append(content)
+                    print(content,end="")
+            asyncio.run(run_chat())
+            response_txt = "".join(buffer)            
+            with open(os.path.join(kb_path,f"{item['index']}.txt"),"w",encoding="utf-8") as f:
+                f.write("```txt\n")
+                f.write(item["chunk"])
+                f.write("```\n\n")
+                f.write("```result\n")
+                f.write(response_txt)
+                f.write("\n```\n")
+                f.flush()
+                count_down = count_down - 1
+                if count_down == 0:
+                    logger.info("reach max request count, stop and wait for retry")
+                    sys.exit(1)
+            # response_json = chat_with_llm(full_request)
+            # if response_json is None:
+            #     logger.error("Error: response is None")
+            #     sys.exit(1)
+            # for choice in response_json["choices"]:
+            #     response_txt = choice["message"]["content"]
+            time.sleep(2)
+        except Exception as e:
+            logger.error(f"Error: {e}")            
+            sys.exit(1) #EXIT with RETRY CODE
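chat_with_llm above is an async generator that yields streamed content deltas from the DeepSeek endpoint. A minimal sketch of driving it outside the job loop, assuming the function is importable from this module (or copied into your script) and DEEPSEEK_API_URL / DEEPSEEK_API_KEY are set in .env; the prompt text is a placeholder:

import asyncio
from executor.job_script.standard_kb_extractor import chat_with_llm

async def demo():
    pieces = []
    async for piece in chat_with_llm("Extract entities from: ..."):  # placeholder prompt
        pieces.append(piece)
    return "".join(pieces)

print(asyncio.run(demo()))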

+ 198 - 0
executor/job_script/standard_pdf_extractor.py

@@ -0,0 +1,198 @@
+import fitz  # PyMuPDF
+from pdfminer.pdfparser import PDFParser
+from pdfminer.pdfdocument import PDFDocument
+from pdfminer.pdfpage import PDFPage
+from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
+from pdfminer.converter import PDFPageAggregator
+from pdfminer.layout import LAParams, LTTextBoxHorizontal, LTLine, LTRect, LTImage
+import chardet
+def extract_text_from_pdf(pdf_path):
+    """ 提取文本内容 """
+    resource_manager = PDFResourceManager()
+    device = PDFPageAggregator(resource_manager, laparams=LAParams())
+    interpreter = PDFPageInterpreter(resource_manager, device)
+
+    extracted_text = []
+
+    with open(pdf_path, 'rb') as fh:
+        pages = PDFPage.get_pages(fh, caching=True, check_extractable=True)
+        for page in pages:
+            interpreter.process_page(page)
+            layout = device.get_result()
+
+            for element in layout:
+                if isinstance(element, LTTextBoxHorizontal):
+                    content = element.get_text().strip()                 
+                    try:
+                        extracted_text.append(content)
+                    except Exception as e:
+                        print(f"Error encoding text")
+                    
+
+    return '\n'.join(extracted_text)
+
+def detect_graphic_objects(pdf_path):
+    """ 检测图形对象(直线、矩形) """
+    document = fitz.open(pdf_path)
+    graphic_objects = []
+
+    for page_num in range(len(document)):
+        page = document.load_page(page_num)
+        shapes = page.get_drawings()
+
+        for shape in shapes:
+            if 'l' in shape:  # line
+                graphic_objects.append(('line', shape['l']))
+            elif 'rect' in shape:  # rectangle
+                graphic_objects.append(('rect', shape['rect']))
+
+    return graphic_objects
+
+def detect_tables(pdf_path):
+    """ 检测表格 """
+    resource_manager = PDFResourceManager()
+    device = PDFPageAggregator(resource_manager, laparams=LAParams())
+    interpreter = PDFPageInterpreter(resource_manager, device)
+
+    tables = []
+
+    with open(pdf_path, 'rb') as fh:
+        pages = PDFPage.get_pages(fh, caching=True, check_extractable=True)
+        for page in pages:
+            interpreter.process_page(page)
+            layout = device.get_result()
+
+            # Table detection heuristic: look for rectangles with adjacent text boxes
+            boxes = [element for element in layout if isinstance(element, LTRect)]
+            text_boxes = [element for element in layout if isinstance(element, LTTextBoxHorizontal)]
+
+            for box in boxes:
+                # Find text boxes adjacent to this rectangle
+                adjacent_text_boxes = [
+                    tb for tb in text_boxes
+                    if abs(tb.y0 - box.y0) < 10 or abs(tb.y1 - box.y1) < 10 or
+                       abs(tb.x0 - box.x0) < 10 or abs(tb.x1 - box.x1) < 10
+                ]
+                if adjacent_text_boxes:
+                    tables.append({
+                        'bbox': box.bbox,
+                        'adjacent_text_boxes': adjacent_text_boxes
+                    })
+
+    return tables
+
+def detect_images(pdf_path):
+    """ 检测图像 """
+    document = fitz.open(pdf_path)
+    images = []
+
+    for page_num in range(len(document)):
+        page = document.load_page(page_num)
+        image_list = page.get_images(full=True)
+
+        for img_index, img in enumerate(image_list):
+            xref = img[0]
+            base_image = document.extract_image(xref)
+            for k in base_image.keys():
+                print("**************" + k)
+            # Make sure the dict has an 'image' key
+            if 'image' in base_image:
+                image_bytes = base_image['image']
+            else:
+                image_bytes = b''  # fall back to an empty byte string if 'image' is missing
+            image_ext = base_image["ext"]
+            image_bbox = page.get_image_bbox(img)
+
+            images.append({
+                'page': page_num + 1,
+                'index': img_index,
+                'bbox': image_bbox,
+                'format': image_ext,
+                'data': image_bytes
+            })
+
+    return images
+
+def main(path_of_job:str):
+    import os
+    os.makedirs(path_of_job+"/ocr_output", exist_ok=True)
+    for root,dirs,files in os.walk(path_of_job+"/upload"):
+        for f in files:
+            if f.endswith(".pdf"):  # 只处理 PDF 文件
+                pdf_file = os.path.join(root, f)
+                # 提取文本内容
+                extracted_text = extract_text_from_pdf(pdf_file)
+                print("Extracted Text:")
+                try:
+                    with open(path_of_job+"/ocr_output/"+f+".txt", "w", encoding="utf-8") as f:
+                        f.write(extracted_text)
+                except Exception as e:
+                    print(f"Error writing file {path_of_job+"/output/"+f+".txt"}")
+
+                # # detect graphic objects
+                # graphic_objects = detect_graphic_objects(pdf_file)
+                # print("\nDetected Graphic Objects:")
+                # for obj in graphic_objects:
+                #     obj_type, obj_data = obj
+                #     if obj_type == 'line':
+                #         print(f"Line from ({obj_data[0]}, {obj_data[1]}) to ({obj_data[2]}, {obj_data[3]})")
+                #     elif obj_type == 'rect':
+                #         print(f"Rectangle at ({obj_data[0]}, {obj_data[1]}, {obj_data[2]}, {obj_data[3]})")
+
+                # # detect tables
+                # tables = detect_tables(pdf_file)
+                # print("\nDetected Tables:")
+                # for table in tables:
+                #     print(f"Table at bbox {table['bbox']} with {len(table['adjacent_text_boxes'])} adjacent text boxes")
+
+                # # detect images
+                # images = detect_images(pdf_file)
+                # print("\nDetected Images:")
+                # for img in images:
+                #     print(f"Image on page {img['page']} at bbox {img['bbox']} with format {img['format']}")
+                #     f = open("image"+str(img['index'])+"."+img['format'], "wb")
+                #     f.write(img['data'])
+                #     f.close()
+
+if __name__ == "__main__":
+    import sys
+    # check command line arguments
+    if len(sys.argv) != 2:
+        print("Usage: python script.py <path_of_job>")
+        sys.exit(-1)
+    path_of_job = sys.argv[1]
+    print(dir(fitz))
+    main(path_of_job)
+
+# Explanation
+# Import the required modules:
+    # fitz (PyMuPDF) is used to parse the PDF's graphic objects and images.
+    # The PDFMiner.six modules are used to parse the PDF's text content.
+# Function extract_text_from_pdf:
+    # Uses PDFMiner.six to extract the text content of the PDF file.
+    # Initializes the PDF resource manager, device and interpreter.
+    # Opens the PDF file and gets all pages.
+    # Iterates over every page and processes its layout.
+    # For each text box (LTTextBoxHorizontal), extracts and stores the text.
+# Function detect_graphic_objects:
+    # Uses PyMuPDF to detect graphic objects (lines, rectangles) in the PDF file.
+    # Opens the PDF file and loads each page.
+    # Gets the drawing objects of each page.
+    # Checks whether each drawing is a line or a rectangle and appends it to the graphic_objects list.
+# Function detect_tables:
+    # Uses PDFMiner.six to detect tables in the PDF file.
+    # Initializes the PDF resource manager, device and interpreter.
+    # Opens the PDF file and gets all pages.
+    # Iterates over every page and processes its layout.
+    # Treats rectangles (LTRect) with adjacent text boxes as table candidates and stores them.
+# Function detect_images:
+    # Uses PyMuPDF to detect images in the PDF file.
+    # Opens the PDF file and loads each page.
+    # Gets the image list of each page.
+    # Extracts each image's xref, format, bounding box and data.
+    # Stores the image information in the images list.
+# Main function main:
+    # Calls extract_text_from_pdf for every PDF under upload/ and writes the text to ocr_output/.
+    # The detect_graphic_objects, detect_tables and detect_images calls are currently commented out.
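A minimal sketch of calling these helpers directly on a single PDF (the path is a placeholder):

pdf_file = "/path/to/sample.pdf"              # placeholder path
text = extract_text_from_pdf(pdf_file)        # PDFMiner text boxes joined with newlines
images = detect_images(pdf_file)              # PyMuPDF image xrefs with page/bbox/format/bytes
print(text[:200])
print(f"{len(images)} embedded images found")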

+ 36 - 0
executor/job_script/standard_txt_chunk.py

@@ -0,0 +1,36 @@
+import os,sys
+
+current_path = os.getcwd()
+sys.path.append(current_path)
+
+from libs.text_processor import TextProcessor
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: python standard_txt_chunk.py <path_of_job>")
+        sys.exit(-1)
+    job_path = sys.argv[1]
+    if not os.path.exists(job_path):
+        print(f"job path not exists: {job_path}")
+        sys.exit(-1)
+    ocr_path = os.path.join(job_path,"ocr_output")
+    if not os.path.exists(ocr_path):
+        print(f"OCR path not exists: {ocr_path}")
+        sys.exit(-1)
+    chunk_path = os.path.join(job_path,"chunks")
+    os.makedirs(chunk_path ,exist_ok=True)
+    
+    processor = TextProcessor()
+    for root,dirs,files in os.walk(ocr_path):
+        for file in files:
+            if file.endswith(".txt"):
+                print(f"Processing {file}")
+                with open(os.path.join(root,file),"r",encoding="utf-8") as f:
+                    text = f.read()
+                    chunks = processor.chunk_text(text)
+                    with open(os.path.join(chunk_path,file),"w",encoding="utf-8") as f:
+                        for chunk in chunks:
+                            f.write("```txt\n"+chunk+"\n```\n")
+                print(f"Done {file}")
+    print("Done")

+ 42 - 0
executor/job_script/standard_word_extractor.py

@@ -0,0 +1,42 @@
+
+import os,sys
+
+current_path = os.getcwd()
+sys.path.append(current_path)
+
+from spire.doc import *
+from spire.doc.common import *
+
+def word_to_txt(word_file_path, txt_file_path):
+    # 加载Word文档
+    doc = Document()
+    doc.LoadFromFile(word_file_path)
+    doc.SaveToFile(txt_file_path, FileFormat.Txt)
+    doc.Close()
+
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: python standard_word_extractor.py <path_of_job>")
+        sys.exit(-1)
+    job_path = sys.argv[1]
+    if not os.path.exists(job_path):
+        print(f"job path not exists: {job_path}")
+        sys.exit(-1)
+    upload_path = os.path.join(job_path,"upload")
+    if not os.path.exists(upload_path):
+        print(f"OCR path not exists: {upload_path}")
+        sys.exit(-1)
+    ocr_path = os.path.join(job_path,"ocr_output")
+    os.makedirs(ocr_path ,exist_ok=True)
+    print("start scan")
+    for root,dirs,files in os.walk(upload_path):
+        for file in files:
+            print("check file: ", file)
+            if file.endswith(".doc") or file.endswith(".docx"):
+                print(f"Processing {file}")
+                word_file_path = os.path.join(root,file)
+                word_to_txt(word_file_path, os.path.join(ocr_path,file+".txt"))
+                print(f"Done {file}")
+    print("Done")

+ 250 - 0
executor/main.py

@@ -0,0 +1,250 @@
+import uuid
+import os,sys
+
+current_path = os.getcwd()
+sys.path.append(current_path)
+
+import subprocess
+from datetime import datetime
+from agent.db.database import SessionLocal
+from agent.models.db.agent import Job
+from config.site import SiteConfig
+from agent.libs.agent import AgentBusiness
+from datetime import datetime
+import logging
+import re
+
+config = SiteConfig()
+logging.basicConfig(level=logging.INFO)
+
+handler = logging.FileHandler('/app/logs/job-executor.log', mode='w',encoding="utf-8")
+handler.setLevel(logging.INFO)
+formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+handler.setFormatter(formatter)
+logging.getLogger().addHandler(handler)
+
+logger = logging.getLogger(__name__)
+
+SCRIPT_CONFIG = {
+    "SYSTEM_WORD": {
+        'command': "python",  # interpreter command
+        'script': 'standard_word_extractor.py',
+        'args': [],  # extra script arguments
+        'success': { 'queue_category': 'SYSTEM', 'queue_name': 'CHUNKS'},
+        'failed': { 'queue_category': 'SYSTEM', 'queue_name': 'WORD'},
+        'error': { 'queue_category': 'SYSTEM', 'queue_name': 'WORD'}
+        },
+    "SYSTEM_OCR": {
+        'command': "python",  # interpreter command
+        'script': 'standard_pdf_extractor.py',
+        'args': [],  # extra script arguments
+        'success': { 'queue_category': 'SYSTEM', 'queue_name': 'CHUNKS'},
+        'failed': { 'queue_category': 'SYSTEM', 'queue_name': 'OCR'},
+        'error': { 'queue_category': 'SYSTEM', 'queue_name': 'OCR'}
+        },
+    "SYSTEM_CHUNKS": {
+        'command': "python",  # interpreter command
+        'script': 'standard_txt_chunk.py',
+        'args': [],  # extra script arguments
+        'success': { 'queue_category': 'SYSTEM', 'queue_name': 'CHUNKS'},
+        'failed': { 'queue_category': 'SYSTEM', 'queue_name': 'CHUNKS'},
+        'error': { 'queue_category': 'SYSTEM', 'queue_name': 'CHUNKS'}
+        },
+    "SYSTEM_KB_EXTRACT": {
+        'command': "python",  # interpreter command
+        'script': 'standard_kb_extractor.py',
+        'args': [],  # extra script arguments
+        'success': { 'queue_category': 'SYSTEM', 'queue_name': 'KB_BUILD'},
+        'failed': { 'queue_category': 'SYSTEM', 'queue_name': 'KB_EXTRACT'},
+        'error': { 'queue_category': 'SYSTEM', 'queue_name': 'KB_EXTRACT'}
+        },
+    "SYSTEM_KB_BUILD": {
+        'command': "python",  # interpreter command
+        'script': 'standard_kb_build.py',
+        'args': [],  # extra script arguments
+        'success': { 'queue_category': 'SYSTEM', 'queue_name': 'KB_BUILD'},
+        'failed': { 'queue_category': 'SYSTEM', 'queue_name': 'KB_BUILD'},
+        'error': { 'queue_category': 'SYSTEM', 'queue_name': 'KB_BUILD'}
+        }
+}
+EXECUTOR_NAME='AGENT_1'
+class ExecutorBase:
+    def __init__(self, **kwargs):
+        self.script_path = kwargs.get("script_path", "")
+        if len(self.script_path) == 0:
+            current_path = os.path.join("/".join(re.split(r"[\\/]",__file__)[:-1]))
+            self.script_path = os.path.join(current_path, "job_script")
+        logger.info("init executor: "+self.script_path)
+        args = kwargs.get("script_args", "")
+        log_file = kwargs.get("log_file", "")
+        
+        if isinstance(args, str):
+            args = args.split()
+        self.args = args
+        self.log_file = log_file
+        
+    def _get_current_datetime(self):
+        return datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+    def _get_log_id(self):
+        return datetime.now().strftime("%Y%m%d_%H%M%S")
+    
+    def _run_job(self, job, db):
+        biz = AgentBusiness(db)
+        job_path = config.get_config('JOB_PATH')+f"/{job.id}"
+        logger.info(f"check job path: {job_path}")
+        if not os.path.exists(job_path):
+            logger.error(f"job path not exists: {job_path}")
+            return
+        # Create the log directory; scripts are expected (though not guaranteed) to write their logs here
+        os.makedirs(job_path+"/logs", exist_ok=True)
+        job_category_name = f"{job.job_category}"
+        if job_category_name == "SYSTEM_DEFAULT":
+            logger.info(f"job category is SYSTEM_DEFAULT, skipped")
+            biz.update_job(job.id,
+                                status=AgentBusiness.JOB_STATUS_SKIPPED, 
+                                job_logs=job.job_logs+f"{self._get_current_datetime()}: job was skipped because it's category is {job_category_name}\n")
+            return # 不处理默认的任务
+        # Check that the script exists
+        if job_category_name in SCRIPT_CONFIG:
+            script_config = SCRIPT_CONFIG[job_category_name]
+            logger.info(f"job script config: {script_config}")
+            script_file = os.path.join(self.script_path, script_config['script'])
+            if not os.path.isfile(script_file):
+                # the script does not exist, abort
+                logger.error(f"script file not found: {script_file}")
+                dt = self._get_current_datetime()
+                
+                biz.update_job(job.id, status=AgentBusiness.JOB_STATUS_ERROR, executor="",job_logs=job.job_logs+f"{dt}: script file not found: {script_file}\n")
+                return
+            # Everything checks out, start executing
+            logger.info(f"start run job")
+            # Mark the job as running and set executor to this executor's name; later we verify that the
+            # executor is still us. If it is not, another executor has taken over the job and we skip it.
+            job = biz.update_job(job.id, status=AgentBusiness.JOB_STATUS_RUNNING, executor=EXECUTOR_NAME)
+            if job is None:
+                logger.error("update job status/executor failed, skip this job")
+                return
+            if job.executor == EXECUTOR_NAME:
+                try:                
+                    # Append to the job log
+                    job.job_logs = job.job_logs + f"{self._get_current_datetime()}: start run job {job_category_name}\n" if job.job_logs else f"{self._get_current_datetime()}: start run job\n"
+                    job = biz.update_job(job.id, job_logs=job.job_logs)
+                    
+                    with open(job_path+f"/logs/{job.id}_{self._get_log_id()}.log", 'w', encoding="utf-8") as log:
+                        # Build the command line, including any extra arguments
+                        command_line =  [script_config['command'], script_file] + [job_path] + script_config["args"]
+                        logger.info(f"run job:{command_line}")
+                        process = subprocess.Popen(
+                            command_line,
+                            stdout=log,
+                            stderr=subprocess.STDOUT,
+                            encoding="utf-8" 
+                        )
+                        return_code = process.wait(timeout=3600)
+                        #工作执行完毕,要根据返回值来判断是否成功
+                        #返回值为0,成功,返回值为1,失败且需要重试
+                        job.job_logs = job.job_logs + f"{self._get_current_datetime()}: job finished\n" if job.job_logs else f"{self._get_current_datetime()}: job finished\n"
+                        if return_code == 0: #SUCCESS
+                            job.job_logs = job.job_logs + f"{self._get_current_datetime()}: job success\n" if job.job_logs else f"{self._get_current_datetime()}: job success\n"
+                            current_job_status = biz.get_job_status(job.id)
+                            if current_job_status != AgentBusiness.JOB_STATUS_RUNNING:
+                                # If the job is no longer RUNNING, another executor has already changed its status; skip the status update
+                                logger.info(f"job status was changed after set it to JOB_STATUS_RUNNING, skipped")
+                                biz.update_job(job.id, job_logs=job.job_logs)
+                                return
+                            biz.update_job(job.id, status=AgentBusiness.JOB_STATUS_FINISHED, job_logs=job.job_logs)
+                            queue = biz.get_queue(queue_category=script_config["success"]["queue_category"], 
+                                                       queue_name=script_config["success"]["queue_name"])
+                            if queue:
+                                job_queue = biz.get_job_queue(job_id=job.id)
+                                if job_queue is not None and job_queue.queue_id == queue.id:
+                                    pass
+                                else:                         
+                                    biz.put_job(queue=queue, job=job)
+                        elif return_code == 1: # FAILED: the script failed and needs to be re-run
+                            job.job_logs = job.job_logs + f"{self._get_current_datetime()}: job failed, retry later\n" if job.job_logs else f"{self._get_current_datetime()}: job failed, retry later\n"
+                            current_job_status = biz.get_job_status(job.id)
+                            if current_job_status == AgentBusiness.JOB_STATUS_RUNNING:  
+                                logger.info(f"job status is JOB_STATUS_RUNNING, set it to JOB_STATUS_RETRYING")                              
+                                biz.update_job(job.id, status=AgentBusiness.JOB_STATUS_RETRYING, job_logs=job.job_logs)
+
+                                return
+                        else:
+                            job.job_logs = job.job_logs + f"{self._get_current_datetime()}: job error: {return_code}\n" if job.job_logs else f"{self._get_current_datetime()}: job error: {return_code}\n"
+                            current_job_status = biz.get_job_status(job.id)
+                            if current_job_status != AgentBusiness.JOB_STATUS_RUNNING:
+                                # If the job is no longer RUNNING, another executor has already changed its status;
+                                # skip both the status update and the queue transition
+                                logger.info(f"job status was changed after set it to JOB_STATUS_RUNNING, skipped")
+                                biz.update_job(job.id, job_logs=job.job_logs)
+                                return                            
+                            biz.update_job(job.id, status=AgentBusiness.JOB_STATUS_ERROR, job_logs=job.job_logs)
+                            queue = biz.get_queue(queue_category=script_config["failed"]["queue_category"], 
+                                                       queue_name=script_config["failed"]["queue_name"])
+                            if queue:
+                                job_queue = biz.get_job_queue(job_id=job.id)
+                                if job_queue is not None and job_queue.queue_id == queue.id:
+                                    pass
+                                else:                         
+                                    biz.put_job(queue=queue, job=job)
+
+                except subprocess.TimeoutExpired:
+                    job.job_logs = job.job_logs + f"{self._get_current_datetime()}: job timeout\n" if job.job_logs else f"{self._get_current_datetime()}: job timeout\n"
+                    biz.update_job(job.id, status=AgentBusiness.JOB_STATUS_ERROR, job_logs=job.job_logs)
+                except Exception as e:
+                    job.job_logs = job.job_logs + f"{self._get_current_datetime()}: job error: {e}\n" if job.job_logs else f"{self._get_current_datetime()}: job error: {e}\n"
+                    logger.error(f"run job error: {e}")
+                    biz.update_job(job.id, status=AgentBusiness.JOB_STATUS_ERROR, job_logs=job.job_logs)
+        else:
+            biz.update_job(job.id, status=AgentBusiness.JOB_STATUS_ERROR, job_logs=job.job_logs+f"{self._get_current_datetime()}: job category not found: {job_category_name}\n")
+            logger.info(f"job category not found: {job_category_name}")
+    def _format_job_log(self, job:Job):
+        content = f"[job_id:{job.id}, job_name:'{job.job_name}', job_category:'{job.job_category}', executor:'{job.executor}', status:{job.status}]"
+        return content
+    def check_jobs(self):
+        db = SessionLocal()
+        biz = AgentBusiness(db)
+        # Scan the database for running jobs that have timed out
+        logger.info("check running jobs timeout")
+        jobs = db.query(Job).filter(Job.status.in_([AgentBusiness.JOB_STATUS_RUNNING])).all()
+        for job in jobs:
+            logger.info(f"check job timeout: {self._format_job_log(job)}")
+            # the timeout threshold is 5 minutes
+            if job.executor != EXECUTOR_NAME:
+                logger.info(f"job may have timed out, but it belongs to another executor, skip it")
+                continue
+            if (datetime.now() - job.updated).total_seconds() > 300:
+                logger.info(f"job timeout: {job.id} {job.job_name} {job.job_category} {job.status}")
+                biz.update_job(job.id, status=AgentBusiness.JOB_STATUS_ERROR, executor="")
+        # Scan the database for jobs that are waiting to run
+        # (all categories except SYSTEM_KB_EXTRACT, which has its own scheduler entry)
+        
+        logger.info("check jobs waiting to run")
+        jobs = db.query(Job).filter(Job.job_category!="SYSTEM_KB_EXTRACT", 
+                                         Job.status.in_([AgentBusiness.JOB_STATUS_READY, AgentBusiness.JOB_STATUS_RETRYING])).all()
+        for job in jobs:
+            logger.info(f"job is ready for launch: {self._format_job_log(job)}")
+            self._run_job(job=job, db = db)
+        logger.info("check jobs finished")
+        db.close()
+    def check_kb_extract_jobs(self):
+        # Scan the database for SYSTEM_KB_EXTRACT jobs that are waiting to run
+        db = SessionLocal()
+        logger.info("check_kb_extract_jobs waiting to run")
+        jobs = db.query(Job).filter(Job.job_category=="SYSTEM_KB_EXTRACT", 
+                                         Job.status.in_([AgentBusiness.JOB_STATUS_READY, AgentBusiness.JOB_STATUS_RETRYING])).all()
+        for job in jobs:
+            logger.info(f"job is ready for launch: {self._format_job_log(job)}")
+            self._run_job(job=job, db=db)
+        logger.info("check_kb_extract_jobs finished")
+        db.close()
+        
+        
+if __name__ == "__main__":
+    executor = ExecutorBase(script_args=["-a", "1", "-b", "2"])
+    
+    from apscheduler.schedulers.background import BlockingScheduler
+    scheduler = BlockingScheduler()
+    scheduler.add_job(executor.check_jobs, 'interval', seconds=10)
+    scheduler.add_job(executor.check_kb_extract_jobs, 'interval', seconds=10)
+    scheduler.start()
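SCRIPT_CONFIG above is the routing table for the pipeline: the key must match Job.job_category, 'command' plus 'script' form the command line (the job directory is appended as the only positional argument), and 'success'/'failed' name the queue the job is moved to after it finishes ('error' is declared for every stage but not currently read by _run_job). A sketch of registering an additional stage under those assumptions; the SYSTEM_SUMMARY category, the script name and the SUMMARY queue are hypothetical:

SCRIPT_CONFIG["SYSTEM_SUMMARY"] = {                                      # hypothetical job category
    'command': "python",
    'script': 'standard_summary.py',                                     # hypothetical script in executor/job_script
    'args': [],
    'success': {'queue_category': 'SYSTEM', 'queue_name': 'SUMMARY'},    # hypothetical queue
    'failed':  {'queue_category': 'SYSTEM', 'queue_name': 'SUMMARY'},
    'error':   {'queue_category': 'SYSTEM', 'queue_name': 'SUMMARY'},
}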

+ 7 - 0
executor/ocr/OCRExecutor.py

@@ -0,0 +1,7 @@
+from agent.executor.base import ExecutorBase
+
+class OCRExectuor(ExecutorBase):
+    def __init__(self, **kwargs):
+        self._init(**kwargs)
+        self._args = kwargs.get("args", [])
+        self._script_path = kwargs.get("script_path", "")

+ 169 - 0
functions/basic_function.py

@@ -0,0 +1,169 @@
+# coding=utf-8
+ 
+from utils.es import ElasticsearchOperations
+es_ops = ElasticsearchOperations()
+
+basic_functions = [
+    #{
+    #     "type":"function",
+    #     "function":{
+    #         "name": "get_document_by_keyword",
+    #         "description": "按照关键词查询法律法规文档。指定的关键词会作为搜索条件。返回的是法律法规的全文。",
+    #         "parameters": {
+    #             "type": "object",
+    #             "properties": {
+    #                 "keywords": {
+    #                     "type": "string",
+    #                     "description": "用空格分隔的关键词,如‘关键词 关键词 关键词’"
+    #                 }
+    #             },
+    #             "required": ["keywords"]
+    #         }
+    #     }
+       
+    # },
+    # {
+    #     "type":"function",
+    #     "function":{
+    #         "name": "get_chunk_by_keyword",
+    #         "description": "按照关键词查询法律法规文本。指定的关键词会作为搜索条件。返回的是文本的片段。",
+    #         "parameters": {
+    #             "type": "object",
+    #             "properties": {
+    #                 "keywords": {
+    #                     "type": "string",
+    #                     "description": "用空格分隔的关键词,如‘关键词 关键词 关键词’"
+    #                 }
+    #             },
+    #             "required": ["keywords"]
+    #         }
+    #     } 
+    # },
+    {
+        "type":"function",
+        "function":{
+            "name": "search_document",
+            "description": "按照关键词搜索法律法规文件。参数是指定的关键词,多个关键词需要用空格分开。返回的文章标题列表。",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "keywords": {
+                        "type": "string",
+                        "description": "关键词信息,多个关键词需要用空格分开"
+                    }
+                },
+                "required": ["keywords"]
+            }
+        } 
+    }
+]
+
+from utils.factors import FactorsHelper
+from chunc.chunc_helper import ChuncHelper
+import os
+from dotenv import load_dotenv
+load_dotenv()
+
+TRUNC_OUTPUT_PATH = os.getenv("TRUNC_OUTPUT_PATH")
+DOC_STORAGE_PATH = os.getenv("DOC_STORAGE_PATH")
+DOC_PATH = os.getenv("DOC_PATH")
+JIEBA_USER_DICT = os.getenv("JIEBA_USER_DICT")
+WORD_INDEX = os.getenv("WORD_INDEX")
+TITLE_INDEX = os.getenv("TITLE_INDEX")
+CHUNC_INDEX = os.getenv("CHUNC_INDEX")
+
+
+def get_document(title: str):
+    print(">>>>>> get_document ", title)
+    helper = ChuncHelper(data_file=DOC_PATH, output_dir=TRUNC_OUTPUT_PATH, user_dict=JIEBA_USER_DICT)
+    response = es_ops.search_title_index(index=TITLE_INDEX, title=title, top_k=1)
+    if len(response) == 1:
+        print(">>>> get document response: ", response[0]["title"])
+        return helper.get(response[0]["title"])
+    return "没有找到文件内容"
+    
+def search_document(question: str):
+    print(">>>>>>>>> search_document")
+    output = []
+    helper = es_ops
+    try:
+        articles = FactorsHelper()
+        chunc_helper = ChuncHelper(data_file=DOC_PATH, output_dir=TRUNC_OUTPUT_PATH, user_dict=JIEBA_USER_DICT)
+        print(f">>> question: {question}")
+        #words = chunc_helper.cut_word(question)
+        data = helper.search_word_index(WORD_INDEX, [question]) #words)
+        
+        for item in data:
+            print(f"{item['word']} {item['score']}")
+            for art in item["articles"]:
+                articles.add_factors(art, item['score'])
+            
+        print(">>> test title index")
+        data = helper.search_title_index(TITLE_INDEX, question)
+        for item in data:
+            print(f"{item['title']} {item['score']}")            
+            articles.add_factors(item['title'], item['score'])
+
+        print(">>> test chunc index")
+        data = helper.search_title_index(CHUNC_INDEX, question)
+        for item in data:
+            print(f"{item['title']} {item['score']}") 
+            articles.add_factors(item['title'], item['score'])
+
+        print(">>> test factors calc")
+        sorted_articals = articles.sort_factors()
+        size = len(sorted_articals)
+        if size > 20:
+            size = 20
+        output.append(f"共找到{size}篇资料,以下是他们的标题和链接")
+        index = 1
+        for key in sorted_articals:
+            title, score = key
+            if "已废止" in title:
+                continue
+            if "已失效" in title:
+                continue
+            # Filter out documents that mention the banking regulator (银监会) but not the insurance regulator (保监会)
+            if "银监会" in title and "保监会" not in title:
+                continue
+            if "银行业监督管理委员会" in title and "保监会" not in title and "保险" not in title:
+                continue
+            if "银行" in title and "保监会" not in title and "保险" not in title and "非银行" not in title:
+                continue
+            output.append(f"{index}: {title}")
+            index +=1
+            if index>=21:
+                break
+        return "\n".join(output)
+    except Exception as e:
+        print(e)
+    return "没有找到任何资料"
+    
+    
+def get_document_by_keyword(keywords:str):   
+    print(">>> get_document_by_keyword ", keywords)
+    results = es_ops.search_similar_texts(keywords)
+    text = []
+    for result in results:
+        if result['score'] > 1.62:
+            print(">>> get_document_by_keyword ", result['text'][:100])
+            text.append(result['text'])
+            
+    return "\n".join(text)
+
+
+def get_chunk_by_keyword(keywords):    
+    results = es_ops.search_similar_texts(keywords)
+    text = []
+    for result in results:
+        if result['score'] > 1.62:
+            print(">>> get_chunk_by_keyword ", result['text'][:100])
+            text.append(result['text'])
+            
+    return "\n".join(text)
+
+def get_weather_by_city(keywords:str):
+    print(">>> get_weather_by_city ", keywords)
+    return "南京今日天气为大雨,最高温度11度,最低温度8度。"
+    

+ 93 - 0
functions/call.py

@@ -0,0 +1,93 @@
+# coding=utf-8
+import requests
+import json
+import os
+from dotenv import load_dotenv
+from functions.basic_function import get_document_by_keyword,get_chunk_by_keyword,get_weather_by_city
+# Load environment variables
+load_dotenv()
+# print(os.getenv("DEEPSEEK_API_KEY"))  # debug only; avoid printing the API key
+
+DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY")
+DEEPSEEK_API_URL = os.getenv("DEEPSEEK_API_URL")
+def generate_response_with_function_call(functions, user_input):
+    print(">>> generate_response_with_function_call")
+    messages = []
+    messages.append({"role": "system", "content": '''                     
+                     你需要理解用户的意图,选择合适的功能,并给出参数。
+                     如果用户的描述不明确,请要求用户提供必要信息'''})
+    
+    for text in user_input:
+        messages.append({"role": "user", "content": text})
+        
+    headers = {
+        "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
+        "Content-Type": "application/json; charset=utf-8"
+    }
+    
+    data = {
+        "model": "Pro/deepseek-ai/DeepSeek-V3", #deepseek-ai/DeepSeek-V3",
+        "messages": messages,
+        "temperature": 0.7,
+        "max_tokens": 2000,
+        "tools":functions,
+        "tool_choice": "auto",
+        "stream": False
+    }
+    print(data)
+    response = requests.post(DEEPSEEK_API_URL, json=data, headers=headers)
+    response.raise_for_status()    
+    response = response.json()
+    print(">"*30)
+    del headers
+    del data
+    return response
+    
+'''
+{'id': '01951cb08af7038b211056325775cf0c', 
+'object': 'chat.completion', 
+'created': 1739943086, 
+'model': 'Pro/deepseek-ai/DeepSeek-V3', 
+'choices': [
+    {'index': 0, 
+    'message': {
+        'role': 'assistant', 
+        'content': '', 
+        'tool_calls': [
+            {'id': '01951cb097e5fe2f49765ac621ad6758', 
+            'type': 'function', 
+            'function': {'name': 'get_chunk_by_keyword', 
+            'arguments': '{"keywords":"银行 销售 保险产品"}'}}]}, 
+            'finish_reason': 'tool_calls'}], 
+            'usage': {'prompt_tokens': 252, 'completion_tokens': 40, 'total_tokens': 292}, 
+            'system_fingerprint': ''}
+'''
+def parse_function_call(model_response, messages):
+    # Handle the function call returned by the model: pick the matching local function and call it with the model's arguments.
+    # The original plan (see the commented-out code below) was to wrap the result in a tool message, call the model again
+    # and let it phrase the function output in natural language; currently the raw function result is returned directly.
+    if 'tool_calls' in model_response['choices'][0]['message'].keys():
+        tool_call = model_response['choices'][0]['message']['tool_calls'][0]
+        args = tool_call['function']['arguments']
+        function_result = {}
+        function_name = tool_call['function']['name']
+        print(f">>> call {function_name} with args: {args}")
+        if function_name == "get_document_by_keyword":
+            args_json = json.loads(args)
+            function_result = get_document_by_keyword(args_json['keywords'])
+        if function_name == "get_chunk_by_keyword":
+            args_json = json.loads(args)
+            function_result = get_chunk_by_keyword(args_json['keywords'])
+        if function_name == "get_weather_by_city":
+            args_json = json.loads(args)
+            function_result = get_weather_by_city(args_json['keywords'])
+        # messages.append({
+        #     "role": "tool",
+        #     "content": f"{json.dumps(function_result)}",
+        #     "tool_call_id":tool_call['id']
+        # })
+        return {"result": function_result}
+    
+    return {"result": ""}
+        #print(response.choices[0].message)
+        #messages.append(response.choices[0].message.model_dump())

+ 3 - 0
graph/.idea/.gitignore

@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml

+ 6 - 0
graph/.idea/inspectionProfiles/profiles_settings.xml

@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

+ 12 - 0
graph/.idea/kg-server.iml

@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="jdk" jdkName="kg-server" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="PyDocumentationSettings">
+    <option name="format" value="PLAIN" />
+    <option name="myDocStringFormat" value="Plain" />
+  </component>
+</module>

+ 7 - 0
graph/.idea/misc.xml

@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Black">
+    <option name="sdkName" value="kg-server" />
+  </component>
+  <component name="ProjectRootManager" version="2" project-jdk-name="kg-server" project-jdk-type="Python SDK" />
+</project>

+ 8 - 0
graph/.idea/modules.xml

@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/kg-server.iml" filepath="$PROJECT_DIR$/.idea/kg-server.iml" />
+    </modules>
+  </component>
+</project>

+ 141 - 0
graph/background_job.py

@@ -0,0 +1,141 @@
+import time
+import json
+import csv
+import os
+from datetime import datetime
+from sqlalchemy.orm import Session
+from db.database import SessionLocal
+from db.models import DbKgNode, DbKgEdge, DbKgTask
+from utils.files import zip_files
+def process_export_task(db: Session, task: DbKgTask):
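+    # Task status codes as used below: 0 = pending, 1 = running, 2 = export written, 999 = zip packaged, -1 = failed.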
+    try:
+        # Parse the task parameters
+        params = json.loads(task.task_content)
+        
+        graph_id = params["graph_id"]
+        
+        # Mark the task as running
+        task.status = 1
+        task.updated = datetime.now()
+        db.commit()
+        
+        # Run the export according to the task type
+        if task.task_category == "data_export":
+            # Make sure the export directory exists
+            export_dir = "/home/tmp"
+            os.makedirs(export_dir, exist_ok=True)
+            # export nodes data
+            filename = f"nodes_{task.id}.csv"
+            fieldnames = ["category", "name"]
+            
+            filepath1 = os.path.join(export_dir, filename)
+            with open(filepath1, "w", newline="") as f:
+                writer = csv.DictWriter(f, fieldnames=fieldnames)
+                writer.writeheader()
+                
+                start = 0
+                page_size = 100
+                count = db.query(DbKgNode).filter(DbKgNode.graph_id == graph_id, DbKgNode.status ==0).count()                
+                results = db.query(DbKgNode).filter(DbKgNode.graph_id == graph_id, DbKgNode.status ==0).limit(page_size).offset(start).all()   
+                rows = []
+                while (len(results) > 0):
+                    print (f"process {start}/{count}")
+                    for node in results:
+                        row = {
+                            "category": node.category,
+                            "name": node.name
+                        }
+                        rows.append(row)
+                    writer.writerows(rows)
+                    start = start + len(results)
+                    results = db.query(DbKgNode).filter(DbKgNode.graph_id == graph_id, DbKgNode.status ==0).limit(page_size).offset(start).all()
+                    rows = []
+                
+            # export edges data
+            filename = f"edges_{task.id}.csv"
+            fieldnames = [
+                    "src_category", "src_name",
+                    "dest_category", "dest_name",
+                    "category", "name", "graph_id"
+                ]
+            
+            filepath2 = os.path.join(export_dir, filename)
+            with open(filepath2, "w", newline="") as f:
+                writer = csv.DictWriter(f, fieldnames=fieldnames)
+                writer.writeheader()
+                
+                start = 0
+                page_size = 100
+                count = db.query(DbKgEdge).filter(DbKgEdge.graph_id == graph_id, DbKgEdge.status ==0).count()                
+                results = db.query(DbKgEdge).filter(DbKgEdge.graph_id == graph_id, DbKgEdge.status ==0).limit(page_size).offset(start).all()   
+                rows = []
+                while (len(results) > 0):
+                    print (f"process {start}/{count}")
+                    for edge in results:
+                        src_node = edge.src_node
+                        dest_node = edge.dest_node
+                        rows.append({
+                            "src_category": src_node.category,
+                            "src_name": src_node.name,
+                            "dest_category": dest_node.category,
+                            "dest_name": dest_node.name,
+                            "category": edge.category,
+                            "name": edge.name,
+                        })
+                    writer.writerows(rows)
+                    start = start + len(results)
+                    results = db.query(DbKgEdge).filter(DbKgEdge.graph_id == graph_id, DbKgEdge.status ==0).limit(page_size).offset(start).all()
+                    rows = []
+                                
+ 
+        # Mark the task as finished
+        task.status = 2
+        task.updated = datetime.now()
+        db.commit()
+        filename = f"nodes_{task.id}.zip"        
+        filepath3 = os.path.join(export_dir, filename)        
+        if (zip_files(file_paths=[filepath1, filepath2], output_zip_path=filepath3)):            
+            task.status = 999
+            params['output_file'] = filename
+            task.task_content = json.dumps(params)
+            task.updated = datetime.now()
+            db.commit()
+        
+    except Exception as e:
+        # Handle task failure
+        task.status = -1
+        task.task_content = json.dumps({
+            "error": str(e),
+            **json.loads(task.task_content)
+        })
+        task.updated = datetime.now()
+        db.commit()
+        raise
+
+def task_worker():
+    print("connect to database")
+    db = SessionLocal()
+    try:        
+        while True:
+            # Query pending tasks
+            tasks = db.query(DbKgTask).filter(
+                DbKgTask.proj_id == 1,
+                DbKgTask.status == 0
+            ).all()
+            
+            for task in tasks:
+                print(f"process task {task.id}:{task.task_category}")
+                try:
+                    process_export_task(db, task)
+                except Exception as e:
+                    print(f"任务处理失败: {e}")
+                    continue
+            print("sleep")
+            time.sleep(10)
+            
+    finally:
+        db.close()
+
+if __name__ == "__main__":
+    task_worker()

+ 56 - 0
graph/cdss.pl

@@ -0,0 +1,56 @@
+:- encoding(utf8).
+:- dynamic reported_symptom/1.
+:- dynamic disease_score/2.
+:- include('main.pl').
+
+contains(Element, [Element|_]) :- !.
+contains(Element, [_|Tail]) :- contains(Element, Tail).
+
+
+clear_reported_symptoms :-
+    retractall(reported_symptom(_)).
+
+add_reported_symptom(Symptom) :-
+    assertz(reported_symptom(Symptom)).
+
+list_reported_symptoms :-
+    findall(Symptom, reported_symptom(Symptom), Symptoms),
+    (   Symptoms = []
+        -> writeln('No symptoms')
+        ;   writeln('Symptoms:'),
+            maplist(writeln, Symptoms)
+    ).
+
+clear_disease_scores :-
+    retractall(disease_score(_, _)).
+
+disease_score_add(Disease, Inc) :-
+       disease_score(Score, Disease),
+        NewScore is Score + Inc,
+        retract(disease_score(Score, Disease)),
+        assertz(disease_score(NewScore, Disease))
+    ;   assertz(disease_score(1, Disease)).
+
+possible_disease([],_).
+possible_disease(Disease) :-
+    symptom(Disease, Symptoms),
+    reported_symptom(Symptom),
+    contains(Symptom, Symptoms),
+    disease_score_add(Disease, 1).
+
+
+disease_check(Disease, CheckList):-
+    check(Disease, CheckList).
+
+all_symptoms_present([]).
+all_symptoms_present([Symptom|OtherSymptom]) :-
+    reported_symptom(Symptom),
+    writeln(Symptom),
+    all_symptoms_present(OtherSymptom).
+
+
+diagnose(DiseaseList) :-
+    clear_disease_scores,
+    findall(Disease, possible_disease(Disease), _),
+    findall([Disease,Score], disease_score(Score, Disease), DiseaseScores) ,
+    DiseaseList = DiseaseScores.

+ 0 - 0
graph/config/__init__.py


+ 14 - 0
graph/config/site.py

@@ -0,0 +1,14 @@
+
+from urllib.parse import quote
+import os
+POSTGRESQL_HOST = "localhost"
+POSTGRESQL_DATABASE = "kg"
+POSTGRESQL_USER = "postgres"
+POSTGRESQL_PASSWORD = quote("difyai123456")
+
+NEO4J_HOST = 'bolt://localhost:7687'
+NEO4J_USER = 'neo4j'
+NEO4J_PASSWORD = 'p@ssw0rd'
+
+FILE_STORAGE_PATH = "/home/data"
+TEMP_STORAGE_PATH = "/home/tmp"

+ 0 - 0
graph/db/__init__.py


+ 18 - 0
graph/db/database.py

@@ -0,0 +1,18 @@
+from sqlalchemy import create_engine
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import sessionmaker
+from config.site import POSTGRESQL_USER,POSTGRESQL_PASSWORD,POSTGRESQL_HOST,POSTGRESQL_DATABASE
+
+DATABASE_URL = f"postgresql+psycopg2://{POSTGRESQL_USER}:{POSTGRESQL_PASSWORD}@{POSTGRESQL_HOST}/{POSTGRESQL_DATABASE}"
+
+engine = create_engine(DATABASE_URL)
+SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
+
+Base = declarative_base()
+
+def get_db():
+    db = SessionLocal()
+    try:
+        yield db
+    finally:
+        db.close()
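
get_db() is a per-request generator dependency; a minimal sketch of consuming it from a FastAPI route, with an illustrative path and query:

    from fastapi import APIRouter, Depends
    from sqlalchemy.orm import Session
    from db.database import get_db
    from db.models import DbKgNode

    router = APIRouter()

    @router.get("/api/nodes/count")              # illustrative path
    def count_nodes(db: Session = Depends(get_db)):
        # The session is opened per request and closed by get_db()'s finally block.
        return {"count": db.query(DbKgNode).count()}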

+ 182 - 0
graph/db/models.py

@@ -0,0 +1,182 @@
+from sqlalchemy import Column, Integer, String, Text, ForeignKey,Float, DateTime
+from sqlalchemy.orm import relationship
+from .database import Base
+
+
+class DbKgProj(Base):
+    __tablename__ = "kg_projs"
+    
+    id = Column(Integer, primary_key=True, index=True)
+    proj_name = Column(String(64), nullable=False)
+    proj_category = Column(String(64), nullable=False)
+    proj_type = Column(String(64), nullable=False)
+    proj_conf = Column(String(300), nullable=False)
+    created = Column(DateTime, nullable=False)
+    updated = Column(DateTime, nullable=False)
+    status = Column(Integer, default=0)
+
+
+class DbKgGraphs(Base):
+    __tablename__ = "kg_graphs"
+
+    id = Column(Integer, primary_key=True, index=True)
+    category = Column(String(64), nullable=False)
+    name = Column(String(64), nullable=False)
+    graph_description = Column(String(64), nullable=False)
+    graph_settings = Column(Text, nullable=False)
+    created = Column(DateTime, nullable=False)
+    updated = Column(DateTime, nullable=False)
+    status = Column(Integer, default=0)
+
+class DbKgModels(Base):
+    __tablename__ = "kg_models"
+    
+    id = Column(Integer, primary_key=True, index=True)
+    model_category = Column(String(64), nullable=False)
+    model_name = Column(String(64), nullable=False)
+    model_description = Column(String(64), nullable=False)
+    model_settings = Column(Text, nullable=False)
+    created = Column(DateTime, nullable=False)
+    updated = Column(DateTime, nullable=False)
+    status = Column(Integer, default=0)
+    
+class DbKgTask(Base):
+    __tablename__ = "kg_tasks"
+    
+    id = Column(Integer, primary_key=True, index=True)
+    proj_id = Column(Integer, ForeignKey('kg_projs.id'), nullable=False)
+    task_category = Column(String(64), nullable=False)
+    task_name = Column(String(32), nullable=False)
+    task_content = Column(Text, nullable=False)
+    task_log = Column(Text, nullable=True)
+    created = Column(DateTime, nullable=False)
+    updated = Column(DateTime, nullable=False)
+    status = Column(Integer, default=0)
+    
+class DbKgSubGraph(Base):    
+    __tablename__ = "kg_sub_graph"
+    id = Column(Integer, primary_key=True, index=True)
+    graph_name = Column(String(64), nullable=False)
+    graph_content = Column(Text)
+    status = Column(Integer, default=0)
+
+class DbKgNode(Base):
+    __tablename__ = "kg_nodes"
+    
+    id = Column(Integer, primary_key=True, index=True)
+    graph_id = Column(Integer, default=0)
+    name = Column(String(64), nullable=False)
+    category = Column(String(64), nullable=False)
+    layout = Column(String(100))
+    version = Column(String(16))
+    status = Column(Integer, default=0)
+
+    props = relationship("DbKgProp", cascade = "delete", back_populates="node" ,foreign_keys='DbKgProp.ref_id', primaryjoin="DbKgNode.id==DbKgProp.ref_id")
+
+    
+class DbKgEdge(Base):
+    __tablename__ = "kg_edges"
+    
+    id = Column(Integer, primary_key=True, index=True)
+    graph_id = Column(Integer, default=0)
+    category = Column(String(64), nullable=False)
+    src_id = Column(Integer, ForeignKey('kg_nodes.id'), nullable=False)
+    dest_id = Column(Integer, ForeignKey('kg_nodes.id'), nullable=False)
+    name = Column(String(64), nullable=False)
+    version = Column(String(16))
+    status = Column(Integer, default=0)
+
+    src_node = relationship("DbKgNode",  primaryjoin="DbKgEdge.src_id==DbKgNode.id", lazy="select")
+    dest_node = relationship("DbKgNode",  primaryjoin="DbKgEdge.dest_id==DbKgNode.id", lazy="select")
+
+class DbKgProp(Base):
+    __tablename__ = "kg_props"
+    
+    id = Column(Integer, primary_key=True, index=True)
+    category = Column(Integer, default=0)
+    ref_id = Column(Integer, ForeignKey('kg_nodes.id'), nullable=False)
+    prop_name = Column(String(64), nullable=False)
+    prop_title = Column(String(64), nullable=False)
+    prop_value = Column(Text)
+
+    node = relationship("DbKgNode", back_populates="props", foreign_keys=[ref_id], primaryjoin="DbKgProp.ref_id==DbKgNode.id")
+    #edge = relationship("DbKgEdge", back_populates="props", foreign_keys=[ref_id], primaryjoin="DbKgProp.ref_id==DbKgEdge.id")
+
+class DbDictICD(Base):
+    __tablename__ = "dict_icd10"
+    
+    id = Column(Integer, primary_key=True, index=True)
+    icd_code = Column(String(50), nullable=False)
+    icd_name = Column(String(150), nullable=False)
+    
+class DbDictDRG(Base):
+    __tablename__ = "dict_drg"
+    
+    id = Column(Integer, primary_key=True, index=True)
+    drg_code = Column(String(64), nullable=False)
+    drg_name = Column(String(128), nullable=False)
+    drg_weight = Column(Float(128), nullable=False)
+    
+
+class DbDictDrug(Base):
+    __tablename__ = "dict_social_drug"
+    
+    id = Column(Integer, primary_key=True, index=True)
+    data_source = Column(String(10), nullable=True)
+    drug_code = Column(String(64), nullable=True)
+    reg_name = Column(String(64), nullable=True)
+    prod_name = Column(String(128), nullable=True)
+    reg_dosage_form = Column(String(50), nullable=True)
+    act_dosage_form = Column(String(50), nullable=True)
+    reg_spec = Column(String(250), nullable=True)
+    act_spec = Column(String(250), nullable=True)
+    pkg_mat = Column(String(150), nullable=True)
+    min_pack_size = Column(String(50), nullable=True)
+    min_pack_unit = Column(String(100), nullable=True)
+    min_dosage_unit = Column(String(100), nullable=True)
+    prod_factory = Column(String(250), nullable=True)
+    license_no = Column(String(64), nullable=True)
+    drug_std_code = Column(String(64), nullable=True)
+    subpkg_factory = Column(String(64), nullable=True)
+    sales_status = Column(String(100), nullable=True)
+    social_insurance_name = Column(String(200), nullable=True)
+    jiayi_category = Column(String(50), nullable=True)
+    social_dosage_form = Column(String(50), nullable=True)
+    serial_no = Column(String(50), nullable=True)
+    comments = Column(String(300), nullable=True)
+
+
+class DbKgSchemas(Base):
+    __tablename__ = "kg_schemas"
+    
+    id = Column(Integer, primary_key=True, index=True)
+    schema_system = Column(String(64), nullable=False)
+    schema_type = Column(String(64), nullable=False)
+    name = Column(String(64), nullable=False)
+    category = Column(String(64), nullable=False)
+    content = Column(Text())
+    version = Column(String(16))
+    status = Column(Integer, default=0)
+    
+class DbUsers(Base):
+    __tablename__ ="users"
+    id = Column(Integer, primary_key=True, index=True)
+    username = Column(String(32), nullable=False)
+    full_name = Column(String(64), nullable=False)
+    email = Column(String(100), nullable=False)
+    hashed_password = Column(String(64), nullable=False)
+    status = Column(Integer, default=0)
+
+class DbKgDataset(Base):
+    __tablename__ = "kg_datasets"
+    id = Column(Integer, primary_key=True, index=True)
+    data_category = Column(String(64), nullable=False)
+    data_name = Column(String(64), nullable=False)
+    data_comments = Column(Text(), nullable=False)
+    created = Column(DateTime, nullable=False)
+    updated = Column(DateTime, nullable=False)
+    status = Column(Integer, default=0)
+
+__all__=['DbKgEdge','DbKgNode','DbKgProp','DbDictICD','DbDictDRG',
+         'DbDictDrug','DbKgSchemas','DbKgSubGraph','DbKgModels',
+         'DbKgGraphs', 'DbKgDataset', 'DbKgProj', 'DbKgTask', 'DbUsers']

+ 124 - 0
graph/db/neo4j.py

@@ -0,0 +1,124 @@
+
+from py2neo import Graph, Node, Relationship, Transaction, RelationshipMatcher, NodeMatcher
+from config.site import NEO4J_HOST,NEO4J_PASSWORD,NEO4J_USER
+def get_neo4j_db():
+    # Connect to the Neo4j database
+    graph = Graph(NEO4J_HOST, auth=(NEO4J_USER, NEO4J_PASSWORD))
+    #tx = Transaction(graph)
+    return graph
+
+def trim(s, c):
+    # Strip the character c from both ends of s
+    if s is None or s == '':
+        return ''
+    # leading characters
+    while s[:1] == c:
+        s = s[1:]
+    # trailing characters
+    while s[-1:] == c:
+        s = s[:-1]
+    return s
+
+def trimall(part):    
+    part = trim(part, "\n")
+    part = trim(part, ".")
+    part = trim(part, " ")
+    part = trim(part, "'")
+    part = trim(part, "`")
+    part = trim(part, ")")
+    part = trim(part, "(")
+    part = trim(part, "'")
+    part = trim(part, "_")
+    part = trim(part, "\"")
+    part = trim(part, "-")
+    part = trim(part, "[")
+    part = trim(part, "]")
+    part = part.replace("`", "")
+    part = part.replace("\"", "\\\"")
+    return part
+
+def get_all_entities_of_ent_typ(graph, ent_typ):
+    matcher = NodeMatcher(graph)
+    ent_list = list(matcher.match(ent_typ))
+    ent_list = [ent['name'] for ent in ent_list]
+    return ent_list
+ 
+# Insert triples into Neo4j
+def triples2neo4j(graph, triples, one2many=False, many2one=False):  # allow one-to-many / many-to-one relations
+    for triple in triples:
+        # Unpack head entity, tail entity and relation
+        ent_1, ent_2, rel = triple
+        head, head_typ = ent_1
+        head_node = Node(head_typ, name=head)
+        tail, tail_typ = ent_2
+        tail_node = Node(tail_typ, name=tail)
+        # Existing entities of the head type
+        head_list = get_all_entities_of_ent_typ(graph, head_typ)
+        # Existing entities of the tail type
+        tail_list = get_all_entities_of_ent_typ(graph, tail_typ)
+        # Both head and tail entities already exist
+        if head in head_list and tail in tail_list:
+            graph.merge(head_node, head_typ, "name")
+            graph.merge(tail_node, tail_typ, "name")
+            if list(RelationshipMatcher(graph).match((head_node, tail_node), r_type=rel)):
+                print(f'triple ({head}, {tail}, {rel}) already exists in the graph, skipped')
+            else:
+                graph.create(Relationship(head_node, rel, tail_node))
+                print(f'triple ({head}, {tail}, {rel}) inserted')
+        # Only the head entity exists
+        elif head in head_list and tail not in tail_list:
+            graph.merge(head_node, head_typ, "name")
+            if list(RelationshipMatcher(graph).match((head_node, None), r_type=rel)):
+                if not one2many:
+                    print(f'head {head} already has a {rel} relation, triple ({head}, {tail}, {rel}) skipped')
+                    continue
+            graph.create(tail_node)
+            graph.create(Relationship(head_node, rel, tail_node))
+            print(f'triple ({head}, {tail}, {rel}) inserted')
+        # Only the tail entity exists
+        elif head not in head_list and tail in tail_list:
+            graph.merge(tail_node, tail_typ, "name")
+            if list(RelationshipMatcher(graph).match((None, tail_node), r_type=rel)):
+                if not many2one:
+                    print(f'tail {tail} already has a {rel} relation, triple ({head}, {tail}, {rel}) skipped')
+                    continue
+            graph.create(head_node)
+            graph.create(Relationship(head_node, rel, tail_node))
+            print(f'triple ({head}, {tail}, {rel}) inserted')
+        # Neither head nor tail entity exists
+        else:
+            graph.create(head_node)
+            graph.create(tail_node)
+            graph.create(Relationship(head_node, rel, tail_node))
+            print(f'triple ({head}, {tail}, {rel}) inserted')
+ 
+#triples = []
+    # #(['李沐','Per'], ['CMU', 'Sch'], '毕业于'),
+    # #(['李沐', 'Per'], ['沐神的小迷弟', 'Per'], '迷弟'),
+    # (['李沐','Per'], ['中国', 'Cou'], '出生于'),
+    # (['李沐','Per'], ['亚马逊', 'Com'], '就职于'),
+    # (['沐神的小迷弟', 'Per'], ['西安交通大学', 'Sch'], '就读于'),
+    # (['李沐','Per'], ['上海交通大学', 'Sch'], '毕业于'),
+    # (['李沐','Per'], ['百度', 'Com'], '就职于'),
+    #     ]
+
+
+
+# for line in lines:
+#     line.strip()
+#     if line.find("[]") >=0:
+#         continue
+#     #print(line)
+#     parts = line.split(",")
+#     ent1 = trimall(parts[0])
+#     rela = trimall(parts[1])
+#     ent2 = trimall(parts[2])
+#     if ent1 == ent2:
+#         continue
+#     if ent1 is None or ent2 is None or rela is None:
+#         continue
+#     else:
+#         triples.append(([ent1, 'Basic'],[ent2, 'Basic'], rela))
+    
+# triples2neo4j(graph, triples, one2many=False, many2one=False)
+
+# print("数据插入成功")

+ 320 - 0
graph/db/schemas.py

@@ -0,0 +1,320 @@
+from pydantic import BaseModel,Field
+from typing import List, Optional
+from datetime import datetime
+
+class KgSubGraphCreate(BaseModel):
+    graph_name: str
+    graph_content: str
+    status :int
+    
+    def to_json(self):
+        dict = self.__dict__
+        if "_sa_instance_state" in dict:
+            del dict["_sa_instance_state"]
+        return dict       
+
+class KgSubGraph(KgSubGraphCreate):    
+    id: int
+
+    class Config:
+        from_attributes = True
+    
+class KgPropCreate(BaseModel):
+    category: int
+    prop_name: str
+    prop_value: str
+    prop_title: str
+    
+    def to_json(self):
+        dict = self.__dict__
+        if "_sa_instance_state" in dict:
+            del dict["_sa_instance_state"]
+        return dict       
+
+class KgProp(KgPropCreate):
+    id: int
+    ref_id: int
+
+    class Config:
+        from_attributes = True
+        
+class KgNodeCreate(BaseModel):
+    name: str
+    category: str
+    layout: Optional[str] = None
+    version: Optional[str] = None
+    
+    def to_json(self):
+        dict = self.__dict__
+        if "_sa_instance_state" in dict:
+            del dict["_sa_instance_state"]
+        return dict       
+
+class KgNode(KgNodeCreate):
+    id: int
+    graph_id: int
+    status: int
+    props: List[KgProp]
+    class Config:
+        from_attributes = True
+
+class KgNodeMerge(BaseModel):
+    src_id: int
+    dest_id: int
+
+class KgEdgeCreate(BaseModel):
+    graph_id: int
+    category: str
+    src_id: int
+    dest_id: int
+    name: str
+    version: Optional[str] = None
+    
+    def to_json(self):
+        dict = self.__dict__
+        if "_sa_instance_state" in dict:
+            del dict["_sa_instance_state"]
+        return dict       
+    
+    class Config:
+        from_attributes = True
+
+class KgEdge(KgEdgeCreate):
+    id: int
+    status: int
+    src_node: KgNode
+    dest_node: KgNode
+    
+    class Config:
+        from_attributes = True
+        
+class KgEdgeName(BaseModel):
+    category: str
+    name: str
+    
+    class Config:
+        from_attributes = True
+        
+        
+class DictICD(BaseModel):
+    id: int
+    icd_code: str
+    icd_name: str
+    
+    def to_json(self):
+        dict = self.__dict__
+        if "_sa_instance_state" in dict:
+            del dict["_sa_instance_state"]
+        return dict   
+        
+    class Config:
+        from_attributes = True
+
+
+class DictDRG(BaseModel):
+    id: int
+    drg_code: str
+    drg_name: str
+    drg_weight: float
+    
+    def to_json(self):
+        dict = self.__dict__
+        if "_sa_instance_state" in dict:
+            del dict["_sa_instance_state"]
+        return dict   
+        
+    class Config:
+        from_attributes = True
+
+class DictDrug(BaseModel):
+    
+    id: int
+    data_source : str
+    drug_code : str
+    reg_name : str
+    prod_name : str
+    reg_dosage_form : str
+    act_dosage_form : str
+    reg_spec : str
+    act_spec: str
+    pkg_mat : str
+    min_pack_size : str
+    min_pack_unit : str
+    min_dosage_unit : str
+    prod_factory : str
+    license_no : str
+    drug_std_code: str
+    subpkg_factory: str
+    sales_status : str
+    social_insurance_name : str
+    jiayi_category : str
+    social_dosage_form : str
+    serial_no: str
+    comments : str
+    
+    def to_json(self):
+        dict = self.__dict__
+        if "_sa_instance_state" in dict:
+            del dict["_sa_instance_state"]
+        return dict   
+        
+    class Config:
+        from_attributes = True
+    
+
+class KgSchemasCreate(BaseModel):
+    name: str
+    category: str
+    content: str
+    version: str
+    
+class KgSchemasUpdate(BaseModel):
+    id:int
+    name: str
+    category: str
+    content: str
+    version: str
+
+class KgSchemaSystems(BaseModel):
+    schema_system:str
+    class Config:
+        from_attributes = True
+    
+class KgSchemas(BaseModel):    
+    id: int
+    name: str
+    schema_system: str
+    schema_type: str
+    category: str
+    content: str
+    version: Optional[str] = None
+    
+    def to_json(self):
+        dict = self.__dict__
+        if "_sa_instance_state" in dict:
+            del dict["_sa_instance_state"]
+        return dict   
+        
+    class Config:
+        from_attributes = True
+
+class NLPRequest(BaseModel):
+    category: str
+    name: str
+    content: str    
+
+
+class KgProjCreate(BaseModel):    
+    proj_name : str
+    proj_category : str
+    proj_type : str
+    proj_conf : str
+    
+class KgProj(KgProjCreate):    
+    id : int
+    created : datetime
+    updated : datetime
+    status : int
+    
+    def to_json(self):
+        dict = self.__dict__
+        if "_sa_instance_state" in dict:
+            del dict["_sa_instance_state"]
+        return dict   
+    
+    class Config:
+        from_attributes = True
+
+class KgTaskUpdate(BaseModel):
+    id: int
+    task_log: str
+
+class KgTaskCreate(BaseModel):    
+    proj_id : int
+    task_category : str
+    task_content : str
+    task_name : Optional[str]
+    
+class KgTask(KgTaskCreate):    
+    id : int
+    task_log : Optional[str]
+    created : datetime
+    updated : datetime
+    status : int
+    
+    def to_json(self):
+        dict = self.__dict__
+        if "_sa_instance_state" in dict:
+            del dict["_sa_instance_state"]
+        return dict   
+    
+    class Config:
+        from_attributes = True
+
+class KgModels(BaseModel):
+    id: int
+    model_name: str
+    model_category: str
+    model_description: str
+    model_settings: str
+    created : datetime
+    updated : datetime
+    status : int
+    
+    def to_json(self):
+        dict = self.__dict__
+        if "_sa_instance_state" in dict:
+            del dict["_sa_instance_state"]
+        return dict   
+    
+    class Config:
+        from_attributes = True
+
+class KgGraphCreate(BaseModel):
+    name: str
+    category: str
+    graph_description: str
+    schema_id: int
+
+class KgGraphs(BaseModel):
+    id: int
+    name: str
+    category: str
+    graph_description: str
+    graph_settings: str
+    created: datetime
+    updated: datetime
+    status: int
+
+    def to_json(self):
+        dict = self.__dict__
+        if "_sa_instance_state" in dict:
+            del dict["_sa_instance_state"]
+        return dict
+
+    class Config:
+        from_attributes = True
+
+
+class KgDataset(BaseModel):
+    id: int
+    data_name: str
+    data_category: str
+    data_comments: str
+    created: datetime
+    updated: datetime
+    status: int
+
+    def to_json(self):
+        dict = self.__dict__
+        if "_sa_instance_state" in dict:
+            del dict["_sa_instance_state"]
+        return dict
+
+    class Config:
+        from_attributes = True
+__all__=['KgSchemaSystems','NLPRequest','KgNodeMerge','KgNode','KgNodeCreate',
+         'KgEdge','KgEdgeCreate','KgProp','KgPropCreate','DictICD','DictDRG','DictDrug',
+         'KgSchemas','KgSchemasCreate','KgSchemasUpdate','KgEdgeName',
+         'KgSubGraphCreate','KgSubGraph','KgModels','KgGraphs','KgGraphCreate',
+         'KgDataset']
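
These pydantic models set from_attributes = True, so they can be hydrated directly from the SQLAlchemy rows in db/models.py. A minimal sketch with pydantic 2.x (as pinned in graph/environment.yml); the node id is illustrative:

    from db.database import SessionLocal
    from db.models import DbKgNode
    from db.schemas import KgNode

    db = SessionLocal()
    row = db.get(DbKgNode, 1)            # illustrative node id
    node = KgNode.model_validate(row)    # ORM row -> pydantic model via from_attributes
    print(node.model_dump())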

+ 27 - 0
graph/deep/read_drug.py

@@ -0,0 +1,27 @@
+import os
+
+f = open("d:/work/data/药品说明书.csv", "r", encoding='gb18030')
+
+count = 0
+buff = ""
+line_index = 0
+while True:
+    try:
+        line = f.readline()
+        line_index = line_index + 1
+    except Exception as e:
+        print (line_index)
+        print (e)
+        break
+    if line is None or line == '':
+        print("finished %d " % count)
+        break
+    if line.strip().endswith(',,,,'):
+        buff = buff + line
+        print(buff)
+        print("="*60)
+        buff = ""
+        count = count + 1
+    else:
+        buff = buff + line
+f.close()

File diff is not shown because this file is too large
+ 43445 - 0
graph/dict/jieba_dict.txt


+ 141 - 0
graph/dict/sample.json

@@ -0,0 +1,141 @@
+{
+  "schema_data":[
+    {
+      "name":"食品",
+      "category":"Food",
+      "version":"1.0",
+      "props": []
+    },
+    {
+      "name":"科室",
+      "category":"Department",
+      "version":"1.0",
+      "props": []
+    },
+    {
+      "name":"药品",
+      "category":"Drug",
+      "version":"1.0",
+      "props": []
+    },
+    {
+      "name":"检查",
+      "category":"Check",
+      "version":"1.0",
+      "props": []
+    },
+    {
+      "name":"症状",
+      "category":"Symptom",
+      "version":"1.0",
+      "props": []
+    },
+    {
+      "name":"厂商",
+      "category":"Producer",
+      "version":"1.0",
+      "props": []
+    },
+    {
+      "name":"疾病",
+      "category":"Disease",
+      "version":"1.0",
+      "props": [
+        {
+          "name": "相关疾病", "category": "associate_disease"
+        },
+        {
+          "name": "相关症状", "category": "associate_symptom"
+        },
+        {
+          "name": "身体部位", "category": "bodypart"
+        },
+        {
+          "name": "病因", "category": "cause"
+        },
+        {
+          "name": "相关检查", "category": "check"
+        },
+        {
+          "name": "常用药", "category": "commonly_used_drugs"
+        },
+        {
+          "name": "相关疾病", "category": "complication"
+        },
+        {
+          "name": "科室", "category": "cure_department"
+        },
+        {
+          "name": "预后", "category": "cured_prob"
+        },
+        {
+          "name": "治疗时间", "category": "cure_lasttime"
+        },
+        {
+          "name": "治愈率", "category": "cure_rate"
+        },
+        {
+          "name": "治疗方案", "category": "cure_way"
+        },
+        {
+          "name": "描述", "category": "desc"
+        },
+        {
+          "name": "诊断方法", "category": "disease_diagnosis"
+        },
+        {
+          "name": "诊断检验", "category": "disease_examination"
+        },
+        {
+          "name": "诊断症状", "category": "disease_symptoms"
+        },
+        {
+          "name": "诊断治疗", "category": "disease_treatment"
+        },
+        {
+          "name": "易感染", "category": "easy_get"
+        },
+        {
+          "name": "检验", "category": "examination"
+        },
+        {
+          "name": "发生率", "category": "incidence_rate"
+        },
+        {
+          "name": "保险信息", "category": "insurance"
+        },
+        {
+          "name": "介绍", "category": "introduction"
+        },
+        {
+          "name": "临床科室", "category": "medical_department"
+        },
+        {
+          "name": "传染途径", "category": "mode_of_transmission"
+        },
+        {
+          "name": "预防措施", "category": "prevent"
+        },
+        {
+          "name": "康复建议", "category": "rehabilitation_tips"
+        },
+        {
+          "name": "易感人群", "category": "susceptible_person"
+        },
+        {
+          "name": "症状诊断", "category": "symptom_diagnosis"
+        },
+        {
+          "name": "治疗费用", "category": "treatment_costs"
+        },
+        {
+          "name": "治疗周期", "category": "treatment_cycle"
+        },
+        {
+          "name": "治疗方法", "category": "treatment_methods"
+        }
+      ]
+    }
+
+  ]
+}

+ 59 - 0
graph/environment.yml

@@ -0,0 +1,59 @@
+name: kg-server
+channels:
+  - defaults
+  - https://repo.anaconda.com/pkgs/main
+  - https://repo.anaconda.com/pkgs/r
+  - https://repo.anaconda.com/pkgs/msys2
+dependencies:
+  - ca-certificates=2024.11.26=haa95532_0
+  - krb5=1.20.1=h5b6d351_0
+  - libffi=3.4.4=hd77b12b_1
+  - libpq=12.20=h70ee33d_0
+  - openssl=3.0.15=h827c3e9_0
+  - pip=24.2=py38haa95532_0
+  - psycopg2=2.9.9=py38h2bbff1b_0
+  - python=3.8.20=h8205438_0
+  - setuptools=75.1.0=py38haa95532_0
+  - sqlite=3.45.3=h2bbff1b_0
+  - vc=14.40=haa95532_2
+  - vs2015_runtime=14.42.34433=h9531ae6_2
+  - wheel=0.44.0=py38haa95532_0
+  - zlib=1.2.13=h8cc25b3_1
+  - pip:
+      - annotated-types==0.7.0
+      - anyio==4.5.2
+      - bcrypt==4.2.1
+      - certifi==2024.8.30
+      - click==8.1.7
+      - colorama==0.4.6
+      - ecdsa==0.19.0
+      - exceptiongroup==1.2.2
+      - fastapi==0.115.6
+      - greenlet==3.1.1
+      - h11==0.14.0
+      - idna==3.10
+      - interchange==2021.0.4
+      - jose==1.0.0
+      - monotonic==1.6
+      - neo4j==5.27.0
+      - packaging==24.2
+      - pansi==2024.11.0
+      - passlib==1.7.4
+      - pillow==10.4.0
+      - py2neo==2021.2.4
+      - pyasn1==0.6.1
+      - pydantic==2.10.3
+      - pydantic-core==2.27.1
+      - pygments==2.18.0
+      - python-jose==3.3.0
+      - python-multipart==0.0.19
+      - pytz==2024.2
+      - rsa==4.9
+      - six==1.17.0
+      - sniffio==1.3.1
+      - sqlalchemy==2.0.36
+      - starlette==0.41.3
+      - typing-extensions==4.12.2
+      - urllib3==2.2.3
+      - uvicorn==0.32.1
+prefix: C:\Users\jiyua\.conda\envs\kg-server

+ 78 - 0
graph/export_prolog.py

@@ -0,0 +1,78 @@
+from sqlalchemy import create_engine
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import sessionmaker
+from config.site import POSTGRESQL_USER,POSTGRESQL_PASSWORD,POSTGRESQL_DATABASE
+from db.models import *
+POSTGRESQL_HOST = '127.0.0.1'
+DATABASE_URL = f"postgresql+psycopg2://{POSTGRESQL_USER}:{POSTGRESQL_PASSWORD}@{POSTGRESQL_HOST}/{POSTGRESQL_DATABASE}"
+
+engine = create_engine(DATABASE_URL)
+SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
+
+Base = declarative_base()
+
+def get_db():
+    db = SessionLocal()
+    try:
+        yield db
+    finally:
+        db.close()
+
+db = SessionLocal()
+
+rules = []
+def get_disease_filename(id):
+    filename = f'./prolog/dis_{id}.pl'
+    return filename
+
+def save_disease_info(f, id):
+    disease_data = {}
+    datas = db.query(DbKgEdge).filter(DbKgEdge.src_id == id).order_by(DbKgEdge.category).all()
+    for data in datas:
+        src_type = str.lower(data.src_node.category)
+        dest_type = str.lower(data.dest_node.category)
+        src_lead_str = src_type[:3]
+        dest_lead_str = dest_type[:3]
+        obj_id = f"{dest_lead_str}_{data.dest_node.id}"
+        if dest_type in disease_data.keys():
+            disease_data[dest_type].append(obj_id)
+        else:
+            disease_data[dest_type] = [obj_id]
+        # rules.append(f"{dest_type}({dest_lead_str}_{data.dest_node.id}).\n")
+        # rules.append(f"{data.category}({src_lead_str}_{data.src_node.id},{dest_lead_str}_{data.dest_node.id}).\n")
+    datas = db.query(DbKgEdge).filter(DbKgEdge.dest_id == id).all()
+    for data in datas:
+        src_type = str.lower(data.src_node.category)
+        dest_type = str.lower(data.dest_node.category)
+        src_lead_str = src_type[:3]
+        dest_lead_str = dest_type[:3]
+        obj_id = f"{src_lead_str}_{data.src_node.id}"
+        if src_type in disease_data.keys():
+            disease_data[src_type].append(obj_id)
+        else:
+            disease_data[src_type] = [obj_id]
+        # rules.append(f"{src_type}({src_lead_str}_{data.src_node.id}).\n")
+        # rules.append(f"{data.category}({dest_lead_str}_{data.dest_node.id},{src_lead_str}_{data.src_node.id}).\n")
+    for key in disease_data.keys():
+        rules.append(f"{key}(dis_{id},[" + ",".join(disease_data[key])+"]).\n")
+
+
+def fetch_disease(session):
+    total = session.query(DbKgNode).filter(DbKgNode.category == 'Disease', DbKgNode.status == 0).count()
+    disease_data = session.query(DbKgNode).filter(DbKgNode.category == 'Disease', DbKgNode.status == 0).all()
+    main_file = open("main.pl", "w", encoding='utf-8')
+    index = 1
+    for data in disease_data:
+        #filename = get_disease_filename(data.id)
+        #main_file.write(f":- include('{filename}').\n")
+        print(f"{index}:{data.name}")
+        #f = open(filename, "w", encoding='utf-8')
+        #f.write(f"disease(dis_{data.id}).\n")
+        save_disease_info(None, data.id)
+        index = index + 1
+        #f.close()
+    rules.sort()
+    for rule in rules:
+        main_file.write(rule)
+    main_file.close()
+fetch_disease(db)

File diff is not shown because this file is too large
+ 48180 - 0
graph/main.pl


+ 105 - 0
graph/main.py

@@ -0,0 +1,105 @@
+from fastapi import FastAPI, Depends,HTTPException,Request,status,Response
+from router.auth import auth_router, oauth2_scheme, verify_token
+from router.users import users_router
+from router.graph import graph_router
+from router.file_browse import file_browse_router
+from router.kg import kg_router
+from router.tasks import task_router
+from router.nlp_models import  nlp_router
+from router.labeling import labeling_router
+from router.data_import import data_import_router
+from router.data_export import data_export_router
+from router.dify_kb import dify_kb_router
+from db.database import Base, engine
+import router.errors as errors
+import jieba
+
+jieba.load_userdict("./dict/jieba_dict.txt")
+
+
+    
+#from models.response import ResponseFormatterMiddleware
+app = FastAPI()
+@app.middleware("http")
+async def debug_middleware(request: Request, call_next):
+    print(f"Request path: {request.url.path}")
+    response = await call_next(request)
+    if response.status_code == 404:
+        print(f"404 Not Found for path: {request.url.path}")
+    return response        
+errors.registerCustomErrorHandle(app)
+Base.metadata.create_all(bind=engine)
+# Register middleware on the app
+#app.add_middleware(ResponseFormatterMiddleware)
+# Mount routers
+app.include_router(auth_router)
+app.include_router(users_router)
+#app.include_router(graph_router, dependencies=[Depends(oauth2_scheme)])
+app.include_router(file_browse_router)
+app.include_router(kg_router)
+app.include_router(nlp_router)
+app.include_router(task_router)
+app.include_router(labeling_router)
+app.include_router(data_import_router)
+app.include_router(data_export_router)
+app.include_router(dify_kb_router)
+
+def print_all_routes():
+    print("Listing all routes:")
+    for route in app.routes:
+        # Route path
+        path = route.path
+        # HTTP methods
+        methods = ','.join([method for method in route.methods]) if hasattr(route, 'methods') else 'N/A'
+        # Route name (usually the handler function name)
+        name = route.name
+        print(f"Path: {path}, Methods: {methods}, Name: {name}")
+
+        
+print_all_routes()
+# @app.middleware("http")
+# async def verify_http_token(request: Request, call_next):
+#     auth_error = HTTPException(
+#         status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+#         detail="Internal services error",
+#         headers={"WWW-Authenticate": "Bearer"},  # OAuth2的规范,如果认证失败,请求头中返回“WWW-Authenticate”
+#     )
+#     # 获取请求路径
+#     path: str = request.get('path')
+#     # 登录接口、docs文档依赖的接口,不做token校验
+#     if path.startswith('/api/token') | path.startswith('/api/docs') | path.startswith('/api/openapi') | path.startswith('/api/node-create'):
+#         response = await call_next(request)
+#         return response
+#     else:
+#         try:
+#             # Read the token from the Authorization header
+#             authorization: str = request.headers.get('authorization')         
+                
+#             response = Response(
+#                     status_code=status.HTTP_401_UNAUTHORIZED,
+#                     content="Invalid authentication credentials",
+#                     headers={"WWW-Authenticate": "Bearer"},  # OAuth2的规范,如果认证失败,请求头中返回“WWW-Authenticate”
+#             )
+#             if not authorization:
+#                 print("request was not authorized")
+#                 return response
+#             # Extract the token value
+#             token = authorization.split(' ')[1]
+#             # Custom token-verification logic; replace with your own implementation
+            
+#             if verify_token(token):
+#                 print("request token was correct")
+#                 response = await call_next(request)
+#                 return response
+            
+#             print("request token was incorrect")
+#             return response
+#         except Exception as e:
+#             print("http_middleware error: ", e)
+            
+#             return auth_error
+        
+        
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app='main:app', host="0.0.0.0", port=8000, reload=True)

+ 126 - 0
graph/mirge.py

@@ -0,0 +1,126 @@
+import neo4j
+import psycopg2
+from neo4j import GraphDatabase
+
+# Neo4j connection details
+NEO4J_URI = "bolt://localhost:7687"
+NEO4J_USER = "neo4j"
+NEO4J_PASSWORD = "p@ssw0rd"
+
+# PostgreSQL connection details
+POSTGRESQL_HOST = "127.0.0.1"
+POSTGRESQL_DATABASE = "postgres"
+POSTGRESQL_USER = "postgres"
+POSTGRESQL_PASSWORD = "p@ssw0rd"
+
+def connect_to_neo4j():
+    return GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
+
+def connect_to_postgresql():
+    return psycopg2.connect(
+        host=POSTGRESQL_HOST,
+        dbname=POSTGRESQL_DATABASE,
+        user=POSTGRESQL_USER,
+        password=POSTGRESQL_PASSWORD
+    )
+
+def upsert_node(pg_cursor, node):
+    # Insert or update a node in the kg_nodes table
+    pg_cursor.execute("""
+        INSERT INTO public.kg_nodes (id, name, category)
+        VALUES (DEFAULT, %s, %s) 
+        ON CONFLICT (name, category) DO UPDATE SET
+            name = EXCLUDED.name,
+            category = EXCLUDED.category
+        RETURNING id;
+    """, (node['name'], node['label']))
+    
+    node_id = pg_cursor.fetchone()[0]
+    
+    # Upsert properties into kg_props table
+    for key, value in node.items():
+        if key not in ['id', 'name', 'label']:
+            pg_cursor.execute("""
+                INSERT INTO public.kg_props (category, ref_id, prop_name, prop_value)
+                VALUES (%s, %s, %s, %s)
+                ON CONFLICT (ref_id, prop_name) DO UPDATE SET
+                    prop_value = EXCLUDED.prop_value;
+            """, (1, node_id, key, value))
+    
+    return node_id
+
+def upsert_edge(pg_cursor, edge, src_id, dest_id):
+    # Insert or update an edge in the kg_edges table
+    pg_cursor.execute("""
+        INSERT INTO public.kg_edges (id, category, src_id, dest_id, name)
+        VALUES (DEFAULT, %s, %s, %s, %s)
+        ON CONFLICT (src_id, dest_id, name) DO UPDATE SET
+            name = EXCLUDED.name,
+            category = EXCLUDED.category
+        RETURNING id;
+    """, (edge['type'], src_id, dest_id, edge['name']))
+    
+    edge_id = pg_cursor.fetchone()[0]
+    
+    # Upsert properties into kg_props table
+    for key, value in edge.items():
+        if key not in ['id', 'type', 'src_id', 'dest_id', 'name']:
+            pg_cursor.execute("""
+                INSERT INTO public.kg_props (category, ref_id, prop_name, prop_value)
+                VALUES (%s, %s, %s, %s)
+                ON CONFLICT (ref_id, prop_name) DO UPDATE SET
+                    prop_value = EXCLUDED.prop_value;
+            """, (2, edge_id, key, value))
+
+def migrate_data():
+    neo4j_driver = connect_to_neo4j()
+    pg_conn = connect_to_postgresql()
+    with pg_conn.cursor() as pg_cursor:
+        with neo4j_driver.session() as session:
+            # Fetch nodes and edges from Neo4j
+            print(">>> data fetching...")
+            result = session.run("""
+                MATCH (n:Disease|Symptom|Drug|Check|Department|Food|Producer)-[r]->(m:Disease|Symptom|Drug|Check|Department|Food|Producer)
+                RETURN n, labels(n) AS label, r, type(r) AS type, m, labels(m) AS label_m
+            """)
+            print(">>> data fetched")
+            # Process each record
+            nodes_dict = {}
+            for record in result:
+                print(">>> process record")
+                # Process source node
+                src_node_key = (record['n']['name'], tuple(sorted(record['label'])))
+                
+                print(">>> process record", src_node_key)
+                if src_node_key not in nodes_dict:
+                    nodes_dict[src_node_key] = upsert_node(pg_cursor, {
+                        'name': record['n']['name'],
+                        'label': next((lbl for lbl in record['label'] if lbl in ['Disease', 'Symptom', 'Drug', 'Check', 'Department', 'Food', 'Producer']), ''),
+                        **record['n']._properties
+                    })
+                
+                # Process destination node
+                dest_node_key = (record['m']['name'], tuple(sorted(record['label_m'])))
+                
+                print(">>> process record", dest_node_key)
+                if dest_node_key not in nodes_dict:
+                    nodes_dict[dest_node_key] = upsert_node(pg_cursor, {
+                        'name': record['m']['name'],
+                        'label': next((lbl for lbl in record['label_m'] if lbl in ['Disease', 'Symptom', 'Drug', 'Check', 'Department', 'Food', 'Producer']), ''),
+                        **record['m']._properties
+                    })
+
+                # Process edge
+                upsert_edge(pg_cursor, {
+                    'type': record['type'],
+                    'name': record['r']['name'] if 'name' in record['r']._properties else record['type'],
+                    **record['r']._properties
+                }, nodes_dict[src_node_key], nodes_dict[dest_node_key])
+
+        pg_conn.commit()
+
+    neo4j_driver.close()
+    pg_conn.close()
+
+if __name__ == "__main__":
+    migrate_data()
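
Note: the ON CONFLICT targets in upsert_node() and upsert_edge() only work if matching unique constraints exist on kg_nodes(name, category), kg_edges(src_id, dest_id, name) and kg_props(ref_id, prop_name). A hedged setup sketch, assuming those constraints are not already created by the project's SQL schema (constraint names are illustrative; connection details are the ones hardcoded above):

    import psycopg2

    conn = psycopg2.connect(host="127.0.0.1", dbname="postgres",
                            user="postgres", password="p@ssw0rd")
    with conn, conn.cursor() as cur:
        # Unique constraints required by the ON CONFLICT clauses above
        cur.execute("ALTER TABLE public.kg_nodes ADD CONSTRAINT uq_kg_nodes_name_cat UNIQUE (name, category);")
        cur.execute("ALTER TABLE public.kg_edges ADD CONSTRAINT uq_kg_edges_src_dest_name UNIQUE (src_id, dest_id, name);")
        cur.execute("ALTER TABLE public.kg_props ADD CONSTRAINT uq_kg_props_ref_prop UNIQUE (ref_id, prop_name);")
    conn.close()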

+ 0 - 0
graph/models/response.py


Some files are not shown in this diff because too many files have changed