Browse Source

开放平台知识库

yuchengwei 1 week ago
commit
ffbe7231c4
8 changed files with 2346 additions and 0 deletions
  1. 27 0
      config.py
  2. 509 0
      main.py
  3. 44 0
      models.py
  4. 10 0
      requirements.txt
  5. 1377 0
      static/redoc.standalone.js
  6. 2 0
      static/swagger-ui-bundle.js
  7. 3 0
      static/swagger-ui.css
  8. 374 0
      utils.py

+ 27 - 0
config.py

@@ -0,0 +1,27 @@
+from typing import Dict, Union, Optional
+from pydantic_settings import BaseSettings
+
+class Settings(BaseSettings):
+    """Runtime configuration for the knowledge-base service.
+
+    Declared as a pydantic-settings ``BaseSettings`` subclass; the literals
+    below are the defaults used when nothing overrides a field.
+    """
+
+    # PostgreSQL configuration
+    # NOTE(review): this default embeds a database user, password and host in
+    # source control -- consider rotating it and supplying the value from the
+    # environment instead.
+    DATABASE_URL: str = "postgresql://knowledge:qwer1234.@173.18.12.203:5432/knowledge_base"
+    
+    # MinIO configuration
+    # NOTE(review): the access/secret key defaults are committed credentials
+    # as well; same concern as DATABASE_URL.
+    MINIO_ENDPOINT: str = "173.18.12.199:9000"
+    MINIO_ACCESS_KEY: str = "yvhNcezRwQvPuUylHqrg"
+    MINIO_SECRET_KEY: str = "QQCejGeENpUIkGr4yfDaubPwWCoV29xHoXv6gHYU"
+    MINIO_BUCKET_NAME: str = "knowledge-base"
+    MINIO_SECURE: bool = False
+    
+    # File upload configuration
+    # MAX_FILE_COUNT caps the number of files accepted by one upload request.
+    MAX_FILE_COUNT: int = 100
+    # Keyed by lower-case extension (no dot). "max_size" is compared against
+    # the upload size in MB by the upload endpoint; "max_pages" is not read
+    # anywhere in the code visible in this commit.
+    ALLOWED_EXTENSIONS: Dict[str, Dict[str, Union[int, Optional[int]]]] = {
+        "doc": {"max_size": 50, "max_pages": 1000},
+        "txt": {"max_size": 10, "max_pages": None},
+        "docx": {"max_size": 50, "max_pages": 1000},
+        "pdf": {"max_size": 500, "max_pages": 3000},
+        "ppt": {"max_size": 50, "max_pages": 1000},
+        "pptx": {"max_size": 50, "max_pages": 1000},
+        "md": {"max_size": 10, "max_pages": None}
+    }
+
+# Module-level settings instance imported by main.py and utils.py.
+settings = Settings()

+ 509 - 0
main.py

@@ -0,0 +1,509 @@
+import os
+import io
+import logging
+import urllib.parse
+import time
+import glob
+import shutil
+import subprocess
+from typing import List, Optional
+from datetime import datetime
+from fastapi import FastAPI, Depends, HTTPException, UploadFile, File, Form
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import StreamingResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.openapi.docs import (
+    get_redoc_html,
+    get_swagger_ui_html,
+    get_swagger_ui_oauth2_redirect_html,
+)
+from sqlalchemy import create_engine
+from sqlalchemy.orm import Session, sessionmaker
+from sqlalchemy.ext.declarative import declarative_base
+from pydantic import BaseModel, ConfigDict, Field, field_serializer
+from models import Base, KnowledgeBase, KnowledgeFile
+from utils import DatabaseUtils, MinioUtils, FileUtils
+from config import settings
+
+# 响应模型
+class ResponseModel(BaseModel):
+    """Envelope returned by the JSON endpoints: status code, message, payload."""
+    code: int
+    message: str
+    # No default is given, so callers must always pass ``data`` explicitly
+    # (it may be None).
+    data: Optional[dict | list | bool | None]
+
+class KnowledgeBaseResponse(BaseModel):
+    """Serialized view of a knowledge base as returned by the API."""
+    # from_attributes lets model_validate() read fields off ORM objects.
+    model_config = ConfigDict(from_attributes=True)
+    
+    id: int
+    name: str
+    description: Optional[str] = None
+    tags: Optional[str] = None
+    creator: Optional[str] = None
+    file_count: int = 0
+    # Timestamps default to the current UTC time when the source omits them.
+    created_at: datetime = Field(default_factory=datetime.utcnow)
+    updated_at: datetime = Field(default_factory=datetime.utcnow)
+
+    @field_serializer('created_at', 'updated_at')
+    def serialize_datetime(self, dt: datetime) -> str:
+        """Render both timestamps as a date only (``YYYY-MM-DD``)."""
+        return dt.strftime('%Y-%m-%d')
+
+class KnowledgeFileResponse(BaseModel):
+    """Serialized view of a stored file record as returned by the API."""
+    # from_attributes lets model_validate() read fields off ORM objects.
+    model_config = ConfigDict(from_attributes=True)
+    
+    id: int
+    knowledge_base_id: int
+    file_name: str
+    # Size in MB (the upload endpoint stores len(bytes) / 1024 / 1024).
+    file_size: float
+    # Extension without the dot, e.g. "pdf".
+    file_type: str
+    minio_url: str
+    version: Optional[str] = None
+    author: Optional[str] = None
+    year: Optional[int] = None
+    page_count: Optional[int] = None
+    creator: Optional[str] = None
+    knowledge_type: Optional[str] = None
+    # Timestamps default to the current UTC time when the source omits them.
+    created_at: datetime = Field(default_factory=datetime.utcnow)
+    updated_at: datetime = Field(default_factory=datetime.utcnow)
+
+    @field_serializer('created_at', 'updated_at')
+    def serialize_datetime(self, dt: datetime) -> str:
+        """Render both timestamps to minute precision (``YYYY-MM-DD HH:MM``)."""
+        return dt.strftime('%Y-%m-%d %H:%M')
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+# Create the database engine and the session factory used by get_db()
+engine = create_engine(settings.DATABASE_URL)
+SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
+
+# Create the database tables.
+# NOTE: this runs at import time, so importing this module needs a reachable
+# database.
+Base.metadata.create_all(bind=engine)
+
+# Initialise the FastAPI application. The built-in /docs and /redoc pages are
+# disabled here and re-registered below against locally served static assets.
+app = FastAPI(
+    title="知识库管理系统",
+    description="知识库文档管理系统API",
+    version="1.0.0",
+    root_path="/open-platform",
+    docs_url=None, 
+    redoc_url=None
+)
+
+# Configure the CORS middleware.
+# NOTE(review): a wildcard origin together with allow_credentials=True lets
+# any site make credentialed requests -- restrict the origins before
+# production use.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # should be set to specific domains in production
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Serve the bundled Swagger UI / ReDoc assets from ./static (relative to the
+# working directory).
+app.mount("/static", StaticFiles(directory="static"), name="static")
+
+
+@app.get("/docs", include_in_schema=False)
+async def custom_swagger_ui_html():
+    """Serve the Swagger UI page using the locally hosted JS/CSS bundles."""
+    # NOTE(review): the static asset URLs carry the "/open-platform" prefix
+    # but openapi_url is passed through unprefixed -- confirm the schema URL
+    # resolves when the app is served behind that root path.
+    return get_swagger_ui_html(
+        openapi_url=app.openapi_url,
+        title=app.title + " - Swagger UI",
+        oauth2_redirect_url=app.swagger_ui_oauth2_redirect_url,
+        swagger_js_url="/open-platform/static/swagger-ui-bundle.js",
+        swagger_css_url="/open-platform/static/swagger-ui.css",
+    )
+
+
+@app.get(app.swagger_ui_oauth2_redirect_url, include_in_schema=False)
+async def swagger_ui_redirect():
+    """Serve the OAuth2 redirect helper page used by Swagger UI."""
+    return get_swagger_ui_oauth2_redirect_html()
+
+
+@app.get("/redoc", include_in_schema=False)
+async def redoc_html():
+    """Serve the ReDoc page using the locally hosted JS bundle."""
+    # NOTE(review): as with /docs, openapi_url is not prefixed with the
+    # "/open-platform" root path while the script URL is -- confirm.
+    return get_redoc_html(
+        openapi_url=app.openapi_url,
+        title=app.title + " - ReDoc",
+        redoc_js_url="/open-platform/static/redoc.standalone.js",
+    )
+
+# Initialise the MinIO helper. Its constructor checks for (and creates) the
+# configured bucket, so this contacts MinIO at import time.
+minio_utils = MinioUtils()
+
+# 全局异常处理
+@app.exception_handler(Exception)
+async def global_exception_handler(request, exc):
+    """Log any unhandled exception and answer with the standard error envelope.
+
+    Args:
+        request: The incoming request (unused).
+        exc: The exception that escaped the route handler.
+
+    Returns:
+        A JSON response with HTTP status 500 and the ``code``/``message``/
+        ``data`` envelope used by the other endpoints.
+    """
+    # Local import keeps this block self-contained with respect to the
+    # file-level import list.
+    from fastapi.responses import JSONResponse
+
+    # Pass the exception itself: this coroutine does not run inside an
+    # ``except`` block, so ``exc_info=True`` could not be relied on to find
+    # the traceback.
+    logger.error(f"全局异常: {exc}", exc_info=exc)
+    # Exception handlers must return a Response object; a bare dict is not
+    # converted the way route return values are.
+    return JSONResponse(
+        status_code=500,
+        content={
+            "code": 500,
+            "message": "服务器内部错误",
+            "data": None
+        },
+    )
+
+# 依赖项:获取数据库会话
+def get_db():
+    """Dependency: yield one database session and always close it afterwards."""
+    session = SessionLocal()
+    try:
+        yield session
+    finally:
+        # Runs after the request finishes, whether it succeeded or raised.
+        session.close()
+
+# 请求模型
+class KnowledgeBaseCreate(BaseModel):
+    """Request body for creating a knowledge base."""
+    name: str
+    description: Optional[str] = None
+    tags: Optional[str] = None
+
+class KnowledgeBaseUpdate(BaseModel):
+    """Request body for updating a knowledge base.
+
+    All three fields are written to the record by the update helper, so an
+    omitted ``description`` or ``tags`` overwrites the stored value with None.
+    """
+    name: str
+    description: Optional[str] = None
+    tags: Optional[str] = None
+
+class FileUpdate(BaseModel):
+    """One entry of a batch file update; ``id`` selects the file record.
+
+    The batch-update endpoint leaves a field unchanged when it is None here.
+    """
+    id: int
+    file_name: Optional[str] = None
+    version: Optional[str] = None
+    author: Optional[str] = None
+    year: Optional[int] = None
+    page_count: Optional[int] = None
+    creator: Optional[str] = None
+    knowledge_type: Optional[str] = None
+
+class BatchFileUpdate(BaseModel):
+    """Request body for the batch file update endpoint."""
+    files: List[FileUpdate]
+
+# 使用utils.py中的FileUtils类进行文件转换
+
+@app.post("/knowledge-base/", response_model=ResponseModel)
+def create_knowledge_base(kb: KnowledgeBaseCreate, db: Session = Depends(get_db)):
+    """Create a knowledge base and return it inside the response envelope."""
+    created = DatabaseUtils.create_knowledge_base(db, kb.name, kb.description, kb.tags)
+    payload = KnowledgeBaseResponse.model_validate(created).model_dump()
+    return ResponseModel(code=200, message="创建成功", data=payload)
+
+@app.put("/knowledge-base/{kb_id}", response_model=ResponseModel)
+def update_knowledge_base(kb_id: int, kb: KnowledgeBaseUpdate, db: Session = Depends(get_db)):
+    """Update the knowledge base ``kb_id`` and return the refreshed record."""
+    updated = DatabaseUtils.update_knowledge_base(db, kb_id, kb.name, kb.description, kb.tags)
+    payload = KnowledgeBaseResponse.model_validate(updated).model_dump()
+    return ResponseModel(code=200, message="更新成功", data=payload)
+
+@app.delete("/knowledge-base/{kb_id}", response_model=ResponseModel)
+def delete_knowledge_base(kb_id: int, db: Session = Depends(get_db)):
+    """Soft-delete the knowledge base ``kb_id`` via ``DatabaseUtils``."""
+    outcome = DatabaseUtils.delete_knowledge_base(db, kb_id)
+    return ResponseModel(code=200, message="删除成功", data=outcome)
+
+@app.get("/knowledge-base/{kb_id}", response_model=ResponseModel)
+def get_knowledge_base(kb_id: int, db: Session = Depends(get_db)):
+    """Return the non-deleted knowledge base with id ``kb_id``, or 404."""
+    record = (
+        db.query(KnowledgeBase)
+        .filter(KnowledgeBase.id == kb_id, KnowledgeBase.is_deleted == 0)
+        .first()
+    )
+    if not record:
+        raise HTTPException(status_code=404, detail="知识库不存在")
+
+    payload = KnowledgeBaseResponse.model_validate(record).model_dump()
+    return ResponseModel(code=200, message="查询成功", data=payload)
+
+@app.get("/knowledge-base/", response_model=ResponseModel)
+def list_knowledge_bases(pageNo: int = 1, pageSize: int = 10, name: Optional[str] = None, db: Session = Depends(get_db)):
+    """Return one page of knowledge bases, optionally filtered by name.
+
+    ``pageNo`` is 1-based; both paging parameters must be at least 1,
+    otherwise a 400 is raised.
+    """
+    if pageNo < 1:
+        raise HTTPException(status_code=400, detail="页码必须大于等于1")
+    if pageSize < 1:
+        raise HTTPException(status_code=400, detail="每页条数必须大于等于1")
+
+    # Translate the 1-based page number into a row offset.
+    offset = (pageNo - 1) * pageSize
+    rows, total = DatabaseUtils.get_knowledge_bases(db, offset, pageSize, name)
+    items = [KnowledgeBaseResponse.model_validate(row).model_dump() for row in rows]
+    return ResponseModel(
+        code=200,
+        message="查询成功",
+        data={"list": items, "total": total},
+    )
+
+@app.get("/knowledge-base/name/{name}", response_model=ResponseModel)
+def get_knowledge_base_by_name(name: str, db: Session = Depends(get_db)):
+    """Return the non-deleted knowledge base whose name equals ``name``, or 404."""
+    record = DatabaseUtils.get_knowledge_base_by_name(db, name)
+    if not record:
+        raise HTTPException(status_code=404, detail="知识库不存在")
+
+    payload = KnowledgeBaseResponse.model_validate(record).model_dump()
+    return ResponseModel(code=200, message="查询成功", data=payload)
+
+@app.post("/knowledge-base/{kb_id}/files/", response_model=ResponseModel)
+async def upload_files(
+    kb_id: int,
+    files: List[UploadFile] = File(...),
+    db: Session = Depends(get_db)
+):
+    """Upload one or more files into the knowledge base ``kb_id``.
+
+    ``.doc`` and ``.ppt`` uploads are converted to ``.docx`` / ``.pptx`` with
+    ``FileUtils.convert_office_file`` and stored under the converted name.
+    Each file is checked against ``settings.ALLOWED_EXTENSIONS`` (format and
+    maximum size in MB), uploaded to MinIO and recorded as a ``KnowledgeFile``.
+
+    Raises:
+        HTTPException: 404 if the knowledge base does not exist; 400 for too
+            many files, a missing file name, an unsupported format or an
+            oversized file; 500 if an office conversion fails.
+
+    NOTE(review): the conversion and MinIO calls are blocking and run inside
+    this coroutine; consider a thread pool if uploads stall other requests.
+    """
+    import tempfile
+
+    # Make sure the target knowledge base exists
+    kb = db.query(KnowledgeBase).filter(KnowledgeBase.id == kb_id, KnowledgeBase.is_deleted == 0).first()
+    if not kb:
+        raise HTTPException(status_code=404, detail="知识库不存在")
+
+    # Enforce the per-request file limit
+    if len(files) > settings.MAX_FILE_COUNT:
+        raise HTTPException(status_code=400, detail=f"单次上传文件数量不能超过{settings.MAX_FILE_COUNT}个")
+
+    # Legacy formats that are converted before storage
+    conversion_targets = {"doc": "docx", "ppt": "pptx"}
+
+    uploaded_files = []
+    for file in files:
+        # The file name is client-controlled: drop any directory components
+        # (either separator style) so it cannot escape the temp directory or
+        # smuggle slashes into the object name.
+        original_filename = os.path.basename((file.filename or "").replace("\\", "/"))
+        if not original_filename:
+            raise HTTPException(status_code=400, detail="文件名不能为空")
+
+        file_ext = os.path.splitext(original_filename)[1].lower().lstrip('.')
+        target_format = conversion_targets.get(file_ext)
+
+        # Check the format that will actually be stored, before reading or
+        # converting anything.
+        stored_ext = target_format or file_ext
+        if stored_ext not in settings.ALLOWED_EXTENSIONS:
+            raise HTTPException(status_code=400, detail=f"不支持的文件格式:{stored_ext}")
+
+        # Read the upload into memory
+        file_content = await file.read()
+        stored_filename = original_filename
+
+        if target_format:
+            # Convert inside a throw-away directory
+            with tempfile.TemporaryDirectory() as temp_dir:
+                temp_input_path = os.path.join(temp_dir, original_filename)
+                with open(temp_input_path, "wb") as temp_file:
+                    temp_file.write(file_content)
+
+                converted_file_path = FileUtils.convert_office_file(temp_input_path, temp_dir, target_format)
+                if not converted_file_path or not os.path.exists(converted_file_path):
+                    raise HTTPException(status_code=500, detail=f"文件格式转换失败: {original_filename}")
+
+                with open(converted_file_path, "rb") as converted_file:
+                    file_content = converted_file.read()
+
+            # An empty result is a failed conversion; never store the legacy
+            # bytes under the new extension.
+            if not file_content:
+                raise HTTPException(status_code=500, detail=f"文件格式转换失败: {original_filename}")
+
+            stored_filename = os.path.splitext(original_filename)[0] + f".{target_format}"
+
+        file_size = len(file_content) / (1024 * 1024)  # bytes -> MB
+
+        # Enforce the per-format size limit
+        max_size = settings.ALLOWED_EXTENSIONS[stored_ext]["max_size"]
+        if file_size > max_size:
+            raise HTTPException(status_code=400, detail=f"{original_filename}超过最大允许大小{max_size}MB")
+
+        # Upload to MinIO
+        minio_url = minio_utils.upload_file(file_content, stored_filename, file.content_type)
+
+        # Derive the knowledge type from keywords in the file name
+        knowledge_type = None
+        if '指南' in stored_filename:
+            knowledge_type = '指南'
+        elif '教材' in stored_filename:
+            knowledge_type = '教材'
+
+        # Create the file record
+        db_file = KnowledgeFile(
+            knowledge_base_id=kb_id,
+            file_name=stored_filename,
+            file_size=file_size,
+            file_type=stored_ext,
+            minio_url=minio_url,
+            creator=kb.creator,  # the knowledge base creator is used as the file creator
+            knowledge_type=knowledge_type
+        )
+        db.add(db_file)
+        uploaded_files.append(db_file)
+        # Bump the knowledge base file counter (this helper commits the
+        # session, so files processed so far are persisted at this point).
+        DatabaseUtils.increment_file_count(db, kb_id)
+
+    db.commit()
+    return ResponseModel(
+        code=200,
+        message="上传成功",
+        data=[KnowledgeFileResponse.model_validate(file).model_dump() for file in uploaded_files]
+    )
+
+@app.get("/knowledge-base/{kb_id}/files/", response_model=ResponseModel)
+def list_files(kb_id: int, pageNo: int = 1, pageSize: int = 10, file_name: Optional[str] = None, db: Session = Depends(get_db)):
+    """Return one page of the non-deleted files in knowledge base ``kb_id``.
+
+    ``pageNo`` is 1-based. ``file_name`` optionally filters by a
+    case-insensitive substring match. Results are ordered by file id so that
+    consecutive pages neither repeat nor skip rows.
+
+    Raises:
+        HTTPException: 400 when ``pageNo`` or ``pageSize`` is below 1.
+    """
+    if pageNo < 1:
+        raise HTTPException(status_code=400, detail="页码必须大于等于1")
+    if pageSize < 1:
+        raise HTTPException(status_code=400, detail="每页条数必须大于等于1")
+
+    skip = (pageNo - 1) * pageSize
+    query = db.query(KnowledgeFile).filter(
+        KnowledgeFile.knowledge_base_id == kb_id,
+        KnowledgeFile.is_deleted == 0
+    )
+
+    if file_name:
+        query = query.filter(KnowledgeFile.file_name.ilike(f"%{file_name}%"))
+
+    total = query.count()
+    # OFFSET/LIMIT without ORDER BY returns rows in an unspecified order, so
+    # pin the order before slicing.
+    files = query.order_by(KnowledgeFile.id).offset(skip).limit(pageSize).all()
+
+    return ResponseModel(
+        code=200,
+        message="查询成功",
+        data={
+            "list": [KnowledgeFileResponse.model_validate(file).model_dump() for file in files],
+            "total": total
+        }
+    )
+
+@app.get("/knowledge-base/{kb_id}/files/search/", response_model=ResponseModel)
+def search_files(kb_id: int, file_name: str, db: Session = Depends(get_db)):
+    """Return every non-deleted file in ``kb_id`` whose name contains ``file_name``.
+
+    The match is a case-insensitive substring match and is not paginated.
+    """
+    name_pattern = f"%{file_name}%"
+    matches = (
+        db.query(KnowledgeFile)
+        .filter(
+            KnowledgeFile.knowledge_base_id == kb_id,
+            KnowledgeFile.file_name.ilike(name_pattern),
+            KnowledgeFile.is_deleted == 0,
+        )
+        .all()
+    )
+    payload = [KnowledgeFileResponse.model_validate(match).model_dump() for match in matches]
+    return ResponseModel(code=200, message="查询成功", data=payload)
+
+@app.get("/files/{file_id}/download")
+def download_file(file_id: int, db: Session = Depends(get_db)):
+    """Stream the stored content of file ``file_id`` as an attachment, or 404."""
+    record = (
+        db.query(KnowledgeFile)
+        .filter(KnowledgeFile.id == file_id, KnowledgeFile.is_deleted == 0)
+        .first()
+    )
+    if not record:
+        raise HTTPException(status_code=404, detail="文件不存在")
+
+    # The object name is the last path segment of the stored MinIO URL.
+    object_name = record.minio_url.rsplit("/", 1)[-1]
+    payload = minio_utils.download_file(object_name)
+
+    # Percent-encode the name so non-ASCII file names survive in the header.
+    quoted_name = urllib.parse.quote(record.file_name)
+    disposition = f"attachment; filename*=UTF-8''{quoted_name}"
+
+    return StreamingResponse(
+        io.BytesIO(payload),
+        media_type="application/octet-stream",
+        headers={"Content-Disposition": disposition},
+    )
+
+@app.delete("/files/{file_id}", response_model=dict)
+def delete_file(file_id: int, db: Session = Depends(get_db)):
+    """Remove file ``file_id`` from MinIO and soft-delete its record, or 404."""
+    record = (
+        db.query(KnowledgeFile)
+        .filter(KnowledgeFile.id == file_id, KnowledgeFile.is_deleted == 0)
+        .first()
+    )
+    if not record:
+        raise HTTPException(status_code=404, detail="文件不存在")
+
+    # The object name is the last path segment of the stored MinIO URL.
+    minio_utils.delete_file(record.minio_url.rsplit("/", 1)[-1])
+
+    # Soft delete: flag the row and stamp the modification time.
+    record.is_deleted = 1
+    record.updated_at = datetime.utcnow()
+    # Keep the knowledge base file counter in step.
+    DatabaseUtils.decrement_file_count(db, record.knowledge_base_id)
+    db.commit()
+
+    return {"code": 200, "message": "删除成功", "data": True}
+
+@app.put("/files/batch-update", response_model=ResponseModel)
+def batch_update_files(update_data: BatchFileUpdate, db: Session = Depends(get_db)):
+    """Apply metadata updates (and optional renames) to several files at once.
+
+    For each entry, fields that are not None overwrite the stored value. When
+    ``file_name`` changes, the object is re-uploaded to MinIO under the new
+    name and the record is pointed at the new URL. The superseded objects are
+    removed only after the database commit succeeds, so a failure part-way
+    through the batch leaves every record pointing at an object that still
+    exists.
+
+    Raises:
+        HTTPException: 404 if any referenced file does not exist; nothing is
+            committed in that case.
+    """
+    import mimetypes
+
+    updated_files = []
+    superseded_objects = []
+    for file_update in update_data.files:
+        # Look up the file record
+        file = db.query(KnowledgeFile).filter(
+            KnowledgeFile.id == file_update.id,
+            KnowledgeFile.is_deleted == 0
+        ).first()
+        if not file:
+            raise HTTPException(status_code=404, detail=f"文件ID {file_update.id} 不存在")
+
+        # A rename also moves the object in MinIO
+        if file_update.file_name and file_update.file_name != file.file_name:
+            old_object_name = file.minio_url.split("/")[-1]
+
+            file_content = minio_utils.download_file(old_object_name)
+
+            # upload_file expects a MIME type, not a bare extension
+            content_type = (
+                mimetypes.guess_type(file_update.file_name)[0]
+                or "application/octet-stream"
+            )
+            new_minio_url = minio_utils.upload_file(
+                file_content,
+                file_update.file_name,
+                content_type
+            )
+
+            # Defer deleting the old object until the commit has succeeded;
+            # skip it if the upload landed on the same object name.
+            if new_minio_url.split("/")[-1] != old_object_name:
+                superseded_objects.append(old_object_name)
+
+            file.file_name = file_update.file_name
+            file.minio_url = new_minio_url
+
+        # Update the remaining fields that were supplied
+        if file_update.version is not None:
+            file.version = file_update.version
+        if file_update.author is not None:
+            file.author = file_update.author
+        if file_update.year is not None:
+            file.year = file_update.year
+        if file_update.page_count is not None:
+            file.page_count = file_update.page_count
+        if file_update.creator is not None:
+            file.creator = file_update.creator
+        if file_update.knowledge_type is not None:
+            file.knowledge_type = file_update.knowledge_type
+
+        file.updated_at = datetime.utcnow()
+        updated_files.append(file)
+
+    db.commit()
+
+    # Best-effort cleanup of objects that are no longer referenced.
+    for object_name in superseded_objects:
+        minio_utils.delete_file(object_name)
+
+    return ResponseModel(
+        code=200,
+        message="更新成功",
+        data=[KnowledgeFileResponse.model_validate(file).model_dump() for file in updated_files]
+    )
+
+if __name__ == "__main__":
+    import uvicorn
+    # Configure and start the uvicorn server.
+    # NOTE(review): reload=True and workers=4 are requested together; these
+    # options are not meant to be combined -- confirm which one is intended
+    # for this entry point.
+    uvicorn.run(
+        "main:app",
+        host="0.0.0.0",
+        port=8003,
+        reload=True,  # enable hot reload in development
+        workers=4,    # adjust to the number of CPU cores in production
+        log_level="info"
+    )

+ 44 - 0
models.py

@@ -0,0 +1,44 @@
+from datetime import datetime
+from typing import List
+from sqlalchemy import Column, Integer, String, Text, Boolean, DateTime, ForeignKey, Float
+from sqlalchemy.orm import relationship, declarative_base
+
+# Declarative base class shared by all ORM models in this module.
+Base = declarative_base()
+
+class KnowledgeBase(Base):
+    """ORM model for the ``knowledge_base`` table."""
+    __tablename__ = 'knowledge_base'
+
+    id = Column(Integer, primary_key=True, index=True)
+    # The unique constraint covers every row, including soft-deleted ones.
+    name = Column(String(50), unique=True, index=True, nullable=False)
+    description = Column(Text, nullable=True)
+    tags = Column(String(200), nullable=True)
+    creator = Column(String(100), nullable=True)
+    file_count = Column(Integer, default=0)  # number of files
+    # Soft-delete flag: queries in this commit treat 0 as live, 1 as deleted.
+    is_deleted = Column(Integer, default=0)
+    created_at = Column(DateTime, default=datetime.utcnow)
+    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
+
+    # One-to-many relationship to KnowledgeFile
+    files = relationship('KnowledgeFile', back_populates='knowledge_base')
+
+class KnowledgeFile(Base):
+    """ORM model for the ``knowledge_file`` table (one row per stored file)."""
+    __tablename__ = 'knowledge_file'
+
+    id = Column(Integer, primary_key=True, index=True)
+    knowledge_base_id = Column(Integer, ForeignKey('knowledge_base.id'), nullable=False)
+    file_name = Column(String(255), nullable=False)
+    file_size = Column(Float, nullable=False)  # file size in MB
+    file_type = Column(String(10), nullable=False)  # file extension
+    minio_url = Column(String(500), nullable=False)  # MinIO storage URL
+    version = Column(String(50), nullable=True)
+    author = Column(String(100), nullable=True)
+    year = Column(Integer, nullable=True)
+    page_count = Column(Integer, nullable=True)  # number of pages in the document
+    creator = Column(String(100), nullable=True)  # creator
+    knowledge_type = Column(String(50), nullable=True)  # knowledge type
+    # Soft-delete flag: queries in this commit treat 0 as live, 1 as deleted.
+    is_deleted = Column(Integer, default=0)
+    created_at = Column(DateTime, default=datetime.utcnow)
+    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
+
+    # Many-to-one relationship back to KnowledgeBase
+    knowledge_base = relationship('KnowledgeBase', back_populates='files')

+ 10 - 0
requirements.txt

@@ -0,0 +1,10 @@
+fastapi==0.115.12
+uvicorn==0.34.0
+SQLAlchemy==2.0.30
+psycopg2-binary==2.9.10
+pydantic==2.11.1
+pydantic-settings==2.8.1
+python-multipart==0.0.20
+minio==7.1.1
+python-jose==3.3.0
+python-dotenv==1.0.1

File diff suppressed because it is too large
+ 1377 - 0
static/redoc.standalone.js


File diff suppressed because it is too large
+ 2 - 0
static/swagger-ui-bundle.js


File diff suppressed because it is too large
+ 3 - 0
static/swagger-ui.css


+ 374 - 0
utils.py

@@ -0,0 +1,374 @@
+import re
+import io
+import os
+import time
+import glob
+import shutil
+import logging
+import subprocess
+from datetime import datetime
+from typing import List, Optional
+from minio import Minio
+import urllib3
+from sqlalchemy.orm import Session
+from fastapi import HTTPException
+from models import KnowledgeBase, KnowledgeFile
+from config import settings
+
+# Configure the logger used for Office file conversion
+office_logger = logging.getLogger('office_conversion')
+office_logger.setLevel(logging.INFO)
+# Attach a stream handler only once, so repeated imports/reloads of this
+# module do not add duplicate handlers.
+if not office_logger.handlers:
+    handler = logging.StreamHandler()
+    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    handler.setFormatter(formatter)
+    office_logger.addHandler(handler)
+
+class DatabaseUtils:
+    """Static helpers for knowledge-base rows; the caller supplies the session."""
+
+    # Matches the String(50) length of the ``name`` column in models.py.
+    MAX_NAME_LENGTH = 50
+    MAX_DESCRIPTION_LENGTH = 400
+
+    @staticmethod
+    def validate_knowledge_base_name(name: str) -> bool:
+        """Return True if ``name`` consists only of allowed characters.
+
+        Allowed: ASCII letters, digits, CJK ideographs in U+4E00..U+9FA5,
+        underscore, hyphen and dot. The whole string must match, so an empty
+        name or one with a trailing newline is rejected.
+        """
+        pattern = r'[a-zA-Z0-9\u4e00-\u9fa5_\-\.]+'
+        # fullmatch, because ``$`` in re.match would also accept "name\n".
+        return re.fullmatch(pattern, name) is not None
+
+    @staticmethod
+    def _check_fields(name: str, description: Optional[str]) -> None:
+        """Raise a 400 HTTPException if the name or description is invalid."""
+        if not DatabaseUtils.validate_knowledge_base_name(name):
+            raise HTTPException(status_code=400, detail="知识库名称格式不正确")
+
+        if len(name) > DatabaseUtils.MAX_NAME_LENGTH:
+            raise HTTPException(
+                status_code=400,
+                detail=f"知识库名称不能超过{DatabaseUtils.MAX_NAME_LENGTH}个字符"
+            )
+
+        if description and len(description) > DatabaseUtils.MAX_DESCRIPTION_LENGTH:
+            raise HTTPException(status_code=400, detail="知识库备注不能超过400字")
+
+    @staticmethod
+    def _commit_name_change(db: Session) -> None:
+        """Commit, turning a constraint violation into a 400 response."""
+        # Local import keeps this class self-contained with respect to the
+        # file-level import list.
+        from sqlalchemy.exc import IntegrityError
+
+        try:
+            db.commit()
+        except IntegrityError as exc:
+            # The name column is unique (soft-deleted rows included).
+            db.rollback()
+            raise HTTPException(status_code=400, detail="知识库名称已存在") from exc
+
+    @staticmethod
+    def create_knowledge_base(db: Session, name: str, description: Optional[str] = None, tags: Optional[str] = None) -> KnowledgeBase:
+        """Insert a new knowledge base and return the refreshed row.
+
+        Raises:
+            HTTPException: 400 if the name or description is invalid, or the
+                name is already taken.
+        """
+        DatabaseUtils._check_fields(name, description)
+
+        db_kb = KnowledgeBase(name=name, description=description, tags=tags, file_count=0)
+        db.add(db_kb)
+        DatabaseUtils._commit_name_change(db)
+        db.refresh(db_kb)
+        return db_kb
+
+    @staticmethod
+    def update_knowledge_base(db: Session, kb_id: int, name: str, description: Optional[str] = None, tags: Optional[str] = None) -> KnowledgeBase:
+        """Overwrite name, description and tags of knowledge base ``kb_id``.
+
+        Raises:
+            HTTPException: 404 if the knowledge base does not exist; 400 if
+                the name or description is invalid, or the name is taken.
+        """
+        db_kb = db.query(KnowledgeBase).filter(KnowledgeBase.id == kb_id, KnowledgeBase.is_deleted == 0).first()
+        if not db_kb:
+            raise HTTPException(status_code=404, detail="知识库不存在")
+
+        # Same rules as on creation, so an update cannot store a name that
+        # create would have rejected (e.g. an empty one).
+        DatabaseUtils._check_fields(name, description)
+
+        db_kb.name = name
+        db_kb.description = description
+        db_kb.tags = tags
+        db_kb.updated_at = datetime.utcnow()
+
+        DatabaseUtils._commit_name_change(db)
+        db.refresh(db_kb)
+        return db_kb
+
+    @staticmethod
+    def delete_knowledge_base(db: Session, kb_id: int) -> bool:
+        """Soft-delete knowledge base ``kb_id`` and reset its file counter.
+
+        Raises:
+            HTTPException: 404 if the knowledge base does not exist.
+        """
+        db_kb = db.query(KnowledgeBase).filter(KnowledgeBase.id == kb_id, KnowledgeBase.is_deleted == 0).first()
+        if not db_kb:
+            raise HTTPException(status_code=404, detail="知识库不存在")
+
+        # The file counter is zeroed when the knowledge base is deleted
+        db_kb.file_count = 0
+        db_kb.is_deleted = 1
+        db_kb.updated_at = datetime.utcnow()
+        db.commit()
+        return True
+
+    @staticmethod
+    def get_knowledge_bases(db: Session, skip: int = 0, limit: int = 10, name: Optional[str] = None) -> tuple[List[KnowledgeBase], int]:
+        """Return ``(rows, total)`` for one page of non-deleted knowledge bases.
+
+        ``name`` optionally filters by a case-insensitive substring match.
+        ``total`` counts all matching rows, not just the returned page. Rows
+        are ordered by id so that consecutive pages neither repeat nor skip
+        entries.
+        """
+        query = db.query(KnowledgeBase).filter(KnowledgeBase.is_deleted == 0)
+        if name:
+            query = query.filter(KnowledgeBase.name.ilike(f"%{name}%"))
+        total = query.count()
+        knowledge_bases = query.order_by(KnowledgeBase.id).offset(skip).limit(limit).all()
+        return knowledge_bases, total
+
+    @staticmethod
+    def get_knowledge_base_by_name(db: Session, name: str) -> Optional[KnowledgeBase]:
+        """Return the non-deleted knowledge base named exactly ``name``, or None."""
+        return db.query(KnowledgeBase).filter(KnowledgeBase.name == name, KnowledgeBase.is_deleted == 0).first()
+
+    @staticmethod
+    def increment_file_count(db: Session, kb_id: int) -> None:
+        """Add one to the file counter of ``kb_id`` and commit the session.
+
+        Does nothing (and does not commit) if the knowledge base is missing
+        or soft-deleted.
+        """
+        # Issue the increment as a single UPDATE so concurrent requests
+        # cannot overwrite each other's counts.
+        matched = db.query(KnowledgeBase).filter(
+            KnowledgeBase.id == kb_id,
+            KnowledgeBase.is_deleted == 0
+        ).update(
+            {KnowledgeBase.file_count: KnowledgeBase.file_count + 1},
+            synchronize_session=False
+        )
+        if matched:
+            db.commit()
+
+    @staticmethod
+    def decrement_file_count(db: Session, kb_id: int) -> None:
+        """Subtract one from the file counter of ``kb_id`` and commit.
+
+        Does nothing (and does not commit) if the knowledge base is missing,
+        soft-deleted, or its counter is already zero.
+        """
+        matched = db.query(KnowledgeBase).filter(
+            KnowledgeBase.id == kb_id,
+            KnowledgeBase.is_deleted == 0,
+            KnowledgeBase.file_count > 0
+        ).update(
+            {KnowledgeBase.file_count: KnowledgeBase.file_count - 1},
+            synchronize_session=False
+        )
+        if matched:
+            db.commit()
+
+class MinioUtils:
+    """Thin wrapper around a MinIO client bound to the configured bucket."""
+
+    def __init__(self):
+        """Create the client from ``settings`` and make sure the bucket exists."""
+        self.client = Minio(
+            settings.MINIO_ENDPOINT,
+            access_key=settings.MINIO_ACCESS_KEY,
+            secret_key=settings.MINIO_SECRET_KEY,
+            secure=settings.MINIO_SECURE,
+            # Custom pool: explicit timeouts plus retries on 5xx responses.
+            http_client=urllib3.PoolManager(
+                timeout=urllib3.Timeout(connect=10, read=60),
+                maxsize=50,
+                retries=urllib3.Retry(
+                    total=5,
+                    backoff_factor=0.5,
+                    status_forcelist=[500, 502, 503, 504]
+                )
+            )
+        )
+        self._ensure_bucket_exists()
+
+    def _ensure_bucket_exists(self):
+        """Create the configured bucket if it is not there yet."""
+        if not self.client.bucket_exists(settings.MINIO_BUCKET_NAME):
+            self.client.make_bucket(settings.MINIO_BUCKET_NAME)
+
+    def upload_file(self, file_data: bytes, file_name: str, content_type: str, part_size: int = 15 * 1024 * 1024) -> str:
+        """Store ``file_data`` under the object name ``file_name``.
+
+        Args:
+            file_data: Raw bytes to store.
+            file_name: Used verbatim as the object name.
+            content_type: MIME type recorded on the object; a falsy value
+                falls back to ``application/octet-stream``.
+            part_size: Multipart chunk size in bytes.
+
+        Returns:
+            The URL of the stored object, built from the configured endpoint
+            and bucket (``https`` when ``MINIO_SECURE`` is set, else ``http``).
+
+        Raises:
+            HTTPException: 500 if the upload fails.
+
+        NOTE(review): the object name is the bare file name, so two uploads
+        with the same name overwrite each other -- consider a unique prefix.
+        """
+        object_name = file_name
+
+        try:
+            # Stream straight from memory; no temporary file is needed.
+            self.client.put_object(
+                bucket_name=settings.MINIO_BUCKET_NAME,
+                object_name=object_name,
+                data=io.BytesIO(file_data),
+                length=len(file_data),
+                content_type=content_type or "application/octet-stream",
+                part_size=part_size
+            )
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=f"文件上传失败: {str(e)}") from e
+
+        scheme = "https" if settings.MINIO_SECURE else "http"
+        return f"{scheme}://{settings.MINIO_ENDPOINT}/{settings.MINIO_BUCKET_NAME}/{object_name}"
+
+    def download_file(self, object_name: str) -> bytes:
+        """Return the full content of ``object_name`` from the bucket.
+
+        Errors raised by the client (e.g. a missing object) propagate.
+        """
+        # Acquire outside the try so a failed request surfaces its own error
+        # rather than an unbound-variable error from the cleanup code.
+        response = self.client.get_object(settings.MINIO_BUCKET_NAME, object_name)
+        try:
+            return response.read()
+        finally:
+            response.close()
+            response.release_conn()
+
+    def delete_file(self, object_name: str) -> bool:
+        """Best-effort delete of ``object_name``.
+
+        Returns:
+            True if the client call succeeded, False if it raised.
+        """
+        try:
+            self.client.remove_object(settings.MINIO_BUCKET_NAME, object_name)
+            return True
+        except Exception as e:
+            # Still best-effort, but leave a trace instead of failing silently.
+            logging.getLogger(__name__).warning(
+                "failed to delete MinIO object %s: %s", object_name, e
+            )
+            return False
+
+class FileUtils:
+    """File conversion helpers built on the LibreOffice command line."""
+
+    @staticmethod
+    def _convert_filter(input_ext, target_format):
+        """Return the ``--convert-to`` argument for the given conversion."""
+        target = target_format.lower()
+        if input_ext == 'doc' and target == 'docx':
+            return "docx:MS Word 2007 XML"
+        if input_ext == 'ppt' and target == 'pptx':
+            return "pptx:Impress MS PowerPoint 2007 XML"
+        return target_format
+
+    @staticmethod
+    def _pick_output(work_dir, new_files, target_format):
+        """Return the path of the converted file in ``work_dir``, or None."""
+        expected_output_path = os.path.join(work_dir, f"input.{target_format}")
+        if os.path.exists(expected_output_path):
+            return expected_output_path
+
+        # Otherwise prefer a newly created file with the target extension,
+        # then any newly created file; sorted so the choice is deterministic.
+        suffix = f".{target_format.lower()}"
+        with_target_ext = sorted(n for n in new_files if n.lower().endswith(suffix))
+        candidates = with_target_ext or sorted(new_files)
+        if candidates:
+            return os.path.join(work_dir, candidates[0])
+        return None
+
+    @staticmethod
+    def convert_office_file(input_path, output_dir, target_format, timeout=None):
+        """Convert an Office file to another format using LibreOffice.
+
+        The input is copied into a private working directory under an ASCII
+        name (``input.<ext>``), converted there in headless mode, and the
+        result is copied to ``output_dir`` under the original base name.
+
+        Args:
+            input_path (str): Path of the file to convert.
+            output_dir (str): Directory for the result; created if missing.
+            target_format (str): Target format such as ``docx`` or ``pptx``.
+            timeout (float | None): Optional limit in seconds for each
+                LibreOffice invocation; ``None`` means wait indefinitely.
+
+        Returns:
+            str: Path of the converted file, or None if the conversion failed.
+        """
+        import tempfile
+
+        # The input file must exist
+        if not os.path.exists(input_path):
+            office_logger.error(f"输入文件不存在: {input_path}")
+            return None
+
+        # Create the output directory if it does not exist
+        if not os.path.exists(output_dir):
+            try:
+                os.makedirs(output_dir)
+                office_logger.info(f"创建输出目录: {output_dir}")
+            except OSError as e:
+                office_logger.error(f"创建输出目录失败: {e}")
+                return None
+
+        # The output directory must be writable
+        if not os.access(output_dir, os.W_OK):
+            office_logger.error(f"输出目录没有写入权限: {output_dir}")
+            return None
+
+        # Locate the LibreOffice executable
+        libreoffice_cmd = "soffice"  # Linux/macOS
+        if os.name == 'nt':  # Windows
+            libreoffice_cmd = r"C:\Program Files\LibreOffice\program\soffice.exe"
+
+        # Make sure LibreOffice can actually be run
+        try:
+            version_result = subprocess.run(
+                [libreoffice_cmd, "--version"],
+                check=True, capture_output=True, text=True, errors="replace",
+                timeout=timeout
+            )
+            office_logger.info(f"LibreOffice版本: {version_result.stdout.strip()}")
+        except (subprocess.SubprocessError, OSError) as e:
+            office_logger.error(f"LibreOffice未安装或不可用: {e}")
+            return None
+
+        # Split the input name into base name and extension
+        filename = os.path.basename(input_path)
+        base_name, dotted_ext = os.path.splitext(filename)
+        input_ext = dotted_ext[1:].lower()
+        office_logger.info(f"原始文件名: {filename}, 基本名称: {base_name}, 扩展名: {input_ext}")
+
+        final_output_path = os.path.join(output_dir, f"{base_name}.{target_format}")
+
+        # Already in the target format: a plain copy is enough
+        if input_ext == target_format.lower():
+            office_logger.info(f"输入文件已经是目标格式,直接复制文件")
+            try:
+                shutil.copy2(input_path, final_output_path)
+                office_logger.info(f"复制文件到最终位置: {final_output_path}")
+                return final_output_path
+            except (shutil.Error, IOError) as e:
+                office_logger.error(f"复制文件失败: {e}")
+                return None
+
+        # Private working directory next to this module (keeps non-ASCII
+        # upload paths away from LibreOffice). mkdtemp guarantees a unique
+        # name, so concurrent conversions cannot collide.
+        try:
+            temp_dir = tempfile.mkdtemp(
+                prefix="temp_convert_",
+                dir=os.path.dirname(os.path.abspath(__file__))
+            )
+            office_logger.info(f"创建临时工作目录: {temp_dir}")
+        except OSError as e:
+            office_logger.error(f"创建临时工作目录失败: {e}")
+            return None
+
+        try:
+            # Copy the input under an ASCII-only name
+            temp_input_file = os.path.join(temp_dir, f"input.{input_ext}")
+            try:
+                shutil.copy2(input_path, temp_input_file)
+                office_logger.info(f"复制文件到临时目录: {temp_input_file}")
+            except (shutil.Error, IOError) as e:
+                office_logger.error(f"复制文件失败: {e}")
+                return None
+
+            # Snapshot the directory so new files can be detected afterwards
+            before_files = set(os.listdir(temp_dir))
+            office_logger.debug(f"转换前临时目录内容: {before_files}")
+
+            cmd = [
+                libreoffice_cmd,
+                "--headless",
+                "--convert-to", FileUtils._convert_filter(input_ext, target_format),
+                "--outdir", temp_dir,
+                temp_input_file,
+            ]
+            office_logger.info(f"开始转换文件: {temp_input_file} -> {target_format}")
+            office_logger.info(f"执行命令: {' '.join(cmd)}")
+
+            # Run with cwd= instead of os.chdir(): changing the process-wide
+            # working directory is unsafe while other requests are running.
+            result = subprocess.run(
+                cmd, check=True, capture_output=True, text=True,
+                errors="replace", cwd=temp_dir, timeout=timeout
+            )
+            office_logger.info(f"转换命令输出: {result.stdout}")
+            if result.stderr:
+                office_logger.warning(f"转换命令错误输出: {result.stderr}")
+
+            # Short grace period kept from the original implementation, in
+            # case the output is still being written when the command returns.
+            time.sleep(1)
+
+            after_files = set(os.listdir(temp_dir))
+            office_logger.debug(f"转换后临时目录内容: {after_files}")
+            new_files = after_files - before_files
+            office_logger.info(f"新增文件: {new_files}")
+
+            produced = FileUtils._pick_output(temp_dir, new_files, target_format)
+            if produced is None:
+                office_logger.error(f"转换后的文件不存在或无法复制")
+                return None
+
+            try:
+                shutil.copy2(produced, final_output_path)
+                office_logger.info(f"复制转换后的文件到最终位置: {final_output_path}")
+                return final_output_path
+            except (shutil.Error, IOError) as e:
+                office_logger.error(f"复制转换后的文件失败: {e}")
+                office_logger.error(f"转换后的文件不存在或无法复制")
+                return None
+        except subprocess.CalledProcessError as e:
+            office_logger.error(f"转换失败: {e.stderr if hasattr(e, 'stderr') else str(e)}")
+            return None
+        except Exception as e:
+            office_logger.error(f"转换过程中发生未知错误: {str(e)}")
+            return None
+        finally:
+            # Single cleanup point for every exit path above
+            shutil.rmtree(temp_dir, ignore_errors=True)