import json
import logging
import os
import shutil
from datetime import datetime
from math import ceil
from typing import Iterable, Iterator, List

from fastapi import APIRouter, Depends, HTTPException, UploadFile
from sqlalchemy.orm import Session
from sqlalchemy.sql import select, or_, and_, func, distinct

from config.site import FILE_STORAGE_PATH
from db.database import get_db
from db.models import DbKgProj, DbKgTask
from db.schemas import KgTask, KgProj, KgProjCreate, KgTaskCreate, KgTaskUpdate
from models.response import ResponseModel
from utils.response import resp_200

logger = logging.getLogger(__name__)

router = APIRouter()

# Upper bound, in characters, on the content of one generated task.
MAX_CHUNK_LEN = 256

# A chunk that already ends with one of these is emitted as-is.
_SENTENCE_ENDINGS = ("\n", "。")


def create_task(proj_id, task_category, content: str, db: Session):
    """Insert a new task row for a project and return the refreshed row.

    Args:
        proj_id: Value stored in the task's ``proj_id`` column.
        task_category: Value stored in ``task_category`` (the upload
            endpoint in this module passes ``"NLP"``).
        content: Text stored in ``task_content``.
        db: SQLAlchemy session used to add and commit the row.

    Returns:
        The ``DbKgTask`` instance after ``commit`` and ``refresh``.
    """
    logger.debug("create task")
    # One timestamp for both columns so a new row has created == updated.
    now = datetime.now()

    task = DbKgTask()
    task.proj_id = proj_id
    task.task_category = task_category
    task.task_log = ""
    task.task_content = content
    task.status = 0
    task.created = now
    task.updated = now

    db.add(task)
    db.commit()
    db.refresh(task)
    return task


def _safe_filename(filename) -> str:
    """Reduce a client-supplied file name to a bare, safe base name.

    Raises:
        HTTPException: 400 when no usable name remains.
    """
    # Treat backslashes as separators too, so Windows-style paths sent by a
    # client cannot smuggle directory components past basename().
    name = os.path.basename((filename or "").replace("\\", "/"))
    if name in ("", ".", "..") or "\x00" in name:
        raise HTTPException(status_code=400, detail="Invalid file name")
    return name


def _iter_text_chunks(lines: Iterable[str],
                      max_len: int = MAX_CHUNK_LEN) -> Iterator[str]:
    """Yield pieces of at most ``max_len`` characters from ``lines``.

    Lines are stripped and joined with ``"\\n"``. Whenever the pending text
    exceeds ``max_len`` its first ``max_len`` characters are cut off; if that
    piece does not already end in a newline or ``"。"`` it is shortened to
    the last such character it contains and the tail is pushed back. A piece
    containing neither character is emitted at full length. Pieces made up
    only of whitespace are skipped.

    Raises:
        ValueError: If ``max_len`` is smaller than 1.
    """
    if max_len < 1:
        raise ValueError("max_len must be at least 1")

    pending = ""
    for raw_line in lines:
        line = raw_line.strip()
        pending = f"{pending}\n{line}" if pending else line

        while len(pending) > max_len:
            piece, pending = pending[:max_len], pending[max_len:]
            if not piece.endswith(_SENTENCE_ENDINGS):
                cut = max(piece.rfind("。"), piece.rfind("\n"))
                if cut != -1:
                    # Return the unfinished sentence to the pending text.
                    pending = piece[cut + 1:] + pending
                    piece = piece[:cut + 1]
            if piece.strip():
                yield piece

    if pending.strip():
        yield pending


# Upload a labeling data file.
@router.post("/api/labeling-file-upload/{proj_id}")
async def create_upload_file(proj_id: int, file: UploadFile,
                             db: Session = Depends(get_db)):
    """Store an uploaded text file and split it into "NLP" tasks.

    The upload is saved under ``FILE_STORAGE_PATH/tasks_file``, decoded as
    UTF-8 (a leading BOM is tolerated), split by ``_iter_text_chunks`` and
    one task is created per chunk through ``create_task``.

    Raises:
        HTTPException: 400 if the file name is unusable or the content is
            not valid UTF-8.

    Returns:
        The result of ``resp_200`` carrying the stored file name.
    """
    # NOTE(review): file and DB calls below are synchronous inside an
    # ``async def`` handler; confirm that is acceptable for expected sizes.
    safe_name = _safe_filename(file.filename)

    storage_dir = os.path.join(FILE_STORAGE_PATH, "tasks_file")
    os.makedirs(storage_dir, exist_ok=True)
    stored_path = os.path.join(storage_dir, safe_name)

    # Persist the upload on the server's local disk.
    with open(stored_path, "wb") as out:
        shutil.copyfileobj(file.file, out)

    # Decode and split everything before touching the database, so an
    # undecodable upload cannot leave a partial set of tasks behind.
    try:
        with open(stored_path, "r", encoding="utf-8-sig") as src:
            chunks = list(_iter_text_chunks(src))
    except UnicodeDecodeError as err:
        raise HTTPException(
            status_code=400,
            detail="Uploaded file is not valid UTF-8 text",
        ) from err

    for chunk in chunks:
        create_task(proj_id, "NLP", chunk, db)

    return resp_200(data={"filename": safe_name})


labeling_router = router