123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081 |
- from fastapi import APIRouter, Depends, HTTPException, UploadFile
- from sqlalchemy.orm import Session
- from sqlalchemy.sql import select, or_, and_, func, distinct
- from db.schemas import KgTask, KgProj, KgProjCreate, KgTaskCreate, KgTaskUpdate
- from db.models import DbKgProj, DbKgTask
- from db.database import get_db
- from models.response import ResponseModel
- from utils.response import resp_200
- from typing import List
- from datetime import datetime
- from math import ceil
- from config.site import FILE_STORAGE_PATH
- import os
- import json
- router = APIRouter()  # Module-level router; the upload endpoint below registers itself on it.
def create_task(proj_id, task_category, content: str, db: Session):
    """Insert a new task row and return the persisted object.

    Args:
        proj_id: Value stored in the task's ``proj_id`` field.
        task_category: Value stored in ``task_category`` (the upload
            endpoint in this module passes ``"NLP"``).
        content: Text stored in ``task_content``.
        db: SQLAlchemy session used to add, commit and refresh the row.

    Returns:
        The ``DbKgTask`` instance after it has been committed and refreshed.

    Raises:
        Exception: Whatever ``db.commit()`` raises is re-raised after the
            session has been rolled back.
    """
    print("create task")

    # Take the timestamp once so a freshly created row has created == updated.
    now = datetime.now()

    data = DbKgTask()
    data.proj_id = proj_id
    data.task_category = task_category
    data.task_log = ""
    data.task_content = content
    data.status = 0
    data.created = now
    data.updated = now

    db.add(data)
    try:
        db.commit()
    except Exception:
        # The same session is reused for many tasks by the caller; roll back
        # so a failed commit does not leave it in an unusable state.
        db.rollback()
        raise
    db.refresh(data)
    return data
def _split_into_segments(text, max_len=256):
    """Split *text* into consecutive pieces of at most *max_len* characters.

    Each piece is cut just after the last "。" or newline found inside the
    current *max_len*-character window, so pieces end on a sentence or line
    boundary whenever one exists; a window containing neither is emitted
    whole. Whatever remains once *max_len* characters or fewer are left is
    returned as the final piece if it is non-empty.
    """
    if max_len < 1:
        raise ValueError("max_len must be at least 1")

    segments = []
    pos = 0
    while len(text) - pos > max_len:
        window = text[pos:pos + max_len]
        cut = max(window.rfind("。"), window.rfind("\n"))
        # No terminator in the window: emit the whole window.
        piece = window if cut == -1 else window[:cut + 1]
        segments.append(piece)
        pos += len(piece)
    if pos < len(text):
        segments.append(text[pos:])
    return segments


# Labeling data file upload
@router.post("/api/labeling-file-upload/{proj_id}")
async def create_upload_file(proj_id: int, file: UploadFile, db: Session = Depends(get_db)):
    """Save an uploaded text file and create one "NLP" task per text segment.

    The upload is written to ``FILE_STORAGE_PATH/tasks_file``, read back as
    UTF-8, every line is stripped, the lines are re-joined with newlines
    (leading empty lines dropped) and the result is split into segments of at
    most 256 characters, each of which is stored through ``create_task``.

    Args:
        proj_id: Project id taken from the URL path; passed to ``create_task``.
        file: The uploaded file.
        db: Database session injected through ``Depends(get_db)``.

    Returns:
        ``resp_200`` called with ``{"filename": <uploaded file name>}``.

    Raises:
        HTTPException: 400 when the file name is empty or unusable, or when
            the content is not valid UTF-8 text.
    """
    # The client controls file.filename; keep only its final path component so
    # a name such as "../../x" cannot escape the storage directory.
    safe_name = os.path.basename((file.filename or "").replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        raise HTTPException(status_code=400, detail="invalid file name")

    path = os.path.join(FILE_STORAGE_PATH, "tasks_file")
    # exist_ok avoids the check-then-create race between concurrent uploads.
    os.makedirs(path, exist_ok=True)
    new_filename = os.path.join(path, safe_name)

    # Save the uploaded file to local storage, copying it in fixed-size blocks.
    with open(new_filename, "wb") as out:
        for block in iter(lambda: file.file.read(64 * 1024), b""):
            out.write(block)

    try:
        with open(new_filename, "r", encoding="utf-8") as src:
            lines = [line.strip() for line in src]
    except UnicodeDecodeError as err:
        raise HTTPException(
            status_code=400, detail="file must be UTF-8 encoded text"
        ) from err

    # Stripped lines never start with "\n", so lstrip only removes the
    # separators produced by leading empty lines.
    text = "\n".join(lines).lstrip("\n")

    for segment in _split_into_segments(text):
        create_task(proj_id, "NLP", segment, db)

    return resp_200(data={"filename": file.filename})
- labeling_router = router  # Alias for router; presumably the name the application imports - TODO confirm.
|