# python / self-constructing_graph
							from fastapi import APIRouter, Depends, HTTPException, UploadFile
from sqlalchemy.orm import Session
from sqlalchemy.sql import select, or_, and_, func, distinct
from db.schemas import KgTask, KgProj, KgProjCreate, KgTaskCreate, KgTaskUpdate
from db.models import DbKgProj, DbKgTask
from db.database import get_db
from models.response import ResponseModel
from utils.response import resp_200
from typing import List
from datetime import datetime
from math import ceil
from config.site import FILE_STORAGE_PATH
import os
import json

# Router on which this module's endpoints are registered via @router.<method>.
router = APIRouter()

def create_task( proj_id, task_category, content: str,db:Session):
    """Create a task row for a project, persist it, and return it.

    Args:
        proj_id: Value stored in the task's ``proj_id`` column.
        task_category: Value stored in the task's ``task_category`` column.
        content: Text stored as the task's ``task_content``.
        db: Session used to add, commit and refresh the new row.

    Returns:
        The ``DbKgTask`` instance after it has been committed and refreshed.
    """
    print("create task")
    # Take the timestamp once so a freshly created row has created == updated;
    # two separate now() calls would differ by a few microseconds.
    now = datetime.now()
    task = DbKgTask()
    task.proj_id = proj_id
    task.task_category = task_category
    task.task_log = ""
    task.task_content = content
    # Initial status value; presumably "pending" -- confirm against the status definitions.
    task.status = 0
    task.created = now
    task.updated = now
    db.add(task)
    db.commit()
    # Reload the row so database-generated fields are populated on the returned object.
    db.refresh(task)
    return task
def _iter_text_chunks(lines, max_len=256):
    """Yield task-sized chunks of text built from an iterable of lines.

    Each line is stripped and the lines are re-joined with ``"\\n"``. Whenever
    the buffered text grows beyond ``max_len`` characters, a chunk is cut from
    its front: it ends at the last ``"。"`` or newline found within the first
    ``max_len`` characters, or is exactly ``max_len`` characters long when
    neither occurs. Any non-empty remainder is yielded at the end.

    Args:
        lines: Iterable of text lines (trailing newlines allowed).
        max_len: Maximum chunk length in characters; must be at least 1.

    Yields:
        Chunks of at most ``max_len`` characters, in document order.

    Raises:
        ValueError: If ``max_len`` is smaller than 1.
    """
    if max_len < 1:
        # A non-positive window would never consume the buffer.
        raise ValueError("max_len must be at least 1")

    pending = ""
    for raw_line in lines:
        line = raw_line.strip()
        # An empty buffer takes the line as is; otherwise separate with a newline.
        pending = pending + "\n" + line if pending else line

        while len(pending) > max_len:
            window = pending[:max_len]
            # Prefer to end the chunk on a sentence or line boundary.
            cut = max(window.rfind("。"), window.rfind("\n"))
            if cut == -1:
                # No boundary in the window: emit the whole window.
                cut = len(window) - 1
            yield pending[:cut + 1]
            pending = pending[cut + 1:]

    if pending:
        yield pending


# Upload a labeling data file and split it into tasks
@router.post("/api/labeling-file-upload/{proj_id}")
async def create_upload_file(proj_id: int, file: UploadFile, db: Session = Depends(get_db)):
    """Store an uploaded text file and create one "NLP" task per text chunk.

    The upload is saved under ``FILE_STORAGE_PATH + "/tasks_file"``, read back
    as UTF-8 text, split into chunks of at most 256 characters, and each chunk
    is passed to ``create_task`` for the given project.

    Args:
        proj_id: Project id taken from the URL path; passed to ``create_task``.
        file: The uploaded file.
        db: Database session provided by the ``get_db`` dependency.

    Returns:
        The result of ``resp_200`` with the stored file name under ``"filename"``.

    Raises:
        HTTPException: With status 400 when the upload has no usable file name
            or its content is not valid UTF-8.
    """
    # The file name is client-controlled: keep only its last path component so
    # values such as "../../x" cannot escape the storage directory.
    safe_name = os.path.basename((file.filename or "").replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        raise HTTPException(status_code=400, detail="Invalid file name")

    storage_dir = FILE_STORAGE_PATH + "/tasks_file"
    # exist_ok avoids the check-then-create race between concurrent uploads.
    os.makedirs(storage_dir, exist_ok=True)
    stored_path = storage_dir + "/" + safe_name

    # Save the uploaded file to local storage, copying in 64 KiB blocks.
    with open(stored_path, "wb") as out:
        for block in iter(lambda: file.file.read(65536), b""):
            out.write(block)

    # Decode the whole file before creating any task, so an undecodable upload
    # is rejected without leaving a partial set of tasks behind.
    try:
        with open(stored_path, "r", encoding="utf-8") as src:
            lines = src.readlines()
    except UnicodeDecodeError as err:
        raise HTTPException(
            status_code=400, detail="File must be UTF-8 encoded text"
        ) from err

    for chunk in _iter_text_chunks(lines):
        create_task(proj_id, "NLP", chunk, db)

    return resp_200(data={"filename": safe_name})


# Expose the same router object under a module-specific name.
# NOTE(review): presumably this is the name the application imports -- verify against the caller.
labeling_router = router