"""Process-wide access to text embeddings and token-based text chunking."""

import logging
from typing import List

# NOTE(review): numpy, requests, HTTPAdapter, Retry and VectorDistance are not
# referenced anywhere in this module as shown; they are kept untouched in case
# other code relies on them being importable from here -- confirm before pruning.
import numpy as np
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

from utils.embed_helper import EmbedHelper
from utils.vector_distance import VectorDistance

logger = logging.getLogger(__name__)


class Vectorizer:
    """Wrap an ``EmbedHelper`` and expose it through a lazily created shared instance.

    Use :meth:`get_instance` to obtain the shared object; calling the
    constructor directly still works and creates an independent instance.
    """

    # Shared instance, created on the first call to ``get_instance``.
    _instance = None

    def __init__(self):
        """Create the embedding helper used by :meth:`get_embedding`."""
        self.embedHelper = EmbedHelper()

    def get_embedding(self, text: str) -> List[float]:
        """Return the embedding of *text* as produced by ``EmbedHelper.embed_text``."""
        return self.embedHelper.embed_text(text)

    @classmethod
    def get_instance(cls):
        """Return the shared instance, constructing it on first use.

        NOTE(review): creation is not guarded by a lock; if this can be called
        concurrently from several threads, more than one instance may be built.
        """
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    def _resolve_tokenizer(self):
        """Return the tokenizer to use for chunking.

        Looks for a ``tokenizer`` attribute on this instance first (callers may
        assign one), then on the embed helper.

        Raises:
            AttributeError: if no tokenizer can be found in either place.
        """
        tokenizer = getattr(self, "tokenizer", None)
        if tokenizer is None:
            # Presumably EmbedHelper carries the model's tokenizer -- TODO confirm.
            helper = getattr(self, "embedHelper", None)
            tokenizer = getattr(helper, "tokenizer", None)
        if tokenizer is None:
            raise AttributeError(
                "Vectorizer has no tokenizer: assign `tokenizer` on the instance "
                "or expose one on EmbedHelper before calling chunk_text()"
            )
        return tokenizer

    def chunk_text(self, text: str, chunk_size: int = 500) -> List[str]:
        """Split *text* into pieces of at most *chunk_size* tokens each.

        The text is tokenized, the token list is cut into consecutive
        windows of ``chunk_size`` tokens, and every window is converted back to
        a string. The tokenizer must provide ``tokenize`` and
        ``convert_tokens_to_string`` (presumably a Hugging Face-style
        tokenizer -- confirm against EmbedHelper).

        Args:
            text: The text to split.
            chunk_size: Maximum number of tokens per chunk; must be positive.

        Returns:
            The chunks in original order; empty when tokenization yields no tokens.

        Raises:
            ValueError: if ``chunk_size`` is not a positive integer.
            AttributeError: if no tokenizer is available.
        """
        # Fail fast: a negative step would silently yield no chunks, and zero
        # would only surface as an opaque range() error after tokenizing.
        if chunk_size <= 0:
            raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

        tokenizer = self._resolve_tokenizer()
        tokens = tokenizer.tokenize(text)
        return [
            tokenizer.convert_tokens_to_string(tokens[start:start + chunk_size])
            for start in range(0, len(tokens), chunk_size)
        ]


if __name__ == '__main__':
    # Manual smoke run: embed a short Chinese greeting through the shared instance.
    text = '你好'
    print(text)
    embedding2 = Vectorizer.get_instance().get_embedding(text)