123456789101112131415161718192021222324252627282930313233343536373839 |
- import logging
- from typing import List
- import numpy as np
- import requests
- from requests.adapters import HTTPAdapter
- from urllib3.util.retry import Retry
- from utils.embed_helper import EmbedHelper
- from utils.vector_distance import VectorDistance
- logger = logging.getLogger(__name__)
class Vectorizer:
    """Facade over ``EmbedHelper`` for embedding text and splitting it into chunks.

    A shared instance is available through :meth:`get_instance`; constructing
    the class directly is still possible and yields an independent object.
    """

    # Shared instance cached by ``get_instance()``; ``None`` until first use.
    _instance = None

    def __init__(self):
        # Attribute name is kept as-is (camelCase) because it is part of the
        # object's visible surface and callers may already rely on it.
        self.embedHelper = EmbedHelper()

    def get_embedding(self, text: str) -> List[float]:
        """Return the embedding of ``text`` as produced by ``embedHelper.embed_text``."""
        return self.embedHelper.embed_text(text)

    @classmethod
    def get_instance(cls):
        """Return the cached shared instance, creating it on the first call.

        No locking is performed, so concurrent first calls may each construct
        an instance; the last assignment wins.
        """
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    def chunk_text(self, text: str, chunk_size: int = 500) -> List[str]:
        """Split ``text`` into consecutive pieces of at most ``chunk_size`` tokens.

        The text is tokenized, the token list is cut into windows of
        ``chunk_size`` tokens (the last window may be shorter), and every
        window is converted back into a string.

        The tokenizer is taken from ``self.tokenizer`` when that attribute is
        set, otherwise from ``self.embedHelper.tokenizer`` when present. It
        must provide ``tokenize(text)`` and ``convert_tokens_to_string(tokens)``.

        Args:
            text: The text to split.
            chunk_size: Maximum number of tokens per chunk; must be positive.

        Returns:
            The chunk strings in original order; an empty list when the
            tokenizer yields no tokens.

        Raises:
            ValueError: If ``chunk_size`` is not positive.
            AttributeError: If no tokenizer is available on the instance or
                on its embed helper.
        """
        # Fail early with a clear message: a zero step makes range() raise an
        # opaque error and a negative step would silently produce no chunks.
        if chunk_size <= 0:
            raise ValueError(
                "chunk_size must be a positive integer, got %r" % (chunk_size,)
            )

        # __init__ never assigns ``tokenizer``, so look it up defensively
        # instead of assuming the attribute exists.
        tokenizer = getattr(self, "tokenizer", None)
        if tokenizer is None:
            helper = getattr(self, "embedHelper", None)
            tokenizer = getattr(helper, "tokenizer", None)
        if tokenizer is None:
            raise AttributeError(
                "Vectorizer has no tokenizer: assign `tokenizer` on the instance "
                "or provide one via `embedHelper.tokenizer` before calling chunk_text()"
            )

        tokens = tokenizer.tokenize(text)
        return [
            tokenizer.convert_tokens_to_string(tokens[start:start + chunk_size])
            for start in range(0, len(tokens), chunk_size)
        ]
if __name__ == "__main__":
    # Manual smoke test: echo a short sample string, then run it through the
    # shared Vectorizer. The resulting embedding is kept but not printed.
    text = "你好"
    print(text)
    vectorizer = Vectorizer.get_instance()
    embedding2 = vectorizer.get_embedding(text)
|