# Split a UTF-8 text file into chunks of at most MAX_CHUNK_LEN characters,
# preferring to end each chunk on a sentence terminator ("。" or a newline),
# and print every chunk together with its length.

new_filename = r'D:\work\data\典型病例\典型病例合并后的数据1.txt'

MAX_CHUNK_LEN = 256
SEPARATOR = "*" * 60
# A chunk that already ends with one of these needs no further trimming.
TERMINATORS = ("。", "\n")


def split_chunks(lines, max_len=MAX_CHUNK_LEN, *, trace=False):
    """Yield pieces of the joined input, each at most ``max_len`` characters.

    Every input line is stripped of surrounding whitespace and the lines are
    joined with a single newline.  Whenever the pending text exceeds
    ``max_len``, the first ``max_len`` characters are taken as a candidate
    chunk.  If the candidate does not end with "。" or a newline, it is cut
    back to the last such terminator it contains and the remainder is pushed
    back in front of the pending text.  A candidate with no terminator at all
    is emitted as a hard cut of exactly ``max_len`` characters.

    After the input is exhausted, any pending text is yielded as a final
    chunk, so concatenating all chunks reproduces the joined input.

    Args:
        lines: Iterable of strings (for example an open text file).
        max_len: Maximum chunk length in characters; must be at least 1.
        trace: When true, print the separator and "found end char" debug
            lines to stdout while splitting.

    Yields:
        The chunks, in order.

    Raises:
        ValueError: If ``max_len`` is smaller than 1.  Because this is a
            generator, the error surfaces when iteration starts.
    """
    if max_len < 1:
        # A non-positive limit would produce empty chunks forever.
        raise ValueError("max_len must be at least 1")

    pending = ""
    for raw_line in lines:
        line = raw_line.strip()
        pending = f"{pending}\n{line}" if pending else line

        while len(pending) > max_len:
            chunk, pending = pending[:max_len], pending[max_len:]
            if trace:
                print(SEPARATOR)

            if not chunk.endswith(TERMINATORS):
                last_stop = chunk.rfind("。")
                last_newline = chunk.rfind("\n")
                if last_stop == -1 and last_newline == -1:
                    # No terminator anywhere: keep the hard cut at max_len.
                    last_stop = len(chunk)
                if trace:
                    print("found end char: ", last_stop, last_newline)
                    print(SEPARATOR)

                cut = max(last_stop, last_newline)
                # Return the unfinished sentence to the front of the buffer
                # so it starts the next chunk.
                pending = chunk[cut + 1:] + pending
                chunk = chunk[:cut + 1]

            yield chunk

    # Flush the tail; without this the last (<= max_len) characters of the
    # input would never be emitted.
    if pending:
        if trace:
            print(SEPARATOR)
        yield pending


def main():
    """Read ``new_filename`` and print each chunk preceded by its length."""
    with open(new_filename, 'r', encoding="utf-8") as f:
        # Iterate the file object directly instead of loading every line
        # into memory with readlines().
        for chunk in split_chunks(f, trace=True):
            print(len(chunk), chunk)


if __name__ == "__main__":
    main()