# Count word frequencies over every file under ./docs using jieba segmentation.
#
# Inputs:
#   ./dict/legal_terms.txt  - one custom term per line (UTF-8).
#   ./docs/**               - UTF-8 text files, walked recursively.
# Outputs (written to the current working directory):
#   word_feq.txt            - "<term> <count>" for every dictionary term,
#                             including terms that never occur (count 0).
#   word_feq_doc.txt        - "<token> <count>" for every token seen in docs.
import os

import jieba

word_dict = []       # dictionary terms, in file order
word_freq = {}       # dictionary term -> occurrences across all documents
word_freq_doc = {}   # any token produced by jieba -> occurrences

with open("./dict/legal_terms.txt", "r", encoding="utf-8") as f:
    for line in f:
        term = line.strip()
        # Blank lines would otherwise register an empty-string "term" with
        # jieba and produce a meaningless " 0" row in the output.
        if not term:
            continue
        # Registering the term makes jieba keep it as a single token.
        jieba.add_word(term)
        word_dict.append(term)
        word_freq[term] = 0

path = "./docs"
for root, _dirs, files in os.walk(path):
    for file in files:
        file_path = os.path.join(root, file)
        print(file_path)
        with open(file_path, "r", encoding="utf-8") as f:
            for line in f:
                for w in jieba.cut(line):
                    # jieba yields whitespace runs (spaces, the trailing
                    # newline) as tokens; counting them would corrupt the
                    # line-oriented "<token> <count>" output format.
                    if not w.strip():
                        continue
                    if w in word_freq:
                        word_freq[w] += 1
                    # NOTE(review): the original formatting was lost, so it is
                    # assumed this counter covers every token rather than only
                    # dictionary terms - confirm against the intended output.
                    word_freq_doc[w] = word_freq_doc.get(w, 0) + 1

# Output file names (including the "feq" spelling) are kept as-is because
# other tooling may read them by name.
with open("word_feq.txt", "w", encoding="utf-8") as f:
    for term, count in word_freq.items():
        f.write(f"{term} {count}\n")

with open("word_feq_doc.txt", "w", encoding="utf-8") as f:
    for token, count in word_freq_doc.items():
        f.write(f"{token} {count}\n")