# python / self-constructing_graph
import jieba
import os

# Module-level state shared by the rest of the script:
#   word_dict     - dictionary terms, in file order
#   word_freq     - occurrence count per dictionary term (starts at 0)
#   word_freq_doc - occurrence count per token seen in the corpus
word_dict = []
word_freq = {}
word_freq_doc = {}

# Register each term from the dictionary file with jieba and seed its counter.
with open("./dict/legal_terms.txt", "r", encoding="utf-8") as f:
    for line in f:
        term = line.strip()
        if not term:
            # A blank line would otherwise register "" as a jieba word and
            # create an empty-string key in word_freq.
            continue
        jieba.add_word(term)
        word_dict.append(term)
        word_freq[term] = 0
# Walk every file under ./docs, tokenize it line by line with jieba, and
# tally token occurrences.
#   word_freq     - incremented only for tokens already present as keys
#   word_freq_doc - incremented for every non-blank token; despite the name
#                   it counts occurrences, not the number of files a token
#                   appears in
path = "./docs"
for root, dirs, files in os.walk(path):
    for file in files:
        file_path = os.path.join(root, file)
        print(file_path)
        with open(file_path, "r", encoding="utf-8") as f:
            for line in f:
                words = jieba.cut(line)
                for w in words:
                    if not w.strip():
                        # jieba emits whitespace runs and the trailing
                        # newline as tokens; counting them would put a raw
                        # newline key into the line-oriented report written
                        # from word_freq_doc.
                        continue
                    if w in word_freq:
                        word_freq[w] += 1
                    word_freq_doc[w] = word_freq_doc.get(w, 0) + 1
                        
# Dump the counters in word_freq, one "<term> <count>" line per entry,
# in dict insertion order.
with open("word_feq.txt", "w", encoding="utf-8") as f:
    f.writelines(f"{term} {count}\n" for term, count in word_freq.items())
        
# Dump the counters in word_freq_doc, one "<token> <count>" line per entry,
# in dict insertion order.
with open("word_feq_doc.txt", "w", encoding="utf-8") as f:
    f.writelines(
        f"{token} {count}\n" for token, count in word_freq_doc.items()
    )