Python基础:统计小说词频

题目要求:用Python统计小说中各单词出现的频次,并按频次由高到低排序。
读取文件
# Preview the beginning of the text file: read at most the first 100
# characters and print them.
# NOTE: no encoding is passed to open(), so the platform default is used.
with open('C:/Users/dell/Desktop/散文.txt', 'r') as f:
    # The with-statement closes the file even if read() raises.
    txt = f.read(100)
print(txt)

# Read the whole file as a list of lines and print that list.
with open('C:/Users/dell/Desktop/散文.txt', 'r') as f:
    # readlines() keeps the trailing newline character on each line.
    txt_lines = f.readlines()
print(txt_lines)

统计txt中单词的频次
import re
# Read the text, normalise it, and split it into a list of words.
with open('C:/Users/dell/Desktop/散文.txt', 'r') as f:
    # Only the first 1000 characters are read, so the last word may be cut
    # off; call f.read() with no argument to process the whole file.
    txt = f.read(1000)

# Lower-case first so that "The" and "the" are counted as the same word.
txt = txt.lower()
# Replace punctuation with a space so it does not stick to the words.
txt = re.sub('[,.?:"\'!-]', ' ', txt)
# Pre-process with lower() and the regular expression BEFORE splitting:
# both operate on a string, not on a list.
word = txt.split()
print(txt)
print(word)

词频统计
# Count how many times each word occurs: words_fre maps word -> count.
words_fre = {}
for w in word:
    # get() returns 0 for a word not seen yet, so one statement covers both
    # the "first occurrence" and the "seen before" cases.
    words_fre[w] = words_fre.get(w, 0) + 1
print(words_fre)

词频排序
# Take all (word, count) pairs out of the dictionary and sort them by count,
# highest first. sorted() returns a new list, so the result must be stored.
words_sorted = sorted(words_fre.items(), key=lambda item: item[1], reverse=True)
print(words_sorted)