# Word-frequency statistics: several ways to count tokens in a sentence.
sent = "python is my favoriate language,you're so cute ,someone like you ~~,do what shhhh"

# Quick one-liner: frequency of every distinct *character* in the sentence.
output = {k: sent.count(k) for k in set(sent)}
print(output)

# Quick one-liner: frequency of every space-separated token.
# Count inside the token list rather than the raw string: str.count matches
# substrings, so "so" would otherwise also be counted inside "someone".
raw_tokens = sent.split(" ")
output = {k: raw_tokens.count(k) for k in set(raw_tokens)}
print(output)

# Characters allowed to remain in a word; everything else becomes a separator.
chset = "abcdefghijklmnopqrstuvwxyz-' "
sent = sent.lower()  # normalise to lowercase so "Python" and "python" match
# Replace every character outside the whitelist with a space in one pass.
sent = "".join(c if c in chset else " " for c in sent)
# split() without an argument collapses runs of whitespace, so adjacent
# separators do not produce empty-string "words".
wd_lst = sent.split()

# Approach 1: accumulate counts in a dict while walking the word list.
dt = {}
for k in wd_lst:
    dt[k] = dt.get(k, 0) + 1

# Approach 2: take the distinct words, then count each one in the list.
wd_s = set(wd_lst)
dt1 = {k: wd_lst.count(k) for k in wd_s}

# Sort the (word, count) pairs by count, highest first. sorted() is stable,
# so words with equal counts keep their first-occurrence order.
a1 = sorted(dt.items(), key=lambda x: x[1], reverse=True)
本文探讨了Python中的字符串处理技术,包括词频统计和字符过滤。通过实例展示了如何使用Python对字符串进行频率统计,并将非字母字符替换为空格,再按空格切分提取单词。文章适合Python初学者和对字符串操作感兴趣的读者。

1万+

被折叠的 条评论
为什么被折叠?



