统计一篇英语文章每个单词出现的频率

从文件中读取一篇英语文章，统计每个单词出现的频率，输出出现次数最多的十个单词及其出现次数，并绘制成柱状图。


import turtle

# Number of top-ranked words that main() prints and the chart plots.
count = 10
# Horizontal scale: bar number i is centred at x = i * xscale.
xscale = 30
# Vertical scale: a value v is drawn as a bar of height v * yscale.
yscale = 6
# Filled by main(): occurrence counts of the selected words, highest first.
data = []
# Filled by main(): the selected words, index-aligned with `data`.
words = []

# Translation table mapping each punctuation character to a single space.
# The set must contain punctuation only: any letter listed here would be
# blanked out of every word and corrupt the word counts.
_PUNCTUATION_TO_SPACE = str.maketrans(
    {ch: " " for ch in "~!@#$%^&*()_-+=<>?/,.:;{}[]|'\""}
)


def replacepunctuations(line):
    """Return *line* with every punctuation character replaced by a space.

    Each punctuation character becomes exactly one space, so the length of
    the string is preserved and adjacent words stay separated.  Letters,
    digits and existing whitespace are left untouched.

    Args:
        line: The text to clean.

    Returns:
        A new string; *line* itself is not modified.
    """
    # One pass over the string instead of one str.replace() per character.
    return line.translate(_PUNCTUATION_TO_SPACE)
     
def processline(line, wordcounts):
    """Add the words found in one line of text to the running tally.

    The line is passed through replacepunctuations(), split on whitespace,
    and every resulting token has its entry in *wordcounts* incremented
    (tokens seen for the first time start at 1).

    Args:
        line: One line of text.
        wordcounts: Mapping of word -> number of occurrences; updated in
            place.
    """
    for token in replacepunctuations(line).split():
        wordcounts[token] = wordcounts.get(token, 0) + 1

def line(t, x1, y1, x2, y2):
    """Draw a straight segment from (x1, y1) to (x2, y2) with turtle *t*.

    The pen is lifted before moving to the start point, so nothing is drawn
    on the way there.  Original note: used for drawing the x and y axes.
    """
    start, end = (x1, y1), (x2, y2)
    t.penup()
    t.goto(*start)
    t.pendown()
    t.goto(*end)
     
def drawtext(t, x, y, text):
    """Write *text* at position (x, y) using turtle *t*.

    Used for the word label under each bar and the count above it.  The pen
    is lifted for the move so no stroke is drawn, then put down again before
    the text is written.
    """
    anchor = (x, y)
    t.penup()
    t.goto(*anchor)
    t.pendown()
    t.write(text)

def drawline(t, x1, y1, x2, y2):
    """Draw a straight segment from (x1, y1) to (x2, y2) with turtle *t*.

    The turtle travels to the first point with the pen up and to the second
    point with the pen down, so only the segment itself is drawn.
    """
    # Each step sets the pen state, then moves to the matching point.
    for set_pen, point in ((t.penup, (x1, y1)), (t.pendown, (x2, y2))):
        set_pen()
        t.goto(*point)

def drawrectangle(t, x, y):
    """Draw the outline of one bar of the chart with turtle *t*.

    Args:
        t: Turtle used for drawing.
        x: Bar position in chart units; scaled by ``xscale`` to get the
            horizontal centre of the bar.
        y: Bar value in chart units; scaled by ``yscale`` to get the height.

    The bar is 10 units wide (5 either side of the centre) and sits on the
    line y = 0.
    """
    centre = x * xscale
    top = y * yscale
    left, right = centre - 5, centre + 5

    # Corners in drawing order: up the left side, across the top, down the
    # right side, and back along the base.
    corners = [(left, 0), (left, top), (right, top), (right, 0)]
    for (ax, ay), (bx, by) in zip(corners, corners[1:] + corners[:1]):
        drawline(t, ax, ay, bx, by)

def drawbar(t):
    """Draw one bar for each value in the module-level ``data`` list.

    Bars are numbered from 1, left to right, in the order the values appear
    in ``data``.  At most ``count`` bars are drawn; if ``data`` holds fewer
    values, only those are drawn instead of raising IndexError.

    Args:
        t: Turtle used for drawing.
    """
    for position, value in enumerate(data[:count], start=1):
        drawrectangle(t, position, value)

def drawgraph(t):
    """Draw the complete chart: axes, labels and bars.

    For each (word, value) pair taken from the module-level ``words`` and
    ``data`` lists, the word is written below the x axis and the value just
    above where the top of its bar will be; the bars themselves are then
    drawn by drawbar().  At most ``count`` pairs are shown, and fewer are
    handled gracefully when the lists are shorter.

    Args:
        t: Turtle used for drawing.
    """
    drawline(t, 0, 0, 360, 0)  # x axis
    drawline(t, 0, 300, 0, 0)  # y axis

    # zip() stops at the shorter list, so a short result set cannot cause
    # an out-of-range index.
    shown = list(zip(words, data))[:count]
    for position, (word, value) in enumerate(shown, start=1):
        label_x = position * xscale - 5
        drawtext(t, label_x, -20, word)
        drawtext(t, label_x, value * yscale + 10, value)
    drawbar(t)
     
def main():
    """Prompt for a text file, count its words and chart the most frequent.

    Steps:
      1. Read the file line by line, lower-casing each line so that counting
         is case-insensitive.
      2. Rank the words by number of occurrences (ties broken by the word
         itself, in descending order) and keep at most ``count`` of them.
      3. Print each kept word with its count, tab-separated.
      4. Store the result in the module-level ``words`` and ``data`` lists
         and draw the bar chart in a turtle window.

    The call blocks until the turtle window is closed.

    Raises:
        OSError: If the file cannot be opened.
    """
    filename = input("enter a filename:").strip()

    wordcounts = {}
    # The context manager closes the file even if processing a line fails.
    with open(filename, "r") as infile:
        for text_line in infile:
            processline(text_line.lower(), wordcounts)

    # Highest count first; slicing copes with texts that contain fewer than
    # `count` distinct words.
    ranked = sorted(
        wordcounts.items(),
        key=lambda pair: (pair[1], pair[0]),
        reverse=True,
    )
    top = ranked[:count]

    for word, occurrences in top:
        print(word + "\t" + str(occurrences))

    # Replace the shared lists' contents rather than appending, so calling
    # main() again does not pile new results on top of old ones.
    data[:] = [occurrences for _, occurrences in top]
    words[:] = [word for word, _ in top]

    turtle.title('词频结果柱状图')
    turtle.setup(900, 750, 0, 0)
    t = turtle.Turtle()
    t.width(3)
    drawgraph(t)
    # Keep the window open until the user closes it; without this the
    # chart disappears as soon as the script finishes.
    turtle.done()

# Run the program only when this file is executed as a script, so that
# importing it does not prompt for input or open a window.
if __name__ == "__main__":
    main()


版权声明:本文为land_Jeep原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接:https://blog.csdn.net/land_Jeep/article/details/71016152