#!/usr/bin/python
# Prints the frequency of words found in standard input, sorted by
# frequency (most frequent first), after removing stop words and
# stripping off punctuation.
#
# Usage: python <this script> < some_text.txt
# Expects a file named "stop_words.txt" (one stop word per line) in the
# current working directory.

import sys
from operator import itemgetter

# Characters stripped from both ends of every whitespace-separated token.
punctuation = """'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'"""

# File holding the words to ignore, one per line.
STOP_WORDS_PATH = "stop_words.txt"


def load_stop_words(path=STOP_WORDS_PATH):
    """Return the set of stop words listed in *path*, one word per line.

    Surrounding whitespace is stripped from each line. The file is opened
    in a ``with`` block so the handle is closed even if reading fails.
    """
    with open(path) as handle:
        return set(line.strip() for line in handle)


def count_words(lines, stop_words):
    """Count how often each word occurs in *lines*.

    Each line is split on whitespace; every token has leading/trailing
    punctuation removed and is lower-cased before counting.

    Args:
        lines: iterable of text lines (e.g. ``sys.stdin`` or a list).
        stop_words: container of words that must not be counted.

    Returns:
        dict mapping each remaining word to its number of occurrences.
    """
    freq = {}
    for line in lines:
        for token in line.split():
            word = token.strip(punctuation).lower()
            # A token made only of punctuation ("--", "...") strips down to
            # the empty string; it is not a word, so do not count it.
            if not word or word in stop_words:
                continue
            freq[word] = freq.get(word, 0) + 1
    return freq


def sort_by_frequency(freq):
    """Return ``(word, count)`` pairs from *freq*, highest count first.

    ``sorted`` is stable, so words with equal counts keep the relative
    order in which *freq* yields them.
    """
    return sorted(freq.items(), key=itemgetter(1), reverse=True)


def main():
    """Read text from stdin and print one "<count> <word>" line per word."""
    stop_words = load_stop_words()
    ranked = sort_by_frequency(count_words(sys.stdin, stop_words))
    for word, count in ranked:
        # sys.stdout.write works unchanged on Python 2 and Python 3,
        # unlike the Python 2-only print statement.
        sys.stdout.write("%s %s\n" % (count, word))


if __name__ == "__main__":
    main()