#!/usr/bin/python
"""Print the frequency of each word read from standard input.

Words are split on whitespace, stripped of surrounding punctuation and
lowercased.  Words listed in ``stop_words.txt`` (one per line, read from the
current working directory) are ignored.  The result is written to standard
output as ``word count`` lines, most frequent word first.
"""

import sys
from operator import itemgetter

# punct is a string of chars we consider to be punctuation
punct = """'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'"""


def load_stop_words(path="stop_words.txt"):
    """Return the set of stop words stored one per line in *path*.

    Each line is stripped of surrounding whitespace.  The file is closed
    as soon as it has been read.
    """
    with open(path) as handle:
        return set(line.strip() for line in handle)


def count_words(lines, stop_words):
    """Return a dict mapping each word in *lines* to its occurrence count.

    *lines* is any iterable of strings.  Each whitespace-separated token is
    stripped of leading/trailing punctuation and lowercased before being
    looked up in *stop_words* (any container supporting ``in``).
    """
    freq = {}
    for line in lines:
        for token in line.split():
            word = token.strip(punct).lower()
            # A token made only of punctuation (e.g. "--") strips down to
            # the empty string; that is not a word, so do not count it.
            if word and word not in stop_words:
                freq[word] = freq.get(word, 0) + 1
    return freq


def sorted_by_frequency(freq):
    """Return ``(word, count)`` pairs from *freq*, highest count first."""
    return sorted(freq.items(), key=itemgetter(1), reverse=True)


def main():
    """Count the words on standard input and print them with their counts."""
    stop_words = load_stop_words()
    freq = count_words(sys.stdin, stop_words)
    for word, count in sorted_by_frequency(freq):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("%s %s" % (word, count))


if __name__ == "__main__":
    main()