content = content.replace('.', ' ') content = content.replace('∙', ' ') content = content.replace('˚', '°') content = content.replace('~', ' ') content = content.replace('=', ' ') content = content.replace('≤', ' ') content = content.replace('−', '-') content = content.replace('α', 'a') content = content.replace('δ', 'b') return content #get row data from database data = Database() stopwords = nltk.corpus.stopwords.words('english') synopses = data.getAbstract() #initial abstract list abstract_list=[] TotalCountList = [] for i in synopses: abstract_id = i[0] abstract_list.append(str(abstract_id)) content = normalizeString(i[1]) allwords_stemmed = tokenize_and_stem(content) allwords_stemmed = [word for word in allwords_stemmed if word not in stopwords] count_list = count_words(allwords_stemmed) #print(tuple_count) TotalCountList.append(count_list)