# Fragment summary (the statements below appear collapsed onto a single line):
#   1. Parse the opened transcript file with json.load and append the result to `transcripts`.
#   2. For every transcript, convert transcript['date'] with cf.iso_to_datetime, map date.year
#      to a campaign year with cf.campaign_year_from_year, and record it in `years`.
#   3. Join every speaker['text'] found in transcript['text_by_speakers'] into `allText`,
#      each piece prefixed with a single space, ahead of the punctuation loop.
# NOTE(review): line breaks and indentation appear to have been lost. As written, everything
#   after the first '#' is parsed as a comment, so only the `with` header is live code.
#   Restore the layout from the original file before running this.
# NOTE(review): `lengths` is initialised here but never filled in the visible portion, and the
#   trailing `for p in list(punctuation):` has no visible body - confirm what follows it.
with open(os.path.join(directory, myFile), 'r') as f: # Here, the JSON is converted back to a Python object transcript = json.load(f) transcripts.append(transcript) # Create lists for the years and the length of the text for each year. years = [] lengths = [] # Go through each transcript for transcript in transcripts: # Get the date - converting the ISO date back into a datetime.date object date = cf.iso_to_datetime(transcript['date']) # Convert the year into a campaign year year = cf.campaign_year_from_year(date.year) years.append(year) # Create a string for all of the text in the debate allText = "" # Add all the text spoken by speakers to that string for speaker in transcript['text_by_speakers']: allText += (" " + speaker['text']) # removes punctuation, digits, splits text into words # remove words shorter than 3 characters and suffixes for p in list(punctuation):
# Fragment summary (the statements below appear collapsed onto a single line):
#   1. Count how many entries of `long_words` occur in `positive_words` and in `negative_words`.
#   2. Print both counts with `word_count` and `year`, then append them as a dict to
#      `transcript_results`.
#   3. Build the sorted list of distinct campaign years found in `transcript_results`, print it,
#      and start aggregating the transcript results that belong to each of those years.
# NOTE(review): the bare `print x, y` statements are Python 2 syntax; this will not parse
#   under Python 3.
# NOTE(review): line breaks and indentation appear to have been lost, and the fragment stops
#   inside an unclosed `sum([` - the rest of the per-year aggregation is not visible here.
# NOTE(review): `word_count` is read for the per-transcript dict and then reassigned inside the
#   per-year loop; confirm the reuse of the name is intentional.
# NOTE(review): cf.campaign_year_from_year is applied to transcript_result['year'] both when
#   building uniqueYears and when filtering; whether 'year' already holds a campaign year is
#   not visible here - confirm against the code that sets `year`.
total_pos_words = len( [True for x in long_words if x in positive_words]) total_neg_words = len( [True for x in long_words if x in negative_words]) print total_pos_words, total_neg_words, word_count, year transcript_results.append( dict(total_pos_words=total_pos_words, total_neg_words=total_neg_words, word_count=word_count, year=year)) # Get a unique list of the years uniqueYears = list( set([ cf.campaign_year_from_year(transcript_result['year']) for transcript_result in transcript_results ])) uniqueYears.sort() print uniqueYears year_results = [] # For each unique year for uniqueYear in uniqueYears: transcript_results_for_year = [ transcript_result for transcript_result in transcript_results if cf.campaign_year_from_year(transcript_result['year']) == uniqueYear ] word_count = sum([
# Aggregate the per-transcript simplicity figures into one ratio per year and
# append the resulting series for `party` to `results`.
#
# Reads:  years, simplicity, noun_numbers (indexed in parallel, one entry per
#         transcript), party, cf.campaign_year_from_year
# Writes: uniqueYears, uniquesimplewords; appends one dict to results
#
# Sorted so the emitted series runs chronologically; iterating a bare set
# yields the years in arbitrary order.
uniqueYears = sorted(set(years))

# Resolve each transcript's campaign year once instead of once per unique year.
# NOTE(review): assumes cf.campaign_year_from_year has no side effects - confirm.
campaignYears = [cf.campaign_year_from_year(year) for year in years]

# x and y values of the series. They are appended together so the two lists
# always have the same length and stay paired by position.
plotYears = []
uniquesimplewords = []

for uniqueYear in uniqueYears:
    # Collect the simplicity and noun counts of every transcript whose
    # campaign year equals this unique year.
    simplewordsforyear = []
    nounsforyear = []
    for number, campaignYear in enumerate(campaignYears):
        if campaignYear == uniqueYear:
            simplewordsforyear.append(simplicity[number])
            nounsforyear.append(noun_numbers[number])

    totalNouns = sum(nounsforyear)

    # Keep only years divisible by four (the filter previously applied to the
    # year list alone) that also have a non-zero noun total (the filter
    # previously applied to the value list alone, which also guards the
    # division below).
    if uniqueYear % 4 or not totalNouns:
        continue

    plotYears.append(uniqueYear)
    # float() forces true division; under Python 2 two integer sums would
    # otherwise be floor-divided.
    uniquesimplewords.append(float(sum(simplewordsforyear)) / totalNouns)

results.append({
    'party': party,
    'data': [plotYears, uniquesimplewords],
})