示例#1
0
    with open(os.path.join(directory, myFile), 'r') as f:
        # Here, the JSON is converted back to a Python object
        transcript = json.load(f)
    transcripts.append(transcript)

# Accumulators: the campaign year of every transcript, plus a list for text
# lengths (lengths is not filled within this span).
years = []
lengths = []

for transcript in transcripts:

    # The date is stored as an ISO string; turn it back into a date object and
    # map its calendar year onto a campaign year.
    date = cf.iso_to_datetime(transcript['date'])
    year = cf.campaign_year_from_year(date.year)
    years.append(year)

    # Glue everything said in the debate into one string, with a single space
    # in front of each speaker's contribution.
    allText = "".join(
        " " + speaker['text'] for speaker in transcript['text_by_speakers'])

    # removes punctuation, digits, splits text into words
    # remove words shorter than 3 characters and suffixes

    for p in list(punctuation):
示例#2
0
        total_pos_words = len(
            [True for x in long_words if x in positive_words])
        total_neg_words = len(
            [True for x in long_words if x in negative_words])
        print total_pos_words, total_neg_words, word_count, year
        transcript_results.append(
            dict(total_pos_words=total_pos_words,
                 total_neg_words=total_neg_words,
                 word_count=word_count,
                 year=year))

    # Get a unique list of the years
    uniqueYears = list(
        set([
            cf.campaign_year_from_year(transcript_result['year'])
            for transcript_result in transcript_results
        ]))
    uniqueYears.sort()
    print uniqueYears
    year_results = []

    # For each unique year
    for uniqueYear in uniqueYears:

        transcript_results_for_year = [
            transcript_result for transcript_result in transcript_results if
            cf.campaign_year_from_year(transcript_result['year']) == uniqueYear
        ]

        word_count = sum([
示例#3
0
    # Distinct values found in years; a set has no defined order, so this list
    # is not sorted.
    uniqueYears = list(set(years))

    # One simplicity ratio per retained year, appended in uniqueYears order.
    uniquesimplewords = []

    for uniqueYear in uniqueYears:
        # Positions of every text whose campaign year equals this year.
        matching = [
            number for number in range(len(years))
            if cf.campaign_year_from_year(years[number]) == uniqueYear
        ]
        simplewordsforyear = [simplicity[number] for number in matching]
        nounsforyear = [noun_numbers[number] for number in matching]

        # Ratio of the summed simplicity to the summed noun count for the
        # year. A year whose noun total is zero (including a year with no
        # matching text) adds no entry, which also avoids dividing by zero.
        # NOTE(review): if both sums are ints and this runs under Python 2
        # without true division, "/" truncates - confirm the operand types.
        noun_total = sum(nounsforyear)
        if noun_total:
            uniquesimplewords.append(sum(simplewordsforyear) / noun_total)

    # Record this party's series as two lists under 'data': the unique years
    # divisible by 4, and the per-year simplicity ratios.
    # NOTE(review): the year list is filtered by "year % 4" while
    # uniquesimplewords skips years whose noun total is zero - the two lists
    # can differ in length; confirm they are meant to pair up element-wise.
    results.append({
        'party':
        party,
        'data': [[year for year in uniqueYears if not year % 4],
                 uniquesimplewords]
    })