Example #1
    out.print_verbose('Loading input')
    source1, source2 = load_input()
    out.print_verbose('Sizes of data sets: ', len(source1), len(source2))
    out.print_verbose('Words: ', struct.word_count(source1), struct.word_count(source2))

    '''
    /source.../ are structures that contain
    {
    0: { 'opinions': [('CÂMERA', 80.0)],
         'verbatim': 'Câmera boa.'}
    }
    '''
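    # Illustration (hypothetical access, based on the structure sketched above):
    # each entry pairs the extracted opinion tuples (aspect, score) with the
    # original sentence, e.g.
    #   aspect, score = source1[0]['opinions'][0]   # ('CÂMERA', 80.0)
    #   sentence = source1[0]['verbatim']           # 'Câmera boa.'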

    evaluate.reset()  # To start evaluating summaries of the current sources.
    output_files.new_source(SOURCE1, SOURCE2, source1, source2, METHOD_NAME)  # Prepare output files for the current sources.
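    # (SOURCE1/SOURCE2 appear to be identifiers for the raw input files, while
    # source1/source2 are the parsed structures; METHOD_NAME labels the output
    # files produced for this summarizer.)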

    map_scores_summary = {}

    distinct_summaries = set()

    time_total = 0

    out.print_verbose('Making summaries\n\n')

    print('     %5s %5s %5s %5s\n' % ('R', 'C', 'D', 'H'))

    for repeat in range(REPEAT_TESTS):
        time_initial = time()

        # Make summary
Example #2
    # Partition the entries of each source by the overall polarity of their
    # per-aspect sentiment scores.
    e1_pos, e1_neg, e2_pos, e2_neg = {}, {}, {}, {}

    for i in source1:
        if sum([source1[i]['sent'][a] for a in source1[i]['sent']]) > 0:
            e1_pos[i] = source1[i]
        if sum([source1[i]['sent'][a] for a in source1[i]['sent']]) < 0:
            e1_neg[i] = source1[i]

    for i in source2:
        if sum([source2[i]['sent'][a] for a in source2[i]['sent']]) > 0:
            e2_pos[i] = source2[i]
        if sum([source2[i]['sent'][a] for a in source2[i]['sent']]) < 0:
            e2_neg[i] = source2[i]

    stats_e1_pos = struct.aspects_stats_SIMILARITY(e1_pos)
    stats_e1_neg = struct.aspects_stats_SIMILARITY(e1_neg)
    stats_e2_pos = struct.aspects_stats_SIMILARITY(e2_pos)
    stats_e2_neg = struct.aspects_stats_SIMILARITY(e2_neg)
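    # Example (hypothetical values): an entry whose 'sent' dict is
    # {'tela': 80, 'cor': -100} sums to -20 and lands in the negative partition;
    # entries whose scores sum to exactly 0 are left out of both partitions.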

    evaluate.reset()  # To start evaluating summaries of the current sources.
    output_files.new_source(SOURCE1, SOURCE2, source1, source2, 'Similarity')  # Prepare output files for the current sources.

    map_scores_summary = {}

    distinct_summaries = set()

    time_total = 0

    out.print_verbose('Making summaries\n\n')

    print('     %5s %5s %5s %5s\n' % ('R', 'C', 'D', 'H'))

    w_e1_pos = word_count(e1_pos)
    w_e1_neg = word_count(e1_neg)
    w_e2_pos = word_count(e2_pos)
    w_e2_neg = word_count(e2_neg)
Example #3
        'word_count': 21}
    }
    '''

    # Get statistics about aspects in the source (mean, standard deviation, probability)
    stats_source_1 = struct.aspects_stats(source1)
    stats_source_2 = struct.aspects_stats(source2)
    '''
    /stats_.../ are structures of the form:
        {'tela': {'mean':  83, 'prob': 0.07, 'std': 0},
         'cor':  {'mean': -87, 'prob': 0.21, 'std': 1.73}}
    '''
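    # Reading the sample output above (the aspect names are Portuguese: 'tela' =
    # screen, 'cor' = color): 'mean' and 'std' summarize the polarity scores seen
    # for each aspect, and 'prob' is presumably the aspect's relative frequency in
    # the source, matching the "(mean, standard deviation, probability)" note above.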

    evaluate.reset()  # To start evaluating summaries of the current sources.
    output_files.new_source(
        SOURCE1, SOURCE2, source1, source2,
        'Statistic')  # Prepare output files for the current sources.

    map_scores_summary = {}

    distinct_summaries = set()

    time_total = 0

    out.print_verbose('Making summaries\n\n')

    print('     %5s %5s %5s %5s\n' % ('R', 'C', 'D', 'H'))

    for repeat in range(REPEAT_TESTS):

        time_initial = time()
Example #4
               'text_info': ['bom', 'maquinar', 'começar', 'fotografar'],
               'opinions': [['PRODUTO', 100]],
               'word_count': 8}
    }
    '''

    source1_proc = preprocess_CLUSTERING(source1)
    source2_proc = preprocess_CLUSTERING(source2)
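    # The returned structures appear to hold the entries partitioned by polarity,
    # keyed '+' (positive) and '-' (negative); the four subsets are unpacked below.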

    set1_pos = source1_proc['+']
    set1_neg = source1_proc['-']
    set2_pos = source2_proc['+']
    set2_neg = source2_proc['-']

    evaluate.reset()  # To start evaluating summaries of the current sources.
    output_files.new_source(SOURCE1, SOURCE2, source1, source2, 'Clustering')  # Prepare output files for the current sources.

    map_scores_summary = {}

    distinct_summaries = set()

    time_total = 0

    out.print_verbose('Making summaries\n\n')

    print('     %5s %5s %5s %5s\n' % ('R', 'C', 'D', 'H'))

    repeat = 0
    discarded = 0
    while repeat < REPEAT_TESTS:
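        # `repeat` presumably advances only when a generated summary is accepted,
        # while `discarded` counts rejected attempts; hence the while loop here
        # instead of the plain `for repeat in range(REPEAT_TESTS)` used by the
        # other methods.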