out.print_verbose('Loading input')
source1, source2 = load_input()
out.print_verbose('Sizes of data sets: ', len(source1), len(source2))
out.print_verbose('Words: ', struct.word_count(source1), struct.word_count(source2))
'''
/source.../ are structures that contain
{ 0: { 'opinions': [('CÂMERA', 80.0)],
       'verbatim': 'Câmera boa.'} }
'''
evaluate.reset()  # To start evaluating summaries of the current sources.
output_files.new_source(SOURCE1, SOURCE2, source1, source2, METHOD_NAME)  # Prepare output files for the current sources.
map_scores_summary = {}
distinct_summaries = set()
time_total = 0
out.print_verbose('Making summaries\n\n')
print(' %5s %5s %5s %5s\n' % ('R', 'C', 'D', 'H'))

for repeat in range(REPEAT_TESTS):
    time_initial = time()
    # Make summary
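    # Illustrative sketch (assumption, not part of the original code): given the
    # /source.../ layout shown above, a word count such as struct.word_count(source)
    # could plausibly be taken from a per-entry 'word_count' field or from the
    # verbatim text, e.g.:
    #     sum(len(entry['verbatim'].split()) for entry in source.values())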
    if sum([source1[i]['sent'][a] for a in source1[i]['sent']]) < 0:
        e1_neg[i] = source1[i]
for i in source2:
    if sum([source2[i]['sent'][a] for a in source2[i]['sent']]) > 0:
        e2_pos[i] = source2[i]
    if sum([source2[i]['sent'][a] for a in source2[i]['sent']]) < 0:
        e2_neg[i] = source2[i]

# Aspect statistics for each polarity subset.
stats_e1_pos = struct.aspects_stats_SIMILARITY(e1_pos)
stats_e1_neg = struct.aspects_stats_SIMILARITY(e1_neg)
stats_e2_pos = struct.aspects_stats_SIMILARITY(e2_pos)
stats_e2_neg = struct.aspects_stats_SIMILARITY(e2_neg)

evaluate.reset()  # To start evaluating summaries of the current sources.
output_files.new_source(SOURCE1, SOURCE2, source1, source2, 'Similarity')  # Prepare output files for the current sources.
map_scores_summary = {}
distinct_summaries = set()
time_total = 0
out.print_verbose('Making summaries\n\n')
print(' %5s %5s %5s %5s\n' % ('R', 'C', 'D', 'H'))

# Word count of each polarity subset.
w_e1_pos = word_count(e1_pos)
w_e1_neg = word_count(e1_neg)
w_e2_pos = word_count(e2_pos)
w_e2_neg = word_count(e2_neg)
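# Illustrative sketch (not part of the original code): the polarity split above,
# folded into a reusable helper. It assumes each entry stores per-aspect sentiment
# scores under 'sent' and classifies the entry by the sign of their sum; entries
# whose scores sum to zero are left out, exactly as in the explicit loops above.
def _split_by_polarity_sketch(source):
    pos, neg = {}, {}
    for i, entry in source.items():
        total = sum(entry['sent'].values())
        if total > 0:
            pos[i] = entry
        elif total < 0:
            neg[i] = entry
    return pos, neg
# e.g. e1_pos, e1_neg = _split_by_polarity_sketch(source1) would rebuild the
# dictionaries constructed explicitly above.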
       'word_count': 21} }
'''
# Get statistics about aspects in the source (mean, standard deviation, probability)
stats_source_1 = struct.aspects_stats(source1)
stats_source_2 = struct.aspects_stats(source2)
'''
/stats_.../ are structures of the form:
{'tela': {'mean': 83, 'prob': 0.07, 'std': 0},
 'cor': {'mean': -87, 'prob': 0.21, 'std': 1.73}}
'''
evaluate.reset()  # To start evaluating summaries of the current sources.
output_files.new_source(SOURCE1, SOURCE2, source1, source2, 'Statistic')  # Prepare output files for the current sources.
map_scores_summary = {}
distinct_summaries = set()
time_total = 0
out.print_verbose('Making summaries\n\n')
print(' %5s %5s %5s %5s\n' % ('R', 'C', 'D', 'H'))

for repeat in range(REPEAT_TESTS):
    time_initial = time()
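    # Illustrative note (assumption, not part of the original code): the per-aspect
    # statistics above could plausibly be derived from the 'opinions' lists as
    #     mean / std  -> mean and standard deviation of the aspect's opinion scores,
    #     prob        -> fraction of source sentences that mention the aspect,
    # which matches the {'mean': ..., 'std': ..., 'prob': ...} shape shown.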
       'text_info': ['bom', 'maquinar', 'começar', 'fotografar'],
       'opinions': [['PRODUTO', 100]],
       'word_count': 8} }
'''
# Split each source into its positive ('+') and negative ('-') subsets.
source1_proc = preprocess_CLUSTERING(source1)
source2_proc = preprocess_CLUSTERING(source2)
set1_pos = source1_proc['+']
set1_neg = source1_proc['-']
set2_pos = source2_proc['+']
set2_neg = source2_proc['-']

evaluate.reset()  # To start evaluating summaries of the current sources.
output_files.new_source(SOURCE1, SOURCE2, source1, source2, 'Clustering')  # Prepare output files for the current sources.
map_scores_summary = {}
distinct_summaries = set()
time_total = 0
out.print_verbose('Making summaries\n\n')
print(' %5s %5s %5s %5s\n' % ('R', 'C', 'D', 'H'))

repeat = 0
discarded = 0
while repeat < REPEAT_TESTS:
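    # Illustrative note (assumption, not part of the original code): unlike the
    # fixed-count for-loops of the other methods, this while-loop keeps generating
    # candidate summaries until REPEAT_TESTS of them have been accepted, bumping
    # `discarded` and retrying whenever a candidate is rejected (for example, a
    # duplicate already in distinct_summaries), and incrementing `repeat` only on
    # success.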