def testTrimDataToEvents(self):
    """Check eventutils.trim_data_to_events against a hand-built fixture.

    Two company price series share the same six date keys.  With two
    event types and a trimming range of 2, the expected output keeps
    every entry except the one keyed 20091103, for both companies.
    """
    # Each entry is [company_name, {date_key: value}].
    test_data = [
        ['company1', {20091101: 1, 20091102: 2, 20091103: 3,
                      20091104: 4, 20091105: 5, 20091108: 6}],
        ['company2', {20091101: 1, 20091102: 1, 20091103: 1,
                      20091104: 1, 20091105: 1, 20091108: 1}],
    ]
    # Events are grouped by type; each event is a 3-tuple.
    # NOTE(review): presumably (start_date, end_date, description) --
    # confirm against eventutils.
    test_events = {
        'event_type_1': [(20091101, 20091101, "desc1")],
        'event_type_2': [(20091107, 20091108, "desc2")],
    }
    # Named trimming_range rather than `range` so the builtin is not
    # shadowed inside this test.
    trimming_range = 2

    # Identical to test_data except that key 20091103 is gone.
    expected_result = [
        ['company1', {20091101: 1, 20091102: 2,
                      20091104: 4, 20091105: 5, 20091108: 6}],
        ['company2', {20091101: 1, 20091102: 1,
                      20091104: 1, 20091105: 1, 20091108: 1}],
    ]

    actual_result = eventutils.trim_data_to_events(
        test_data, test_events, trimming_range)

    # assertEqual instead of the deprecated assertEquals alias, which
    # newer unittest versions no longer provide.
    self.assertEqual(actual_result, expected_result)
if import_political_events: political_events = eventutils.import_events( "../data/wydarzenia-polityczne-polska.txt") events[political_events[0]] = political_events[1] except IOError, err: sys.exit(err) # TODO(patryk): plot events. # Preprocessing phase. if compress_to_weekly_data: data = utils.compress_data_weekly(data) if trimming_range > 0: data = eventutils.trim_data_to_events(data, events, trimming_range) input_vecs = [] if treat_data_differentially: input_vecs = utils.make_prices_diffs_vecs(data) else: input_vecs = utils.make_prices_vecs(data) # Run clustering algorithm. if algorithm_type == ClusterAlg.KMEANS: labels, wcss, n = Pycluster.kcluster(input_vecs, number_of_clusters, dist = dist_measure, npass = number_of_iters, method = dist_method) elif algorithm_type == ClusterAlg.HIERARCHICAL: tree = Pycluster.treecluster(input_vecs, method = dist_method,