def categorize_data(merged_events, rules):
    """Classify events and total their durations per category.

    Args:
        merged_events: Iterable of events; each one is handed to the
            classificator's ``check_productivity`` and to ``extract_duration``.
        rules: Forwarded to ``Classificator`` as its ``productivity_map``.

    Returns:
        The ``defaultdict(float)`` mapping category -> summed duration, with
        the classificator's ``UNKNOWN_CATEGORY`` entry removed.
    """
    classificator = Classificator(productivity_map=rules)
    unknown_events = Discovery()
    occurrences = defaultdict(int)
    durations = defaultdict(float)

    for event in merged_events:
        label = classificator.check_productivity(event)
        occurrences[label] += 1
        durations[label] += extract_duration(event)
        if label == classificator.UNKNOWN_CATEGORY:
            # Unclassified events are collected so they can be printed below.
            unknown_events.add_event(event)

    # The printed duration mapping still contains the unknown category;
    # it is only dropped from the returned value.
    print("Counter: ", occurrences)
    print("Duration: ", durations)
    for entry in unknown_events.get_agg_duration_events_sorted(top_n=20):
        pprint.pprint(entry)

    durations.pop(classificator.UNKNOWN_CATEGORY, None)
    return durations
def test_add_same_events():
    """Two events created with the same title count as one unique event."""
    # Assign
    tracker = Discovery()
    duplicates = [
        create_event(update_data={'title': 'Page 1'}),
        create_event(update_data={'title': 'Page 1'}),
    ]

    # Act
    for duplicate in duplicates:
        tracker.add_event(duplicate)
    unique_events = tracker.get_all_unique_events()

    # Assert
    assert len(unique_events) == 1
def test_total_duration_per_event_sorted():
    """Aggregated durations are reported per unique event, largest first."""
    # Assign
    tracker = Discovery()
    event_specs = [
        {'update_event': {'duration': datetime.timedelta(seconds=10)}},
        {'update_event': {'duration': datetime.timedelta(seconds=50)},
         'update_data': {'title': 'Fantastic choice!'}},
        {'update_event': {'duration': datetime.timedelta(seconds=50)}},
        {'update_event': {'duration': datetime.timedelta(seconds=20)},
         'update_data': {'title': 'YES! choice!'}},
    ]
    # (text expected in the event key, expected total duration), in order.
    # 'Amazing page' is presumably the default title used by create_event.
    expected_ranking = [
        ('Amazing page', 60),
        ('Fantastic', 50),
        ('YES!', 20),
    ]

    # Act
    for spec in event_specs:
        tracker.add_event(create_event(**spec))
    unique_events = tracker.get_all_unique_events()
    totals = tracker.get_agg_duration_events()
    ranked_totals = tracker.get_agg_duration_events_sorted()

    # Assert
    assert len(unique_events) == 3
    assert len(totals) == 3
    for position, (fragment, seconds) in enumerate(expected_ranking):
        assert fragment in str(ranked_totals[position][0])
        assert ranked_totals[position][1] == seconds
def test_total_duration_per_event_sorted_top_n():
    """``top_n`` limits how many sorted aggregated entries are returned."""
    # Assign
    tracker = Discovery()
    top_n = 3
    titles = (
        'Fantastic choice!',
        'This is great',
        'YES! choice!',
        'If not for others, I would definitely',
        'Maybe or not maybe, there is not try',
    )

    # Act
    for title in titles:
        tracker.add_event(create_event(update_data={'title': title}))
    ranked_totals = tracker.get_agg_duration_events_sorted(top_n=top_n)

    # Assert
    assert len(ranked_totals) == top_n
def test_total_duration_per_event():
    """Durations of two otherwise-default events are summed under one key."""
    # Assign
    tracker = Discovery()
    short_event = create_event(
        update_event={'duration': datetime.timedelta(seconds=10)})
    long_event = create_event(
        update_event={'duration': datetime.timedelta(seconds=50)})

    # Act
    tracker.add_event(short_event)
    tracker.add_event(long_event)
    unique_events = tracker.get_all_unique_events()
    totals = tracker.get_agg_duration_events()

    # Assert
    assert len(unique_events) == 1
    assert len(totals) == 1
    # Exactly one entry exists at this point, so the first key is the only key.
    only_key = next(iter(totals.keys()))
    assert totals[only_key] == 60
def main():
    """Classify events for a date range, then upload the results.

    The range is taken from the ``START_DATE`` and ``END_DATE`` environment
    variables via ``process_dates``. Per-category durations (without the
    unknown category) are sent through ``ExistClient``, and the events from
    ``get_all_buckets_events`` are passed to ``upload_data``.
    """
    config_parser = ConfigParser('config.json')
    unknown_events = Discovery()
    classificator = Classificator()
    event_extractor = EventExtractor()

    start_date_utc, end_date_utc = process_dates(
        os.getenv("START_DATE"), os.getenv("END_DATE"))
    events = event_extractor.get_events_between_dates(
        start_date_utc, end_date_utc)

    occurrences = defaultdict(int)
    durations = defaultdict(float)
    for event in events:
        label = classificator.check_productivity(event)
        occurrences[label] += 1
        durations[label] += extract_duration(event)
        if label == classificator.UNKNOWN_CATEGORY:
            # Unclassified events are collected so they can be printed below.
            unknown_events.add_event(event)

    print("Counter: ", occurrences)
    print("Duration: ", durations)
    for entry in unknown_events.get_agg_duration_events_sorted(top_n=20):
        pprint.pprint(entry)

    # The unknown category is dropped before the data is sent.
    durations.pop(classificator.UNKNOWN_CATEGORY, None)

    # Upload results to the Exist client
    exist_client = ExistClient()
    # First 10 characters of the ISO timestamp, i.e. the YYYY-MM-DD part.
    report_day = start_date_utc.isoformat()[:10]
    response = exist_client.send_productivity(report_day, durations)
    print("Respone from the Exist.io:")
    exist_client.validate_response(response)
    pprint.pprint(response.json())

    # Upload events data to S3
    upload_data(
        config_parser,
        event_extractor.get_all_buckets_events(),
        start_date_utc,
    )
def test_total_duration_per_event_sorted_cached():
    """Repeated sorted queries return equal content in distinct objects."""
    # Assign
    tracker = Discovery()
    event_specs = [
        {'update_event': {'duration': datetime.timedelta(seconds=10)}},
        {'update_event': {'duration': datetime.timedelta(seconds=50)},
         'update_data': {'title': 'Fantastic choice!'}},
        {'update_event': {'duration': datetime.timedelta(seconds=50)}},
        {'update_event': {'duration': datetime.timedelta(seconds=20)},
         'update_data': {'title': 'YES! choice!'}},
    ]
    for spec in event_specs:
        tracker.add_event(create_event(**spec))

    # Act
    first_result = tracker.get_agg_duration_events_sorted()
    second_result = tracker.get_agg_duration_events_sorted()

    # Assert
    assert len(first_result) == len(second_result)
    assert sorted(first_result) == sorted(second_result)
    # Each call must hand back its own object, not a shared one.
    assert first_result is not second_result
def main():
    """Classify one day of events, save per-category reports and upload.

    The processed window is one day long and ends three days before the
    second value returned by ``get_today_utc()``. Aggregated events for each
    category are written with ``save_report_to_disk``, the per-category
    durations (without the unknown category) are sent through ``ExistClient``,
    and the events from ``get_all_buckets_events`` go to ``upload_data``.

    Raises:
        ValueError: If an event is classified into a category other than the
            unknown one, 'productive', 'neutral' or 'distracting'.
    """
    config_parser = ConfigParser('config.json')

    # HACK: one Discovery instance per category, so that each category can be
    # aggregated and reported on its own.
    unknown_bucket = Discovery()
    productive_bucket = Discovery()
    neutral_bucket = Discovery()
    distracting_bucket = Discovery()

    classificator = Classificator()
    logger.info("Pass classifcator")
    event_extractor = EventExtractor()

    _, today_start_day_utc = get_today_utc()
    end_date_utc = today_start_day_utc - datetime.timedelta(days=3)
    start_date_utc = end_date_utc - datetime.timedelta(days=1)
    events = event_extractor.get_events_between_dates(
        start_date=start_date_utc, end_date=end_date_utc)

    known_buckets = {
        'productive': productive_bucket,
        'neutral': neutral_bucket,
        'distracting': distracting_bucket,
    }
    occurrences = defaultdict(int)
    durations = defaultdict(float)
    for event in events:
        label = classificator.check_productivity(event)
        occurrences[label] += 1
        durations[label] += extract_duration(event)

        # The unknown category is checked first and takes precedence.
        if label == classificator.UNKNOWN_CATEGORY:
            bucket = unknown_bucket
        else:
            bucket = known_buckets.get(label)
            if bucket is None:
                raise ValueError("Something went wrong")
        bucket.add_event(event)

    reports = (
        ('unknown', unknown_bucket),
        ('productive', productive_bucket),
        ('neutral', neutral_bucket),
        ('distracting', distracting_bucket),
    )
    for report_name, bucket in reports:
        save_report_to_disk(
            start_date_utc.date(),
            report_name,
            bucket.get_agg_duration_events_sorted(),
        )

    logger.info("Counter: %s", occurrences)
    logger.info("Duration: %s", durations)
    for entry in unknown_bucket.get_agg_duration_events_sorted(top_n=20):
        logger.debug("Total duration: %f", entry[1])
        # NOTE(review): eval() runs on a string derived from recorded event
        # data. If that data can contain untrusted text this is unsafe;
        # consider a non-executing parser once the key format is confirmed.
        logger.debug(dict(eval(entry[0])))

    # The unknown category is dropped before the data is sent.
    durations.pop(classificator.UNKNOWN_CATEGORY, None)

    # Upload results to the Exist client
    print("Sending request to the Exist")
    exist_client = ExistClient()
    # First 10 characters of the ISO timestamp, i.e. the YYYY-MM-DD part.
    report_day = start_date_utc.isoformat()[:10]
    response = exist_client.send_productivity(report_day, durations)
    print("Respone from the Exist.io:")
    exist_client.validate_response(response)
    print(response.json())

    # Upload events data to S3
    upload_data(
        config_parser,
        event_extractor.get_all_buckets_events(),
        start_date_utc,
    )