Пример #1
0
 def station_count(self):
     """
     Count the stations reported in each hour bucket.

     Every point's ``data`` field is parsed as JSON. When the payload has a
     ``stations`` key, its length is written to the slot of that hour, so
     the last such point in a bucket determines the value. Slots of hours
     without any ``stations`` payload stay at 0.

     :return: list of 24 station counts, indexed by hour
     :raises ValueError: if a point's ``data`` is not valid JSON; the
         message contains the offending string and the decode error is
         chained as the cause
     """
     # NOTE(review): bucket keys are used directly as list indexes, so they
     # are presumably integer hours in 0..23 -- confirm against the loader.
     stations = [0] * 24
     buckets = self.data_file.buckets
     for hour in buckets:
         for pt in buckets[hour]:
             # Keep the try body limited to the parse so that only decode
             # failures are reported as invalid JSON.
             try:
                 pt_data = json.loads(pt['data'])
             except ValueError as err:
                 message = f"Invalid json string: {pt['data']}"
                 logger.error(message)
                 # Carry the message and the original decode error instead
                 # of raising an empty ValueError.
                 raise ValueError(message) from err
             if 'stations' in pt_data:
                 stations[hour] = len(pt_data['stations'])
     logger.info(f"stations: {stations}")
     return stations
Пример #2
0
def read_data(filename):
    """
    Read a file holding one JSON document per line.

    Each non-blank line is parsed with ``json.loads``. Blank lines (for
    example a trailing empty line) are skipped rather than reported as
    invalid JSON.

    :param filename: path of the file to read; it is decoded as UTF-8
    :return: list of parsed records sorted by their ``timestamp`` key
    :raises ValueError: if a non-blank line is not valid JSON; the message
        names the file and the offending line, and the decode error is
        chained as the cause
    """
    result = []
    # JSON text is UTF-8 by specification, so do not depend on the locale's
    # default encoding.
    with open(filename, 'r', encoding='utf-8') as json_file:
        for json_str in json_file:
            if not json_str.strip():
                continue
            try:
                result.append(json.loads(json_str))
            except ValueError as err:
                message = f"Invalid json string in {filename}: {json_str}"
                logger.error(message)
                raise ValueError(message) from err

    return sorted(result, key=lambda x: x['timestamp'])
Пример #3
0
 def avg_upd_not_dropped(self):
     """
     Average timestamp gap between consecutive updates that are not dropped.

     Within every hour bucket each pair of neighbouring points is passed to
     ``is_dropped`` (later timestamp first). The timestamp differences of
     the pairs for which it returns a false value are averaged and rounded
     to two decimals.

     :return: the rounded average, or 1.0 when no pair qualified
     """
     buckets = self.data_file.buckets
     total = 0
     pairs = 0
     for hour in buckets:
         points = buckets[hour]
         # Walk the bucket as (previous, current) neighbours.
         for idx in range(1, len(points)):
             later = points[idx]['timestamp']
             earlier = points[idx - 1]['timestamp']
             if is_dropped(later, earlier):
                 continue
             total += later - earlier
             pairs += 1
     if not pairs:
         logger.error("every update is dropped")
         return 1.0
     average = round(total / pairs, 2)
     logger.info(f"average upd duration if not dropped:{average}s")
     return average
Пример #4
0
 def latest_dr_ver(self):
     """
     Collect the driver version reported in each hour bucket.

     Every point's ``data`` field is parsed as JSON. When the payload has a
     ``dr_ver`` key, its value is written to the slot of that hour, so the
     last such point in a bucket determines the value. Hours that have a
     bucket but no ``dr_ver`` payload are logged.

     :return: list of 24 driver versions indexed by hour; slots without a
         reported version are None
     :raises ValueError: if a point's ``data`` is not valid JSON; the
         message contains the offending string and the decode error is
         chained as the cause
     """
     # NOTE(review): bucket keys are used directly as list indexes, so they
     # are presumably integer hours in 0..23 -- confirm against the loader.
     dr_ver = [None] * 24
     no_dr_ver = []
     buckets = self.data_file.buckets
     for hour in buckets:
         for pt in buckets[hour]:
             # Keep the try body limited to the parse so that only decode
             # failures are reported as invalid JSON.
             try:
                 pt_data = json.loads(pt['data'])
             except ValueError as err:
                 message = f"Invalid json string: {pt['data']}"
                 logger.error(message)
                 # Carry the message and the original decode error instead
                 # of raising an empty ValueError.
                 raise ValueError(message) from err
             if 'dr_ver' in pt_data:
                 dr_ver[hour] = pt_data['dr_ver']
         if dr_ver[hour] is None:
             no_dr_ver.append(hour)
     if no_dr_ver:
         logger.info(f"no driver version in hours: {no_dr_ver}")
     logger.info(f"driver version: {dr_ver}")
     return dr_ver
Пример #5
0
 def neighbor_count(self):
     """
     Count the neighbors reported by "slow" updates in each hour bucket.

     Only points whose ``type`` is "slow" are inspected. The length of the
     ``neighbors`` entry of their JSON ``data`` is written to the slot of
     that hour, so the last such point in a bucket determines the value. If
     the slot is still 0 once a bucket has been processed, it takes the most
     recent count seen so far (initially 0).

     :return: list of 24 neighbor counts, indexed by hour
     :raises ValueError: if a slow point's ``data`` is not valid JSON; the
         message contains the offending string and the decode error is
         chained as the cause
     """
     # NOTE(review): bucket keys are used directly as list indexes, so they
     # are presumably integer hours in 0..23 -- confirm against the loader.
     neighbors = [0] * 24
     prev = 0
     buckets = self.data_file.buckets
     for hour in buckets:
         for pt in buckets[hour]:
             if pt['type'] != "slow":
                 continue
             try:
                 pt_data = json.loads(pt['data'])
                 neighbors_count = len(pt_data['neighbors'])
             except ValueError as err:
                 message = f"Invalid json string: {pt['data']}"
                 logger.error(message)
                 # Carry the message and the original decode error instead
                 # of raising an empty ValueError.
                 raise ValueError(message) from err
             except KeyError:
                 # Best effort: a slow update without the expected key is
                 # reported and skipped, not treated as fatal.
                 logger.error(f"slow update at {pt['timestamp']} does not have neighbors")
                 continue
             neighbors[hour] = neighbors_count
             prev = neighbors_count
         if neighbors[hour] == 0:
             neighbors[hour] = prev
     logger.info(f"neighbors: {neighbors}")
     return neighbors