Exemplo n.º 1
0
 def query_cross_month_site_data(self, site_id_list, starttime, endtime):
     """Query site data for a time range that may cross a month boundary.

     If characters 5-6 of ``starttime`` and ``endtime`` (the month digits of
     the time string) are equal, the range is served by one call to
     ``query_site_data_within_one_month`` and that result is returned as-is,
     without any None/empty validation.  Otherwise the range is split in two,
     each part is queried separately and the results are concatenated.

     Args:
         site_id_list: site ids forwarded to the per-month query.
         starttime: start of the range, as a time string.
         endtime: end of the range, as a time string.

     Returns:
         The data returned by the per-month query, or the concatenation of
         the two per-month results.

     Raises:
         NoneDfError: cross-month path only, when no data object was returned.
         EmptyDfError: cross-month path only, when the merged data is empty.
     """
     # NOTE(review): only the month digits are compared, so a range spanning
     # whole years (same month, different year) takes this branch — confirm
     # callers never pass such ranges.
     if starttime[5:7] == endtime[5:7]:
         return self.query_site_data_within_one_month(
             site_id_list, starttime, endtime)

     # Boundary timestamps: last day of the start month / first day of the
     # end month, as computed by the tu helpers.
     month_endtime = tu.datetime_to_string(
         tu.time_str_to_datetime(
             tu.last_day_of_a_month(
                 tu.time_str_to_datetime(starttime))[0]))
     month_starttime = tu.datetime_to_string(
         tu.time_str_to_datetime(
             tu.first_day_of_a_month(
                 tu.time_str_to_datetime(endtime))[0]))

     # Data for the earlier month.
     site_data_last_month = self.query_site_data_within_one_month(
         site_id_list, starttime, month_endtime)
     # Data for the later month.
     site_data_this_month = self.query_site_data_within_one_month(
         site_id_list, month_starttime, endtime)

     # Merge both parts.
     if site_data_last_month is not None:
         # DataFrame.append was removed in pandas 2.0; pd.concat is the
         # supported equivalent and silently drops a None second part.
         import pandas as pd
         site_data = pd.concat([site_data_last_month, site_data_this_month])
     else:
         site_data = site_data_this_month

     if site_data is None:
         raise NoneDfError('未返回df数据,可能数据库连接有异常')
     if site_data.empty:
         raise EmptyDfError('查询数据库获取的df数据为空')
     return site_data
Exemplo n.º 2
0
    def merge_adj_data_cross_month(self, device_list, start_time, end_time):
        """Query adj data for a range that may cross a month boundary.

        If characters 5-6 of ``start_time`` and ``end_time`` (the month
        digits of the time string) are equal, a single call to
        ``query_adj_data_within_one_month`` serves the whole range.
        Otherwise the range is split in two, each part is queried
        separately and the results are concatenated.

        Args:
            device_list: devices forwarded to the per-month query.
            start_time: start of the range, as a time string.
            end_time: end of the range, as a time string.

        Returns:
            The per-month query result, the concatenation of both parts, or
            only the later part when the earlier part came back as None.
        """
        # Same month: one query is enough.
        if start_time[5:7] == end_time[5:7]:
            return self.query_adj_data_within_one_month(
                device_list, start_time, end_time)

        # Boundary timestamps: last day of the start month / first day of
        # the end month, as computed by the tu helpers.
        month_endtime = tu.datetime_to_string(
            tu.time_str_to_datetime(
                tu.last_day_of_a_month(
                    tu.time_str_to_datetime(start_time))[0]))
        month_starttime = tu.datetime_to_string(
            tu.time_str_to_datetime(
                tu.first_day_of_a_month(
                    tu.time_str_to_datetime(end_time))[0]))

        # adj data for the earlier month.
        adj_data_last_month = self.query_adj_data_within_one_month(
            device_list, start_time, month_endtime)
        # adj data for the later month.
        adj_data_this_month = self.query_adj_data_within_one_month(
            device_list, month_starttime, end_time)

        if adj_data_last_month is None:
            # The original evaluated this name without returning it, so the
            # caller received None even when the later month had data.
            return adj_data_this_month

        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # supported equivalent and silently drops a None second part.
        import pandas as pd
        return pd.concat([adj_data_last_month, adj_data_this_month])
Exemplo n.º 3
0
 def merge_org_data_by_month(self, org_x, device_list, endtime, starttime=None):
     """Query org data for one ``org_x``, merging across a month boundary.

     When ``starttime`` is None, or characters 5-6 of ``starttime`` and
     ``endtime`` (the month digits of the time string) are equal, a single
     call to ``query_org_data_by_org_x`` serves the request.  Otherwise the
     range is split in two, each part is queried separately and the results
     are concatenated.

     Args:
         org_x: forwarded unchanged to ``query_org_data_by_org_x``.
         device_list: devices forwarded to the query.
         endtime: end of the range, as a time string.
         starttime: optional start of the range, as a time string.

     Returns:
         The query result, the concatenation of both parts, or only the
         later part when the earlier part came back as None.
     """
     # No start time or same month: one query is enough.
     if starttime is None or starttime[5:7] == endtime[5:7]:
         return self.query_org_data_by_org_x(
             org_x, device_list, endtime, starttime)

     # Last day of the start month, as computed by the tu helpers.
     month_endtime = tu.datetime_to_string(
         tu.time_str_to_datetime(
             tu.last_day_of_a_month(
                 tu.time_str_to_datetime(starttime))[0]))
     # First day of the end month, as computed by the tu helpers.
     month_starttime = tu.datetime_to_string(
         tu.time_str_to_datetime(
             tu.first_day_of_a_month(
                 tu.time_str_to_datetime(endtime))[0]))

     # org data for the earlier month.
     org_data_last_month = self.query_org_data_by_org_x(
         org_x, device_list, month_endtime, starttime)
     # org data for the later month.
     org_data_this_month = self.query_org_data_by_org_x(
         org_x, device_list, endtime, month_starttime)

     if org_data_last_month is None:
         # Without this guard a missing first part raised AttributeError on
         # None.append; fall back to the later month instead.
         return org_data_this_month

     # DataFrame.append was removed in pandas 2.0; pd.concat is the
     # supported equivalent and silently drops a None second part.
     import pandas as pd
     return pd.concat([org_data_last_month, org_data_this_month])
Exemplo n.º 4
0
    def plot_overall_original_data(self, var, base_dev_channel_dict,
                                   qe_dev_channel_list):
        '''
        Plot the original data of the base devices and of the devices to be
        quality ensured, and save each plot to the path supplied by
        ``self.file_utility``.

        Args:
            var: name of the parameter to plot
            base_dev_channel_dict: {DEV_ID: CHANNEL_LIST} base device ids and
                their channels
            qe_dev_channel_list: channels, for parameter ``var``, of the
                devices to be quality ensured
        '''
        # --- Base devices -------------------------------------------------
        base_fig = plt.figure(figsize=(12, 8))
        try:
            base_ax = base_fig.add_subplot(111)
            base_df = pd.read_csv(
                self.file_utility.get_orig_base_data_path(var))
            # Convert CAP_TIME strings to datetimes.
            base_df['CAP_TIME'] = base_df['CAP_TIME'].apply(
                tu.time_str_to_datetime)
            for dev, channels in base_dev_channel_dict.items():
                dev_df = base_df[base_df['DEV_ID'] == dev]
                for channel in channels:
                    # sort_values returns a new frame, so the filtered slice
                    # is never modified in place.
                    series_df = dev_df[['CAP_TIME', channel]].sort_values(
                        by=['CAP_TIME'])
                    base_ax.plot(series_df['CAP_TIME'],
                                 series_df[channel],
                                 label='{}:{}'.format(dev, channel))
            base_ax.legend()
            base_ax.set_title(
                '{}: Base devices with base channels [original]'.format(var),
                fontsize=14)
            base_fig.savefig(self.file_utility.get_orig_overall_base_plot(var))
        finally:
            # Close explicitly: a bare plt.close() at the end would only
            # close the most recent figure and leak this one.
            plt.close(base_fig)

        # --- Devices to be quality ensured --------------------------------
        qe_fig = plt.figure(figsize=(12, 8))
        try:
            qe_ax = qe_fig.add_subplot(111)
            qe_df = pd.read_csv(self.file_utility.get_orig_qe_data_path())
            qe_df['CAP_TIME'] = qe_df['CAP_TIME'].apply(
                tu.time_str_to_datetime)

            has_qe_data = False
            for dev in qe_df['DEV_ID'].unique():
                dev_df = qe_df[qe_df['DEV_ID'] == dev]
                for channel in qe_dev_channel_list:
                    series_df = dev_df[['CAP_TIME', channel]]
                    if series_df.empty:
                        continue
                    has_qe_data = True
                    series_df = series_df.sort_values(by=['CAP_TIME'])
                    qe_ax.plot(series_df['CAP_TIME'], series_df[channel])

            # Only write the file when at least one series was drawn.
            if has_qe_data:
                qe_ax.set_title(
                    '{}: Devices to be quality ensured [original]'.format(var),
                    fontsize=14)
                qe_fig.savefig(
                    self.file_utility.get_orig_overall_qe_plot(var))
        finally:
            plt.close(qe_fig)
Exemplo n.º 5
0
    def query_site_train_data_by_city(self, cityid, hour, is_cache=False):
        """
        Fetch site data for the configured training window, i.e. the
        ``self.n_days_train_data`` days leading up to ``hour``.

        Args:
            cityid: list of city ids whose site data is requested
            hour: target hour; the window starts ``self.n_days_train_data``
                days before it
            is_cache: whether to use the in-memory cache.  When enabled, the
                data for all cities is queried once, kept in
                ``self.air_quality_data`` and filtered per request.

        Returns:
            The site data for the requested cities.
        """
        start_hour = tu.datetime_n_days_before_string(
            tu.time_str_to_datetime(hour), self.n_days_train_data)
        end_hour = hour

        if not is_cache:
            return self.query_site_data_by_time_interval(
                start_hour, end_hour, cityid)

        # NOTE(review): the cache is filled only while it is empty, so later
        # calls with a different ``hour`` reuse the first window — confirm
        # that this is intended.
        if self.air_quality_data.empty:
            self.air_quality_data = self.query_site_data_by_time_interval(
                start_hour, end_hour)

        # Site ids related to the devices of the requested cities.  The cast
        # is done on a standalone Series instead of assigning a column on
        # the filtered slice, which triggered SettingWithCopyWarning.
        city_devices = self.active_devices_info[
            self.active_devices_info['CITYID'].isin(cityid)]
        related_site_ids = city_devices['RELATE_SITE_ID'].astype('int64')
        site_id = related_site_ids[related_site_ids > 0].values

        # Match against the cached data.
        return self.air_quality_data[
            self.air_quality_data['SITE_ID'].isin(site_id)]
Exemplo n.º 6
0
 def query_capture_data_by_hour(self, hour, device_info_df):
     """Fetch capture data for the clock hour that precedes ``hour``.

     The window runs from one hour before ``hour`` to minute 59, second 59
     of that same hour, e.g. 2018-12-12 19:00:00 to 2018-12-12 19:59:59.

     Args:
         hour: target hour, as a time string.
         device_info_df: device information forwarded to the query.

     Returns:
         Whatever ``query_capture_data_cross_hour`` returns for the window.
     """
     target_dt = tu.time_str_to_datetime(hour)
     window_start = tu.datetime_n_hours_before_string(target_dt, 1)
     # Keep the first 13 characters (date and hour) and pin the rest to the
     # last second of that hour.
     window_end = window_start[:13] + ':59:59'
     return self.query_capture_data_cross_hour(
         window_start, window_end, device_info_df)
Exemplo n.º 7
0
 def query_qc_dev_org_data_by_city_month(self, city_id, hour):
     """Fetch org data of a city's quality-control devices.

     Quality-control devices are the active devices of the city whose
     ``RELATE_SITE_ID`` is greater than 0.  The window covers the
     ``self.n_days_train_data`` days leading up to ``hour``.

     Args:
         city_id: city whose devices are queried.
         hour: target hour, as a time string.

     Returns:
         The result of ``query_org_data_for_device_info_df`` for the window.

     Raises:
         OrgValueError: when any step raises a ``BaseError``; the original
             error is logged and attached as the cause.
     """
     try:
         # Device information of the city.
         device_info_df = self.query_active_devices_by_city(city_id)
         # Convert per value with int(), as before, but column-wise: a
         # row-wise apply(axis=1) is slow and returns an empty DataFrame
         # instead of a Series when the frame has no rows.
         device_info_df['RELATE_SITE_ID'] = (
             device_info_df['RELATE_SITE_ID'].apply(int))
         # Keep only the quality-control devices.
         qc_devices = device_info_df[device_info_df['RELATE_SITE_ID'] > 0]
         # Start of the window: n_days_train_data days before ``hour``.
         starttime = tu.datetime_n_days_before_string(
             tu.time_str_to_datetime(hour), self.n_days_train_data)
         # Fetch the org data.
         return self.query_org_data_for_device_info_df(
             hour, qc_devices, starttime)
     except BaseError as e:
         e.setdata({
             'key': 'query_qc_dev_org_data_by_city_month',
             'hour': hour,
             'city_id': city_id
         })
         logger.error('code:%s,name:%s,message:%s,data:%s',
                      e.code,
                      e.name,
                      e.message,
                      e.getdata(),
                      exc_info=True)
         # Chain explicitly so the root cause stays on the traceback.
         raise OrgValueError('ORG数据有问题') from e
0
    def determine_scenario(self, X, var, hour):
        '''
        Decide whether the data of one variable belongs to a special scenario.
            case 1: no special scenario -> self.scenarios.NORMAL
            case 2: no data at all, so interpolation is needed
                    -> self.scenarios.INTERPOLATION
            case 3: sand/dust scenario (only checked for PM10)
                    -> self.scenarios.SAND_STORM
        '''
        if X.empty:
            return self.scenarios.INTERPOLATION
        if var != 'PM10':
            return self.scenarios.NORMAL

        # Only rows newer than ``self.num_hours`` hours before ``hour`` count.
        cutoff = tu.datetime_n_hours_before_string(
            tu.time_str_to_datetime(hour), self.num_hours)
        logger.info('determine_scenario{}'.format(cutoff))
        recent_df = X[X['TIMESTAMP'] > cutoff]

        site_ratio = recent_df['SITE_PM10/SITE_PM25'].mean()
        logger.info("determine_scenario:{}".format(str(site_ratio)))
        device_ratio = recent_df['PM10/PM25'].mean()
        logger.info("determine_scenario:{}".format(str(device_ratio)))

        # Sand storm: the site ratio reaches the threshold AND either the
        # site ratio exceeds the device ratio by the configured deviation,
        # or the device ratio is unavailable (NaN).
        if site_ratio >= self.sand_storm_thres and (
                (site_ratio - device_ratio)
                >= self.sand_storm_dev_deviation_threshold
                or math.isnan(device_ratio)):
            return self.scenarios.SAND_STORM

        return self.scenarios.NORMAL
Exemplo n.º 9
0
 def __init__(self, config, hour):
     """Set up configuration and load device information for ``hour``.

     Args:
         config: configuration object; ``num_days_for_training`` is read
             from it through ``get_config_global_data``.
         hour: target hour, as a time string.
     """
     super().__init__()
     self.section = 'MYSQL-SENSOR1A'
     self.config = config
     # Length of the training window in days, stored as an int.
     raw_n_days = self.config.get_config_global_data('num_days_for_training')
     self.n_days_train_data = int(raw_n_days)
     # Start of the training window, n_days_train_data days before ``hour``.
     target_dt = tu.time_str_to_datetime(hour)
     start_hour = tu.datetime_n_days_before_string(
         target_dt, self.n_days_train_data)
     self.active_devices_info = self.query_all_active_devices_info(hour)
     abbr_df = self.query_abbr()
Exemplo n.º 10
0
def back_calculate(city_id_list, start_time, end_time, is_for_org=False):
    """Re-run the back calculation hour by hour for every city.

    For each city and each hour from ``start_time`` to ``end_time``
    (inclusive), a ``BackCalculation`` is created for that hour and executed.

    Args:
        city_id_list: ids of the cities to process.
        start_time: first hour to process, as a time string.
        end_time: last hour to process, as a time string.
        is_for_org: when true, ``is_for_org=True`` is passed on to
            ``execute_back_calculation``.
    """
    time1 = time.time()
    start_datatime = tu.time_str_to_datetime(start_time)
    end_datatime = tu.time_str_to_datetime(end_time)
    # total_seconds() includes whole days; ``.seconds`` only holds the
    # sub-day remainder and under-counted any range of 24 hours or more.
    n_hours = int((end_datatime - start_datatime).total_seconds() / 3600)
    for city_id in city_id_list:
        for i in range(n_hours + 1):
            hour = tu.datetime_n_hours_after_string(start_datatime, i)
            print("开始回算城市:%s。时间:%s"%(city_id,hour))
            bc = BackCalculation(hour)
            if is_for_org:
                bc.execute_back_calculation(hour, city_id, is_for_org=True)
            else:
                bc.execute_back_calculation(hour, city_id)
            print('')
    time2 = time.time()
    # Report the whole list: the loop variable is undefined for an empty
    # list and would otherwise name only the last city.
    print("补算城市{},耗时{}".format(city_id_list, time2 - time1))
Exemplo n.º 11
0
def main():
    """Run ``t_agg_capture`` for each of the 168 hours (7 days) before a
    fixed start time, using a pool of 12 worker processes.

    Prints every submitted hour and, once all tasks have finished, prints
    'successful' only if every task completed without raising.
    """
    starttime = '2018-12-13 15:00:00'
    # Parse once; the start time does not change between iterations.
    start_dt = tu.time_str_to_datetime(starttime)
    pool = multiprocessing.Pool(processes=12)
    results = []
    try:
        for i in range(24 * 7):
            hour = tu.datetime_n_hours_before_string(start_dt, i)
            print(hour)
            # Keep every handle; overwriting a single variable meant only
            # the last task was ever checked.
            results.append(pool.apply_async(t_agg_capture, (hour, )))
    finally:
        # Always stop accepting work and wait for the workers, even if
        # submitting a task failed.
        pool.close()
        pool.join()
    if all(res.successful() for res in results):
        print('successful')
Exemplo n.º 12
0
 def prepare_train(self, features):
     """Restrict the training features to the most recent hours.

     Keeps the rows whose ``TIMESTAMP`` is not older than ``self.num_hours``
     hours before ``self.hour``.

     Args:
         features: training data with a ``TIMESTAMP`` column.

     Returns:
         The filtered features, or None (after logging) when the input or
         the filtered result is empty.
     """
     if not features.empty:
         cutoff = tu.datetime_n_hours_before_string(
             tu.time_str_to_datetime(self.hour),
             self.num_hours)
         recent = features[features['TIMESTAMP'] >= cutoff]
         if not recent.empty:
             return recent
     # Either the input or the filtered window had no rows.
     logger.info('训练数据为空')
     return None