Example #1
def _load(det_name, year, month, day, traffic_type, missing_data):
    # faverolles 1/18/2020: Reworked the downloading operation
    #   No longer saves all of the "fail" files
    #   Checks if global option to download data files is "TRUE"
    """ Return raw data of the detector as list """
    if det_name is None:
        raise Exception("Detector number must be passed")

    dirname = str(year) + str(month).zfill(2) + str(day).zfill(2)
    remote_file = cfg.TRAFFIC_DATA_URL + '/' + str(
        year) + '/' + dirname + '/' + det_name + traffic_type.extension
    date = datetime.date(year, month, day)
    data = _read_cached_data_file(det_name, date, traffic_type)

    if data is not None:
        return _convert_to_list(data, traffic_type)

    if global_settings.DOWNLOAD_TRAFFIC_DATA_FILES:
        print(f"Downloading traffic data file [{remote_file}]")
        try:
            with http.get_url_opener(remote_file, timeout=30) as res:
                bin_data = res.read()
                data = _convert_to_list(bin_data, traffic_type)
                if not data:
                    return missing_data
                _save_file_to_cache(det_name, date, bin_data, traffic_type)
                return data
        except Exception as e:
            print(
                f"Exception downloading traffic data "
                f"(file=[{remote_file}], reason=[{e}])"
            )
            return missing_data

    return missing_data
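
A minimal sketch of a call site for this loader. The detector id and date are made up, and the TrafficType wrapper with a '.v30' volume extension is an assumption based on the other examples here:

# Hypothetical usage; detector id, date, and extension are illustrative only.
missing = [cfg.MISSING_VALUE] * cfg.SAMPLES_PER_DAY
volume = _load('1234', 2018, 8, 1, TrafficType('.v30'), missing)
if volume is missing:
    print('No data for detector 1234 on 2018-08-01')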
Example #2
def _load(device_name, year, month, day, opt, only_download=False, n_try=1):
    """ Return raw data of the device as list """
    date = datetime.date(year, month, day)
    data = _cached_data(device_name, date, opt)
    if data is not None:
        return _convert_to_list(data, opt) if not only_download else None

    # not cached, check if failed device
    if _is_failed(device_name, date, opt):
        logging.debug("Device " + str(device_name) + " is failed device")
        return [-1] * (cfg.SAMPLES_PER_DAY) if only_download == False else None

    dirname = str(year) + str(month).zfill(2) + str(day).zfill(2)
    remote_file = cfg.TRAFFIC_DATA_URL + '/' + str(
        year) + '/' + dirname + '/' + str(device_name) + opt['ext']

    try:
        with http.get_url_opener(remote_file) as res:
            binData = res.read()
    except url_error.HTTPError as e:
        logging.debug(
            'Could not get the device data (device={}, reason={}, http_code={})'
            .format(device_name, str(e.reason), e.code))
        if e.code == 404:
            open(_cache_path(_CACHE_FAIL, device_name, date, opt), 'w').close()
        return [-1] * cfg.SAMPLES_PER_DAY if not only_download else None
    except url_error.URLError as e:
        logging.critical(
            'Could not connect to the server (device={}, reason={})'.format(
                device_name, str(e.reason)))
        return [-1] * cfg.SAMPLES_PER_DAY if not only_download else None
    except ConnectionResetError as e:
        logging.critical(
            'HTTP Connection has been reset. (file={}, reason={})'.format(
                remote_file, e.errno))
        if n_try <= MAX_TRY_NUM:
            logging.critical('Retrying...')
            time.sleep(1)
            return _load(device_name,
                         year,
                         month,
                         day,
                         opt,
                         only_download=only_download,
                         n_try=n_try + 1)
        return [-1] * cfg.SAMPLES_PER_DAY if not only_download else None

    _cache(device_name, date, binData, opt)

    if only_download:
        return

    data = _convert_to_list(binData, opt)

    return data
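
The ConnectionResetError branch above retries by calling itself with n_try + 1 until MAX_TRY_NUM is exceeded. A stripped-down sketch of that same pattern, assuming only the http.get_url_opener helper and MAX_TRY_NUM constant used throughout these examples:

import time

def _fetch_with_retry(remote_file, n_try=1):
    # Bounded retry on connection resets; mirrors the recursion above.
    try:
        with http.get_url_opener(remote_file, timeout=30) as res:
            return res.read()
    except ConnectionResetError:
        if n_try <= MAX_TRY_NUM:
            time.sleep(1)  # brief pause before retrying
            return _fetch_with_retry(remote_file, n_try=n_try + 1)
        return None  # give up after MAX_TRY_NUM attempts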
Example #3
def _load_testing(det_name,
                  year,
                  month,
                  day,
                  traffic_type,
                  only_download=False,
                  n_try=1):
    def _cached_data_local(detector_name, date_val, traffic_type_val):
        """
        :rtype: None or String
        """
        cache_path = _cache_path(CACHE_TYPE_DET, detector_name, date_val,
                                 traffic_type_val)
        if not os.path.exists(cache_path):
            return None
        with open(cache_path, 'rb') as cache_file:
            return cache_file.read()

    if det_name is None:
        raise Exception("Detector number must be passed")

    missing_data = [cfg.MISSING_VALUE] * cfg.SAMPLES_PER_DAY
    date = datetime.date(year, month, day)
    cached_data = _cached_data_local(det_name, date, traffic_type)

    if cached_data is not None:
        return _convert_to_list(
            cached_data, traffic_type) if not only_download else None

    # not cached, check if failed detector
    if _is_failed(det_name, date, traffic_type):
        # logging.debug("Detector " + det_name + " is failed detector")
        return missing_data if not only_download else None

    dir_name = str(year) + str(month).zfill(2) + str(day).zfill(2)
    remote_file = cfg.TRAFFIC_DATA_URL + '/' + str(
        year) + '/' + dir_name + '/' + det_name + traffic_type.extension

    try:
        if n_try > 1:
            logging.critical('Retrying... (n_try=%d)' % n_try)

        with http.get_url_opener(remote_file, timeout=30) as res:
            binData = res.read()
            data = _convert_to_list(binData, traffic_type)
            if not data:
                return missing_data
            _cache(det_name, date, binData, traffic_type)
            return data

    except url_error.HTTPError as e:
        logging.debug(
            'Could not get the remote file (file={}, reason={}, http_code={})'.
            format(remote_file, str(e.reason), e.code))
        if e.code == 404:
            open(_cache_path(CACHE_TYPE_FAIL, det_name, date, traffic_type),
                 'w').close()
        return missing_data if not only_download else None
    except url_error.URLError as e:
        logging.critical(
            'Could not connect to the server (file={}, reason={})'.format(
                remote_file, str(e.reason)))
        return missing_data if not only_download else None
    except ConnectionResetError as e:
        logging.critical(
            'HTTP Connection has been reset. (file={}, reason={})'.format(
                remote_file, e.errno))
        if n_try <= MAX_TRY_NUM:
            time.sleep(1)
            return _load_testing(det_name,
                                 year,
                                 month,
                                 day,
                                 traffic_type,
                                 only_download=only_download,
                                 n_try=n_try + 1)
        else:
            logging.critical('  - failed to get data')
        return missing_data if not only_download else None
    except Exception as e:
        logging.critical(
            'Exception occurred while downloading traffic data (file={}, reason={})'
            .format(remote_file, str(e)))
        return missing_data if not only_download else None
Example #4
def _load(det_name,
          year,
          month,
          day,
          traffic_type,
          only_download=False,
          n_try=1):
    """ Return raw data of the detector as list """
    if det_name is None:
        raise Exception("Detector number must be passed")

    missing_data = [cfg.MISSING_VALUE] * cfg.SAMPLES_PER_DAY
    date = datetime.date(year, month, day)
    data = _cached_data(det_name, date, traffic_type)

    if data is not None:
        return _convert_to_list(
            data, traffic_type) if not only_download else None

    # not cached, check if failed detector
    if _is_failed(det_name, date, traffic_type):
        # logging.debug("Detector " + det_name + " is failed detector")
        return missing_data if not only_download else None

    dirname = str(year) + str(month).zfill(2) + str(day).zfill(2)
    remote_file = cfg.TRAFFIC_DATA_URL + '/' + str(
        year) + '/' + dirname + '/' + det_name + traffic_type.extension

    # faverolles 1/13/2020 NOTE: DATA DOWNLOAD TEMPORARY FIX
    #   Temporary fix to stop the server from trying to download
    #   the traffic data files past the year specified by dataloader.py
    #   The server config while developing is Aug 1~31 2018 but the server
    #   continually tries to download 2019 traffic data (hundred+ GB) so
    #   the travel time calculation never finishes
    if str(year) > "2018":
        return missing_data

    try:
        if n_try > 1:
            logging.critical('Retrying... (n_try=%d)' % n_try)

        with http.get_url_opener(remote_file, timeout=30) as res:
            binData = res.read()
            data = _convert_to_list(binData, traffic_type)
            if not data:
                return missing_data
            _cache(det_name, date, binData, traffic_type)
            return data

    except url_error.HTTPError as e:
        logging.debug(
            'Could not get the remote file (file={}, reason={}, http_code={})'.
            format(remote_file, str(e.reason), e.code))
        if e.code == 404:
            open(_cache_path(CACHE_TYPE_FAIL, det_name, date, traffic_type),
                 'w').close()
        return missing_data if not only_download else None
    except url_error.URLError as e:
        logging.critical(
            'Could not connect to the server (file={}, reason={})'.format(
                remote_file, str(e.reason)))
        return missing_data if not only_download else None
    except ConnectionResetError as e:
        logging.critical(
            'HTTP Connection has been reset. (file={}, reason={})'.format(
                remote_file, e.errno))
        if n_try <= MAX_TRY_NUM:
            time.sleep(1)
            return _load(det_name,
                         year,
                         month,
                         day,
                         traffic_type,
                         only_download=only_download,
                         n_try=n_try + 1)
        else:
            logging.critical('  - failed to get data')
        return missing_data if not only_download else None
    except Exception as e:
        logging.critical(
            'Exception occurred while downloading traffic data (file={}, reason={})'
            .format(remote_file, str(e)))
        return missing_data if not only_download else None
Example #5
def _read_from_web(page_name,
                   group_id,
                   site_id,
                   sen_id,
                   date,
                   e_date,
                   n_try=1):
    """  thread worker to get data from web page

    :param page_name: web page name of scanWeb site
    :type page_name: str

    :type group_id: str
    :type site_id: str

    :param sen_id: sensor id (?), required param for uri request
    :type sen_id: int

    :param date: target date
    :type date: datetime.datetime

    :param date: end date
    :type e_date: datetime.datetime
    """
    remote_file = '{0}{1}?Units=English&Siteid={2}&Senid={3}&HEndDate={4}'.format(
        cfg.SCANWEB_URL, page_name, site_id, sen_id,
        '{0}%2F{1}%2F{2}'.format(e_date.month, e_date.day, e_date.year))
    logging.debug('RWIS(Export) URL : %s' % remote_file)
    try:
        with http.get_url_opener(remote_file) as res:
            html = res.read()
    except url_error.HTTPError as e:
        logging.debug(
            'Could not access html of ScanWeb site (reason={}, http_code={})'.
            format(str(e.reason), e.code))
        logging.debug('URL : {}'.format(remote_file))
        if e.code == 404:
            rwis_cache.fail(group_id, site_id, date, src_type=SRC_TYPE)
        return None
    except url_error.URLError as e:
        logging.critical(
            'Could not connect to ScanWeb site (reason={})'.format(
                str(e.reason)))
        logging.debug('URL : {}'.format(remote_file))
        return None
    except ConnectionResetError as e:
        logging.critical(
            'HTTP Connection has been reset. (file={}, reason={})'.format(
                remote_file, e.errno))
        if n_try <= MAX_TRY_NUM:
            logging.critical('Retrying...')
            time.sleep(1)
            return _read_from_web(page_name,
                                  group_id,
                                  site_id,
                                  sen_id,
                                  date,
                                  e_date,
                                  n_try=n_try + 1)
        return None

    try:
        return _parse(html.decode())
    except TypeError as ex:
        logging.critical('Could not parse the exported data from ScanWeb site')
        tb.traceback(ex)

    return None
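
A hypothetical invocation, to show how the parameters map onto the query string; the page name, ids, and dates below are illustrative only, not real ScanWeb values:

import datetime

# 'Export.aspx' and all ids here are assumptions for illustration.
rows = _read_from_web('Export.aspx', group_id='330', site_id='330045',
                      sen_id=0,
                      date=datetime.datetime(2018, 8, 1),
                      e_date=datetime.datetime(2018, 8, 31))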
Example #6
    class TrafficType:
        def __init__(self, extension):
            self.extension = extension

    ticas.initialize(global_settings.DATA_PATH)
    infra = Infra.get_infra()
    sdt_str = input('# Enter start date to load data (e.g. 2015-01-01) : ')
    edt_str = input('# Enter end date to load data (e.g. 2017-12-31) : ')
    days = gen_days(start_date_string=sdt_str, end_date_string=edt_str)
    formats = ("v30", "c30")
    success_count = 0
    fail_count = 0
    for day in days:
        dirname = str(day.year) + str(day.month).zfill(2) + str(
            day.day).zfill(2)
        print("Downloading traffic data for {}".format(dirname))
        for detector in infra.detectors:
            for fmt in formats:
                remote_file = cfg.TRAFFIC_DATA_URL + '/' + str(
                    day.year) + '/' + dirname + '/' + detector + "." + fmt
                try:
                    with http.get_url_opener(remote_file, timeout=30) as res:
                        binData = res.read()
                        if binData:
                            _save_file_to_cache(detector, day, binData,
                                                TrafficType("." + fmt))
                            success_count += 1
                except Exception:
                    fail_count += 1
    print("Success: {}".format(success_count))
    print("Fail: {}".format(fail_count))