import logging
import os

import xarray as xr

# Helpers referenced below (get_location_metadata_by_store, get_SAN_directories,
# get_deployment_data, get_SAN_samples, compile_datasets, SessionManager,
# execute_unlimited_query, san_threadpool, SAN_LOCATION_NAME, CASS_LOCATION_NAME)
# are assumed to be imported from elsewhere in this package.

log = logging.getLogger(__name__)


def fetch_full_san_data(stream_key, time_range, location_metadata=None):
    """
    Given a time range and stream key, generate all data in the interval using data from the SAN.
    :param stream_key:
    :param time_range:
    :param location_metadata:
    :return: an xarray Dataset concatenated along 'index', or None if no data was found
    """
    if location_metadata is None:
        location_metadata = get_location_metadata_by_store(stream_key, time_range, SAN_LOCATION_NAME)
    # get which bins we can gather data from
    ref_des_dir, dir_string = get_SAN_directories(stream_key, split=True)
    if not os.path.exists(ref_des_dir):
        log.warning("Reference Designator does not exist in offloaded DataSAN")
        return None
    data = []
    next_index = 0
    for time_bin in location_metadata.bin_list:
        direct = dir_string.format(time_bin)
        if os.path.exists(direct):
            # get data from all of the deployments in this bin
            for deployment in os.listdir(direct):
                full_path = os.path.join(direct, deployment)
                if os.path.isdir(full_path):
                    # -1 asks get_deployment_data for every point in the deployment
                    new_data = get_deployment_data(full_path, stream_key.stream_name, -1, time_range,
                                                   index_start=next_index)
                    if new_data is not None:
                        data.append(new_data)
                        # keep track of indexes so they are unique in the final dataset
                        next_index += len(new_data['index'])
    if not data:
        return None
    return xr.concat(data, dim='index')
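# A hedged usage sketch: the helper below is hypothetical (not part of this
# module's API) and only illustrates the function's contract, namely an xarray
# Dataset concatenated along 'index', or None when nothing was found.
def _example_fetch_and_report(stream_key, time_range):
    """Hypothetical caller showing how a fetch_full_san_data result might be used."""
    dataset = fetch_full_san_data(stream_key, time_range)
    if dataset is None:
        log.warning("No SAN data found for %s in %s", stream_key, time_range)
        return 0
    # 'index' is the concatenation dimension, so its size is the total point count
    return dataset['index'].size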
def fetch_nsan_data(stream_key, time_range, num_points=1000, location_metadata=None):
    """
    Given a time range and stream key, generate evenly spaced times over the interval using data from the SAN.
    :param stream_key:
    :param time_range:
    :param num_points:
    :param location_metadata:
    :return: a compiled dataset of the sampled points
    """
    if location_metadata is None:
        location_metadata = get_location_metadata_by_store(stream_key, time_range, SAN_LOCATION_NAME)
    ref_des_dir, dir_string = get_SAN_directories(stream_key, split=True)
    if not os.path.exists(ref_des_dir):
        log.warning("Reference Designator does not exist in offloaded SAN")
        return None
    to_sample = get_SAN_samples(num_points, location_metadata)
    # To get the data, grab the first file in each deployment directory whose name
    # matches, and take a random selection of particles from that file if they
    # fall within the time range.
    missed = 0
    data = []
    next_index = 0
    futures = []
    for time_bin, num_data_points in to_sample:
        direct = dir_string.format(time_bin)
        if os.path.exists(direct):
            # get data from all of the deployments in this bin; pair each future
            # with its requested point count so the missed-point accounting below
            # uses the right target rather than the loop variable's last value
            for deployment in os.listdir(direct):
                full_path = os.path.join(direct, deployment)
                if os.path.isdir(full_path):
                    futures.append((num_data_points,
                                    san_threadpool.apply_async(get_deployment_data,
                                                               (full_path, stream_key.stream_name,
                                                                num_data_points, time_range),
                                                               kwds={'index_start': next_index})))
        else:
            missed += num_data_points
    for num_data_points, future in futures:
        new_data = future.get()
        if new_data is None:
            missed += num_data_points
            continue
        count = len(new_data['index'])
        missed += (num_data_points - count)
        data.append(new_data)
        # track how many indices have been consumed (note: every task above was
        # submitted before any results were collected, so each saw index_start=0)
        next_index += count
    if missed:
        log.warning("SAN: Failed to produce {:d} points due to nature of sampling".format(missed))
    return compile_datasets(data)
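# For illustration only: get_SAN_samples (defined elsewhere in this package)
# decides how many points to request from each time bin. A minimal sketch of
# that idea, assuming a plain even split of num_points across bins (the real
# helper may weight bins differently, e.g. by particle count), might look like:
def _example_even_samples(num_points, bin_list):
    """Hypothetical helper: distribute num_points as evenly as possible across bins."""
    per_bin, remainder = divmod(num_points, max(len(bin_list), 1))
    # the first `remainder` bins each absorb one leftover point
    return [(time_bin, per_bin + (1 if i < remainder else 0))
            for i, time_bin in enumerate(bin_list)]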
def fetch_all_data(stream_key, time_range, location_metadata=None):
    """
    Given a time range, fetch all records from the starting hour to the ending hour.
    :param stream_key:
    :param time_range:
    :param location_metadata:
    :return: a (cols, rows) tuple of query columns and all fetched rows
    """
    if location_metadata is None:
        location_metadata = get_location_metadata_by_store(stream_key, time_range, CASS_LOCATION_NAME)
    cols = SessionManager.get_query_columns(stream_key.stream.name)
    rows = []
    for bin_num in location_metadata.bin_list:
        rows.extend(execute_unlimited_query(stream_key, cols, bin_num, time_range))
    return cols, rows
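# A hedged consumption sketch: fetch_all_data returns raw query columns and row
# tuples rather than a dataset. The helper below is hypothetical, and the pandas
# usage is one illustrative way a caller might tabulate the result, not
# necessarily how this package post-processes it.
def _example_tabulate_rows(stream_key, time_range):
    """Hypothetical caller: build a DataFrame from fetch_all_data's (cols, rows)."""
    import pandas as pd
    cols, rows = fetch_all_data(stream_key, time_range)
    # each row is a sequence aligned with cols, so it maps directly onto a table
    return pd.DataFrame(rows, columns=cols)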