Пример #1
0
    # Parse the start and end dates (YYYYMMDD) passed on the command line.
    if len(sys.argv) < 3:
        print(
            "Start and end dates are expected. Example: python index_data.py 20161122 20161130"
        )
        # Bail out: continuing would only fail with an IndexError on the
        # sys.argv lookups below, burying the usage hint under a traceback.
        sys.exit(1)
    start_date_string = sys.argv[1]
    start_date = date(int(start_date_string[0:4]), int(start_date_string[4:6]),
                      int(start_date_string[6:8]))
    end_date_string = sys.argv[2]
    end_date = date(int(end_date_string[0:4]), int(end_date_string[4:6]),
                    int(end_date_string[6:8]))

    # Gather every non-empty blob stored under each day's 'YYYY/MM/DD/' prefix.
    joined = []
    for current_date in common.dates_in_range(start_date, end_date):
        blob_prefix = current_date.strftime('%Y/%m/%d/')
        day_blobs = block_blob_service.list_blobs(joined_examples_container,
                                                  prefix=blob_prefix)
        joined.extend(
            blob for blob in day_blobs if blob.properties.content_length != 0)

    # Run each collected blob entry through common.parse_name.
    joined = [common.parse_name(blob) for blob in joined]

    def load_data(ts, blob):
        jd = common.JoinedData(block_blob_service, cache_folder,
                               joined_examples_container, ts, blob)
        if not os.path.exists(jd.filename + '.ids'):
Пример #2
0
    # Parse the start and end dates (YYYYMMDD) passed on the command line.
    if len(sys.argv) < 3:
        print("Start and end dates are expected. Example: python datascience.py 20161122 20161130")
        # Bail out: continuing would only fail with an IndexError on the
        # sys.argv lookups below, burying the usage hint under a traceback.
        sys.exit(1)
    start_date_string = sys.argv[1]
    start_date = date(int(start_date_string[0:4]), int(start_date_string[4:6]), int(start_date_string[6:8]))

    # Look back 'experimental_unit_duration_days' days before the start date for events
    start_date_withlookback = start_date + timedelta(days=-int(experimental_unit_duration_days))

    end_date_string = sys.argv[2]
    end_date = date(int(end_date_string[0:4]), int(end_date_string[4:6]), int(end_date_string[6:8]))

    # Gather every non-empty blob stored under each day's 'YYYY/MM/DD/' prefix,
    # starting from the lookback date.
    joined = []
    for current_date in common.dates_in_range(start_date_withlookback, end_date):
        blob_prefix = current_date.strftime('%Y/%m/%d/')
        day_blobs = block_blob_service.list_blobs(joined_examples_container, prefix=blob_prefix)
        joined.extend(blob for blob in day_blobs if blob.properties.content_length != 0)

    # Run each collected blob entry through common.parse_name.
    joined = [common.parse_name(blob) for blob in joined]

    # Start with empty index dictionaries and an empty data list.
    global_idx, global_model_idx, data = {}, {}, []
    
    def load_data(ts, blob):
        """Build a common.JoinedData for (ts, blob), index it, and return it."""
        joined_data = common.JoinedData(
            block_blob_service,
            cache_folder,
            joined_examples_container,
            ts,
            blob,
        )
        joined_data.index()
        return joined_data
Пример #3
0
        )
    # Start date: first command-line argument, read as YYYYMMDD.
    start_date_string = sys.argv[1]
    start_year = int(start_date_string[0:4])
    start_month = int(start_date_string[4:6])
    start_day = int(start_date_string[6:8])
    start_date = date(start_year, start_month, start_day)

    # Move the start back by 'experimental_unit_duration_days' days for events.
    lookback = timedelta(days=int(experimental_unit_duration_days))
    start_date_withlookback = start_date - lookback

    # End date: second command-line argument, same YYYYMMDD layout.
    end_date_string = sys.argv[2]
    end_year = int(end_date_string[0:4])
    end_month = int(end_date_string[4:6])
    end_day = int(end_date_string[6:8])
    end_date = date(end_year, end_month, end_day)

    # Gather every non-empty blob stored under each day's 'YYYY/MM/DD/' prefix,
    # starting from the lookback date.
    joined = []
    for current_date in common.dates_in_range(start_date_withlookback,
                                              end_date):
        blob_prefix = current_date.strftime('%Y/%m/%d/')
        day_blobs = block_blob_service.list_blobs(joined_examples_container,
                                                  prefix=blob_prefix)
        joined.extend(
            blob for blob in day_blobs if blob.properties.content_length != 0)

    # Run each collected blob entry through common.parse_name.
    joined = [common.parse_name(blob) for blob in joined]

    # Start with empty index dictionaries and an empty data list.
    global_idx, global_model_idx, data = {}, {}, []
Пример #4
0
    joined_examples_container = ds['JoinedExamplesContainer']

    # Blob client created from the '$Default' entry of the
    # 'AzureBlobStorageAuthorization' config section.
    # https://azure-storage.readthedocs.io/en/latest/_modules/azure/storage/blob/models.html#BlobBlock
    storage_auth = self.config['AzureBlobStorageAuthorization']
    block_blob_service = BlockBlobService(connection_string=storage_auth['$Default'])

    # Parse the start and end dates (YYYYMMDD) passed on the command line.
    if len(sys.argv) < 3:
        print("Start and end dates are expected. Example: python index_data.py 20161122 20161130")
        # Bail out: continuing would only fail with an IndexError on the
        # sys.argv lookups below, burying the usage hint under a traceback.
        sys.exit(1)
    start_date_string = sys.argv[1]
    start_date = date(int(start_date_string[0:4]), int(start_date_string[4:6]), int(start_date_string[6:8]))
    end_date_string = sys.argv[2]
    end_date = date(int(end_date_string[0:4]), int(end_date_string[4:6]), int(end_date_string[6:8]))

    # Gather every non-empty blob stored under each day's 'YYYY/MM/DD/' prefix.
    joined = []
    for current_date in common.dates_in_range(start_date, end_date):
        blob_prefix = current_date.strftime('%Y/%m/%d/')
        day_blobs = block_blob_service.list_blobs(joined_examples_container, prefix=blob_prefix)
        joined.extend(blob for blob in day_blobs if blob.properties.content_length != 0)

    # Run each collected blob entry through common.parse_name.
    joined = [common.parse_name(blob) for blob in joined]
   
    def load_data(ts, blob):
        """Create the '.ids' index for a joined-data file when it is missing.

        Builds a common.JoinedData for (ts, blob) and, if '<filename>.ids'
        does not exist yet, runs the external indexer executable on the data
        file. A non-zero exit code from the indexer is reported on stdout.
        Returns None.
        """
        jd = common.JoinedData(block_blob_service, cache_folder, joined_examples_container, ts, blob)
        if not os.path.exists(jd.filename + '.ids'):
            print("Indexing " + jd.filename)
            # Pass the command as an argument list so a filename containing
            # spaces reaches the indexer as one argument instead of being split.
            exit_code = subprocess.call(["C:\\work\\mwt-ds\\x64\\Release\\LogCookig.exe", jd.filename])
            if exit_code != 0:
                # Surface the failure instead of silently dropping the return code.
                print("Indexing failed for " + jd.filename + " (exit code " + str(exit_code) + ")")

    data = []
    print("Downloading & indexing events...")
    with Pool(processes = 4) as p: