def _swap_leading(text, prefix, replacement):
    """Replace *prefix* with *replacement* only where it starts *text*.

    ``str.replace`` substitutes every occurrence, which corrupts a path
    whose later components happen to repeat the prefix.  When *text* does
    not start with *prefix*, fall back to ``str.replace`` so such inputs
    are handled exactly as they were before.
    """
    if prefix and text.startswith(prefix):
        return replacement + text[len(prefix):]
    return text.replace(prefix, replacement)


def _bin_to_json(path):
    """Return *path* with a trailing ``.bin`` changed to ``.json``.

    Only the end of the path is touched, so a ``.bin`` inside a directory
    name (or the home directory) is left alone.  Paths that do not end in
    ``.bin`` are returned unchanged.
    """
    if path.endswith('.bin'):
        return path[:-len('.bin')] + '.json'
    return path


def worker_task(instance_no, total_instances, bin_data_source_blob):
    """
    Download, process and upload the blobs assigned to this instance.

    The blobs returned by ``assign_files`` are downloaded under
    ``~/raw_data``; each downloaded file is then handed to
    ``log_parser.main``, which is given a ``.json`` path mirroring the
    same relative location under ``~/processed/<bin_data_source_blob>``.
    That output path, made relative to the home directory, is used as the
    destination blob name for ``upload_blob`` into ``BUCKET_NAME``.

    instance_no belongs to [0, total_instances - 1]

    :param instance_no: the instance number this process is running on
    :param total_instances: total number of instances
    :param bin_data_source_blob: blob name (prefix) of the binary data
    """
    # Use the module-level logger when it is set, otherwise the print alias.
    log_info = log.info if log else print_alias

    home_prefix = os.path.expanduser('~/')
    # binary will be stored in ~/raw_data
    BIN_DATA_STORAGE = os.path.expanduser('~/raw_data')
    # blob name for processed data
    PROCESSED_DATA_BLOB_NAME = "processed/" + bin_data_source_blob
    # processed data storage location
    PROCESSED_DATA_STORAGE = os.path.expanduser(
        '~/' + PROCESSED_DATA_BLOB_NAME)

    assigned_blobs = assign_files(instance_no=instance_no,
                                  total_instances=total_instances,
                                  bin_data_source_blob=bin_data_source_blob)
    log_info("Instance_no: {}".format(instance_no))
    log_info('Blobs assigned: ' + str(assigned_blobs))

    # Download every assigned blob before any processing starts.
    file_names = []
    for blob in assigned_blobs:
        # Strip only the leading source prefix so that a repeat of the
        # prefix deeper inside the blob name survives.
        rel_file_name = _swap_leading(blob.name, bin_data_source_blob, '')
        joinable_rel_file_name = get_joinable_rear_path(rel_file_name)
        # absolute path for raw_data
        filename = os.path.join(BIN_DATA_STORAGE, joinable_rel_file_name)
        make_dirs(os.path.dirname(filename))
        blob.download_to_filename(filename)
        log_info('File {} downloaded to {}'.format(str(blob.name), filename))
        file_names.append(filename)

    # Both lists are filled below; nothing in the visible part of the
    # function reads them, so they are kept for any code that follows.
    save_names = []
    upload_names = []
    for filename in file_names:
        # Mirror the raw-data path into the processed tree, then swap the
        # extension.  Both steps are anchored (leading directory, trailing
        # extension) so unrelated parts of the path are never rewritten.
        save_filename = _bin_to_json(
            _swap_leading(filename, BIN_DATA_STORAGE, PROCESSED_DATA_STORAGE))
        make_dirs(os.path.dirname(save_filename))
        log_parser.main(log, filename=filename, save_filename=save_filename)
        save_names.append(save_filename)

        # The destination blob name is the output path relative to home.
        upload_name = _swap_leading(save_filename, home_prefix, '')
        upload_blob(source_file_path=save_filename,
                    destination_blob_name=upload_name,
                    bucket_name=BUCKET_NAME)
        upload_names.append(upload_name)