Example #1
def download(k):
  # Retry the S3 download once; if the second attempt also fails,
  # return the exception object instead of raising it.
  try:
    return cloud_helpers.download_file_from_s3(bucket, k)
  except Exception:
    try:
      return cloud_helpers.download_file_from_s3(bucket, k)
    except Exception as e:
      return e
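For context, a minimal sketch of how this retry-and-return-exception pattern might be consumed; the key names below are hypothetical, and only `download` (plus the `bucket` and `cloud_helpers` names it relies on) comes from the example above.

    # Usage sketch (assumption, not part of the original example):
    # download() returns either the downloaded file or the exception
    # object, so the caller has to check the type of the result.
    results = {}
    failures = {}
    for key in ["2020-01-01.csv", "2020-01-02.csv"]:  # hypothetical keys
      result = download(key)
      if isinstance(result, Exception):
        failures[key] = result
      else:
        results[key] = result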
Example #2
def process_s3_file(
    input_bucket_name, input_key_name, output_bucket_name_1ms=None, output_bucket_name_100ms=None, overwrite=False
):

    input_filename = cloud_helpers.download_file_from_s3(input_bucket_name, input_key_name)
    dest_1ms, dest_100ms = output_filenames(input_filename, cloud_helpers.scratch_dir())

    out_key_name_1ms = os.path.split(dest_1ms)[1]
    out_key_name_100ms = os.path.split(dest_100ms)[1]

    if output_bucket_name_1ms is None:
        output_bucket_name_1ms = input_bucket_name + "-hdf-1ms"

    if output_bucket_name_100ms is None:
        output_bucket_name_100ms = input_bucket_name + "-hdf"

    feature_names = extractor.feature_names()

    if (
        not overwrite
        and cloud_helpers.hdf_already_on_s3(output_bucket_name_1ms, out_key_name_1ms, feature_names)
        and cloud_helpers.hdf_already_on_s3(output_bucket_name_100ms, out_key_name_100ms, feature_names)
    ):

        print "HDFs on S3 have same features, so skipping this input..."
        return
    else:
        print "HDFs either not on S3 or have different features..."

    # In some weird situation we might have a local copy of the HDF already
    # finished but it just might not have been uploaded yet
    if (
        not overwrite
        and hdf.complete_hdf_exists(dest_1ms, feature_names)
        and hdf.complete_hdf_exists(dest_100ms, feature_names)
    ):
        print "Found finished HDFs on local storage..."
        header = hdf.header_from_hdf_filename(dest_1ms)
    else:
        print "Running feature generator..."
        header = process_local_file(input_filename, dest_1ms, dest_100ms)

    print "Header:", header
    print "Uploading 1ms feature file", dest_1ms, "to", output_bucket_name_1ms, "/", out_key_name_1ms
    cloud_helpers.upload_file_to_s3(dest_1ms, output_bucket_name_1ms, out_key_name_1ms, header)
    print "Uploading 100ms feature file", dest_100ms, "to", output_bucket_name_100ms, "/", out_key_name_100ms
    cloud_helpers.upload_file_to_s3(dest_100ms, output_bucket_name_100ms, out_key_name_100ms, header)
Example #3
def process_s3_file(input_bucket_name, input_key_name, 
    output_bucket_name_1ms = None, 
    output_bucket_name_100ms = None, 
    overwrite = False):
  
  input_filename = cloud_helpers.download_file_from_s3(input_bucket_name, input_key_name)   
  dest_1ms, dest_100ms = output_filenames(input_filename, cloud_helpers.scratch_dir())
  
  out_key_name_1ms = os.path.split(dest_1ms)[1]  
  out_key_name_100ms = os.path.split(dest_100ms)[1]
  
  if output_bucket_name_1ms is None:
    output_bucket_name_1ms = input_bucket_name + "-hdf-1ms"

  if output_bucket_name_100ms is None:
    output_bucket_name_100ms = input_bucket_name + "-hdf"
     
  feature_names = extractor.feature_names()
  
  if not overwrite and \
     cloud_helpers.hdf_already_on_s3(output_bucket_name_1ms, out_key_name_1ms, feature_names) and \
     cloud_helpers.hdf_already_on_s3(output_bucket_name_100ms, out_key_name_100ms, feature_names):
  
    print "HDFs on S3 have same features, so skipping this input..."
    return
  else:
    print "HDFs either not on S3 or have different features..."
  
  # In some weird situation we might have a local copy of the HDF already 
  # finished but it just might not have been uploaded yet       
  if not overwrite and hdf.complete_hdf_exists(dest_1ms, feature_names) and \
     hdf.complete_hdf_exists(dest_100ms, feature_names):
    print "Found finished HDFs on local storage..."
    header = hdf.header_from_hdf_filename(dest_1ms) 
  else:
    print "Running feature generator..."
    header = process_local_file(input_filename, dest_1ms, dest_100ms)

  print "Header:", header
  print "Uploading 1ms feature file", dest_1ms, "to", output_bucket_name_1ms, "/", out_key_name_1ms
  cloud_helpers.upload_file_to_s3(
    dest_1ms, output_bucket_name_1ms, out_key_name_1ms, header)
  print "Uploading 100ms feature file", dest_100ms, "to", output_bucket_name_100ms, "/", out_key_name_100ms
  cloud_helpers.upload_file_to_s3(
    dest_100ms, output_bucket_name_100ms, out_key_name_100ms, header)
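A hedged sketch of how `process_s3_file` might be driven over a whole input bucket. The `list_keys` helper and the bucket name are assumptions for illustration only; the examples above show `process_s3_file` itself but not how keys are enumerated.

    # Driver sketch (assumptions: list_keys() is a hypothetical placeholder
    # for whatever S3 listing helper the project uses; only process_s3_file
    # comes from the examples above).
    def list_keys(bucket_name):
      # Placeholder: return an iterable of key names in the bucket.
      raise NotImplementedError

    def process_bucket(input_bucket_name, overwrite=False):
      for key_name in list_keys(input_bucket_name):
        # With the output bucket arguments left as None, the derived
        # "-hdf-1ms" and "-hdf" bucket names are used.
        process_s3_file(input_bucket_name, key_name, overwrite=overwrite)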