Example #1

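# This snippet assumes a client, a target bucket, a list of local file names
# and the fileMap dictionary were created earlier in the sample. A minimal
# sketch of that setup (the bucket and file names are placeholders, not part
# of the original example):
import os

from ds3 import ds3

client = ds3.createClientFromEnv()  # endpoint and credentials are read from the environment
bucketName = "books"                # assumed to exist already
fileList = ["beowulf.txt"]          # local files to send
fileMap = {}                        # object name -> local path, filled in below
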
# map a local file name to a ds3.FileObject (object name and size) and
# record which local file backs each object name
def createDs3Obj(fileName):
    size = os.stat(fileName).st_size
    ds3ObjName = "prefix/" + fileName
    fileMap[ds3ObjName] = fileName
    return ds3.FileObject(ds3ObjName, size)


# build a FileObjectList (object name and size) for the bulk request
fileList = ds3.FileObjectList(list(map(createDs3Obj, fileList)))

# submit the put bulk request to DS3
bulkResult = client.put_bulk_job_spectra_s3(
    ds3.PutBulkJobSpectraS3Request(bucketName, fileList))

# the bulk request may split the files across several chunks.
# iterate over the chunks, ask the server to allocate space for each one,
# and then send every object returned in that chunk
for chunk in bulkResult.result['ObjectsList']:
    allocateChunk = client.allocate_job_chunk_spectra_s3(
        ds3.AllocateJobChunkSpectraS3Request(chunk['ChunkId']))
    for obj in allocateChunk.result['ObjectList']:
        objectDataStream = open(fileMap[obj['Name']], "rb")
        # send the blob, tying it to the bulk job via its job id
        client.put_object(
            ds3.PutObjectRequest(bucketName,
                                 obj['Name'],
                                 obj['Length'],
                                 objectDataStream,
                                 offset=int(obj['Offset']),
                                 job=bulkResult.result['JobId']))
        objectDataStream.close()
Example #2
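
# As in the first example, the snippet assumes a client, a bucket and a list
# of file names created earlier in the sample. A minimal sketch of that setup
# (the bucket and file names are placeholders, not part of the original
# example):
import os
import time  # used further down while waiting for chunks to become available

from ds3 import ds3

client = ds3.createClientFromEnv()  # endpoint and credentials are read from the environment
bucketName = "books"                # assumed to exist already
fileList = ["beowulf.txt"]          # expected under the local "resources" folder
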
# map a file path to a Ds3PutObject (object name and size)
def fileNameToDs3PutObject(filePath, prefix=""):
    size = os.stat(pathForResource(filePath)).st_size
    return ds3.Ds3PutObject(prefix + filePath, size)
    
# build the OS-specific path for a file located in the resources folder
def pathForResource(resourceName):
    currentPath = os.path.dirname(str(__file__))
    return os.path.join(currentPath, "resources", resourceName)

# build the list of Ds3PutObjects (object name and size) for the bulk request
fileList = list(map(fileNameToDs3PutObject, fileList))

# submit the put bulk request to DS3
bulkResult = client.put_bulk_job_spectra_s3(ds3.PutBulkJobSpectraS3Request(bucketName, fileList))

# the bulk request may split the files across several chunks.
# ask the server which chunks can be processed now, send them, and keep
# track of what has already been sent so no chunk is transferred twice

# build a set of the chunk IDs, used to track which chunks
# have not been sent yet
chunkIds = set([x['ChunkId'] for x in bulkResult.result['ObjectsList']])

# while we still have chunks to send
while len(chunkIds) > 0:
    # get a list of the available chunks that we can send
    availableChunks = client.get_job_chunks_ready_for_client_processing_spectra_s3(
                             ds3.GetJobChunksReadyForClientProcessingSpectraS3Request(bulkResult.result['JobId']))
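
    # A sketch of how the send loop typically continues, based on the comments
    # above; the fixed back-off, the use of pathForResource() for the local
    # path, and the explicit seek are assumptions rather than part of the
    # original example.
    chunks = availableChunks.result['ObjectsList']

    # nothing is ready yet (the cache may be full): wait, then ask again
    if len(chunks) == 0:
        time.sleep(60)
        continue

    for chunk in chunks:
        # skip chunks that have already been sent
        if chunk['ChunkId'] not in chunkIds:
            continue
        chunkIds.remove(chunk['ChunkId'])

        # send every object (blob) the server returned for this chunk
        for obj in chunk['ObjectList']:
            objectDataStream = open(pathForResource(obj['Name']), "rb")
            objectDataStream.seek(int(obj['Offset']), 0)  # a large file may span several chunks
            client.put_object(
                ds3.PutObjectRequest(bucketName,
                                     obj['Name'],
                                     obj['Length'],
                                     objectDataStream,
                                     offset=int(obj['Offset']),
                                     job=bulkResult.result['JobId']))
            objectDataStream.close()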