def main(args):
    """Combine the merged rasters of one tile and upload the result.

    ``args`` must provide ``parameters`` (a mapping that includes
    ``BUCKET``), ``cos`` (the storage credentials and private endpoint) and
    ``tile``. The shapefile archive is first saved locally as ``shape.zip``;
    then the ``merged.tif`` object of every variable is fetched for the tile
    and handed to ``combine_calculations``, whose output file is uploaded
    under ``tmp/ETC/<tile>.tif``.

    Returns:
        dict with the local result filename plus the start and end
        timestamps of the call.
    """
    started = time.time()
    parameters = SimpleNamespace(**args['parameters'])
    credentials = args['cos']
    cos = COSBackend(
        aws_access_key_id=credentials['aws_access_key_id'],
        aws_secret_access_key=credentials['aws_secret_access_key'],
        endpoint_url=credentials['private_endpoint'])

    tile = args['tile']

    # Save the shapefile archive to disk, reading it in 200 MiB pieces.
    # NOTE(review): this relies on get_object returning a readable stream.
    chunk_size = 200 * 1024 * 1024
    shapefile = cos.get_object(bucket=parameters.BUCKET, key='shapefile.zip')
    with open('shape.zip', 'wb') as archive:
        while True:
            chunk = shapefile.read(chunk_size)
            if not chunk:
                break
            archive.write(chunk)

    # One merged raster per variable, keyed by the lower-cased variable name
    # so they can be passed as keyword arguments.
    variables = ('TEMPERATURE', 'HUMIDITY', 'WIND', 'EXTRAD', 'RADIANCE')
    rasters = {
        variable.lower(): cos.get_object(
            bucket=parameters.BUCKET,
            key='/'.join(['tmp', variable, tile, 'merged.tif']))
        for variable in variables
    }

    filename = combine_calculations(tile=tile, **rasters)

    cos.upload_file(filename=filename,
                    bucket=parameters.BUCKET,
                    key='/'.join(['tmp', 'ETC', tile + '.tif']))
    finished = time.time()
    return {'result': filename, 'start_time': started, 'end_time': finished}
Exemplo n.º 2
0
def main(args):
    """Interpolate one block of a tile and upload it under ``tmp/WIND``.

    The entries of ``args['chunk']`` are merged into ``args`` (in place), so
    ``mdt_key``, ``block_x`` and ``block_y`` are read from the top level.
    The MDT object and ``siam_out.csv`` are fetched from the bucket and
    passed to ``map_interpolation``; the file it returns is uploaded.

    Returns:
        dict with the uploaded object key plus the start and end timestamps.
    """
    started = time.time()
    args.update(args['chunk'])
    parameters = SimpleNamespace(**args['parameters'])
    credentials = args['cos']
    cos = COSBackend(
        aws_access_key_id=credentials['aws_access_key_id'],
        aws_secret_access_key=credentials['aws_secret_access_key'],
        endpoint_url=credentials['private_endpoint'])

    mdt_key = args['mdt_key']
    bucket = parameters.BUCKET
    mdt = cos.get_object(key=mdt_key, bucket=bucket)
    siam_stream = cos.get_object(key='siam_out.csv', bucket=bucket)

    block_x = args['block_x']
    block_y = args['block_y']
    out = map_interpolation(siam_stream=siam_stream,
                            mdt=mdt,
                            block_x=block_x,
                            block_y=block_y,
                            splits=parameters.SPLITS,
                            area_of_influence=parameters.AREA_OF_INFLUENCE)

    # The tile name is the text of the MDT file name before its first dot.
    tile_name = os.path.basename(mdt_key).split('.')[0]
    result_key = f'tmp/WIND/{tile_name}/{block_x}_{block_y}.tif'

    cos.upload_file(filename=out, bucket=bucket, key=result_key)
    finished = time.time()
    return {
        'result': result_key,
        'start_time': started,
        'end_time': finished
    }
def my_map_function(vec):
    """Compute one dot product per (i, j) pair in ``vec``.

    For each pair, object ``A<i>`` and object ``B<j>`` are fetched from the
    ``cuc-bucket`` bucket, deserialised into arrays and multiplied with
    ``numpy.dot``.

    Returns:
        list of ``[product, i, j]`` entries, in the order of ``vec``.
    """
    cos = COSBackend()

    def fetch_array(name):
        # Objects hold a JSON string whose latin-1 encoding is a .npy payload.
        payload = cos.get_object('cuc-bucket', name)
        buffer = io.BytesIO(json.loads(payload).encode('latin-1'))
        return numpy.load(buffer)

    products = []
    for pair in numpy.array(vec):
        i, j = pair[0], pair[1]
        row = fetch_array('A' + str(i))
        col = fetch_array('B' + str(j))
        products.append([numpy.dot(row, col), i, j])

    return products
def mult(array):
    """Multiply consecutive pairs of pickled matrices stored in COS.

    ``array`` is read as ``[key0, key1, key2, key3, ...]``: the objects at
    positions 0 and 1 are multiplied, then 2 and 3, and so on. Every product
    is appended (flattened, via ``np.append``) to the returned array.

    NOTE(review): ``pickle.loads`` runs on data fetched from storage; only
    safe when the bucket contents are trusted.
    """
    cos = COSBackend()
    products = []
    # Visit only the even positions; each one starts a (left, right) pair.
    for first in range(0, len(array), 2):
        left = pickle.loads(cos.get_object('____', array[first]))
        right = pickle.loads(cos.get_object('_____', array[first + 1]))
        products = np.append(products, np.dot(left, right))
    return products
Exemplo n.º 5
0
def matrix_mult_paquetes(x):
    """Compute this worker's share of the product A x B from packaged data.

    With a single worker the full matrices are loaded from ``/secuencial/``
    and multiplied directly. Otherwise ``x`` is parsed as
    ``"<worker>|<row>,<col>|<row>,<col>"``: the worker number selects the
    ``/paralelo/A<worker>`` and ``/paralelo/B<worker>`` packages, and the two
    coordinate pairs are the first and last cell (inclusive) to compute.

    Relies on the module-level names ``WORKERS``, ``BUCKET``, ``M``, ``L``,
    ``p`` and ``np``.

    Returns:
        The ``np.dot`` result in the sequential case, otherwise a list with
        one dot product per computed cell, in row-major order.
    """
    cos = COSBackend()

    # Sequential computation
    if WORKERS == 1:
        A = p.loads(cos.get_object(BUCKET, '/secuencial/A'))
        B = p.loads(cos.get_object(BUCKET, '/secuencial/B'))
        return np.dot(A, B)

    # Parallel computation: each worker handles only its own part
    fields = str(x).split('|')
    results = []

    worker = int(fields[0])
    # Download this worker's packages
    A = p.loads(cos.get_object(BUCKET, '/paralelo/A' + str(worker)))
    B = p.loads(cos.get_object(BUCKET, '/paralelo/B' + str(worker)))

    current = fields[1].split(',')
    current[0], current[1] = int(current[0]), int(current[1])

    last = fields[2].split(',')
    last[0], last[1] = int(last[0]), int(last[1])

    row = 0  # row index inside the downloaded package of A

    if (M * L / WORKERS) >= L:
        # The downloaded package of B contains the whole of B, so columns
        # are addressed by their absolute index.
        while current <= last:
            results.append(A[row].dot(B[:, current[1]]))
            current[1] += 1
            if current[1] >= L:
                current[0] += 1
                row += 1
                current[1] = 0
    else:
        # The package holds only some columns; walk them in stored order.
        col = 0
        while current <= last:
            results.append(A[row].dot(B[:, col]))
            current[1] += 1
            col += 1
            if current[1] >= L:
                current[0] += 1
                row += 1
                current[1] = 0

    return results
Exemplo n.º 6
0
def main(args):
    """Count word occurrences in one byte range of a file and report it.

    The byte range ``args['Range']`` of ``args['target_fname']`` is fetched,
    split into ``\\w+`` tokens and counted case-insensitively. The counts are
    stored as JSON under ``<target_fname>/CW-result-<index>`` and that key is
    published to the queue named by ``args['qid']``.
    """
    # initialize cos wrapper
    cos_cfg = args['cos']
    cb = COSBackend(cos_cfg['service_endpoint'], cos_cfg['secret_key'],
                    cos_cfg['access_key'])

    # Fetch and tokenise in a single expression so that the downloaded
    # object is never bound to a name and can be reclaimed as soon as
    # possible, keeping the memory footprint low.
    words = re.findall(
        r'\w+',
        cb.get_object(args['target_bucket'],
                      args['target_fname'],
                      extra_get_args={
                          'Range': args['Range']
                      }).decode('UTF-8', errors='ignore'))

    result = {}
    for word in words:
        lowered = word.lower()
        result[lowered] = result.get(lowered, 0) + 1

    # commit result on the cloud
    result_tag = '{}/CW-result-{}'.format(args['target_fname'], args['index'])
    cb.put_object(args['target_bucket'], result_tag, json.dumps(result))

    # notify via queue, message = result file name on the cloud
    connection = pika.BlockingConnection(
        pika.URLParameters(args['rabbitamqp_url']))
    connection.channel().basic_publish(exchange='',
                                       routing_key=args['qid'],
                                       body=result_tag)
    connection.close()
def matrizMultCloud(casilla_ini, num_casillas):
    """Compute ``num_casillas`` consecutive cells of the product matrix.

    Starting at cell number ``casilla_ini``, each cell is mapped to a
    (row, column) position with ``CalcPosMatrix``; the pickled row
    ``fila<row>`` and column ``colum<col>`` are fetched from the
    ``sistemasdistribuidos2`` bucket and their first ``N`` elements are
    multiplied and summed. Relies on the module-level names ``config_os``,
    ``M``, ``L`` and ``N``.

    Returns:
        list of ``[row, column, value]`` entries, one per cell.
    """
    cos = COSBackend(config_os)
    resultados = []
    casilla = casilla_ini
    pendientes = num_casillas
    while pendientes > 0:
        fila_num, col_num = CalcPosMatrix(casilla, M, L)
        fila = pickle.loads(
            cos.get_object('sistemasdistribuidos2', 'fila' + str(fila_num)))
        columna = pickle.loads(
            cos.get_object('sistemasdistribuidos2', 'colum' + str(col_num)))
        producto = sum(fila[n] * columna[n] for n in range(N))
        resultados.append([fila_num, col_num, producto])
        casilla += 1
        pendientes -= 1
    return resultados
Exemplo n.º 8
0
def map_function(i, j):
    """Multiply submatrix ``A<i>.mtx`` by submatrix ``B<j>.mtx``.

    Both operands are unpickled from the ``prac1`` bucket using the
    module-level ``dic`` configuration; the result of ``A.dot(B)`` is
    returned.
    """
    storage = COSBackend(dic)
    # Fetch both operands, then multiply.
    left = pickle.loads(storage.get_object('prac1', f'A{i}.mtx'))
    right = pickle.loads(storage.get_object('prac1', f'B{j}.mtx'))
    return left.dot(right)
Exemplo n.º 9
0
def main(args):
    """Merge all interpolated blocks of one tile into a single raster.

    Lists every object under ``tmp/<type>/<tile>``, derives the raster
    profile from the first one via ``obtain_meta``, and passes each object
    together with its block coordinates to ``gather_blocks``. The file
    returned is uploaded as ``tmp/<type>/<tile>/merged.tif``.

    Returns:
        dict with the uploaded object key plus the start and end timestamps.

    Raises:
        IndexError: if no object exists under the prefix.
    """
    start_time = time.time()
    parameters = SimpleNamespace(**args['parameters'])
    cos = COSBackend(aws_access_key_id=args['cos']['aws_access_key_id'],
                     aws_secret_access_key=args['cos']['aws_secret_access_key'],
                     endpoint_url=args['cos']['private_endpoint'])

    # NOTE(review): the prefix has no trailing slash, so it also matches
    # tiles whose name merely starts with this tile's name - confirm.
    prefix = 'tmp/{}/{}'.format(args['type'], args['tile'])
    keys = cos.list_keys_prefix(bucket=parameters.BUCKET, prefix=prefix)

    chunk = cos.get_object(key=keys[0], bucket=parameters.BUCKET)
    profile = obtain_meta(chunk, parameters.SPLITS)

    def block_coords(key):
        # Block files are named "<block_x>_<block_y>.tif". Parse the whole
        # stem instead of the first three characters, so that indices with
        # two or more digits are not truncated.
        stem = os.path.splitext(os.path.basename(key))[0]
        return tuple(stem.split('_'))

    # Lazy on purpose: each block is downloaded only when it is consumed.
    tiles = ((cos.get_object(bucket=parameters.BUCKET, key=key),
              block_coords(key)) for key in keys)
    out = gather_blocks(tiles, profile)

    result_key = '/'.join(['tmp', args['type'], args['tile'], 'merged.tif'])
    cos.upload_file(filename=out, bucket=parameters.BUCKET, key=result_key)
    end_time = time.time()
    return {'result': result_key, 'start_time': start_time, 'end_time': end_time}
Exemplo n.º 10
0
def slave(id, x, ibm_cos):
    """Request write permission, wait for it, then append ``id`` to the result.

    An empty ``p_write_{<id>}`` object is created in the ``practise2``
    bucket as the request. The function then polls until listing
    ``write_{<id>}`` returns something truthy, and finally appends ``id`` to
    the JSON list stored in ``result.json``.

    NOTE(review): the parameter ``x`` is not used; the polling interval is
    taken from the module-level ``X`` - confirm that this is intended.
    """
    obj = COSBackend(config=ibm_cos)
    obj.put_object('practise2', "p_write_{" + str(id) + "}", b"")

    permission_key = 'write_{' + str(id) + '}'
    while True:
        time.sleep(X)
        if obj.list_objects('practise2', permission_key):
            break

    entries = json.loads(obj.get_object('practise2', 'result.json'))
    entries.append(id)
    obj.put_object('practise2', 'result.json', json.dumps(entries))
Exemplo n.º 11
0
def funcio_map(k):
    """Compute the matrix cells assigned to one worker and store them.

    ``k`` is a space-separated string of ``i j`` index pairs whose last
    token is used as the worker identifier. For every pair, the objects
    ``fila<i+1>.txt`` and ``col<j+1>.txt`` (comma-separated integers) are
    fetched and their element-wise products summed. All ``i j value``
    triples are written, space-separated, to ``worker<id>.txt``.

    Returns:
        The worker identifier (last token of ``k``).
    """
    cos = COSBackend()
    bucket = 'sd-ori-un-buen-cubo'

    tokens = k.split(" ")
    worker_id = tokens[-1]
    pieces = []

    for pair in range(len(tokens) // 2):
        i = tokens[2 * pair]
        j = tokens[2 * pair + 1]
        fil = 'fila' + str(int(i) + 1) + '.txt'
        col = 'col' + str(int(j) + 1) + '.txt'

        fila_raw = cos.get_object(bucket, fil)
        columna_raw = cos.get_object(bucket, col)
        fila = fila_raw.decode().split(",")
        columna = columna_raw.decode().split(",")

        acum = sum(int(fila[b]) * int(columna[b]) for b in range(len(fila)))
        pieces.extend([str(i), str(j), str(acum)])

    dada = ' '.join(pieces).encode()
    cos.put_object(bucket, 'worker' + worker_id + '.txt', dada)
    return worker_id
Exemplo n.º 12
0
def matrix_mult(x):
    """Compute this worker's share of the product A x B.

    With a single worker the full matrices are loaded from ``/secuencial/``
    and multiplied directly. Otherwise ``x`` is parsed as
    ``"<id>|<row>,<col>|<row>,<col>"``: ``<id>`` selects the row package
    ``/paralelo/f<id>`` of A, and the two coordinate pairs are the first and
    last cell (inclusive) of C to compute. B is always loaded whole.

    Relies on the module-level names ``WORKERS``, ``BUCKET``, ``L``, ``p``
    and ``np``.

    Returns:
        The ``np.dot`` result in the sequential case, otherwise a list with
        one dot product per computed cell, in row-major order.
    """
    cos = COSBackend()
    fields = str(x).split('|')

    # Sequential computation
    if WORKERS == 1:
        A = p.loads(cos.get_object(BUCKET, '/secuencial/A'))
        B = p.loads(cos.get_object(BUCKET, '/secuencial/B'))
        return np.dot(A, B)

    # Parallel computation: each worker handles only its own part
    results = []

    current = fields[1].split(',')
    current[0], current[1] = int(current[0]), int(current[1])

    last = fields[2].split(',')
    last[0], last[1] = int(last[0]), int(last[1])

    A = p.loads(cos.get_object(BUCKET, '/paralelo/f' + fields[0]))
    B = p.loads(cos.get_object(BUCKET, '/secuencial/B'))

    # The package of A starts at this row, so rows are indexed relative to it.
    first_row = current[0]

    while current <= last:
        # Cell C[current_row - first_row, current_col]
        results.append(A[current[0] - first_row].dot(B[:, current[1]]))
        current[1] += 1
        # Move on to the next row of C
        if current[1] >= L:
            current[0] += 1
            current[1] = 0

    return results
Exemplo n.º 13
0
def main(args):
    """Count the words in one byte range of a file and publish the total.

    ``args`` carries ``res`` (RabbitMQ URL and COS configuration),
    ``fileName``, ``topRange``, ``bottomRange`` and ``functionNumber``. The
    range limits are moved with ``selectRange`` so that they do not cut a
    word, the range is downloaded, everything except ASCII letters, spaces
    and newlines is removed, and the number of remaining words is published
    as ``{"words": <n>}`` to the ``CountingWords`` queue.

    Returns:
        An empty dict.
    """
    # Round-trip through JSON so that args is made of plain JSON types.
    args = json.loads(json.dumps(args))
    res = args["res"]
    url = res["rabbitmq"]["url"]
    topRange = int(args["topRange"])
    bottomRange = int(args["bottomRange"])
    # configure COS library
    odb = COSBackend(res["ibm_cos"])

    # rabbitmq configuration
    connection = pika.BlockingConnection(pika.URLParameters(url))
    try:
        channel = connection.channel()
        channel.queue_declare(queue="CountingWords")

        # Compute a range which does not cut any word:
        #   functionNumber "-1" marks the last function, which must read
        #   up to the end of the file;
        #   functionNumber "0" marks the first one, which cannot look
        #   before its own start.
        if args["functionNumber"] != "-1":
            topRange = selectRange(args["fileName"], topRange, res)
        if args["functionNumber"] != '0':
            bottomRange = selectRange(args["fileName"], bottomRange, res)

        # download the part of the file that is needed
        fileFromServer = odb.get_object(res["ibm_cos"]["bucket"],
                                        args["fileName"],
                                        extra_get_args={
                                            "Range":
                                            "bytes={0}-{1}".format(
                                                bottomRange, topRange)
                                        }).decode('UTF-8', errors='ignore')

        # Delete unwanted characters
        stringFiltered = re.sub('[^A-Za-z \n]+', '', fileFromServer)
        # Only letters, spaces and newlines are left, so a whitespace split
        # yields exactly the non-empty words.
        stringSplitted = stringFiltered.split()

        # send {'words': numberWords} to the reduce function
        body = json.dumps({"words": len(stringSplitted)})
        channel.basic_publish(exchange='',
                              routing_key='CountingWords',
                              body=body)
    finally:
        # Close the connection even when the download or publish fails.
        connection.close()
    return {}
Exemplo n.º 14
0
def selectRange(fileName, rang, res):
    """Move a byte offset back to the nearest preceding space.

    Downloads the bytes from ``rang - 20`` (never below 0) up to ``rang``
    and looks for the last space in the decoded text.

    Args:
        fileName: key of the object inside the bucket ``res['ibm_cos']["bucket"]``.
        rang: byte offset to adjust.
        res: configuration dict; ``res['ibm_cos']`` configures the backend.

    Returns:
        ``rang`` reduced by the number of characters that follow the last
        space of the window, or ``rang`` unchanged when the window contains
        no space at all.
    """
    odb = COSBackend(res['ibm_cos'])
    # Clamp the start of the window: a negative start would produce a
    # malformed Range header for offsets below 20.
    window_start = max(rang - 20, 0)
    fileFromServer = odb.get_object(res['ibm_cos']["bucket"],
                                    fileName,
                                    extra_get_args={
                                        'Range':
                                        'bytes={0}-{1}'.format(
                                            window_start, rang)
                                    }).decode('UTF-8', errors='ignore')
    # Search for the last space in the text
    last_space = fileFromServer.rfind(" ")
    if last_space == -1:
        # No space within the window: keep the offset as it is instead of
        # failing on an exhausted string.
        return rang
    return rang - (len(fileFromServer) - 1 - last_space)
Exemplo n.º 15
0
def main(args):
    """Download one byte range of a file, clean it and run the chosen map.

    ``args['space']`` holds the first and last byte of the range. The text
    is decoded as ISO-8859-15, lower-cased, and every punctuation character
    listed in the pattern below is replaced by a space. ``args['program']``
    equal to 1 selects ``map_count_words``; any other value selects
    ``map_word_count``.

    Returns:
        Whatever the selected map function returns.
    """
    cos = COSBackend(args.get('cos_params'))
    space = args.get('space')
    byte_range = "bytes=" + str(int(space[0])) + "-" + str(int(space[1]))
    file = cos.get_object(args.get('bucket_name'),
                          args.get('file_name'),
                          extra_get_args={
                              'Range': byte_range
                          }).decode('iso8859-15').lower()

    # The hyphen sits last in the class so that it is a literal character.
    # Written between ':' and '_' it formed a range, which left hyphens in
    # the text and removed characters such as '<', '=' and '>' instead.
    clean_file = re.sub('[.,;:_*+"(\'){!}@#%&?¿¡-]', ' ', file)

    if int(args.get('program')) == 1:
        return map_count_words(clean_file, args)
    return map_word_count(clean_file, args)
Exemplo n.º 16
0
def matrix_multiplication(data):
    """Multiply the rows of A by the columns of B assigned to one worker.

    ``data`` is the worker key ``"<i>w<j>"``; the object stored under it in
    the ``practica-sd-mp`` bucket unpickles to a pair whose first element is
    iterated as rows of A and whose second is iterated as columns of B. The
    module-level ``n`` is the number of elements multiplied per cell.

    Returns:
        list of rows, each a list with one value per column of B.
    """
    cos = COSBackend()
    valuesWorker = pickle.loads(cos.get_object('practica-sd-mp', f'{data}'))

    # The two indices are not used below; parsing them still rejects a
    # malformed worker key.
    parts = data.split("w")
    int(parts[0])
    int(parts[1])

    # With the rows and columns at hand, compute every cell.
    return [
        [sum(lineA[x] * columnB[x] for x in range(n))
         for columnB in valuesWorker[1]]
        for lineA in valuesWorker[0]
    ]
Exemplo n.º 17
0
def reduce_word_count(args):
    """Merge the per-partition word counts into a single result object.

    For each partition ``i`` the JSON object ``wc_<file_name><i>`` is read
    and then deleted, and its counts are added to a running total. The total
    is stored as JSON under ``final_<file_name>``.

    Args:
        args: mapping with ``file_name``, ``num_partitions``, ``cos_params``
            and ``bucket_name``.

    Returns:
        ``{'finish': "OK"}``.
    """
    file_name = args.get('file_name')
    num_partitions = args.get('num_partitions')
    cos = COSBackend(args.get('cos_params'))
    bucket_name = args.get('bucket_name')
    result_dict = {}

    for i in range(num_partitions):
        partial_key = "wc_" + file_name + str(i)
        partial_counts = json.loads(cos.get_object(bucket_name, partial_key))
        cos.delete_object(bucket_name, partial_key)
        # Accumulate in place: only the words of this partition are touched,
        # rather than rebuilding the whole result for every partition.
        for word, count in partial_counts.items():
            result_dict[word] = result_dict.get(word, 0) + count

    cos.put_object(bucket_name, "final_" + file_name, json.dumps(result_dict))
    return {'finish': "OK"}
Exemplo n.º 18
0
def reduce_count_words(args):
    """Add up the per-partition word totals and store the grand total.

    For each partition ``i`` the object ``cw_<file_name><i>`` is read as an
    integer and then deleted. The sum is stored, as text, under
    ``final_<file_name>``.

    Returns:
        ``{'finish': "OK"}``.
    """
    cos = COSBackend(args.get('cos_params'))
    bucket = args.get('bucket_name')
    name = args.get('file_name')
    partitions = args.get('num_partitions')

    total = 0
    for index in range(partitions):
        partial_key = "cw_" + name + str(index)
        total += int(cos.get_object(bucket, partial_key))
        # The partial result is no longer needed once it has been added.
        cos.delete_object(bucket, partial_key)

    cos.put_object(bucket, "final_" + name, str(total))

    return {'finish': "OK"}
Exemplo n.º 19
0
def funcio_reduce(results):
    """Assemble the result matrix from the files written by the workers.

    ``results`` holds worker identifiers. Each ``worker<id>.txt`` object
    contains space-separated ``i j value`` triples; every value is stored at
    position ``(i, j)`` of a zero-initialised matrix whose shape is given by
    the module-level ``x`` and ``z``.

    Returns:
        The filled matrix.
    """
    cos = COSBackend()
    mat_result = np.zeros(shape=(x, z))

    for worker_id in results:
        raw = cos.get_object('sd-ori-un-buen-cubo',
                             'worker' + worker_id + '.txt')
        tokens = raw.decode().split(" ")
        # Walk the complete triples only; a trailing partial one is ignored.
        for base in range(0, len(tokens) - 2, 3):
            i = int(tokens[base])
            j = int(tokens[base + 1])
            mat_result[i][j] = tokens[base + 2]

    return mat_result
Exemplo n.º 20
0
def main(args):
    """Process one block of a tile and upload the EXTRAD and RADIANCE files.

    The entries of ``args['chunk']`` are merged into ``args`` (in place).
    The MDT object is fetched and passed to ``map_interpolation`` together
    with the day of year, the block indices and the number of splits. The
    file it returns is uploaded under ``tmp/EXTRAD``; the local file named
    ``output`` is uploaded under ``tmp/RADIANCE``.

    NOTE(review): ``output`` is presumably written by ``map_interpolation``
    as a side effect - confirm.

    Returns:
        dict with the RADIANCE object key plus the start and end timestamps.
    """
    started = time.time()
    args.update(args['chunk'])
    parameters = SimpleNamespace(**args['parameters'])
    credentials = args['cos']
    cos = COSBackend(
        aws_access_key_id=credentials['aws_access_key_id'],
        aws_secret_access_key=credentials['aws_secret_access_key'],
        endpoint_url=credentials['private_endpoint'])
    mdt_key = args['mdt_key']
    mdt = cos.get_object(key=mdt_key, bucket=parameters.BUCKET)

    filename = map_interpolation(mdt, parameters.DAY_OF_YEAR, args['block_x'],
                                 args['block_y'], parameters.SPLITS)

    # Both uploads share the same "<tile>/<block_x>_<block_y>.tif" suffix;
    # the tile name is the text of the MDT file name before its first dot.
    tile_name = os.path.basename(mdt_key).split('.')[0]
    block_file = str(args['block_x']) + '_' + str(args['block_y']) + '.tif'

    extrad_key = '/'.join(['tmp', 'EXTRAD', tile_name, block_file])
    cos.upload_file(filename=filename,
                    bucket=parameters.BUCKET,
                    key=extrad_key)

    result_key = '/'.join(['tmp', 'RADIANCE', tile_name, block_file])
    cos.upload_file(filename='output',
                    bucket=parameters.BUCKET,
                    key=result_key)
    finished = time.time()
    return {
        'result': result_key,
        'start_time': started,
        'end_time': finished
    }
Exemplo n.º 21
0
class Orchestrator:
    """Drive a map/reduce word-counting job over cloud functions.

    The target file lives in object storage; mappers each process one byte
    range of it and a reducer combines their output. Progress is signalled
    through two queues: mappers notify the reducer, and the reducer notifies
    this orchestrator.

    If initialisation fails, ``ini_error`` is set to ``True`` and ``run`` /
    ``claimFile`` return ``-4`` without doing anything.
    """

    def __init__(self, target_bucket, target_fname, upload=False):
        """Load ``secret.yaml`` and prepare the storage, function and queue clients.

        Args:
            target_bucket: bucket holding (or receiving) the target file.
            target_fname: key of the target file; also the local path read
                when ``upload`` is true.
            upload: when true, upload the local file before anything else.
        """
        self.target_fname = target_fname
        self.target_bucket = target_bucket
        self.ini_error = False
        # Two separate queues: the reducer waits for the mappers and the
        # orchestrator waits for the reducer.
        self.mapper_qid = 'mapperQueue'
        self.reducer_qid = 'reducerQueue'
        # Common arguments sent to every function; filled in below.
        self.comargs = {}
        format_str = "cloudfunctions:\n  'endpoint': ''\n  'namespace': ''\n  'api_key': ''\nrabbitamqp:\n  'url': ''\ncos:\n  service_endpoint: ''\n  secret_key: ''\n  access_key: ''"

        try:
            # load keys securely
            with open('secret.yaml', 'r') as f:
                secret = yaml.safe_load(f)

            # initialise the remote storage wrapper, and upload the target file
            self.cb = COSBackend(secret['cos']['service_endpoint'],
                                 secret['cos']['secret_key'],
                                 secret['cos']['access_key'])
            if upload:
                with open(self.target_fname, "rb") as target_file:
                    self.cb.put_object(target_bucket, target_fname,
                                       target_file.read())

            # retrieve file length, ensure file has been uploaded
            try:
                self.fsize = int(
                    self.cb.head_object(self.target_bucket,
                                        self.target_fname)['content-length'])
            except Exception:
                print(
                    'File \'{}\' was not found in this bucket \'{}\'. Upload it and retry.'
                    .format(self.target_fname, self.target_bucket))
                self.ini_error = True
                return

            # initialize the function wrapper
            config = {
                'endpoint': secret['cloudfunctions']['endpoint'],
                'namespace': secret['cloudfunctions']['namespace'],
                'api_key': secret['cloudfunctions']['api_key'],
            }
            self.cf = CloudFunctions(config)

            # initialize the queue system
            self.pika_params = pika.URLParameters(secret['rabbitamqp']['url'])

            # Set the common args stub. This stays inside the try block
            # because it reads `secret`, which does not exist when loading
            # the YAML file failed.
            self.comargs['cos'] = secret['cos']
            self.comargs['rabbitamqp_url'] = secret['rabbitamqp']['url']
            self.comargs['target_bucket'] = self.target_bucket
            self.comargs['target_fname'] = self.target_fname

        except KeyError:
            print('Wrong yaml document format. Please use the following one:')
            print(format_str)
            self.ini_error = True
        except FileNotFoundError as e:
            print('File \'{}\' not found.'.format(e.filename))
            self.ini_error = True

    def run(self, mapper, nthreads):
        """Dispatch ``nthreads`` mappers plus the reducer and wait for the result.

        Args:
            mapper: ``'CountingWords'`` or ``'WordCount'``.
            nthreads: number of partitions / mapper invocations (>= 1).

        Returns:
            ``-4`` if initialisation failed, ``-1`` if ``nthreads`` is below
            1, ``-2`` for an unsupported mapper, otherwise ``None`` once the
            reducer has reported completion.
        """
        # check if initialization was good
        if self.ini_error:
            return -4
        # validation of parameters
        if nthreads < 1:
            print(
                'Minimum number of partitions or threads must be 1. \nExiting...'
            )
            return -1
        if mapper not in ('CountingWords', 'WordCount'):
            print(
                '{} is not supported as a mapper yet. Supported mappers: CountingWords, WordCount. \nExiting...'
                .format(mapper))
            return -2

        # prepare arguments for the mapper (mapper args)
        chunk_size = self.fsize // nthreads
        mapargs = self.comargs.copy()
        mapargs['qid'] = self.mapper_qid

        # start connection with the queue system
        connection = pika.BlockingConnection(self.pika_params)
        channel = connection.channel()
        channel.queue_declare(queue=self.mapper_qid)
        channel.queue_purge(
            queue=self.mapper_qid)  # ensure no message was left

        # measure time
        start_t = time.time()

        # Dispatch the mappers; the last one takes the rest of the file.
        # NOTE(review): HTTP byte ranges include both ends, so consecutive
        # ranges share their boundary byte - confirm whether that is wanted.
        last = nthreads - 1
        for i in range(nthreads):
            range_end = self.fsize if i == last else chunk_size * (i + 1)
            # The index is always sent as a string, for every mapper alike.
            mapargs['index'] = str(i)
            mapargs['Range'] = 'bytes={}-{}'.format(chunk_size * i, range_end)
            self.cf.invoke(mapper, mapargs)

        # prepare arguments for the reducer (reducer args)
        redargs = self.comargs.copy()
        redargs['reduce_{}'.format(mapper)] = 'yes'
        redargs['nthreads'] = nthreads
        redargs['mapper_qid'] = self.mapper_qid
        redargs['reducer_qid'] = self.reducer_qid

        channel.queue_declare(queue=self.reducer_qid)
        channel.queue_purge(
            queue=self.reducer_qid)  # ensure no message was left

        self.cf.invoke('Reducer', redargs)

        # wait for the reducer to finish
        channel.basic_consume(queue=self.reducer_qid,
                              on_message_callback=SingleCallback())
        channel.start_consuming()

        # measure time
        end_t = time.time()

        connection.close()

        print('Done.\nExecution time: {0:.5g}s'.format(end_t - start_t))

    def claimFile(self, result_type, result_fname):
        """Download ``<target_fname>/<result_type>-result`` into ``result_fname``.

        Returns:
            ``-4`` if initialisation failed, otherwise ``None``. A failed
            download or write is reported on stdout and not raised.
        """
        # check if initialization was good
        if self.ini_error:
            return -4

        result_key = '{}/{}-result'.format(self.target_fname, result_type)
        try:
            # Download first, so that a failed download does not leave an
            # empty local file behind.
            cos_result = self.cb.get_object(self.target_bucket, result_key)
            with open(result_fname, "w") as result_file:
                result_file.write(cos_result.decode('utf-8'))
        except Exception:
            print(
                'Something went wrong, could not download result file for: {}, action: {}'
                .format(self.target_fname, result_type))
Exemplo n.º 22
0
    cos=COSBackend()
    ibcmf= pywren.ibm_cf_executor()
    start_time = time.time()
    ibcmf.wait(ibcmf.call_async(generatex,[m,n,l,a]))
    ibcmf.clean()
    iterdata=[]
    
    for i in range(nWorkersA):
        for j in range(nWorkersB):
            iterdata.append(f'{i}w{j}')
    #start_time = time.time()
    ibcmf.wait(ibcmf.map_reduce(matrix_multiplication,iterdata, multiplication_reduce, reducer_wait_local=True))
    elapsed_time = time.time() - start_time
    for i in iterdata:
        cos.delete_object('practica-sd-mp',i)


    matrixA=pickle.loads(cos.get_object('practica-sd-mp','matrixA.txt'))
    matrixB=pickle.loads(cos.get_object('practica-sd-mp','matrixB.txt'))
    matrixC=pickle.loads(cos.get_object('practica-sd-mp','matrixC.txt'))
    print(f'Matriu A ({m} x {n}):')
    for filaA in matrixA:
        print(filaA)
    print(f'Matriu B ({n} x {l}):')
    for filaB in matrixB:
        print(filaB)
    print(f'Matriu C ({m} x {l}):' )
    for filaC in matrixC:
        print(filaC)
    print(f'Valor de m: {m}\nValor de n: {n}\nValor de l: {l}\nValor de a: {a}')
    print(f'El número total de workers ha sigut de: {w}.\nTemps que ha passat en segons: {elapsed_time} s')
Exemplo n.º 23
0
    #configure COS library
    odb = COSBackend(res['ibm_cos'])

    fileSize = int(odb.head_object(res['ibm_cos']["bucket"], fileName)["content-length"])
    #check if there are enough workers
    print(fileSize / nFunctions)
    if(fileSize / nFunctions) > 110000000: #this number is an aproximation we don't know the limit
        print("more workers are requiered for this file")
        exit(-1)

    #invoke functions calculating the time
    start = time.time()
    invokeFunctions('wordCount', nFunctions,  fileSize, fileName, res)
    end1 = time.time()
    invokeFunctions('countingWords', nFunctions,  fileSize, fileName, res)
    end2 = time.time()
    print("wordCount function's time: {0}".format(end1 - start))
    print("CountingWords function's time: {0}".format(end2 - end1))

    #download generated files
    fileFromServer = odb.get_object(res['ibm_cos']["bucket"], fileName[:-4] + 'CountingWordResult.txt')
    newFile = open(fileName[:-4] + 'CountingWordResult.txt', "wb")
    newFile.write(fileFromServer)
    newFile.close()
    print(fileName[:-4] + 'CountingWordResult.txt downloaded')

    fileFromServer = odb.get_object(res['ibm_cos']["bucket"], fileName[:-4] + 'WordCountResult.txt')
    newFile = open(fileName[:-4] + 'WordCountResult.txt', "wb")
    newFile.write(fileFromServer)
    newFile.close()
    print(fileName[:-4] + 'WordCountResult.txt downloaded')
Exemplo n.º 24
0
def main(args):
    """Count each word in one byte range of a file and publish the result key.

    ``args`` carries ``res`` (RabbitMQ URL and COS configuration),
    ``fileName``, ``topRange``, ``bottomRange`` and ``functionNumber``. The
    range limits are moved with ``selectRange`` so that they do not cut a
    word, the range is downloaded, everything except ASCII letters, spaces
    and newlines is removed, and the lower-cased words are counted. The
    counts are stored as JSON under ``<fileName><functionNumber>`` and that
    key is published to the ``WordCount`` queue.

    Returns:
        An empty dict.
    """
    # Round-trip through JSON so that args is made of plain JSON types.
    args = json.loads(json.dumps(args))
    res = args["res"]
    url = res["rabbitmq"]["url"]
    topRange = int(args["topRange"])
    bottomRange = int(args["bottomRange"])
    # configure COS library
    odb = COSBackend(res["ibm_cos"])

    # pika configuration
    connection = pika.BlockingConnection(pika.URLParameters(url))
    try:
        channel = connection.channel()
        channel.queue_declare(queue='WordCount')

        # Compute a range which does not cut any word:
        #   functionNumber "-1" marks the last function, which must read
        #   up to the end of the file;
        #   functionNumber "0" marks the first one, which cannot look
        #   before its own start.
        if args["functionNumber"] != "-1":
            topRange = selectRange(args["fileName"], topRange, res)
        if args["functionNumber"] != '0':
            bottomRange = selectRange(args["fileName"], bottomRange, res)

        # get the part of the file that is needed in this function
        fileFromServer = odb.get_object(res["ibm_cos"]["bucket"],
                                        args["fileName"],
                                        extra_get_args={
                                            "Range":
                                            "bytes={0}-{1}".format(
                                                bottomRange, topRange)
                                        }).decode('UTF-8', errors='ignore')

        # Delete unwanted characters
        stringFiltered = re.sub('[^A-Za-z \n]+', '', fileFromServer)
        # Only letters, spaces and newlines are left, so a whitespace split
        # yields exactly the non-empty words and no punctuation remains to
        # be stripped from them.
        counts = Counter(word.lower() for word in stringFiltered.split())
        # {word1: count1, word2: count2, ...} as JSON
        dumped_json_string = json.dumps(dict(counts))

        # upload file with result:
        #   object key -> file name + function number
        #   body       -> json(dict(counts))
        result_key = args["fileName"] + args["functionNumber"]
        odb.put_object(res["ibm_cos"]["bucket"], result_key,
                       dumped_json_string)
        # send a msg to reduce with the file name as body
        channel.basic_publish(exchange='',
                              routing_key='WordCount',
                              body=result_key)
    finally:
        # Close the connection even when the download or publish fails.
        connection.close()
    return {}
Exemplo n.º 25
0
    for i in range(int(sys.argv[2])):
        params['num_partition'] = i
        params['space'] = (i * partition_size, (i + 1) * partition_size)
        tasks.append(loop.create_task(perform_cloud('map', params.copy())))
    #Esperem fins que acabin les tasques al cloud.
    loop.run_until_complete(asyncio.gather(*tasks))
    #Tasques acabades:
    params['num_partitions'] = int(sys.argv[2])
    result = ibm_cf.invoke_with_result('reduce', params)

    time_diff = datetime.now() - initial_time

    if result.get('finish') == "OK":
        if program == 1:
            print("\nCounting Words del fitxer " + file)
            result = int(cos_backend.get_object(bucket_name, 'final_' + file))
            print("Resultat: El fitxer conte " + str(result) + " paraules.")
        else:
            print("\nWord Count del fitxer " + file)
            result = cos_backend.get_object(bucket_name, 'final_' + file)
            print("Resultat:")
            print(result)
    else:
        print(result)

    print("\nTemps d'execucio: " + str(time_diff.total_seconds()) + "\n")

else:
    print("Error: Havia de seleccionar 0 o 1 segons la opcio.")