Example No. 1
def migrate_files(connection):
    """
    Migrate the various files to S3
    """
    LOG.info('Migrating the files')

    s3helper = S3Helper()

    migrate_image_files(connection, get_galaxy_image_bucket(), get_files_bucket(), s3helper)
    migrate_hdf5_files(connection, get_files_bucket(), s3helper)
Example No. 2
def delete_galaxy(connection, galaxy_ids):
    try:
        for galaxy_id_str in galaxy_ids:
            transaction = connection.begin()
            galaxy_id1 = int(galaxy_id_str)
            galaxy = connection.execute(select([GALAXY]).where(GALAXY.c.galaxy_id == galaxy_id1)).first()
            if galaxy is None:
                LOG.info('Error: Galaxy with galaxy_id of %d was not found', galaxy_id1)
            else:
                LOG.info('Deleting Galaxy with galaxy_id of %d - %s', galaxy_id1, galaxy[GALAXY.c.name])
                area_count = connection.execute(select([func.count(AREA.c.area_id)]).where(AREA.c.galaxy_id == galaxy[GALAXY.c.galaxy_id])).first()[0]
                counter = 1

                for area_id1 in connection.execute(select([AREA.c.area_id]).where(AREA.c.galaxy_id == galaxy[GALAXY.c.galaxy_id]).order_by(AREA.c.area_id)):
                    LOG.info("Deleting galaxy {0} area {1}. {2} of {3}".format(galaxy_id_str, area_id1[0], counter, area_count))
                    connection.execute(PIXEL_RESULT.delete().where(PIXEL_RESULT.c.area_id == area_id1[0]))

                    # Give the rest of the world a chance to access the database
                    time.sleep(0.1)
                    counter += 1

                # Now empty the bucket
                s3helper = S3Helper()
                bucket = s3helper.get_bucket(get_files_bucket())
                galaxy_file_name = get_galaxy_file_name(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id])
                for key in bucket.list(prefix='{0}/sed/'.format(galaxy_file_name)):
                    # Ignore the folder placeholder key
                    if key.key.endswith('/'):
                        continue

                    bucket.delete_key(key)

                # Now the folder
                key = Key(bucket)
                key.key = '{0}/sed/'.format(galaxy_file_name)
                bucket.delete_key(key)

            LOG.info('Galaxy with galaxy_id of %d was deleted', galaxy_id1)
            connection.execute(GALAXY.update().where(GALAXY.c.galaxy_id == galaxy_id1).values(status_id=DELETED, status_time=datetime.datetime.now()))
            transaction.commit()

    except Exception:
        LOG.exception('Major error')
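A minimal usage sketch for delete_galaxy: the create_engine call and DB_LOGIN connection string mirror the ones used by store_files in Example No. 7 below, while the galaxy IDs are placeholders.

from sqlalchemy import create_engine

# Hypothetical driver for delete_galaxy; DB_LOGIN is the same connection
# string used by store_files in Example No. 7.
engine = create_engine(DB_LOGIN)
connection = engine.connect()
try:
    delete_galaxy(connection, ['1000', '1001'])  # placeholder galaxy_ids
finally:
    connection.close()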
Example No. 3
def regenerated_original_images(galaxy_name, run_id, galaxy_id, s3Helper, connection):
    """
    We need to regenerate the image
    :param galaxy_name: the name of the galaxy
    :param run_id: the run id
    :param galaxy_id: the galaxy id
    :param s3Helper: the S3 helper to use
    :param connection: the database connection
    :return: True if we succeed, otherwise False
    """
    all_ok = False

    # Get the fits file
    bucket = s3Helper.get_bucket(get_files_bucket())
    galaxy_file_name = get_galaxy_file_name(galaxy_name, run_id, galaxy_id)
    key_name = '{0}/{0}.fits'.format(galaxy_name)
    key = bucket.get_key(key_name)
    if key is None:
        LOG.error('The fits file does not seem to exist')
        return all_ok

    path_name = get_temp_file('fits')
    key.get_contents_to_filename(path_name)

    # Now regenerate
    try:
        image = FitsImage(connection)
        image.build_image(path_name, galaxy_file_name, galaxy_id, get_galaxy_image_bucket())
        all_ok = True
    except Exception:
        LOG.exception('Major error')
        all_ok = False
    finally:
        os.remove(path_name)
    return all_ok
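get_temp_file is not shown in these examples. A plausible reconstruction, consistent with both the one-argument call above and the three-argument call in Example No. 8, might look like this; the exact signature and behaviour are assumptions.

import os
import tempfile


def get_temp_file(extension, prefix='pogs', directory=None):
    """
    Assumed implementation: create a named temporary file and return its
    path; the caller is responsible for deleting it.
    """
    fd, path = tempfile.mkstemp(suffix=extension, prefix=prefix, dir=directory)
    os.close(fd)
    return path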
Example No. 4
def remove_s3_files(galaxy_name, run_id, galaxy_id):
    """
    Remove the files from S3

    :param galaxy_name: the name of the galaxy
    :param run_id: the run id
    :param galaxy_id: the galaxy id
    :return:
    """
    s3_helper = S3Helper()
    remove_files_with_key(s3_helper.get_bucket(get_galaxy_image_bucket()), galaxy_name, run_id, galaxy_id)
    remove_files_with_key(s3_helper.get_bucket(get_files_bucket()), galaxy_name, run_id, galaxy_id)
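remove_files_with_key is not defined here. A sketch consistent with the per-galaxy key layout used in delete_galaxy (Example No. 2) could be the following; the real implementation may be more selective.

def remove_files_with_key(bucket, galaxy_name, run_id, galaxy_id):
    """
    Assumed implementation: delete every key under the galaxy's folder.
    """
    galaxy_file_name = get_galaxy_file_name(galaxy_name, run_id, galaxy_id)
    for key in bucket.list(prefix='{0}/'.format(galaxy_file_name)):
        bucket.delete_key(key)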
Example No. 5
def remigrate_files(connection):
    """
    Remigrate the bad HDF5 files to S3
    """
    LOG.info('Migrating the files')

    s3helper = S3Helper()
    files_bucket = get_files_bucket()
    bad_galaxies = find_bad_hdf5_files(s3helper, files_bucket)
    migrate_hdf5_files(bad_galaxies, connection, files_bucket, s3helper)
Example No. 6
def get_hdf5_size_data():
    """
    Get the HDF5 data we need
    :return:
    """
    # Get the list of files
    LOG.info('Getting the hdf5 files from the database')
    data = {}
    set_names = set()

    for entry in connection.execute(select([HDF5_SIZE])):
        key_size_mb = entry[HDF5_SIZE.c.size] / 1000000.0
        LOG.info('Processing {0} {1} {2}'.format(entry[HDF5_SIZE.c.name],
                                                 entry[HDF5_SIZE.c.size],
                                                 key_size_mb))
        run_id = entry[HDF5_SIZE.c.run_id]

        # Get the array
        row_data = data.get(run_id)
        if row_data is None:
            row_data = []
            data[run_id] = row_data

        row_data.append(key_size_mb)
        set_names.add(entry[HDF5_SIZE.c.name])

    LOG.info('Getting the hdf5 files from S3')
    s3helper = S3Helper()
    bucket = s3helper.get_bucket(get_files_bucket())
    insert_hdf5 = HDF5_SIZE.insert()
    for prefix in bucket.list(prefix='', delimiter='/'):
        prefix_name = prefix.name[:-1]
        if prefix_name not in set_names:
            key = bucket.get_key('{0}/{0}.hdf5'.format(prefix_name))
            if key is not None:
                key_size_mb = key.size / 1000000.0
                LOG.info('Processing {0} {1} {2}'.format(
                    key.name, key.size, key_size_mb))
                elements = prefix.name.split('__')
                run_id = int(elements[1])

                connection.execute(insert_hdf5,
                                   name=prefix_name,
                                   size=key.size,
                                   run_id=run_id)

                # Get the array
                row_data = data.get(run_id)
                if row_data is None:
                    row_data = []
                    data[run_id] = row_data

                row_data.append(key_size_mb)

    return data
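The returned dict maps each run_id to a list of HDF5 file sizes in megabytes; a hypothetical summary pass over it:

# Hypothetical usage: summarise the per-run sizes returned above.
data = get_hdf5_size_data()
for run_id in sorted(data.keys()):
    sizes = data[run_id]
    LOG.info('run {0}: {1} files, {2:.1f} MB total'.format(run_id, len(sizes), sum(sizes)))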
Example No. 7
def store_files(hdf5_dir):
    """
    Scan a directory for files and send them to the archive

    :param hdf5_dir:  the directory to scan
    :return:
    """
    LOG.info('Directory: %s', hdf5_dir)

    # Get the work units still being processed
    ENGINE = create_engine(DB_LOGIN)
    connection = ENGINE.connect()

    files = os.path.join(hdf5_dir, '*.hdf5')
    file_count = 0

    try:
        s3helper = S3Helper()
        bucket_name = get_files_bucket()

        for file_name in glob.glob(files):
            size = os.path.getsize(file_name)
            galaxy_id, galaxy_name = get_galaxy_id_and_name(file_name)
            if galaxy_id >= 0:
                key = '{0}/{0}.hdf5'.format(galaxy_name)
                LOG.info('File name: %s', file_name)
                LOG.info('File size: %d', size)
                LOG.info('Bucket:    %s', bucket_name)
                LOG.info('Key:       %s', key)

                s3helper.add_file_to_bucket(bucket_name, key, file_name)
                file_count += 1
                os.remove(file_name)
                connection.execute(GALAXY.update().where(
                    GALAXY.c.galaxy_id == galaxy_id).values(
                        status_id=STORED, status_time=datetime.datetime.now()))

            else:
                LOG.error('File name: %s', file_name)
                LOG.error('File size: %d', size)
                LOG.error('Could not get the galaxy id')

    except Exception:
        LOG.exception('Major error')

    finally:
        connection.close()

    return file_count
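get_galaxy_id_and_name is not shown. Given that file names elsewhere follow the '<name>__<run_id>__<galaxy_id>' pattern (see the '__' split in Example No. 6), a plausible reconstruction is the following; treat it as an assumption.

import os


def get_galaxy_id_and_name(file_name):
    """
    Assumed implementation: parse '<name>__<run_id>__<galaxy_id>.hdf5' and
    return (galaxy_id, galaxy_name); (-1, None) signals a parse failure.
    """
    base_name = os.path.splitext(os.path.basename(file_name))[0]
    elements = base_name.split('__')
    if len(elements) == 3:
        return int(elements[2]), base_name
    return -1, None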
Example No. 8
def get_hdf5_file(s3Helper, output_dir, galaxy_name, run_id, galaxy_id):
    """
    Get the HDF file

    :param galaxy_name: the name of the galaxy
    :param run_id: the run id
    :param galaxy_id: the galaxy id
    :return:
    """
    bucket_name = get_files_bucket()
    key = get_key_hdf5(galaxy_name, run_id, galaxy_id)
    tmp_file = get_temp_file('.hdf5', 'pogs', output_dir)

    s3Helper.get_file_from_bucket(bucket_name=bucket_name, key_name=key, file_name=tmp_file)
    return tmp_file
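get_key_hdf5 presumably mirrors the '{0}/{0}.hdf5' key layout used by store_files in Example No. 7; a sketch under that assumption:

def get_key_hdf5(galaxy_name, run_id, galaxy_id):
    """
    Assumed implementation: the S3 key of a galaxy's HDF5 file, matching
    the '{0}/{0}.hdf5' layout used when the file was stored.
    """
    galaxy_file_name = get_galaxy_file_name(galaxy_name, run_id, galaxy_id)
    return '{0}/{0}.hdf5'.format(galaxy_file_name)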
Example No. 9
def store_files(connection):
    """
    Scan a directory for files and send them to the archive

    :param connection: the database connection
    :return:
    """
    LOG.info('Directory: %s', HDF5_OUTPUT_DIRECTORY)

    to_store_dir = os.path.join(HDF5_OUTPUT_DIRECTORY, 'to_store')
    files = os.path.join(to_store_dir, '*.hdf5')
    file_count = 0

    s3helper = S3Helper()
    bucket_name = get_files_bucket()

    for file_name in glob.glob(files):
        size = os.path.getsize(file_name)
        galaxy_id, galaxy_name = get_galaxy_id_and_name(file_name)
        if galaxy_id >= 0:
            key = '{0}/{0}.hdf5'.format(galaxy_name)
            LOG.info('File name: %s', file_name)
            LOG.info('File size: %d', size)
            LOG.info('Bucket:    %s', bucket_name)
            LOG.info('Key:       %s', key)

            s3helper.add_file_to_bucket(bucket_name, key, file_name)
            file_count += 1
            os.remove(file_name)
            connection.execute(GALAXY.update().where(GALAXY.c.galaxy_id == galaxy_id).values(status_id=STORED, status_time=datetime.datetime.now()))

        else:
            LOG.error('File name: %s', file_name)
            LOG.error('File size: %d', size)
            LOG.error('Could not get the galaxy id')

    return file_count
Example No. 10
def store_pixels(connection, galaxy_file_name, group, dimension_x, dimension_y,
                 dimension_z, area_total, output_directory,
                 map_parameter_name):
    """
    Store the pixel data
    """
    LOG.info('Storing the pixel data for {0} - {1} areas to process'.format(
        galaxy_file_name, area_total))
    data = numpy.empty(
        (dimension_x, dimension_y, NUMBER_PARAMETERS, NUMBER_IMAGES),
        dtype=numpy.float)
    data.fill(numpy.NaN)
    data_pixel_details = group.create_dataset('pixel_details',
                                              (dimension_x, dimension_y),
                                              dtype=data_type_pixel,
                                              compression='gzip')
    data_pixel_parameters = group.create_dataset(
        'pixel_parameters', (dimension_x, dimension_y, NUMBER_PARAMETERS),
        dtype=data_type_pixel_parameter,
        compression='gzip')
    data_pixel_filter = group.create_dataset(
        'pixel_filters', (dimension_x, dimension_y, dimension_z),
        dtype=data_type_pixel_filter,
        compression='gzip')
    data_pixel_histograms_grid = group.create_dataset(
        'pixel_histograms_grid', (dimension_x, dimension_y, NUMBER_PARAMETERS),
        dtype=data_type_block_details,
        compression='gzip')

    histogram_group = group.create_group('histogram_blocks')
    histogram_list = []
    pixel_count = 0
    area_count = 0
    block_id = 1
    block_index = 0
    histogram_data = histogram_group.create_dataset(
        'block_1', (BLOCK_SIZE, ),
        dtype=data_type_pixel_histogram,
        compression='gzip')

    s3helper = S3Helper()
    bucket = s3helper.get_bucket(get_files_bucket())
    for key in bucket.list(prefix='{0}/sed/'.format(galaxy_file_name)):
        # Ignore the key
        if key.key.endswith('/'):
            continue

        # Now process the file
        start_time = time.time()
        LOG.info('Processing file {0}'.format(key.key))
        temp_file = os.path.join(output_directory, 'temp.sed')
        key.get_contents_to_filename(temp_file)

        if is_gzip(temp_file):
            f = gzip.open(temp_file, "rb")
        else:
            f = open(temp_file, "r")

        area_id = None
        pxresult_id = None
        line_number = 0
        percentiles_next = False
        histogram_next = False
        skynet_next1 = False
        skynet_next2 = False
        map_pixel_results = {}
        list_filters = []
        try:
            for line in f:
                line_number += 1

                if line.startswith(" ####### "):
                    # Clear all the maps and stuff
                    map_pixel_results = {}
                    list_filters = []

                    # Split the line to extract the data
                    values = line.split()
                    pointName = values[1]
                    pxresult_id = pointName[3:].rstrip()
                    (x, y, area_id) = get_pixel_result(connection, pxresult_id)
                    line_number = 0
                    percentiles_next = False
                    histogram_next = False
                    skynet_next1 = False
                    skynet_next2 = False
                    pixel_count += 1
                elif pxresult_id is not None:
                    if line_number == 2:
                        filter_names = line.split()
                        filter_layer = 0
                        for filter_name in filter_names:
                            if filter_name != '#':
                                data_pixel_filter.attrs[
                                    filter_name] = filter_layer
                                filter_layer += 1
                    elif line_number == 3:
                        values = line.split()
                        for value in values:
                            list_filters.append([float(value)])
                    elif line_number == 4:
                        filter_layer = 0
                        values = line.split()
                        for value in values:
                            filter_description = list_filters[filter_layer]
                            filter_description.append(float(value))
                            filter_layer += 1
                    elif line_number == 9:
                        values = line.split()
                        map_pixel_results['i_sfh'] = float(values[0])
                        map_pixel_results['i_ir'] = float(values[1])
                        map_pixel_results['chi2'] = float(values[2])
                        map_pixel_results['redshift'] = float(values[3])
                    elif line_number == 11:
                        values = line.split()
                        data[x, y, INDEX_F_MU_SFH,
                             INDEX_BEST_FIT] = float(values[0])
                        data[x, y, INDEX_F_MU_IR,
                             INDEX_BEST_FIT] = float(values[1])
                        data[x, y, INDEX_MU_PARAMETER,
                             INDEX_BEST_FIT] = float(values[2])
                        data[x, y, INDEX_TAU_V,
                             INDEX_BEST_FIT] = float(values[3])
                        data[x, y, INDEX_SSFR_0_1GYR,
                             INDEX_BEST_FIT] = float(values[4])
                        data[x, y, INDEX_M_STARS,
                             INDEX_BEST_FIT] = float(values[5])
                        data[x, y, INDEX_L_DUST,
                             INDEX_BEST_FIT] = float(values[6])
                        data[x, y, INDEX_T_W_BC,
                             INDEX_BEST_FIT] = float(values[7])
                        data[x, y, INDEX_T_C_ISM,
                             INDEX_BEST_FIT] = float(values[8])
                        data[x, y, INDEX_XI_C_TOT,
                             INDEX_BEST_FIT] = float(values[9])
                        data[x, y, INDEX_XI_PAH_TOT,
                             INDEX_BEST_FIT] = float(values[10])
                        data[x, y, INDEX_XI_MIR_TOT,
                             INDEX_BEST_FIT] = float(values[11])
                        data[x, y, INDEX_XI_W_TOT,
                             INDEX_BEST_FIT] = float(values[12])
                        data[x, y, INDEX_TAU_V_ISM,
                             INDEX_BEST_FIT] = float(values[13])
                        data[x, y, INDEX_M_DUST,
                             INDEX_BEST_FIT] = float(values[14])
                        data[x, y, INDEX_SFR_0_1GYR,
                             INDEX_BEST_FIT] = float(values[15])
                    elif line_number == 13:
                        filter_layer = 0
                        values = line.split()
                        for value in values:
                            filter_description = list_filters[filter_layer]
                            if filter_layer < dimension_z:
                                data_pixel_filter[x, y, filter_layer] = (
                                    filter_description[0],
                                    filter_description[1],
                                    float(value),
                                )
                                filter_layer += 1
                    elif line_number > 13:
                        if line.startswith("# ..."):
                            parts = line.split('...')
                            parameter_name = parts[1].strip()
                            parameter_name_id = map_parameter_name[
                                parameter_name]
                            percentiles_next = False
                            histogram_next = True
                            skynet_next1 = False
                            skynet_next2 = False
                            histogram_list = []
                        elif line.startswith(
                                "#....percentiles of the PDF......"):
                            percentiles_next = True
                            histogram_next = False
                            skynet_next1 = False
                            skynet_next2 = False

                            # Write out the histogram into a block for compression improvement
                            data_pixel_histograms_grid[x, y,
                                                       parameter_name_id -
                                                       1] = (
                                                           block_id,
                                                           block_index,
                                                           len(histogram_list))
                            for pixel_histogram_item in histogram_list:
                                # Do we need a new block
                                if block_index >= BLOCK_SIZE:
                                    block_id += 1
                                    block_index = 0
                                    histogram_data = histogram_group.create_dataset(
                                        'block_{0}'.format(block_id),
                                        (BLOCK_SIZE, ),
                                        dtype=data_type_pixel_histogram,
                                        compression='gzip')

                                histogram_data[block_index] = (
                                    pixel_histogram_item[0],
                                    pixel_histogram_item[1],
                                )
                                block_index += 1
                        elif line.startswith(" #...theSkyNet"):
                            percentiles_next = False
                            histogram_next = False
                            skynet_next1 = True
                            skynet_next2 = False
                        elif line.startswith("# theSkyNet2"):
                            percentiles_next = False
                            histogram_next = False
                            skynet_next1 = False
                            skynet_next2 = True
                        elif percentiles_next:
                            values = line.split()
                            z = parameter_name_id - 1
                            data[x, y, z,
                                 INDEX_PERCENTILE_2_5] = float(values[0])
                            data[x, y, z,
                                 INDEX_PERCENTILE_16] = float(values[1])
                            data[x, y, z,
                                 INDEX_PERCENTILE_50] = float(values[2])
                            data[x, y, z,
                                 INDEX_PERCENTILE_84] = float(values[3])
                            data[x, y, z,
                                 INDEX_PERCENTILE_97_5] = float(values[4])
                            percentiles_next = False
                        elif histogram_next:
                            values = line.split()
                            hist_value = float(values[1])
                            if hist_value > MIN_HIST_VALUE and not math.isnan(
                                    hist_value):
                                histogram_list.append(
                                    (float(values[0]), hist_value))
                        elif skynet_next1:
                            values = line.split()
                            data_pixel_details[x, y] = (
                                pxresult_id,
                                area_id,
                                map_pixel_results['i_sfh'],
                                map_pixel_results['i_ir'],
                                map_pixel_results['chi2'],
                                map_pixel_results['redshift'],
                                float(values[0]),
                                float(values[2]),
                                float(values[3]),
                                float(values[4]),
                            )
                            skynet_next1 = False
                        elif skynet_next2:
                            # We have the highest bin probability values which require the parameter_id
                            values = line.split()
                            high_prob_bin = float(values[0]) if float(
                                values[0]) is not None else numpy.NaN
                            first_prob_bin = float(values[1]) if float(
                                values[1]) is not None else numpy.NaN
                            last_prob_bin = float(values[2]) if float(
                                values[2]) is not None else numpy.NaN
                            bin_step = float(values[3]) if float(
                                values[3]) is not None else numpy.NaN
                            z = parameter_name_id - 1
                            data[x, y, z,
                                 INDEX_HIGHEST_PROB_BIN] = high_prob_bin
                            data_pixel_parameters[x, y, z] = (
                                first_prob_bin,
                                last_prob_bin,
                                bin_step,
                            )
                            skynet_next2 = False

        except IOError:
            LOG.error('IOError after {0} lines'.format(line_number))
        finally:
            f.close()

        area_count += 1
        LOG.info('{0:0.3f} seconds for file {1}. {2} of {3} areas.'.format(
            time.time() - start_time, key.key, area_count, area_total))

    pixel_dataset = group.create_dataset('pixels',
                                         data=data,
                                         compression='gzip')
    pixel_dataset.attrs['DIM3_F_MU_SFH'] = INDEX_F_MU_SFH
    pixel_dataset.attrs['DIM3_F_MU_IR'] = INDEX_F_MU_IR
    pixel_dataset.attrs['DIM3_MU_PARAMETER'] = INDEX_MU_PARAMETER
    pixel_dataset.attrs['DIM3_TAU_V'] = INDEX_TAU_V
    pixel_dataset.attrs['DIM3_SSFR_0_1GYR'] = INDEX_SSFR_0_1GYR
    pixel_dataset.attrs['DIM3_M_STARS'] = INDEX_M_STARS
    pixel_dataset.attrs['DIM3_L_DUST'] = INDEX_L_DUST
    pixel_dataset.attrs['DIM3_T_C_ISM'] = INDEX_T_C_ISM
    pixel_dataset.attrs['DIM3_T_W_BC'] = INDEX_T_W_BC
    pixel_dataset.attrs['DIM3_XI_C_TOT'] = INDEX_XI_C_TOT
    pixel_dataset.attrs['DIM3_XI_PAH_TOT'] = INDEX_XI_PAH_TOT
    pixel_dataset.attrs['DIM3_XI_MIR_TOT'] = INDEX_XI_MIR_TOT
    pixel_dataset.attrs['DIM3_XI_W_TOT'] = INDEX_XI_W_TOT
    pixel_dataset.attrs['DIM3_TAU_V_ISM'] = INDEX_TAU_V_ISM
    pixel_dataset.attrs['DIM3_M_DUST'] = INDEX_M_DUST
    pixel_dataset.attrs['DIM3_SFR_0_1GYR'] = INDEX_SFR_0_1GYR

    pixel_dataset.attrs['DIM4_BEST_FIT'] = INDEX_BEST_FIT
    pixel_dataset.attrs['DIM4_PERCENTILE_50'] = INDEX_PERCENTILE_50
    pixel_dataset.attrs['DIM4_HIGHEST_PROB_BIN'] = INDEX_HIGHEST_PROB_BIN
    pixel_dataset.attrs['DIM4_PERCENTILE_2_5'] = INDEX_PERCENTILE_2_5
    pixel_dataset.attrs['DIM4_PERCENTILE_16'] = INDEX_PERCENTILE_16
    pixel_dataset.attrs['DIM4_PERCENTILE_84'] = INDEX_PERCENTILE_84
    pixel_dataset.attrs['DIM4_PERCENTILE_97_5'] = INDEX_PERCENTILE_97_5

    LOG.info('Created {0} blocks'.format(block_id))

    return pixel_count
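The DIM3_*/DIM4_* attributes let a reader recover a parameter plane without hard-coding indices. A hypothetical read-back with h5py; the file name and group path are placeholders.

import h5py

# Hypothetical read-back of the dataset written above; file and group
# names are placeholders.
with h5py.File('galaxy.hdf5', 'r') as h5_file:
    pixels = h5_file['galaxy/pixels']
    f_mu_sfh = pixels.attrs['DIM3_F_MU_SFH']
    best_fit = pixels.attrs['DIM4_BEST_FIT']
    f_mu_sfh_plane = pixels[:, :, f_mu_sfh, best_fit]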
Example No. 11
def store_pixels(connection, galaxy_file_name, group, dimension_x, dimension_y, dimension_z, area_total, galaxy_id, map_parameter_name):
    """
    Store the pixel data
    """
    LOG.info('Storing the pixel data for {0} - {1} areas to process'.format(galaxy_file_name, area_total))
    group.attrs['PIXELS_MAX_X_Y_BLOCK'] = MAX_X_Y_BLOCK
    group.attrs['PIXELS_DIM3_F_MU_SFH'] = INDEX_F_MU_SFH
    group.attrs['PIXELS_DIM3_F_MU_IR'] = INDEX_F_MU_IR
    group.attrs['PIXELS_DIM3_MU_PARAMETER'] = INDEX_MU_PARAMETER
    group.attrs['PIXELS_DIM3_TAU_V'] = INDEX_TAU_V
    group.attrs['PIXELS_DIM3_SSFR_0_1GYR'] = INDEX_SSFR_0_1GYR
    group.attrs['PIXELS_DIM3_M_STARS'] = INDEX_M_STARS
    group.attrs['PIXELS_DIM3_L_DUST'] = INDEX_L_DUST
    group.attrs['PIXELS_DIM3_T_C_ISM'] = INDEX_T_C_ISM
    group.attrs['PIXELS_DIM3_T_W_BC'] = INDEX_T_W_BC
    group.attrs['PIXELS_DIM3_XI_C_TOT'] = INDEX_XI_C_TOT
    group.attrs['PIXELS_DIM3_XI_PAH_TOT'] = INDEX_XI_PAH_TOT
    group.attrs['PIXELS_DIM3_XI_MIR_TOT'] = INDEX_XI_MIR_TOT
    group.attrs['PIXELS_DIM3_XI_W_TOT'] = INDEX_XI_W_TOT
    group.attrs['PIXELS_DIM3_TAU_V_ISM'] = INDEX_TAU_V_ISM
    group.attrs['PIXELS_DIM3_M_DUST'] = INDEX_M_DUST
    group.attrs['PIXELS_DIM3_SFR_0_1GYR'] = INDEX_SFR_0_1GYR

    group.attrs['PIXELS_DIM4_BEST_FIT'] = INDEX_BEST_FIT
    group.attrs['PIXELS_DIM4_PERCENTILE_50'] = INDEX_PERCENTILE_50
    group.attrs['PIXELS_DIM4_HIGHEST_PROB_BIN'] = INDEX_HIGHEST_PROB_BIN
    group.attrs['PIXELS_DIM4_PERCENTILE_2_5'] = INDEX_PERCENTILE_2_5
    group.attrs['PIXELS_DIM4_PERCENTILE_16'] = INDEX_PERCENTILE_16
    group.attrs['PIXELS_DIM4_PERCENTILE_84'] = INDEX_PERCENTILE_84
    group.attrs['PIXELS_DIM4_PERCENTILE_97_5'] = INDEX_PERCENTILE_97_5

    histogram_list = []
    keys = []
    map_areas = {}
    pixel_count = 0
    area_count = 0
    histogram_block_id = 1
    histogram_block_index = 0
    s3helper = S3Helper()
    bucket = s3helper.get_bucket(get_files_bucket())

    # Load the area details and keys
    load_map_areas(connection, map_areas, galaxy_id)
    for key in bucket.list(prefix='{0}/sed/'.format(galaxy_file_name)):
        # Ignore the key
        if key.key.endswith('/'):
            continue
        keys.append(key)

    histogram_group = group.create_group('histogram_blocks')
    histogram_data = histogram_group.create_dataset('block_1', (HISTOGRAM_BLOCK_SIZE,), dtype=data_type_pixel_histogram, compression='gzip')

    for block_x in get_chunks(dimension_x):
        for block_y in get_chunks(dimension_y):
            LOG.info('Starting {0} : {1}.'.format(block_x, block_y))

            size_x = get_size(block_x, dimension_x)
            size_y = get_size(block_y, dimension_y)
            # Create the arrays for this block
            data = numpy.empty((size_x, size_y, NUMBER_PARAMETERS, NUMBER_IMAGES), dtype=numpy.float)
            data.fill(numpy.NaN)
            data_pixel_details = group.create_dataset('pixel_details_{0}_{1}'.format(block_x, block_y), (size_x, size_y), dtype=data_type_pixel, compression='gzip')
            data_pixel_parameters = group.create_dataset('pixel_parameters_{0}_{1}'.format(block_x, block_y), (size_x, size_y, NUMBER_PARAMETERS), dtype=data_type_pixel_parameter, compression='gzip')
            data_pixel_filter = group.create_dataset('pixel_filters_{0}_{1}'.format(block_x, block_y), (size_x, size_y, dimension_z), dtype=data_type_pixel_filter, compression='gzip')
            data_pixel_histograms_grid = group.create_dataset('pixel_histograms_grid_{0}_{1}'.format(block_x, block_y), (size_x, size_y, NUMBER_PARAMETERS), dtype=data_type_block_details, compression='gzip')

            for key in keys:
                if not area_intersects_block(connection, key.key, block_x, block_y, map_areas):
                    LOG.info('Skipping {0}'.format(key.key))
                    continue

                # Now process the file
                start_time = time.time()
                LOG.info('Processing file {0}'.format(key.key))
                temp_file = os.path.join(POGS_TMP, 'temp.sed')
                key.get_contents_to_filename(temp_file)

                if is_gzip(temp_file):
                    f = gzip.open(temp_file, "rb")
                else:
                    f = open(temp_file, "r")

                area_id = None
                pxresult_id = None
                line_number = 0
                percentiles_next = False
                histogram_next = False
                skynet_next1 = False
                skynet_next2 = False
                skip_this_pixel = False
                map_pixel_results = {}
                list_filters = []
                try:
                    for line in f:
                        line_number += 1

                        if line.startswith(" ####### "):
                            # Clear all the maps and stuff
                            map_pixel_results = {}
                            list_filters = []

                            # Split the line to extract the data
                            values = line.split()
                            pointName = values[1]
                            pxresult_id = pointName[3:].rstrip()
                            (raw_x, raw_y, area_id) = get_pixel_result(connection, pxresult_id)
                            # The pixel could be out of this block as the cutting up is not uniform
                            if pixel_in_block(raw_x, raw_y, block_x, block_y):
                                # correct x & y for this block
                                x = raw_x - (block_x * MAX_X_Y_BLOCK)
                                y = raw_y - (block_y * MAX_X_Y_BLOCK)
                                #LOG.info('Processing pixel {0}:{1} or {2}:{3} - {4}:{5}'.format(raw_x, raw_y, x, y, block_x, block_y))
                                line_number = 0
                                percentiles_next = False
                                histogram_next = False
                                skynet_next1 = False
                                skynet_next2 = False
                                skip_this_pixel = False
                                pixel_count += 1
                            else:
                                #LOG.info('Skipping pixel {0}:{1} - {2}:{3}'.format(raw_x, raw_y, block_x, block_y))
                                skip_this_pixel = True
                        elif skip_this_pixel:
                            # Do nothing as we're skipping this pixel
                            pass
                        elif pxresult_id is not None:
                            if line_number == 2:
                                filter_names = line.split()
                                filter_layer = 0
                                for filter_name in filter_names:
                                    if filter_name != '#':
                                        data_pixel_filter.attrs[filter_name] = filter_layer
                                        filter_layer += 1
                            elif line_number == 3:
                                values = line.split()
                                for value in values:
                                    list_filters.append([float(value)])
                            elif line_number == 4:
                                filter_layer = 0
                                values = line.split()
                                for value in values:
                                    filter_description = list_filters[filter_layer]
                                    filter_description.append(float(value))
                                    filter_layer += 1
                            elif line_number == 9:
                                values = line.split()
                                map_pixel_results['i_sfh'] = float(values[0])
                                map_pixel_results['i_ir'] = float(values[1])
                                map_pixel_results['chi2'] = float(values[2])
                                map_pixel_results['redshift'] = float(values[3])
                            elif line_number == 11:
                                values = line.split()
                                data[x, y, INDEX_F_MU_SFH, INDEX_BEST_FIT] = float(values[0])
                                data[x, y, INDEX_F_MU_IR, INDEX_BEST_FIT] = float(values[1])
                                data[x, y, INDEX_MU_PARAMETER, INDEX_BEST_FIT] = float(values[2])
                                data[x, y, INDEX_TAU_V, INDEX_BEST_FIT] = float(values[3])
                                data[x, y, INDEX_SSFR_0_1GYR, INDEX_BEST_FIT] = float(values[4])
                                data[x, y, INDEX_M_STARS, INDEX_BEST_FIT] = float(values[5])
                                data[x, y, INDEX_L_DUST, INDEX_BEST_FIT] = float(values[6])
                                data[x, y, INDEX_T_W_BC, INDEX_BEST_FIT] = float(values[7])
                                data[x, y, INDEX_T_C_ISM, INDEX_BEST_FIT] = float(values[8])
                                data[x, y, INDEX_XI_C_TOT, INDEX_BEST_FIT] = float(values[9])
                                data[x, y, INDEX_XI_PAH_TOT, INDEX_BEST_FIT] = float(values[10])
                                data[x, y, INDEX_XI_MIR_TOT, INDEX_BEST_FIT] = float(values[11])
                                data[x, y, INDEX_XI_W_TOT, INDEX_BEST_FIT] = float(values[12])
                                data[x, y, INDEX_TAU_V_ISM, INDEX_BEST_FIT] = float(values[13])
                                data[x, y, INDEX_M_DUST, INDEX_BEST_FIT] = float(values[14])
                                data[x, y, INDEX_SFR_0_1GYR, INDEX_BEST_FIT] = float(values[15])
                            elif line_number == 13:
                                filter_layer = 0
                                values = line.split()
                                for value in values:
                                    filter_description = list_filters[filter_layer]
                                    if filter_layer < dimension_z:
                                        data_pixel_filter[x, y, filter_layer] = (
                                            filter_description[0],
                                            filter_description[1],
                                            float(value),
                                        )
                                        filter_layer += 1
                            elif line_number > 13:
                                if line.startswith("# ..."):
                                    parts = line.split('...')
                                    parameter_name = parts[1].strip()
                                    parameter_name_id = map_parameter_name[parameter_name]
                                    percentiles_next = False
                                    histogram_next = True
                                    skynet_next1 = False
                                    skynet_next2 = False
                                    histogram_list = []
                                elif line.startswith("#....percentiles of the PDF......"):
                                    percentiles_next = True
                                    histogram_next = False
                                    skynet_next1 = False
                                    skynet_next2 = False

                                    # Write out the histogram into a block for compression improvement
                                    data_pixel_histograms_grid[x, y, parameter_name_id - 1] = (histogram_block_id, histogram_block_index, len(histogram_list))
                                    for pixel_histogram_item in histogram_list:
                                        # Do we need a new block
                                        if histogram_block_index >= HISTOGRAM_BLOCK_SIZE:
                                            histogram_block_id += 1
                                            histogram_block_index = 0
                                            histogram_data = histogram_group.create_dataset('block_{0}'.format(histogram_block_id), (HISTOGRAM_BLOCK_SIZE,), dtype=data_type_pixel_histogram, compression='gzip')

                                        histogram_data[histogram_block_index] = (
                                            pixel_histogram_item[0],
                                            pixel_histogram_item[1],
                                        )
                                        histogram_block_index += 1
                                elif line.startswith(" #...theSkyNet"):
                                    percentiles_next = False
                                    histogram_next = False
                                    skynet_next1 = True
                                    skynet_next2 = False
                                elif line.startswith("# theSkyNet2"):
                                    percentiles_next = False
                                    histogram_next = False
                                    skynet_next1 = False
                                    skynet_next2 = True
                                elif percentiles_next:
                                    values = line.split()
                                    z = parameter_name_id - 1
                                    data[x, y, z, INDEX_PERCENTILE_2_5] = float(values[0])
                                    data[x, y, z, INDEX_PERCENTILE_16] = float(values[1])
                                    data[x, y, z, INDEX_PERCENTILE_50] = float(values[2])
                                    data[x, y, z, INDEX_PERCENTILE_84] = float(values[3])
                                    data[x, y, z, INDEX_PERCENTILE_97_5] = float(values[4])
                                    percentiles_next = False
                                elif histogram_next:
                                    values = line.split()
                                    hist_value = float(values[1])
                                    if hist_value > MIN_HIST_VALUE and not math.isnan(hist_value):
                                        histogram_list.append((float(values[0]), hist_value))
                                elif skynet_next1:
                                    values = line.split()
                                    data_pixel_details[x, y] = (
                                        pxresult_id,
                                        area_id,
                                        map_pixel_results['i_sfh'],
                                        map_pixel_results['i_ir'],
                                        map_pixel_results['chi2'],
                                        map_pixel_results['redshift'],
                                        float(values[0]),
                                        float(values[2]),
                                        float(values[3]),
                                        float(values[4]),
                                    )
                                    skynet_next1 = False
                                elif skynet_next2:
                                    # We have the highest bin probability values which require the parameter_id
                                    values = line.split()
                                    high_prob_bin = float(values[0]) if float(values[0]) is not None else numpy.NaN
                                    first_prob_bin = float(values[1]) if float(values[1]) is not None else numpy.NaN
                                    last_prob_bin = float(values[2]) if float(values[2]) is not None else numpy.NaN
                                    bin_step = float(values[3]) if float(values[3]) is not None else numpy.NaN
                                    z = parameter_name_id - 1
                                    data[x, y, z, INDEX_HIGHEST_PROB_BIN] = high_prob_bin
                                    data_pixel_parameters[x, y, z] = (
                                        first_prob_bin,
                                        last_prob_bin,
                                        bin_step,
                                    )
                                    skynet_next2 = False

                except IOError:
                    LOG.error('IOError after {0} lines'.format(line_number))
                finally:
                    f.close()

                area_count += 1
                LOG.info('{0:0.3f} seconds for file {1}. {2} of {3} areas.'.format(time.time() - start_time, key.key, area_count, area_total))

            group.create_dataset('pixels_{0}_{1}'.format(block_x, block_y), data=data, compression='gzip')

    LOG.info('histogram_blocks: {0}, x_blocks: {1}, y_blocks: {2}'.format(histogram_block_id, block_x, block_y))

    return pixel_count
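get_chunks, get_size and pixel_in_block are not shown. Reconstructions consistent with the 'x = raw_x - (block_x * MAX_X_Y_BLOCK)' correction above would be the following; these are assumptions, not the originals.

def get_chunks(dimension):
    """Assumed: block indices covering a dimension in MAX_X_Y_BLOCK-wide slabs."""
    return range((dimension + MAX_X_Y_BLOCK - 1) // MAX_X_Y_BLOCK)


def get_size(block, dimension):
    """Assumed: the size of one block; the last block may be smaller."""
    return min(MAX_X_Y_BLOCK, dimension - block * MAX_X_Y_BLOCK)


def pixel_in_block(raw_x, raw_y, block_x, block_y):
    """Assumed: whether the raw pixel coordinate falls inside the given block."""
    return raw_x // MAX_X_Y_BLOCK == block_x and raw_y // MAX_X_Y_BLOCK == block_y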
Example No. 12
    for extension in ['fits', 'hdf5']:
        copy_files(old_name, new_name, run_id, galaxy_id, extension,
                   bucket_files)
    remove_files_folder(old_name, run_id, galaxy_id, bucket_files)

    for file_name in [
            'colour_1.png', 'colour_2.png', 'colour_3.png', 'colour_4.png',
            'ldust.png', 'm.png', 'mu.png', 'sfr.png', 'tn_colour_1.png'
    ]:
        copy_galaxy_images(old_name, new_name, run_id, galaxy_id, file_name,
                           bucket_galaxy_image)
    remove_galaxy_images_folder(old_name, run_id, galaxy_id,
                                bucket_galaxy_image)

    if DRY_RUN:
        LOG.info('Updating {0} to {1}'.format(galaxy_id, new_name))
    else:
        connection.execute(GALAXY.update().where(
            GALAXY.c.galaxy_id == galaxy_id).values(name=new_name))


for galaxy in connection.execute(select([GALAXY])):
    s3helper = S3Helper()
    bucket_files = s3helper.get_bucket(get_files_bucket())
    bucket_galaxy_image = s3helper.get_bucket(get_galaxy_image_bucket())

    if needs_fixing(galaxy[GALAXY.c.name]):
        fix_galaxy(galaxy, bucket_files, bucket_galaxy_image)

connection.close()
Example No. 13
    def assimilate_handler(self, wu, results, canonical_result):
        """
        Process the Results.
        """
        self.logDebug("Start of assimilate_handler for wu %d\n", wu.id)
        connection = None
        transaction = None
        try:
            if wu.canonical_result:
                out_file = self.get_file_path(canonical_result)
                self.area = None
                if out_file:
                    if os.path.isfile(out_file):
                        pass
                    else:
                        self.logDebug("File [%s] not found\n", out_file)
                        out_file = None

                if out_file:
                    self.logDebug("Reading File [%s]\n", out_file)
                    start = time.time()
                    connection = ENGINE.connect()
                    transaction = connection.begin()
                    resultCount = self._process_result(connection, out_file, wu)
                    if self.noinsert:
                        transaction.rollback()
                    else:
                        if not resultCount:
                            self.logCritical("No results were found in the output file\n")

                        if self._area_id is None:
                            self.logDebug("The Area was not found\n")
                        else:
                            connection.execute(AREA.update()
                                               .where(AREA.c.area_id == self._area_id)
                                               .values(workunit_id=wu.id, update_time=datetime.datetime.now()))

                            user_id_set = set()
                            for result in results:
                                if result.user and result.validate_state == boinc_db.VALIDATE_STATE_VALID:
                                    user_id = result.user.id
                                    if user_id not in user_id_set:
                                        user_id_set.add(user_id)

                            connection.execute(AREA_USER.delete().where(AREA_USER.c.area_id == self._area_id))
                            insert = AREA_USER.insert()
                            for user_id in user_id_set:
                                connection.execute(insert, area_id=self._area_id, userid=user_id)

                            # Copy the file to S3
                            s3helper = S3Helper()
                            s3helper.add_file_to_bucket(get_files_bucket(),
                                                        get_key_sed(self._galaxy_name, self._run_id, self._galaxy_id, self._area_id),
                                                        out_file,
                                                        reduced_redundancy=True)

                        time_taken = '{0:.2f}'.format(time.time() - start)
                        self.logDebug("Saving %d results for workunit %d in %s seconds\n", resultCount, wu.id, time_taken)
                        transaction.commit()
                    connection.close()
                else:
                    self.logCritical("The output file was not found\n")
            else:
                self.logDebug("No canonical_result for workunit\n")
                self.report_errors(wu)
        except:
            if transaction is not None:
                transaction.rollback()
            if connection is not None:
                connection.close()
            print "Unexpected error:", sys.exc_info()[0]
            traceback.print_exception(sys.exc_info()[0], sys.exc_info()[1], sys.exc_info()[2])
            self.logCritical("Unexpected error occurred, retrying...\n")
            return -1

        return 0
Example No. 14
    def process_file(self, registration):
        """
        Process a registration.

        :param registration:
        """
        self._filename = registration[REGISTER.c.filename]
        self._galaxy_name = registration[REGISTER.c.galaxy_name]
        self._galaxy_type = registration[REGISTER.c.galaxy_type]
        self._priority = registration[REGISTER.c.priority]
        self._redshift = registration[REGISTER.c.redshift]
        self._run_id = registration[REGISTER.c.run_id]
        self._sigma = registration[REGISTER.c.sigma]
        self._sigma_filename = registration[REGISTER.c.sigma_filename]

        # Do we have files that we can use for this?
        self._rounded_redshift = self._get_rounded_redshift()
        if self._rounded_redshift is None:
            LOG.error('No models matching the redshift of %.4f', self._redshift)
            return 0, 0

        self._hdu_list = pyfits.open(self._filename, memmap=True)
        self._layer_count = len(self._hdu_list)

        # Do we need to open and sort the S/N Ratio file
        if self._sigma_filename is not None:
            self._sigma = 0.0
            self._signal_noise_hdu = pyfits.open(self._sigma_filename, memmap=True)
            if self._layer_count != len(self._signal_noise_hdu):
                LOG.error('The layer counts do not match %d vs %d', self._layer_count, len(self._signal_noise_hdu))
                return 0, 0
        else:
            self._sigma = float(self._sigma)

        self._end_y = self._hdu_list[0].data.shape[0]
        self._end_x = self._hdu_list[0].data.shape[1]

        LOG.info("Image dimensions: %(x)d x %(y)d x %(z)d => %(pix).2f Mpixels" % {'x': self._end_x, 'y': self._end_y, 'z': self._layer_count, 'pix': self._end_x * self._end_y / 1000000.0})

        # Get the flops estimate and cobblestone factor
        run = self._connection.execute(select([RUN]).where(RUN.c.run_id == self._run_id)).first()
        self._fpops_est_per_pixel = run[RUN.c.fpops_est]
        self._cobblestone_scaling_factor = run[RUN.c.cobblestone_factor]

        # Create and save the object
        datetime_now = datetime.now()
        result = self._connection.execute(GALAXY.insert().values(name=self._galaxy_name,
                                                                 dimension_x=self._end_x,
                                                                 dimension_y=self._end_y,
                                                                 dimension_z=self._layer_count,
                                                                 redshift=self._redshift,
                                                                 sigma=self._sigma,
                                                                 create_time=datetime_now,
                                                                 image_time=datetime_now,
                                                                 galaxy_type=self._galaxy_type,
                                                                 ra_cent=0,
                                                                 dec_cent=0,
                                                                 pixel_count=0,
                                                                 pixels_processed=0,
                                                                 run_id=self._run_id))
        self._galaxy_id = result.inserted_primary_key[0]
        LOG.info("Writing %s to database", self._galaxy_name)

        # Store the fits header
        self._store_fits_header()

        # Get the filters we're using for this run and sort the layers
        self._get_filters_sort_layers()

        # Build the template file we need if necessary
        self._build_template_file()

        # Copy the filter and model files we need
        self._copy_important_files()

        # Now break up the galaxy into chunks
        self._break_up_galaxy()
        self._connection.execute(GALAXY.update().where(GALAXY.c.galaxy_id == self._galaxy_id).values(pixel_count=self._pixel_count))

        LOG.info('Building the images')
        galaxy_file_name = get_galaxy_file_name(self._galaxy_name, self._run_id, self._galaxy_id)
        s3helper = S3Helper()
        image = FitsImage(self._connection)
        image.build_image(self._filename, galaxy_file_name, self._galaxy_id, get_galaxy_image_bucket())

        # Copy the fits file to S3 - renamed to make it unique
        bucket_name = get_files_bucket()
        s3helper.add_file_to_bucket(bucket_name, get_key_fits(self._galaxy_name, self._run_id, self._galaxy_id), self._filename)
        if self._sigma_filename is not None:
            s3helper.add_file_to_bucket(bucket_name, get_key_sigma_fits(self._galaxy_name, self._run_id, self._galaxy_id), self._sigma_filename)

        return self._work_units_added, self._pixel_count
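The method above bails out early when `_get_rounded_redshift` finds no model set for the galaxy's redshift. The helper itself is not shown in this example; a minimal sketch, assuming the models are pre-computed on a fixed redshift grid (the grid values and the 0.005 rounding step below are hypothetical):

MODEL_REDSHIFTS = ['0.0000', '0.0050', '0.0100', '0.0150']  # hypothetical grid

def _get_rounded_redshift(self):
    # Round to the nearest grid step (0.005 here is an assumption)
    rounded = '{0:.4f}'.format(round(self._redshift * 200) / 200.0)
    return rounded if rounded in MODEL_REDSHIFTS else None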
Exemplo n.º 19
0
    def assimilate_handler(self, wu, results, canonical_result):
        """
        Process the Results.
        """
        self.logDebug("Start of assimilate_handler for wu %d\n", wu.id)
        connection = None
        transaction = None
        try:
            if wu.canonical_result:
                out_file = self.get_file_path(canonical_result)
                self.area = None
                if out_file and not os.path.isfile(out_file):
                    self.logDebug("File [%s] not found\n", out_file)
                    out_file = None

                if out_file:
                    self.logDebug("Reading File [%s]\n", out_file)
                    start = time.time()
                    connection = ENGINE.connect()
                    transaction = connection.begin()
                    resultCount = self._process_result(connection, out_file, wu)
                    if self.noinsert:
                        transaction.rollback()
                    else:
                        if not resultCount:
                            self.logCritical("No results were found in the output file\n")

                        if self._area_id is None:
                            self.logDebug("The Area was not found\n")
                        else:
                            connection.execute(AREA.update()
                                               .where(AREA.c.area_id == self._area_id)
                                               .values(workunit_id=wu.id, update_time=datetime.datetime.now()))

                            user_id_set = set()
                            for result in results:
                                if result.user and result.validate_state == boinc_db.VALIDATE_STATE_VALID:
                                    user_id_set.add(result.user.id)

                            connection.execute(AREA_USER.delete().where(AREA_USER.c.area_id == self._area_id))
                            insert_area_user = AREA_USER.insert()
                            insert_galaxy_user = GALAXY_USER.insert().prefix_with('IGNORE')
                            for user_id in user_id_set:
                                connection.execute(insert_area_user, area_id=self._area_id, userid=user_id)
                                # self.logDebug("Inserting row into galaxy_user for userid: %d galaxy_id: %d\n", user_id, self._galaxy_id)
                                connection.execute(insert_galaxy_user, galaxy_id=self._galaxy_id, userid=user_id)

                            # Copy the file to S3
                            s3helper = S3Helper()
                            s3helper.add_file_to_bucket(get_files_bucket(),
                                                        get_key_sed(self._galaxy_name, self._run_id, self._galaxy_id, self._area_id),
                                                        out_file,
                                                        reduced_redundancy=True)

                        time_taken = '{0:.2f}'.format(time.time() - start)
                        self.logDebug("Saving %d results for workunit %d in %s seconds\n", resultCount, wu.id, time_taken)
                        transaction.commit()
                    connection.close()
                else:
                    self.logCritical("The output file was not found\n")
            else:
                self.logDebug("No canonical_result for workunit\n")
                self.report_errors(wu)
        except Exception:
            if transaction is not None:
                transaction.rollback()
            if connection is not None:
                connection.close()
            print "Unexpected error:", sys.exc_info()[0]
            traceback.print_exc()
            self.logCritical("Unexpected error occurred, retrying...\n")
            return -1

        return 0
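The handler returns 0 on success and -1 after rolling back, so the surrounding framework can retry the workunit later (as its own log message notes). Stripped of the BOINC plumbing, the database side is the standard SQLAlchemy connect/begin/commit/rollback pattern; a minimal sketch, with a hypothetical do_work standing in for _process_result and the inserts:

connection = ENGINE.connect()
transaction = connection.begin()
try:
    do_work(connection)  # hypothetical: parse the output file and run the inserts
    transaction.commit()
except Exception:
    transaction.rollback()
    raise
finally:
    connection.close()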
Exemplo n.º 20
0
def store_pixels(connection, galaxy_file_name, group, dimension_x, dimension_y, dimension_z, area_total, output_directory, map_parameter_name):
    """
    Store the pixel data
    """
    LOG.info('Storing the pixel data for {0} - {1} areas to process'.format(galaxy_file_name, area_total))
    data = numpy.empty((dimension_x, dimension_y, NUMBER_PARAMETERS, NUMBER_IMAGES), dtype=numpy.float)
    data.fill(numpy.NaN)
    data_pixel_details = group.create_dataset('pixel_details', (dimension_x, dimension_y), dtype=data_type_pixel, compression='gzip')
    data_pixel_parameters = group.create_dataset('pixel_parameters', (dimension_x, dimension_y, NUMBER_PARAMETERS), dtype=data_type_pixel_parameter, compression='gzip')
    data_pixel_filter = group.create_dataset('pixel_filters', (dimension_x, dimension_y, dimension_z), dtype=data_type_pixel_filter, compression='gzip')
    data_pixel_histograms_grid = group.create_dataset('pixel_histograms_grid', (dimension_x, dimension_y, NUMBER_PARAMETERS), dtype=data_type_block_details, compression='gzip')

    histogram_group = group.create_group('histogram_blocks')
    histogram_list = []
    pixel_count = 0
    area_count = 0
    block_id = 1
    block_index = 0
    histogram_data = histogram_group.create_dataset('block_1', (BLOCK_SIZE,), dtype=data_type_pixel_histogram, compression='gzip')

    s3helper = S3Helper()
    bucket = s3helper.get_bucket(get_files_bucket())
    for key in bucket.list(prefix='{0}/sed/'.format(galaxy_file_name)):
        # Skip folder placeholder keys
        if key.key.endswith('/'):
            continue

        # Now process the file
        start_time = time.time()
        LOG.info('Processing file {0}'.format(key.key))
        temp_file = os.path.join(output_directory, 'temp.sed')
        key.get_contents_to_filename(temp_file)

        if is_gzip(temp_file):
            f = gzip.open(temp_file, "rb")
        else:
            f = open(temp_file, "r")

        area_id = None
        pxresult_id = None
        line_number = 0
        percentiles_next = False
        histogram_next = False
        skynet_next1 = False
        skynet_next2 = False
        map_pixel_results = {}
        list_filters = []
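        # SED file layout, as parsed below: each pixel section opens with a
        # " ####### " marker naming the pixel result. Within a section, line 2
        # names the filters, lines 3 and 4 carry one value per filter, line 9
        # holds i_sfh/i_ir/chi2/redshift, line 11 the best-fit parameter
        # values, line 13 the per-filter model values, and the remainder is
        # per-parameter percentile, histogram and theSkyNet summary blocks.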
        try:
            for line in f:
                line_number += 1

                if line.startswith(" ####### "):
                    # Clear all the maps and stuff
                    map_pixel_results = {}
                    list_filters = []

                    # Split the line to extract the data
                    values = line.split()
                    point_name = values[1]
                    pxresult_id = point_name[3:].rstrip()
                    (x, y, area_id) = get_pixel_result(connection, pxresult_id)
                    line_number = 0
                    percentiles_next = False
                    histogram_next = False
                    skynet_next1 = False
                    skynet_next2 = False
                    pixel_count += 1
                elif pxresult_id is not None:
                    if line_number == 2:
                        filter_names = line.split()
                        filter_layer = 0
                        for filter_name in filter_names:
                            if filter_name != '#':
                                data_pixel_filter.attrs[filter_name] = filter_layer
                                filter_layer += 1
                    elif line_number == 3:
                        values = line.split()
                        for value in values:
                            list_filters.append([float(value)])
                    elif line_number == 4:
                        filter_layer = 0
                        values = line.split()
                        for value in values:
                            filter_description = list_filters[filter_layer]
                            filter_description.append(float(value))
                            filter_layer += 1
                    elif line_number == 9:
                        values = line.split()
                        map_pixel_results['i_sfh'] = float(values[0])
                        map_pixel_results['i_ir'] = float(values[1])
                        map_pixel_results['chi2'] = float(values[2])
                        map_pixel_results['redshift'] = float(values[3])
                    elif line_number == 11:
                        values = line.split()
                        data[x, y, INDEX_F_MU_SFH, INDEX_BEST_FIT] = float(values[0])
                        data[x, y, INDEX_F_MU_IR, INDEX_BEST_FIT] = float(values[1])
                        data[x, y, INDEX_MU_PARAMETER, INDEX_BEST_FIT] = float(values[2])
                        data[x, y, INDEX_TAU_V, INDEX_BEST_FIT] = float(values[3])
                        data[x, y, INDEX_SSFR_0_1GYR, INDEX_BEST_FIT] = float(values[4])
                        data[x, y, INDEX_M_STARS, INDEX_BEST_FIT] = float(values[5])
                        data[x, y, INDEX_L_DUST, INDEX_BEST_FIT] = float(values[6])
                        data[x, y, INDEX_T_W_BC, INDEX_BEST_FIT] = float(values[7])
                        data[x, y, INDEX_T_C_ISM, INDEX_BEST_FIT] = float(values[8])
                        data[x, y, INDEX_XI_C_TOT, INDEX_BEST_FIT] = float(values[9])
                        data[x, y, INDEX_XI_PAH_TOT, INDEX_BEST_FIT] = float(values[10])
                        data[x, y, INDEX_XI_MIR_TOT, INDEX_BEST_FIT] = float(values[11])
                        data[x, y, INDEX_XI_W_TOT, INDEX_BEST_FIT] = float(values[12])
                        data[x, y, INDEX_TAU_V_ISM, INDEX_BEST_FIT] = float(values[13])
                        data[x, y, INDEX_M_DUST, INDEX_BEST_FIT] = float(values[14])
                        data[x, y, INDEX_SFR_0_1GYR, INDEX_BEST_FIT] = float(values[15])
                    elif line_number == 13:
                        filter_layer = 0
                        values = line.split()
                        for value in values:
                            filter_description = list_filters[filter_layer]
                            if filter_layer < dimension_z:
                                data_pixel_filter[x, y, filter_layer] = (
                                    filter_description[0],
                                    filter_description[1],
                                    float(value),
                                )
                                filter_layer += 1
                    elif line_number > 13:
                        if line.startswith("# ..."):
                            parts = line.split('...')
                            parameter_name = parts[1].strip()
                            parameter_name_id = map_parameter_name[parameter_name]
                            percentiles_next = False
                            histogram_next = True
                            skynet_next1 = False
                            skynet_next2 = False
                            histogram_list = []
                        elif line.startswith("#....percentiles of the PDF......"):
                            percentiles_next = True
                            histogram_next = False
                            skynet_next1 = False
                            skynet_next2 = False

                            # Write out the histogram into a block for compression improvement
                            data_pixel_histograms_grid[x, y, parameter_name_id - 1] = (block_id, block_index, len(histogram_list))
                            for pixel_histogram_item in histogram_list:
                                # Do we need a new block
                                if block_index >= BLOCK_SIZE:
                                    block_id += 1
                                    block_index = 0
                                    histogram_data = histogram_group.create_dataset('block_{0}'.format(block_id), (BLOCK_SIZE,), dtype=data_type_pixel_histogram, compression='gzip')

                                histogram_data[block_index] = (
                                    pixel_histogram_item[0],
                                    pixel_histogram_item[1],
                                )
                                block_index += 1
                        elif line.startswith(" #...theSkyNet"):
                            percentiles_next = False
                            histogram_next = False
                            skynet_next1 = True
                            skynet_next2 = False
                        elif line.startswith("# theSkyNet2"):
                            percentiles_next = False
                            histogram_next = False
                            skynet_next1 = False
                            skynet_next2 = True
                        elif percentiles_next:
                            values = line.split()
                            z = parameter_name_id - 1
                            data[x, y, z, INDEX_PERCENTILE_2_5] = float(values[0])
                            data[x, y, z, INDEX_PERCENTILE_16] = float(values[1])
                            data[x, y, z, INDEX_PERCENTILE_50] = float(values[2])
                            data[x, y, z, INDEX_PERCENTILE_84] = float(values[3])
                            data[x, y, z, INDEX_PERCENTILE_97_5] = float(values[4])
                            percentiles_next = False
                        elif histogram_next:
                            values = line.split()
                            hist_value = float(values[1])
                            if hist_value > MIN_HIST_VALUE and not math.isnan(hist_value):
                                histogram_list.append((float(values[0]), hist_value))
                        elif skynet_next1:
                            values = line.split()
                            data_pixel_details[x, y] = (
                                pxresult_id,
                                area_id,
                                map_pixel_results['i_sfh'],
                                map_pixel_results['i_ir'],
                                map_pixel_results['chi2'],
                                map_pixel_results['redshift'],
                                float(values[0]),
                                float(values[2]),
                                float(values[3]),
                                float(values[4]),
                            )
                            skynet_next1 = False
                        elif skynet_next2:
                            # The highest bin probability values, stored against the current parameter
                            values = line.split()
                            high_prob_bin = float(values[0])
                            first_prob_bin = float(values[1])
                            last_prob_bin = float(values[2])
                            bin_step = float(values[3])
                            z = parameter_name_id - 1
                            data[x, y, z, INDEX_HIGHEST_PROB_BIN] = high_prob_bin
                            data_pixel_parameters[x, y, z] = (
                                first_prob_bin,
                                last_prob_bin,
                                bin_step,
                            )
                            skynet_next2 = False

        except IOError:
            LOG.error('IOError after {0} lines'.format(line_number))
        finally:
            f.close()

        area_count += 1
        LOG.info('{0:0.3f} seconds for file {1}. {2} of {3} areas.'.format(time.time() - start_time, key.key, area_count, area_total))

    pixel_dataset = group.create_dataset('pixels', data=data, compression='gzip')
    pixel_dataset.attrs['DIM3_F_MU_SFH'] = INDEX_F_MU_SFH
    pixel_dataset.attrs['DIM3_F_MU_IR'] = INDEX_F_MU_IR
    pixel_dataset.attrs['DIM3_MU_PARAMETER'] = INDEX_MU_PARAMETER
    pixel_dataset.attrs['DIM3_TAU_V'] = INDEX_TAU_V
    pixel_dataset.attrs['DIM3_SSFR_0_1GYR'] = INDEX_SSFR_0_1GYR
    pixel_dataset.attrs['DIM3_M_STARS'] = INDEX_M_STARS
    pixel_dataset.attrs['DIM3_L_DUST'] = INDEX_L_DUST
    pixel_dataset.attrs['DIM3_T_C_ISM'] = INDEX_T_C_ISM
    pixel_dataset.attrs['DIM3_T_W_BC'] = INDEX_T_W_BC
    pixel_dataset.attrs['DIM3_XI_C_TOT'] = INDEX_XI_C_TOT
    pixel_dataset.attrs['DIM3_XI_PAH_TOT'] = INDEX_XI_PAH_TOT
    pixel_dataset.attrs['DIM3_XI_MIR_TOT'] = INDEX_XI_MIR_TOT
    pixel_dataset.attrs['DIM3_XI_W_TOT'] = INDEX_XI_W_TOT
    pixel_dataset.attrs['DIM3_TAU_V_ISM'] = INDEX_TAU_V_ISM
    pixel_dataset.attrs['DIM3_M_DUST'] = INDEX_M_DUST
    pixel_dataset.attrs['DIM3_SFR_0_1GYR'] = INDEX_SFR_0_1GYR

    pixel_dataset.attrs['DIM4_BEST_FIT'] = INDEX_BEST_FIT
    pixel_dataset.attrs['DIM4_PERCENTILE_50'] = INDEX_PERCENTILE_50
    pixel_dataset.attrs['DIM4_HIGHEST_PROB_BIN'] = INDEX_HIGHEST_PROB_BIN
    pixel_dataset.attrs['DIM4_PERCENTILE_2_5'] = INDEX_PERCENTILE_2_5
    pixel_dataset.attrs['DIM4_PERCENTILE_16'] = INDEX_PERCENTILE_16
    pixel_dataset.attrs['DIM4_PERCENTILE_84'] = INDEX_PERCENTILE_84
    pixel_dataset.attrs['DIM4_PERCENTILE_97_5'] = INDEX_PERCENTILE_97_5

    LOG.info('Created {0} blocks'.format(block_id))

    return pixel_count
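Reading a histogram back out of this layout means following the (block_id, block_index, length) pointer stored in pixel_histograms_grid and mirroring the block rollover the writer performs mid-histogram. A minimal sketch, assuming an open h5py group with the datasets created above and the same BLOCK_SIZE constant:

def read_histogram(group, x, y, parameter_index):
    """Sketch: fetch one pixel/parameter histogram via its block pointer."""
    block_id, block_index, length = group['pixel_histograms_grid'][x, y, parameter_index]
    block = group['histogram_blocks']['block_{0}'.format(block_id)]
    items = []
    for _ in range(length):
        if block_index >= BLOCK_SIZE:
            # Mirror the writer: the histogram continues in the next block
            block_id += 1
            block_index = 0
            block = group['histogram_blocks']['block_{0}'.format(block_id)]
        items.append(block[block_index])
        block_index += 1
    return items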
Exemplo n.º 21
0
def fix_galaxy(galaxy, bucket_files, bucket_galaxy_image):
    """
    Fix a galaxy whose name has a stray trailing character
    :return:
    """
    old_name = galaxy[GALAXY.c.name]
    new_name = old_name[:-1]
    galaxy_id = galaxy[GALAXY.c.galaxy_id]
    run_id = galaxy[GALAXY.c.run_id]
    LOG.info('Fixing {0}({1}) to {2}'.format(old_name, galaxy_id, new_name))
    for extension in ['fits', 'hdf5']:
        copy_files(old_name, new_name, run_id, galaxy_id, extension, bucket_files)
    remove_files_folder(old_name, run_id, galaxy_id, bucket_files)

    for file_name in ['colour_1.png', 'colour_2.png', 'colour_3.png', 'colour_4.png', 'ldust.png', 'm.png', 'mu.png', 'sfr.png', 'tn_colour_1.png']:
        copy_galaxy_images(old_name, new_name, run_id, galaxy_id, file_name, bucket_galaxy_image)
    remove_galaxy_images_folder(old_name, run_id, galaxy_id, bucket_galaxy_image)

    if DRY_RUN:
        LOG.info('Dry run: would update {0} to {1}'.format(galaxy_id, new_name))
    else:
        connection.execute(GALAXY.update().where(GALAXY.c.galaxy_id == galaxy_id).values(name=new_name))


s3helper = S3Helper()
bucket_files = s3helper.get_bucket(get_files_bucket())
bucket_galaxy_image = s3helper.get_bucket(get_galaxy_image_bucket())

for galaxy in connection.execute(select([GALAXY])):
    if needs_fixing(galaxy[GALAXY.c.name]):
        fix_galaxy(galaxy, bucket_files, bucket_galaxy_image)

connection.close()
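Neither needs_fixing nor copy_files appears in this snippet. A plausible sketch under stated assumptions: the predicate checks for the stray trailing character being stripped (the actual condition below is hypothetical), and the copy is a server-side boto copy_key from the old key prefix to the new one:

def needs_fixing(galaxy_name):
    # Hypothetical predicate: the real check for the stray trailing
    # character is not shown in this snippet
    return galaxy_name.endswith('_')

def copy_files(old_name, new_name, run_id, galaxy_id, extension, bucket):
    """Sketch: server-side copy of the .fits/.hdf5 keys to the new prefix."""
    old_prefix = get_galaxy_file_name(old_name, run_id, galaxy_id)
    new_prefix = get_galaxy_file_name(new_name, run_id, galaxy_id)
    for key in bucket.list(prefix='{0}/'.format(old_prefix)):
        if key.key.endswith('.{0}'.format(extension)):
            bucket.copy_key(key.key.replace(old_prefix, new_prefix, 1),
                            bucket.name, key.key)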