Example #1
def get_hdf5_from_s3(galaxy, directory):
    bucket_name = get_saved_files_bucket()
    key = get_key_hdf5(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id],
                       galaxy[GALAXY.c.galaxy_id])
    s3_helper = S3Helper()
    if s3_helper.file_exists(bucket_name, key):
        if s3_helper.file_archived(bucket_name, key):
            # file is archived
            if s3_helper.file_restoring(bucket_name, key):
                # if file is restoring, just need to wait for it
                LOG.info(
                    'Galaxy {0} ({1}) is still restoring from glacier'.format(
                        galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id]))
            else:
                # if file is not restoring, need to request.
                LOG.info('Making request for archived galaxy {0} ({1})'.format(
                    galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id]))
                s3_helper.restore_archived_file(bucket_name, key, days=10)
        else:
            # file is not archived
            LOG.info('Galaxy {0} ({1}) is available in s3'.format(
                galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id]))
            filename = os.path.join(
                directory,
                get_galaxy_file_name(galaxy[GALAXY.c.name],
                                     galaxy[GALAXY.c.run_id],
                                     galaxy[GALAXY.c.galaxy_id])) + '.hdf5'
            s3_helper.get_file_from_bucket(bucket_name=bucket_name,
                                           key_name=key,
                                           file_name=filename)

    else:
        LOG.info('The key {0} in bucket {1} does not exist'.format(
            key, bucket_name))
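
The Glacier checks above (file_exists, file_archived, file_restoring, restore_archived_file) wrap boto key operations; Example #23 below uses the underlying attributes directly (storage_class, ongoing_restore, restore). A minimal sketch of what those wrappers could look like follows; the class name and constructor are hypothetical, and the project's real S3Helper is defined elsewhere.

class GlacierHelperSketch(object):
    """Hypothetical illustration only - not the project's real S3Helper."""

    def __init__(self, s3_connection):
        # A boto S3Connection is assumed.
        self._connection = s3_connection

    def file_exists(self, bucket_name, key_name):
        bucket = self._connection.get_bucket(bucket_name)
        return bucket.get_key(key_name) is not None

    def file_archived(self, bucket_name, key_name):
        key = self._connection.get_bucket(bucket_name).get_key(key_name)
        return key is not None and key.storage_class == 'GLACIER'

    def file_restoring(self, bucket_name, key_name):
        key = self._connection.get_bucket(bucket_name).get_key(key_name)
        return key is not None and bool(key.ongoing_restore)

    def restore_archived_file(self, bucket_name, key_name, days=10):
        # Request a temporary restore from Glacier for the given number of days.
        key = self._connection.get_bucket(bucket_name).get_key(key_name)
        key.restore(days=days)
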
Example #2
def delete_galaxy(connection, galaxy_ids):
    try:
        for galaxy_id_str in galaxy_ids:
            transaction = connection.begin()
            galaxy_id1 = int(galaxy_id_str)
            galaxy = connection.execute(
                select([GALAXY
                        ]).where(GALAXY.c.galaxy_id == galaxy_id1)).first()
            if galaxy is None:
                LOG.info('Error: Galaxy with galaxy_id of %d was not found',
                         galaxy_id1)
            else:
                LOG.info('Deleting Galaxy with galaxy_id of %d - %s',
                         galaxy_id1, galaxy[GALAXY.c.name])
                area_count = connection.execute(
                    select([func.count(AREA.c.area_id)
                            ]).where(AREA.c.galaxy_id == galaxy[
                                GALAXY.c.galaxy_id])).first()[0]
                counter = 1

                for area_id1 in connection.execute(
                        select(
                            [AREA.c.area_id]).where(AREA.c.galaxy_id == galaxy[
                                GALAXY.c.galaxy_id]).order_by(AREA.c.area_id)):
                    LOG.info("Deleting galaxy {0} area {1}. {2} of {3}".format(
                        galaxy_id_str, area_id1[0], counter, area_count))
                    connection.execute(PIXEL_RESULT.delete().where(
                        PIXEL_RESULT.c.area_id == area_id1[0]))

                    # Give the rest of the world a chance to access the database
                    time.sleep(0.1)
                    counter += 1

                # Now empty the bucket
                s3helper = S3Helper()
                bucket = s3helper.get_bucket(get_files_bucket())
                galaxy_file_name = get_galaxy_file_name(
                    galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id],
                    galaxy[GALAXY.c.galaxy_id])
                for key in bucket.list(
                        prefix='{0}/sed/'.format(galaxy_file_name)):
                    # Skip the folder placeholder key
                    if key.key.endswith('/'):
                        continue

                    bucket.delete_key(key)

                # Now the folder
                key = Key(bucket)
                key.key = '{0}/sed/'.format(galaxy_file_name)
                bucket.delete_key(key)

            LOG.info('Galaxy with galaxy_id of %d was deleted', galaxy_id1)
            connection.execute(
                GALAXY.update().where(GALAXY.c.galaxy_id == galaxy_id1).values(
                    status_id=DELETED, status_time=datetime.datetime.now()))
            transaction.commit()

    except Exception:
        LOG.exception('Major error')
Example #3
    def _build_image_asinh(self, fits_file_name, galaxy_key_stub, centre,
                           galaxy_id, bucket_name):
        """
        Build Three Colour Images using the asinh() function.
        :param fits_file_name:
        :param galaxy_key_stub:
        :param centre:
        :param galaxy_id:
        :param bucket_name:
        """
        hdulist = pyfits.open(fits_file_name, memmap=True)

        hdu = hdulist[0]
        width = hdu.header['NAXIS1']
        height = hdu.header['NAXIS2']

        (image1_filters, image2_filters, image3_filters,
         image4_filters) = self._get_image_filters(hdulist)

        # Create Three Colour Images
        image1 = ImageBuilder(bucket_name, 1,
                              get_colour_image_key(galaxy_key_stub, 1),
                              get_thumbnail_colour_image_key(
                                  galaxy_key_stub,
                                  1), image1_filters[0], image1_filters[1],
                              image1_filters[2], width, height, centre,
                              self._connection, galaxy_id)  # i, r, g
        image2 = ImageBuilder(bucket_name, 2,
                              get_colour_image_key(galaxy_key_stub, 2), None,
                              image2_filters[0], image2_filters[1],
                              image2_filters[2], width, height, centre,
                              self._connection, galaxy_id)  # r, g, NUV
        image3 = ImageBuilder(bucket_name, 3,
                              get_colour_image_key(galaxy_key_stub, 3), None,
                              image3_filters[0], image3_filters[1],
                              image3_filters[2], width, height, centre,
                              self._connection, galaxy_id)  # 3.6, g, NUV
        image4 = ImageBuilder(bucket_name, 4,
                              get_colour_image_key(galaxy_key_stub, 4), None,
                              image4_filters[0], image4_filters[1],
                              image4_filters[2], width, height, centre,
                              self._connection, galaxy_id)  # 22, r, NUV
        images = [image1, image2, image3, image4]

        for hdu in hdulist:
            filter_band = hdu.header['MAGPHYSI']
            for image in images:
                image.set_data(filter_band, hdu.data)

        s3helper = S3Helper()
        for image in images:
            if image.is_valid():
                image.save_image(s3helper)
            else:
                print 'not valid'

        hdulist.close()
Example #4
def migrate_files(connection):
    """
    Migrate the various files to S3
    """
    LOG.info('Migrating the files')

    s3helper = S3Helper()

    migrate_image_files(connection, get_galaxy_image_bucket(), get_files_bucket(), s3helper)
    migrate_hdf5_files(connection, get_files_bucket(), s3helper)
Example #5
def remigrate_files(connection):
    """
    Re-migrate the bad HDF5 files to S3
    """
    LOG.info('Migrating the files')

    s3helper = S3Helper()
    files_bucket = get_files_bucket()
    bad_galaxies = find_bad_hdf5_files(s3helper, files_bucket)
    migrate_hdf5_files(bad_galaxies, connection, files_bucket, s3helper)
Example #6
def get_hdf5_size_data():
    """
    Get the HDF5 data we need
    :return:
    """
    # Get the list of files
    LOG.info('Getting the hdf5 files from the database')
    data = {}
    set_names = set()

    for entry in connection.execute(select([HDF5_SIZE])):
        key_size_mb = entry[HDF5_SIZE.c.size] / 1000000.0
        LOG.info('Processing {0} {1} {2}'.format(entry[HDF5_SIZE.c.name],
                                                 entry[HDF5_SIZE.c.size],
                                                 key_size_mb))
        run_id = entry[HDF5_SIZE.c.run_id]

        # Get the array
        row_data = data.get(run_id)
        if row_data is None:
            row_data = []
            data[run_id] = row_data

        row_data.append(key_size_mb)
        set_names.add(entry[HDF5_SIZE.c.name])

    LOG.info('Getting the hdf5 files from S3')
    s3helper = S3Helper()
    bucket = s3helper.get_bucket(get_files_bucket())
    insert_hdf5 = HDF5_SIZE.insert()
    for prefix in bucket.list(prefix='', delimiter='/'):
        prefix_name = prefix.name[:-1]
        if prefix_name not in set_names:
            key = bucket.get_key('{0}/{0}.hdf5'.format(prefix_name))
            if key is not None:
                key_size_mb = key.size / 1000000.0
                LOG.info('Processing {0} {1} {2}'.format(
                    key.name, key.size, key_size_mb))
                elements = prefix.name.split('__')
                run_id = int(elements[1])

                connection.execute(insert_hdf5,
                                   name=prefix_name,
                                   size=key.size,
                                   run_id=run_id)

                # Get the array
                row_data = data.get(run_id)
                if row_data is None:
                    row_data = []
                    data[run_id] = row_data

                row_data.append(key_size_mb)

    return data
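
The S3 half of the loop assumes each HDF5 file is stored as <prefix>/<prefix>.hdf5, and that the prefix produced by get_galaxy_file_name() joins the galaxy name, run id and galaxy id with double underscores (run_id is read from elements[1]). A small illustration with a hypothetical prefix:

prefix_name = 'NGC1234__5__678'          # hypothetical galaxy__run_id__galaxy_id
elements = prefix_name.split('__')
run_id = int(elements[1])                # 5
key_name = '{0}/{0}.hdf5'.format(prefix_name)
# key_name == 'NGC1234__5__678/NGC1234__5__678.hdf5'
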
Example #7
def remove_s3_files(galaxy_name, run_id, galaxy_id):
    """
    Remove the files from S3

    :return:
    """
    s3_helper = S3Helper()
    remove_files_with_key(s3_helper.get_bucket(get_galaxy_image_bucket()),
                          galaxy_name, run_id, galaxy_id)
    remove_files_with_key(s3_helper.get_bucket(get_files_bucket()),
                          galaxy_name, run_id, galaxy_id)
Example #8
def access_s3():
    """
    Check we can access the archive bucket
    :return:
    """
    try:
        s3helper = S3Helper()
        bucket = s3helper.get_bucket(get_archive_bucket())
        LOG.info('Access S3 bucket name: {0}'.format(bucket.name))
    except Exception:
        LOG.exception('access_s3')
        return False

    return True
Example #9
def store_files(hdf5_dir):
    """
    Scan a directory for files and send them to the archive

    :param hdf5_dir:  the directory to scan
    :return:
    """
    LOG.info('Directory: %s', hdf5_dir)

    # Connect to the database - the login string is set in the database package
    ENGINE = create_engine(DB_LOGIN)
    connection = ENGINE.connect()

    files = os.path.join(hdf5_dir, '*.hdf5')
    file_count = 0

    try:
        s3helper = S3Helper()
        bucket_name = get_files_bucket()

        for file_name in glob.glob(files):
            size = os.path.getsize(file_name)
            galaxy_id, galaxy_name = get_galaxy_id_and_name(file_name)
            if galaxy_id >= 0:
                key = '{0}/{0}.hdf5'.format(galaxy_name)
                LOG.info('File name: %s', file_name)
                LOG.info('File size: %d', size)
                LOG.info('Bucket:    %s', bucket_name)
                LOG.info('Key:       %s', key)

                s3helper.add_file_to_bucket(bucket_name, key, file_name)
                file_count += 1
                os.remove(file_name)
                connection.execute(GALAXY.update().where(
                    GALAXY.c.galaxy_id == galaxy_id).values(
                        status_id=STORED, status_time=datetime.datetime.now()))

            else:
                LOG.error('File name: %s', file_name)
                LOG.error('File size: %d', size)
                LOG.error('Could not get the galaxy id')

    except Exception:
        LOG.exception('Major error')

    finally:
        connection.close()

    return file_count
Example #10
def access_s3():
    """
    Check we can access the archive bucket
    :return:
    """
    try:
        LOG.info('Testing S3 access')
        s3helper = S3Helper()
        bucket = s3helper.get_bucket(get_archive_bucket())
        LOG.info('Access S3 bucket name: {0}'.format(bucket.name))
    except Exception:
        LOG.exception('access_s3')
        return False

    return True
Example #11
def init(project, template):
    project_fname = os.path.join(find_project_root(), 'project_info.json')
    logger = logging.getLogger(__name__)
    # write empty template file to fill in manually
    if template:
        template = {"name": project, "keywords": []}
        with open(project_fname, 'w') as f:
            json.dump(template, f, cls=JSONEncoder, indent=4)
        logger.info(
            'Successfully wrote empty template file "{}". Please fill in values manually.'
            .format(project_fname))
        return
    # sync project info
    s3_helper = S3Helper()
    s3_helper.sync_project_info(project)
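
For illustration, a template-mode call might produce a project_info.json roughly like the following (hypothetical project name; assuming JSONEncoder writes these plain values through unchanged):

init('my_project', template=True)
# project_info.json then contains something like:
# {
#     "name": "my_project",
#     "keywords": []
# }
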
Example #12
def original_image_checked_ami():
    """
    We're running in the AMI instance - so do the actual work

    Check the newly created images to make sure the images have been created
    :return:
    """
    # Connect to the database - the login string is set in the database package
    engine = create_engine(DB_LOGIN)
    connection = engine.connect()

    s3helper = S3Helper()
    try:
        # Look in the database for the galaxies
        galaxy_ids = []
        for galaxy in connection.execute(
                select([GALAXY]).where(
                    and_(GALAXY.c.original_image_checked == None,
                         GALAXY.c.pixel_count > 0)).order_by(
                             GALAXY.c.galaxy_id)):
            galaxy_ids.append(galaxy[GALAXY.c.galaxy_id])

        for galaxy_id in galaxy_ids:
            galaxy = connection.execute(
                select([GALAXY
                        ]).where(GALAXY.c.galaxy_id == galaxy_id)).first()

            if not image_files_exist(galaxy[GALAXY.c.name],
                                     galaxy[GALAXY.c.run_id],
                                     galaxy[GALAXY.c.galaxy_id], s3helper):
                mark_as_checked = regenerated_original_images(
                    galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id],
                    galaxy[GALAXY.c.galaxy_id], s3helper, connection)
            else:
                mark_as_checked = True

            if mark_as_checked:
                connection.execute(GALAXY.update().where(
                    GALAXY.c.galaxy_id == galaxy_id).values(
                        original_image_checked=datetime.datetime.now()))

    except Exception:
        LOG.exception('Major error')

    finally:
        connection.close()
Example #13
def process_ami():
    """
    We're running on the AMI instance - so actually do the work

    Find the files and move them to S3
    :return:
    """
    delete_delay_ago = datetime.datetime.now() - datetime.timedelta(
        days=float(ARC_BOINC_STATISTICS_DELAY))
    LOG.info('delete_delay_ago: {0}'.format(delete_delay_ago))
    s3helper = S3Helper()
    for directory_name in glob.glob(
            os.path.join(POGS_BOINC_PROJECT_ROOT, 'html/stats_archive/*')):
        if os.path.isdir(directory_name):
            directory_mtime = datetime.datetime.fromtimestamp(
                os.path.getmtime(directory_name))
            LOG.info('directory: {0}, mtime: {1}'.format(
                directory_name, directory_mtime))
            if directory_mtime < delete_delay_ago:
                move_files_to_s3(s3helper, directory_name)
Example #14
def archive_boinc_db_purge():
    """
    Clean up the BOINC DB Purge records

    Find the files and move them to S3
    :return:
    """
    delete_delay_ago = datetime.datetime.now() - datetime.timedelta(
        days=float(ARC_BOINC_STATISTICS_DELAY))
    LOG.info('delete_delay_ago: {0}'.format(delete_delay_ago))
    s3helper = S3Helper()
    for directory_name in glob.glob(
            os.path.join(POGS_BOINC_PROJECT_ROOT, 'archives/*')):
        if os.path.isdir(directory_name):
            directory_mtime = datetime.datetime.fromtimestamp(
                os.path.getmtime(directory_name))
            LOG.info('directory: {0}, mtime: {1}'.format(
                directory_name, directory_mtime))
            if directory_mtime < delete_delay_ago:
                move_files_to_s3(s3helper, directory_name)
Example #15
def store_files(connection, modulus, remainder):
    """
    Scan a directory for files and send them to the archive

    """
    LOG.info('Directory: %s', HDF5_OUTPUT_DIRECTORY)

    to_store_dir = os.path.join(HDF5_OUTPUT_DIRECTORY, 'to_store')
    files = os.path.join(to_store_dir, '*.hdf5')
    file_count = 0

    s3helper = S3Helper()
    bucket_name = get_saved_files_bucket()

    for file_name in glob.glob(files):
        galaxy_id, galaxy_name = get_galaxy_id_and_name(file_name)
        if galaxy_id >= 0:
            if modulus is None or galaxy_id % modulus == remainder:
                size = os.path.getsize(file_name)
                key = '{0}/{0}.hdf5'.format(galaxy_name)
                LOG.info('File name: %s', file_name)
                LOG.info('File size: %d', size)
                LOG.info('Bucket:    %s', bucket_name)
                LOG.info('Key:       %s', key)

                s3helper.add_file_to_bucket(bucket_name, key, file_name)
                file_count += 1
                os.remove(file_name)
                connection.execute(GALAXY.update().where(
                    GALAXY.c.galaxy_id == galaxy_id).values(
                        status_id=STORED, status_time=datetime.datetime.now()))

        else:
            LOG.error('File name: %s', file_name)
            LOG.error('Could not get the galaxy id')

        if shutdown() is True:
            raise SystemExit

    return file_count
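
The modulus/remainder test shards the work across parallel archivers: each worker stores only the galaxies whose id modulo modulus equals its remainder, and a modulus of None means it stores everything. A tiny illustration with hypothetical values:

# With modulus=3 and remainder=1 this worker stores galaxy_ids 1, 4, 7, ...
for galaxy_id in [1, 2, 3, 4, 5, 6, 7]:
    print galaxy_id, (galaxy_id % 3 == 1)
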
Example #16
def get_glacier_data_size(connection, bucket_name):
    """
    Return the total number of bytes we have stored in Glacier.
    Checks the database first for a cached copy so the value is not re-counted on every call.
    :param connection: The database connection.
    :param bucket_name: Name of the bucket to count.
    :return:
    """

    # Load most recent entry from database
    # if timestamp on most recent entry is < 24 hours from now, use it
    # if not, do the full check and add a new entry in the db specifying the glacier size.

    day_ago = seconds_since_epoch(get_hours_ago(24))
    result = connection.execute(
        select([HDF5_GLACIER_STORAGE_SIZE
                ]).where(HDF5_GLACIER_STORAGE_SIZE.c.count_time > day_ago))

    latest_time = 0
    latest_size = 0
    for row in result:
        if row['count_time'] > latest_time:
            latest_size = row['size']
            latest_time = row['count_time']

    if latest_time == 0 or latest_size == 0:
        # Need to re-count
        s3helper = S3Helper()
        LOG.info("Glacier data size expired, recounting...")
        size = s3helper.glacier_data_size(bucket_name)
        LOG.info("Glacier data size counted: {0} bytes".format(size))

        connection.execute(HDF5_GLACIER_STORAGE_SIZE.insert(),
                           size=size,
                           count_time=seconds_since_epoch(datetime.now()))
    else:
        size = latest_size

    return size
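
The 24-hour cache window above relies on two small time helpers, get_hours_ago and seconds_since_epoch, which are not shown in this section. A minimal sketch of what they could look like (hypothetical implementations):

import datetime
import time


def get_hours_ago(hours):
    # A datetime the given number of hours in the past.
    return datetime.datetime.now() - datetime.timedelta(hours=hours)


def seconds_since_epoch(when):
    # Convert a datetime to whole seconds since the Unix epoch for count_time.
    return int(time.mktime(when.timetuple()))
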
Example #17
    def upload(self, request, pk=None, *args, **kwargs):
        multipart_file = request.data.get("multipart_file")
        store_file = StoreFile(file_obj=multipart_file)
        store_file.store = Store(pk=pk)
        store_file.save()

        file_path = store_file.file_obj.path
        file_size = store_file.file_obj.size

        if file_path and not file_path == "":
            orig_filename = file_path
            filename = orig_filename.split("/")[-1].lower()
            file_ext = filename.split(".")[-1]
            filename_hash = '{}.{}'.format(uuid.uuid4(), file_ext)
            folder = 'dev_public/test'
            upload_request = S3Helper.upload_file(orig_filename, folder,
                                                  filename_hash)

            if upload_request.get("status") == 200:
                # Delete created file object in disk
                store_file.file_obj.delete()

                # Update store_file
                store_file.storage_url = upload_request.get("upload_url")
                store_file.filename = filename_hash
                store_file.file_size = file_size
                store_file.content_type = FILE_TYPES[file_ext]
                store_file.save()

                return Response(
                    {
                        "status": HTTP_200_OK,
                        "store_file": store_file.to_json()
                    },
                    status=HTTP_200_OK)

        return Response({"status": HTTP_400_BAD_REQUEST},
                        status=HTTP_400_BAD_REQUEST)
Example #18
args = vars(parser.parse_args())

if args['option'] == 'boinc':
    LOG.info('PYTHONPATH = {0}'.format(sys.path))
    # We're running from the BOINC server
    process_boinc()
else:
    # We're running from a specially created AMI
    filename, full_filename = get_ami_log_file('archive_boinc_stats')
    add_file_handler_to_root(full_filename)
    LOG.info('PYTHONPATH = {0}'.format(sys.path))
    LOG.info('About to perform sanity checks')
    if pass_sanity_checks():
        process_ami()
    else:
        LOG.error('Failed to pass sanity tests')

    # Try copying the log file to S3
    try:
        LOG.info('About to copy the log file')
        s3helper = S3Helper()
        s3helper.add_file_to_bucket(get_archive_bucket(), get_log_archive_key('archive_boinc_stats', filename), full_filename, True)
        os.remove(full_filename)
    except:
        LOG.exception('Failed to copy the log file')

    ec2_helper = EC2Helper()
    ec2_helper.release_public_ip()

LOG.info('All done')
Example #19
def build_png_image_ami():
    """
    Build the images

    :return:
    """
    # First check the galaxy exists in the database
    engine = create_engine(DB_LOGIN)
    connection = engine.connect()
    try:
        query = select([GALAXY]).distinct().where(and_(AREA.c.galaxy_id == GALAXY.c.galaxy_id, AREA.c.update_time >= GALAXY.c.image_time))

        galaxy_count = 0
        s3helper = S3Helper()
        bucket_name = get_galaxy_image_bucket()

        # Start the shutdown signal poller to check when this instance must close
        start_poll()
        galaxy_list = []

        for galaxy in connection.execute(query):
            galaxy_list.append(galaxy)

        total_galaxies = len(galaxy_list)
        processed_galaxies = 0
        processed_print_point = 50

        for galaxy in galaxy_list:

            if processed_galaxies == processed_print_point:
                LOG.info('{0} out of {1} galaxies processed'.format(processed_galaxies, total_galaxies))
                processed_print_point += 50

            processed_galaxies += 1

            LOG.info('Working on galaxy %s', galaxy[GALAXY.c.name])
            array = numpy.empty((galaxy[GALAXY.c.dimension_y], galaxy[GALAXY.c.dimension_x], len(PNG_IMAGE_NAMES)), dtype=numpy.float)
            array.fill(numpy.NaN)

            # Return the rows
            pixel_count = 0
            pixels_processed = 0
            for row in connection.execute(select([PIXEL_RESULT]).where(and_(PIXEL_RESULT.c.galaxy_id == galaxy[GALAXY.c.galaxy_id], PIXEL_RESULT.c.x > -1))):
                row__x = row[PIXEL_RESULT.c.x]
                row__y = row[PIXEL_RESULT.c.y]
                pixel_count += 1
                if row[PIXEL_RESULT.c.workunit_id] is not None:
                    pixels_processed += 1

                    # Defend against bad values
                    if row[PIXEL_RESULT.c.mu] is not None:
                        array[row__y, row__x, 0] = row[PIXEL_RESULT.c.mu]
                    if row[PIXEL_RESULT.c.m] is not None:
                        array[row__y, row__x, 1] = row[PIXEL_RESULT.c.m]
                    if row[PIXEL_RESULT.c.ldust] is not None:
                        array[row__y, row__x, 2] = row[PIXEL_RESULT.c.ldust]
                    if row[PIXEL_RESULT.c.sfr] is not None:
                        # the SFR is a log
                        array[row__y, row__x, 3] = math.pow(10, row[PIXEL_RESULT.c.sfr])

            connection.execute(GALAXY.update()
                               .where(GALAXY.c.galaxy_id == galaxy[GALAXY.c.galaxy_id])
                               .values(image_time=datetime.datetime.now(), pixel_count=pixel_count, pixels_processed=pixels_processed))
            galaxy_count += 1

            # Now write the files
            black_rgb = (0, 0, 0)
            for name in PNG_IMAGE_NAMES:
                value = 0
                height = galaxy[GALAXY.c.dimension_y]
                width = galaxy[GALAXY.c.dimension_x]
                idx = 0
                if name == 'mu':
                    idx = 0
                elif name == 'm':
                    idx = 1
                elif name == 'ldust':
                    idx = 2
                elif name == 'sfr':
                    idx = 3

                values = []
                for x in range(0, width - 1):
                    for y in range(0, height - 1):
                        value = array[y, x, idx]
                        if not math.isnan(value) and value > 0:
                            values.append(value)

                values.sort()
                if len(values) > 1000:
                    top_count = int(len(values) * 0.005)
                    top_value = values[len(values) - top_count]
                elif len(values) > 0:
                    top_value = values[len(values) - 1]
                else:
                    top_value = 1
                if len(values) > 1:
                    median_value = values[int(len(values) / 2)]
                elif len(values) > 0:
                    median_value = values[0]
                else:
                    median_value = 1

                sigma = 1 / median_value
                multiplier = 255.0 / math.asinh(top_value * sigma)

                image = Image.new("RGB", (width, height), black_rgb)
                for x in range(0, width - 1):
                    for y in range(0, height - 1):
                        value = array[y, x, idx]
                        if not math.isnan(value) and value > 0:
                            value = int(math.asinh(value * sigma) * multiplier)
                            if value > 255:
                                value = 255
                            red = FIRE_R[value]
                            green = FIRE_G[value]
                            blue = FIRE_B[value]
                            image.putpixel((x, height - y - 1), (red, green, blue))

                file_name = '{0}/image.png'.format(POGS_TMP)
                image.save(file_name)
                s3helper.add_file_to_bucket(bucket_name,
                                            get_build_png_name(get_galaxy_file_name(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id]),
                                                               name),
                                            file_name)
            if shutdown() is True:
                LOG.info('Spot Instance Terminate Notice received, build_png_image is shutting down')
                break

    except:
        LOG.exception('An exception occurred.')

    finally:
        connection.close()

    LOG.info('Built images for %d galaxies', galaxy_count)
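
The per-band scaling in build_png_image_ami() is an asinh stretch: sigma is the reciprocal of the median pixel value and multiplier normalises the chosen top value (roughly the 99.5th percentile when enough pixels exist) to 255. A minimal standalone sketch of that mapping for a single pixel:

import math


def asinh_stretch(value, median_value, top_value):
    # Reproduces the per-pixel scaling used above.
    sigma = 1.0 / median_value
    multiplier = 255.0 / math.asinh(top_value * sigma)
    scaled = int(math.asinh(value * sigma) * multiplier)
    return min(scaled, 255)

# Example: with median_value=2.0 and top_value=50.0 a pixel at the median maps
# to about 57 and a pixel at the top value maps to roughly 255.
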
Example #20
def delete_galaxy(connection, galaxy_ids):
    for galaxy_id in galaxy_ids:
        transaction = connection.begin()
        galaxy = connection.execute(
            select([GALAXY]).where(GALAXY.c.galaxy_id == galaxy_id)).first()
        if galaxy is None:
            LOG.info('Error: Galaxy with galaxy_id of %d was not found',
                     galaxy_id)
        else:
            LOG.info('Deleting Galaxy with galaxy_id of %d - %s', galaxy_id,
                     galaxy[GALAXY.c.name])
            area_count = connection.execute(
                select([func.count(AREA.c.area_id)]).where(
                    AREA.c.galaxy_id == galaxy[GALAXY.c.galaxy_id])).first()[0]
            counter = 1

            for area_id1 in connection.execute(
                    select([AREA.c.area_id]).where(AREA.c.galaxy_id == galaxy[
                        GALAXY.c.galaxy_id]).order_by(AREA.c.area_id)):
                LOG.info("Deleting galaxy {0} area {1}. {2} of {3}".format(
                    galaxy_id, area_id1[0], counter, area_count))
                connection.execute(PIXEL_RESULT.delete().where(
                    PIXEL_RESULT.c.area_id == area_id1[0]))

                # Give the rest of the world a chance to access the database
                time.sleep(0.1)
                counter += 1

                if shutdown() is True:
                    transaction.rollback()
                    raise SystemExit

            LOG.info("Deleting FITS headers for galaxy {0}".format(galaxy_id))
            connection.execute(FITS_HEADER.delete().where(
                FITS_HEADER.c.galaxy_id == galaxy[GALAXY.c.galaxy_id]))

            # Now empty the bucket of the sed files
            s3helper = S3Helper()
            bucket = s3helper.get_bucket(get_sed_files_bucket())
            galaxy_file_name = get_galaxy_file_name(galaxy[GALAXY.c.name],
                                                    galaxy[GALAXY.c.run_id],
                                                    galaxy[GALAXY.c.galaxy_id])
            for key in bucket.list(prefix='{0}/'.format(galaxy_file_name)):
                # Skip the folder placeholder key
                if key.key.endswith('/'):
                    continue

                bucket.delete_key(key)

                if shutdown() is True:
                    transaction.rollback()
                    raise SystemExit

            # Now the folder
            key = Key(bucket)
            key.key = '{0}/'.format(galaxy_file_name)
            bucket.delete_key(key)

        LOG.info('Galaxy with galaxy_id of %d was deleted', galaxy_id)
        connection.execute(
            GALAXY.update().where(GALAXY.c.galaxy_id == galaxy_id).values(
                status_id=DELETED, status_time=datetime.datetime.now()))

        if shutdown() is True:
            transaction.rollback()
            raise SystemExit

        transaction.commit()
Example #21
def store_pixels(connection, galaxy_file_name, group, dimension_x, dimension_y,
                 dimension_z, area_total, output_directory,
                 map_parameter_name):
    """
    Store the pixel data
    """
    LOG.info('Storing the pixel data for {0} - {1} areas to process'.format(
        galaxy_file_name, area_total))
    data = numpy.empty(
        (dimension_x, dimension_y, NUMBER_PARAMETERS, NUMBER_IMAGES),
        dtype=numpy.float)
    data.fill(numpy.NaN)
    data_pixel_details = group.create_dataset('pixel_details',
                                              (dimension_x, dimension_y),
                                              dtype=data_type_pixel,
                                              compression='gzip')
    data_pixel_parameters = group.create_dataset(
        'pixel_parameters', (dimension_x, dimension_y, NUMBER_PARAMETERS),
        dtype=data_type_pixel_parameter,
        compression='gzip')
    data_pixel_filter = group.create_dataset(
        'pixel_filters', (dimension_x, dimension_y, dimension_z),
        dtype=data_type_pixel_filter,
        compression='gzip')
    data_pixel_histograms_grid = group.create_dataset(
        'pixel_histograms_grid', (dimension_x, dimension_y, NUMBER_PARAMETERS),
        dtype=data_type_block_details,
        compression='gzip')

    histogram_group = group.create_group('histogram_blocks')
    histogram_list = []
    pixel_count = 0
    area_count = 0
    block_id = 1
    block_index = 0
    histogram_data = histogram_group.create_dataset(
        'block_1', (BLOCK_SIZE, ),
        dtype=data_type_pixel_histogram,
        compression='gzip')

    s3helper = S3Helper()
    bucket = s3helper.get_bucket(get_files_bucket())
    for key in bucket.list(prefix='{0}/sed/'.format(galaxy_file_name)):
        # Skip the folder placeholder key
        if key.key.endswith('/'):
            continue

        # Now process the file
        start_time = time.time()
        LOG.info('Processing file {0}'.format(key.key))
        temp_file = os.path.join(output_directory, 'temp.sed')
        key.get_contents_to_filename(temp_file)

        if is_gzip(temp_file):
            f = gzip.open(temp_file, "rb")
        else:
            f = open(temp_file, "r")

        area_id = None
        pxresult_id = None
        line_number = 0
        percentiles_next = False
        histogram_next = False
        skynet_next1 = False
        skynet_next2 = False
        map_pixel_results = {}
        list_filters = []
        try:
            for line in f:
                line_number += 1

                if line.startswith(" ####### "):
                    # Clear all the maps and stuff
                    map_pixel_results = {}
                    list_filters = []

                    # Split the line to extract the data
                    values = line.split()
                    pointName = values[1]
                    pxresult_id = pointName[3:].rstrip()
                    (x, y, area_id) = get_pixel_result(connection, pxresult_id)
                    line_number = 0
                    percentiles_next = False
                    histogram_next = False
                    skynet_next1 = False
                    skynet_next2 = False
                    pixel_count += 1
                elif pxresult_id is not None:
                    if line_number == 2:
                        filter_names = line.split()
                        filter_layer = 0
                        for filter_name in filter_names:
                            if filter_name != '#':
                                data_pixel_filter.attrs[
                                    filter_name] = filter_layer
                                filter_layer += 1
                    elif line_number == 3:
                        values = line.split()
                        for value in values:
                            list_filters.append([float(value)])
                    elif line_number == 4:
                        filter_layer = 0
                        values = line.split()
                        for value in values:
                            filter_description = list_filters[filter_layer]
                            filter_description.append(float(value))
                            filter_layer += 1
                    elif line_number == 9:
                        values = line.split()
                        map_pixel_results['i_sfh'] = float(values[0])
                        map_pixel_results['i_ir'] = float(values[1])
                        map_pixel_results['chi2'] = float(values[2])
                        map_pixel_results['redshift'] = float(values[3])
                    elif line_number == 11:
                        values = line.split()
                        data[x, y, INDEX_F_MU_SFH,
                             INDEX_BEST_FIT] = float(values[0])
                        data[x, y, INDEX_F_MU_IR,
                             INDEX_BEST_FIT] = float(values[1])
                        data[x, y, INDEX_MU_PARAMETER,
                             INDEX_BEST_FIT] = float(values[2])
                        data[x, y, INDEX_TAU_V,
                             INDEX_BEST_FIT] = float(values[3])
                        data[x, y, INDEX_SSFR_0_1GYR,
                             INDEX_BEST_FIT] = float(values[4])
                        data[x, y, INDEX_M_STARS,
                             INDEX_BEST_FIT] = float(values[5])
                        data[x, y, INDEX_L_DUST,
                             INDEX_BEST_FIT] = float(values[6])
                        data[x, y, INDEX_T_W_BC,
                             INDEX_BEST_FIT] = float(values[7])
                        data[x, y, INDEX_T_C_ISM,
                             INDEX_BEST_FIT] = float(values[8])
                        data[x, y, INDEX_XI_C_TOT,
                             INDEX_BEST_FIT] = float(values[9])
                        data[x, y, INDEX_XI_PAH_TOT,
                             INDEX_BEST_FIT] = float(values[10])
                        data[x, y, INDEX_XI_MIR_TOT,
                             INDEX_BEST_FIT] = float(values[11])
                        data[x, y, INDEX_XI_W_TOT,
                             INDEX_BEST_FIT] = float(values[12])
                        data[x, y, INDEX_TAU_V_ISM,
                             INDEX_BEST_FIT] = float(values[13])
                        data[x, y, INDEX_M_DUST,
                             INDEX_BEST_FIT] = float(values[14])
                        data[x, y, INDEX_SFR_0_1GYR,
                             INDEX_BEST_FIT] = float(values[15])
                    elif line_number == 13:
                        filter_layer = 0
                        values = line.split()
                        for value in values:
                            filter_description = list_filters[filter_layer]
                            if filter_layer < dimension_z:
                                data_pixel_filter[x, y, filter_layer] = (
                                    filter_description[0],
                                    filter_description[1],
                                    float(value),
                                )
                                filter_layer += 1
                    elif line_number > 13:
                        if line.startswith("# ..."):
                            parts = line.split('...')
                            parameter_name = parts[1].strip()
                            parameter_name_id = map_parameter_name[
                                parameter_name]
                            percentiles_next = False
                            histogram_next = True
                            skynet_next1 = False
                            skynet_next2 = False
                            histogram_list = []
                        elif line.startswith(
                                "#....percentiles of the PDF......"):
                            percentiles_next = True
                            histogram_next = False
                            skynet_next1 = False
                            skynet_next2 = False

                            # Write out the histogram into a block for compression improvement
                            data_pixel_histograms_grid[x, y,
                                                       parameter_name_id -
                                                       1] = (
                                                           block_id,
                                                           block_index,
                                                           len(histogram_list))
                            for pixel_histogram_item in histogram_list:
                                # Do we need a new block
                                if block_index >= BLOCK_SIZE:
                                    block_id += 1
                                    block_index = 0
                                    histogram_data = histogram_group.create_dataset(
                                        'block_{0}'.format(block_id),
                                        (BLOCK_SIZE, ),
                                        dtype=data_type_pixel_histogram,
                                        compression='gzip')

                                histogram_data[block_index] = (
                                    pixel_histogram_item[0],
                                    pixel_histogram_item[1],
                                )
                                block_index += 1
                        elif line.startswith(" #...theSkyNet"):
                            percentiles_next = False
                            histogram_next = False
                            skynet_next1 = True
                            skynet_next2 = False
                        elif line.startswith("# theSkyNet2"):
                            percentiles_next = False
                            histogram_next = False
                            skynet_next1 = False
                            skynet_next2 = True
                        elif percentiles_next:
                            values = line.split()
                            z = parameter_name_id - 1
                            data[x, y, z,
                                 INDEX_PERCENTILE_2_5] = float(values[0])
                            data[x, y, z,
                                 INDEX_PERCENTILE_16] = float(values[1])
                            data[x, y, z,
                                 INDEX_PERCENTILE_50] = float(values[2])
                            data[x, y, z,
                                 INDEX_PERCENTILE_84] = float(values[3])
                            data[x, y, z,
                                 INDEX_PERCENTILE_97_5] = float(values[4])
                            percentiles_next = False
                        elif histogram_next:
                            values = line.split()
                            hist_value = float(values[1])
                            if hist_value > MIN_HIST_VALUE and not math.isnan(
                                    hist_value):
                                histogram_list.append(
                                    (float(values[0]), hist_value))
                        elif skynet_next1:
                            values = line.split()
                            data_pixel_details[x, y] = (
                                pxresult_id,
                                area_id,
                                map_pixel_results['i_sfh'],
                                map_pixel_results['i_ir'],
                                map_pixel_results['chi2'],
                                map_pixel_results['redshift'],
                                float(values[0]),
                                float(values[2]),
                                float(values[3]),
                                float(values[4]),
                            )
                            skynet_next1 = False
                        elif skynet_next2:
                            # We have the highest bin probability values which require the parameter_id
                            values = line.split()
                            high_prob_bin = float(values[0]) if float(
                                values[0]) is not None else numpy.NaN
                            first_prob_bin = float(values[1]) if float(
                                values[1]) is not None else numpy.NaN
                            last_prob_bin = float(values[2]) if float(
                                values[2]) is not None else numpy.NaN
                            bin_step = float(values[3]) if float(
                                values[3]) is not None else numpy.NaN
                            z = parameter_name_id - 1
                            data[x, y, z,
                                 INDEX_HIGHEST_PROB_BIN] = high_prob_bin
                            data_pixel_parameters[x, y, z] = (
                                first_prob_bin,
                                last_prob_bin,
                                bin_step,
                            )
                            skynet_next2 = False

        except IOError:
            LOG.error('IOError after {0} lines'.format(line_number))
        finally:
            f.close()

        area_count += 1
        LOG.info('{0:0.3f} seconds for file {1}. {2} of {3} areas.'.format(
            time.time() - start_time, key.key, area_count, area_total))

    pixel_dataset = group.create_dataset('pixels',
                                         data=data,
                                         compression='gzip')
    pixel_dataset.attrs['DIM3_F_MU_SFH'] = INDEX_F_MU_SFH
    pixel_dataset.attrs['DIM3_F_MU_IR'] = INDEX_F_MU_IR
    pixel_dataset.attrs['DIM3_MU_PARAMETER'] = INDEX_MU_PARAMETER
    pixel_dataset.attrs['DIM3_TAU_V'] = INDEX_TAU_V
    pixel_dataset.attrs['DIM3_SSFR_0_1GYR'] = INDEX_SSFR_0_1GYR
    pixel_dataset.attrs['DIM3_M_STARS'] = INDEX_M_STARS
    pixel_dataset.attrs['DIM3_L_DUST'] = INDEX_L_DUST
    pixel_dataset.attrs['DIM3_T_C_ISM'] = INDEX_T_C_ISM
    pixel_dataset.attrs['DIM3_T_W_BC'] = INDEX_T_W_BC
    pixel_dataset.attrs['DIM3_XI_C_TOT'] = INDEX_XI_C_TOT
    pixel_dataset.attrs['DIM3_XI_PAH_TOT'] = INDEX_XI_PAH_TOT
    pixel_dataset.attrs['DIM3_XI_MIR_TOT'] = INDEX_XI_MIR_TOT
    pixel_dataset.attrs['DIM3_XI_W_TOT'] = INDEX_XI_W_TOT
    pixel_dataset.attrs['DIM3_TAU_V_ISM'] = INDEX_TAU_V_ISM
    pixel_dataset.attrs['DIM3_M_DUST'] = INDEX_M_DUST
    pixel_dataset.attrs['DIM3_SFR_0_1GYR'] = INDEX_SFR_0_1GYR

    pixel_dataset.attrs['DIM4_BEST_FIT'] = INDEX_BEST_FIT
    pixel_dataset.attrs['DIM4_PERCENTILE_50'] = INDEX_PERCENTILE_50
    pixel_dataset.attrs['DIM4_HIGHEST_PROB_BIN'] = INDEX_HIGHEST_PROB_BIN
    pixel_dataset.attrs['DIM4_PERCENTILE_2_5'] = INDEX_PERCENTILE_2_5
    pixel_dataset.attrs['DIM4_PERCENTILE_16'] = INDEX_PERCENTILE_16
    pixel_dataset.attrs['DIM4_PERCENTILE_84'] = INDEX_PERCENTILE_84
    pixel_dataset.attrs['DIM4_PERCENTILE_97_5'] = INDEX_PERCENTILE_97_5

    LOG.info('Created {0} blocks'.format(block_id))

    return pixel_count
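
Each histogram is addressed by the (block_id, block_index, length) triple written into pixel_histograms_grid. A minimal sketch of reading one back, assuming the histogram does not span a block boundary (hypothetical helper; group is the same h5py group used above):

def read_histogram(group, x, y, parameter_index):
    # Look up where the histogram for this pixel/parameter was written.
    entry = group['pixel_histograms_grid'][x, y, parameter_index]
    block_id, block_index, length = entry[0], entry[1], entry[2]
    block = group['histogram_blocks']['block_{0}'.format(block_id)]
    return block[block_index:block_index + length]
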
Example #22
    def assimilate_handler(self, wu, results, canonical_result):
        """
        Process the Results.
        """
        self.logDebug("Start of assimilate_handler for wu %d\n", wu.id)
        connection = None
        transaction = None
        try:
            if wu.canonical_result:
                out_file = self.get_file_path(canonical_result)
                self.area = None
                if out_file:
                    if os.path.isfile(out_file):
                        pass
                    else:
                        self.logDebug("File [%s] not found\n", out_file)
                        out_file = None

                if out_file:
                    self.logDebug("Reading File [%s]\n", out_file)
                    start = time.time()
                    connection = ENGINE.connect()
                    transaction = connection.begin()
                    resultCount = self._process_result(connection, out_file, wu)
                    if self.noinsert:
                        transaction.rollback()
                    else:
                        if not resultCount:
                            self.logCritical("No results were found in the output file\n")

                        if self._area_id is None:
                            self.logDebug("The Area was not found\n")
                        else:
                            connection.execute(AREA.update()
                                               .where(AREA.c.area_id == self._area_id)
                                               .values(workunit_id=wu.id, update_time=datetime.datetime.now()))

                            user_id_set = set()
                            for result in results:
                                if result.user and result.validate_state == boinc_db.VALIDATE_STATE_VALID:
                                    user_id = result.user.id
                                    if user_id not in user_id_set:
                                        user_id_set.add(user_id)

                            connection.execute(AREA_USER.delete().where(AREA_USER.c.area_id == self._area_id))
                            insert = AREA_USER.insert()
                            for user_id in user_id_set:
                                connection.execute(insert, area_id=self._area_id, userid=user_id)

                            # Copy the file to S3
                            s3helper = S3Helper()
                            s3helper.add_file_to_bucket(get_files_bucket(),
                                                        get_key_sed(self._galaxy_name, self._run_id, self._galaxy_id, self._area_id),
                                                        out_file,
                                                        reduced_redundancy=True)

                        time_taken = '{0:.2f}'.format(time.time() - start)
                        self.logDebug("Saving %d results for workunit %d in %s seconds\n", resultCount, wu.id, time_taken)
                        transaction.commit()
                    connection.close()
                else:
                    self.logCritical("The output file was not found\n")
            else:
                self.logDebug("No canonical_result for workunit\n")
                self.report_errors(wu)
        except:
            if transaction is not None:
                transaction.rollback()
            if connection is not None:
                connection.close()
            print "Unexpected error:", sys.exc_info()[0]
            traceback.print_exception(sys.exc_info()[0], sys.exc_info()[1], sys.exc_info()[2])
            self.logCritical("Unexpected error occurred, retrying...\n")
            return -1

        return 0
Example #23
def get_data(output_directory):
    """
    Get the stats from the S3 archive and build the csv files
    :param output_directory: where to store the files
    :return:
    """
    done_dates = get_done_dates()

    # Now get ready to load the files
    keys_being_restored = []
    s3helper = S3Helper()
    bucket = s3helper.get_bucket(get_archive_bucket())
    set_filenames = set()
    for prefix in bucket.list(prefix='stats/', delimiter='/'):
        elements = prefix.name.split('/')
        elements = elements[1].split('_')
        date_file = date(int(elements[1]), int(elements[2]), int(elements[3]))
        if date_file not in done_dates:
            stats_file = '{0}_{1}_{2}_user.gz'.format(elements[1], elements[2],
                                                      elements[3])
            full_filename = os.path.join(output_directory, stats_file)
            if full_filename in set_filenames:
                # Ignore
                pass
            elif not os.path.exists(full_filename) or os.path.getsize(
                    full_filename) == 9:
                # A 9-byte file is the 'Restoring' placeholder written below
                set_filenames.add(full_filename)
                key = bucket.get_key(os.path.join(prefix.name, 'user.gz'))
                if key is not None:
                    if key.ongoing_restore or key.storage_class == 'GLACIER':
                        LOG.info('Restoring {0}'.format(key.name))
                        # We need to retrieve it
                        if not key.ongoing_restore:
                            key.restore(days=5)
                        keys_being_restored.append([key.name, full_filename])

                        # Put an empty file in the directory
                        if not os.path.exists(full_filename):
                            output_file = open(full_filename, "wb")
                            output_file.write('Restoring')
                            output_file.close()
                    else:
                        # Put the file in the storage area
                        LOG.info('Fetching {0}'.format(key.name))
                        key.get_contents_to_filename(full_filename)

    # Now we have to wait for all the files we need to be restored
    for key_pair in keys_being_restored:
        key = bucket.get_key(key_pair[0])
        if key.ongoing_restore:
            time.sleep(300)
        else:
            # The file has been restored so copy it
            LOG.info('Fetching {0}'.format(key_pair[0]))
            key.get_contents_to_filename(key_pair[1])

    # Build the prepared statements
    insert_usage = USAGE.insert()
    insert_individual = INDIVIDUAL.insert()

    # Now build up the list of filenames
    for file_name in glob.glob(os.path.join(output_directory, '*_user.gz')):
        (head, tail) = os.path.split(file_name)
        elements = tail.split('_')
        date_file = date(int(elements[0]), int(elements[1]), int(elements[2]))

        if date_file not in done_dates:
            # Read the contents
            LOG.info('Processing {0}'.format(file_name))
            gzip_file = gzip.open(file_name, 'rb')
            contents = gzip_file.read()
            gzip_file.close()

            # Extract the XML data
            root = ET.fromstring(contents)

            # Initialise
            gflops = 0.0
            active_users = 0
            registered_users = 0
            transaction = connection.begin()

            # The users are in a random order
            for user in root:
                user_id = user.find('id').text
                user_id = int(user_id)

                expavg_credit = user.find('expavg_credit').text
                expavg_credit = float(expavg_credit)

                connection.execute(insert_individual,
                                   date=date_file,
                                   user_id=user_id,
                                   expavg_credit=expavg_credit)

                registered_users += 1

                if expavg_credit > 1:
                    active_users += 1

                gflops += expavg_credit

            connection.execute(insert_usage,
                               date=date_file,
                               gflops=gflops / COBBLESTONE_FACTOR,
                               active_users=active_users,
                               registered_users=registered_users)
            transaction.commit()
Example #24
    def process_file(self, registration):
        """
        Process a registration.

        :param registration:
        """
        self._filename = registration[REGISTER.c.filename]
        self._galaxy_name = registration[REGISTER.c.galaxy_name]
        self._galaxy_type = registration[REGISTER.c.galaxy_type]
        self._priority = registration[REGISTER.c.priority]
        self._redshift = registration[REGISTER.c.redshift]
        self._run_id = registration[REGISTER.c.run_id]
        self._sigma = registration[REGISTER.c.sigma]
        self._sigma_filename = registration[REGISTER.c.sigma_filename]

        # Have we files that we can use for this?
        self._rounded_redshift = self._get_rounded_redshift()
        if self._rounded_redshift is None:
            LOG.error('No models matching the redshift of %.4f', self._redshift)
            return 0, 0

        self._hdu_list = pyfits.open(self._filename, memmap=True)
        self._layer_count = len(self._hdu_list)

        # Do we need to open and sort the S/N Ratio file
        if self._sigma_filename is not None:
            self._sigma = 0.0
            self._signal_noise_hdu = pyfits.open(self._sigma_filename, memmap=True)
            if self._layer_count != len(self._signal_noise_hdu):
                LOG.error('The layer counts do not match %d vs %d', self._layer_count, len(self._signal_noise_hdu))
                return 0, 0
        else:
            self._sigma = float(self._sigma)

        self._end_y = self._hdu_list[0].data.shape[0]
        self._end_x = self._hdu_list[0].data.shape[1]

        LOG.info("Image dimensions: %(x)d x %(y)d x %(z)d => %(pix).2f Mpixels" % {'x': self._end_x, 'y': self._end_y, 'z': self._layer_count, 'pix': self._end_x * self._end_y / 1000000.0})

        # Get the flops estimate and cobblestone factor
        run = self._connection.execute(select([RUN]).where(RUN.c.run_id == self._run_id)).first()
        self._fpops_est_per_pixel = run[RUN.c.fpops_est]
        self._cobblestone_scaling_factor = run[RUN.c.cobblestone_factor]

        # Create and save the object
        datetime_now = datetime.now()
        result = self._connection.execute(GALAXY.insert().values(name=self._galaxy_name,
                                                                 dimension_x=self._end_x,
                                                                 dimension_y=self._end_y,
                                                                 dimension_z=self._layer_count,
                                                                 redshift=self._redshift,
                                                                 sigma=self._sigma,
                                                                 create_time=datetime_now,
                                                                 image_time=datetime_now,
                                                                 galaxy_type=self._galaxy_type,
                                                                 ra_cent=0,
                                                                 dec_cent=0,
                                                                 pixel_count=0,
                                                                 pixels_processed=0,
                                                                 run_id=self._run_id))
        self._galaxy_id = result.inserted_primary_key[0]
        LOG.info("Writing %s to database", self._galaxy_name)

        # Store the fits header
        self._store_fits_header()

        # Get the filters we're using for this run and sort the layers
        self._get_filters_sort_layers()

        # Build the template file we need if necessary
        self._build_template_file()

        # Copy the filter and model files we need
        self._copy_important_files()

        # Now break up the galaxy into chunks
        self._break_up_galaxy()
        self._connection.execute(GALAXY.update().where(GALAXY.c.galaxy_id == self._galaxy_id).values(pixel_count=self._pixel_count))

        LOG.info('Building the images')
        galaxy_file_name = get_galaxy_file_name(self._galaxy_name, self._run_id, self._galaxy_id)
        s3helper = S3Helper()
        image = FitsImage(self._connection)
        image.build_image(self._filename, galaxy_file_name, self._galaxy_id, get_galaxy_image_bucket())

        # Copy the fits file to S3 - renamed to make it unique
        bucket_name = get_files_bucket()
        s3helper.add_file_to_bucket(bucket_name, get_key_fits(self._galaxy_name, self._run_id, self._galaxy_id), self._filename)
        if self._sigma_filename is not None:
            s3helper.add_file_to_bucket(bucket_name, get_key_sigma_fits(self._galaxy_name, self._run_id, self._galaxy_id), self._sigma_filename)

        return self._work_units_added, self._pixel_count
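
process_file is driven by a row from the REGISTER table and returns a (work_units_added, pixel_count) pair. A minimal driver sketch follows; the loop, the priority ordering and the processor argument are assumptions rather than part of the source, while LOG and REGISTER are the module-level names used above.

from sqlalchemy import select

def process_pending_registrations(connection, processor):
    # Drive process_file from pending REGISTER rows, highest priority first
    # (hypothetical ordering, for illustration only).
    for registration in connection.execute(
            select([REGISTER]).order_by(REGISTER.c.priority.desc())):
        work_units_added, pixel_count = processor.process_file(registration)
        LOG.info('Added %d work units covering %d pixels for %s',
                 work_units_added, pixel_count,
                 registration[REGISTER.c.galaxy_name])
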
Exemplo n.º 25
0
def sync(data_type='all', last_n_days=None):
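    """
    Sync this project's files with S3 by delegating to S3Helper.sync.

    :param data_type: passed through to S3Helper.sync ('all' by default)
    :param last_n_days: passed through to S3Helper.sync to limit the sync window
    """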
    project_info = get_project_info()
    project_name = project_info['name']
    s3_helper = S3Helper()
    s3_helper.sync(project_name, data_type=data_type, last_n_days=last_n_days)
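
A usage sketch for the wrapper above; the 'hdf5' data_type value is an assumption for illustration, since the accepted values are whatever S3Helper.sync supports:

sync()                                  # sync everything for this project
sync(data_type='hdf5', last_n_days=7)   # hypothetical: only last week's HDF5 files
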
Exemplo n.º 26
0
def generate_files(connection, hdf5_request_galaxy_ids, email, features,
                   layers, pixel_types):
    """
    Get the FITS files for this request

    :type connection: The database connection
    :param pixel_types:
    :param hdf5_request_galaxy_ids: the galaxy id
    :param email:
    :param features:
    :param layers:
    :return:
    """
    uuid_string = str(uuid.uuid4())
    results = []
    available_galaxies = []
    s3_helper = S3Helper()
    bucket_name = get_saved_files_bucket()

    # Check whether all the requested galaxies are available or not.
    for hdf5_request_galaxy in hdf5_request_galaxy_ids:
        galaxy = connection.execute(
            select([GALAXY]).where(
                GALAXY.c.galaxy_id == hdf5_request_galaxy.galaxy_id)).first()
        hdf5_request_galaxy = connection.execute(
            select([
                HDF5_REQUEST_GALAXY
            ]).where(HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id ==
                     hdf5_request_galaxy.hdf5_request_galaxy_id)).first()
        state = hdf5_request_galaxy.state

        if state != 0:
            LOG.info('Skipping {0}, state is {1}'.format(
                galaxy[GALAXY.c.name], state))
            continue  # Skip

        key = get_key_hdf5(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id],
                           galaxy[GALAXY.c.galaxy_id])

        if s3_helper.file_exists(bucket_name, key):
            if s3_helper.file_archived(bucket_name, key):
                # file is archived
                if s3_helper.file_restoring(bucket_name, key):
                    # if file is restoring, just need to wait for it
                    LOG.info(
                        'Galaxy {0} is still restoring from glacier'.format(
                            galaxy[GALAXY.c.name]))
                else:
                    # if file is not restoring, need to request.
                    file_size = s3_helper.file_size(bucket_name, key)

                    if restore_file_size_check(connection, bucket_name,
                                               file_size):
                        # We're good to restore
                        LOG.info(
                            'Making request for archived galaxy {0}'.format(
                                galaxy[GALAXY.c.name]))
                        s3_helper.restore_archived_file(bucket_name, key)

                        # Record this restore's size and time so the daily
                        # restore-size check can account for it on later runs
                        connection.execute(
                            HDF5_REQUEST_GALAXY_SIZE.insert(),
                            hdf5_request_galaxy_id=hdf5_request_galaxy[
                                'hdf5_request_galaxy_id'],
                            size=file_size,
                            request_time=seconds_since_epoch(datetime.now()))
                    else:
                        # Don't restore or we risk spending a lot of money
                        LOG.info(
                            'Daily galaxy restore size hit. Cannot request archived galaxy.'
                        )
            else:
                # file is not archived
                LOG.info('Galaxy {0} is available in s3'.format(
                    galaxy[GALAXY.c.name]))
                available_galaxies.append(hdf5_request_galaxy)
        else:
            LOG.error('Galaxy {0} does not exist on s3 or glacier!'.format(
                galaxy[GALAXY.c.name]))

    total_request_galaxies = len(hdf5_request_galaxy_ids)
    LOG.info(
        'Need to have {0} galaxies available ({1} currently available)'.format(
            total_request_galaxies * GALAXY_EMAIL_THRESHOLD,
            len(available_galaxies)))
    # Only proceed if at least the threshold fraction of galaxies is available
    if len(available_galaxies) >= total_request_galaxies * GALAXY_EMAIL_THRESHOLD:
        LOG.info('{0}/{1} (>= {2}%) galaxies are available. Email will be sent'.format(
            len(available_galaxies), total_request_galaxies,
            GALAXY_EMAIL_THRESHOLD * 100))
        remaining_galaxies = total_request_galaxies - len(available_galaxies)

        for hdf5_request_galaxy in available_galaxies:
            result = HDF5ToFitsResult()
            results.append(result)
            connection.execute(HDF5_REQUEST_GALAXY.update().where(
                HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id ==
                hdf5_request_galaxy.hdf5_request_galaxy_id).values(state=1))
            # noinspection PyBroadException
            try:
                galaxy = connection.execute(
                    select([GALAXY
                            ]).where(GALAXY.c.galaxy_id ==
                                     hdf5_request_galaxy.galaxy_id)).first()
                result.galaxy_name = galaxy[GALAXY.c.name]
                LOG.info('Processing {0} ({1}) for {2}'.format(
                    galaxy[GALAXY.c.name], galaxy[GALAXY.c.galaxy_id], email))

                # make sure the galaxy is available
                if galaxy[GALAXY.c.status_id] == STORED or galaxy[
                        GALAXY.c.status_id] == DELETED:
                    output_dir = tempfile.mkdtemp()
                    try:
                        s3_helper = S3Helper()
                        LOG.info('Getting HDF5 file to {0}'.format(output_dir))
                        tmp_file = get_hdf5_file(s3_helper, output_dir,
                                                 galaxy[GALAXY.c.name],
                                                 galaxy[GALAXY.c.run_id],
                                                 galaxy[GALAXY.c.galaxy_id])
                        LOG.info('File stored in {0}'.format(tmp_file))

                        # We have the file
                        if os.path.isfile(tmp_file):
                            int_flux_output = os.path.join(
                                output_dir, 'intflux')
                            rad_output = os.path.join(output_dir, 'rad')

                            if not os.path.exists(int_flux_output):
                                os.mkdir(int_flux_output)

                            if not os.path.exists(rad_output):
                                os.mkdir(rad_output)

                            file_names = process_hdf5_file(
                                tmp_file,
                                galaxy[GALAXY.c.name],
                                galaxy[GALAXY.c.galaxy_id],
                                pixel_types,
                                features,
                                result,
                                layers,
                                output_dir,
                                rad_output,
                                int_flux_output,
                            )

                            url = zip_files(
                                s3_helper,
                                get_galaxy_file_name(
                                    galaxy[GALAXY.c.name],
                                    galaxy[GALAXY.c.run_id],
                                    galaxy[GALAXY.c.galaxy_id]), uuid_string,
                                file_names, output_dir)

                            connection.execute(HDF5_REQUEST_GALAXY.update(
                            ).where(
                                HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id ==
                                hdf5_request_galaxy.hdf5_request_galaxy_id
                            ).values(state=2,
                                     link=url,
                                     link_expires_at=datetime.now() +
                                     timedelta(days=10)))

                            result.error = None
                            result.link = url

                    except S3ResponseError as e:  # Handling for a strange s3 error
                        LOG.error(
                            'Error retrieving galaxy {0} from s3. Retrying next run'
                            .format(galaxy[GALAXY.c.name]))
                        LOG.error('{0}'.format(str(e)))
                        key = get_key_hdf5(galaxy[GALAXY.c.name],
                                           galaxy[GALAXY.c.run_id],
                                           galaxy[GALAXY.c.galaxy_id])
                        LOG.info('Key: {0}'.format(key))
                        LOG.info('Exists: {0}'.format(
                            s3_helper.file_exists(bucket_name, key)))
                        result.error = traceback.format_exc()
                        remaining_galaxies += 1
                    finally:
                        # Delete the temp files now we're done
                        shutil.rmtree(output_dir)

                else:
                    connection.execute(HDF5_REQUEST_GALAXY.update().where(
                        HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id ==
                        hdf5_request_galaxy.hdf5_request_galaxy_id).values(
                            state=3))
                    result.error = 'Cannot process {0} ({1}) as the HDF5 file has not been generated'.format(
                        galaxy[GALAXY.c.name], galaxy[GALAXY.c.galaxy_id])
                    LOG.info(result.error)
            except Exception:
                LOG.error('Unexpected error while processing the HDF5 request')
                result.error = traceback.format_exc()
                connection.execute(HDF5_REQUEST_GALAXY.update().where(
                    HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id ==
                    hdf5_request_galaxy.hdf5_request_galaxy_id).values(
                        state=3))

        send_email(email, results, features, layers, pixel_types,
                   remaining_galaxies)
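
For reference, the email gate in generate_files fires only once the number of available galaxies reaches the threshold fraction of the request. A tiny worked illustration, using a hypothetical GALAXY_EMAIL_THRESHOLD of 0.8:

# Illustration only: the GALAXY_EMAIL_THRESHOLD value below is assumed.
GALAXY_EMAIL_THRESHOLD = 0.8
total_request_galaxies = 10
available_count = 8

if available_count >= total_request_galaxies * GALAXY_EMAIL_THRESHOLD:
    # 8 >= 10 * 0.8, so the results email is sent; the two galaxies that are
    # still restoring are reported via remaining_galaxies.
    print('8/10 (>= 80%) galaxies available; sending results email')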