def process_galaxy(connection, galaxies, verbosity):
    """
    Compute and store the mean / standard deviation details for each galaxy.

    :param connection: open SQLAlchemy connection
    :param galaxies: iterable of GALAXY rows to process
    :param verbosity: truthy to log per-galaxy progress
    """
    # Make sure the per-galaxy dynamic tables exist before touching them.
    build_dynamic_tables(connection)

    for galaxy_row in galaxies:
        galaxy_id = galaxy_row[GALAXY.c.galaxy_id]
        steps_done = get_step_done_ids(
            connection,
            galaxy_id,
            STEP_DONE_ID_MEAN_STANDARD_DEVIATION,
            True
        )
        if verbosity:
            LOG.info(
                'galaxy_id: {0}, galaxy_name: {1}'.format(
                    galaxy_id,
                    galaxy_row[GALAXY.c.name]
                )
            )

        # Only tables not yet marked as done need processing.
        tables_required = get_tables_required(steps_done)
        if not tables_required:
            continue

        for table in tables_required:
            # One transaction per table so a failure rolls back cleanly.
            with connection.begin():
                calculator = CreateMeanStandardDeviation(connection, table, galaxy_id)
                calculator.calculate_details()
                calculator.update_details()
                add_step_done_id(
                    connection,
                    galaxy_id,
                    STEP_DONE_ID_MEAN_STANDARD_DEVIATION,
                    table.name
                )
def process_galaxy(connection, galaxies, verbosity):
    """
    Build the pixel mask for every galaxy that does not have one yet.

    :param connection: open SQLAlchemy connection
    :param galaxies: iterable of GALAXY rows to process
    :param verbosity: truthy to log per-galaxy progress
    """
    # Make sure the per-galaxy dynamic tables exist before touching them.
    build_dynamic_tables(connection)

    for galaxy_row in galaxies:
        galaxy_id = galaxy_row[GALAXY.c.galaxy_id]
        steps_done = get_step_done_ids(connection, galaxy_id, STEP_DONE_ID_BUILD_MASK)

        if verbosity:
            LOG.info(
                'galaxy_id: {0}, galaxy_name: {1}'.format(
                    galaxy_id,
                    galaxy_row[GALAXY.c.name]
                )
            )

        # Skip galaxies whose mask has already been built.
        if STEP_DONE_ID_BUILD_MASK in steps_done:
            continue

        # Build the mask and record the step inside a single transaction.
        with connection.begin():
            mask_builder = ProcessGalaxyMasks(connection, galaxy_id)
            mask_builder.run()
            add_step_done_id(
                connection,
                galaxy_id,
                STEP_DONE_ID_BUILD_MASK
            )
def process_galaxy(connection_mysql, connection_mongodb, database_name, galaxies, radius):
    """
    Copy galaxy metadata and pixel data from MySQL into MongoDB.

    :param connection_mysql: open SQLAlchemy connection to the source database
    :param connection_mongodb: open MongoDB client connection
    :param database_name: name of the target MongoDB database
    :param galaxies: iterable of GALAXY rows to export
    :param radius: mask radius used when selecting pixel data
    """
    # Resolve the MongoDB collections we write into.
    mongo_db = Database(connection_mongodb, database_name)
    collection_galaxies = Collection(mongo_db, COLLECTION_GALAXIES)
    collection_galaxy_data_sed = Collection(mongo_db, COLLECTION_GALAXY_DATA_SED)
    collection_galaxy_data_original = Collection(mongo_db, COLLECTION_GALAXY_DATA_ORIGINAL)

    # Make sure the per-galaxy dynamic tables exist before reading them.
    build_dynamic_tables(connection_mysql)

    for galaxy_row in galaxies:
        time_start = time.time()
        galaxy_id = galaxy_row[GALAXY.c.galaxy_id]

        # The image detail row is needed for later calculations; skip the
        # galaxy entirely when it is missing.
        image_detail = connection_mysql.execute(
            select([IMAGE_DETAIL]).where(IMAGE_DETAIL.c.galaxy_id == galaxy_id)
        ).first()
        if image_detail is None:
            continue

        sed_rows = get_galaxy_data(
            connection_mysql,
            galaxy_id,
            DATA_TABLES_SED,
            radius
        )
        original_rows = get_galaxy_data(
            connection_mysql,
            galaxy_id,
            DATA_TABLES_ORIGINAL,
            radius
        )

        # Only export galaxies that have both SED and original pixel data.
        if len(sed_rows) > 0 and len(original_rows) > 0:
            galaxy_details = get_galaxy_details(connection_mysql, galaxy_id)
            galaxy_document = {
                'galaxy_id': galaxy_id,
                'run_id': galaxy_row[GALAXY.c.run_id],
                'name': galaxy_row[GALAXY.c.name],
                'dimension_x': galaxy_row[GALAXY.c.dimension_x],
                'dimension_y': galaxy_row[GALAXY.c.dimension_y],
                'dimension_z': galaxy_row[GALAXY.c.dimension_z],
                # redshift is a Decimal in MySQL; Mongo needs a plain float
                'redshift': float(galaxy_row[GALAXY.c.redshift]),
                'galaxy_type': galaxy_row[GALAXY.c.galaxy_type],
                'galaxy_detail': galaxy_details,
                'mask_radius': radius
            }

            # Write the metadata document and both bulk data sets to Mongo.
            collection_galaxies.insert_one(galaxy_document)
            collection_galaxy_data_sed.insert_many(sed_rows)
            collection_galaxy_data_original.insert_many(original_rows)

        time_end = time.time()
        LOG.info('galaxy_id: {0}, time: {1:.3f}s'.format(galaxy_id, time_end - time_start))
def process_galaxy(connection, galaxies, verbosity):
    """
    Calculate the centroid and centre of mass for each masked galaxy and
    store the result in IMAGE_DETAIL.

    :param connection: open SQLAlchemy connection
    :param galaxies: iterable of GALAXY rows to process
    :param verbosity: int; >= 1 enables informational logging
    """
    # Make sure the per-galaxy dynamic tables exist before touching them.
    build_dynamic_tables(connection)

    for galaxy in galaxies:
        galaxy_id = galaxy[GALAXY.c.galaxy_id]
        steps_done = get_step_done_ids(
            connection,
            galaxy_id,
            [STEP_DONE_ID_BUILD_MASK, STEP_DONE_ID_CENTROID_CENTRE_OF_MASS])

        # The centroid can only be computed after the mask has been built.
        if STEP_DONE_ID_BUILD_MASK in steps_done:
            if STEP_DONE_ID_CENTROID_CENTRE_OF_MASS not in steps_done:
                with connection.begin():
                    centroid_center_of_mass = CentroidCentreOfMass(connection, galaxy, verbosity)
                    centroid_center_of_mass.calculate_results()

                    if len(centroid_center_of_mass.layers) > 0:
                        # .item() converts the 1-element numpy values to plain
                        # Python floats (numpy.asscalar was removed in NumPy 1.23).
                        x_centroid = centroid_center_of_mass.centroid[0].item()
                        y_centroid = centroid_center_of_mass.centroid[1].item()
                        x_centre_of_mass = centroid_center_of_mass.centre_of_mass[0].item()
                        y_centre_of_mass = centroid_center_of_mass.centre_of_mass[1].item()
                    else:
                        # No layers found - record sentinel values instead.
                        x_centroid = y_centroid = -1.0
                        x_centre_of_mass = y_centre_of_mass = -1.0

                    connection.execute(
                        IMAGE_DETAIL.insert(),
                        galaxy_id=galaxy_id,
                        x_image=centroid_center_of_mass.dimension_x / 2.0,
                        y_image=centroid_center_of_mass.dimension_y / 2.0,
                        x_centroid=x_centroid,
                        y_centroid=y_centroid,
                        x_centre_of_mass=x_centre_of_mass,
                        y_centre_of_mass=y_centre_of_mass
                    )
                    add_step_done_id(connection, galaxy_id, STEP_DONE_ID_CENTROID_CENTRE_OF_MASS)
            elif verbosity >= 1:
                LOG.info('Nothing to do for the galaxy {0}'.format(galaxy_id))
        elif verbosity >= 1:
            LOG.info('The mask has not been built for the galaxy {0}'.format(galaxy_id))
def process_galaxy(connection, galaxies, verbosity):
    """
    Build the galaxy detail rows for every table that still needs them.

    :param connection: open SQLAlchemy connection
    :param galaxies: iterable of GALAXY rows to process
    :param verbosity: truthy to log per-galaxy progress
    """
    # Make sure the per-galaxy dynamic tables exist before touching them.
    build_dynamic_tables(connection)

    for galaxy_row in galaxies:
        time_start = time.time()
        galaxy_id = galaxy_row[GALAXY.c.galaxy_id]
        steps_done = get_step_done_ids(
            connection,
            galaxy_id,
            STEP_DONE_ID_BUILD_GALAXY_DETAILS,
            True
        )

        if verbosity:
            LOG.info(
                'galaxy_id: {0}, galaxy_name: {1}'.format(
                    galaxy_id,
                    galaxy_row[GALAXY.c.name]
                )
            )

        # Only tables not yet marked as done need processing.
        tables_required = get_tables_required(steps_done)
        if tables_required:
            for table in tables_required:
                # One transaction per table so a failure rolls back cleanly.
                with connection.begin():
                    time_start_update = time.time()

                    detail_builder = CreateGalaxyDetail(connection, table, galaxy_id)
                    detail_builder.calculate_details()
                    detail_builder.insert_details()
                    add_step_done_id(
                        connection,
                        galaxy_id,
                        STEP_DONE_ID_BUILD_GALAXY_DETAILS,
                        table.name
                    )

                    time_end_update = time.time()
                    LOG.info(
                        'galaxy_id: {0}, table: {1}, time: {2:.3f}s'.format(
                            galaxy_id,
                            table.name,
                            time_end_update - time_start_update
                        )
                    )

        time_end = time.time()
        LOG.info(
            'galaxy_id: {0}, time: {1:.3f}s'.format(
                galaxy_id,
                time_end - time_start
            )
        )
def run(self):
    """
    Fetch the data for every requested galaxy, build the CSV lines,
    shuffle them and write them to file, logging timings for each phase.

    Sets self._still_valid to False if data collection fails.
    """
    time_start = time.time()

    engine = create_engine(DB_LOGIN)
    self._connection = engine.connect()
    # noinspection PyBroadException
    try:
        build_dynamic_tables(self._connection)
        for galaxy_id in self._galaxy_ids:
            time_get_data = time.time()
            self._get_data(galaxy_id)

            time_build_csv_lines = time.time()
            self._build_csv_lines()

            time_end = time.time()
            LOG.info(
                "galaxy_id: {0}, get_data: {1:.3f}s, build_csv_lines: {2:.3f}s".format(
                    galaxy_id,
                    time_build_csv_lines - time_get_data,
                    time_end - time_build_csv_lines
                )
            )
    except Exception:
        # Collection failed part way through - mark the result as unusable.
        LOG.exception("Getting data")
        self._still_valid = False
    finally:
        # Always release the database connection.
        self._connection.close()

    time_shuffle = time.time()
    LOG.info("get_all_data: {0:.3f}s".format(time_shuffle - time_start))

    if self._still_valid:
        # Randomise the order of the CSV lines before writing them out.
        shuffle(self._csv_line)
        time_write_to_file = time.time()
        LOG.info("shuffle: {0:.3f}s".format(time_write_to_file - time_shuffle))

        self._write_to_file()
        LOG.info("write_to_file: {0:.3f}s".format(time.time() - time_write_to_file))

    time_end = time.time()
    LOG.info("all: {0:.3f}s".format(time_end - time_start))
def process_galaxy(connection, galaxies):
    """
    Normalise the masked pixel values of each galaxy (z-score, min-max,
    ln and softmax) and store them in the matching *__extended table.

    :param connection: open SQLAlchemy connection
    :param galaxies: iterable of GALAXY rows to process
    """
    # Make sure the per-galaxy dynamic tables exist before touching them.
    build_dynamic_tables(connection)

    for galaxy in galaxies:
        galaxy_id = galaxy[GALAXY.c.galaxy_id]
        steps_done = get_step_done_ids(
            connection,
            galaxy_id,
            [STEP_DONE_ID_BUILD_GALAXY_DETAILS, STEP_DONE_ID_NORMALISE_Z_MIN_MAX],
            True
        )
        tables_required = get_tables_required(steps_done)
        if len(tables_required) > 0:
            time_start = time.time()
            for table in tables_required:
                with connection.begin():
                    time_start_update = time.time()

                    # The pre-computed statistics for this galaxy/table pair.
                    mean_standard = connection.execute(
                        select([GALAXY_DETAIL]).where(
                            and_(
                                GALAXY_DETAIL.c.galaxy_id == galaxy_id,
                                GALAXY_DETAIL.c.table_name == table.name
                            )
                        )
                    ).first()

                    insert_data = []
                    if mean_standard is not None:
                        mean = mean_standard[GALAXY_DETAIL.c.mean]
                        standard_deviation = mean_standard[GALAXY_DETAIL.c.standard_deviation]
                        min_value = mean_standard[GALAXY_DETAIL.c.min_value]
                        max_value = mean_standard[GALAXY_DETAIL.c.max_value]

                        # BUG FIX: compute the range only when both bounds are
                        # present - subtracting a NULL column value raised
                        # TypeError before the None guard could ever run.
                        if min_value is not None and max_value is not None:
                            max_minus_min = max_value - min_value
                        else:
                            max_minus_min = None

                        # Skip degenerate distributions (zero range or zero
                        # standard deviation) - the normalisations would divide
                        # by zero.
                        if max_minus_min is not None and max_minus_min != 0.0 \
                                and standard_deviation is not None and standard_deviation != 0.0:
                            # Original-value tables additionally exclude
                            # non-positive pixels.
                            if table.name.startswith('original_value__'):
                                select_statement = select(
                                    [table]
                                ).where(
                                    and_(
                                        table.c.galaxy_id == galaxy_id,
                                        table.c.galaxy_id == MASK_POINT.c.galaxy_id,
                                        table.c.x == MASK_POINT.c.x,
                                        table.c.y == MASK_POINT.c.y,
                                        table.c.value > 0.0
                                    )
                                )
                            else:
                                select_statement = select(
                                    [table]
                                ).where(
                                    and_(
                                        table.c.galaxy_id == galaxy_id,
                                        table.c.galaxy_id == MASK_POINT.c.galaxy_id,
                                        table.c.x == MASK_POINT.c.x,
                                        table.c.y == MASK_POINT.c.y
                                    )
                                )

                            for table_data in connection.execute(select_statement):
                                value = table_data[table.c.value]
                                z_score = (value - mean) / standard_deviation
                                insert_data.append(
                                    {
                                        'original_id': table_data[table.c.id],
                                        'value_zscore': z_score,
                                        'value_min_max': (value - min_value) / max_minus_min,
                                        # +1 keeps the argument of log positive
                                        'value_ln': math.log(value - min_value + 1),
                                        # logistic function of the z-score
                                        'value_softmax': 1.0 / (1.0 + math.exp(-z_score))
                                    }
                                )

                    if len(insert_data) > 0:
                        table_extended = DATA_TABLES_EXTENDED[table.name + '__extended']
                        connection.execute(table_extended.insert(), insert_data)

                    # Record the step even when nothing was inserted so the
                    # table is not reprocessed on the next run.
                    add_step_done_id(
                        connection,
                        galaxy_id,
                        STEP_DONE_ID_NORMALISE_Z_MIN_MAX,
                        table.name
                    )

                    time_end_update = time.time()
                    LOG.info(
                        'galaxy_id: {0}, table: {1}__extended, time: {2:.3f}s, inserts: {3}'.format(
                            galaxy_id,
                            table.name,
                            time_end_update - time_start_update,
                            len(insert_data)
                        )
                    )

            time_end = time.time()
            LOG.info('galaxy_id: {0}, time: {1:.3f}s'.format(galaxy_id, time_end - time_start))
def process_galaxy(connection, galaxies, verbosity, profile_name='aws-pogs'):
    """
    Download each galaxy's HDF5 file from S3 and load the SED and original
    pixel values the database is still missing.

    :param connection: open SQLAlchemy connection
    :param galaxies: iterable of GALAXY rows to process
    :param verbosity: int; >= 1 enables informational logging
    :param profile_name: AWS profile used to access S3
    """
    # Make sure the per-galaxy dynamic tables exist before touching them.
    build_dynamic_tables(connection)

    for galaxy in galaxies:
        galaxy_id = galaxy[GALAXY.c.galaxy_id]
        steps_done = get_step_done_ids(
            connection,
            galaxy_id,
            [STEP_DONE_ID_NO_HDF5_FILE, STEP_DONE_ID_ORIGINAL_VALUES, STEP_DONE_ID_SED_DATA],
            True,
            True
        )

        # Work out what SED and original data is still required
        data_required_sed = get_data_required_sed(connection, steps_done)
        data_required_original = get_data_required_original(steps_done)

        if len(data_required_sed) > 0 or len(data_required_original) > 0:
            # Copy the file from S3
            s3_helper = S3Helper(profile_name=profile_name)
            galaxy_id = int(galaxy[GALAXY.c.galaxy_id])
            galaxy_name = get_galaxy_file_name(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy_id)
            # BUG FIX: use floor division - under Python 3, '/' yields a float
            # and the '{0:04d}' format spec raises ValueError on floats.
            s3_name = os.path.join('{0:04d}000'.format(galaxy_id // 1000), galaxy_name) + '.hdf5'
            hdf5_file_name = os.path.join(FAST_DISK, galaxy_name) + '.hdf5'
            copy_ok = s3_helper.copy_file_to_disk(BUCKET_NAME, s3_name, hdf5_file_name)

            if copy_ok:
                h5_file = h5py.File(hdf5_file_name, 'r')
                try:
                    LOG.info('Processing SED for name: {0}, run_id: {1}, galaxy_id: {2}'.format(galaxy[GALAXY.c.name],
                                                                                                galaxy[GALAXY.c.run_id],
                                                                                                galaxy_id))
                    # Do we have anything to do?
                    if len(data_required_sed) > 0:
                        # noinspection PyBroadException
                        try:
                            # Store the SED fit values
                            add_sed_data(connection, galaxy_id, h5_file, data_required_sed)
                        except Exception:
                            LOG.exception('An exception occurred in process_galaxy processing the SED values')
                    elif verbosity >= 1:
                        LOG.info('Nothing to add - SED')

                    if len(data_required_original) > 0:
                        # noinspection PyBroadException
                        try:
                            add_original_data(connection, galaxy_id, h5_file, data_required_original)
                        except Exception:
                            LOG.exception('An exception occurred in process_galaxy processing the original values')
                    elif verbosity >= 1:
                        LOG.info('Nothing to add - Original Data')
                finally:
                    # Clean up after ourselves - always close and remove the
                    # local copy, even if processing raised unexpectedly.
                    h5_file.close()
                    os.remove(hdf5_file_name)
            else:
                LOG.error('The file for name: {0}, run_id: {1}, galaxy_id: {2} does not exist'.format(galaxy[GALAXY.c.name],
                                                                                                      galaxy[GALAXY.c.run_id],
                                                                                                      galaxy_id))