def parse_mag(self, data, **kwargs):
    """Return a `Photometry` object from a `PhotometryMag` marshmallow
    schema.

    Parameters
    ----------
    data : dict
        The instance of the PhotometryMag schema to convert to Photometry.

    Returns
    -------
    Photometry
        The Photometry object generated from the PhotometryMag dict.

    Raises
    ------
    ValidationError
        If mag/magerr nullness is inconsistent, or the instrument ID,
        object ID, or filter is invalid.
    """
    # Imported locally (mirroring parse_flux) rather than at module level,
    # presumably to avoid a circular import.
    # BUG FIX: PHOT_DETECTION_THRESHOLD was used in the non-detection
    # branch below but was missing from this import, causing a NameError
    # whenever a limit (null mag) packet was parsed.
    from skyportal.models import (
        Instrument,
        Obj,
        PHOT_DETECTION_THRESHOLD,
        PHOT_SYS,
        PHOT_ZP,
        Photometry,
    )
    from sncosmo.photdata import PhotometricData

    # check that mag and magerr are both null or both not null, not a mix
    ok = any([
        all([op(field, None) for field in [data['mag'], data['magerr']]])
        for op in [operator.is_, operator.is_not]
    ])

    if not ok:
        raise ValidationError(f'Error parsing packet "{data}": mag '
                              f'and magerr must both be null, or both be '
                              f'not null.')

    # get the instrument
    instrument = Instrument.query.get(data['instrument_id'])
    if not instrument:
        raise ValidationError(
            f'Invalid instrument ID: {data["instrument_id"]}')

    # get the object
    obj = Obj.query.get(data['obj_id'])  # TODO: implement permissions checking
    if not obj:
        raise ValidationError(f'Invalid object ID: {data["obj_id"]}')

    if data["filter"] not in instrument.filters:
        raise ValidationError(
            f"Instrument {instrument.name} has no filter "
            f"{data['filter']}.")

    # determine if this is a limit or a measurement
    hasmag = data['mag'] is not None

    if hasmag:
        # detection: convert mag/magerr to flux/fluxerr at the internal ZP;
        # sigma_f = sigma_m * f * ln(10) / 2.5
        flux = 10**(-0.4 * (data['mag'] - PHOT_ZP))
        fluxerr = data['magerr'] / (2.5 / np.log(10)) * flux
    else:
        # non-detection: interpret limiting_mag as an n-sigma flux limit,
        # with n = PHOT_DETECTION_THRESHOLD
        nsigflux = 10**(-0.4 * (data['limiting_mag'] - PHOT_ZP))
        flux = None
        fluxerr = nsigflux / PHOT_DETECTION_THRESHOLD

    # convert flux to microJanskies.
    table = Table([{
        'flux': flux,
        'fluxerr': fluxerr,
        'magsys': data['magsys'],
        'zp': PHOT_ZP,
        'filter': data['filter'],
        'mjd': data['mjd'],
    }])
    if flux is None:
        # this needs to be non-null for the conversion step
        # will be replaced later with null
        table['flux'] = 0.0

    # conversion happens here
    photdata = PhotometricData(table).normalized(zp=PHOT_ZP,
                                                 zpsys=PHOT_SYS)

    # replace with null if needed
    final_flux = None if flux is None else photdata.flux[0]

    p = Photometry(
        obj_id=data['obj_id'],
        mjd=data['mjd'],
        flux=final_flux,
        fluxerr=photdata.fluxerr[0],
        instrument_id=data['instrument_id'],
        assignment_id=data['assignment_id'],
        filter=data['filter'],
        ra=data['ra'],
        dec=data['dec'],
        ra_unc=data['ra_unc'],
        dec_unc=data['dec_unc'],
    )
    if 'alert_id' in data and data['alert_id'] is not None:
        p.alert_id = data['alert_id']
    return p
def standardize_photometry_data(self):
    """Validate the request payload and return standardized photometry.

    Parses the request JSON against the flexible mag schema first, then
    the flexible flux schema; converts mag-space packets to fluxes;
    normalizes all fluxes to the internal zeropoint/`ab` system via
    sncosmo; and validates instrument and object IDs.

    Returns
    -------
    (pandas.DataFrame, dict)
        The DataFrame of packets, with added `standardized_flux` and
        `standardized_fluxerr` columns, and a cache mapping
        instrument_id -> Instrument.

    Raises
    ------
    ValidationError
        On any malformed payload, non-finite/missing required fields, or
        invalid instrument/object IDs.
    """
    data = self.get_json()

    if not isinstance(data, dict):
        raise ValidationError(
            'Top level JSON must be an instance of `dict`, got '
            f'{type(data)}.')

    # treat a falsy altdata ({} / None / '') as absent
    if "altdata" in data and not data["altdata"]:
        del data["altdata"]

    # quick validation - just to make sure things have the right fields
    # try mag space first; fall back to flux space
    try:
        data = PhotMagFlexible.load(data)
    except ValidationError as e1:
        try:
            data = PhotFluxFlexible.load(data)
        except ValidationError as e2:
            raise ValidationError(
                'Invalid input format: Tried to parse data '
                f'in mag space, got: '
                f'"{e1.normalized_messages()}." Tried '
                f'to parse data in flux space, got:'
                f' "{e2.normalized_messages()}."')
        else:
            kind = 'flux'
    else:
        kind = 'mag'

    # not used here
    _ = data.pop('group_ids', None)

    # a single all-scalar packet is wrapped so DataFrame sees one row
    if allscalar(data):
        data = [data]

    try:
        df = pd.DataFrame(data)
    except ValueError as e:
        # altdata arrived as a dict of lists (one dict for the whole
        # request); pivot it into one dict per packet and retry
        if "altdata" in data and "Mixing dicts with non-Series" in str(e):
            try:
                data["altdata"] = [{
                    key: value[i]
                    for key, value in data["altdata"].items()
                } for i in range(
                    len(data["altdata"][list(data["altdata"].keys())[-1]]))
                ]
                df = pd.DataFrame(data)
            except ValueError:
                raise ValidationError(
                    'Unable to coerce passed JSON to a series of packets. '
                    f'Error was: "{e}"')
        else:
            raise ValidationError(
                'Unable to coerce passed JSON to a series of packets. '
                f'Error was: "{e}"')

    # `to_numeric` coerces numbers written as strings to numeric types
    # (int, float)
    # errors='ignore' means if something is actually an alphanumeric
    # string, just leave it alone and dont error out
    # apply is used to apply it to each column
    # (https://stackoverflow.com/questions/34844711/convert-entire-pandas
    # -dataframe-to-integers-in-pandas-0-17-0/34844867
    df = df.apply(pd.to_numeric, errors='ignore')

    # set origin to '' where it is None.
    df.loc[df['origin'].isna(), 'origin'] = ''

    if kind == 'mag':
        # ensure that neither or both mag and magerr are null
        magnull = df['mag'].isna()
        magerrnull = df['magerr'].isna()
        magdet = ~magnull

        # https://en.wikipedia.org/wiki/Bitwise_operation#XOR
        bad = magerrnull ^ magnull  # bitwise exclusive or -- returns true
        # if A and not B or B and not A

        # coerce to numpy array
        bad = bad.values

        if any(bad):
            # find the first offending packet
            first_offender = np.argwhere(bad)[0, 0]
            packet = df.iloc[first_offender].to_dict()

            # coerce nans to nones
            for key in packet:
                if key != 'standardized_flux':
                    packet[key] = nan_to_none(packet[key])

            raise ValidationError(
                f'Error parsing packet "{packet}": mag '
                f'and magerr must both be null, or both be '
                f'not null.')

        # reject +/- infinity in the mag-space fields
        for field in ['mag', 'magerr', 'limiting_mag']:
            infinite = np.isinf(df[field].values)
            if any(infinite):
                first_offender = np.argwhere(infinite)[0, 0]
                packet = df.iloc[first_offender].to_dict()

                # coerce nans to nones
                for key in packet:
                    packet[key] = nan_to_none(packet[key])

                raise ValidationError(f'Error parsing packet "{packet}": '
                                      f'field {field} must be finite.')

        # ensure nothing is null for the required fields
        for field in PhotMagFlexible.required_keys:
            missing = df[field].isna()
            if any(missing):
                first_offender = np.argwhere(missing)[0, 0]
                packet = df.iloc[first_offender].to_dict()

                # coerce nans to nones
                for key in packet:
                    packet[key] = nan_to_none(packet[key])

                raise ValidationError(f'Error parsing packet "{packet}": '
                                      f'missing required field {field}.')

        # convert the mags to fluxes
        # detections: sigma_f = sigma_m * f * ln(10) / 2.5
        detflux = 10**(-0.4 * (df[magdet]['mag'] - PHOT_ZP))
        detfluxerr = df[magdet]['magerr'] / (2.5 / np.log(10)) * detflux

        # non-detections: limiting mag interpreted as an n-sigma flux
        # limit, with n = limiting_mag_nsigma
        limmag_flux = 10**(-0.4 * (df[magnull]['limiting_mag'] - PHOT_ZP))
        ndetfluxerr = limmag_flux / df[magnull]['limiting_mag_nsigma']

        # initialize flux to be none
        phot_table = Table.from_pandas(df[['mjd', 'magsys', 'filter']])

        phot_table['zp'] = PHOT_ZP
        phot_table['flux'] = np.nan
        phot_table['fluxerr'] = np.nan
        # fill in detections and limits separately via the boolean masks
        phot_table['flux'][magdet] = detflux
        phot_table['fluxerr'][magdet] = detfluxerr
        phot_table['fluxerr'][magnull] = ndetfluxerr

    else:
        # flux space: only null/finiteness validation is needed
        for field in PhotFluxFlexible.required_keys:
            missing = df[field].isna().values
            if any(missing):
                first_offender = np.argwhere(missing)[0, 0]
                packet = df.iloc[first_offender].to_dict()

                for key in packet:
                    packet[key] = nan_to_none(packet[key])

                raise ValidationError(f'Error parsing packet "{packet}": '
                                      f'missing required field {field}.')

        for field in ['flux', 'fluxerr']:
            infinite = np.isinf(df[field].values)
            if any(infinite):
                first_offender = np.argwhere(infinite)[0, 0]
                packet = df.iloc[first_offender].to_dict()

                # coerce nans to nones
                for key in packet:
                    packet[key] = nan_to_none(packet[key])

                raise ValidationError(f'Error parsing packet "{packet}": '
                                      f'field {field} must be finite.')

        phot_table = Table.from_pandas(
            df[['mjd', 'magsys', 'filter', 'zp']])
        phot_table['flux'] = df['flux'].fillna(np.nan)
        phot_table['fluxerr'] = df['fluxerr'].fillna(np.nan)

    # convert to microjanskies, AB for DB storage as a vectorized operation
    pdata = PhotometricData(phot_table)
    standardized = pdata.normalized(zp=PHOT_ZP, zpsys='ab')

    df['standardized_flux'] = standardized.flux
    df['standardized_fluxerr'] = standardized.fluxerr

    # validate and cache all referenced instruments
    instrument_cache = {}
    for iid in df['instrument_id'].unique():
        instrument = Instrument.query.get(int(iid))
        if not instrument:
            raise ValidationError(f'Invalid instrument ID: {iid}')
        instrument_cache[iid] = instrument

    # validate all referenced objects
    for oid in df['obj_id'].unique():
        obj = Obj.query.get(oid)
        if not obj:
            raise ValidationError(f'Invalid object ID: {oid}')

    return df, instrument_cache
def parse_flux(self, data, **kwargs):
    """Return a `Photometry` object from a `PhotometryFlux` marshmallow
    schema.

    Parameters
    ----------
    data : dict
        The instance of the PhotometryFlux schema to convert to Photometry.

    Returns
    -------
    Photometry
        The Photometry object generated from the PhotometryFlux object.

    Raises
    ------
    ValidationError
        If the instrument ID, object ID, or filter is invalid.
    """
    # imported locally, presumably to avoid a circular import
    from skyportal.models import Instrument, Obj, PHOT_SYS, PHOT_ZP, Photometry
    from sncosmo.photdata import PhotometricData

    # resolve and validate the instrument
    instrument = Instrument.query.get(data['instrument_id'])
    if instrument is None:
        raise ValidationError(
            f'Invalid instrument ID: {data["instrument_id"]}')

    # resolve and validate the object
    # TODO : implement permissions checking
    obj = Obj.query.get(data['obj_id'])
    if obj is None:
        raise ValidationError(f'Invalid object ID: {data["obj_id"]}')

    if data["filter"] not in instrument.filters:
        raise ValidationError(
            f"Instrument {instrument.name} has no filter "
            f"{data['filter']}.")

    is_limit = data['flux'] is None

    # convert flux to microJanskies.
    packet_table = Table([data])
    if is_limit:
        # the normalization step cannot handle a null flux; use a
        # placeholder of zero and restore null afterwards
        packet_table['flux'] = 0.0

    # normalization to the internal zeropoint/system happens here
    normalized = PhotometricData(packet_table).normalized(
        zp=PHOT_ZP, zpsys=PHOT_SYS)

    # restore null for non-detections
    final_flux = normalized.flux[0] if not is_limit else None

    photometry = Photometry(
        obj_id=data['obj_id'],
        mjd=data['mjd'],
        flux=final_flux,
        fluxerr=normalized.fluxerr[0],
        instrument_id=data['instrument_id'],
        assignment_id=data['assignment_id'],
        filter=data['filter'],
        ra=data['ra'],
        dec=data['dec'],
        ra_unc=data['ra_unc'],
        dec_unc=data['dec_unc'],
    )

    alert_id = data.get('alert_id')
    if alert_id is not None:
        photometry.alert_id = alert_id

    return photometry
def post(self):
    """
    ---
    description: Upload photometry
    requestBody:
      content:
        application/json:
          schema:
            oneOf:
              - $ref: "#/components/schemas/PhotMagFlexible"
              - $ref: "#/components/schemas/PhotFluxFlexible"
    responses:
      200:
        content:
          application/json:
            schema:
              allOf:
                - $ref: '#/components/schemas/Success'
                - type: object
                  properties:
                    data:
                      type: object
                      properties:
                        ids:
                          type: array
                          items:
                            type: integer
                          description: List of new photometry IDs
                        upload_id:
                          type: string
                          description: |
                            Upload ID associated with all photometry points
                            added in request. Can be used to later delete
                            all points in a single request.
    """
    data = self.get_json()

    if not isinstance(data, dict):
        return self.error(
            'Top level JSON must be an instance of `dict`, got '
            f'{type(data)}.')

    # treat a falsy altdata ({} / None / '') as absent
    if "altdata" in data and not data["altdata"]:
        del data["altdata"]

    # quick validation - just to make sure things have the right fields
    # try mag space first; fall back to flux space
    try:
        data = PhotMagFlexible.load(data)
    except ValidationError as e1:
        try:
            data = PhotFluxFlexible.load(data)
        except ValidationError as e2:
            return self.error('Invalid input format: Tried to parse data '
                              f'in mag space, got: '
                              f'"{e1.normalized_messages()}." Tried '
                              f'to parse data in flux space, got:'
                              f' "{e2.normalized_messages()}."')
        else:
            kind = 'flux'
    else:
        kind = 'mag'

    try:
        group_ids = data.pop("group_ids")
    except KeyError:
        return self.error("Missing required field: group_ids")
    groups = Group.query.filter(Group.id.in_(group_ids)).all()
    if not groups:
        return self.error("Invalid group_ids field. "
                          "Specify at least one valid group ID.")
    # non-admins may only post to groups they belong to
    if "Super admin" not in [
            r.id for r in self.associated_user_object.roles
    ]:
        if not all([group in self.current_user.groups for group in groups]):
            return self.error(
                "Cannot upload photometry to groups that you "
                "are not a member of.")

    # if this alert's photometry already exists, just update its groups
    if "alert_id" in data:
        phot = Photometry.query.filter(
            Photometry.alert_id == data["alert_id"]).filter(
                Photometry.alert_id.isnot(None)).first()
        if phot is not None:
            phot.groups = groups
            DBSession().commit()
            return self.success(data={
                "ids": [phot.id],
                "upload_id": phot.upload_id
            })

    # a single all-scalar packet is wrapped so DataFrame sees one row
    if allscalar(data):
        data = [data]

    upload_id = str(uuid.uuid4())

    try:
        df = pd.DataFrame(data)
    except ValueError as e:
        # altdata arrived as a dict of lists (one dict for the whole
        # request); pivot it into one dict per packet and retry
        if "altdata" in data and "Mixing dicts with non-Series" in str(e):
            try:
                data["altdata"] = [{
                    key: value[i]
                    for key, value in data["altdata"].items()
                } for i in range(
                    len(data["altdata"][list(data["altdata"].keys())[-1]]))
                ]
                df = pd.DataFrame(data)
            except ValueError:
                return self.error(
                    'Unable to coerce passed JSON to a series of packets. '
                    f'Error was: "{e}"')
        else:
            return self.error(
                'Unable to coerce passed JSON to a series of packets. '
                f'Error was: "{e}"')

    # `to_numeric` coerces numbers written as strings to numeric types
    # (int, float)
    # errors='ignore' means if something is actually an alphanumeric
    # string, just leave it alone and dont error out
    # apply is used to apply it to each column
    # (https://stackoverflow.com/questions/34844711/convert-entire-pandas
    # -dataframe-to-integers-in-pandas-0-17-0/34844867
    df = df.apply(pd.to_numeric, errors='ignore')

    if kind == 'mag':
        # ensure that neither or both mag and magerr are null
        magnull = df['mag'].isna()
        magerrnull = df['magerr'].isna()
        magdet = ~magnull

        # https://en.wikipedia.org/wiki/Bitwise_operation#XOR
        bad = magerrnull ^ magnull  # bitwise exclusive or -- returns true
        # if A and not B or B and not A

        # coerce to numpy array (consistent with
        # standardize_photometry_data)
        bad = bad.values

        if any(bad):
            # find the first offending packet
            first_offender = np.argwhere(bad)[0, 0]
            packet = df.iloc[first_offender].to_dict()

            # coerce nans to nones
            for key in packet:
                packet[key] = nan_to_none(packet[key])

            return self.error(f'Error parsing packet "{packet}": mag '
                              f'and magerr must both be null, or both be '
                              f'not null.')

        # ensure nothing is null for the required fields
        for field in PhotMagFlexible.required_keys:
            missing = df[field].isna()
            if any(missing):
                first_offender = np.argwhere(missing)[0, 0]
                packet = df.iloc[first_offender].to_dict()

                # coerce nans to nones
                for key in packet:
                    packet[key] = nan_to_none(packet[key])

                return self.error(f'Error parsing packet "{packet}": '
                                  f'missing required field {field}.')

        # convert the mags to fluxes
        # detections: sigma_f = sigma_m * f * ln(10) / 2.5
        detflux = 10**(-0.4 * (df[magdet]['mag'] - PHOT_ZP))
        detfluxerr = df[magdet]['magerr'] / (2.5 / np.log(10)) * detflux

        # non-detections: limiting mag interpreted as an n-sigma flux
        # limit, with n = limiting_mag_nsigma
        limmag_flux = 10**(-0.4 * (df[magnull]['limiting_mag'] - PHOT_ZP))
        ndetfluxerr = limmag_flux / df[magnull]['limiting_mag_nsigma']

        # initialize flux to be none
        phot_table = Table.from_pandas(df[['mjd', 'magsys', 'filter']])

        phot_table['zp'] = PHOT_ZP
        phot_table['flux'] = np.nan
        phot_table['fluxerr'] = np.nan
        # fill in detections and limits separately via the boolean masks
        phot_table['flux'][magdet] = detflux
        phot_table['fluxerr'][magdet] = detfluxerr
        phot_table['fluxerr'][magnull] = ndetfluxerr

    else:
        # flux space: only null validation is needed
        for field in PhotFluxFlexible.required_keys:
            missing = df[field].isna()
            if any(missing):
                first_offender = np.argwhere(missing)[0, 0]
                packet = df.iloc[first_offender].to_dict()

                for key in packet:
                    packet[key] = nan_to_none(packet[key])

                return self.error(f'Error parsing packet "{packet}": '
                                  f'missing required field {field}.')

        phot_table = Table.from_pandas(
            df[['mjd', 'magsys', 'filter', 'zp']])
        phot_table['flux'] = df['flux'].fillna(np.nan)
        phot_table['fluxerr'] = df['fluxerr'].fillna(np.nan)

    # convert to microjanskies, AB for DB storage as a vectorized operation
    pdata = PhotometricData(phot_table)
    standardized = pdata.normalized(zp=PHOT_ZP, zpsys='ab')

    df['standardized_flux'] = standardized.flux
    df['standardized_fluxerr'] = standardized.fluxerr

    # validate and cache all referenced instruments
    instcache = {}
    for iid in df['instrument_id'].unique():
        instrument = Instrument.query.get(int(iid))
        if not instrument:
            return self.error(f'Invalid instrument ID: {iid}')
        instcache[iid] = instrument

    # validate all referenced objects
    for oid in df['obj_id'].unique():
        obj = Obj.query.get(oid)
        if not obj:
            return self.error(f'Invalid object ID: {oid}')

    # pre-fetch the photometry PKs. these are not guaranteed to be
    # gapless (e.g., 1, 2, 3, 4, 5, ...) but they are guaranteed
    # to be unique in the table and thus can be used to "reserve"
    # PK slots for uninserted rows
    pkq = f"SELECT nextval('photometry_id_seq') FROM " \
          f"generate_series(1, {len(df)})"

    proxy = DBSession().execute(pkq)

    # cache this as list for response
    ids = [i[0] for i in proxy]
    df['id'] = ids

    rows = df.where(pd.notnull(df), None).to_dict('records')
    params = []
    for packet in rows:
        # BUG FIX: this check previously referenced the stale loop
        # variable `instrument` (the last instrument validated above), so
        # the error named the wrong instrument; it also raised
        # ValidationError instead of returning an error response like
        # every other failure path in this handler.
        packet_instrument = instcache[packet['instrument_id']]
        if packet["filter"] not in packet_instrument.filters:
            return self.error(
                f"Instrument {packet_instrument.name} has no filter "
                f"{packet['filter']}.")

        flux = packet.pop('standardized_flux')
        fluxerr = packet.pop('standardized_fluxerr')

        # original mag-space inputs are preserved verbatim; storing only
        # these keys (instead of the whole packet) reduces the DB size
        # by ~2x
        keys = ['limiting_mag', 'magsys', 'limiting_mag_nsigma']
        original_user_data = {
            key: packet[key]
            for key in keys if key in packet
        }
        if original_user_data == {}:
            original_user_data = None

        phot = dict(id=packet['id'],
                    original_user_data=original_user_data,
                    upload_id=upload_id,
                    flux=flux,
                    fluxerr=fluxerr,
                    obj_id=packet['obj_id'],
                    altdata=packet['altdata'],
                    instrument_id=packet['instrument_id'],
                    ra_unc=packet['ra_unc'],
                    dec_unc=packet['dec_unc'],
                    mjd=packet['mjd'],
                    filter=packet['filter'],
                    ra=packet['ra'],
                    dec=packet['dec'])

        params.append(phot)

    # actually do the insert
    query = Photometry.__table__.insert()
    DBSession().execute(query, params)

    # associate each new photometry row with every requested group
    groupquery = GroupPhotometry.__table__.insert()
    params = []
    for id in ids:
        for group_id in group_ids:
            params.append({'photometr_id': id, 'group_id': group_id})
    DBSession().execute(groupquery, params)
    DBSession().commit()

    return self.success(data={"ids": ids, "upload_id": upload_id})