def clean_up_document(document):
                """ Format passed in dicts for Mongo insertion """
                # convert types for pymongo:
                for k, v in document.items():
                    if k != "data":
                        if k in sources_int_fields:
                            document[k] = int(document[k])
                        else:
                            document[k] = float(document[k])
                            if k not in ("ra", "dec"):
                                # this will save a lot of space:
                                document[k] = round(document[k], 3)

                # generate unique _id:
                document["_id"] = baseid + document["matchid"]
                document["filter"] = filt
                document["field"] = field
                document["ccd"] = ccd
                document["quad"] = quadrant
                document["rc"] = readout_channel

                # GeoJSON for 2D indexing
                document["coordinates"] = dict()
                _ra = document["ra"]
                _dec = document["dec"]
                _radec_str = [deg2hms(_ra), deg2dms(_dec)]
                document["coordinates"]["radec_str"] = _radec_str
                # for GeoJSON, must be lon:[-180, 180], lat:[-90, 90] (i.e. in deg)
                _radec_geojson = [_ra - 180.0, _dec]
                document["coordinates"]["radec_geojson"] = {
                    "type": "Point",
                    "coordinates": _radec_geojson,
                }
                document["data"].sort(key=lambda x: x["hjd"])
                for data_point in document["data"]:
                    # convert types for pymongo:
                    for k, v in data_point.items():
                        if k in sourcedata_int_fields:
                            data_point[k] = int(data_point[k])
                        else:
                            data_point[k] = float(data_point[k])
                            if k not in ("ra", "dec", "hjd"):
                                data_point[k] = round(data_point[k], 3)
                            elif k == "hjd":
                                data_point[k] = round(data_point[k], 5)
                    # generate unique exposure ids that match _id's in the exposures collection
                    data_point["uexpid"] = exposure_baseid + data_point["expid"]

                return document
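
Every example on this page leans on deg2hms/deg2dms helpers imported from the surrounding codebase (and clean_up_document above is a nested function, so baseid, filt, field, ccd, quadrant, readout_channel, exposure_baseid and the *_int_fields sets come from its enclosing scope). A minimal sketch of what those helpers might look like, assuming plain sexagesimal string output; the real implementations may differ in precision and formatting:

def deg2hms(ra: float) -> str:
    """Sketch: convert right ascension in degrees to an H:M:S string."""
    hours = ra / 15.0
    h = int(hours)
    m = int((hours - h) * 60.0)
    s = (hours - h - m / 60.0) * 3600.0
    return f"{h:02d}:{m:02d}:{s:07.4f}"


def deg2dms(dec: float) -> str:
    """Sketch: convert declination in degrees to a D:M:S string."""
    sign = "-" if dec < 0 else "+"
    a = abs(dec)
    d = int(a)
    m = int((a - d) * 60.0)
    s = (a - d - m / 60.0) * 3600.0
    return f"{sign}{d:02d}:{m:02d}:{s:06.3f}"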
Example #2
def process_file(file, collection, batch_size):

    # connect to MongoDB:
    log("Connecting to DB")
    mongo = Mongo(
        host=config["database"]["host"],
        port=config["database"]["port"],
        replica_set=config["database"]["replica_set"],
        username=config["database"]["username"],
        password=config["database"]["password"],
        db=config["database"]["db"],
        verbose=0,
    )
    log("Successfully connected")

    collection = "VLASS_DR1"

    log(f"Processing {file}")

    names = [
        "Component_name",
        "RA",
        "DEC",
        "E_RA",
        "E_DEC",
        "Total_flux",
        "E_Total_flux",
        "Peak_flux",
        "E_Peak_flux",
        "Maj",
        "E_Maj",
        "Min",
        "E_Min",
        "Duplicate_flag",
        "Quality_flag",
    ]

    for chunk_index, dataframe_chunk in enumerate(
        pd.read_csv(file, chunksize=batch_size)
    ):

        log(f"{file}: processing batch # {chunk_index + 1}")

        dataframe_chunk = dataframe_chunk[names]
        dataframe_chunk = dataframe_chunk[dataframe_chunk["Duplicate_flag"] < 2]
        dataframe_chunk = dataframe_chunk[dataframe_chunk["Quality_flag"] == 0]

        batch = dataframe_chunk.to_dict(orient="records")

        bad_document_indexes = []

        for document_index, document in enumerate(batch):
            try:
                # GeoJSON for 2D indexing
                document["coordinates"] = dict()
                # string format: H:M:S, D:M:S
                document["coordinates"]["radec_str"] = [
                    deg2hms(document["RA"]),
                    deg2dms(document["DEC"]),
                ]
                # for GeoJSON, must be lon:[-180, 180], lat:[-90, 90] (i.e. in deg)
                _radec_geojson = [document["RA"] - 180.0, document["DEC"]]
                document["coordinates"]["radec_geojson"] = {
                    "type": "Point",
                    "coordinates": _radec_geojson,
                }
            except Exception as e:
                log(str(e))
                bad_document_indexes.append(document_index)

        if len(bad_document_indexes) > 0:
            log("Removing bad docs")
            for index in sorted(bad_document_indexes, reverse=True):
                del batch[index]

        # ingest batch
        mongo.insert_many(collection=collection, documents=batch)

    # disconnect from db:
    try:
        mongo.client.close()
    finally:
        log("Successfully disconnected from db")
Example #3
def process_file(file, collection, batch_size):

    # connect to MongoDB:
    log("Connecting to DB")
    mongo = Mongo(
        host=config["database"]["host"],
        port=config["database"]["port"],
        replica_set=config["database"]["replica_set"],
        username=config["database"]["username"],
        password=config["database"]["password"],
        db=config["database"]["db"],
        verbose=0,
    )
    log("Successfully connected")

    collection = "IGAPS_DR2"

    log(f"Processing {file}")

    names = [
        "name",
        "RA",
        "DEC",
        "gal_long",
        "gal_lat",
        "sourceID",
        "posErr",
        "mergedClass",
        "pStar",
        "pGalaxy",
        "pNoise",
        "i",
        "iErr",
        "iAB",
        "iEll",
        "iClass",
        "iDeblend",
        "iSaturated",
        "iVignetted",
        "iTrail",
        "iTruncated",
        "iBadPix",
        "iMJD",
        "iSeeing",
        "iDetectionID",
        "iDeltaRA",
        "iDeltaDEC",
        "ha",
        "haErr",
        "haAB",
        "haEll",
        "haClass",
        "haDeblend",
        "haSaturated",
        "haVignetted",
        "haTrail",
        "haTruncated",
        "haBadPix",
        "haMJD",
        "haSeeing",
        "haDetectionID",
        "haDeltaRA",
        "haDeltaDEC",
        "r_I",
        "rErr_I",
        "rAB_I",
        "rEll_I",
        "rClass_I",
        "rDeblend_I",
        "rSaturated_I",
        "rVignetted_I",
        "rTrail_I",
        "rTruncated_I",
        "rBadPix_I",
        "rMJD_I",
        "rSeeing_I",
        "rDetectionID_I",
        "r_U",
        "rErr_U",
        "rAB_U",
        "rEll_U",
        "rClass_U",
        "rDeblend_U",
        "rSaturated_U",
        "rVignetted_U",
        "rTrail_U",
        "rTruncated_U",
        "rBadPix_U",
        "rMJD_U",
        "rSeeing_U",
        "rDetectionID_U",
        "rDeltaRA_U",
        "rDeltaDEC_U",
        "g",
        "gErr",
        "gAB",
        "gEll",
        "gClass",
        "gDeblend",
        "gSaturated",
        "gVignetted",
        "gTrail",
        "gTruncated",
        "gBadPix",
        "gmask",
        "gMJD",
        "gSeeing",
        "gDetectionID",
        "gDeltaRA",
        "gDeltaDEC",
        "U_RGO",
        "UErr",
        "UEll",
        "UClass",
        "UDeblend",
        "USaturated",
        "UVignetted",
        "UTrail",
        "UTruncated",
        "UBadPix",
        "UMJD",
        "USeeing",
        "UDetectionID",
        "UDeltaRA",
        "UDeltaDEC",
        "brightNeighb",
        "deblend",
        "saturated",
        "nBands",
        "errBits",
        "nObs_I",
        "nObs_U",
        "fieldID_I",
        "fieldID_U",
        "fieldGrade_I",
        "fieldGrade_U",
        "emitter",
        "variable",
        "SourceID2",
        "i2",
        "i2Err",
        "i2Class",
        "i2Seeing",
        "i2MJD",
        "i2DeltaRA",
        "i2DeltaDEC",
        "i2DetectionID",
        "i2ErrBits",
        "ha2",
        "ha2Err",
        "ha2Class",
        "ha2Seeing",
        "ha2MJD",
        "ha2DeltaRA",
        "ha2DeltaDEC",
        "ha2DetectionID",
        "ha2ErrBits",
        "r2_I",
        "r2Err_I",
        "r2Class_I",
        "r2Seeing_I",
        "r2MJD_I",
        "r2DeltaRA_I",
        "r2DeltaDEC_I",
        "r2DetectionID_I",
        "r2ErrBits_I",
        "r2_U",
        "r2Err_U",
        "r2Class_U",
        "r2Seeing_U",
        "r2MJD_U",
        "r2DeltaRA_U",
        "r2DeltaDEC_U",
        "r2DetectionID_U",
        "r2ErrBits_U",
        "g2",
        "g2Err",
        "g2Class",
        "g2Seeing",
        "g2MJD",
        "g2DeltaRA",
        "g2DeltaDEC",
        "g2DetectionID",
        "g2ErrBits",
        "U_RGO2",
        "U2Err",
        "U2Class",
        "U2Seeing",
        "U2MJD",
        "U2DeltaRA",
        "U2DeltaDEC",
        "U2DetectionID",
        "U2ErrBits",
        "errBits2",
    ]
    with fits.open(file) as hdulist:
        nhdu = 1
        dataframe = pd.DataFrame(np.asarray(hdulist[nhdu].data), columns=names)

    for chunk_index, dataframe_chunk in dataframe.groupby(
            np.arange(len(dataframe)) // batch_size):

        log(f"{file}: processing batch # {chunk_index + 1}")

        for col, dtype in dataframe_chunk.dtypes.items():
            # np.object was removed in NumPy 1.24; byte strings read from
            # FITS need decoding to str
            if dtype == object:
                dataframe_chunk[col] = dataframe_chunk[col].apply(
                    lambda x: x.decode("utf-8"))

        # fill NaNs with a sentinel value so they can be dropped below:
        batch = dataframe_chunk.fillna("DROPMEPLEASE").to_dict(
            orient="records")

        # pop nulls - save space
        batch = [{
            key: value
            for key, value in document.items() if value != "DROPMEPLEASE"
        } for document in batch]

        bad_document_indexes = []

        for document_index, document in enumerate(batch):
            try:
                # GeoJSON for 2D indexing
                document["coordinates"] = dict()
                # string format: H:M:S, D:M:S
                document["coordinates"]["radec_str"] = [
                    deg2hms(document["RA"]),
                    deg2dms(document["DEC"]),
                ]
                # for GeoJSON, must be lon:[-180, 180], lat:[-90, 90] (i.e. in deg)
                _radec_geojson = [document["RA"] - 180.0, document["DEC"]]
                document["coordinates"]["radec_geojson"] = {
                    "type": "Point",
                    "coordinates": _radec_geojson,
                }
            except Exception as e:
                log(str(e))
                bad_document_indexes.append(document_index)

        if len(bad_document_indexes) > 0:
            log("Removing bad docs")
            for index in sorted(bad_document_indexes, reverse=True):
                del batch[index]

        # ingest batch
        mongo.insert_many(collection=collection, documents=batch)

    # disconnect from db:
    try:
        mongo.client.close()
    finally:
        log("Successfully disconnected from db")
Example #4
def process_file(_file,
                 _collections,
                 _batch_size=2048,
                 _keep_all=False,
                 _rm_file=False,
                 verbose=False,
                 _dry_run=False):

    # connect to MongoDB:
    if verbose:
        print('Connecting to DB')
    _client, _db = connect_to_db()
    if verbose:
        print('Successfully connected')

    if verbose:
        print(f'processing {_file}')

    try:
        with tables.open_file(_file) as f:
            group = f.root.matches

            ff_basename = os.path.basename(_file)

            # base id:
            _, field, filt, ccd, quad, _ = ff_basename.split('_')
            field = int(field)
            filt = filters[filt]
            ccd = int(ccd[1:])
            quad = int(quad[1:])

            rc = ccd_quad_2_rc(ccd=ccd, quad=quad)
            baseid = int(1e13 + field * 1e9 + rc * 1e7 + filt * 1e6)
            if verbose:
                print(f'{_file}: baseid {baseid}')

            exp_baseid = int(1e16 + field * 1e12 + rc * 1e10 + filt * 1e9)

            exposures = pd.DataFrame.from_records(group.exposures[:])

            # prepare docs to ingest into db:
            docs_exposures = []
            for index, row in exposures.iterrows():
                try:
                    doc = row.to_dict()

                    # unique exposure id:
                    doc['_id'] = exp_baseid + doc['expid']

                    doc['matchfile'] = ff_basename
                    doc['filter'] = filt
                    doc['field'] = field
                    doc['ccd'] = ccd
                    doc['quad'] = quad
                    doc['rc'] = rc
                    docs_exposures.append(doc)
                except Exception as e_:
                    print(str(e_))

            # ingest exposures in one go:
            if not _dry_run:
                if verbose:
                    print(f'ingesting exposures for {_file}')
                insert_multiple_db_entries(
                    _db,
                    _collection=_collections['exposures'],
                    _db_entries=docs_exposures)
                if verbose:
                    print(f'done ingesting exposures for {_file}')

            docs_sources = []
            batch_num = 1
            # fixme? skip transients
            # for source_type in ('source', 'transient'):
            for source_type in ('source', ):

                sources_colnames = group[f'{source_type}s'].colnames
                sources = np.array(group[f'{source_type}s'].read())

                sourcedata_colnames = group[f'{source_type}data'].colnames

                for source in sources:
                    try:
                        doc = dict(zip(sources_colnames, source))

                        # grab data first
                        sourcedata = np.array(
                            group[f'{source_type}data'].read_where(
                                f'matchid == {doc["matchid"]}'))
                        doc_data = [
                            dict(zip(sourcedata_colnames, sd))
                            for sd in sourcedata
                        ]

                        # skip sources that are only detected in the reference image:
                        if len(doc_data) == 0:
                            continue

                        # dump unwanted fields:
                        if not _keep_all:
                            # do not store all fields to save space
                            # refmagerr = 1.0857/refsnr
                            sources_fields_to_keep = ('meanmag', 'percentiles',
                                                      'vonneumannratio', 'dec',
                                                      'matchid', 'nobs', 'ra',
                                                      'refchi', 'refmag',
                                                      'refmagerr', 'refsharp')

                            doc_keys = list(doc.keys())
                            for kk in doc_keys:
                                if kk not in sources_fields_to_keep:
                                    doc.pop(kk)

                        # convert types for pymongo:
                        for k, v in doc.items():
                            if np.issubdtype(type(v), np.integer):
                                doc[k] = int(doc[k])
                            if np.issubdtype(type(v), np.inexact):
                                doc[k] = float(doc[k])
                                if k not in ('ra', 'dec'):
                                    doc[k] = round(doc[k], 3)
                            # convert numpy arrays into lists
                            if isinstance(v, np.ndarray):
                                doc[k] = doc[k].tolist()

                        # generate unique _id:
                        doc['_id'] = baseid + doc['matchid']

                        # from Frank Masci: compute ObjectID, same as serial key in ZTF Objects DB table in IRSA.
                        # oid = ((fieldid * 100000 + fid * 10000 + ccdid * 100 + qid * 10) * 10 ** 7) + int(matchid)

                        doc['iqr'] = doc['percentiles'][8] - doc['percentiles'][3]
                        doc['iqr'] = round(doc['iqr'], 3)
                        doc.pop('percentiles')

                        doc['filter'] = filt
                        doc['field'] = field
                        doc['ccd'] = ccd
                        doc['quad'] = quad
                        doc['rc'] = rc

                        # GeoJSON for 2D indexing
                        doc['coordinates'] = {}
                        _ra = doc['ra']
                        _dec = doc['dec']
                        # string format: H:M:S, D:M:S
                        _radec_str = [deg2hms(_ra), deg2dms(_dec)]
                        doc['coordinates']['radec_str'] = _radec_str
                        # for GeoJSON, must be lon:[-180, 180], lat:[-90, 90] (i.e. in deg)
                        _radec_geojson = [_ra - 180.0, _dec]
                        doc['coordinates']['radec_geojson'] = {
                            'type': 'Point',
                            'coordinates': _radec_geojson
                        }

                        # data
                        doc['data'] = doc_data

                        if not _keep_all:
                            # do not store all fields to save space
                            if len(doc_data) > 0:
                                # magerr = 1.0857/snr
                                sourcedata_fields_to_keep = (
                                    'catflags',
                                    'chi',
                                    'dec',
                                    'expid',
                                    'hjd',
                                    'mag',
                                    'magerr',
                                    'programid',
                                    'ra',  # 'relphotflags', 'snr',
                                    'sharp')
                                doc_keys = list(doc_data[0].keys())
                                for ddi, ddp in enumerate(doc['data']):
                                    for kk in doc_keys:
                                        if kk not in sourcedata_fields_to_keep:
                                            doc['data'][ddi].pop(kk)

                        for dd in doc['data']:
                            # convert types for pymongo:
                            for k, v in dd.items():
                                if np.issubdtype(type(v), np.integer):
                                    dd[k] = int(dd[k])
                                if np.issubdtype(type(v), np.inexact):
                                    dd[k] = float(dd[k])
                                    if k not in ('ra', 'dec', 'hjd'):
                                        dd[k] = round(dd[k], 3)
                                    elif k == 'hjd':
                                        dd[k] = round(dd[k], 5)
                                # convert numpy arrays into lists
                                if isinstance(v, np.ndarray):
                                    dd[k] = dd[k].tolist()

                            # generate unique exposure ids that match _id's in the exposures collection
                            dd['uexpid'] = exp_baseid + dd['expid']

                        docs_sources.append(doc)

                    except Exception as e_:
                        print(str(e_))

                    # ingest in batches
                    try:
                        # flush once a full batch has accumulated
                        # (avoids inserting an empty batch when len == 0):
                        if len(docs_sources) == _batch_size:
                            if verbose:
                                print(
                                    f'inserting batch #{batch_num} for {_file}'
                                )
                            if not _dry_run:
                                insert_multiple_db_entries(
                                    _db,
                                    _collection=_collections['sources'],
                                    _db_entries=docs_sources,
                                    _verbose=False)
                            # flush:
                            docs_sources = []
                            batch_num += 1
                    except Exception as e_:
                        print(str(e_))

        # ingest remaining
        while len(docs_sources) > 0:
            try:
                # if Mongo crashed or disconnected, documents accumulate here;
                # keep retrying the insert until it succeeds
                if verbose:
                    print(f'inserting batch #{batch_num} for {_file}')
                if not _dry_run:
                    insert_multiple_db_entries(
                        _db,
                        _collection=_collections['sources'],
                        _db_entries=docs_sources,
                        _verbose=False)
                # flush regardless of _dry_run so this loop terminates:
                docs_sources = []

            except Exception as e:
                traceback.print_exc()
                print(e)
                print('Failed, waiting 5 seconds to retry')
                time.sleep(5)

    except Exception as e:
        traceback.print_exc()
        print(e)

    # clean up and disconnect from db:
    if _rm_file:
        os.remove(_file)
        if verbose:
            print(f'Successfully removed {_file}')
    _client.close()
    if verbose:
        print('Successfully disconnected from db')
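
A hypothetical invocation of the matchfile ingester above. The file name follows the pattern its parser expects (prefix_field_filter_cCCD_qQUAD_suffix), the collection names are placeholders, and filters, ccd_quad_2_rc, connect_to_db and insert_multiple_db_entries are assumed to come from the surrounding module:

process_file(
    _file='ztf_000600_zr_c05_q02_match.pytable',  # placeholder matchfile name
    _collections={'exposures': 'ZTF_exposures', 'sources': 'ZTF_sources'},
    _batch_size=2048,
    verbose=True,
    _dry_run=True,  # parse and prepare documents without writing to the DB
)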
Example #5
def process_file(args):
    file, collection, batch_size, rm, verbose = args
    # connect to MongoDB:
    if verbose:
        log("Connecting to DB")
    mongo = Mongo(
        host=config["database"]["host"],
        port=config["database"]["port"],
        replica_set=config["database"]["replica_set"],
        username=config["database"]["username"],
        password=config["database"]["password"],
        db=config["database"]["db"],
        verbose=0,
    )
    if verbose:
        log("Successfully connected")

    collection = "Gaia_EDR3"

    if verbose:
        log(f"Processing {file}")

    for chunk_index, dataframe_chunk in enumerate(
            pd.read_csv(file, chunksize=batch_size)):

        if verbose:
            log(f"{file}: processing batch # {chunk_index + 1}")

        dataframe_chunk["_id"] = dataframe_chunk["source_id"].apply(
            lambda x: str(x))

        batch = dataframe_chunk.fillna("DROPMEPLEASE").to_dict(
            orient="records")

        # pop nulls - save space
        batch = [{
            key: value
            for key, value in document.items()
            if value not in ("DROPMEPLEASE", "NOT_AVAILABLE")
        } for document in batch]

        bad_document_indexes = []

        for document_index, document in enumerate(batch):
            try:
                # GeoJSON for 2D indexing
                document["coordinates"] = dict()
                # string format: H:M:S, D:M:S
                document["coordinates"]["radec_str"] = [
                    deg2hms(document["ra"]),
                    deg2dms(document["dec"]),
                ]
                # for GeoJSON, must be lon:[-180, 180], lat:[-90, 90] (i.e. in deg)
                _radec_geojson = [document["ra"] - 180.0, document["dec"]]
                document["coordinates"]["radec_geojson"] = {
                    "type": "Point",
                    "coordinates": _radec_geojson,
                }
            except Exception as e:
                if verbose:
                    log(str(e))
                bad_document_indexes.append(document_index)

        if len(bad_document_indexes) > 0:
            if verbose:
                log("Removing bad docs")
            for index in sorted(bad_document_indexes, reverse=True):
                del batch[index]

        # ingest batch
        mongo.insert_many(collection=collection, documents=batch)

    # disconnect from db:
    try:
        mongo.client.close()
    finally:
        if verbose:
            log("Successfully disconnected from db")

    # clean up:
    if rm:
        os.remove(file)
        if verbose:
            log(f"Successfully removed {file}")
Example #6
            def clean_up_document(group):
                """ Format passed in dicts for Mongo insertion """
                document = {}
                for k, v in group.items():
                    if k == "matchedSourceID":
                        document[k] = group[k]
                        continue
                    if k in sources_int_fields:
                        document[k] = [
                            int(group[k][key2]) for key2 in group[k].keys()
                        ]
                    else:
                        document[k] = [
                            float(group[k][key2]) for key2 in group[k].keys()
                        ]

                # document["ra"] = document["ra"][0]
                # document["dec"] = document["dec"][0]

                # generate unique _id:
                document["_id"] = baseid + document["matchedSourceID"]
                document["filter"] = filt
                document["field"] = field
                document["ccd"] = ccd

                # GeoJSON for 2D indexing
                document["coordinates"] = dict()
                _ra = np.median(document["ra"])
                _dec = np.median(document["dec"])
                _radec_str = [deg2hms(_ra), deg2dms(_dec)]
                document["coordinates"]["radec_str"] = _radec_str
                # for GeoJSON, must be lon:[-180, 180], lat:[-90, 90] (i.e. in deg)
                _radec_geojson = [_ra - 180.0, _dec]
                document["coordinates"]["radec_geojson"] = {
                    "type": "Point",
                    "coordinates": _radec_geojson,
                }

                document["data"] = []
                for t, m, e, f, _ra, _dec in zip(
                        document["mjd"],
                        document["mag"],
                        document["magErr"],
                        document["ipacFlags"],
                        document["ra"],
                        document["dec"],
                ):
                    data_point = {
                        "mjd": t,
                        "mag": m,
                        "magerr": e,
                        "ipacflags": f,
                        "ra": _ra,
                        "dec": _dec,
                    }
                    # convert types for pymongo:
                    for k, v in data_point.items():
                        if k in sourcedata_int_fields:
                            data_point[k] = int(data_point[k])
                        else:
                            data_point[k] = float(data_point[k])
                            if k == "mjd":
                                data_point[k] = round(data_point[k], 5)
                            elif k not in ("ra", "dec"):
                                data_point[k] = round(data_point[k], 3)
                    document["data"].append(data_point)
                del (
                    document["mjd"],
                    document["mag"],
                    document["magErr"],
                    document["ipacFlags"],
                    document["ra"],
                    document["dec"],
                )
                document["data"].sort(key=lambda x: x["mjd"])

                return document
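
This variant receives a columnar group (per-epoch columns keyed by row index, e.g. as read from an HDF5 table) and pivots it into one document with a row-oriented data array. A hypothetical input and the resulting shape, assuming enclosing-scope values for baseid, filt, field, ccd and the *_int_fields sets:

group = {
    "matchedSourceID": 12345,
    "mjd": {0: 58000.123456, 1: 58001.654321},
    "mag": {0: 18.5123, 1: 18.4987},
    "magErr": {0: 0.0211, 1: 0.0198},
    "ipacFlags": {0: 0, 1: 0},
    "ra": {0: 123.456789, 1: 123.456788},
    "dec": {0: -12.345678, 1: -12.345679},
}
doc = clean_up_document(group)
# doc["data"] is now a list of per-epoch dicts sorted by "mjd", and the
# columnar "mjd"/"mag"/"magErr"/"ipacFlags"/"ra"/"dec" keys have been removed.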