def rmNonVariables(detDF, varDF, distThresh=0.000027778):
    """Drop detections that lie within distThresh of a known variable star.

    detDF      -- pandas DataFrame of detections with 'RA' and 'DEC' columns
    varDF      -- pandas DataFrame of variable stars with 'RA' and 'DEC' columns
    distThresh -- angular distance cutoff, presumably degrees (~0.1 arcsec
                  default) -- TODO confirm units against the catalog

    Returns detDF with the matching rows removed.
    """
    print('extracting coordinates')
    #convert columns into list of points
    varRAs = varDF['RA'].tolist()
    varDECs = varDF['DEC'].tolist()
    # list() so KDTree gets a concrete sequence (zip is a lazy iterator on py3)
    varPoints = list(zip(varRAs, varDECs))
    kdTree = KDTree(varPoints)
    detRAs = detDF['RA'].tolist()
    detDECs = detDF['DEC'].tolist()
    #remove the rows that are close to a variable star
    time0 = time.time()
    print('removing variable stars')
    rmList = []
    print('distance threshold:' + str(distThresh))
    print('number of detections:' + str(len(detDF)))
    for x in range(len(detDF)):
        printPercentage(x, len(detDF), time.time() - time0)
        pt1 = (float(detRAs[x]), float(detDECs[x]))
        # nearest-neighbour distance to any variable star
        dist, i = kdTree.query(pt1, 1)
        if dist < distThresh:
            rmList.append(x)
    print('\nnumber of detections removed: ' + str(len(rmList)))
    detDF = detDF.drop(detDF.index[rmList])
    print('number of detections remaining: ' + str(len(detDF)))
    return detDF
def wrapDets(csvFile):
    """Load a csv of detections and wrap every row in a Detection object.

    Column names SNOBJID / SNFAKE_ID / CCDNUM are normalized to
    objid / fakeid / ccd; a 'mag' column, when present, is converted to
    flux using a 31.4 zero point. Returns a list of Detection objects.
    """
    df = pd.read_csv(csvFile)
    df = df.rename(columns={
        'SNOBJID': 'OBJID',
        'SNFAKE_ID': 'FAKEID',
        'CCDNUM': 'CCD'
    })
    df.columns = df.columns.str.lower()
    size = len(df['objid'])
    # magnitudes are converted to fluxes; otherwise use the flux column
    if 'mag' in df.columns:
        fluxes = [10**((31.4 - m) / 2.5) for m in df['mag'].tolist()]
    else:
        fluxes = df['flux'].tolist()
    ras = df['ra'].tolist()
    decs = df['dec'].tolist()
    mjds = df['mjd'].tolist()
    objids = df['objid'].tolist()
    expnums = df['expnum'].tolist()
    ccds = df['ccd'].tolist()
    bands = df['band'].tolist()
    fakeids = df['fakeid'].tolist()
    detlist = []
    startT = time.time()
    lookAhead = 0
    for idx in range(size):
        printPercentage(idx, size, time.time() - startT)
        det = Detection(float(ras[idx]), float(decs[idx]), float(mjds[idx]),
                        float(fluxes[idx]), int(objids[idx]),
                        int(expnums[idx]), int(ccds[idx]), bands[idx],
                        lookAhead, int(fakeids[idx]))
        detlist.append(det)
    return detlist
def generalCombine(arr, similarity, keep=False, progress=False):
    """
    Merge nlets that share a sufficient fraction of detections.

    arr - numpy array of NLetRegion
    similarity - float between 0 and 1; an nlet pair is merged when they
                 share at least int(similarity * len(nlet.dets)) objids
    keep - set it to True to keep nlets in inactive list of NLetRegion
    progress - set it to True for showing progress

    Returns (combined, unchanged). NOTE: processed nlets are removed from
    their regions, so arr is consumed.
    """
    combined = resizableArray()
    unchanged = resizableArray()
    minRA = arr[0, 0].raLo
    minDec = arr[0, 0].decLo
    stepRA = arr[0, 0].raHi - minRA
    stepDec = arr[0, 0].decHi - minDec
    if progress:
        count = 0
        time0 = time.time()
        # count each nlet once (keyed by its objid tuple) so the progress
        # denominator is not inflated by nlets shared across regions
        uniqueNLets = {}
        for i in arr:
            for j in i:
                for nlet in j:
                    nlet.sortByMjd()
                    # was `uniqueNlets[...]` -- NameError (case mismatch)
                    uniqueNLets[tuple([x.objid for x in nlet.dets])] = nlet
        size = len(uniqueNLets)
    for x in arr:
        for y in x:  # was `for y in arr`: revisited whole grid per row
            for nlet in y:
                potential = []
                objid = set([x.objid for x in nlet.dets])
                target = int(similarity * len(nlet.dets))
                othernlets, coord = getSameRegNLet(nlet, arr, minRA, minDec,
                                                   stepRA, stepDec)
                for other in othernlets:
                    # was `others.dets` -- NameError (no such name)
                    otherID = set([x.objid for x in other.dets])
                    if len(objid.intersection(otherID)) >= target:
                        detgroup = [x for x in nlet.dets]
                        potential.append(
                            Triplet(
                                list(set([x for x in other.dets] + detgroup))))
                if len(potential) != 0:
                    for pot in potential:
                        combined.append(pot)
                else:
                    unchanged.append(nlet)
                if progress:
                    count += 1
                    printPercentage(count, size, time.time() - time0)
                # remove this nlet from every region containing it so it is
                # not processed again from a neighbouring cell
                for radec in coord:
                    i = int((radec[0] - minRA) / stepRA)
                    j = int((radec[1] - minDec) / stepDec)
                    if keep:
                        arr[i, j].inactive.append(nlet)
                    arr[i, j].nlets.remove(nlet)
    return combined, unchanged
def combine(arr, numdet, progress=False):
    """Merge nlets that differ by one detection into (numdet+1)-lets.

    arr      -- 2D numpy array of TripRegion cells tiling an RA/Dec window
    numdet   -- number of detections in every input nlet
    progress -- when True, print a running percentage

    Returns (combined, unchanged) as plain lists: the merged candidates
    and the nlets that could not be extended. NOTE(review): processed
    nlets are removed from their regions (loop tail), so arr is consumed.
    """
    # we want the merged one to contain n+1 detections
    target = numdet + 1
    #combined = []
    #unchanged = []
    combined = resizableArray()
    unchanged = resizableArray()
    # grid geometry read off the corner cell; step = one cell's extent
    minRA = arr[0, 0].raLo
    minDec = arr[0, 0].decLo
    stepRA = arr[0, 0].raHi - minRA
    stepDec = arr[0, 0].decHi - minDec
    newobjid = -1  # to be updated for the extra detection to be added
    if progress:
        count = 0
        time0 = time.time()
        size = 0
        for i in arr:
            for j in i:
                size += len(j.nlets)
        # each nlet is registered in numdet regions, so divide to
        # approximate the count of distinct nlets for the progress bar
        size /= numdet
    for x in arr:
        for y in x:
            # y is one of TripRegion
            for nlet1 in y.nlets:
                potential = []
                objid = [i.objid for i in nlet1.dets]
                # objids already used to extend nlet1 (avoid duplicates)
                addedId = []
                othernlets, coord = getSameRegNLet(nlet1, arr, minRA, minDec,
                                                   stepRA, stepDec)
                for nlet2 in othernlets:
                    detgroup = [x for x in nlet1.dets]
                    for det in nlet2.dets:
                        # add only detections nlet1 doesn't already have and
                        # that haven't already produced a candidate
                        if det.objid not in objid and det.objid not in addedId:
                            detgroup.append(det)
                            newobjid = det.objid
                    # check that there are n+1 objects
                    if len(detgroup) == target:
                        potential.append(Triplet(detgroup))
                        addedId.append(newobjid)
                if len(potential) == 0:
                    unchanged.append(nlet1)
                else:
                    for pot in potential:
                        combined.append(pot)
                if progress:
                    count += 1
                    printPercentage(count, size, time.time() - time0)
                # drop nlet1 from every region that holds it so it isn't
                # re-processed when we reach a neighbouring cell
                for radec in coord:
                    i = int((radec[0] - minRA) / stepRA)
                    j = int((radec[1] - minDec) / stepDec)
                    # if you want to keep the nlet, uncomment the line below
                    # arr[i,j].inactive.append(nlet1)
                    arr[i, j].nlets.remove(nlet1)
    return combined.toList(), unchanged.toList()
def detPrediction(nlets, filename):
    """Predict each nlet's position in every exposure of the relevant years.

    filename is a headerless csv of exposures; the search span covers the
    nlet's observing year plus the adjacent years. Each prediction is
    wrapped in a Detection carrying the error-ellipse (erra, errb, pa).

    Returns zip(nlets, predDets) pairing every nlet with its predictions.
    """
    cols = ['ind', 'expnum', 'nite', 'mjd', 'ra', 'dec', 'band',
            'exptime', 'on', 'num1', 'num2', 'num3']
    df = pd.read_csv(filename, header=None, delimiter=',', names=cols)
    explist = df['expnum']
    mjdlist = df['mjd']
    year = getYear(nlets[0].dets[0].mjd)
    # bucket exposures by observing-year index; getYear() == -1 is skipped
    division = {0: [], 1: [], 2: [], 3: []}
    for idx in range(len(explist)):
        yr = getYear(mjdlist[idx])
        if yr == -1:
            continue
        division[yr].append((explist[idx], mjdlist[idx]))
    prev = division[year - 1] if (year - 1) in division else []
    following = division[year + 1] if (year + 1) in division else []
    searchSpan = prev + following + division[year]
    predDets = []
    counter = 0
    nletsSize = len(nlets)
    for nlet in nlets:
        counter += 1
        predictions = []
        count = 0
        time0 = time.time()
        size = len(searchSpan)
        print('\n triplet number ' + str(counter) + ' of ' + str(nletsSize) +
              ' predictions for:')
        print(nlet.toStr())
        for expnum, mjd in searchSpan:
            count += 1
            printPercentage(count, size, time.time() - time0)
            coord, erra, errb, pa = nlet.predictPos(mjd)
            det = Detection(coord[0], coord[1], mjd, 2, 0, expnum, 0, 0, 60, 0)
            det.erra = erra
            det.errb = errb
            det.pa = pa
            predictions.append(det)
        predDets.append(predictions)
    return zip(nlets, predDets)
def linkDetections(regions):
    '''
    Link every detection in the region grid to its candidate pairs.

    regions[0][-1] has largest RA and lowest Dec
    regions[-1][0] has lowest RA and highest Dec
    To summarize: for regions[i][j], increasing i would increase Dec
    and increasing j would increase RA

    Returns the flat list of all detections, each annotated in place by
    linkDetection with its candidate links.
    '''
    startT = time.time()
    detectionLinks = []
    counter = len(regions)
    # overall sky window spanned by the grid
    Lo = regions[0][0]
    Hi = regions[-1][-1]
    RaLo = Lo.raLo
    RaHi = Hi.raHi
    DecLo = Lo.decLo
    DecHi = Hi.decHi
    boundary = {'minRa': RaLo, 'maxRa': RaHi, 'minDec': DecLo, 'maxDec': DecHi}
    deltaRa = RaHi - RaLo
    deltaDec = DecHi - DecLo
    # row corresponds to dec
    # column corresponds to ra
    numrow = len(regions)
    numcol = len(regions[0])
    # (ra extent, dec extent) of a single grid cell
    step = (deltaRa / numcol, deltaDec / numrow)
    for regionlst in regions:
        for region in regionlst:
            counter2 = len(region.detections)
            startT2 = time.time()
            for det in region.detections:
                #printPercentage(len(region.detections)-counter2, len(region.detections), time.time()-startT2)
                # neighbourhood the detection could plausibly link into
                subregion = getSubregion(det, regions, boundary, step)
                linkDetection(det, subregion)
                detectionLinks.append(det)
                counter2 -= 1
        # progress reported once per row of regions
        printPercentage(
            len(regions) - counter, len(regions), time.time() - startT)
        counter -= 1
    return detectionLinks
def linkGraph(detections, addEdges=True):
    """Build a networkx graph of detections and their linked detections.

    detections -- iterable of Detection objects carrying a .linkedList
    addEdges   -- when True, connect each detection to its links

    Returns (G, positions) where positions maps node -> (ra, dec), the
    layout dict expected by networkx drawing helpers.
    """
    G = nx.Graph()
    startT = time.time()
    count = 0
    # renamed from `dict`, which shadowed the builtin
    positions = {}
    for det in detections:
        printPercentage(count, len(detections), time.time() - startT)
        count += 1
        G.add_node(det)
        positions[det] = (det.ra, det.dec)
        for link in det.linkedList:
            G.add_node(link)
            positions[link] = (link.ra, link.dec)
            if addEdges:
                G.add_edge(det, link)
    return G, positions
def checkGoodOrbit(nlets, chiSqCut=5, progress=False):
    """Filter nlets down to those with a physically plausible orbit fit.

    An nlet is kept when its semi-major axis a > 2, eccentricity e < 1,
    and its chi-square is below chiSqCut.
    """
    goodnlet = []
    time0 = time.time()
    size = len(nlets)
    for count, nlet in enumerate(nlets, start=1):
        elements, err = nlet.calcOrbit()
        chisq = nlet.getChiSq()
        bound = elements['e'] < 1
        distant = elements['a'] > 2
        if distant and bound and chisq < chiSqCut:
            goodnlet.append(nlet)
        if progress:
            printPercentage(count, size, time.time() - time0)
    if progress:
        print("")
    return goodnlet
def connectGraph(triplets):
    """Build a graph chaining each triplet's detections in mjd order.

    Each triplet contributes a path: its detections sorted by mjd, with
    an edge between consecutive detections.

    Returns (G, positions) where positions maps node -> (ra, dec).
    """
    G = nx.Graph()
    startT = time.time()
    count = 0
    # renamed from `dict`, which shadowed the builtin
    positions = {}
    for trip in triplets:
        printPercentage(count, len(triplets), time.time() - startT)
        count += 1
        detList = sorted(trip.dets, key=lambda detection: detection.mjd)
        G.add_node(detList[0])
        positions[detList[0]] = (detList[0].ra, detList[0].dec)
        for i in range(1, len(detList)):
            G.add_node(detList[i])
            positions[detList[i]] = (detList[i].ra, detList[i].dec)
            G.add_edge(detList[i - 1], detList[i])
    return G, positions
def formTriplets(args):
    """Chain detection links two levels deep to form candidate triplets.

    args is a tuple (detPairs, queue). queue is 0 when not running under
    multiprocessing; in that mode progress is printed instead of pushing
    the result onto the queue.
    """
    detPairs, queue = args
    tripList = []
    time0 = time.time()
    counter = 0
    showProgress = (queue == 0)
    for det in detPairs:
        if showProgress:
            counter += 1
            printPercentage(counter, len(detPairs), time.time() - time0)
        # det -> link -> trip gives one triplet per second-level link
        for link in det.linkedList:
            for trip in link.linkedList:
                tripList.append(Triplet([det, link, trip]))
    if not showProgress:
        queue.put(tripList)
    return tripList
def splitList(tripList, chunks):
    """Split tripList into `chunks` equal-sized chunks (popping from the
    end); any remainder becomes one extra chunk. Mutates tripList."""
    print('\nspliting list of size ' + str(len(tripList)) + ' into ' +
          str(chunks) + ' chunks')
    size = len(tripList)
    numChunks = int(chunks)
    # rounded down; leftovers are handled after the loop
    chunkSize = int(size / numChunks)
    splitTrips = []
    time0 = time.time()
    for idx in range(numChunks):
        printPercentage(idx, numChunks, time.time() - time0)
        chunk = [tripList.pop() for _ in range(chunkSize)]
        splitTrips.append(chunk)
    print('\nappending ' + str(len(tripList)) + ' remaining trips')
    # the remainder of size/chunks
    if tripList:
        splitTrips.append(tripList)
    return splitTrips
def addDetections(triplet, predList, expDict):
    """Try to extend `triplet` with real detections matching predictions.

    predList -- predicted Detections, one per exposure to search
    expDict  -- maps expnum -> list of actual detections in that exposure

    Every candidate in the predicted exposure is handed to checkDetection,
    which returns the (possibly extended) triplet; the final triplet is
    returned. Removed: a large commented-out inline version of
    checkDetection and unused locals (chiThresh, count2, size2, time1).
    """
    counter = 0
    time0 = time.time()
    print('number of predictions to check: ' + str(len(predList)))
    for det in predList:
        counter += 1
        printPercentage(counter, len(predList), time.time() - time0)
        # exposures with no detections simply contribute no candidates
        potDets = expDict.get(det.expnum, [])
        for pdet in potDets:
            triplet = checkDetection(triplet, det, pdet)
    return triplet
def main():
    """Predict nlet positions in a season's exposures and pickle the
    predictions in chunks of `savesize` nlets.

    Fixes: removed a dangling `'''` that opened an unterminated string
    (leftover commented-out code), closed the pickle input handle, used
    floor division for the chunk index, and materialized the prediction
    pairs as a list so len()/indexing work on Python 3 as well.
    """
    args = argparse.ArgumentParser()
    args.add_argument(
        'expList',
        nargs=1,
        help='a corresponding list of exposures for that csvList')
    args.add_argument('potentialNlets',
                      nargs=1,
                      help='Nlets we want to add on to')
    args = args.parse_args()
    print('\nloading nlets ' + args.potentialNlets[0])
    with open(args.potentialNlets[0], 'rb') as f:
        nlets = pickle.load(f)
    print('\npredicting nlets in exposures')
    # list() so len() and indexing below work even if zip is lazy
    predDets = list(detPrediction(nlets, args.expList[0]))
    print('\n dictionary finished')
    saveName = args.potentialNlets[0].split('+')[-1].split('.')[0]
    saveName2 = args.potentialNlets[0].split('+')[-2].split('/')[-1]
    size = len(predDets)
    savesize = 5
    counter = 0
    print('\nsaving predictions')
    time0 = time.time()
    while counter < size:
        printPercentage(counter, size, time.time() - time0)
        predChunkTup = []
        for x in range(min(savesize, size - counter)):
            predChunkTup.append(predDets[counter])
            # zero out the orbit -- presumably so the pickle stays small /
            # picklable; TODO confirm downstream consumers re-fit it
            predChunkTup[x][0].orbit = 0
            counter += 1
        chunkNum = counter // savesize  # integer chunk index (was `/`)
        with open(
                'listOfPredictions+Chunk' + str(chunkNum) + '_' + saveName2 +
                '+' + saveName + '.pickle', 'wb') as f:
            pickle.dump(predChunkTup, f)
def formTriplets(detPairs, chunkSize, saveName, tripletStart=[-1,-1,-1], chunkStart = 1):
    """Form triplets from (detection, links) pairs, pickling them in
    chunks, with support for resuming an interrupted run.

    detPairs     -- list of (detection id, list-of-linked ids) pairs
    chunkSize    -- triplets per pickled chunk; <= 0 disables chunk dumps
    saveName     -- suffix for the chunk pickle filenames
    tripletStart -- [det, link, trip] to resume from; [-1,-1,-1] means
                    start from the beginning. NOTE(review): mutable
                    default argument -- harmless here (never mutated),
                    but a tuple default would be safer.
    chunkStart   -- index of the first chunk file to write

    Returns the (possibly partial) last chunk of triplets.
    """
    # detPairs, queue = args
    # map detection -> its links for the second-level lookup below
    linkDict = {}
    for pair in detPairs:
        linkDict[pair[0]] = pair[1]
    tripList = []
    time0 = time.time()
    counter = 0
    x = chunkStart
    trackCount = 0
    print('size of each chunk: ' +str(chunkSize))
    # notFound stays True until the resume point tripletStart is reached;
    # while True (and a resume point was given), non-matching dets/links/
    # trips are skipped without emitting triplets
    notFound = True
    for det in detPairs:
        counter += 1
        links = det[1]
        if counter%1000==0:
            printPercentage(counter, len(detPairs), time.time()-time0)
        if det[0] != tripletStart[0] and tripletStart[0] != -1 and notFound:
            continue
        for link in links:
            if link != tripletStart[1] and tripletStart[1] != -1 and notFound :
                continue
            links2 = linkDict[link]
            for trip in links2:
                if trip != tripletStart[2] and tripletStart[2] != -1 and notFound:
                    continue
                elif(det[0] == tripletStart[0] and link == tripletStart[1] and trip == tripletStart[2]):
                    # exact resume point found: stop skipping from here on
                    notFound = False
                # id of the triplet within its chunk
                trackid = (trackCount%chunkSize)
                triplet = (trackid, [det[0], link, trip])
                trackCount += 1
                tripList.append(triplet)
                # dump a full chunk to disk and start a fresh one
                if chunkSize > 0 and len(tripList) >= chunkSize:
                    pickleTriplets(tripList, 'chunk{0:06d}'.format(x) +
                                   '+' + saveName + '.pickle', False)
                    # writeTriplets(tripList, 'chunk{0:06d}'.format(x) +
                    #               '+' + saveName + '.txt', False, False)
                    x += 1
                    trackCount = 0
                    tripList = []
    return tripList
def graph_triplets(triplets, savename1, orbs, fakeDict=0):
    """Plot each triplet: its original and newly-added detections, the
    predicted path over +/- 1 year, and (when fakeDict is supplied) the
    missed and fake-predicted points."""
    print('\nplotting graphs')
    counter = 0
    time0 = time.time()
    for trip in triplets:
        printPercentage(counter, len(triplets), time.time() - time0)
        savename = 'triplet' + str(counter) + '+' + savename1
        counter += 1
        orgPoints = []
        newPoints = []
        # lookAhead == -1 marks detections added after the original fit
        for det in trip.dets:
            bucket = newPoints if det.lookAhead == -1 else orgPoints
            bucket.append((det.ra, det.dec))
        if fakeDict:
            fakeid = [d.fakeid for d in trip.dets if d.lookAhead != -1]
            missPoints = findMiss(trip, fakeDict[fakeid[0]])
        else:
            missPoints = []
        trip.sortByMjd()
        mjd1 = trip.dets[0].mjd
        mjd2 = trip.dets[-1].mjd
        predPoints = generate_predictions(trip, mjd1 - 365, mjd2 + 365, orbs)
        if fakeDict:
            fakePoints = generate_fake_preds(trip, mjd1 - 365, mjd2 + 365)
        else:
            fakePoints = []
        # elements may be None/invalid; fall back to sentinel values
        try:
            a = trip.elements['a']
            e = trip.elements['e']
            i = trip.elements['i']
        except TypeError:
            a = e = i = -1
        graph_points(newPoints, orgPoints, predPoints, savename, a, e, i,
                     trip.getChiSq(), missPoints, fakePoints,
                     trip.realLength())
def rmDuplicates(detDF, sortedInd):
    """Drop near-duplicate detections from a DataFrame sorted on `sortedInd`.

    detDF     -- pandas DataFrame with EXPNUM/CCDNUM/RA/DEC/BAND/OBJID and
                 either MAG or FLUX columns
    sortedInd -- name of the column the frame is sorted by

    Each row is compared to subsequent rows for as long as
    checkWithinFloating reports they are within the sorted window; rows
    flagged as floating duplicates are removed. Returns the pruned frame.

    Fix: membership test `x in rmInd` was an O(n) list scan per row
    (quadratic overall); a mirror set makes it O(1).
    """
    print('size before removing duplicates: ' + str(len(detDF)))
    rmInd = []
    rmSet = set()  # mirrors rmInd for O(1) membership tests
    expList = detDF['EXPNUM'].tolist()
    ccdList = detDF['CCDNUM'].tolist()
    raList = detDF['RA'].tolist()
    decList = detDF['DEC'].tolist()
    # temporary fix for mag/flux issue
    if 'MAG' in detDF.columns:
        fluxList = detDF['MAG'].tolist()
        fluxList = [10**((31.4 - x) / 2.5) for x in fluxList]
    else:
        fluxList = detDF['FLUX'].tolist()
    bandList = detDF['BAND'].tolist()
    sortedInd = detDF[sortedInd].tolist()
    objidList = detDF['OBJID'].tolist()
    time0 = time.time()
    for x in range(len(expList)):
        # rows already flagged as duplicates are not used as anchors
        if x in rmSet:
            continue
        printPercentage(x, len(expList), time.time() - time0)
        props1 = (expList[x], ccdList[x], raList[x], decList[x], fluxList[x],
                  bandList[x], sortedInd[x])
        objid1 = objidList[x]
        withinSorted = True
        # scan forward while still inside the sorted comparison window
        while withinSorted and x < len(expList) - 1:
            x += 1
            props2 = (expList[x], ccdList[x], raList[x], decList[x],
                      fluxList[x], bandList[x], sortedInd[x])
            objid2 = objidList[x]
            withinFloating, withinSorted = checkWithinFloating(props1, props2)
            if withinFloating:
                rmInd.append(x)
                rmSet.add(x)
    print('\nnumber removed: ' + str(len(rmInd)))
    detDF = detDF.drop(detDF.index[rmInd])
    print('size after removing: ' + str(len(detDF)))
    return detDF
def splitList(tripList, chunks):
    """Split tripList into chunks of size `chunks` (note: the parameter is
    the CHUNK SIZE, not the number of chunks -- see the inline apology);
    any remainder becomes one extra chunk. Mutates tripList by popping.

    Fix: `size / chunkSize` is a float on Python 3, which breaks range();
    floor division `//` is identical on Python 2 ints and correct on 3.
    """
    print('spliting list of size ' + str(len(tripList)) + ' into ' +
          str(chunks) + ' sized chunks')
    size = len(tripList)
    #sorry i made chunks be the chunksize instead of
    # number of chunks i promise i'll fix the variables
    # and documentation soon
    chunkSize = chunks
    chunks = size // chunkSize  # was `/`: float on py3, breaks range()
    print('chunk size: ' + str(chunkSize))
    splitTrips = []
    time0 = time.time()
    for x in range(chunks):
        chunkList = []
        printPercentage(x, chunks, time.time() - time0)
        for y in range(chunkSize):
            chunkList.append(tripList.pop())
        splitTrips.append(chunkList)
    print('\nappending ' + str(len(tripList)) + ' remaining trips')
    #the remainder of size/chunks
    if tripList:
        splitTrips.append(tripList)
    return splitTrips
def rmCCDs(df, thresh):
    """Remove every detection on any (exposure, CCD) pair whose detection
    count exceeds `thresh`.

    df     -- pandas DataFrame with EXPNUM and CCDNUM columns
    thresh -- maximum allowed detections per (expnum, ccdnum)

    Returns the pruned DataFrame.

    Bug fix: the first detection of each exposure was never counted --
    the `else` branch created an empty inner dict without recording the
    current ccd, undercounting every exposure's first hit by one.
    Also: .iteritems() (py2-only) replaced by .items(), and the removal
    pass uses a set for O(1) membership.
    """
    print('number of detections: ' + str(len(df)))
    # ccdict[expnum][ccdnum] -> number of detections on that CCD
    ccdict = {}
    expnums = df['EXPNUM'].tolist()
    ccdnums = df['CCDNUM'].tolist()
    time0 = time.time()
    #count the number of things with certain ccd
    for x in range(len(df)):
        printPercentage(x, len(df), time.time() - time0)
        exp = expnums[x]
        ccd = ccdnums[x]
        if exp not in ccdict:
            ccdict[exp] = {}
        # count this detection even when the exposure is new (was dropped)
        ccdict[exp][ccd] = ccdict[exp].get(ccd, 0) + 1
    #list of (expnum, ccdnum)
    overThresh = []
    #check if any are over thresh
    for key, value in ccdict.items():
        for key2, value2 in value.items():
            if value2 > thresh:
                print('expnum:' + str(key) + ' ccdnum:' + str(key2) +
                      ' hits:' + str(value2))
                overThresh.append((key, key2))
    # remove detections on a flagged (expnum, ccdnum)
    overSet = set(overThresh)
    rmList = []
    for x in range(len(df)):
        if (expnums[x], ccdnums[x]) in overSet:
            rmList.append(x)
    print('rows removed:' + str(len(rmList)))
    df = df.drop(df.index[rmList])
    print('size after remove: ' + str(len(df)))
    return df
def main():
    """Link detections in a pickled season's region grid and save the
    result as both a text file and a pickle.

    Fixes: pickle file opened in binary mode inside a context manager
    (was `pickle.load(open(path))` -- text mode, leaked handle), and the
    final py2-only `print '\\n'` statement changed to the call form used
    everywhere else in this file.
    """
    args = argparse.ArgumentParser()
    args.add_argument(
        'regionsFile',
        nargs=1,
        help='path to pickle file for regions in the season; ' +
        'filename has format regions+SNOBS_SEASON###_ML0#.pickle')
    args = args.parse_args()
    # load the regions that split the season
    print('loading region file')
    with open(args.regionsFile[0], 'rb') as f:
        regions = pickle.load(f)
    orgFile = args.regionsFile[0].split('+')[-1].split('.')[0]
    # link up each detection with potential pairs
    print('linking detections')
    startTime = time.time()
    detectionLinks = linkDetections(np.array(regions))
    print('\n')
    print('Time taken: ' + str(time.time() - startTime))
    txtfile = 'detectionLinks+' + orgFile + '.txt'
    #write to a text file
    with open(txtfile, 'w+') as f:
        counter = len(detectionLinks)
        startT = time.time()
        for d in detectionLinks:
            printPercentage(
                len(detectionLinks) - counter, len(detectionLinks),
                time.time() - startT)
            printDet(d, f)
            counter -= 1
    #save list as a pickle file
    saveName = 'detectionLinks+' + orgFile + '.pickle'
    with open(saveName, 'wb') as f:
        pickle.dump(detectionLinks, f)
    print('\n')
def main():
    """Merge all pickled `goodtriplets` chunk files in a folder into a
    single text file and a single pickle.

    Fixes: chunk files are opened via a context manager (the handle was
    left open), and paths are built with os.path.join so the folder
    argument works with or without a trailing slash.
    """
    args = argparse.ArgumentParser()
    args.add_argument('folder', nargs=1, help='path to folder of chunks')
    args = args.parse_args()
    folder = args.folder[0]
    files = os.listdir(folder)
    tripList = []
    saveName = ''
    print('opening files in: ' + folder)
    time0 = time.time()
    counter = 0
    for f in files:
        counter += 1
        printPercentage(counter, len(files), time.time() - time0)
        # only merge pickled goodtriplets chunks
        if f.split('+')[0] == 'goodtriplets' and f.split('.')[-1] == 'pickle':
            path = os.path.join(folder, f)
            print('\nopening: ' + path)
            with open(path, 'rb') as fh:
                trips = pickle.load(fh)
            saveName = f.split('+')[-1].split('.')[0]
            for t in trips:
                tripList.append(t)
    writeTriplets(tripList, 'goodTriplets+' + saveName + '.txt', True)
    pickleTriplets(tripList, 'goodTriplets+' + saveName + '.pickle')