def assign_columns(self, cidict, flines):
    """Assign columns based on header.

    Interactively map the columns of a parsed text table to photometry
    keys.  The mapping is written into ``cidict`` (key -> column index,
    or a list for multi-column/joined keys).  Side effects on ``self``:
    sets ``_first_data`` (index of the first numeric data row),
    ``_data_type`` (1 = magnitudes, 2 = count rates, 3 = flux
    densities), and the fallback values ``_zp``, ``_ufd`` and
    ``_system``.

    Parameters
    ----------
    cidict : OrderedDict
        Mapping from photometry keys to column indices; modified in
        place.
    flines : list of list of str
        Rows of the input table, already split into columns.
    """
    used_cis = OrderedDict()
    akeys = list(self._critical_keys) + list(self._helpful_keys)
    dkeys = list(self._dep_keys)
    prt = self._printer

    # Header rows contain no numeric entries; the first row with a
    # number marks the start of the data.
    for fi, fl in enumerate(flines):
        if not any([is_number(x) for x in fl]):
            # Try to associate column names with common header keys.
            conflict_keys = []
            conflict_cis = []
            for ci, col in enumerate(fl):
                for key in self._header_keys:
                    if any([(x[0] if isinstance(x, tuple) else x) ==
                            col.lower() for x in self._header_keys[key]]):
                        if key in cidict or ci in used_cis:
                            # There is a conflict, ask user.
                            conflict_keys.append(key)
                            conflict_cis.append(ci)
                        else:
                            ind = [
                                (x[0] if isinstance(x, tuple) else x)
                                for x in self._header_keys[key]].index(
                                    col.lower())
                            match = self._header_keys[key][ind]
                            # Tuple matches carry a unit as their last
                            # element; store it alongside the index.
                            cidict[key] = [match[-1], ci] if isinstance(
                                match, tuple) else ci
                            used_cis[ci] = key
                        break
            # Drop ambiguous assignments; the user is prompted for these
            # keys later.
            for ck in conflict_keys:
                if ck in cidict:
                    ci = cidict[ck]
                    # NOTE(review): `ci` may be a [unit, index] list here,
                    # which would make `used_cis[ci]` raise — presumably
                    # conflicts only arise for plain-index keys; confirm.
                    del cidict[ck]
                    del used_cis[ci]
        else:
            self._first_data = fi
            break

    # Look for columns that are band names if no mag/counts/flux dens
    # column was found.
    if (not any([x in cidict for x in [
            PHOTOMETRY.MAGNITUDE, PHOTOMETRY.COUNT_RATE,
            PHOTOMETRY.FLUX_DENSITY]])):
        # Delete `E_MAGNITUDE` and `BAND` if they exist (we'll need to find
        # for each column).
        key = PHOTOMETRY.MAGNITUDE
        ekey = PHOTOMETRY.E_MAGNITUDE
        bkey = PHOTOMETRY.BAND
        if ekey in cidict:
            ci = cidict[ekey]
            del cidict[used_cis[ci]]
            del used_cis[ci]
        if bkey in cidict:
            ci = cidict[bkey]
            del cidict[used_cis[ci]]
            del used_cis[ci]
        for fi, fl in enumerate(flines):
            if not any([is_number(x) for x in fl]):
                # Try to associate column names with common header keys.
                for ci, col in enumerate(fl):
                    if ci in used_cis:
                        continue
                    if col in self._band_names:
                        # Each band-named column is a magnitude column.
                        cidict.setdefault(key, []).append(ci)
                        used_cis[ci] = key
                        cidict.setdefault(bkey, []).append(col)
                    elif col in self._emagstrs:
                        cidict.setdefault(ekey, []).append(ci)
                        used_cis[ci] = ekey

    # See which keys we collected. If we are missing any critical keys, ask
    # the user which column they are.

    # First ask the user if this data is in magnitudes or in counts.
    self._data_type = 1
    if (PHOTOMETRY.MAGNITUDE in cidict and
            PHOTOMETRY.COUNT_RATE not in cidict and
            PHOTOMETRY.FLUX_DENSITY not in cidict):
        self._data_type = 1
    elif (PHOTOMETRY.MAGNITUDE not in cidict and
          PHOTOMETRY.COUNT_RATE in cidict and
          PHOTOMETRY.FLUX_DENSITY not in cidict):
        self._data_type = 2
    elif (PHOTOMETRY.MAGNITUDE not in cidict and
          PHOTOMETRY.COUNT_RATE not in cidict and
          PHOTOMETRY.FLUX_DENSITY in cidict):
        self._data_type = 3
    else:
        self._data_type = prt.prompt(
            'counts_mags_fds', kind='option',
            options=['Magnitudes', 'Counts (per second)',
                     'Flux Densities (Jansky)'],
            none_string=None)

    # Prune the asked-for/dependent key lists down to the chosen
    # data type.
    if self._data_type in [1, 3]:
        akeys.remove(PHOTOMETRY.COUNT_RATE)
        akeys.remove(PHOTOMETRY.E_COUNT_RATE)
        akeys.remove(PHOTOMETRY.ZERO_POINT)
        if (PHOTOMETRY.MAGNITUDE in akeys and
                PHOTOMETRY.E_MAGNITUDE in akeys):
            # Ask for the error immediately after the magnitude.
            akeys.remove(PHOTOMETRY.E_MAGNITUDE)
            akeys.insert(
                akeys.index(PHOTOMETRY.MAGNITUDE) + 1,
                PHOTOMETRY.E_MAGNITUDE)
        if (PHOTOMETRY.E_LOWER_MAGNITUDE in cidict and
                PHOTOMETRY.E_UPPER_MAGNITUDE in cidict):
            akeys.remove(PHOTOMETRY.E_MAGNITUDE)
        dkeys.remove(PHOTOMETRY.E_COUNT_RATE)
    if self._data_type in [2, 3]:
        akeys.remove(PHOTOMETRY.MAGNITUDE)
        akeys.remove(PHOTOMETRY.E_MAGNITUDE)
        dkeys.remove(PHOTOMETRY.E_MAGNITUDE)
    if self._data_type in [1, 2]:
        akeys.remove(PHOTOMETRY.FLUX_DENSITY)
        akeys.remove(PHOTOMETRY.E_FLUX_DENSITY)
        if (PHOTOMETRY.E_LOWER_FLUX_DENSITY in cidict and
                PHOTOMETRY.E_UPPER_FLUX_DENSITY in cidict):
            akeys.remove(PHOTOMETRY.E_FLUX_DENSITY)
        dkeys.remove(PHOTOMETRY.E_FLUX_DENSITY)
        dkeys.remove(PHOTOMETRY.U_FLUX_DENSITY)

    # Transpose the data rows into columns and build short previews of
    # each column for the prompts below.
    columns = np.array(flines[self._first_data:]).T.tolist()
    colstrs = np.array([
        ', '.join(x[:5]) + ', ...'
        for x in columns])
    colinds = np.setdiff1d(np.arange(
        len(colstrs)), list([x[-1] if (
            isinstance(x, list) and not isinstance(
                x, string_types)) else x for x in cidict.values()]))
    ignore = prt.message('ignore_column', prt=False)
    specify = prt.message('specify_column', prt=False)
    for key in akeys:
        # Columns already claimed by other keys.
        selected_cols = [
            y for y in [a for b in [
                listify(x) for x in list(cidict.values())]
                for a in b] if isinstance(y, (int, np.integer))]
        if key in cidict:
            continue
        if key in dkeys and self._use_mc:
            continue
        # Restrict the candidate columns to those whose contents are
        # compatible with the key's type.
        if key.type == KEY_TYPES.NUMERIC:
            lcolinds = [x for x in colinds
                        if any(is_number(y) for y in columns[x]) and
                        x not in selected_cols]
        elif key.type == KEY_TYPES.TIME:
            lcolinds = [x for x in colinds
                        if any(is_date(y) or is_number(y)
                               for y in columns[x]) and
                        x not in selected_cols]
        elif key.type == KEY_TYPES.STRING:
            lcolinds = [x for x in colinds
                        if any(not is_number(y) for y in columns[x]) and
                        x not in selected_cols]
        else:
            lcolinds = [x for x in colinds if x not in selected_cols]
        select = False
        selects = []
        while select is False:
            mc = 1
            if key in self._mc_keys:
                # Key may appear once per row or in several columns.
                pkey = self._inflect.plural(key)
                text = prt.message(
                    'one_per_line', [key, pkey, pkey], prt=False)
                mc = prt.prompt(
                    text, kind='option', message=False,
                    none_string=None,
                    options=[
                        'One `{}` per row'.format(key),
                        'Multiple `{}` per row'.format(pkey)])
            if mc == 1:
                text = prt.message(
                    'no_matching_column', [key], prt=False)
                ns = (
                    ignore if key in (
                        self._optional_keys + self._helpful_keys) else
                    specify if key in self._specify_keys
                    else None)
                if len(colstrs[lcolinds]):
                    select = prt.prompt(
                        text, message=False,
                        kind='option', none_string=ns,
                        default=('j' if ns is None and
                                 len(colstrs[lcolinds]) > 1 else
                                 None if ns is None else 'n'),
                        options=colstrs[lcolinds].tolist() + (
                            [('Multiple columns need to be joined.', 'j')]
                            if len(colstrs[lcolinds]) > 1 else []))
                else:
                    select = None
                if select == 'j':
                    # User wants to join several columns into one value.
                    select = None
                    jsel = None
                    selects.append('j')
                    while jsel != 'd' and len(lcolinds):
                        jsel = prt.prompt(
                            'join_which_columns', default='d',
                            kind='option', none_string=None,
                            options=colstrs[lcolinds].tolist() + [
                                ('All columns to be joined '
                                 'have been selected.', 'd')
                            ])
                        if jsel != 'd':
                            selects.append(lcolinds[jsel - 1])
                            lcolinds = np.delete(lcolinds, jsel - 1)
            else:
                # Multiple columns per row: collect every column for the
                # key, plus one column (or fixed value) per dependent key.
                self._use_mc = True
                select = False
                while select is not None:
                    text = prt.message(
                        'select_mc_column', [key], prt=False)
                    select = prt.prompt(
                        text, message=False,
                        kind='option', default='n',
                        none_string='No more `{}` columns.'.format(key),
                        options=colstrs[lcolinds].tolist())
                    if select is not None and select is not False:
                        selects.append(lcolinds[select - 1])
                        lcolinds = np.delete(lcolinds, select - 1)
                    else:
                        break
                    for dk in dkeys:
                        dksel = None
                        while dksel is None:
                            text = prt.message(
                                'select_dep_column', [dk, key],
                                prt=False)
                            sk = dk in self._specify_keys
                            if not sk:
                                dksel = prt.prompt(
                                    text, message=False, kind='option',
                                    none_string=None,
                                    options=colstrs[lcolinds].tolist())
                                if dksel is not None:
                                    selects.append(lcolinds[dksel - 1])
                                    lcolinds = np.delete(
                                        lcolinds, dksel - 1)
                            else:
                                spectext = prt.message(
                                    'specify_mc_value', [dk, key],
                                    prt=False)
                                val = ''
                                # BUG FIX: was `val.strip() is ''`, an
                                # identity comparison with a literal.
                                while val.strip() == '':
                                    val = prt.prompt(
                                        spectext, message=False,
                                        kind='string')
                                selects.append(val)
                                break
        if select is not None:
            # Single column chosen from the menu.
            iselect = int(select)
            cidict[key] = lcolinds[iselect - 1]
            colinds = np.delete(colinds, np.argwhere(
                colinds == lcolinds[iselect - 1]))
        elif len(selects):
            if selects[0] == 'j':
                # Joined columns are stored as the raw selection list.
                cidict[key] = selects
            else:
                # Interleaved selections: every len(allk)-th entry
                # belongs to the same key.
                kdkeys = [key] + dkeys
                allk = list(OrderedDict.fromkeys(kdkeys).keys())
                for ki, k in enumerate(allk):
                    cidict[k] = [
                        colinds[s - 1] if isinstance(s, (
                            int, np.integer)) else s
                        for s in selects[ki::len(allk)]]
                for s in selects:
                    if not isinstance(s, (int, np.integer)):
                        continue
                    colinds = np.delete(colinds, np.argwhere(
                        colinds == s - 1))
        elif key in self._specify_keys:
            # No matching column: ask for a fixed value instead.
            msg = ('specify_value_blank' if key in self._helpful_keys
                   else 'specify_value')
            text = prt.message(msg, [key], prt=False)
            cidict[key] = prt.prompt(
                text, message=False, kind='string',
                allow_blank=(key in self._helpful_keys))

    # Global fallbacks required by the chosen data type.
    self._zp = ''
    if self._data_type == 2 and PHOTOMETRY.ZERO_POINT not in cidict:
        while not is_number(self._zp):
            self._zp = prt.prompt('zeropoint', kind='string')

    self._ufd = None
    if self._data_type == 3 and PHOTOMETRY.U_FLUX_DENSITY not in cidict:
        while ((self._ufd.lower() if self._ufd is not None else None)
               not in ['µjy', 'mjy', 'jy', 'microjy', 'millijy', 'jy',
                       'microjansky', 'millijansky', 'jansky', '']):
            self._ufd = prt.prompt('u_flux_density', kind='string')

    self._system = None
    if self._data_type == 1 and PHOTOMETRY.SYSTEM not in cidict:
        systems = ['AB', 'Vega']
        self._system = prt.prompt(
            'system', kind='option', options=systems,
            none_string='Use default for all bands.', default='n')
        if self._system is not None:
            self._system = systems[int(self._system) - 1]

    if (PHOTOMETRY.INSTRUMENT not in cidict and
            PHOTOMETRY.TELESCOPE not in cidict):
        prt.message('instrument_recommended', warning=True)
def adjust_fixed_parameters(self, variance_for_each=None, output=None):
    """Create free parameters that depend on loaded data.

    When ``'band'`` appears in ``variance_for_each``, the single
    ``variance`` task is replaced by one variance task per band whose
    mean wavelength differs from the previously anchored band by at
    least a minimum fractional amount.  Finally, parameter tasks whose
    conditional inputs were fixed by the data are themselves fixed.

    Parameters
    ----------
    variance_for_each : list, optional
        Quantities for which a separate variance should be created;
        may contain ``'band'`` optionally followed by a numeric minimum
        wavelength fraction.
    output : dict, optional
        Output of a prior ``run_stack`` call; ``'all_band_indices'``
        and per-parameter entries are read from it.
    """
    # BUG FIX: avoid mutable default arguments (shared across calls).
    variance_for_each = [] if variance_for_each is None else (
        variance_for_each)
    output = {} if output is None else output

    unique_band_indices = list(
        sorted(set(output.get('all_band_indices', []))))
    # Band index < 0 denotes an observation with no associated band,
    # which still needs a generic variance task.
    needs_general_variance = any(
        np.array(output.get('all_band_indices', [])) < 0)

    new_call_stack = OrderedDict()
    for task in self._call_stack:
        cur_task = self._call_stack[task]
        vfe = listify(variance_for_each)
        if task == 'variance' and 'band' in vfe:
            # Optional numeric argument after 'band' overrides the
            # minimum fractional wavelength difference.
            vfi = vfe.index('band') + 1
            mwfd = float(vfe[vfi]) if (
                vfi < len(vfe) and is_number(
                    vfe[vfi])) else self.MIN_WAVE_FRAC_DIFF
            # Find photometry in call stack.
            ptask = None
            for ptask in self._call_stack:
                if ptask == 'photometry':
                    awaves = self._modules[ptask].average_wavelengths(
                        unique_band_indices)
                    abands = self._modules[ptask].bands(
                        unique_band_indices)
                    band_pairs = list(sorted(zip(awaves, abands)))
                    break
            # NOTE(review): if no 'photometry' task exists, `band_pairs`
            # is unbound below — presumably a photometry task is always
            # present when band variances are requested; confirm.
            owav = 0.0
            variance_bands = []
            for (awav, band) in band_pairs:
                wave_frac_diff = abs(awav - owav) / (awav + owav)
                if wave_frac_diff < mwfd:
                    continue
                new_task_name = '-'.join([task, 'band', band])
                if new_task_name in self._call_stack:
                    continue
                new_task = deepcopy(cur_task)
                if 'latex' in new_task:
                    new_task['latex'] += '_{\\rm ' + band + '}'
                # Single assignment (the original redundantly assigned
                # the same object twice).
                new_call_stack[new_task_name] = new_task
                self._modules[new_task_name] = self._load_task_module(
                    new_task_name, call_stack=new_call_stack)
                owav = awav
                variance_bands.append([awav, band])
            if needs_general_variance:
                new_call_stack[task] = deepcopy(cur_task)
            if self._pool.is_master():
                self._printer.message(
                    'anchoring_variances',
                    [', '.join([x[1] for x in variance_bands])],
                    wrapped=True)
            self._modules[ptask].set_variance_bands(variance_bands)
        else:
            new_call_stack[task] = deepcopy(cur_task)
        # Fixed any variables to be fixed if any conditional inputs are
        # fixed by the data.
        # if any([listify(x)[-1] == 'conditional'
        #         for x in cur_task.get('inputs', [])]):
    self._call_stack = new_call_stack

    for task in reversed(self._call_stack):
        cur_task = self._call_stack[task]
        for inp in cur_task.get('inputs', []):
            other = listify(inp)[0]
            if (cur_task['kind'] == 'parameter' and
                    output.get(other, None) is not None):
                # NOTE(review): the polarity of this guard looks
                # suspicious (fixing `task` when `other` is NOT fixed);
                # preserved as written — confirm against upstream.
                if (not self._modules[other]._fixed or
                        self._modules[other]._fixed_by_user):
                    self._modules[task]._fixed = True
                    self._modules[task]._derived_keys = list(
                        set(self._modules[task]._derived_keys + [task]))
def generate_event_list(self, event_list):
    """Generate a list of events and/or convert events to JSON format.

    Each entry of ``event_list`` that is an existing non-JSON file is
    parsed (via astropy's CDS/LaTeX readers, falling back to delimiter
    sniffing), its columns are mapped to photometry keys with
    ``assign_columns``, and one JSON file per event is written to the
    run directory.  All other entries are passed through unchanged.

    Parameters
    ----------
    event_list : list of str
        Event names and/or paths to data files to convert.

    Returns
    -------
    list of str
        Event names/JSON filenames to be processed downstream.
    """
    prt = self._printer
    cidict = OrderedDict()
    intro_shown = False

    new_event_list = []
    previous_file = None
    for event in event_list:
        rsource = {SOURCE.NAME: self._DEFAULT_SOURCE}
        use_self_source = None
        new_events = []
        toffset = Decimal('0')
        if ('.' in event and os.path.isfile(event) and
                not event.endswith('.json')):
            if not intro_shown:
                prt.message('converter_info')
                intro_shown = True

            prt.message('converting_to_json', [event])

            with open(event, 'r') as f:
                ftxt = f.read()

            # Try a couple of table formats from astropy.
            table = None
            try:
                table = read(ftxt, Reader=Cds, guess=False)
            except Exception:
                pass
            else:
                prt.message('convert_cds')
                flines = [table.colnames] + [
                    list(x) for x in np.array(table).tolist()]
                for i in range(len(flines)):
                    flines[i] = [str(x) for x in flines[i]]

            try:
                table = read(ftxt, Reader=Latex, guess=False)
            except Exception:
                pass
            else:
                prt.message('convert_latex')
                flines = [table.colnames] + [
                    list(x) for x in np.array(table).tolist()]

            if table is None:
                # Count to try and determine delimiter.
                delims = [' ', '\t', ',', ';', '|', '&']
                delimnames = [
                    'Space: ` `', 'Tab: `\t`', 'Comma: `,`',
                    'Semi-colon: `;`', 'Bar: `|`', 'Ampersand: `&`']
                delim = None
                delimcounts = [ftxt.count(x) for x in delims]
                maxdelimcount = max(delimcounts)
                delim = delims[delimcounts.index(maxdelimcount)]
                # If two delimiter options are close in count, ask user.
                for i, x in enumerate(delimcounts):
                    if x > 0.5 * maxdelimcount and delims[i] != delim:
                        delim = None
                if delim is None:
                    odelims = list(np.array(delimnames)[
                        np.array(delimcounts) > 0])
                    delim = delims[prt.prompt(
                        'delim', kind='option', options=odelims) - 1]
                # Characters to strip: every other delimiter.
                ad = list(delims)
                ad.remove(delim)
                ad = ''.join(ad)

                fsplit = ftxt.splitlines()
                fsplit = [
                    x.replace('$', '').replace('\\pm', delim)
                    .replace('±', delim).replace('(', delim + '(')
                    .strip(ad + '()# ').replace('′', "'")
                    for x in fsplit]
                flines = []
                for fs in fsplit:
                    flines.append(list(
                        csv.reader([fs], delimiter=delim))[0])

                flines = [[
                    x.strip(ad + '#$()\\') for x in y] for y in flines]

                # Find band columns if they exist and insert error columns
                # if they don't exist.
                for fi, fl in enumerate(list(flines)):
                    flcopy = list(fl)
                    offset = 0
                    if not any([is_number(x) for x in fl]):
                        for fci, fc in enumerate(fl):
                            if (fc in self._band_names and
                                    (fci == len(fl) - 1 or
                                     fl[fci + 1] not in self._emagstrs)):
                                flcopy.insert(fci + 1 + offset, 'e mag')
                                offset += 1
                    flines[fi] = flcopy

                # Find the most frequent column count. These are probably
                # the tables we wish to read.
                flens = [len(x) for x in flines]
                ncols = Counter(flens).most_common(1)[0][0]

                newlines = []
                potential_name = None
                for fi, fl in enumerate(flines):
                    if (len(fl) and flens[fi] == 1 and
                            fi < len(flines) - 1 and
                            flens[fi + 1] == ncols and
                            not len(newlines)):
                        # A lone cell right before the table is probably
                        # the event name.
                        potential_name = fl[0]
                    if flens[fi] == ncols:
                        if potential_name is not None and any(
                                [is_number(x) for x in fl]):
                            newlines.append([potential_name] + list(fl))
                        else:
                            newlines.append(list(fl))
                flines = newlines
                for fi, fl in enumerate(flines):
                    if len(fl) == ncols and potential_name is not None:
                        if not any([is_number(x) for x in fl]):
                            flines[fi] = ['name'] + list(fl)

            # If none of the rows contain numeric data, the file
            # is likely a list of transient names.
            if (len(flines) and (
                    not any(any([is_number(x) or x == '' for x in y])
                            for y in flines) or len(flines) == 1)):
                new_events = [
                    it for s in flines for it in s]

            # If last row is numeric, then likely this is a file with
            # transient data.
            elif (len(flines) > 1 and
                  any([is_number(x) for x in flines[-1]])):

                # Check that each row has the same number of columns.
                if len(set([len(x) for x in flines])) > 1:
                    print(set([len(x) for x in flines]))
                    raise ValueError(
                        'Number of columns in each row not '
                        'consistent!')

                # Reuse the previous column mapping if the user says the
                # new file has the same layout.
                if len(cidict) and len(new_event_list):
                    msg = ('is_file_same'
                           if previous_file else 'is_event_same')
                    reps = [previous_file] if previous_file else [
                        ''.join(new_event_list[-1].split('.')[:-1])]
                    text = prt.text(msg, reps)
                    is_same = prt.prompt(text, message=False,
                                         kind='bool')
                    if not is_same:
                        cidict = OrderedDict()

                # If the first row has no numbers it is likely a header.
                if not len(cidict):
                    self.assign_columns(cidict, flines)

                # Multi-column keys yield several photometry entries
                # ("permutations") per row.
                perms = 1
                for key in cidict:
                    if isinstance(cidict[key], list) and not isinstance(
                            cidict[key], string_types):
                        if cidict[key][0] != 'j':
                            perms = len(cidict[key])

                # Get event name (if single event) or list of names from
                # table.
                event_names = []
                if ENTRY.NAME in cidict:
                    for fi, fl in enumerate(flines):
                        flines[fi][cidict[ENTRY.NAME]] = name_clean(
                            fl[cidict[ENTRY.NAME]])
                    event_names = list(sorted(set([
                        x[cidict[ENTRY.NAME]] for x in flines[
                            self._first_data:]])))
                    new_events = [x + '.json' for x in event_names]
                else:
                    new_event_name = '.'.join(event.split(
                        '.')[:-1]).split('/')[-1]
                    text = prt.message(
                        'is_event_name', [new_event_name], prt=False)
                    is_name = prt.prompt(text, message=False,
                                         kind='bool', default='y')
                    if not is_name:
                        new_event_name = ''
                        while new_event_name.strip() == '':
                            new_event_name = prt.prompt(
                                'enter_name', kind='string')
                    event_names.append(new_event_name)
                    new_events = [new_event_name + '.json']

                # Create a new event, populate the photometry, and dump
                # to a JSON file in the run directory.
                entries = OrderedDict([(x, Entry(name=x))
                                       for x in event_names])

                # Clean up the data a bit now that we know the column
                # identities.

                # Strip common prefixes/suffixes from band names
                if PHOTOMETRY.BAND in cidict:
                    bi = cidict[PHOTOMETRY.BAND]
                    for d in [True, False]:
                        if not isinstance(bi, (int, np.integer)):
                            break
                        strip_cols = []
                        lens = [len(x[bi])
                                for x in flines[self._first_data:]]
                        llen = min(lens)
                        # Scan from the front (d=True) or back (d=False).
                        ra = range(llen) if d else range(
                            -1, -llen - 1, -1)
                        for li in ra:
                            letter = None
                            for row in list(flines[self._first_data:]):
                                if letter is None:
                                    letter = row[bi][li]
                                elif row[bi][li] != letter:
                                    letter = None
                                    break
                            if letter is not None:
                                strip_cols.append(li)
                            else:
                                break
                        # Don't strip everything if all names are equal.
                        if len(strip_cols) == llen:
                            break
                        for ri in range(len(flines[self._first_data:])):
                            flines[self._first_data + ri][bi] = ''.join(
                                [c for i, c in enumerate(flines[
                                    self._first_data + ri][bi])
                                 if (i if d else i - len(flines[
                                     self._first_data + ri][bi]))
                                 not in strip_cols])

                if (PHOTOMETRY.TIME in cidict and
                        (not isinstance(cidict[PHOTOMETRY.TIME], list) or
                         len(cidict[PHOTOMETRY.TIME]) <= 2)):
                    bi = cidict[PHOTOMETRY.TIME]

                    if isinstance(bi, list) and not isinstance(
                            bi, string_types) and isinstance(
                                bi[0], string_types) and bi[0] == 'jd':
                        bi = bi[-1]

                    mmtimes = [float(x[bi])
                               for x in flines[self._first_data:]]
                    mintime, maxtime = min(mmtimes), max(mmtimes)

                    # Guess whether times need an offset to MJD.
                    if mintime < 10000:
                        while True:
                            try:
                                response = prt.prompt(
                                    'small_time_offset', kind='string')
                                if response is not None:
                                    toffset = Decimal(response)
                                break
                            except Exception:
                                pass
                    elif maxtime > 60000 and cidict[
                            PHOTOMETRY.TIME][0] != 'jd':
                        isjd = prt.prompt(
                            'large_time_offset',
                            kind='bool', default='y')
                        if isjd:
                            toffset = Decimal('-2400000.5')

                for row in flines[self._first_data:]:
                    photodict = {}
                    rname = (row[cidict[ENTRY.NAME]]
                             if ENTRY.NAME in cidict else event_names[0])
                    for pi in range(perms):
                        sources = set()
                        for key in cidict:
                            if key in self._bool_keys:
                                rval = row[cidict[key]]

                                if rval in self._FALSE_VALS:
                                    rval = False
                                elif rval in self._TRUE_VALS:
                                    rval = True

                                # BUG FIX: the original compared
                                # `type(rval)` to the *string* 'bool',
                                # which is always unequal.
                                if type(rval) is not bool:
                                    try:
                                        rval = bool(rval)
                                    except Exception:
                                        pass

                                if type(rval) is not bool:
                                    try:
                                        rval = bool(float(rval))
                                    except Exception:
                                        rval = True

                                if not rval:
                                    continue

                                row[cidict[key]] = rval
                            elif key == 'reference':
                                if (isinstance(cidict[key],
                                               string_types) and
                                        len(cidict[key]) == 19):
                                    new_src = entries[rname].add_source(
                                        bibcode=cidict[key])
                                    sources.update(new_src)
                                    # NOTE(review): indexing `row` with a
                                    # bibcode string looks wrong —
                                    # preserved as written; confirm.
                                    row[
                                        cidict[key]] = new_src
                            elif key == ENTRY.NAME:
                                continue
                            elif (isinstance(key, Key) and
                                  key.type == KEY_TYPES.TIME and
                                  isinstance(cidict[key], list) and not
                                  isinstance(cidict[key],
                                             string_types)):
                                tval = np.array(row)[np.array(cidict[
                                    key][1:], dtype=int)]
                                if cidict[key][0] == 'j':
                                    date = '-'.join([x.zfill(2)
                                                     for x in tval])
                                    date = self._month_rep.sub(
                                        lambda x: self._MONTH_IDS[
                                            x.group()], date)
                                    photodict[key] = str(
                                        astrotime(
                                            date, format='isot').mjd)
                                elif cidict[key][0] == 'jd':
                                    photodict[key] = str(
                                        jd_to_mjd(Decimal(tval[-1])))
                                continue

                            val = cidict[key]
                            if (isinstance(val, list) and not
                                    isinstance(val, string_types)):
                                val = val[pi]
                            # A string value is a user-specified literal;
                            # otherwise it is a column index.
                            if isinstance(val, string_types):
                                if val != '':
                                    photodict[key] = val
                            else:
                                photodict[key] = row[val]

                        if self._data_type == 2:
                            if self._zp:
                                photodict[
                                    PHOTOMETRY.ZERO_POINT] = self._zp
                            else:
                                photodict[PHOTOMETRY.ZERO_POINT] = (
                                    row[cidict[
                                        PHOTOMETRY.ZERO_POINT][pi]]
                                    if isinstance(cidict[
                                        PHOTOMETRY.ZERO_POINT], list)
                                    else row[cidict[
                                        PHOTOMETRY.ZERO_POINT]])
                            zpp = photodict[PHOTOMETRY.ZERO_POINT]
                            cc = (
                                row[cidict[PHOTOMETRY.COUNT_RATE][pi]]
                                if isinstance(cidict[
                                    PHOTOMETRY.COUNT_RATE], list) else
                                row[cidict[PHOTOMETRY.COUNT_RATE]])
                            ecc = (
                                row[cidict[
                                    PHOTOMETRY.E_COUNT_RATE][pi]]
                                if isinstance(cidict[
                                    PHOTOMETRY.E_COUNT_RATE], list) else
                                row[cidict[PHOTOMETRY.E_COUNT_RATE]])
                            if '<' in cc:
                                # Upper limit: only an error is usable.
                                set_pd_mag_from_counts(
                                    photodict, ec=cc.strip('<'), zp=zpp)
                            else:
                                set_pd_mag_from_counts(
                                    photodict, c=cc, ec=ecc, zp=zpp)
                        elif self._data_type == 3:
                            photodict[
                                PHOTOMETRY.U_FLUX_DENSITY] = self._ufd
                            if PHOTOMETRY.U_FLUX_DENSITY in cidict:
                                photodict[PHOTOMETRY.U_FLUX_DENSITY] = (
                                    row[cidict[
                                        PHOTOMETRY.U_FLUX_DENSITY][pi]]
                                    if isinstance(cidict[
                                        PHOTOMETRY.
                                        U_FLUX_DENSITY], list) else
                                    row[cidict[
                                        PHOTOMETRY.U_FLUX_DENSITY]])
                            if photodict[
                                    PHOTOMETRY.U_FLUX_DENSITY] == '':
                                photodict[
                                    PHOTOMETRY.U_FLUX_DENSITY] = 'µJy'
                            fd = (
                                row[cidict[PHOTOMETRY.FLUX_DENSITY][pi]]
                                if isinstance(cidict[
                                    PHOTOMETRY.FLUX_DENSITY], list) else
                                row[cidict[PHOTOMETRY.FLUX_DENSITY]])
                            efd = (
                                row[cidict[
                                    PHOTOMETRY.E_FLUX_DENSITY][pi]]
                                if isinstance(cidict[
                                    PHOTOMETRY.E_FLUX_DENSITY], list)
                                else row[cidict[
                                    PHOTOMETRY.E_FLUX_DENSITY]])

                            # Normalize all flux densities to µJy.
                            mult = Decimal('1')
                            ufd = photodict[PHOTOMETRY.U_FLUX_DENSITY]
                            if ufd.lower() in [
                                    'mjy', 'millijy', 'millijansky']:
                                mult = Decimal('1e3')
                            elif ufd.lower() in ['jy', 'jansky']:
                                mult = Decimal('1e6')

                            if '<' in fd:
                                set_pd_mag_from_flux_density(
                                    photodict, efd=str(
                                        Decimal(fd.strip('<')) * mult))
                            else:
                                set_pd_mag_from_flux_density(
                                    photodict, fd=Decimal(fd) * mult,
                                    efd=Decimal(efd) * mult)
                        if not len(sources):
                            # No source columns: ask the user once how
                            # to attribute the data.
                            if use_self_source is None:
                                sopts = [
                                    ('Bibcode', 'b'), ('Last name', 'l')]
                                if self._require_source:
                                    sel_str = 'must_select_source'
                                else:
                                    sel_str = 'select_source'
                                text = prt.text(sel_str)
                                skind = prt.prompt(
                                    text, kind='option',
                                    options=sopts, default='b',
                                    none_string=(
                                        None if self._require_source
                                        else
                                        'Neither, tag MOSFiT as source'))
                                if skind == 'b':
                                    rsource = {}
                                    bibcode = ''
                                    while len(bibcode) != 19:
                                        bibcode = prt.prompt(
                                            'bibcode',
                                            kind='string',
                                            allow_blank=False
                                        )
                                        bibcode = bibcode.strip()
                                        # Fixed: raw strings for the
                                        # regex (avoids invalid `\.`
                                        # escape warning).
                                        if (re.search(
                                            r'[0-9]{4}..........'
                                            r'[\.0-9]{4}[A-Za-z]',
                                                bibcode) is None):
                                            bibcode = ''
                                    rsource[
                                        SOURCE.BIBCODE] = bibcode
                                    use_self_source = False
                                elif skind == 'l':
                                    rsource = {}
                                    last_name = prt.prompt(
                                        'last_name', kind='string'
                                    )
                                    rsource[
                                        SOURCE.NAME] = (
                                            last_name.strip().title() +
                                            ' et al., in preparation')
                                    use_self_source = False
                                elif skind == 'n':
                                    use_self_source = True

                            photodict[
                                PHOTOMETRY.SOURCE] = entries[
                                    rname].add_source(**rsource)

                        # Translate `<`/`>` prefixes into upper limits.
                        if any([x in photodict.get(
                                PHOTOMETRY.MAGNITUDE, '')
                                for x in ['<', '>']]):
                            photodict[PHOTOMETRY.UPPER_LIMIT] = True
                            photodict[
                                PHOTOMETRY.MAGNITUDE] = photodict[
                                    PHOTOMETRY.MAGNITUDE].strip('<>')
                        if '<' in photodict.get(
                                PHOTOMETRY.COUNT_RATE, ''):
                            photodict[PHOTOMETRY.UPPER_LIMIT] = True
                            photodict[
                                PHOTOMETRY.COUNT_RATE] = photodict[
                                    PHOTOMETRY.COUNT_RATE].strip('<')
                            if PHOTOMETRY.E_COUNT_RATE in photodict:
                                del photodict[PHOTOMETRY.E_COUNT_RATE]
                        if '<' in photodict.get(
                                PHOTOMETRY.FLUX_DENSITY, ''):
                            photodict[PHOTOMETRY.UPPER_LIMIT] = True
                            photodict[
                                PHOTOMETRY.FLUX_DENSITY] = photodict[
                                    PHOTOMETRY.FLUX_DENSITY].strip('<')
                            if PHOTOMETRY.E_FLUX_DENSITY in photodict:
                                del photodict[PHOTOMETRY.E_FLUX_DENSITY]

                        # Apply offset time if set.
                        if (PHOTOMETRY.TIME in photodict and
                                toffset != Decimal('0')):
                            photodict[PHOTOMETRY.TIME] = str(
                                Decimal(photodict[PHOTOMETRY.TIME]) +
                                toffset)

                        # Skip entries for which key values are not
                        # expected type.
                        if not all([
                                is_number(photodict.get(x, ''))
                                for x in photodict.keys()
                                if (PHOTOMETRY.get_key_by_name(x).type ==
                                    KEY_TYPES.NUMERIC)]):
                            continue

                        # Skip placeholder values.
                        if float(photodict.get(
                                PHOTOMETRY.MAGNITUDE, 0.0)) > 50.0:
                            continue

                        # Add system if specified by user.
                        if (self._system is not None and
                                PHOTOMETRY.SYSTEM not in photodict):
                            photodict[PHOTOMETRY.SYSTEM] = self._system

                        # Remove keys not in the `PHOTOMETRY` class.
                        for key in list(photodict.keys()):
                            if key not in PHOTOMETRY.vals():
                                del photodict[key]

                        # Add the photometry.
                        entries[rname].add_photometry(**photodict)

                merge_with_existing = None
                for ei, entry in enumerate(entries):
                    entries[entry].sanitize()
                    if os.path.isfile(new_events[ei]):
                        if merge_with_existing is None:
                            merge_with_existing = prt.prompt(
                                'merge_with_existing', default='y')
                        if merge_with_existing:
                            existing = Entry.init_from_file(
                                catalog=None,
                                name=event_names[ei],
                                path=new_events[ei],
                                merge=False,
                                pop_schema=False,
                                ignore_keys=[ENTRY.MODELS],
                                compare_to_existing=False)
                            Catalog().copy_entry_to_entry(
                                existing, entries[entry])

                    oentry = entries[entry]._ordered(entries[entry])
                    # BUG FIX: close the output handle deterministically
                    # (the original leaked the handle from `open`).
                    with open(new_events[ei], 'w') as fj:
                        entabbed_json_dump(
                            {entry: oentry}, fj,
                            separators=(',', ':'))

                self._converted.extend([
                    [event_names[x], new_events[x]]
                    for x in range(len(event_names))])

            new_event_list.extend(new_events)
            previous_file = event
        else:
            new_event_list.append(event)

    return new_event_list
def load_data(self, data, event_name='', smooth_times=-1,
              extrapolate_time=0.0, limit_fitting_mjds=False,
              exclude_bands=None, exclude_instruments=None,
              exclude_systems=None, exclude_sources=None,
              exclude_kinds=None, time_unit=None, time_list=None,
              band_list=None, band_systems=None, band_instruments=None,
              band_bandsets=None, band_sampling_points=25,
              variance_for_each=None, user_fixed_parameters=None,
              user_released_parameters=None, pool=None):
    """Load the data for the specified event.

    Fixes/releases user-specified parameters, feeds the observations to
    every ``data`` task in the call stack, creates data-dependent free
    parameters, and prints a summary of the bands/instruments that will
    be used.

    Returns
    -------
    bool
        ``False`` if a data task rejected the data, ``True`` otherwise.
    """
    # BUG FIX: the original used mutable default arguments (`=[]`);
    # normalize `None` sentinels here instead.
    exclude_bands = [] if exclude_bands is None else exclude_bands
    exclude_instruments = ([] if exclude_instruments is None
                           else exclude_instruments)
    exclude_systems = [] if exclude_systems is None else exclude_systems
    exclude_sources = [] if exclude_sources is None else exclude_sources
    exclude_kinds = [] if exclude_kinds is None else exclude_kinds
    time_list = [] if time_list is None else time_list
    band_list = [] if band_list is None else band_list
    band_systems = [] if band_systems is None else band_systems
    band_instruments = ([] if band_instruments is None
                        else band_instruments)
    band_bandsets = [] if band_bandsets is None else band_bandsets
    variance_for_each = ([] if variance_for_each is None
                         else variance_for_each)
    user_fixed_parameters = ([] if user_fixed_parameters is None
                             else user_fixed_parameters)
    user_released_parameters = ([] if user_released_parameters is None
                                else user_released_parameters)

    if pool is not None:
        self._pool = pool
        self._printer._pool = pool

    prt = self._printer

    prt.message('loading_data', inline=True)

    # Fix user-specified parameters.
    fixed_parameters = []
    released_parameters = []
    for task in self._call_stack:
        for fi, param in enumerate(user_fixed_parameters):
            if (task == param or
                    self._call_stack[task].get('class', '') == param):
                fixed_parameters.append(task)
                # A numeric token following the parameter name is the
                # value to pin it at.
                if fi < len(user_fixed_parameters) - 1 and is_number(
                        user_fixed_parameters[fi + 1]):
                    value = float(user_fixed_parameters[fi + 1])
                    if value not in self._call_stack:
                        self._call_stack[task]['value'] = value
                if 'min_value' in self._call_stack[task]:
                    del self._call_stack[task]['min_value']
                if 'max_value' in self._call_stack[task]:
                    del self._call_stack[task]['max_value']
                self._modules[task].fix_value(
                    self._call_stack[task]['value'])
        for fi, param in enumerate(user_released_parameters):
            if (task == param or
                    self._call_stack[task].get('class', '') == param):
                released_parameters.append(task)

    self.determine_free_parameters(fixed_parameters,
                                   released_parameters)

    for ti, task in enumerate(self._call_stack):
        cur_task = self._call_stack[task]
        self._modules[task].set_event_name(event_name)
        new_per = np.round(100.0 * float(ti) / len(self._call_stack))
        prt.message('loading_task', [task, new_per], inline=True)
        self._kinds_supported |= set(cur_task.get('supports', []))
        if cur_task['kind'] == 'data':
            success = self._modules[task].set_data(
                data,
                req_key_values=OrderedDict((
                    ('band', self._bands),
                    ('instrument', self._instruments),
                    ('telescope', self._telescopes))),
                subtract_minimum_keys=['times'],
                smooth_times=smooth_times,
                extrapolate_time=extrapolate_time,
                limit_fitting_mjds=limit_fitting_mjds,
                exclude_bands=exclude_bands,
                exclude_instruments=exclude_instruments,
                exclude_systems=exclude_systems,
                exclude_sources=exclude_sources,
                exclude_kinds=exclude_kinds,
                time_unit=time_unit,
                time_list=time_list,
                band_list=band_list,
                band_systems=band_systems,
                band_instruments=band_instruments,
                band_bandsets=band_bandsets)
            if not success:
                return False
            fixed_parameters.extend(
                self._modules[task].get_data_determined_parameters())
        elif cur_task['kind'] == 'sed':
            self._modules[task].set_data(band_sampling_points)

        self._kinds_needed |= self._modules[task]._kinds_needed

    # Find unsupported wavebands and report to user.
    unsupported_kinds = self._kinds_needed - self._kinds_supported
    if unsupported_kinds:
        prt.message(
            'using_unsupported_kinds' if 'none' in exclude_kinds
            else 'ignoring_unsupported_kinds',
            [', '.join(sorted(unsupported_kinds))], warning=True)

    # Determine free parameters again as setting data may have fixed some
    # more.
    self.determine_free_parameters(fixed_parameters,
                                   released_parameters)

    self.exchange_requests()

    prt.message('finding_bands', inline=True)

    # Run through once to set all inits.
    for root in ['output', 'objective']:
        outputs = self.run_stack(
            [0.0 for x in range(self._num_free_parameters)],
            root=root)

    # Create any data-dependent free parameters.
    self.adjust_fixed_parameters(variance_for_each, outputs)

    # Determine free parameters again as above may have changed them.
    self.determine_free_parameters(fixed_parameters,
                                   released_parameters)

    self.determine_number_of_measurements()

    self.exchange_requests()

    # Reset modules
    for task in self._call_stack:
        self._modules[task].reset_preprocessed(['photometry'])

    # Run through inits once more.
    for root in ['output', 'objective']:
        outputs = self.run_stack(
            [0.0 for x in range(self._num_free_parameters)],
            root=root)

    # Collect observed band info
    if self._pool.is_master() and 'photometry' in self._modules:
        prt.message('bands_used')
        bis = list(
            filter(lambda a: a != -1,
                   sorted(set(outputs['all_band_indices']))))
        # Whether each band has at least one observed datum.
        ois = []
        for bi in bis:
            ois.append(
                any([
                    y
                    for x, y in zip(outputs['all_band_indices'],
                                    outputs['observed']) if x == bi
                ]))
        band_len = max([
            len(self._modules['photometry']._unique_bands[bi]['origin'])
            for bi in bis
        ])
        filts = self._modules['photometry']
        ubs = filts._unique_bands
        filterarr = [
            (ubs[bis[i]]['systems'], ubs[bis[i]]['bandsets'],
             filts._average_wavelengths[bis[i]],
             filts._band_offsets[bis[i]], filts._band_kinds[bis[i]],
             filts._band_names[bis[i]], ois[i], bis[i])
            for i in range(len(bis))
        ]
        # One row per band; '*' marks bands with no observations.
        filterrows = [
            (' ' + (' ' if s[-2] else '*') +
             ubs[s[-1]]['origin'].ljust(band_len) + ' [' + ', '.join(
                 list(
                     filter(None, (
                         'Bandset: ' + s[1] if s[1] else '',
                         'System: ' + s[0] if s[0] else '',
                         'AB offset: ' + pretty_num(s[3]) if (
                             s[4] == 'magnitude' and
                             s[0] != 'AB') else '')))) + ']').replace(
                                 ' []', '')
            for s in list(sorted(filterarr))
        ]
        if not all(ois):
            filterrows.append(prt.text('not_observed'))
        prt.prt('\n'.join(filterrows))

        # Instruments reporting single-frequency (band-less) data.
        single_freq_inst = list(
            sorted(
                set(
                    np.array(outputs['instruments'])[np.array(
                        outputs['all_band_indices']) == -1])))

        if len(single_freq_inst):
            prt.message('single_freq')
        for inst in single_freq_inst:
            prt.prt(' {}'.format(inst))

        if ('unmatched_bands' in outputs and
                'unmatched_instruments' in outputs):
            prt.message('unmatched_obs', warning=True)
            prt.prt(', '.join([
                '{} [{}]'.format(x[0], x[1])
                if x[0] and x[1] else x[0] if not x[1] else x[1]
                for x in list(
                    set(
                        zip(outputs['unmatched_bands'],
                            outputs['unmatched_instruments'])))
            ]), warning=True, prefix=False, wrapped=True)

    return True