예제 #1
0
def expire_old_transitions(args):
    """
    Expire the old transitions whose ids are listed in the file named
    args.db_expire_id; that is, set their 'valid_to' attribute to the
    day before the mod_date of the data we're uploading.

    Arguments:
    args: the processed command line arguments; this function reads
    args.dry_run, args.mod_date and args.db_expire_id (the name of a file
    holding one transition ID per line).

    On a dry run every transition is still looked up and modified in
    memory, but nothing is saved to the database.

    """

    if args.dry_run:
        vprint('[DRY RUN] expiring old transitions ...')
    else:
        vprint('expiring old transitions ...')
    # expire_date is to be the day before the modification date of the
    # par file we're uploading:
    expire_date = args.mod_date - datetime.timedelta(1)
    s_expire_date = expire_date.isoformat()
    # read in the IDs of the transitions to expire; the with-block closes
    # the file even if an ID is malformed or its lookup fails
    with open(args.db_expire_id, 'r') as fi_ids:
        for line in fi_ids:
            if not line.strip():
                # tolerate blank lines rather than crashing in int()
                continue
            expire_id = int(line)
            trans = Trans.objects.all().filter(pk=expire_id).get()
            # set the new expiry date...
            trans.valid_to = s_expire_date
            # ... and save unless we're doing a dry-run
            if not args.dry_run:
                trans.save()
    vprint('done.')
예제 #2
0
파일: stage_upload.py 프로젝트: xnx/pyHAWKS
def write_db_trans(args, molecule, isos):
    """
    Write the transitions currently in the database and currently valid to
    the file named args.db_trans_file, which will be compared to the file of
    transitions to be uploaded to decide which are still valid (haven't
    changed) and which to expire. The database ID of each transition is
    written to the same line number of the file named args.db_trans_file_id.

    Arguments:
    args: the processed command line arguments; this function reads
    args.db_trans_file and args.db_trans_file_id.
    molecule: the molecule object; its id is attached to every transition
    as molec_id.
    isos: the isotopologues whose transitions are fetched (used in an
    iso__in filter).

    """

    cursor = connection.cursor()
    vprint("Retrieving existing transitions from database...")

    today = datetime.date.today()
    # db_trans_file will hold a list of the currently valid transitions for
    # the molecule of interest; db_trans_file_id will hold the transition ID
    # corresponding to each line of db_trans_file. The with-block closes both
    # files even if the query or the formatting below raises.
    with open(args.db_trans_file, "w") as fo, \
            open(args.db_trans_file_id, "w") as fo_id:
        # fetch the currently valid transitions for all the isotopologues of
        # our molecule, ordered by wavenumber
        db_transitions = Trans.objects.filter(iso__in=isos).filter(
            valid_to__gt=today).order_by("nu")
        n_db_trans = db_transitions.count()
        vprint("%d currently valid transitions found." % n_db_trans)

        vprint("Writing currently valid transitions to %s ..."
               % args.db_trans_file)
        percent_thresh = 0  # for the progress indicator
        for i, trans in enumerate(db_transitions):

            # a progress indicator of percent completion
            percent = float(i) / n_db_trans * 100
            if percent > percent_thresh:
                vprint("%d %%" % percent_thresh, 1)
                percent_thresh += 1

            # a bit of translation so that everything we need for the string
            # representation of the transition is an immediate attribute of
            # trans
            trans.stateIDp = trans.statep.id
            trans.stateIDpp = trans.statepp.id
            trans.molec_id = molecule.id
            trans.local_iso_id = trans.iso.isoID
            try:
                trans.flag = trans.par_line[145]
            except IndexError:
                # par_line is too short to carry a flag character
                trans.flag = " "

            # get the parameters for this transition
            for prm in trans_prms:
                command = "SELECT * FROM prm_%s WHERE trans_id=%d" % (
                    prm.lower(), trans.id)
                cursor.execute(command)
                rows = cursor.fetchall()
                if not rows:
                    # this parameter apparently doesn't exist for this
                    # transition
                    continue
                # make a generic Prm object named for the parameter, built
                # from every column of the first row except the first one
                # (val, err, ierr and source_id in that order, per the
                # original author)
                setattr(trans, prm, Prm(*rows[0][1:]))

            # build a list of strings, each of which is a field in the
            # db_trans_file output
            s_vals = []
            for trans_field in trans_fields:
                try:
                    # NOTE(review): eval is kept because field names may be
                    # dotted attribute paths; trans_fields is presumably a
                    # project-defined constant, not external input - confirm
                    val = eval("trans.%s" % trans_field.name)
                except AttributeError:
                    # no value for this field - use the default
                    s_vals.append(trans_field.default)
                    continue
                if val is None:
                    s_vals.append(trans_field.default)
                else:
                    s_vals.append(trans_field.fmt % val)

            # write the fields, separated by commas in case someone is
            # foolish enough to think it a good idea to read the file in
            # Excel
            fo.write(",".join(s_vals) + "\n")
            # and write the transition ID in the parallel db_trans_file_id
            # file
            fo_id.write("%s\n" % trans.id)
예제 #3
0
파일: stage_upload.py 프로젝트: xnx/pyHAWKS
def stage_upload(args, molecule, isos):
    """
    Stage the transitions file for upload to the database by identifying
    transitions that are already in the database, and transitions that
    need to be expired.

    Arguments:
    args: the processed command line arguments; this function reads the
    file names args.db_trans_file, args.db_trans_file_id, args.trans_file,
    args.diff_file, args.trans_file_upload and args.db_expire_id.
    molecule, isos: passed straight through to write_db_trans.

    Raises:
    RuntimeError if the external diff tool reports trouble (exit status
    other than 0 or 1), since the staging files would then be incomplete.

    """

    # imported here so that this function does not depend on the module's
    # import block
    import subprocess

    # first write the currently valid transitions to db_trans_file and
    #  their IDs to db_trans_file_id
    write_db_trans(args, molecule, isos)

    # find out where the old and new transitions differ
    # The files involved can be much larger than the available memory, so
    # farm the comparison out to the Unix tool diff, which produces the
    # diff_file. The arguments are passed as a list (no shell), so file
    # names containing spaces or shell metacharacters are handled correctly.
    vprint("calculating the diff ...")
    with open(args.diff_file, "w") as fo_diff:
        diff_status = subprocess.call(
            ["diff", args.db_trans_file, args.trans_file], stdout=fo_diff)
    # diff exits with 0 (no differences) or 1 (differences found); anything
    # else means it could not do its job
    if diff_status not in (0, 1):
        raise RuntimeError("diff exited with status %d while comparing %s "
                           "and %s" % (diff_status, args.db_trans_file,
                                       args.trans_file))
    vprint("done.")

    vprint("Writing expire-ids file and upload transitions file...")
    # get the list of the currently-valid transitions' IDs from
    # db_trans_file_id: entry i is the ID of line i+1 of db_trans_file
    with open(args.db_trans_file_id, "r") as fi_id:
        db_trans_ids = [int(line) for line in fi_id]

    # regular expression for the marker in the diff_file indicating changed
    # or deleted lines in the db_trans_file - ie transitions to be expired;
    # group 1 is the first line number, group 2 the optional last one
    patt = re.compile(r"^(\d+)(?:,(\d+))?[cd]")

    # trans_file_upload will hold the string representations of the new or
    # altered transitions to be uploaded to the database; db_expire_id will
    # hold the IDs of currently-valid transitions which are to be expired
    # (usually because they're being replaced with better versions from
    # trans_file_upload)
    with open(args.trans_file_upload, "w") as fo_upload, \
            open(args.db_expire_id, "w") as fo_expire_id, \
            open(args.diff_file, "r") as fi_diff:
        for line in fi_diff:
            if line.startswith(">"):
                # this line is a transition to be uploaded: strip the EOL
                # and remove the leading '> '
                fo_upload.write(line.rstrip()[2:] + "\n")
                continue

            m = patt.match(line)
            if m is None:
                continue
            # l1-l2 is the (1-based, inclusive) line number range to which
            # this group of changed or deleted lines applies
            l1 = int(m.group(1))
            l2 = int(m.group(2)) if m.group(2) else l1
            # write the expired lines' ids to the db_expire_id file
            for i in range(l1 - 1, l2):
                fo_expire_id.write("%s\n" % db_trans_ids[i])
예제 #4
0
# Christian Hill, 12/4/12
# Department of Physics and Astronomy, University College London
# [email protected]
#
# A script to parse a given .par file in the native HITRAN2004+ format
# and extract the relevant data for update to the relational database.

import sys

from xn_utils import vprint
from par2norm import parse_par
from stage_upload import stage_upload
from upload_data import upload_data

from cmdline import parser, process_args

# Parse the command line, then resolve the molecule, its isotopologues and
# the references dictionary from the processed arguments.
args = parser.parse_args()
molecule, isos, d_refs = process_args(args)

# Announce the script name and version, followed by the author credit.
# NOTE(review): `version` is not imported in the visible part of this
# script - confirm where it is defined.
banner = '\n\n%s - v%s' % (sys.argv[0], version)
vprint(banner, 5)
vprint('Christian Hill - [email protected]', 3)

# Stage 1 (optional): normalize the .par file.
if args.parse_par:
    parse_par(args, molecule, isos, d_refs)

# Stage 2 (optional): work out what needs uploading and what needs expiring.
if args.stage_upload:
    stage_upload(args, molecule, isos)

# Stage 3 (optional): upload for real, or rehearse the upload on a dry run.
wants_upload = args.upload or args.dry_run
if wants_upload:
    upload_data(args, molecule, isos, d_refs)
예제 #5
0
def upload_states(args, isos, cases_list):
    """
    Read in, store, and upload the states to enter the database from the
    .states file.

    Arguments:
    args: the processed command line arguments; this function reads
    args.dry_run, args.states_file (the file of states to read) and
    args.hitran_ids (which maps a global_iso_id to the pair
    (molec_id, local_iso_id)).
    isos: a list of Iso objects, ordered by their local isotopologue ID
    cases_list: a list of Case objects, where the index is the case_id (ie
    cases_list[0] is None, cases_list[1] represents the dcs case etc...

    Returns:
    a list of the State objects created, in file order; they are only
    saved to the database if args.dry_run is false.

    """

    if args.dry_run:
        vprint('[DRY RUN] Uploading states...')
    else:
        vprint('Uploading states...')
    # the uploaded states will be stored in this list:
    states = []
    start_time = time.time()
    # the with-block closes the states file even if a line fails to parse
    with open(args.states_file, 'r') as fi_states:
        for line in fi_states:
            # fixed-width record: iso id, energy, degeneracy, quantum numbers
            global_iso_id = int(line[:4])

            # state energy
            try:
                E = float(line[5:15])
            except (TypeError, ValueError):
                # undefined energy for this state
                E = None

            # state degeneracy
            try:
                g = int(line[16:21])
            except (TypeError, ValueError):
                # undefined degeneracy for this state
                g = None

            # state quantum numbers as a string of ';' separated name=value
            # pairs
            s_qns = line[22:].strip()
            if not s_qns:
                # no quantum numbers resolved for this state (the original
                # code compared with '==' here, which had no effect)
                s_qns = None

            # the native HITRAN IDs for molecule and isotopologue:
            molec_id, local_iso_id = args.hitran_ids[global_iso_id]

            # get the right Class to use to describe this state
            CaseClass = hitran_meta.get_case_class(molec_id, local_iso_id)
            # state is one of the hitran_case states (e.g. an HDcs object)
            state = CaseClass(molec_id=molec_id, local_iso_id=local_iso_id,
                              global_iso_id=global_iso_id, E=E, g=g,
                              s_qns=s_qns)
            # retrieve the correct Iso object for this isotopologue
            iso = isos[local_iso_id-1]

            # this_state is a hitranlbl.State object for the MySQL database
            this_state = State(iso=iso, energy=state.E, g=state.g,
                               s_qns=state.s_qns,
                               qns_xml=state.get_qns_xml())
            if not args.dry_run:
                # if we're doing it for real, save the State just created
                this_state.save()

            states.append(this_state)

            # now create the quantum numbers entries for this state
            case = cases_list[state.__class__.caseID]
            # loop over all the possible quantum number names for this case
            # XXX this will fail to include all of the quantum numbers if
            # the case does not have the right quantum numbers in its
            # ordered_qn_list (e.g. asymcs currently only goes to v12...)
            for qn_name in state.__class__.ordered_qn_list:
                # get the value of this quantum number
                qn_val = state.get(qn_name)
                if qn_val is None:
                    # if the quantum number isn't defined, move to the next
                    continue
                # get any attribute metadata for this quantum number,
                # stripping its initial '#'; store None if there is none
                qn_attr = state.serialize_qn_attrs(qn_name)
                if qn_attr:
                    qn_attr = qn_attr[1:]
                else:
                    qn_attr = None

                # get the XML for this quantum number (None if empty)
                xml = state.get_qn_xml(qn_name)
                if not xml:
                    xml = None

                # create the quantum number object ...
                qn = Qns(case=case, state=this_state, qn_name=qn_name,
                         qn_val=str(qn_val), qn_attr=qn_attr, xml=xml)
                if not args.dry_run:
                    # ... and save it to the database if we're not on a dry
                    # run
                    qn.save()

    end_time = time.time()
    vprint('%d states read in (%s)' % (len(states),
                timed_at(end_time - start_time)))
    return states
예제 #6
0
def upload_data(args, molecule, isos, d_refs):
    """
    Upload the new transitions and states to the database. Only do this for
    real if args.dry_run = False.

    Arguments:
    args: the processed command line arguments; this function reads
    args.dry_run, args.trans_file_upload (the staged transitions file) and
    args.s_mod_date (used as valid_from for every new transition), and
    passes args on to expire_old_transitions and upload_states.
    molecule: the Molecule object for the molecule whose transitions and
    states are to be uploaded.
    isos: a list of Iso objects, ordered by their local isotopologue ID
    d_refs: a dictionary of RefsMap objects, keyed by HITRAN-style refID,
    e.g. 'O2-gamma_self-2'.

    Exits the program with status 1 if a parameter has a value but no
    source_id.

    """

    if args.dry_run:
        vprint('[DRY RUN] Uploading to database...')
    else:
        vprint('Uploading to database...')

    # first, expire old lines
    expire_old_transitions(args)

    # get all the molecular state description 'cases' in a list indexed
    # by caseID; caseIDs start at 1, so cases_list[0] = None
    cases_list = [None]
    cases_list.extend(Case.objects.all())

    # find out the ID at which we can start adding states
    try:
        first_stateID = State.objects.all().order_by('-id')[0].id + 1
    except IndexError:
        # no states in the database yet, so we start at 1
        first_stateID = 1
    vprint('new states will be added with ids starting at %d' % first_stateID)

    # upload the new states
    states = upload_states(args, isos, cases_list)

    # get the Source objects we'll need to attach to the parameters, and the
    # default Source for when we can't find anything better.
    # NOTE(review): neither result is referenced again in this function; the
    # lookups are kept unchanged in case they are relied on as an early
    # check - confirm.
    sources = get_sources(d_refs)
    hitran86_source = Source.objects.all().filter(pk=HITRAN1986_SOURCEID).get()

    # now read in and upload the transitions
    cursor = None   # only needed (and only created) for a real upload
    if args.dry_run:
        vprint('[DRY RUN] Uploading transitions ...')
    else:
        cursor = connection.cursor()
        vprint('Uploading transitions ...')
    start_time = time.time()
    ntrans = 0
    # the with-block closes the upload file on every exit path, including
    # the sys.exit below
    with open(args.trans_file_upload, 'r') as fi_upload:
        for line in fi_upload:
            # strip the EOL because the last field is par_line
            line = line.rstrip()
            trans = HITRANTransition()

            for prm_name in trans_prms:
                # create and attach the HITRANParam objects
                setattr(trans, prm_name, HITRANParam(None))
            fields = line.split(',')
            for i, output_field in enumerate(trans_fields):
                # set the transition attributes
                trans.set_param(output_field.name, fields[i],
                                output_field.fmt)

            # attach the upper state to the transition
            if trans.stateIDp < first_stateID:
                # this state is already in the database: find it
                trans.statep = State.objects.all().get(pk=trans.stateIDp)
            else:
                # new upper state: get it from the states list, which is
                # indexed by offset from first_stateID
                trans.statep = states[trans.stateIDp-first_stateID]

            # attach the lower state to the transition
            if trans.stateIDpp < first_stateID:
                # this state is already in the database: find it
                trans.statepp = State.objects.all().get(pk=trans.stateIDpp)
            else:
                # new lower state: get it from the states list
                trans.statepp = states[trans.stateIDpp-first_stateID]

            # attach the case_module for this transition's states' quantum
            # numbers
            trans.case_module = hitran_meta.get_case_module(trans.molec_id,
                                trans.local_iso_id)

            # fetch the right Iso object
            iso = isos[trans.local_iso_id-1]
            # this_trans is a hitranmeta.Trans object for the MySQL database
            this_trans = Trans(iso=iso, statep=trans.statep,
                    statepp=trans.statepp,
                    nu=trans.nu.val, Sw=trans.Sw.val, A=trans.A.val,
                    multipole=trans.multipole, Elower=trans.Elower,
                    gp=trans.gp, gpp=trans.gpp, valid_from=args.s_mod_date,
                    par_line=trans.par_line)
            ntrans += 1
            if not args.dry_run:
                # if we're really uploading, save the transition to the
                # database
                this_trans.save()

            # create and execute the INSERT strings for the transition's
            # parameters
            for prm_name in trans_prms:
                val = trans.get_param_attr(prm_name, 'val')
                if val is None:
                    # no value for this parameter - move on to the next one
                    continue
                # fetch the source id for this parameter
                source_id = trans.get_param_attr(prm_name, 'source_id')
                if source_id is None:
                    # if we can't identify source_id, it's missing from the
                    # hitranmeta_refs_map and/or hitransmeta_source tables:
                    # this is fatal and we must exit with an error
                    sys.stdout.write('Error! no reference specified for %s\n'
                                     % (prm_name,))
                    sys.exit(1)

                # assemble the column names and values for this parameter;
                # err is only included when it is defined
                prm_fields = ['trans_id', 'val']
                prm_entries = [str(this_trans.id), str(val)]
                err = trans.get_param_attr(prm_name, 'err')
                if err is not None:
                    prm_fields.append('err')
                    prm_entries.append(str(err))
                prm_fields.append('ierr')
                prm_entries.append(str(trans.get_param_attr(prm_name,
                                                            'ierr')))
                prm_fields.append('source_id')
                prm_entries.append(str(source_id))
                s_insert = 'INSERT INTO prm_%s (%s) VALUES (%s)'\
                                 % (prm_name.lower(), ', '.join(prm_fields),
                                    ', '.join(prm_entries))
                if not args.dry_run:
                    # if we're really uploading, save the prm to the database
                    cursor.execute(s_insert)

    end_time = time.time()
    vprint('%d transitions read in (%s)' % (ntrans,
                timed_at(end_time - start_time)))
예제 #7
0
파일: par2norm.py 프로젝트: leerduo/pyHAWKS
def parse_par(args, molecule, isos, d_refs):
    """
    Parse the input .par file, args.par_file, into normalized .states and
    .trans files, checking for the existence of the relevant sources and
    not outputting duplicate states. All transitions encountered are written
    to the .trans file, even if they're already in the database - duplicate-
    handling is done upon staging the upload.
    NB the input .par file must be in order of increasing wavenumber
    (the program exits with status 1 if this is found not to be the case).

    Arguments:
    args: the processed command line arguments; this function reads
    args.par_file, args.trans_file, args.states_file, args.overwrite and
    args.global_iso_ids (which maps (molec_id, local_iso_id) to the
    database-wide isotopologue ID).
    molecule: the molecule object; its ordinary_formula is used in messages
    and to build the HITRAN-style reference identifiers.
    isos: the isotopologues whose existing states are read from the database.
    d_refs: a dictionary keyed by HITRAN-style reference identifier whose
    values carry a source_id attribute.

    Also exits with status 1 if an output file exists and args.overwrite is
    false, or if a reference is missing from d_refs.

    """

    # get all of the states for this molecule currently in the database
    # as their string representations - these are the keys to the db_stateIDs
    # dictionary, with the corresponding database State ids as their values
    db_stateIDs = {}
    for state in State.objects.filter(iso__in=isos):
        db_stateIDs[state.str_rep()] = state.id

    vprint('%d existing states for %s read in from database'\
                % (len(db_stateIDs), molecule.ordinary_formula))

    vprint('Creating .trans and .states files...')
    vprint('%s\n-> %s\n   %s'\
            % (args.par_file, args.trans_file, args.states_file))

    if not args.overwrite:
        # the .trans and .states files should not already exist
        for filename in (args.trans_file, args.states_file):
            if os.path.exists(filename):
                vprint('File exists:\n%s\nAborting.' % filename, 5)
                sys.exit(1)

    # read the lines and rstrip them of the EOL characters. We don't lstrip
    # because we keep the space in front of molec_ids 1-9
    vprint('reading .par lines from %s ...' % args.par_file)
    with open(args.par_file, 'r') as fi_par:
        lines = [x.rstrip() for x in fi_par]
    ntrans = len(lines)
    vprint('%d lines read in' % ntrans)

    # find out the state ID at which we can start adding states
    try:
        first_stateID = State.objects.all().order_by('-id')[0].id + 1
    except IndexError:
        # no states in the database yet, so let's start at 1
        first_stateID = 1
    vprint('new states will be added with ids starting at %d' % first_stateID)

    # the parameters whose references are stored in the par_line, in the
    # order in which their 2-character reference fields appear
    ref_prm_names = ('nu', 'S', 'gamma_air', 'gamma_self', 'n_air',
                     'delta_air')

    # the with-block closes both output files on every exit path, including
    # the sys.exit calls below
    with open(args.states_file, 'w') as fo_s, \
            open(args.trans_file, 'w') as fo_t:
        start_time = time.time()

        stateID = first_stateID
        last_nu = 0.    # the previous wavenumber read in
        percent_done = 0        # for the progress indicator
        percent_increment = 1
        for i, line in enumerate(lines):

            # progress indicator, as a percentage
            percent = float(i)/ntrans * 100.
            if percent - percent_done > percent_increment:
                vprint('%d %%' % percent_done, 1)
                percent_done += percent_increment

            # parse the par_line into a HITRANTransition object
            trans = HITRANTransition.parse_par_line(line)

            if trans is None:
                # blank or comment line
                continue

            # check our wavenumbers are in order; it is the input .par file
            # that is being checked, so that is the file named in the message
            if trans.nu.val < last_nu:
                vprint("Error: %s transitions file isn't ordered by nu."
                       % args.par_file, 5)
                sys.exit(1)
            last_nu = trans.nu.val

            # set the global (ie database-wide) ID for the isotopologue in
            # the transition and its upper and lower state objects
            trans.global_iso_id = args.global_iso_ids[
                                    (trans.molec_id, trans.local_iso_id)]
            trans.statep.global_iso_id = trans.global_iso_id
            trans.statepp.global_iso_id = trans.global_iso_id

            # first deal with the upper state: get its string representation
            statep_str_rep = trans.statep.str_rep()
            # ... and see if it's in our dictionary (a direct dict lookup,
            # not a scan of a list of keys):
            if statep_str_rep in db_stateIDs:
                # the upper state is already known: set the corresponding
                # state ID in the transition object
                trans.stateIDp = db_stateIDs[statep_str_rep]
            else:
                # the upper state is new: assign it an ID and save it
                trans.stateIDp = trans.statep.id = stateID
                db_stateIDs[statep_str_rep] = stateID
                stateID += 1
                fo_s.write('%s\n' % (statep_str_rep,))

            # next deal with the lower state in exactly the same way
            statepp_str_rep = trans.statepp.str_rep()
            if statepp_str_rep in db_stateIDs:
                # the lower state is already known: set the corresponding
                # state ID in the transition object
                trans.stateIDpp = db_stateIDs[statepp_str_rep]
            else:
                # the lower state is new: assign it an ID and save it
                trans.stateIDpp = trans.statepp.id = stateID
                db_stateIDs[statepp_str_rep] = stateID
                stateID += 1
                fo_s.write('%s\n' % (statepp_str_rep,))

            # check that the references for this transition's parameters are
            # in the tables hitranmeta_refs_map and hitranmeta_source - if
            # they aren't this is fatal, so we exit
            for j, prm_name in enumerate(ref_prm_names):
                # the reference fields of the par_line are at character
                # positions 134-146 of the 160-byte par_line, in 2-character
                # fields
                iref = int(trans.par_line[133+2*j:135+2*j])
                # work out which Source in hitranmeta_source this reference
                # id is pointing to, using the hitranmeta_refs_map table to
                # map it to a primary key in the hitranmeta_source table.
                if iref == 0:
                    # don't worry about missing 0 refs (which default to the
                    # HITRAN 1986 paper)
                    source_id = HITRAN1986_SOURCEID
                else:
                    # form a HITRAN-style source identifier as
                    # <molecule_name>-<prm_name>-<id>, for looking up in the
                    # hitranmeta_refs_map table
                    sref = '%s-%s-%d' % (molecule.ordinary_formula,
                                         prm_name, iref)
                    # we can't use '+' in XML attributes, so replace with 'p'
                    sref = sref.replace('+', 'p')
                    if sref not in d_refs:
                        # Oops - missing reference: bail.
                        sys.stdout.write('missing reference for %s in'
                                         ' hitranmeta_refs_map table\n'
                                         % (sref,))
                        sys.exit(1)
                    # all's well - we have a valid source_id
                    source_id = d_refs[sref].source_id

                # Assign the source_id to the parameter object(s); the line
                # intensity reference 'S' applies to both Sw and A
                if prm_name == 'S':
                    trans.Sw.source_id = source_id
                    trans.A.source_id = source_id
                else:
                    try:
                        getattr(trans, prm_name).source_id = source_id
                    except AttributeError:
                        # no parameter object exists for prm_name; this can
                        # happen if e.g. delta_air=0. and none was created,
                        # but it's fine - we just move on
                        pass

            # write the transition to the .trans file, *even if it is already
            # in the database* - this is checked for on upload
            fo_t.write('%s\n' % (trans.to_str(trans_fields, ','),))

    vprint('%d new or updated states were identified'\
                    % (stateID-first_stateID))

    end_time = time.time()
    vprint('%d transitions and %d states in %.1f secs'\
                % (len(lines), len(db_stateIDs), end_time - start_time))