Example #1
def main():
    logging.basicConfig()
    logging.root.setLevel(logging.INFO)
    args = parse_args()

    # connect to db
    with open(args.config, 'rb') as f:
        db.connect(json.load(f))

    # load translations if present
    translations = {}
    if args.species_translations is not None:
        translations = load_translations(args.species_translations)
        _log.info('Loaded %d species name translations', len(translations))

    # lookup taxons (species) in BLA and local db
    taxons = load_taxons_by_spno(args.csv, translations)
    set_db_id_for_taxons(taxons.itervalues())

    # wipe existing vettings
    db.vettings.delete().where(db.vettings.c.user_id == args.user_id).execute()

    # create new vettings
    for spno, taxon in taxons.iteritems():
        insert_vettings_for_taxon(taxon, spno, args.user_id)
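
For context, a minimal argparse sketch of the parse_args these examples call. Only the attribute names (config, csv, species_translations, user_id) come from the code above; the flag spellings, types, and help text are guesses:

import argparse

def parse_args():
    # hypothetical reconstruction: attribute names match Example 1's usage
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', required=True,
                        help='path to the json config passed to db.connect')
    parser.add_argument('--csv', required=True,
                        help='csv of species keyed by spno')
    parser.add_argument('--species-translations', default=None,
                        help='optional species name translation csv')
    parser.add_argument('--user-id', type=int, required=True,
                        help='user whose vettings get replaced')
    return parser.parse_args()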
Example #2
def main():
    args = parse_args()

    logging.basicConfig()
    logging.root.setLevel(logging.INFO)

    with open(args.config, 'rb') as f:
        config = json.load(f)
        db.connect(config)

    if 'alaVettingSyncUrl' not in config or len(config['alaVettingSyncUrl']) == 0:
        logging.critical('"alaVettingSyncUrl" must be present and non-empty in the config')
        return

    if 'alaApiKey' not in config or len(config['alaApiKey']) == 0:
        logging.critical('"alaApiKey" must be present and non-empty in the config')
        return

    while True:
        next_vetting = next_vetting_to_sync()
        if next_vetting is None:
            log_info('=========== No vettings to send. Sleeping for a while.')
            time.sleep(60)
        else:
            send_vetting(next_vetting, config['alaVettingSyncUrl'], config['alaApiKey'])
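            # drop the SQLAlchemy connection pool so the next round reconnects fresh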
            db.engine.dispose()
            time.sleep(5)
Example #3
def main():
    args = parse_args()

    logging.basicConfig()
    logging.root.setLevel(logging.INFO)

    with open(args.config, 'rb') as f:
        db.connect(json.load(f))

    while True:
        next_species = next_species_row_to_vet()
        if next_species is None:
            log_info('=========== No species need vetting. Sleeping for a while.')
            time.sleep(60)
        else:
            vet_species(next_species)
            db.engine.dispose()
Example #4
def main():
    args = parse_args()
    with open(args.config, "rb") as f:
        config = json.load(f)

    db.connect(config)

    if "alaApiKey" in config and config["alaApiKey"] is not None:
        ala.set_api_key(config["alaApiKey"])

    if "logLevel" in config:
        logging.basicConfig()
        logging.root.setLevel(getattr(logging, config["logLevel"]))

    if "maxRetrySeconds" in config:
        ala.set_max_retry_secs(float(config["maxRetrySeconds"]))

    logging.info("Started at %s", str(datetime.now()))
    connection = db.engine.connect()
    try:
        syncer = sync.Syncer(ala, args.species_type, connection)
        syncer.sync(sync_species=config["updateSpecies"], sync_occurrences=config["updateOccurrences"])
    finally:
        logging.info("Ended at %s", str(datetime.now()))
Example #5
def test_suite():
    test_config_path = os.path.abspath(__file__ + "/../../../config.unittests.json")
    with open(test_config_path) as f:
        db.connect(json.load(f))
    return unittest.makeSuite(TestSync)
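
One way to run that suite directly with the stdlib runner:

import unittest

if __name__ == '__main__':
    unittest.TextTestRunner(verbosity=2).run(test_suite())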
Example #6
def main():
    # make sure this isn't run accidentally
    if '--go' not in sys.argv:
        print
        print "Wipes the database clean and inserts some debug data."
        print "Don't use this in production!"
        print
        print "Usage:"
        print "\t{0} --go [--big]".format(sys.argv[0])
        print
        sys.exit()

    # connect
    with open('config.json', 'rb') as f:
        db.connect(json.load(f))

    # wipe existing data (child tables before the tables they reference)
    db.sensitive_occurrences.delete().execute()
    db.occurrences.delete().execute()
    db.vettings.delete().execute()
    db.species.delete().execute()
    db.sources.delete().execute()

    # insert ALA source
    db.sources.insert().execute(
        name='ALA',
        last_import_time=None)

    # insert species
    db.species.insert().execute(
        scientific_name='Motacilla flava',
        common_name='Yellow Wagtail')

    db.species.insert().execute(
        scientific_name='Ninox (Rhabdoglaux) strenua',
        common_name='Powerful Owl')

    db.species.insert().execute(
        scientific_name='Ninox (Hieracoglaux) connivens',
        common_name='Barking Owl')

    db.species.insert().execute(
        scientific_name='Tyto alba',
        common_name='Barn Owl')

    db.species.insert().execute(
        scientific_name='Falco (Hierofalco) hypoleucos',
        common_name='Grey Falcon')

    # 1000 records, of which 79 are sensitive
    db.species.insert().execute(
        scientific_name='Atrichornis (Atrichornis) rufescens',
        common_name='Rufous Scrub-bird')

    if '--big' in sys.argv:
        # 400k records.
        db.species.insert().execute(
            scientific_name='Cracticus tibicen',
            common_name='Australian Magpie')

        db.species.insert().execute(
            scientific_name='Dacelo (Dacelo) leachii',
            common_name='Blue-winged Kookaburra')

        db.species.insert().execute(
            scientific_name='Tyto (Megastrix) novaehollandiae',
            common_name='Masked Owl')

        db.species.insert().execute(
            scientific_name='Ninox (Ninox) novaeseelandiae',
            common_name='Southern Boobook')

        db.species.insert().execute(
            scientific_name='Corvus tasmanicus',
            common_name='Forest Raven')
Example #7

def main():
    # make sure this isn't run accidentally
    if '--go' not in sys.argv:
        print
        print "Wipes the database clean and fills database with Costa Rica data."
        print
        print "Assumes input csv is called costa_rica_import.csv, and is in the"
        print "same folder as config.json. The folder you're in now.."
        print
        print "Usage:"
        print "\t{0} --go".format(sys.argv[0])
        print
        sys.exit()

    import_file_path = 'costa_rica_import.csv'
    import_threshold_file_path = 'costa_rica_import_threshold.csv'

    log = logging.getLogger()
    log.setLevel(logging.DEBUG)
    log.addHandler(logging.StreamHandler())

    species_count = 0
    occurrences_count = 0

    # take note of import start time
    import_d = datetime.utcnow()

    # connect
    with open('config.json', 'rb') as f:
        db.connect(json.load(f))

    # wipe
    db.species.delete().execute()
    db.sources.delete().execute()
    db.occurrences.delete().execute()

    # insert COSTA_RICA_CSV with last_import_time.
    result = db.sources.insert().execute(
        name='COSTA_RICA_CSV',
        last_import_time=import_d)

    db_source_id = result.lastrowid

    # open threshold csv..
    with open(import_threshold_file_path, 'rb') as tf:
        # open the costa_rica csv..
        with open(import_file_path, 'rb') as f:
            reader = csv.reader(f)
            # skip the header
            header = reader.next()

            # iterate over the csv rows
            for csv_row_array in reader:

                in_collection_code       = csv_row_array.pop(0)
                in_catalog_number        = csv_row_array.pop(0)
                in_occurrence_remarks    = csv_row_array.pop(0)
                in_record_number         = csv_row_array.pop(0)
                in_event_date            = csv_row_array.pop(0)
                in_location_id           = csv_row_array.pop(0)
                in_state_province        = csv_row_array.pop(0)
                in_county                = csv_row_array.pop(0)
                in_municipality          = csv_row_array.pop(0)
                in_locality              = csv_row_array.pop(0)
                in_decimal_latitude      = csv_row_array.pop(0)
                in_decimal_longitude     = csv_row_array.pop(0)
                in_scientific_name       = csv_row_array.pop(0)
                in_kingdom               = csv_row_array.pop(0)
                in_phylum                = csv_row_array.pop(0)
                in_class                 = csv_row_array.pop(0)
                in_order                 = csv_row_array.pop(0)
                in_family                = csv_row_array.pop(0)
                in_genus                 = csv_row_array.pop(0)
                in_specific_epithet      = csv_row_array.pop(0)
                in_infraspecific_epithet = csv_row_array.pop(0)
                in_taxon_rank            = csv_row_array.pop(0)

                # Add species if necessary..

                # Look up species by scientific_name
                row = db.species.select('id')\
                        .where(db.species.c.scientific_name == in_scientific_name)\
                        .execute().fetchone()

                db_species_id = None
                if row is None:
                    # species not in the db yet, so find its threshold and add it

                    # rescan the threshold csv from the start for this species
                    tf.seek(0)
                    threshold_reader = csv.reader(tf)

                    in_threshold = '1'  # default to the max (will wipe out all values)
                    # normalize the sci name to match the threshold csv's format
                    conv_in_scientific_name = in_scientific_name.strip()
                    conv_in_scientific_name = conv_in_scientific_name.replace('.', '')
                    conv_in_scientific_name = conv_in_scientific_name.replace(' ', '_')

                    for threshold_csv_row_array in threshold_reader:
                        in_species_name = threshold_csv_row_array[0]
                        if conv_in_scientific_name == in_species_name:
                            # only overwrite the default when a match is found
                            in_threshold = threshold_csv_row_array[1]
                            if in_threshold == 'na':
                                in_threshold = '1'
                            print '************'
                            print in_species_name
                            print in_threshold
                            sys.stdout.flush()
                            break

                    result = db.species.insert().execute(
                        scientific_name=in_scientific_name,
                        distribution_threshold=in_threshold,
                    )

                    species_count = species_count + 1

                    db_species_id = result.lastrowid
                else:
                    # We found it, grab the species id
                    db_species_id = row['id']

                # insert the occurrence into the db.
                # NOTE: Some records have empty in_record_numbers. The sql db
                # validates source_id vs source_record_id data, so if we have
                # an empty source_record_id, leave it unspecified.
                occurrences_count = occurrences_count + 1

                insert_values = dict(
                    species_id=db_species_id,
                    latitude=in_decimal_latitude,
                    longitude=in_decimal_longitude,
                    source_id=db_source_id,
                    classification='irruptive',
                )
                if in_record_number.strip() != '':
                    insert_values['source_record_id'] = in_record_number
                result = db.occurrences.insert().execute(**insert_values)

    log.debug("Species: %i", species_count)
    log.debug("Occurrences: %i", occurrences_count)