# Example 1
def export_to_cache():
    """Export data for every site in SITE_LIST, committing one site at a time.

    The cache is re-initialized and its in-memory structures cleared on each
    iteration, followed by an explicit gc.collect(), to keep memory usage
    bounded while processing many sites.
    """
    for site in SITE_LIST:
        # re-init per site so each commit starts from a fresh cache state
        cache.init_cache(CACHE_DATABASE_FILE, ECHO_SQLALCHEMY)
        cache.clear_memory_cache()
        # force collection of objects released by clear_memory_cache()
        gc.collect()
        commit_data_for_site(site)
# Example 2
def export_cache():
    """Walk every cache database file and export each source it contains.

    A single site counter is threaded through all databases so exported
    sites get globally unique, sequential indices.
    """
    site_counter = 1
    for db_filename in CACHE_DATABASE_FILES:
        db_path = os.path.join(CACHE_DIR, db_filename)
        cache.init_cache(db_path, ECHO_SQLALCHEMY)
        # ensure timeseries values are indexed; creating an index that
        # already exists raises OperationalError, which we ignore
        try:
            Index('ix_value_timeseries_id',
                  cache.DBValue.__table__.c.timeseries_id).create(cache.engine)
        except OperationalError:
            pass
        for src in cache.db_session.query(cache.DBSource).all():
            site_counter = export_source(src, site_counter)
# Example 3
def convert_to_pyhis():
    """Convert the tceq database to a pyhis cache database.

    For each tceq parameter code in WDFT_PARAMETERS, pulls the matching
    results from the tceq session, converts their units via UNITS_DICT,
    and writes sites, timeseries, and values into the pyhis cache.
    Commits in batches once more than 5000 objects are pending, plus a
    final commit per parameter.
    """
    # if os.path.exists(CACHE_DATABASE_FILE):
    #     print ("Hold up.. %s already exists. You need to delete or "
    #            "rename it before continuing." % CACHE_DATABASE_FILE)
    #     sys.exit(1)
    # create_pyhis_sites(stations, file_source)

    # NOTE(review): `found` is never read again in this function —
    # looks like a leftover from an earlier revision.
    found = False
    cache.init_cache(CACHE_DATABASE_FILE, ECHO_SQLALCHEMY)
    for tceq_parameter_code in WDFT_PARAMETERS:
        # NOTE(review): a new CacheSource is constructed on every parameter
        # iteration with the same url — presumably the cache layer dedupes
        # by url; confirm, otherwise duplicate sources are created.
        file_source = cache.CacheSource(url=TCEQ_SOURCE)
        parameter = tceq_session.query(Parameter).filter_by(
            parameter_code=tceq_parameter_code).one()
        # only keep results with no greater-than/less-than qualifier
        results_query = parameter.results.filter_by(gtlt='')
        results_count = results_query.count()

        # map the tceq parameter code to the wdft code and display name
        wdft_parameter_code = WDFT_PARAMETERS[tceq_parameter_code][0]
        wdft_parameter_name = PARAMETERS_DICT[wdft_parameter_code]

        # resolve the target units and the value-conversion function
        tceq_units_code = WDFT_PARAMETERS[tceq_parameter_code][1]
        wdft_converted_units_code = UNITS_DICT[tceq_units_code][1]
        wdft_converted_units_name = UNITS_DICT[wdft_converted_units_code][0]
        conversion_func = UNITS_DICT[tceq_units_code][2]
        if not conversion_func:
            # no conversion registered: pass values through unchanged
            conversion_func = lambda x: x

        print("converting %s values for param: %s (%s)" % (
            results_count, tceq_parameter_code, wdft_parameter_name))

        units = cache.CacheUnits(
            code=wdft_converted_units_code,
            abbreviation=wdft_converted_units_code,
            name=wdft_converted_units_name)

        # one variable (with its units) per tceq parameter code
        variable = cache.CacheVariable(
            units=units,
            name=wdft_parameter_name,
            code=wdft_parameter_code,
            vocabulary=TCEQ_VOCABULARY)

        param_total = 0
        param_count = results_count
        for result in page_query(results_query):
            # batch commit: flush pending session objects every ~5000 to
            # bound memory use during long conversions
            if len(cache.db_session.new) > 5000:
                param_total += len(cache.db_session.new)
                cache.db_session.commit()
                print("committing %s of %s" % (param_total, param_count))

            # skip qualified values (re-check, even though the query
            # already filters on gtlt='')
            if result.gtlt != '':
                logger.warning ("result being thrown out, gtlt value. "
                               "result id: %s" % result.id)
                continue

            # a result with no parent event cannot be timestamped or
            # located, so it is dropped
            if not result.event:
                logger.warning("no event found for orphaned result: %s, %s" %
                              (result.id, result.tag_id))
                continue

            event = result.event
            try:
                station = tceq_session.query(Station).filter_by(tceq_station_id=event.station_id).one()
            except NoResultFound:
                logger.warning("station not found for event %s, station_id %s: " %
                              (event.id, event.station_id))
                continue

            # derive the timestamp: prefer the event's start date/time,
            # fall back to its end date/time, otherwise drop the event
            if getattr(event, 'start_date', None) and \
                   getattr(event, 'start_time', None):
                timestamp = datetime.datetime.combine(event.start_date,
                                                      event.start_time)
            elif getattr(event, 'end_date', None) and \
                     getattr(event, 'end_time', None):
                timestamp = datetime.datetime.combine(event.end_date,
                                                      event.end_time)
            else:
                logger.warning("event being thrown out, could not determine "
                              "timestamp. event tag_id: %s" % event.tag_id)
                continue

            # look up an existing cached site by exact lat/lon match
            site = cache.db_session.query(cache.DBSite).filter_by(
                latitude=station.latitude,
                longitude=station.longitude).first()

            if not site:
                # no cached site at these coordinates yet — create one
                # (auto_add attaches it to the session without committing)
                site = cache.CacheSite(
                    site_id=station.tceq_station_id,
                    code=station.tceq_station_id,
                    name=station.short_description,
                    network=TCEQ_NETWORK,
                    source=file_source,
                    latitude=station.latitude,
                    longitude=station.longitude,
                    auto_commit=False,
                    auto_add=True)

            timeseries = cache.CacheTimeSeries(
                site=site,
                variable=variable,
                auto_commit=False,
                auto_add=True)

            # NOTE(review): `value` is never referenced afterwards —
            # presumably constructing DBValue with timeseries= attaches it
            # to the session via the relationship cascade; confirm.
            value = cache.DBValue(
                timestamp=timestamp,
                value=conversion_func(result.value),
                timeseries=timeseries)

        # flush whatever remains for this parameter
        cache.db_session.commit()