def collector(update_on_conflict, ignore_on_conflict, use_existing):
    """Run a one-off task to synchronize from the fracx data source"""
    logger.info(conf)
    endpoint = Endpoint.load_from_config(conf)["frac_schedules"]
    collector = FracScheduleCollector(endpoint)
    ftp = Ftp.from_config()
    latest = ftp.get_latest()
    rows = BytesFileHandler.xlsx(
        latest.get("content"),
        date_columns=endpoint.mappings.get("dates"),
        sheet_no=1,
    )
    rows = [collector.transform(row) for row in rows]
    # collect() persists the transformed rows; the per-row persist() loop in the
    # original draft duplicated that write and has been dropped.
    collector.collect(rows, update_on_conflict, ignore_on_conflict)
    ftp.cleanup()
def collector(update_on_conflict, ignore_on_conflict, use_existing):
    """Run a one-off task to synchronize from the fracfocus data source"""
    logger.info(conf)
    endpoints = Endpoint.load_from_config(conf)
    coll = FracFocusCollector(endpoints["registry"])
    url = util.urljoin(conf.COLLECTOR_BASE_URL, conf.COLLECTOR_URL_PATH)
    if not use_existing:
        downloader = ZipDownloader(url)
        req = downloader.get()
        filelist = downloader.unpack(req).paths
    else:
        downloader = ZipDownloader.from_existing()
        filelist = downloader.paths
    coll.collect(filelist, update_on_conflict, ignore_on_conflict)
import logging
from typing import Dict, Optional

from celery import Celery
from celery.schedules import crontab
from celery.signals import after_setup_logger, after_setup_task_logger, beat_init

import celery_queue.tasks
import loggers
from collector import Endpoint
from config import get_active_config
from ihs import create_app

logger = logging.getLogger(__name__)

conf = get_active_config()
endpoints = Endpoint.load_from_config(conf)


def create_celery(app):
    celery = Celery(
        app.import_name,
        broker=app.config["BROKER_URL"],
        include=app.config["CELERY_TASK_LIST"],
    )
    celery.conf.update(app.config)
    TaskBase = celery.Task

    class ContextTask(TaskBase):  # noqa
        abstract = True
        metadata: Optional[Dict] = None
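        # NOTE: the original fragment ends at the annotation above. What follows
        # is a minimal sketch of the conventional Flask/Celery ContextTask body
        # (an assumption, not this project's verbatim code): run every task
        # inside the app context so db sessions and config resolve normally.
        def __call__(self, *args, **kwargs):
            with app.app_context():
                return TaskBase.__call__(self, *args, **kwargs)

    celery.Task = ContextTask
    return celery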
import logging
from time import sleep

from api.models import WellHorizontal
from collector import Collector, Endpoint, XMLParser
from collector.tasks import collect, get_job_results, run_endpoint_task, submit_job
from config import get_active_config
from ihs import create_app
from util import load_json, to_json

app = create_app()
app.app_context().push()
logging.basicConfig(level=20)

conf = get_active_config()
endpoints = Endpoint.from_yaml("tests/data/collector.yaml")

# NOTE: WellboreTransformer is referenced below but its import does not appear
# in this fragment; it is assumed to live alongside the other collector classes.
task_name, endpoint_name, transformer = (
    "endpoint_check",
    "well_vertical",
    WellboreTransformer,
)
# endpoint_name, transformer = "production_horizontal", ProductionTransformer
# endpoint_name = "production_vertical"
# endpoint_name = "production_horizontal"
# endpoint_name = "well_horizontal"
# task_name = "endpoint_check"

model = endpoints[endpoint_name].model
job_config = [
    x for x in run_endpoint_task(endpoint_name, task_name) if x is not None
][0]
            )
            return row
        except Exception as e:
            logger.exception(f"Transformation error: {e}")
            raise TransformationError(e)

    def apply_aliases(self, row: Row) -> Row:
        return {self.aliases[k]: v for k, v in row.items()}

    def drop_exclusions(self, row: Row) -> Row:
        if len(self.exclude) > 0:
            try:
                logger.debug(f"Dropping {len(self.exclude)} columns: {self.exclude}")
                # iterate key/value pairs; the original iterated the dict itself,
                # which yields keys only and fails when unpacked into k, v
                row = {k: v for k, v in row.items() if k not in self.exclude}
            except Exception as e:
                msg = f"Failed attempting to drop columns -- {e}"
                self.errors.append(msg)
                logger.debug(msg)
        return row


if __name__ == "__main__":
    from collector import Endpoint

    # scratch usage: conf and row are assumed to already be in scope (REPL session)
    ep = Endpoint.load_from_config(conf)["registry"]
    t = Transformer(ep.mappings.aliases, ep.exclude)
    t.transform(row)
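# An illustrative round trip through the two helpers above; the alias map and
# exclusion list are made-up values, and the positional constructor mirrors the
# __main__ usage rather than a documented signature:
#   t = Transformer({"WellName": "well_name"}, ["Comment"])
#   t.apply_aliases({"WellName": "Smith 1H"})                 # -> {"well_name": "Smith 1H"}
#   t.drop_exclusions({"well_name": "Smith 1H", "Comment": "x"})  # -> {"well_name": "Smith 1H"}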
logger.setLevel(10)

from attrdict import AttrDict
from collector import Endpoint
from collector.xml_query import XMLQuery
from ihs import create_app, db
from ihs.config import get_active_config

conf = get_active_config()
app = create_app()
app.app_context().push()

# endpoints = Endpoint.from_yaml("tests/data/collector.yaml")
endpoints = Endpoint.from_yaml("config/collector.yaml")

task = endpoints["well_horizontal"].tasks["endpoint_check"]

# NOTE: Task is referenced below without an import in this fragment; it is
# assumed to be the collector task class. The options dict is truncated in
# the source and left as-is.
task = Task(
    model_name="api.models.WellHorizontal",
    task_name="sync",
    endpoint_name="well_horizontal",
    cron={"minute": 0, "hour": 12},
    options={"matrix": {"check": {"values":
import logging

import numpy as np
import pandas as pd

from collector import Endpoint, XMLParser
from config import get_active_config
from ihs import create_app
from util import to_json

conf = get_active_config()
app = create_app()
app.app_context().push()
logging.basicConfig(level=20)

# NOTE: ExportBuilder and CDExporter are used below without imports in this
# fragment; both are assumed to come from the collector package.
eb = ExportBuilder(None)
len(eb.list_completed_jobs())

endpoints = Endpoint.load_from_config(conf, load_disabled=True)
endpoint = endpoints.get("well_master_horizontal")

# cde = CDExporter("2018/10/13", "2019/11/04", endpoint=endpoint)
cde = CDExporter("2020/01/01", "2020/03/31", endpoint=endpoint)
self = cde  # REPL convenience: lets method bodies be pasted and run line by line
results = cde.get_all()

# from collector import Collector
# records = []
# max_sequence = ChangeDeleteLog.max_sequence()
# for r in results:
#     new = {}
#     for k, v in r.items():
#         if v is not None:
def endpoint(conf):
    yield Endpoint(name="ep_test", model="api.models.FracSchedule")
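# A hypothetical test showing how the fixture above would be consumed under
# pytest (the test name and assertion are illustrative, not from the source):
def test_endpoint_fixture(endpoint):
    assert endpoint.name == "ep_test"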
def endpoints():
    from collector import Endpoint

    for name, ep in Endpoint.load_from_config(conf).items():
        click.secho(name)
def test_create_from_dict(self):
    ep = Endpoint.from_dict("ep_from_dict", {"model": "api.models.FracSchedule"})
    assert ep.name == "ep_from_dict"
def test_load_from_config(self, conf):
    ep = Endpoint.load_from_config(conf).get(conf.FRAC_SCHEDULE_TABLE_NAME)
    assert ep.name == conf.FRAC_SCHEDULE_TABLE_NAME