Example #1
def import_data(data_dir, db_params, ed_component_path):
    """
    call the right component to import the data in the directory

    we loop through all files until we recognize one of them
    """
    log = logging.getLogger(__name__)
    files = glob.glob(data_dir + "/*")
    data_type, file_to_load = utils.type_of_data(files)
    if not data_type:
        log.info('unknown data type for dir {}, skipping'.format(data_dir))
        return

    # Note, we consider that we only have to load one kind of data per directory
    import_component = data_type + '2ed'
    if ed_component_path:
        import_component = os.path.join(ed_component_path, import_component)

    if file_to_load.endswith('.zip') or file_to_load.endswith('.geopal'):
        # TODO: handle geopal as non zip
        # if it's a zip, we unzip it
        zip_file = zipfile.ZipFile(file_to_load)
        zip_file.extractall(path=data_dir)
        file_to_load = data_dir

    if launch_exec(
        import_component, ["-i", file_to_load, "--connection-string", db_params.old_school_cnx_string()], log
    ):
        raise Exception('Error: problem with running {}, stopping'.format(import_component))
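
launch_exec itself is not shown in this listing. A minimal sketch of what such a helper could look like, assuming it only runs the "*2ed" binary with the given arguments, forwards its output to the logger and reports failure through a non-zero return code (the name and behaviour below are assumptions, not the navitia implementation):

import subprocess


def launch_exec_sketch(binary, args, log):
    """Run `binary` with `args`, log its output, and return its exit code (assumed contract)."""
    log.info("running %s %s", binary, " ".join(args))
    proc = subprocess.Popen([binary] + args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    for line in proc.stdout:
        log.info(line.decode("utf-8", "replace").rstrip())
    return proc.wait()
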
Example #2
def import_in_mimir(_file, instance, asynchronous=True):
    """
    Import pt data (stops) into the autocomplete
    """
    datatype, _ = utils.type_of_data(_file)
    family_type = utils.family_of_data(datatype)

    current_app.logger.debug("Import {} data to mimir".format(family_type))

    action = None

    if family_type == 'pt':
        if instance.import_ntfs_in_mimir:
            action = ntfs2mimir.si(instance.name, _file)
        if instance.import_stops_in_mimir and not instance.import_ntfs_in_mimir:
            action = stops2mimir.si(instance.name, _file)
    elif family_type == 'poi':
        action = poi2mimir.si(instance.name, _file)
    else:
        current_app.logger.warning("Unsupported family_type {}".format(family_type))

    if asynchronous:
        return action.delay()
    else:
        # all jobs are run in sequence and import_in_mimir will only return when all of them are finished
        return action.apply()
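
ntfs2mimir, stops2mimir and poi2mimir are Celery tasks: .si() builds an immutable signature (frozen arguments, parent results ignored), .delay() sends it to a worker, .apply() executes it locally. Note that action stays None here when family_type is unsupported, so the final .delay()/.apply() would raise. A standalone sketch of the signature pattern, with a purely illustrative task and broker:

from celery import Celery

app = Celery('sketch', broker='memory://')


@app.task
def ping(name):
    return 'pong for {}'.format(name)


sig = ping.si('demo')  # immutable signature: arguments are frozen now
sig.apply()            # run synchronously in the current process
sig.delay()            # queue it for a worker (needs a running worker to execute)
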
Example #3
def import_data(data_dir, db_params, ed_component_path):
    # type: (str, DbParams, str) -> None
    """
    Call the right binary for its data (one of the "*2ed" tools) to convert the data and load it into the database.

    :param data_dir: the directory containing the data for "*2ed"
    :param db_params: the parameters of the database
    :param ed_component_path: the path of the directory containing the binary "*2ed"
    """
    files = glob.glob(data_dir + "/*")  # type: List[str]
    data_type, file_to_load = utils.type_of_data(files)  # type: str, str
    if not data_type:
        logger.info('unknown data type for dir {}, skipping'.format(data_dir))
        return

    # we consider that we only have to load one kind of data per directory
    import_component = data_type + '2ed'  # type: str
    if ed_component_path:
        import_component = os.path.join(ed_component_path, import_component)

    if file_to_load.endswith('.zip') or file_to_load.endswith('.geopal'):
        # TODO: handle geopal as non zip ; if it's a zip, we unzip it
        zip_file = zipfile.ZipFile(file_to_load)  # type: zipfile.ZipFile
        zip_file.extractall(path=data_dir)
        file_to_load = data_dir

    if launch_exec.launch_exec(import_component, [
            "-i", file_to_load, "--connection-string",
            db_params.old_school_cnx_string()
    ], logger):
        raise Exception(
            'Error: problem with running {}, stopping'.format(import_component))
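
Example #3 is the same function with PEP 484 type comments (the "# type: (...) -> ..." form), which lets mypy check code that must stay Python 2 compatible. A minimal sketch of the syntax:

from typing import List


def first_word(lines):
    # type: (List[str]) -> str
    """Return the first word of the first line."""
    return lines[0].split()[0]


word = first_word(["hello world"])  # type: str
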
Example #4
File: tasks.py Project: thmsct/navitia
def import_in_mimir(_file, instance, asynchronous=True):
    """
    Import pt data (stops) into the autocomplete
    """
    datatype, _ = utils.type_of_data(_file)
    family_type = utils.family_of_data(datatype)

    current_app.logger.debug("Import {} data to mimir".format(family_type))

    action = None

    if family_type == 'pt':
        if instance.import_ntfs_in_mimir:
            action = ntfs2mimir.si(instance.name, _file)
        # Deprecated: https://github.com/CanalTP/mimirsbrunn/blob/4430eed1d81247fffa7cf32ba675a9c5ad8b1cbe/documentation/components.md#stops2mimir
        if instance.import_stops_in_mimir and not instance.import_ntfs_in_mimir:
            action = stops2mimir.si(instance.name, _file)
    elif family_type == 'poi':
        action = poi2mimir.si(instance.name, _file)
    else:
        current_app.logger.warning("Unsupported family_type {}".format(family_type))

    if asynchronous:
        return action.delay()
    else:
        # all jobs are run in sequence and import_in_mimir will only return when all of them are finished
        return action.apply()
Example #5
File: tasks.py Project: xlqian/navitia
def import_in_mimir(_file, instance, asynchronous=True):
    """
    Import pt data (stops) into the autocomplete
    """
    datatype, _ = utils.type_of_data(_file)
    family_type = utils.family_of_data(datatype)
    current_app.logger.debug("Import {} data to mimir".format(family_type))

    actions = []

    for version in (2, 7):
        if not is_activate_autocomplete_version(version):
            logging.getLogger(__name__).info(
                "Disable import mimir version {}".format(version))
            continue
        if family_type == 'pt':
            if instance.import_ntfs_in_mimir:
                actions.append(ntfs2mimir.si(instance.name, _file, version))
            # Deprecated: https://github.com/hove-io/mimirsbrunn/blob/4430eed1d81247fffa7cf32ba675a9c5ad8b1cbe/documentation/components.md#stops2mimir
            if instance.import_stops_in_mimir and not instance.import_ntfs_in_mimir:
                actions.append(stops2mimir.si(instance.name, _file, version))
        elif family_type == 'poi':
            actions.append(poi2mimir.si(instance.name, _file, version))
        else:
            current_app.logger.warning(
                "Unsupported family_type {}".format(family_type))

    if asynchronous:
        return chain(*actions).delay()
    else:
        # all jobs are run in sequence and import_in_mimir will only return when all of them are finished
        return chain(*actions).apply()
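
Here the signatures are collected in a list and executed in order with celery.chain instead of running a single action. A self-contained sketch of chaining immutable signatures (task name and broker are illustrative):

from celery import Celery, chain

app = Celery('sketch', broker='memory://')


@app.task
def step(label):
    return label


actions = [step.si('import'), step.si('binarize'), step.si('reload')]
workflow = chain(*actions)
workflow.apply()    # synchronous, like the asynchronous=False branch
# workflow.delay()  # asynchronous, like the asynchronous=True branch
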
Example #6
        'fusio': fusio2ed,
        'osm': osm2ed,
        'geopal': geopal2ed,
        'fare': fare2ed,
        'poi': poi2ed,
        'synonym': synonym2ed,
        'shape': shape2ed,
    }

    for _file in files:
        filename = None

        dataset = models.DataSet()
        # NOTE: for the moment we do not use the path to load the data here
        # but we'll need to refactor this to take it into account
        dataset.type, _ = utils.type_of_data(_file)
        dataset.family_type = utils.family_of_data(dataset.type)
        if dataset.type in task:
            if backup_file:
                filename = move_to_backupdirectory(_file,
                                                   instance_config.backup_directory)
            else:
                filename = _file
            actions.append(task[dataset.type].si(instance_config, filename, dataset_uid=dataset.uid))
        else:
            # unknown type, we skip it
            current_app.logger.debug("unknown file type: {} for file {}"
                                     .format(dataset.type, _file))
            continue

        # currently the name of a dataset is the path to it
Example #7
def import_data(
    files, instance, backup_file, asynchronous=True, reload=True, custom_output_dir=None, skip_mimir=False
):
    """
    import the data contained in the list of 'files' into the 'instance'

    :param files: files to import
    :param instance: instance to receive the data
    :param backup_file: If True the files are moved to a backup directory, else they are not moved
    :param asynchronous: If True all jobs are run in the background, else the jobs are run in sequence and the
     function will only return when all of them are finished
    :param reload: If True, kraken will be reloaded at the end of the processing
    :param custom_output_dir: subdirectory for the nav file created. If not given, the instance default one is taken
    :param skip_mimir: skip importing data into mimir

    run the whole data import process:

    - import the data into the database (fusio2ed, gtfs2ed, poi2ed, ...)
    - export the database to a nav file
    - update the jormungandr db with the new data for the instance
    - reload the krakens
    """
    actions = []
    job = models.Job()
    instance_config = load_instance_config(instance.name)
    job.instance = instance
    job.state = 'running'
    task = {
        'gtfs': gtfs2ed,
        'fusio': fusio2ed,
        'osm': osm2ed,
        'geopal': geopal2ed,
        'fare': fare2ed,
        'poi': poi2ed,
        'synonym': synonym2ed,
        'shape': shape2ed,
    }

    for _file in files:
        filename = None

        dataset = models.DataSet()
        # NOTE: for the moment we do not use the path to load the data here
        # but we'll need to refactor this to take it into account
        try:
            dataset.type, _ = utils.type_of_data(_file)
            dataset.family_type = utils.family_of_data(dataset.type)
        except Exception:
            if backup_file:
                move_to_backupdirectory(_file, instance_config.backup_directory)
            current_app.logger.debug(
                "Corrupted source file : {} moved to {}".format(_file, instance_config.backup_directory)
            )
            continue

        if dataset.type in task:
            if backup_file:
                filename = move_to_backupdirectory(_file, instance_config.backup_directory)
            else:
                filename = _file
            actions.append(task[dataset.type].si(instance_config, filename, dataset_uid=dataset.uid))
        else:
            # unknown type, we skip it
            current_app.logger.debug("unknown file type: {} for file {}".format(dataset.type, _file))
            continue

        # currently the name of a dataset is the path to it
        dataset.name = filename
        models.db.session.add(dataset)
        job.data_sets.append(dataset)

    if actions:
        models.db.session.add(job)
        models.db.session.commit()
        # We pass the job id to each task, but the job needs to be committed to have an id
        for action in actions:
            action.kwargs['job_id'] = job.id
        # Create binary file (New .nav.lz4)
        binarisation = [ed2nav.si(instance_config, job.id, custom_output_dir)]
        actions.append(chain(*binarisation))
        # Reload kraken with new data after binarisation (New .nav.lz4)
        if reload:
            actions.append(reload_data.si(instance_config, job.id))

        if not skip_mimir:
            for dataset in job.data_sets:
                actions.extend(send_to_mimir(instance, dataset.name, dataset.family_type))
        else:
            current_app.logger.info("skipping mimir import")

        actions.append(finish_job.si(job.id))
        if asynchronous:
            return chain(*actions).delay()
        else:
            # all jobs are run in sequence and import_data will only return when all of them are finished
            return chain(*actions).apply()
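
The action.kwargs['job_id'] = job.id step works because a Celery signature stores its arguments as plain data that can still be edited after creation, which is how the job id (only known after the commit) is injected into already-built tasks. A small sketch of that property (task name illustrative):

from celery import Celery

app = Celery('sketch', broker='memory://')


@app.task
def load(filename, job_id=None):
    return filename, job_id


sig = load.si('stops.zip')   # job_id is not known yet
sig.kwargs['job_id'] = 42    # fill it in once the job row is committed
result = sig.apply()         # result.get() -> ('stops.zip', 42)
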
Example #8
File: tasks.py Project: xlqian/navitia
def import_data(
    files,
    instance,
    backup_file,
    asynchronous=True,
    reload=True,
    custom_output_dir=None,
    skip_mimir=False,
    skip_2ed=False,
):
    """
    import the data contained in the list of 'files' into the 'instance'

    :param files: files to import
    :param instance: instance to receive the data
    :param backup_file: If True the files are moved to a backup directory, else they are not moved
    :param asynchronous: If True all jobs are run in the background, else the jobs are run in sequence and the
     function will only return when all of them are finished
    :param reload: If True, kraken will be reloaded at the end of the processing
    :param custom_output_dir: subdirectory for the nav file created. If not given, the instance default one is taken
    :param skip_mimir: skip importing data into mimir
    :param skip_2ed: skip inserting last_load_dataset files into ed database
    run the whole data import process:

    - import the data into the database (fusio2ed, gtfs2ed, poi2ed, ...)
    - export the database to a nav file
    - update the jormungandr db with the new data for the instance
    - reload the krakens
    """
    actions = []
    job = models.Job()
    instance_config = load_instance_config(instance.name)
    job.instance = instance
    job.state = 'running'
    task = {
        'gtfs': gtfs2ed,
        'fusio': fusio2ed,
        'osm': osm2ed,
        'geopal': geopal2ed,
        'fare': fare2ed,
        'poi': poi2ed,
        'synonym': synonym2ed,
        'shape': shape2ed,
    }

    def process_ed2nav():
        models.db.session.add(job)
        models.db.session.commit()
        # We pass the job id to each task, but the job needs to be committed to have an id
        for action in actions:
            action.kwargs['job_id'] = job.id
        # Create binary file (New .nav.lz4)
        binarisation = [ed2nav.si(instance_config, job.id, custom_output_dir)]
        actions.append(chain(*binarisation))
        # Reload kraken with new data after binarisation (New .nav.lz4)
        if reload:
            actions.append(reload_data.si(instance_config, job.id))

        if not skip_mimir:
            for dataset in job.data_sets:
                actions.extend(
                    send_to_mimir(instance, dataset.name, dataset.family_type))
        else:
            current_app.logger.info("skipping mimir import")

        actions.append(finish_job.si(job.id))

        # We should delete old backup directories related to this instance
        actions.append(
            purge_instance.si(
                instance.id,
                current_app.config['DATASET_MAX_BACKUPS_TO_KEEP']))
        if asynchronous:
            return chain(*actions).delay()
        else:
            # all jobs are run in sequence and import_data will only return when all of them are finished
            return chain(*actions).apply()

    if skip_2ed:
        # For skip_2ed, skip inserting last_load_dataset files into ed database
        return process_ed2nav()
    for _file in files:
        filename = None

        dataset = models.DataSet()
        # NOTE: for the moment we do not use the path to load the data here
        # but we'll need to refactor this to take it into account
        try:
            dataset.type, _ = utils.type_of_data(_file)
            dataset.family_type = utils.family_of_data(dataset.type)
        except Exception:
            if backup_file:
                move_to_backupdirectory(_file,
                                        instance_config.backup_directory)
            current_app.logger.debug(
                "Corrupted source file : {} moved to {}".format(
                    _file, instance_config.backup_directory))
            continue

        if dataset.type in task:
            if backup_file:
                filename = move_to_backupdirectory(
                    _file,
                    instance_config.backup_directory,
                    manage_sp_char=True)
            else:
                filename = _file

            has_pt_planner_loki = (
                hasattr(instance, 'pt_planners_configurations')
                and "loki" in instance.pt_planners_configurations)
            if has_pt_planner_loki:
                loki_data_source = instance.pt_planners_configurations.get(
                    'loki', {}).get('data_source')
                if loki_data_source is not None:
                    if loki_data_source == "minio":
                        if dataset.type == "fusio":
                            actions.append(
                                fusio2s3.si(instance_config,
                                            filename,
                                            dataset_uid=dataset.uid))
                        if dataset.type == "gtfs":
                            actions.append(
                                gtfs2s3.si(instance_config,
                                           filename,
                                           dataset_uid=dataset.uid))
                    elif loki_data_source == "local" and dataset.type in [
                            "fusio", "gtfs"
                    ]:
                        zip_file = zip_if_needed(filename)
                        dest = os.path.join(
                            os.path.dirname(instance_config.target_file),
                            "ntfs.zip")
                        shutil.copy(zip_file, dest)
                    else:
                        current_app.logger.debug(
                            "unknown loki data_source '{}' for coverage '{}'".
                            format(loki_data_source, instance.name))

            actions.append(task[dataset.type].si(instance_config,
                                                 filename,
                                                 dataset_uid=dataset.uid))
        else:
            # unknown type, we skip it
            current_app.logger.debug(
                "unknown file type: {} for file {}".format(
                    dataset.type, _file))
            continue

        # currently the name of a dataset is the path to it
        dataset.name = filename
        dataset.state = "pending"
        models.db.session.add(dataset)
        job.data_sets.append(dataset)

    if actions:
        return process_ed2nav()
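
zip_if_needed is not part of this listing. A sketch under the assumption that it returns the path untouched when it already points at a .zip file and otherwise archives the data directory next to it (name and behaviour are guesses, not the navitia implementation):

import shutil


def zip_if_needed_sketch(path):
    """Return a .zip for `path`: unchanged if already a zip, otherwise archive the directory (assumed)."""
    if path.endswith('.zip'):
        return path
    # shutil.make_archive adds the '.zip' suffix itself and returns the archive path
    return shutil.make_archive(path, 'zip', root_dir=path)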