Example #1
def move_data_into_categorized_directories():
    """Sets up a temporary directory for categorized, preprocessed images.

    If ../temp/deep/categorized/ does not exist, then it is created and
    filled with preprocessed training and test images in the appropriate
    categories.

    Returns the directories for the train and test images."""

    dirname = "../temp/deep/categorized/"

    train_dirname = dirname + "train/"
    test_dirname = dirname + "test/"

    if util.ensure_directory_exists(dirname):
        logger.info("found categorized images in " + dirname + ", proceeding")
        return train_dirname, test_dirname

    logger.info(
        "couldn't find categorized images in {}, copying images now".format(
            dirname))

    util.ensure_directory_exists(train_dirname)
    util.ensure_directory_exists(test_dirname)

    for label, sublabels in util.cloud_kinds.items():
        logger.debug("processing {}".format(label))
        train_labeldirname = train_dirname + label + "/"
        test_labeldirname = test_dirname + label + "/"

        os.makedirs(train_labeldirname)
        os.makedirs(test_labeldirname)

        for cloud_type in sublabels:
            logger.debug("current cloud type: {}".format(cloud_type))
            sublabeldirname = "../data/" + cloud_type + "/"
            for filename in os.listdir(sublabeldirname):
                if random() > 0.2:
                    # Training image; crop and split into left and right half...

                    img = util.cropImage(imread(sublabeldirname + filename))
                    width = img.shape[1]
                    imsave(train_labeldirname + "l-" + filename,
                           img[:, :(width // 2), :])
                    imsave(train_labeldirname + "r-" + filename,
                           img[:, (width // 2):, :])

                else:
                    # Test image, just crop

                    imsave(test_labeldirname + filename,
                           util.cropImage(imread(sublabeldirname + filename)))

    logger.info("finished copying images")

    return train_dirname, test_dirname
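Note the contract this example relies on: a truthy return from util.ensure_directory_exists means "the directory already existed, so skip the copying". A minimal sketch consistent with that usage (an assumption; the project's actual util module is not shown):

import os

def ensure_directory_exists(dirname):
    # hypothetical boolean variant: report whether the directory already
    # existed, creating it if it did not
    if os.path.isdir(dirname):
        return True
    os.makedirs(dirname)
    return False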
Example #2
def replicate_pipelines(
    cfg_set,
    concourse_cfg,
    job_mapping,
    definitions_root_dir,
    template_path,
    template_include_dir,
    unpause_pipelines: bool = True,
    expose_pipelines: bool = True,
):
    ensure_directory_exists(definitions_root_dir)
    team_name = job_mapping.team_name()
    team_credentials = concourse_cfg.team_credentials(team_name)

    pipeline_names = set()

    for rendered_pipeline, _, pipeline_metadata in generate_pipelines(
            definitions_root_dir=definitions_root_dir,
            job_mapping=job_mapping,
            template_path=template_path,
            template_include_dir=template_include_dir,
            config_set=cfg_set,
    ):
        pipeline_name = pipeline_metadata.pipeline_name
        pipeline_names.add(pipeline_name)
        info('deploying pipeline {p} to team {t}'.format(p=pipeline_name,
                                                         t=team_name))
        deploy_pipeline(
            pipeline_definition=rendered_pipeline,
            pipeline_name=pipeline_name,
            concourse_cfg=concourse_cfg,
            team_credentials=team_credentials,
            unpause_pipeline=unpause_pipelines,
            expose_pipeline=expose_pipelines,
        )

    concourse_api = client.ConcourseApi(base_url=concourse_cfg.external_url(),
                                        team_name=team_name)
    concourse_api.login(team=team_name,
                        username=team_credentials.username(),
                        passwd=team_credentials.passwd())

    # rm pipelines that were not contained in job_mapping
    pipelines_to_remove = set(concourse_api.pipelines()) - pipeline_names

    for pipeline_name in pipelines_to_remove:
        info('removing pipeline: {p}'.format(p=pipeline_name))
        concourse_api.delete_pipeline(pipeline_name)

    # order pipelines alphabetically
    pipeline_names = list(concourse_api.pipelines())
    pipeline_names.sort()
    concourse_api.order_pipelines(pipeline_names)
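The cleanup step above is a small declarative-reconciliation pattern: every pipeline that was deployed is recorded in pipeline_names, and anything the Concourse team still carries beyond that set gets deleted. A self-contained illustration of the set arithmetic (names invented):

desired = {'build', 'deploy'}
existing = {'build', 'deploy', 'stale-pipeline'}
for name in sorted(existing - desired):
    print('would remove:', name)  # prints: would remove: stale-pipeline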
Example #3
def replicate_pipeline_definitions(
    definition_dir: str,
    cfg_dir: str,
    cfg_name: str,
):
    '''
    Replicates pipeline definitions from cc-pipelines to component repositories.
    Only required until the definitions are moved to the component repositories.
    '''
    util.ensure_directory_exists(definition_dir)
    util.ensure_directory_exists(cfg_dir)

    cfg_factory = ConfigFactory.from_cfg_dir(cfg_dir)
    cfg_set = cfg_factory.cfg_set(cfg_name)
    github_cfg = cfg_set.github()

    github = _create_github_api_object(github_cfg=github_cfg)

    repo_mappings = util.parse_yaml_file(os.path.join(definition_dir, '.repository_mapping'))

    for repo_path, definition_file in repo_mappings.items():
        # hack: definition_file is a list with always exactly one entry
        definition_file = util.ensure_file_exists(os.path.join(definition_dir, definition_file[0]))
        with open(definition_file) as f:
            definition_contents = f.read()

        repo_owner, repo_name = repo_path.split('/')

        helper = GitHubHelper(
            github=github,
            repository_owner=repo_owner,
            repository_name=repo_name,
        )
        # only do this for branch 'master' to avoid merge conflicts
        for branch_name in ['master']: #branches(github_cfg, repo_owner, repo_name):
            util.info('Replicating pipeline-definition: {r}:{b}'.format(
                    r=repo_path,
                    b=branch_name,
                )
            )
            # create pipeline definition file in .ci/pipeline_definitions
            try:
                helper.create_or_update_file(
                    repository_branch=branch_name,
                    repository_version_file_path='.ci/pipeline_definitions',
                    file_contents=definition_contents,
                    commit_message="Import cc-pipeline definition"
                )
            except Exception:
                pass  # keep going; a failure for one repository must not abort the rest
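For reference, the .repository_mapping file parsed above maps repository paths to pipeline-definition files, each value wrapped in a single-element list as the "hack" comment notes. A hypothetical parsed result (all names invented):

repo_mappings = {
    'example-org/example-repo': ['pipelines/example-repo.yaml'],
}
# definition_file[0] then selects the lone entry, 'pipelines/example-repo.yaml'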
Example #4
def determine_mail_recipients(src_dir, github_cfg_name):
    '''
    returns a generator yielding all email addresses for the given (git) repository work tree
    Email addresses are looked up:
    - from head commit: author and committer
    - from *CODEOWNERS files [0]

    Email addresses are not de-duplicated (this should be done by consumers)

    [0] https://help.github.com/articles/about-codeowners/
    '''
    cfg_factory = ctx().cfg_factory()

    github_cfg = cfg_factory.github(github_cfg_name)
    github_api = githubutil._create_github_api_object(github_cfg)

    # committer/author from head commit
    repo = git.Repo(ensure_directory_exists(src_dir))
    head_commit = repo.commit(repo.head)
    yield head_commit.author.email.lower()
    yield head_commit.committer.email.lower()

    # codeowners
    parser = CodeownersParser(repo_dir=src_dir)
    resolver = CodeOwnerEntryResolver(github_api=github_api)

    codeowner_entries = parser.parse_codeowners_entries()
    yield from resolver.resolve_email_addresses(codeowner_entries)
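Because the generator does not de-duplicate, a typical consumer collects the addresses into a set. A minimal usage sketch (the config name is invented):

recipients = set(determine_mail_recipients(src_dir='.', github_cfg_name='github_example'))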
Example #5
def get_logger():
    global _LOGGER #pylint: disable=global-statement

    # Check if the logger has already been created
    if not _LOGGER:
        # Get a logger and name it
        logger = logging.getLogger('main')

        # Set the logging level for the current program
        logger.setLevel(logging.DEBUG)

        # Clean user input and make sure the directory exists
        log_dir = util.clean_directory_name(FLAGS.log_dir) # pylint: disable=no-member
        util.ensure_directory_exists(log_dir) # pylint: disable=no-member

        # Clean user input and get just the name
        if FLAGS.log_name.endswith('/'):
            log_name = os.path.dirname(FLAGS.log_name)
        else:
            log_name = os.path.basename(FLAGS.log_name)

        # Add the rotating file handler.
        _file_handler = logging.handlers.RotatingFileHandler(
            os.path.join(log_dir, log_name),
            maxBytes=FLAGS.log_size,
            backupCount=FLAGS.num_logs
        )
        _file_handler.setFormatter(logging.Formatter(
            fmt='%(asctime)s.%(msecs)03d %(message)s',
            datefmt='%Y_%m_%d %H:%M:%S'
        ))
        _file_handler.setLevel(logging.DEBUG)
        logger.addHandler(_file_handler)

        # Add the console (stdout) handler.
        _handler = logging.StreamHandler(sys.stdout)
        _handler.setFormatter(logging.Formatter(fmt='%(message)s'))
        _handler.setLevel(FLAGS.log_level)
        logger.addHandler(_handler)

        # Publish the logger under the lock. Note that the emptiness check
        # above runs outside the lock, so two threads could still race to
        # build the logger; only the assignment itself is serialized.
        with _LOGGER_LOCK:
            # Set the global logger
            _LOGGER = logger

    return _LOGGER
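Usage is then a single call; FLAGS must already be parsed so that log_dir, log_name, log_size, num_logs and log_level are available:

logger = get_logger()
logger.debug('logger initialised')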
Example #6
File: ctx.py Project: swapnilgm/cc-utils
def _cfg_factory_from_dir():
    if not args or not args.cfg_dir:
        return None

    from util import ensure_directory_exists
    cfg_dir = ensure_directory_exists(args.cfg_dir)

    from model import ConfigFactory
    factory = ConfigFactory.from_cfg_dir(cfg_dir=cfg_dir)
    return factory
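Note the different contract in this project: cc-utils' ensure_directory_exists is used for its return value, which is treated as the directory path itself (Examples #4 and #9 do the same). A minimal sketch consistent with that usage (an assumption; the real helper may instead raise if the path is missing or not a directory):

import os

def ensure_directory_exists(path: str) -> str:
    # hypothetical path-returning variant: create if absent, hand back the path
    os.makedirs(path, exist_ok=True)
    return path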
Example #7
def load_extracted_features():
    """Loads the extracted features from ../temp/deep/features if they
    were already extracted, and else extracts & saves the features.

    Returns the training features, training labels, validation features,
    and validation labels.
    """

    dirname = "../temp/deep/features/"
    if util.ensure_directory_exists(dirname):
        logger.info("found extracted features in " + dirname + ", proceeding")
        loaded = np.load(dirname + "training.npz")
        tr_features = loaded['tr_features']
        tr_labels = loaded['tr_labels']
        val_features = loaded['val_features']
        val_labels = loaded['val_labels']
        return tr_features, tr_labels, val_features, val_labels

    logger.info("did not find extracted features in " + dirname)

    train_image_dir, test_image_dir = move_data_into_categorized_directories()

    # more data augmentation options:
    # rotation_range
    # width_shift_range
    # height_shift_range
    # shear_range
    # zoom_range
    # vertical_flip
    train_datagen = ImageDataGenerator(fill_mode='nearest',
                                       horizontal_flip=True,
                                       rotation_range=10.0,
                                       shear_range=5.0,
                                       zoom_range=0.2,
                                       rescale=1. / 255,
                                       validation_split=0.2)

    num_train = 4096
    logger.info("extracting {} training features".format(num_train))
    tr_features, tr_labels = extract_features(train_datagen, train_image_dir,
                                              "training", num_train)

    num_val = int(num_train * 0.2)
    logger.info("extracting {} validation features".format(num_val))
    val_features, val_labels = extract_features(train_datagen, train_image_dir,
                                                "validation", num_val)

    np.savez_compressed(dirname + "training.npz",
                        tr_features=tr_features,
                        tr_labels=tr_labels,
                        val_features=val_features,
                        val_labels=val_labels)

    return tr_features, tr_labels, val_features, val_labels
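The caching above is a plain np.savez_compressed / np.load round trip: the keyword-argument names become the keys of the loaded archive. A minimal self-contained check:

import numpy as np

np.savez_compressed('/tmp/training.npz', tr_features=np.zeros((2, 3)), tr_labels=np.array([0, 1]))
loaded = np.load('/tmp/training.npz')
assert loaded['tr_features'].shape == (2, 3)
assert list(loaded['tr_labels']) == [0, 1]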
Example #8
def save_resized_pictures(height, width):
    for cloud_kind, subkinds in util.cloud_kinds.items():
        if util.ensure_directory_exists("../temp/classic/" + cloud_kind):
            # directory exists already, we assume that the resized pictures
            # are in there
            continue

        logger.info("saving resized %s pictures" % cloud_kind)

        counter = 0

        for subkind in subkinds:
            for element in glob.glob("../data/" + subkind + "/*"):
                img = Image.open(element)
                # PIL's Image.resize expects (width, height), in that order
                resized = img.resize((width, height), PIL.Image.ANTIALIAS)
                path = "../temp/classic/%s/%s%s.jpg" % (cloud_kind, cloud_kind,
                                                        counter)
                resized.save(path)
                counter += 1
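One pitfall worth flagging (and corrected in the resize call above): PIL's Image.resize takes a (width, height) tuple, in that order. A quick check:

from PIL import Image

im = Image.new('RGB', (100, 50))  # the size argument is (width, height)
assert im.resize((30, 60)).size == (30, 60)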
Example #9
    def from_cfg_dir(cfg_dir: str, cfg_types_file='config_types.yaml'):
        cfg_dir = ensure_directory_exists(os.path.abspath(cfg_dir))
        cfg_types_dict = parse_yaml_file(os.path.join(cfg_dir, cfg_types_file))
        raw = {}

        raw[ConfigFactory.CFG_TYPES] = cfg_types_dict

        def parse_cfg(cfg_type):
            # assume for now that there is exactly one cfg source (file)
            cfg_sources = list(cfg_type.sources())
            if not len(cfg_sources) == 1:
                raise ValueError('currently, only exactly one cfg file is supported per type')

            cfg_file = cfg_sources[0].file()
            parsed_cfg = parse_yaml_file(os.path.join(cfg_dir, cfg_file), as_snd=False)
            return parsed_cfg

        # parse all configurations
        for cfg_type in map(ConfigType, cfg_types_dict.values()):
            cfg_name = cfg_type.cfg_type_name()
            raw[cfg_name] = parse_cfg(cfg_type)

        return ConfigFactory(raw_dict=raw)
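Usage would then be a single call; the directory layout is an assumption here (it must contain config_types.yaml plus one source file per configured type):

factory = ConfigFactory.from_cfg_dir('/path/to/cfg_dir')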