def main():
    if PROJECT_PATH is None:
        raise Exception("No PROJECT_ROOT_PATH")
    if GITHUB_ACCESS_TOKEN is None:
        raise Exception("No GITHUB_ACCESS_TOKEN")
    issues = utils.read_csv_ignore_headers(
        ISSUES_TO_COLLECT_COMMENTS_FOR_CSV_FILE_PATH,
        ISSUES_CSV_FILE_FIELD_NAMES)
    print("Getting all comments for issues...")
    total = len(issues)
    count = 0
    print('Skipping issues whose comments were already collected...')
    for issue in issues:
        try:
            count += 1
            issue_id = issue['id']
            repo_name = issue['repo_name']
            comments_url = issue['comments_url']
            output_file_name = "{}/comments@issue@{}@{}.json".format(
                OUTPUT_FOLDER_PATH, issue_id, repo_name.replace('/', '@'))
            if utils.file_or_read_file_already_exists(output_file_name):
                continue
            print("\t{}/{} repo={}".format(count, total, repo_name))
            issue_comments = get_comments(comments_url)
            utils.write_to_json_file(output_file_name, issue_comments)
        except Exception as e:
            print("ERROR: Failed getting comments for issue={}: {}".format(
                issue['id'], e))
    print("Done")
Example #2
def main():
    if PROJECT_PATH is None:
        raise Exception("No PROJECT_ROOT_PATH")
    if GITHUB_ACCESS_TOKEN is None:
        raise Exception("No GITHUB_ACCESS_TOKEN")
    if LIBRARIES_IO_ACCESS_TOKEN is None:
        raise Exception("No LIBRARIES_IO_ACCESS_TOKEN")
    entity_to_collect_for = utils.read_csv_ignore_headers(
        INPUT_CSV_FILE_PATH, INPUT_CSV_FILE_FIELD_NAMES)
    total = len(entity_to_collect_for)
    count = 0
    for x in entity_to_collect_for:
        try:
            repo_half_name = x['repo_name']
            count += 1
            print("\t{}/{} repo={}".format(count, total, repo_half_name))
            repo_full_name = get_repo_full_name_from_libraries_io(
                repo_half_name)

            output_file_name = "{}/{}.json".format(
                OUTPUT_FOLDER_PATH, repo_full_name.replace('/', '@'))
            if utils.file_or_read_file_already_exists(output_file_name):
                print("File Exists - Continuing")
                continue
            dependents = get_dependents(repo_full_name)
            utils.write_to_json_file(output_file_name, dependents)
        except Exception as e:
            print("ERROR: Failed for repo={}: {}".format(x['repo_name'], e))
    print("Done")
Example #3
def download(source):
    res_factory = utils.BlueprintResourceFactory()
    resource_base_dir = res_factory._get_resources_dir(TEST_SERVICE_NAME)
    resource_path = os.path.join(resource_base_dir, 'tmp-res-name')
    utils.mkdir(resource_base_dir)
    utils.write_to_json_file('port: 8080', resource_path)
    return resource_path
Example #4
def main():
    if PROJECT_PATH is None:
        raise Exception("No PROJECT_ROOT_PATH")
    if GITHUB_ACCESS_TOKEN is None:
        raise Exception("No GITHUB_ACCESS_TOKEN")
    commits_to_collect = utils.read_csv_ignore_headers(
        INPUT_CSV_FILE_PATH, INPUT_CSV_FILE_FIELD_NAMES)
    print("Getting all comments for issues...")
    total = len(commits_to_collect)
    count = 0
    for c in commits_to_collect:
        try:
            issue_id = c['issue_id']
            repo_name = c['repo_name']
            sha = c['commit_id']
            count += 1
            print("\t{}/{} sha={}".format(count, total, sha))
            output_file_name = "{}/commit@{}@{}.json".format(
                OUTPUT_FOLDER_PATH, sha, repo_name.replace('/', '@'))
            if utils.file_or_read_file_already_exists(output_file_name):
                print("File Exists - Continuing")
                continue
            commit_url = f'https://api.github.com/repos/{repo_name}/commits/{sha}'
            commit_json = get_commit_json(commit_url)
            utils.write_to_json_file(output_file_name, commit_json)
        except Exception as e:
            print("ERROR: Failed getting commit for issue={}: {}".format(
                c['issue_id'], e))
    print("Done")
Example #5
def download(source):
    resource_base_dir = utils.resource_factory.get_resources_dir(
        TEST_SERVICE_NAME)
    resource_path = os.path.join(resource_base_dir, 'tmp-res-name')
    utils.mkdir(resource_base_dir)
    utils.write_to_json_file('port: 8080', resource_path)
    return resource_path
Example #6
from typing import Tuple

def resample_train_test(data_path: str, lengths: Tuple[int, int]) -> Tuple[str, str]:
    with open(data_path, mode='r', encoding='utf-8') as fp:
        ppdb_pairs = json.load(fp)
        ppdb_size = len(ppdb_pairs)
        print(f"load {ppdb_size} ppdb pairs")
    train_size, test_size = lengths[0], lengths[1]
    train_pairs = list(np.random.choice(ppdb_pairs, train_size, replace=False))
    test_pairs = list(np.random.choice(ppdb_pairs, test_size, replace=True))
    P = Path(data_path)
    train_path = os.path.join(P.parent, "ppdb_train")
    test_path = os.path.join(P.parent, "ppdb_test")
    write_to_json_file(train_path, train_pairs)
    write_to_json_file(test_path, test_pairs)
    return train_path, test_path
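A hypothetical invocation, assuming the paraphrase pairs live in data/ppdb.json (path invented for illustration); the function writes ppdb_train and ppdb_test next to the input file and returns their paths:

train_path, test_path = resample_train_test("data/ppdb.json", (8000, 2000))
print(train_path, test_path)  # .../ppdb_train .../ppdb_test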
Example #7
def main():
    parser = argparse.ArgumentParser(description="Program to build docker images")
    parser.add_argument("--buildspec", type=str)
    parser.add_argument("--framework", type=str)
    parser.add_argument("--device_types", type=str, default=constants.ALL)
    parser.add_argument("--image_types", type=str, default=constants.ALL)
    parser.add_argument("--py_versions", type=str, default=constants.ALL)

    args = parser.parse_args()

    device_types = args.device_types.split(",") if not args.device_types == constants.ALL else args.device_types
    image_types = args.image_types.split(",") if not args.image_types == constants.ALL else args.image_types
    py_versions = args.py_versions.split(",") if not args.py_versions == constants.ALL else args.py_versions
    # Read build-mode settings from the environment
    build_context = os.getenv("BUILD_CONTEXT")
    ei_dedicated = os.getenv("EIA_DEDICATED") == "True"
    neuron_dedicated = os.getenv("NEURON_DEDICATED") == "True"

    # Get config value options
    frameworks_to_skip = parse_dlc_developer_configs("build", "skip_frameworks")
    ei_build_mode = parse_dlc_developer_configs("dev", "ei_mode")
    neuron_build_mode = parse_dlc_developer_configs("dev", "neuron_mode")

    # A general builder will work in non-EI and non-NEURON mode if its framework has not been disabled
    general_builder_enabled = (
        not ei_dedicated
        and not neuron_dedicated
        and not ei_build_mode
        and not neuron_build_mode
        and args.framework not in frameworks_to_skip
    )
    # An EI dedicated builder will work in EI mode if its framework has not been disabled
    ei_builder_enabled = (
        ei_dedicated and ei_build_mode and args.framework not in frameworks_to_skip
    )

    # A NEURON dedicated builder will work if in NEURON mode and its framework has not been disabled
    neuron_builder_enabled = (
        neuron_dedicated
        and neuron_build_mode
        and args.framework not in frameworks_to_skip
    )

    utils.write_to_json_file(constants.TEST_TYPE_IMAGES_PATH, {})
    # A builder will always work if it is in a non-PR context
    if general_builder_enabled or ei_builder_enabled or neuron_builder_enabled or build_context != "PR":
        utils.build_setup(
            args.framework, device_types=device_types, image_types=image_types, py_versions=py_versions,
        )
        image_builder(args.buildspec)
Example #8
def parse_blog(path):
    if is_file_exist(path + '/linked_papers.json'):
        return
    if not is_file_exist(path + '/urls.json'):
        return

    with open(path + '/urls.json') as f:
        urls = json.load(f)

    output = []
    dir_path = path + '/linked_papers.json'

    for url in urls:
        output.append(crawl_paper_links(url))
        write_to_json_file(dir_path, output)
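Note that parse_blog rewrites linked_papers.json after every URL, so partial progress survives a crash. The is_file_exist helper it guards with is not shown; presumably a thin wrapper:

import os


def is_file_exist(path):
    # Assumed helper: plain existence check used to skip finished blogs
    return os.path.isfile(path)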
Example #9
def main():
    gk_issues = utils.read_csv_ignore_headers(GREENKEEPER_ISSUES_FILE_PATH, GREENKEEPER_ISSUES_FIELD_NAMES)
    library_names = list()
    for issue in gk_issues:
        if issue['issue_dependency_name'] not in library_names:
            library_names.append(issue['issue_dependency_name'])
    total = len(library_names)
    count = 0
    for lib_name in library_names:
        count += 1
        try:
            print("\t{}/{} pr_url={}".format(count, total, lib_name))
            if not lib_name:
                continue
            lib_name_for_url = lib_name.replace('/', '%2F')
            url = 'https://libraries.io/api/NPM/{}?api_key={}'.format(lib_name_for_url, LIBRARIES_IO_ACCESS_TOKEN)
            lib_info = utils.send_request(url, None, ignore_token=True, sleep_time=1.2)
            lib_name_for_file = lib_name.replace('/', '%2F')
            file_name = "{}/{}.json".format(OUTPUT_FOLDER_PATH, lib_name_for_file)
            utils.write_to_json_file(file_name, lib_info)
        except Exception as e:
            print("Error on {}: {}".format(lib_name, e))
def main():
    if PROJECT_PATH is None:
        raise Exception("No PROJECT_ROOT_PATH")
    if GITHUB_ACCESS_TOKEN is None:
        raise Exception("No GITHUB_ACCESS_TOKEN")
    repos = utils.read_csv_ignore_headers(REPOS_FILE_PATH, REPOS_FIELD_NAMES)
    print("Getting all Issues for repos...")
    total = len(repos)
    count = 0
    for repo in repos:
        try:
            repo_name = repo['dependent']
            count += 1
            print("\t{}/{} repo={}".format(count, total, repo_name))
            output_file_name = "{}/issues@{}.json".format(OUTPUT_FOLDER_PATH, repo_name.replace('/', '@'))
            if utils.file_or_read_file_already_exists(output_file_name):
                print("File Exists - Continuing -- repo={}".format(repo_name))
                continue
            repo_issues = get_issues(repo_name)
            utils.write_to_json_file(output_file_name, repo_issues)
        except Exception as e:
            print("ERROR: Failed getting issues for repo={}: {}".format(repo['dependent'], e))
    print("Done")
Example #11
    # Build one Emmental task per requested task name
    tasks = {
        task_name: create_task(
            task_name, args, datasets[task_name]["nclasses"], emb_layer
        )
        for task_name in args.task
    }

    model = EmmentalModel(name="TC_task")

    if Meta.config["model_config"]["model_path"]:
        model.load(Meta.config["model_config"]["model_path"])
    else:
        for task_name, task in tasks.items():
            model.add_task(task)

    emmental_learner = EmmentalLearner()
    emmental_learner.learn(model, dataloaders)

    scores = model.score(dataloaders)
    logger.info(f"Metrics: {scores}")
    write_to_json_file(f"{Meta.log_path}/metrics.txt", scores)

    if args.checkpointing:
        logger.info(
            f"Best metrics: "
            f"{emmental_learner.logging_manager.checkpointer.best_metric_dict}"
        )
        write_to_file(
            f"{Meta.log_path}/best_metrics.txt",
            emmental_learner.logging_manager.checkpointer.best_metric_dict,
        )
Example #12
    config.read('config.ini')

    researchers_to_bows = []
    papers = []

    for author in os.listdir(papers_DIR):
        papers_sub_dir = os.path.join(papers_DIR, author, "")
        if os.path.isdir(papers_sub_dir) and os.listdir(papers_sub_dir):
            bow_sub_DIR = os.path.join(bow_DIR, author, "")
            all_texts = prepare_bow_content(papers_sub_dir, bow_sub_DIR)

            # Persist to intermediate file under subfolder
            outfile_location = os.path.join(bow_sub_DIR, "_.json")
            content = {author: ' '.join(all_texts)}

            utils.write_to_json_file(file_location=outfile_location,
                                     data=content)

            # Add to master list
            researchers_to_bows.append({
                'researcher': author,
                'bow_content': ' '.join(all_texts)
            })
            papers.extend(all_texts)

    RESET_BOW = config['PREPROCESSING'].getboolean('RESET_BOW')

    try:
        if RESET_BOW:
            data = []
        else:
            data = utils.read_json_file(master_output_file)
Example #13
def save_abilities(file_name="abilities.json"):
    new_abilities_dict = {
        'values': ability_values,
        # dict_keys is not JSON-serializable, so materialize it as a list
        'variable_values': list(variable_ability_values.keys())
    }
    write_to_json_file(new_abilities_dict, file_name)
Example #14
def train(dataset,
          classifier, 
          encoder,
          featurizer,
          path = "./model_checkpoint",
          epochs=100, 
          lr=0.01,
          batch_size=1024,
          ):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using {device} for training")
    # Construct PyTorch DataLoader
    train_test_split = [int(len(dataset) * 0.8), len(dataset) - int(len(dataset) * 0.8)]
    train_set, val_set = torch.utils.data.random_split(dataset, train_test_split)

    train_dataloader = DataLoader(train_set, 
        sampler=RandomSampler(train_set), 
        batch_size=batch_size,
        num_workers = 4)

    eval_dataloader = DataLoader(val_set, 
        sampler=RandomSampler(val_set),
        batch_size=batch_size,
        num_workers = 4)

    loss_function = nn.NLLLoss()
    optimizer = optim.Adam(classifier.parameters(), lr=lr)
    total_step = len(train_dataloader)
    classifier.to(device)
    encoder.to(device)
    classifier.train()

    # Setup train loss, eval loss tracking every epoch
    train_loss = []
    file_name_head = f"{type(encoder).__name__}-{featurizer.__name__}-{type(classifier).__name__}"
    # eval_loss = [] 
    for epoch in trange(epochs, desc='Epochs'):
        tr_loss = 0.
        nb_tr_examples, nb_tr_steps = 0, 0
        print(f"Start Training Epoch {epoch}")
        for step,  (X, X_mask, labels) in enumerate(tqdm(train_dataloader, desc="Iteration")):
            X = X.to(device)
            X_mask = X_mask.to(device)
            # BERT Encoder
            with torch.no_grad():
                final_hidden_states, cls_output = encoder(X, attention_mask = X_mask)

            inputs = featurizer(final_hidden_states) # cls_token

            inputs = inputs.to(device)
            labels = labels.to(device)
            classifier.zero_grad()
            outputs = classifier(inputs)
            loss = loss_function(outputs, labels)
            loss.backward()
            optimizer.step()
            tr_loss += loss.item()
            nb_tr_examples += inputs.size(0)
            nb_tr_steps += 1
            train_loss.append((loss.item(), nb_tr_steps))   

        logger.info('Total loss at epoch %d: %.5f' % (epoch+1, tr_loss))
        logger.info('Avrg  loss at epoch %d: %.5f' % (epoch+1, tr_loss / nb_tr_examples))
        
        # Evaluate the model f-1
        start = time.time()
        print("Testing {}".format(epoch))
        f1_test, acc_test = test(eval_dataloader, classifier, featurizer, encoder, device)
        f1_train, acc_train = test(train_dataloader, classifier, featurizer, encoder, device)
        logger.info('[F1, Accuracy] score at epoch %d | train: (%.5f, %.5f) | test: (%.5f, %.5f)' \
            % (epoch+1, f1_train, acc_train, f1_test, acc_test))
        end = time.time()
        if epoch == 0: print(f"Test cost {end-start}")

        if epoch % 1 == 0:
            # Save Model Checkpoint
            create_directory(path)
            torch.save(classifier.state_dict(), os.path.join(
                path, f"{file_name_head}-{epoch+1}"))
    # Write train loss per step      
    write_to_json_file(os.path.join(path,
        f"{file_name_head}_train_loss_per_epoch"), train_loss)
Example #15
def main():
    parser = argparse.ArgumentParser(
        description="Program to build docker images")
    parser.add_argument("--buildspec", type=str)
    parser.add_argument("--framework", type=str)
    parser.add_argument("--device_types", type=str, default=constants.ALL)
    parser.add_argument("--image_types", type=str, default=constants.ALL)
    parser.add_argument("--py_versions", type=str, default=constants.ALL)

    args = parser.parse_args()

    device_types = args.device_types.split(
        ",") if not args.device_types == constants.ALL else args.device_types
    image_types = args.image_types.split(
        ",") if not args.image_types == constants.ALL else args.image_types
    py_versions = args.py_versions.split(
        ",") if not args.py_versions == constants.ALL else args.py_versions
    # Read build-mode settings from the environment
    build_context = os.getenv("BUILD_CONTEXT")
    ei_dedicated = os.getenv("EIA_DEDICATED", "false").lower() == "true"
    neuron_dedicated = os.getenv("NEURON_DEDICATED", "false").lower() == "true"
    graviton_dedicated = os.getenv("GRAVITON_DEDICATED",
                                   "false").lower() == "true"
    habana_dedicated = os.getenv("HABANA_DEDICATED", "false").lower() == "true"

    # Get config value options
    frameworks_to_skip = parse_dlc_developer_configs("build",
                                                     "skip_frameworks")
    ei_build_mode = parse_dlc_developer_configs("dev", "ei_mode")
    neuron_build_mode = parse_dlc_developer_configs("dev", "neuron_mode")
    graviton_build_mode = parse_dlc_developer_configs("dev", "graviton_mode")
    habana_build_mode = parse_dlc_developer_configs("dev", "habana_mode")

    # Write empty dict to JSON file, so subsequent buildspec steps do not fail in case we skip this build
    utils.write_to_json_file(constants.TEST_TYPE_IMAGES_PATH, {})

    # Skip tensorflow-1 PR jobs, as there are no longer patch releases being added for TF1
    # Purposefully not including this in developer config to make this difficult to enable
    # TODO: Remove when we remove these jobs completely
    build_arn = utils.get_codebuild_build_arn()
    if build_context == "PR":
        tf_1_build_regex = re.compile(r"dlc-pr-tensorflow-1:")
        if tf_1_build_regex.search(build_arn):
            return

    # A general builder will work in non-EI, non-NEURON, non-GRAVITON and non-HABANA mode if its framework has not been disabled
    general_builder_enabled = (not ei_dedicated and not neuron_dedicated
                               and not graviton_dedicated
                               and not habana_dedicated and not ei_build_mode
                               and not neuron_build_mode
                               and not graviton_build_mode
                               and not habana_build_mode
                               and args.framework not in frameworks_to_skip)
    # An EI dedicated builder will work in EI mode if its framework has not been disabled
    ei_builder_enabled = ei_dedicated and ei_build_mode and args.framework not in frameworks_to_skip

    # A NEURON dedicated builder will work if in NEURON mode and its framework has not been disabled
    neuron_builder_enabled = neuron_dedicated and neuron_build_mode and args.framework not in frameworks_to_skip

    # A GRAVITON dedicated builder will work if in GRAVITON mode and its framework has not been disabled
    graviton_builder_enabled = graviton_dedicated and graviton_build_mode and args.framework not in frameworks_to_skip

    # A HABANA dedicated builder will work if in HABANA mode and its framework has not been disabled
    habana_builder_enabled = habana_dedicated and habana_build_mode and args.framework not in frameworks_to_skip

    # A builder will always work if it is in a non-PR context
    if (general_builder_enabled or ei_builder_enabled or neuron_builder_enabled
            or graviton_builder_enabled or habana_builder_enabled
            or build_context != "PR"):
        utils.build_setup(
            args.framework,
            device_types=device_types,
            image_types=image_types,
            py_versions=py_versions,
        )
        image_builder(args.buildspec)
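get_codebuild_build_arn is not shown either; AWS CodeBuild exposes the build ARN through the CODEBUILD_BUILD_ARN environment variable, so a minimal sketch is:

import os


def get_codebuild_build_arn():
    # CodeBuild sets CODEBUILD_BUILD_ARN for every build
    return os.getenv("CODEBUILD_BUILD_ARN", "")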
Example #17
    device_types = (args.device_types.split(",")
                    if not args.device_types == constants.ALL else
                    args.device_types)
    image_types = (args.image_types.split(",") if
                   not args.image_types == constants.ALL else args.image_types)
    py_versions = (args.py_versions.split(",") if
                   not args.py_versions == constants.ALL else args.py_versions)
    # Read build-mode settings from the environment
    build_context = os.getenv("BUILD_CONTEXT")
    ei_dedicated = os.getenv("EIA_DEDICATED") == "True"

    # A general/non-EI builder will work in non-EI mode if its framework has not been disabled
    non_ei_builder_enabled = not ei_dedicated and \
                             not build_config.ENABLE_EI_MODE and \
                             args.framework not in build_config.DISABLE_FRAMEWORK_TESTS
    # An EI dedicated builder will work in EI mode if its framework has not been disabled
    ei_builder_enabled = ei_dedicated and \
                         build_config.ENABLE_EI_MODE and \
                         args.framework not in build_config.DISABLE_FRAMEWORK_TESTS

    utils.write_to_json_file(constants.TEST_TYPE_IMAGES_PATH, {})
    # A builder will always work if it is in a non-PR context
    if non_ei_builder_enabled or ei_builder_enabled or build_context != "PR":
        utils.build_setup(
            args.framework,
            device_types=device_types,
            image_types=image_types,
            py_versions=py_versions,
        )
        image_builder(args.buildspec)
Example #18
    stream_of_docs = [
        paper for x in researchers_to_bow for paper in x["bow_content"]
    ]  # Flatten a list of lists
    tokenized_docs, bigram_model = bow_phrases.text_preprocess_with_phrases(
        stream_of_docs)
    pickle.dump(bigram_model, open("bigram_model.p", "wb"))

    papers = [' '.join(x) for x in tokenized_docs]

    unflattened_docs = split_list_by_indices(tokenized_docs, num_publications)

    authors = [x['researcher'] for x in researchers_to_bow]

    researchers_to_bow_with_phrases = []
    for author, text_tokens in zip(authors, unflattened_docs):
        aggregated_texts = ' '.join(
            [word for paper in text_tokens for word in paper])
        researchers_to_bow_with_phrases.append({
            'researcher': author,
            'bow_content': aggregated_texts
        })
        utils.write_to_file(
            os.path.join(data_directory, author, "_with_phrases.all"),
            aggregated_texts)

    utils.write_to_json_file(file_location="data_with_phrases.json",
                             data=researchers_to_bow_with_phrases)

    utils.write_to_json_file(file_location="papers_with_phrases.json",
                             data=papers)
Example #19
def serialized_train(dataset,
          classifier, 
          encoder,
          featurizer,
          path = "./model_checkpoint",
          epochs=100, 
          lr=0.01,
          batch_size=8,
          ):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using {device} for training on serialized data")
    # Construct PyTorch DataLoader
    train_test_split = [int(len(dataset) * 0.8), len(dataset) - int(len(dataset) * 0.8)]
    train_set, val_set = torch.utils.data.random_split(dataset, train_test_split)

    train_dataloader = DataLoader(train_set, 
        sampler=RandomSampler(train_set), 
        batch_size=batch_size,
        num_workers = 1)

    eval_dataloader = DataLoader(val_set, 
        sampler=RandomSampler(val_set),
        batch_size=batch_size,
        num_workers = 1)

    loss_function = nn.NLLLoss()
    optimizer = optim.Adam(classifier.parameters(), lr=lr)
    total_step = len(train_dataloader)
    classifier.to(device)
    classifier.train()

    # Setup train loss, eval loss tracking every epoch
    train_loss = []
    file_name_head = f"{encoder}-{featurizer}-{type(classifier).__name__}"
    # eval_loss = [] 
    train_f1_monitor, test_f1_monitor = [], []
    for epoch in trange(epochs, desc='Epochs'):
        tr_loss = 0.
        nb_tr_examples, nb_tr_steps = 0, 0
        print(f"Start Training Epoch {epoch}")
        for step,  (inputs, labels) in enumerate(tqdm(train_dataloader, desc="Iteration")):
            inputs = inputs.to(device)
            labels = labels.to(device)
            classifier.zero_grad()
            outputs = classifier(inputs)
            loss = loss_function(outputs, labels)
            loss.backward()
            optimizer.step()
            tr_loss += loss.item()
            nb_tr_examples += inputs.size(0)
            nb_tr_steps += 1
            train_loss.append((loss.item(), nb_tr_steps))   

        logger.info('Total loss at epoch %d: %.5f' % (epoch+1, tr_loss))
        logger.info('Avrg  loss at epoch %d: %.5f' % (epoch+1, tr_loss / nb_tr_examples))
        
        # Evaluate the model f-1
        start = time.time()
        f1_test, acc_test = test(eval_dataloader, classifier, device)
        f1_train, acc_train = test(train_dataloader, classifier, device)
        train_f1_monitor.append(f1_train)
        test_f1_monitor.append(f1_test)
        logger.info('[F1, Accuracy] score at epoch %d | train: (%.5f, %.5f) | test: (%.5f, %.5f)' \
            % (epoch+1, f1_train, acc_train, f1_test, acc_test))
        end = time.time()
        if epoch == 0: print(f"Test cost {end-start}")

        if epoch % 1 == 0:
            # Save Model Checkpoint
            create_directory(path)
            torch.save(classifier.state_dict(), os.path.join(
                path, f"{file_name_head}-{epoch+1}"))
        if (epoch+1) % 10 == 0:
            lr *= 0.1  # every 10 epochs, reduce the learning rate by a factor of 10
            adjust_learning_rate(optimizer, lr)

        early_stop = early_stopping_monitor({'val_f1': test_f1_monitor, 'train_f1': train_f1_monitor})
        if early_stop:
            break
    # Write train loss per step      
    write_to_json_file(os.path.join(path,
        f"{file_name_head}_train_loss_per_epoch"), train_loss)
Example #20
from card import Card, save_abilities
from utils import write_to_json_file, read_from_json_file

if __name__ == '__main__':
    spec_list = read_from_json_file('cards.json')

    cards = [Card(**card_spec) for card_spec in spec_list]

    for card in cards:
        print(str(card))

    save_abilities()

    card_list = [card.serialize() for card in cards]

    write_to_json_file(card_list, 'cards.json')
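The companion read_from_json_file used here is presumably the inverse of write_to_json_file(data, file_path); a minimal sketch:

import json


def read_from_json_file(file_path):
    # Assumed counterpart: load and return the JSON document
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)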
Example #21
        bird_chromosomes = one_generation_evolution(bird_chromosomes,
                                                    crossover_probability=crossover_probability,
                                                    mutation_probability=mutation_probability,
                                                    finale_population_size=number_of_birds,
                                                    percentage_for_parenting=percentage_for_parenting)

        pipe_surface = pygame.image.load("assets/pipe-green.png").convert()
        pipe_surface = pygame.transform.scale2x(pipe_surface)
        pipe_list = []

        pipe_height = [i for i in range(400, 850, 50)]

        while True:
            if stop_evolution_flag:
                for index in range(len(bird_chromosomes)):
                    if active_birds[index]:
                        bird_chromosomes[index].complete_training(score)
                bird_chromosomes.sort(reverse=True)
                write_to_json_file([bird_chromosomes[i].to_dict() for i in range(number_of_birds)])
                break
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    pygame.quit()
                    sys.exit()

                if event.type == SPAWN_PIPE:
                    pipe_list.extend(create_pipe())
                    if len(pipe_list) > 8:
                        del pipe_list[0]
                        del pipe_list[0]
                if event.type in FLY:
                    ind = event.type - FLY[0]
                    bird_movement[ind] = 0
                    bird_movement[ind] -= up_velocity
Example #22
def eval_genomes(genomes, config):
    global current_generation, high_score, game_active, bird_movement, dt, floor_x, get_ticks_last_frame, pipe_list, score
    current_generation += 1

    # start by creating lists holding the genome itself, the
    # neural network associated with the genome and the
    # bird object that uses that network to play
    nets = []
    bird_chromosomes = []
    ge = []
    for genome_id, genome in genomes:
        genome.fitness = 0  # start with fitness level of 0
        net = neat.nn.FeedForwardNetwork.create(genome, config)
        nets.append(net)
        bird_chromosomes.append(Chromosome(NeuralBird()))
        ge.append(genome)

    print("CURRENT GENERATION: {}".format(current_generation))
    bird_rects = [
        bird_surface.get_rect(center=(100, 512))
        for _ in range(number_of_birds)
    ]
    active_birds = [True] * number_of_birds
    pipe_list = []
    score = 0

    # pygame.time.set_timer(FLY, 850)

    # bird_chromosomes = [Chromosome(NeuralBird()) for _ in range(number_of_birds)]
    # bird_chromosomes = Chromosome.read_from_file("training.json", population_size=number_of_birds)

    # bird_chromosomes = one_generation_evolution(bird_chromosomes,
    #                                             crossover_probability=crossover_probability,
    #                                             mutation_probability=mutation_probability,
    #                                             finale_population_size=number_of_birds,
    #                                             percentage_for_parenting=percentage_for_parenting)

    while True:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                sys.exit()

            if event.type == SPAWNPIPE:
                pipe_list.extend(create_pipe())
                if len(pipe_list) > 8:
                    del pipe_list[0]
                    del pipe_list[0]
            if event.type in FLY:
                ind = event.type - FLY[0]
                bird_movement[ind] = 0
                bird_movement[ind] -= up_velocity * dt
        screen.blit(background_sf, (0, 0))

        if game_active:
            # Bird
            bird_movement = list(map(lambda x: x + gravity * dt,
                                     bird_movement))
            rotated_bird = [
                rotate_bird(bird_surface, i) for i in range(number_of_birds)
            ]
            for index in range(number_of_birds):
                if active_birds[index]:
                    bird_rects[index].centery += bird_movement[index]
                    screen.blit(rotated_bird[index], bird_rects[index])
                    # check for collisions
                    if check_collision(pipe_list, bird_rects[index]):
                        active_birds[index] = False
                        ge[index].fitness -= 1
                        # nets.pop(index)
                        # ge.pop(index)
                        # bird_chromosomes[index].complete_training(score)

                    distance = 100
                    pipe_up = 100
                    pipe_down = 100

                    # update the input neurons
                    for i in range(0, len(pipe_list), 2):
                        distance = pipe_list[i].bottomleft[0] - bird_rects[
                            index].bottomright[0]
                        pipe_down = abs(pipe_list[i].topright[1] -
                                        bird_rects[index].bottomleft[1])
                        pipe_up = abs(pipe_list[i + 1].bottomright[1] -
                                      bird_rects[index].topleft[1])
                        if distance > 0:
                            break

                    output = nets[index].activate(
                        (distance, pipe_down, pipe_up))

                    # tanh activation: output is in [-1, 1]; jump when above 0.5
                    if output[0] > 0.5:
                        pygame.event.post(fly_events[index])

                    # compute the new output value; if it is True, post a fly event
                    # print("Bird {}: {}".format(index, bird_chromosomes[index].bird.compute_output()))
                    if bird_chromosomes[index].bird.compute_output():
                        pygame.event.post(fly_events[index])

            # when all birds are dead, set game_active to False
            game_active = any(active_birds)

            # Pipes
            pipe_list = move_pipes(pipe_list)
            draw_pipes(pipe_list)

            score += 0.01
            score_display(score, high_score)
        else:
            # genetic algorithm step: save the parameters of the
            # best-fitness birds, then reset the population
            game_active = True

            bird_chromosomes.sort(reverse=True)
            write_to_json_file([
                bird_chromosomes[i].to_dict() for i in range(number_of_birds)
            ])
            if score > high_score:
                high_score = score
            print("game over")
            score = 0
            break
        # Floor
        floor_x -= 1
        draw_floor()
        if floor_x <= -576:
            floor_x = 0

        pygame.display.update()
        t = pygame.time.get_ticks()
        dt = (t - get_ticks_last_frame) / 1000
        get_ticks_last_frame = t

        if score > 100:
            pickle.dump(nets[0], open("best.pickle", "wb"))
            break
Example #24
    for paper_filename in glob.glob(author_dir + '*.pdf'):
        paper_title = os.path.basename(paper_filename).replace(".pdf", "")
        if not is_paper_seen(papers_collection, paper_title):
            papers_collection.append(paper_title)

            pdfPath = paper_filename
            print("pdfPath = " + pdfPath)
            fileNameOut = dataset_DIR + "p" + str(index) + ".txt"
            print('converting (%s %s -> %s)' % ("pdftotext", pdfPath,
                                                fileNameOut))
            try:
                subprocess.check_call(["pdftotext", pdfPath, fileNameOut])
                papers_id.append(index)
                index += 1
                # os.system(""" %s "%s" "%s" """ % ("pdftotext", pdfPath, fileNameOut))
            except subprocess.CalledProcessError:
                logging.error("Syntax Error: Couldn't parse pdf file. PDF: %s",
                              paper_title)
                # papers_id.remove(index)
                # index -= 1
        else:
            paper_id = papers_collection.index(paper_title)
            papers_id.append(paper_id)
            print "***** Paper " + paper_title + " already exists. Paperid=" + str(
                paper_id)

    author_papers_dict[author] = papers_id

utils.write_to_json_file("dataset/authors.json", author_papers_dict)