예제 #1
0
 def test_add_gap_1(self):
     """
     Add a gap at index 0 of a two-sequence profile and check the bookkeeping.

     Verifies that both sequences start with GAP afterwards and that the
     SP-score list and the profile length are both 5.
     """
     profile = Profile(["agct", "ag-t"])
     profile.add_gap(0)

     # each sequence gets its own failure message so a report points at the right row
     self.assertEqual(GAP, profile.seqs[0][0], "Gap in seq1 not inserted")
     self.assertEqual(GAP, profile.seqs[1][0], "Gap in seq2 not inserted")

     self.assertEqual(5, len(profile.sp_scores),
                      "SP-Scores has not correct length")
     # presumably index 2 is the shifted 'g'/'g' column -- TODO confirm against add_gap
     self.assertEqual(score_dna_match, profile.sp_scores[2],
                      "SP-Score not recomputed")
     self.assertEqual(5, len(profile), "Profile has not correct size")
예제 #2
0
def optimize_agent(seqs, configurations, print_console, print_graph,
                   create_agent_trainer):
    """
    Train one agent type under several parameter configurations and keep the best one.
    :param seqs: sequences to optimize the agents on
    :param configurations: parameter configurations to be tested for optimality
    :param print_console: forwarded to the trainer's run (console logging flag)
    :param print_graph: forwarded to the trainer's run (graph output flag)
    :param create_agent_trainer: callable ``(seqs, config) -> (agent, trainer)``
    :return: the align-tables (updated with the trainers' tables), the best configuration, its score tuple,
        its profile and the ``profile.score()`` results of all configurations in the order they were tested
    """
    # one table per (score, construction type); shared between configurations of the same setting
    align_tables = {
        score_name: {
            "Refinement": HashAlignTable(Profile(seqs)),
            "Progressive": HashAlignTable(seqs),
        }
        for score_name in ("SP", "CS")
    }
    best_config, best_profile = None, None
    best_score = (0, (0, 0))
    profile_scores = []

    for config in configurations:
        _agent, trainer = create_agent_trainer(seqs, config)
        score_key, type_key = get_score(config), get_c_type(config)

        # train on the table of the matching setting and take the trainer's table back afterwards
        trainer.set_align_table(align_tables[score_key][type_key])
        performance = trainer.run(print_console, print_graph)
        align_tables[score_key][type_key] = trainer.get_align_table()

        # final alignment produced by the trained agent
        (profile, _permutation), _ = trainer.evaluate_training()
        profile_scores.append(profile.score())

        # candidates are ranked by alignment score first, learning score second
        candidate_score = (profile.score()[config.score],
                           score_learning(*performance))
        if candidate_score > best_score:
            best_score, best_config, best_profile = candidate_score, config, profile

    return align_tables, best_config, best_score, best_profile, profile_scores
예제 #3
0
    def train(self, print_progress):
        """
        Play up to ``self.games`` games and learn from each of them.
        :param print_progress: flag forwarded to ``self.print_progress``; also enables the early-exit message
        :return: three lists (rewards, losses, fails) holding the values returned by ``self.print_progress``
            at every report (usable for analytical and optimization tasks)
        """
        reward_sum, loss_sum, fail_count = 0, (0, 0), 0
        reward_log, loss_log, fail_log = [], [], []

        for game in range(self.games):
            # report every plot_size games, or as soon as the align-table is full
            report_due = (game + 1) % self.plot_size == 0
            if report_due or self.env.align_table.is_full():
                shown_reward, shown_loss, shown_fail = self.print_progress(
                    print_progress, game, reward_sum, loss_sum, fail_count)
                reward_log.append(shown_reward)
                loss_log.append(shown_loss)
                fail_log.append(shown_fail)
                reward_sum, loss_sum, fail_count = 0, (0, 0), 0

                # a full table means every alignment is known: stop training
                if self.env.align_table.is_full():
                    nothing_stored = self.env.best_alignment == (Profile([]),
                                                                 None)
                    if nothing_stored:
                        self.env.best_alignment = self.env.align_table.get_best(
                            self.score)
                    if print_progress:
                        print(
                            "Search exited. All alignments have been visited and optimality is guaranteed."
                        )
                    break

            rewards, state, _, finished = self.env.soft_reset()

            # play one game and record every transition in a local replay buffer
            transitions = []
            while not finished:
                action, value, log_prob = self.act(state, self.env.available)
                snapshot = np.array(state)
                rewards, state, _, finished = self.env.step(action)
                transition = (
                    linearize_state(snapshot, self.num_seqs),
                    action,
                    rewards[self.score],
                    linearize_state(state, self.num_seqs),
                    finished,
                    value,
                    log_prob,
                )
                transitions.append(transition)

            # book the last reward of the game for the selected score
            reward_sum += rewards[self.score]

            # outside refinement, a final state whose length differs from num_seqs counts as a fail
            if not self.refinement and len(state) != self.num_seqs:
                fail_count += 1

            # learn from the played game and accumulate the losses element-wise
            loss_sum = [
                old + new
                for old, new in zip(loss_sum, self.learn(transitions))
            ]
        return reward_log, loss_log, fail_log
예제 #4
0
def optimize_mcts_agent(seqs, configurations: List[MCTSTC], align_table=None):
    """
    Optimize the MCTS learning agent over the given configurations of hyperparameters
    :param seqs: sequences to use for alignment
    :param configurations: configurations to test while training
    :param align_table: kept for interface compatibility; not read by this function
    :return: the align-tables filled during the search (nested by score and construction type), the best
        configuration (None if no progressive run finished with a score above 0) and its score
    """
    align_tables = {
        score_name: {
            "Refinement": HashAlignTable(Profile(seqs)),
            "Progressive": HashAlignTable(seqs),
        }
        for score_name in ("SP", "CS")
    }
    best_config = None
    best_score = 0

    # try all defined configurations of hyperparameter to find the best performing one
    for config in configurations:
        agent = MCTSAgent(seqs,
                          simulations=config.simulations,
                          rollouts=config.rollouts,
                          c=config.c,
                          score=config.score,
                          refinement=config.refinement,
                          adjust=config.adjust)
        score_key, type_key = get_score(config), get_c_type(config)
        agent.set_align_table(align_tables[score_key][type_key])

        # refinement configurations are not evaluated here
        if config.refinement:
            continue

        # compute the alignment using UCT-MCTS to find the most promising sequence
        env = AlignmentWrapper(seqs, agent, config.score)
        score, _, _, done = env.run()
        align_tables[score_key][type_key] = agent.get_align_table()

        if score > best_score and done:
            best_score = score
            best_config = config

    # return the populated tables, not the unused `align_table` argument
    return align_tables, best_config, best_score
예제 #5
0
def run_regressions(logDirectoryPath):
    """
    Replay the 30 most recently modified ``.log`` files of a directory and compare with recorded results.

    A log is replayed only if a ``.csv`` file with the same stem sits next to it. The comparison
    (or the exception raised while simulating) is printed per game, followed by timing totals.
    The whole run executes inside a ``Profile()`` context.
    :param logDirectoryPath: directory containing the ``.log``/``.csv`` pairs (trailing separator optional)
    """
    start = time.time()
    games = 0

    # the context manager guarantees the profiler is exited even if a step below raises
    with Profile():
        # oldest first, so the slice keeps the 30 newest logs
        files = sorted(Path(logDirectoryPath).iterdir(), key=os.path.getmtime)
        logfiles = [f for f in files if f.suffix == '.log'][-30:]

        for file in logfiles:
            # derive the paths from the directory entry itself instead of concatenating strings
            csv_file = file.with_suffix(".csv")
            if not csv_file.exists():
                continue

            known_results = read_game_results(str(csv_file))
            try:
                simulated_results = simulate_game_from_log(str(file))
                print(
                    f"{file.stem} results, {compare_results(known_results, simulated_results)}"
                )
            except Exception as e:
                # best effort: one broken log must not abort the whole regression run
                print(f"{file.stem} exception {e}")

            games += 1

        end_time = time.time() - start
        # no replayed game means there is no per-game time to report
        per_game = end_time / games if games else "n/a"
        print(
            f"Elapsed Time: {end_time}\nGames: {games}\nTime per game: {per_game}"
        )
예제 #6
0
def align_progressive(permutation, seqs, align_table=None, pw_perm=None, m_perm=None):
    """
    Build a progressive alignment by adding the sequences in the order given by the permutation.
    :param permutation: indices into seqs, in the order the sequences are aligned
    :param seqs: sequences to align
    :param align_table: optional table that is queried for an already computed prefix and updated with every
        newly computed prefix
    :param pw_perm: permutation to solve conflicts in the direction-matrix in the pairwise sequence alignment
    :param m_perm: permutation to solve conflicts in the direction-matrix in the sequence-profile alignment
    :return: profile of all aligned sequences
    """
    # lower-case input selects the DNA gap score, anything else the protein gap score
    score_gap = score_dna_gap if seqs[0].islower() else score_protein_gap

    # a single sequence is its own alignment
    if len(permutation) < 2:
        return Profile([seqs[permutation[0]]])

    pw_perm = [LEFT, DIAG, UP] if pw_perm is None else pw_perm
    m_perm = [LEFT, DIAG, UP] if m_perm is None else m_perm
    use_table = align_table is not None

    # reuse a stored prefix alignment when a table is available
    prof = align_table.get(permutation)[0] if use_table else Profile([])

    # nothing stored: seed the alignment with the first two sequences
    if prof.size() == 0:
        prof, _ = align_seq_seq(seqs[permutation[0]], seqs[permutation[1]], False, pw_perm,
                                score_gap_loc=score_gap)
        if use_table:
            align_table.set(permutation[:2], prof)

    # add the remaining sequences one by one; `end` is the length of the aligned prefix
    for end in range(max(2, prof.size()) + 1, len(permutation) + 1):
        prof = align_prof_seq(seqs[permutation[end - 1]], prof, m_perm, score_gap_loc=score_gap)
        if use_table:
            align_table.set(permutation[:end], prof)
    return prof
예제 #7
0
 def test_fuzzer(self, count=1):
     """
     Run test_set followed by test_get on one fixed profile/permutation pair.
     :param count: unused while the randomized variant below is disabled
     """
     # Disabled randomized variant:
     # for _ in range(count):
     #     num_seqs = np.random.randint(1, 5)
     #     seqs_count = np.random.randint(num_seqs, 10)
     #     profile = Profile(get_sequences(length=10, count=num_seqs, different=True))
     #     permutation = random.sample(range(seqs_count), num_seqs)
     #     self.test_get(profile, permutation, )
     #     self.test_set(profile, permutation)
     raw_sequences = ["ACGT", "AGT", "ACT"]
     order = [2, 0, 1]
     aligned = Profile(["AC-T", "ACGT", "A-GT"])

     # store first, then read back
     for check in (self.test_set, self.test_get):
         check(aligned, order, raw_sequences)
예제 #8
0
    def get_best(self, score):
        """
        Compare the profiles of all full permutations and return the winner.
        :param score: score handed to compare_alignments to rank two candidates
        :return: best profile according to the specified score and its permutation
        """
        best, best_permutation = Profile([]), []

        for candidate in map(list, itertools.permutations(range(len(self.seqs)))):
            if len(best_permutation) == 0:
                # nothing selected yet: take this candidate as is
                best = self.get(candidate)[0]
                best_permutation = candidate
                continue

            challenger = (self.get(candidate)[0], candidate)
            (best, best_permutation), _ = compare_alignments(
                (best, best_permutation), challenger, score)
        return best, best_permutation
예제 #9
0
def read_drl_results(balibase_dir, results_dir, agent_name, df):
    """
    Insert results into the dataframe from the comparison of the reference tools
    :param balibase_dir: directory of the balibase database
    :param results_dir: directory of the result files from this tool
    :param agent_name: agent prefix inserted into the result files
    :param df: dataframe from the comparison; rows are matched on its ``name`` column
    :return: extended dataframe (the same object, modified in place)
    """
    # if necessary, insert the according columns
    if agent_name + "_q" not in df.columns:
        for suffix in ("_q", "_tc", "_cs", "_sp", "_time"):
            df[agent_name + suffix] = 0

    output_path = os.path.expanduser("~/output_seqs.fa")
    ref_path = os.path.expanduser("~/ref_seqs.fa")

    try:
        # iterate on all the files
        for file_name in sorted(f for f in os.listdir(results_dir)
                                if agent_name in f):
            print(file_name)
            # preprocess the analysis: the benchmark key is cut out of fixed positions of the file name
            key = "RV" + file_name[-9:-7] + "/" + file_name[-11:-4]
            optimal = os.path.join(balibase_dir, key + ".msf")
            # NOTE(review): the literal "~" is passed on unchanged -- presumably write_fasta_seqs expands it
            write_fasta_seqs(read_msf(optimal), "~/ref_seqs.fa")
            copyfile(os.path.join(results_dir, file_name), output_path)

            # score the alignment
            sp, cs, _, _ = Profile(list(read_fasta_seqs().values())).score()
            q, tc = score_alignment()

            # insert the data into the dataframe
            rows = df.name == key
            df.loc[rows, agent_name + "_q"] = q
            df.loc[rows, agent_name + "_tc"] = tc
            df.loc[rows, agent_name + "_cs"] = cs
            df.loc[rows, agent_name + "_sp"] = sp
    finally:
        # the scratch files only exist if at least one result was processed
        for path in (output_path, ref_path):
            if os.path.exists(path):
                os.remove(path)

    return df
예제 #10
0
    def get(self, permutation, cutoff=2):
        """
        Look up the profile stored for a permutation, falling back to ever shorter prefixes.

        While the permutation is unknown and longer than ``cutoff``, its last element is
        dropped and the lookup is repeated.
        :param permutation: query-permutation
        :param cutoff: length at which shortening stops
        :return: (profile, length of the matched prefix), or (empty profile, 0) if nothing was found
        """
        key = hash_state_fast(permutation, self.num_seqs)

        # shrink the query until it is found or may not be shortened any further
        while not (key in self.table or len(permutation) <= cutoff):
            permutation = permutation[:-1]
            key = hash_state_fast(permutation, self.num_seqs)

        if key not in self.table:
            return Profile([]), 0
        return self.table[key].to_profile(self.seqs), len(permutation)
예제 #11
0
def merge_profiles(profile1, profile2):
    """
    Merge two profiles that share the first sequence in common

    The columns of both profiles are walked in parallel. Where the leading residues agree, both
    columns are joined. Where only one profile has a gap in its leading sequence, that column is
    emitted and the rows of the other profile are padded with gaps. Columns left over once one
    profile is exhausted are handled the same way, so no column is dropped.
    :param profile1: first profile to merge
    :param profile2: second profile to merge; its first row is not repeated in the result
    :return: merged profile with ``profile1.size() + profile2.size() - 1`` sequences
    :raises ValueError: if the leading sequences disagree on a non-gap residue
    """
    rows1, rows2 = profile1.size(), profile2.size()
    cols1, cols2 = len(profile1), len(profile2)
    # collect characters per row and join once at the end
    merged = [[] for _ in range(rows1 + rows2 - 1)]

    def emit(p1_col, p2_col):
        """Append one merged column; ``None`` stands for an all-gap contribution of that profile."""
        for i in range(rows1):
            merged[i].append(GAP if p1_col is None else p1_col[i])
        # row 0 of profile2 is the shared sequence and already covered by profile1
        for i in range(1, rows2):
            merged[rows1 + i - 1].append(GAP if p2_col is None else p2_col[i])

    col1, col2 = 0, 0
    while col1 < cols1 or col2 < cols2:
        p1_col = profile1[col1] if col1 < cols1 else None
        p2_col = profile2[col2] if col2 < cols2 else None

        # both leading sequences are equal
        if p1_col is not None and p2_col is not None and p1_col[0] == p2_col[0]:
            emit(p1_col, p2_col)
            col1 += 1
            col2 += 1

        # first leading sequence has additional gap
        elif p1_col is not None and p1_col[0] == GAP:
            emit(p1_col, None)
            col1 += 1

        # second leading sequence has additional gap
        elif p2_col is not None and p2_col[0] == GAP:
            emit(None, p2_col)
            col2 += 1

        # neither column can be consumed: the profiles do not share their first sequence
        else:
            raise ValueError(
                "profiles cannot be merged: leading sequences differ at columns "
                f"{col1} (profile1) and {col2} (profile2)")

    return Profile(["".join(row) for row in merged])
예제 #12
0
 def __init__(self,
              sequences,
              simulations=0,
              rollouts=1,
              c=1,
              score=SP_SCORE,
              refinement=False,
              console=False,
              adjust=True):
     """
     Set up the agent's search parameters and its align-table.
     :param sequences: sequences to align
     :param simulations: number of simulations to make before selecting an action;
         0 selects ``50 * self.num_seqs``
     :param rollouts: number of rollouts to perform in each simulation
     :param c: UCB-Parameter to balance exploration/exploitation
     :param score: score to optimize for
     :param refinement: flag indicating to train for refinement
     :param console: flag indicating commandline outputs
     :param adjust: only effective together with ``score == SP_SCORE``; if so, ``min_score`` and
         ``max_score`` are computed up front
     """
     super().__init__(sequences, refinement)
     self.state = []
     self.children = []

     # in refinement mode the table is built on a profile of the sequences, otherwise on the raw sequences
     table_source = Profile(sequences) if self.refinement else sequences
     self.align_table = HashAlignTable(table_source)

     if simulations == 0:
         self.simulations = self.num_seqs * 50
     else:
         self.simulations = simulations
     self.rollouts = rollouts
     self.c = c
     self.score = score
     self.steps = 0
     self.console = console
     self.adjust = adjust and score == SP_SCORE

     if self.adjust:
         self.min_score = self.estimate_min()
         # upper bound: the center-star SP-score moved away from the estimate by a factor of two
         star_score = center_star(self.sequences).score()[SP_SCORE]
         self.max_score = star_score / 2 if star_score < 0 else star_score * 2
예제 #13
0
                                 FiendishServant(),
                                 DragonspawnLieutenant(),
                                 DragonspawnLieutenant(),
                                 RedWhelp(),
                                 RedWhelp()
                             ])
# Board of player 1: no hero, 12 life, rank 4 and seven minions in the order listed.
player_board_1 = PlayerBoard(player_id=1,
                             hero=None,
                             life_total=12,
                             rank=4,
                             minions=[
                                 DragonspawnLieutenant(),
                                 Mecharoo(),
                                 FiendishServant(),
                                 DragonspawnLieutenant(),
                                 FiendishServant(),
                                 RighteousProtector(),
                                 RighteousProtector()
                             ])

# Pit board 0 against board 1.
# NOTE(review): max_simulations presumably caps the number of simulated combats -- confirm in Simulation.
simulation = Simulation(player_board=player_board_0,
                        opponent_board=player_board_1,
                        max_simulations=50)

# logging.DEBUG will show all steps in combat
logging.basicConfig(level=logging.DEBUG, format="%(message)s")

# Run the simulation inside a Profile() context and print its result.
with Profile():
    print(simulation.simulate()
          )  # List of tuples with outcome and the frequency of that outcome
예제 #14
0
                                                length=6,
                                                different=True),
                        network_object=TinyREINFORCENetwork)
    pat = PolicyAgentTrainer(agent,
                             value_gamma=0.99,
                             value_alpha=0.8,
                             baseline=True)
    pat.run()

    # compute the resulting multiple sequence alignment
    (best_profile, best_permutation), _ = pat.evaluate_training()
    reward = best_profile.score()
    print(str(best_profile))
    print("Score:", reward[score], F"({best_permutation})")

    start = Profile(["ctattg", "ctaccg", "ctatgt"])
    print(start)
    print("Score:", start.score()[score])
    agent = PolicyAgent(sequences=start,
                        network_object=TinyREINFORCENetwork,
                        refinement=True)
    tat = PolicyAgentTrainer(agent,
                             value_alpha=0.01,
                             value_gamma=0.9,
                             epsilon_end=0.1,
                             refinement=True)
    tat.run(True, False)
    (best_ref_profile, best_ref_permutation), _ = tat.evaluate_training()
    print(str(best_ref_profile))
    reward = best_ref_profile.score()
    print("Score:", reward[score], F"({best_permutation})")
예제 #15
0
def multithread_agent_on_benchmarks(benchmark_ids, configurations, best,
                                    settings, data_file):
    """
    run a single agent configuration on all benchmarks in parallel

    Only ``configurations[0]`` is trained; the number of configurations is merely forwarded to
    ``run_agent``. A benchmark for which no task was submitted (agent id 0 not requested, or a
    refinement without a progressive basis) still has its comparison entry folded into the
    statistics, but contributes no profile, no stored file and no change flag.
    :param benchmark_ids: mapping of benchmark id to the agent ids to run on it
    :param configurations: configuration(s)
    :param best: best results in the actual optimization setting
    :param settings: settings of this search (reads Multi, Update, Individual and Folder)
    :param data_file: file to store all data about the computations
    :return: - a bool flag indicating that a new best alignments has been found
             - the actual comparison of agents on different benchmarks
             - the actual best in JSON format
    """
    global number
    print("train one agent on multiple benchmarks")

    count = len(benchmark_ids)
    config = configurations[0]
    score_key = get_score(config)
    kind = "Refinement" if config.refinement else "Progressive"
    # refinement results are stored one slot behind the progressive results
    slot = TABLE_AGENT + 1 if config.refinement else TABLE_AGENT

    # initialize the multithreading tools needed
    pool = ThreadPool(
        processes=cpu_count() if settings.Multi == 1 else settings.Multi)
    tasks = [None] * count

    # set empty variables to use for the statistics of the computation
    comparison = {
        "SP": {
            "Refinement": [],
            "Progressive": []
        },
        "CS": {
            "Refinement": [],
            "Progressive": []
        }
    }
    # per-benchmark data is created in the submit loop and needed again in the collect loop,
    # so it is kept in lists indexed by the benchmark position
    names = [None] * count
    sequence_names = [None] * count
    base_dataset = [None] * count
    bench_comp = [None] * count
    bench_best = [None] * count
    changed = False

    try:
        for i, (b_id, agent_ids) in enumerate(benchmark_ids.items()):
            # initialize the files based on the actual benchmark
            names[i], sequences, sequence_names[i], base_dataset[i], bench_comp[i], bench_best[i] = \
                initialize_benchmark(b_id, best)

            # if the configuration is not to be used on this benchmark, insert into the statistics and continue
            if 0 not in agent_ids:
                number += 1
                get_from_config(bench_comp[i],
                                config)[slot] = ("-", "-", "-", "-")
                continue

            # if indicated perform iterative refinement
            if config.refinement:
                if bench_best[i]["SP" if config.score ==
                                 SP_SCORE else "CS"]["Progressive"][1] is None:
                    print(
                        "WARNING: Cannot compute Refinement without basic profile of progressive alignment"
                    )
                    continue
                start_profile = bench_best[i][score_key]["Refinement"][0]
                tasks[i] = pool.apply_async(
                    run_agent,
                    (config, start_profile,
                     HashAlignTable(Profile(sequences)), names[i],
                     start_profile.score(), settings.Update,
                     len(configurations), settings.Individual, data_file))
            else:
                # else compute a progressive alignment
                tasks[i] = pool.apply_async(
                    run_agent,
                    (config, sequences, HashAlignTable(sequences), names[i],
                     bench_best[i][score_key]["Progressive"][0].score(),
                     settings.Update, len(configurations),
                     settings.Individual, data_file))

        # iterate over the processes and collect the results
        for i in range(count):
            submitted = tasks[i] is not None
            if submitted:
                _, scoring, _, profile, permutation = tasks[i].get()
                get_from_config(bench_comp[i], config)[slot] = scoring

                # a refinement candidate additionally carries the permutation of its progressive basis
                if config.refinement:
                    candidate = (profile, permutation,
                                 bench_best[i][score_key]["Progressive"][1],
                                 config)
                else:
                    candidate = (profile, permutation, config)
                bench_best[i][score_key][kind], change = compare_alignments(
                    bench_best[i][score_key][kind], candidate, config.score)

            # update the global statistics
            comparison, best = update_comparison(names[i], base_dataset[i],
                                                 best, comparison,
                                                 bench_comp[i], bench_best[i])

            # skipped benchmarks produced no profile to store and no change to report
            if not submitted:
                continue

            if settings.Folder is not None:
                profile.store(settings.Folder, i, config, names[i],
                              sequence_names[i], permutation)
            changed |= change
    finally:
        # release the worker threads whether or not collecting succeeded
        pool.close()
        pool.join()

    return changed, comparison, best
예제 #16
0
def initialize_benchmark(b_id, best):
    """
    Prepare everything needed to run agents on one benchmark.
    :param b_id: benchmark-id to train on; an int selects a built-in benchmark, anything else is used as the
        path of a sequence file
    :param best: best result from previous training runs, nested by score and construction type
    :return: - the name of the benchmark used for evaluation tables at the end
             - the sequences of the benchmark as list of strings
             - the names according to the sequences in the same order ( = permutation [0,1,...,n-1,n])
             - basic data of the benchmark-sequences, namely id, type and size
             - basic comparison of available results for this benchmark
             - best result from previous run on this benchmark
    """
    score_keys = ("SP", "CS")

    def per_setting(make_entry):
        """Build the nested score/type map with a fresh entry from make_entry in every slot."""
        return {
            key: {
                "Refinement": make_entry(),
                "Progressive": make_entry()
            }
            for key in score_keys
        }

    # Best results per setting. A refinement tuple holds the profile, its iterative permutation, the
    # permutation of the starting alignment and the configuration; a progressive tuple holds the profile,
    # its permutation and the configuration.
    benchmark_best = {
        key: {
            "Refinement": (Profile([]), None, None, None),
            "Progressive": (Profile([]), None, None)
        }
        for key in score_keys
    }

    if isinstance(b_id, int):
        # built-in benchmark: name, reference results and sequence file come from the constants
        name, b, seqs_file = names[b_id], benchmarks[b_id], seq_files[b_id]
        sequences, sequence_names = read_fasta_data(seqs_file)

        # left-hand side of the output tables: the stored results of the reference tools
        comparison = per_setting(lambda: {
            RL: b[0:2],
            DRL: b[2:4],
            CLUSTALW: b[4:6],
            MAFFT: b[6:8],
            MUSCLE: b[8:10]
        })
        base_data = (b_id, types[b_id], sizes[b_id])
    else:
        # external benchmark: the file stem is the name and the base data is computed from the sequences
        name = os.path.basename(b_id).split(".")[0]
        sequences, sequence_names = read_fasta_data(b_id)

        # no reference results are available here, so the table starts with zeros
        comparison = per_setting(lambda: {
            RL: (0, 0),
            DRL: (0, 0),
            CLUSTALW: (0, 0),
            MAFFT: (0, 0),
            MUSCLE: (0, 0)
        })
        base_data = (b_id, get_sequence_type(sequences),
                     get_sequence_size(sequences))

    # Rebuild the stored best alignments of this benchmark; settings without a stored result keep
    # their empty-profile tuple.
    for kind in ("Progressive", "Refinement"):
        for key in score_keys:
            if name not in best[key][kind]:
                continue
            stored = best[key][kind][name]
            if kind == "Progressive":
                benchmark_best[key][kind] = (
                    align_progressive(stored["Permutation"], sequences),
                    stored["Permutation"],
                    from_dict(stored["Configuration"]))
            else:
                benchmark_best[key][kind] = (
                    align_iterative(
                        stored["Permutation"],
                        align_progressive(stored["BasePermutation"],
                                          sequences)),
                    stored["Permutation"],
                    stored["BasePermutation"],
                    from_dict(stored["Configuration"]))

    # The score of the best progressive alignment is the baseline a refinement starts from; it is
    # entered into the refinement table so that an improvement can be judged against it.
    for key in score_keys:
        baseline = benchmark_best[key]["Progressive"][0].score()[0:2]
        comparison[key]["Refinement"][TABLE_AGENT] = (*baseline, 0, 0)

    return name, sequences, sequence_names, base_data, comparison, benchmark_best
예제 #17
0
def align_prof_seq(seq, prof, perm=None, score_gap_loc=score_dna_gap):
    """
    Align a single sequence against an already aligned profile.

    This code extends the Needleman-Wunsch algorithm (1970) and the review by
    Waterman (1976) to a profile-sequence alignment.

    Note that ``prof`` is modified in place: every UP step of the traceback
    calls ``prof.add_gap`` on the profile that was passed in.

    :param seq: sequence to align
    :param prof: profile to align against (receives gap columns in place)
    :param perm: order in which LEFT, DIAG and UP are tried to solve conflicts in the
        direction-matrix, defaults to [LEFT, DIAG, UP]
    :param score_gap_loc: local scoring for a gap between the two sequences
    :return: new profile holding the sequences of ``prof`` followed by the aligned ``seq``
    :raises ValueError: if ``perm`` offers none of the directions stored for a cell
        reached during the traceback, i.e. the traceback cannot advance
    """
    if perm is None:
        perm = [LEFT, DIAG, UP]
    rows, cols = len(seq) + 1, len(prof) + 1
    s_matrix = np.zeros((rows, cols))
    d_matrix = np.zeros((rows, cols), dtype='int32')

    # fill the edges of the matrices: the first column can only be left by moving UP and
    # the first row only by moving LEFT, so store exactly the flags the traceback tests for
    # NOTE(review): the first column is scored with i * score_gap_loc while an interior
    # UP move costs score_gap_loc * prof.size() - confirm this asymmetry is intended
    for i in range(1, rows):
        s_matrix[i, 0] = i * score_gap_loc
        d_matrix[i, 0] = UP
    for j in range(1, cols):
        s_matrix[0, j] = s_matrix[0, j - 1] + prof.align(GAP, j - 1)
        d_matrix[0, j] = LEFT

    # cost of an UP move; it does not depend on the cell, so compute it only once
    up_move_score = score_gap_loc * prof.size()

    # fill the matrix according to the algorithms
    for i in range(1, rows):
        for j in range(1, cols):
            # compute the possible scores for the alignment
            d = s_matrix[i - 1, j - 1] + prof.align(seq[i - 1], j - 1)
            l = s_matrix[i, j - 1] + prof.align(GAP, j - 1)
            u = s_matrix[i - 1, j] + up_move_score

            # fill the matrix with the maximal value
            s_matrix[i, j] = max(d, l, u)

            # remember every direction that reaches the maximum as a bit-flag
            if s_matrix[i, j] == d:
                d_matrix[i, j] += DIAG
            if s_matrix[i, j] == u:
                d_matrix[i, j] += UP
            if s_matrix[i, j] == l:
                d_matrix[i, j] += LEFT

    # the traceback yields the aligned sequence back to front; collect the symbols in a
    # list and reverse once at the end instead of prepending to a string in every step
    reversed_symbols = []
    a_seq_index = len(seq)
    a_prof_index = len(prof)
    # Backtracking of the alignment through the Direction-Matrix and synchronously enlarging the sequences as needed
    while a_seq_index >= 0 and a_prof_index >= 0 and a_seq_index + a_prof_index != 0:
        directions = d_matrix[a_seq_index, a_prof_index]
        for step in perm:
            # go diagonal in the alignment
            if step == DIAG and directions & DIAG != 0:
                a_seq_index -= 1
                a_prof_index -= 1
                reversed_symbols.append(seq[a_seq_index])
                break
            # go left in the alignment, i.e. insert a gap in the sequence
            if step == LEFT and directions & LEFT != 0:
                a_prof_index -= 1
                reversed_symbols.append("-")
                break
            # go up in the alignment, i.e. insert a gap in the existing profile
            if step == UP and directions & UP != 0:
                a_seq_index -= 1
                reversed_symbols.append(seq[a_seq_index])
                prof.add_gap(a_prof_index)
                break
        else:
            # no direction in perm matches the cell: fail loudly instead of looping forever
            raise ValueError(
                "perm %r contains no direction usable at cell (%d, %d)"
                % (perm, a_seq_index, a_prof_index))

    aligned_seq = "".join(reversed(reversed_symbols))
    # return the new computed profile of the two profiles
    return Profile(prof.seqs + [aligned_seq])
예제 #18
0
def run_balibase(balibase_dir, code, table, results_dir):
    """
    Compute the performance of the three different reference tools (CLUSTAL, MAFFT, MUSCLE)
    on one BAliBASE benchmark and append the results as a new row to the table.

    Only the benchmarks listed in ``codes`` below are processed; for every other code the
    function returns immediately without doing any work.

    :param balibase_dir: base-directory of the BAliBASE benchmark directory
    :param code: file-code from file to test, e.g. "RV20/BB20001"
    :param table: table to store alignment results in; a row is only appended if no
        existing row already carries ``code`` in its "name" column
    :param results_dir: directory to move the alignments produced by the tools into;
        if None, the produced alignments are deleted instead
    """
    codes = (
        "RV20/BB20001", "RV20/BB20020", "RV40/BB40010", "RV40/BB40014",
        "RV40/BB40018", "RV50/BB50004"
    )
    if code not in codes:
        return
    print(code)

    # read the reference alignment and write it to the output
    base = os.path.join(balibase_dir, code)
    optimal = base + ".msf"
    input_file = base + ".tfa"
    input_seqs, _ = read_fasta_seqs(input_file)
    optimal_seqs = read_msf(optimal)
    ref_sp, ref_cs, _, _ = Profile(list(optimal_seqs.values())).score()
    write_fasta_seqs(optimal_seqs, "~/ref_seqs.fa")
    write_fasta_seqs(input_seqs)

    # run the three tools one after the other; each result is (q, ts, cs, sp, time)
    clustal = _run_reference_tool(run_clustal, "CLUSTAL:", "CLUSTAL_", code, results_dir)
    mafft = _run_reference_tool(run_mafft, "MAFFT:\t", "MAFFT_", code, results_dir)
    muscle = _run_reference_tool(run_muscle, "MUSCLE:\t", "MUSCLE_", code, results_dir)

    # delete not necessary files and save the results in a table
    os.remove(os.path.expanduser("~/input_seqs.fa"))
    os.remove(os.path.expanduser("~/ref_seqs.fa"))
    # compare against the column *values*: `code in table["name"]` would test the
    # index labels of the column and therefore never detect an existing row
    if not (table["name"] == code).any():
        table.loc[len(table)] = [
            code,
            len(optimal_seqs),
            get_sequence_size(optimal_seqs), ref_cs, ref_sp,
            compute_identity(balibase_dir, code),
            *clustal, *mafft, *muscle
        ]


def _run_reference_tool(command, log_label, file_prefix, code, results_dir):
    """
    Run one external alignment tool, score its output and store or discard the output file.

    :param command: shell command that starts the tool
    :param log_label: label printed in front of the scores on the console
    :param file_prefix: prefix of the file name used when the output is kept
    :param code: file-code of the benchmark, its part after the "/" names the kept file
    :param results_dir: directory to move "~/output_seqs.fa" into, or None to delete it
    :return: tuple (q, ts, cs, sp, elapsed seconds) in the column order of the results table
    """
    start = time.time()
    os.system(command)
    elapsed = time.time() - start

    # presumably read_fasta_seqs() without arguments reads the tool's output file - verify
    sp, cs, _, _ = Profile(list(read_fasta_seqs().values())).score()
    q, ts = score_alignment()

    output_file = os.path.expanduser("~/output_seqs.fa")
    if results_dir is not None:
        os.rename(
            output_file,
            os.path.join(results_dir, file_prefix + code.split("/")[1] + ".fa"))
    else:
        os.remove(output_file)
    print(log_label, q, ",\t", ts, ",\t", cs, ",\t", sp)
    return q, ts, cs, sp, elapsed
예제 #19
0
 def test_align_prof_seq(self):
     """Aligning "gcgccc" to a two-sequence profile must reproduce the gapped first row."""
     base_profile = Profile(["gc-gc-cc", "gccgcgcc"])
     tie_break_order = [LEFT, DIAG, UP]
     aligned = align_prof_seq("gcgccc", base_profile, tie_break_order)
     expected = ["gc-gc-cc", "gccgcgcc", "gc-gc-cc"]
     self.assertEqual(expected, aligned.get_sequences())
예제 #20
0
def align_seq_seq(seq_a, seq_b, dna, perm=None, score_match_loc=score_dna_match, score_mismatch_loc=score_dna_mismatch,
                  score_gap_loc=score_dna_gap):
    """
    Align two sequences globally against each other.

    This code follows the Needleman-Wunsch algorithm (1970) and the review by Waterman (1976).

    :param seq_a: first sequence to align, may be empty
    :param seq_b: second sequence to align, may be empty
    :param dna: flag indicating that the sequences have to be handled as dna sequences; it is
        also switched on when the first symbol of ``seq_a`` is lower-case
    :param perm: order in which LEFT, DIAG and UP are tried to solve conflicts in the
        direction-matrix, defaults to [LEFT, DIAG, UP]
    :param score_match_loc: local scoring for a match between two bases or nucleotides
    :param score_mismatch_loc: local scoring for a mismatch between two bases or nucleotides
    :param score_gap_loc: local scoring for a gap between the two sequences
    :return: tuple of the profile holding both aligned sequences and the score of the alignment
    :raises ValueError: if ``perm`` offers none of the directions stored for a cell
        reached during the traceback, i.e. the traceback cannot advance
    """
    if perm is None:
        perm = [LEFT, DIAG, UP]
    s_matrix = np.zeros((len(seq_a) + 1, len(seq_b) + 1))
    d_matrix = np.zeros((len(seq_a) + 1, len(seq_b) + 1), dtype='int32')

    # guard the look at the first symbol so that an empty seq_a does not raise an IndexError
    dna = (len(seq_a) > 0 and seq_a[0].islower()) or dna

    # fill the edges of the matrices
    for i in range(1, len(seq_a) + 1):
        s_matrix[i, 0] = i * score_gap_loc
        d_matrix[i, 0] = UP
    for j in range(len(seq_b) + 1):
        s_matrix[0, j] = j * score_gap_loc
        d_matrix[0, j] = LEFT

    # fill the matrix according to the algorithms
    for i in range(1, len(seq_a) + 1):
        for j in range(1, len(seq_b) + 1):
            # dna uses the match/mismatch scores, everything else the score() function
            if dna:
                pair_score = score_match_loc if seq_a[i - 1] == seq_b[j - 1] else score_mismatch_loc
            else:
                pair_score = score(seq_a[i - 1], seq_b[j - 1])

            # compute the possible scores for the alignment
            d = s_matrix[i - 1, j - 1] + pair_score
            l = s_matrix[i, j - 1] + score_gap_loc
            u = s_matrix[i - 1, j] + score_gap_loc

            # fill the matrix with the maximal value
            s_matrix[i, j] = max(d, l, u)

            # remember every direction that reaches the maximum as a bit-flag
            if s_matrix[i, j] == d:
                d_matrix[i, j] += DIAG
            if s_matrix[i, j] == l:
                d_matrix[i, j] += LEFT
            if s_matrix[i, j] == u:
                d_matrix[i, j] += UP

    seq_a_idx, seq_b_idx = len(seq_a), len(seq_b)
    # the traceback yields both rows back to front; collect the symbols in lists and
    # reverse once at the end instead of prepending to a string in every step
    reversed_a, reversed_b = [], []
    # Backtracking of the alignment through the Direction-Matrix and synchronously enlarging the sequences as needed
    while seq_a_idx != 0 or seq_b_idx != 0:
        directions = d_matrix[seq_a_idx, seq_b_idx]
        for p in perm:
            # go diagonal in the alignment, i.e. pair the two current symbols
            if p == DIAG and directions & DIAG != 0:
                seq_a_idx -= 1
                seq_b_idx -= 1
                reversed_a.append(seq_a[seq_a_idx])
                reversed_b.append(seq_b[seq_b_idx])
                break
            # go up in the alignment, i.e. insert a gap in the second sequence
            elif p == UP and directions & UP != 0:
                seq_a_idx -= 1
                reversed_a.append(seq_a[seq_a_idx])
                reversed_b.append('-')
                break
            # go left in the alignment, i.e. insert a gap in the first sequence
            elif p == LEFT and directions & LEFT != 0:
                seq_b_idx -= 1
                reversed_a.append('-')
                reversed_b.append(seq_b[seq_b_idx])
                break
        else:
            # no direction in perm matches the cell: fail loudly instead of looping forever
            raise ValueError(
                "perm %r contains no direction usable at cell (%d, %d)"
                % (perm, seq_a_idx, seq_b_idx))

    aligned_a = "".join(reversed(reversed_a))
    aligned_b = "".join(reversed(reversed_b))
    return Profile([aligned_a, aligned_b]), s_matrix[len(seq_a), len(seq_b)]
예제 #21
0
    # Demo of the Monte-Carlo Tree-Search agent: first on an AlignmentWrapper built from
    # raw sequences, then on a RefinementWrapper built from an existing profile.
    print("########################################################")
    print("##Starting training of a Monte-Carlo Tree-Search agent##")
    print("########################################################")
    print()

    # key used below to pick the reported value out of the reward returned by env.run()
    score = SP_SCORE

    # first run: agent and environment are built from freshly obtained sequences
    seqs = get_sequences(count=3, length=6, different=True)
    agent = MCTSAgent(seqs, rollouts=2, adjust=True)
    env = AlignmentWrapper(seqs, agent, score)

    # time the complete run of the environment
    start = time.time()
    reward, permutation, profile, _ = env.run()
    end = time.time()

    print(str(profile))
    print("Score:", reward[score], F"({permutation})")
    print("Trainer ran for %.2f seconds" % (end - start))

    # second run: agent in refinement mode, starting from a fixed three-sequence profile
    start = Profile(["ctattg", "ctaccg", "ctatgt"])
    agent = MCTSAgent(sequences=start, refinement=True)
    env = RefinementWrapper(start, agent, score)

    # NOTE(review): `start` is rebound from the Profile to a timestamp here; agent and env
    # were already constructed with the Profile, so only the name is reused
    start = time.time()
    reward, permutation, profile, _ = env.run()
    end = time.time()

    print(str(profile))
    print("Score:", reward[score], F"({permutation})")
    print("Trainer ran for %.2f seconds" % (end - start))
예제 #22
0
def main():
    """
    Read combat states from a recorded log file and estimate the outcome of each combat.

    For every board state delivered by the log reader, both player boards are built and
    printed, the combat is simulated ``games`` times and the share of wins, ties, losses
    and lethal outcomes is printed. The log path, the number of games and the
    single-threaded switch are hard-coded below.
    """
    logPath = "C:/Users/scott/Desktop/hearthstone_games/Recorded_games/Power_game_15.log"
    #logPath = "C:/Program Files (x86)/Hearthstone/Logs/Power_old.log"

    #logging.basicConfig(level=logging.DEBUG, format="%(message)s")

    print("Reading Log: ", logPath)
    logreader = LogReader(logPath)
    # NOTE(review): `turns` is only referenced by the commented-out code further down
    turns = 0

    # the profile is entered by hand instead of through a `with` statement
    profile = Profile()
    profile.__enter__()

    while True:
        print(
            "------------------------------------------------------------------------"
        )
        board_state = logreader.watch_log_file_for_combat_state()

        # a falsy board state is treated as the end of the game
        if not board_state:
            print("\n*** Game Over ***")
            # NOTE(review): __exit__ is called without arguments and the loop is continued
            # rather than left, so this branch may run more than once - confirm intent
            profile.__exit__()
            continue

        # board of the friendly player (player 0)
        player_board_0 = PlayerBoard(
            player_id=0,
            hero=board_state.friendlyHero,
            life_total=board_state.friendlyPlayerHealth,
            rank=board_state.friendlyTechLevel,
            minions=board_state.friendlyBoard,
            enemy_is_deathwing=board_state.enemyHero is HeroType.DEATHWING)

        # board of the enemy player (player 1)
        player_board_1 = PlayerBoard(
            player_id=1,
            hero=board_state.enemyHero,
            life_total=board_state.enemyPlayerHealth,
            rank=board_state.enemyTechLevel,
            minions=board_state.enemyBoard,
            enemy_is_deathwing=board_state.friendlyHero is HeroType.DEATHWING)

        print("Enemy board")
        print(player_board_1)

        print("Friendly board")
        print(player_board_0)

        # turns += 1
        # if turns < 9:
        #     continue

        try:
            single_threaded = True
            games = 10_000
            game_state = (player_board_0, player_board_1)
            # the state is pickled once and the same bytes are handed to every simulation
            pickled_state = pickle.dumps(game_state)

            if single_threaded:
                results = []
                start = time.time()
                for _ in range(games):
                    results.append(Simulator.Simulate(pickled_state))
            else:
                # unreachable while single_threaded is hard-coded to True
                start = time.time()
                pool = Pool()
                results = pool.map(Simulator.Simulate,
                                   repeat(pickled_state, games))
                pool.close()
                pool.join()

            # count how often each simulation result occurred, ordered by the result value
            counter = Counter(results)
            results = sorted(counter.items(), key=lambda x: x[0])

            wins, losses, ties, enemy_lethal, friendly_lethal = 0.0, 0.0, 0.0, 0.0, 0.0
            for result in results:
                # each entry is (simulation result, number of games with that result)
                damage = result[0]
                game_count = result[1]

                # positive results count as wins, negative as losses, zero as ties
                if damage > 0:
                    wins += game_count
                    # lethal: the result exceeds the enemy's remaining life total
                    if damage > player_board_1.life_total:
                        enemy_lethal += game_count
                elif damage < 0:
                    losses += game_count
                    # lethal: the negated result exceeds the friendly life total
                    if (damage * -1) > player_board_0.life_total:
                        friendly_lethal += game_count
                else:
                    ties += game_count
            # the elapsed time covers the simulations and the tallying above
            end = time.time()

            # all shares are printed as percentages of the simulated games
            print("Win", 100 * wins / games, "Tie", 100 * ties / games, "Loss",
                  100 * losses / games, "Elapsed:", end - start)
            print("We kill enemy:", 100 * enemy_lethal / games,
                  "Enemy kills us:", 100 * friendly_lethal / games)
            print(
                "------------------------------------------------------------------------\n"
            )
        except Exception as e:
            # NOTE(review): any error in this iteration is only printed (without traceback)
            # and the loop carries on with the next board state
            print(e)