Example #1 (File: OptiTope.py, Project: FRED-2/Fred2)
    def __init__(self,
                 results,
                 threshold=None,
                 k=10,
                 solver="glpk",
                 verbosity=0):
        """
        :param results: Epitope prediction result object from which the epitope selection should be performed
        :type results: :class:`~Fred2.Core.Result.EpitopePredictionResult`
        :param dict(str,float) threshold: A dictionary storing the binding threshold for each HLA
                                          :class:`~Fred2.Core.Allele.Allele`; key = allele name, value = the threshold
        :param int k: The number of epitopes to select
        :param str solver: The solver to be used (default: glpk)
        :param int verbosity: Integer defining whether additional debug output is printed (>0 enables debug mode)
        """

        #check input data
        if not isinstance(results, EpitopePredictionResult):
            raise ValueError(
                "first input parameter is not of type EpitopePredictionResult")

        _alleles = copy.deepcopy(results.columns.values.tolist())

        #test whether allele probabilities are set; if not, assume a uniform distribution
        #if only partly set, infer the missing values (assuming uniformity among the missing ones)
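        #e.g. if the known probabilities of a locus sum to 0.6 and two alleles of that locus
        #have no probability, each of those two is assigned (1 - 0.6) / 2 = 0.2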
        prob = []
        no_prob = []
        for a in _alleles:
            if a.prob is None:
                no_prob.append(a)
            else:
                prob.append(a)

        if len(no_prob) > 0:
            #group by locus
            no_prob_grouped = {}
            prob_grouped = {}
            for a in no_prob:
                no_prob_grouped.setdefault(a.locus, []).append(a)
            for a in prob:
                prob_grouped.setdefault(a.locus, []).append(a)

            for g, v in no_prob_grouped.items():
                total_loc_a = len(v)
                if g in prob_grouped:
                    remaining_mass = 1.0 - sum(a.prob for a in prob_grouped[g])
                    for a in v:
                        a.prob = remaining_mass / total_loc_a
                else:
                    for a in v:
                        a.prob = 1.0 / total_loc_a
        probs = {a.name: a.prob for a in _alleles}
        if verbosity:
            for a in _alleles:
                print(a.name, a.prob)

        #start constructing model
        self.__solver = SolverFactory(solver)
        self.__verbosity = verbosity
        self.__changed = True
        self.__alleleProb = _alleles
        self.__k = k
        self.__result = None
        self.__thresh = {} if threshold is None else threshold

        # Variable, Set and Parameter preparation
        alleles_I = {}
        variations = []
        epi_var = {}
        imm = {}
        peps = {}
        cons = {}

        #unstack multiindex df to get normal df based on first prediction method
        #and filter for binding epitopes
        method = results.index.values[0][1]
        res_df = results.xs(results.index.values[0][1], level="Method")
        res_df = res_df[res_df.apply(
            lambda x: any(x[a] > self.__thresh.get(a.name, -float("inf"))
                          for a in res_df.columns),
            axis=1)]

        for tup in res_df.itertuples():
            p = tup[0]
            seq = str(p)
            peps[seq] = p
            for a, s in zip(res_df.columns, tup[1:]):
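                # for IC50-based methods both scores and thresholds are rescaled to
                # 1 - log_50000(value) and clipped to [0, 1], so larger values mean stronger predicted binding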
                if method in ["smm", "smmpmbec", "arb", "comblibsidney"]:
                    try:
                        thr = min(
                            1.,
                            max(
                                0.0, 1.0 -
                                math.log(self.__thresh.get(a.name), 50000))
                        ) if a.name in self.__thresh else -float("inf")
                    except (ValueError, TypeError):
                        thr = 0

                    if s >= thr:
                        alleles_I.setdefault(a.name, set()).add(seq)
                    imm[seq, a.name] = min(1.,
                                           max(0.0, 1.0 - math.log(s, 50000)))
                else:
                    if s > self.__thresh.get(a.name, -float("inf")):
                        alleles_I.setdefault(a.name, set()).add(seq)
                    imm[seq, a.name] = s

            prots = set(pr for pr in p.get_all_proteins())
            cons[seq] = len(prots)
            for prot in prots:
                variations.append(prot.gene_id)
                epi_var.setdefault(prot.gene_id, set()).add(seq)
        self.__peptideSet = peps

        #calculate conservation
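        #conservation of an epitope = number of source proteins containing it divided by the total number of distinct gene IDs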
        variations = set(variations)
        total = len(variations)
        for e, v in cons.items():
            try:
                cons[e] = v / total
            except ZeroDivisionError:
                cons[e] = 1
        model = ConcreteModel()

        #set definition
        model.Q = Set(initialize=variations)

        model.E = Set(initialize=set(peps.keys()))

        model.A = Set(initialize=list(alleles_I.keys()))
        model.E_var = Set(model.Q, initialize=lambda model, v: epi_var[v])
        model.A_I = Set(model.A, initialize=lambda model, a: alleles_I[a])

        #parameter definition
        model.k = Param(initialize=self.__k,
                        within=PositiveIntegers,
                        mutable=True)
        model.p = Param(model.A, initialize=lambda model, a: probs[a])

        model.c = Param(model.E,
                        initialize=lambda model, e: cons[e],
                        mutable=True)

        #threshold parameters
        model.i = Param(model.E,
                        model.A,
                        initialize=lambda model, e, a: imm[e, a])
        model.t_allele = Param(initialize=0,
                               within=NonNegativeIntegers,
                               mutable=True)
        model.t_var = Param(initialize=0,
                            within=NonNegativeIntegers,
                            mutable=True)
        model.t_c = Param(initialize=0.0,
                          within=NonNegativeReals,
                          mutable=True)

        # Variable Definition
        model.x = Var(model.E, within=Binary)
        model.y = Var(model.A, within=Binary)
        model.z = Var(model.Q, within=Binary)

        # Objective definition
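        # maximize the allele-probability-weighted immunogenicity summed over the selected epitopes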
        model.Obj = Objective(
            rule=lambda model: sum(model.x[e] * sum(model.p[a] * model.i[e, a]
                                                    for a in model.A)
                                   for e in model.E),
            sense=maximize)

        #Obligatory Constraint (number of selected epitopes)
        model.NofSelectedEpitopesCov = Constraint(
            rule=lambda model: sum(model.x[e] for e in model.E) <= model.k)

        #optional constraints (in basic model they are disabled)
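        #y[a] can only be 1 if a selected epitope binds allele a; t_allele enforces a minimum number of covered alleles
        #z[q] can only be 1 if a selected epitope stems from antigen q; t_var enforces a minimum number of covered antigens
        #EpitopeConsConst excludes epitopes whose conservation c[e] falls below the threshold t_c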
        model.IsAlleleCovConst = Constraint(
            model.A,
            rule=lambda model, a: sum(model.x[e]
                                      for e in model.A_I[a]) >= model.y[a])
        model.MinAlleleCovConst = Constraint(rule=lambda model: sum(
            model.y[a] for a in model.A) >= model.t_allele)

        model.IsAntigenCovConst = Constraint(
            model.Q,
            rule=lambda model, q: sum(model.x[e]
                                      for e in model.E_var[q]) >= model.z[q])
        model.MinAntigenCovConst = Constraint(
            rule=lambda model: sum(model.z[q] for q in model.Q) >= model.t_var)
        model.EpitopeConsConst = Constraint(
            model.E,
            rule=lambda model, e:
            (1 - model.c[e]) * model.x[e] <= 1 - model.t_c)

        #generate instance
        self.instance = model
        if self.__verbosity > 0:
            print("MODEL INSTANCE")
            self.instance.pprint()

        #constraints
        self.instance.IsAlleleCovConst.deactivate()
        self.instance.MinAlleleCovConst.deactivate()
        self.instance.IsAntigenCovConst.deactivate()
        self.instance.MinAntigenCovConst.deactivate()
        self.instance.EpitopeConsConst.deactivate()
    def __init__(self,
                 results,
                 threshold=None,
                 dist_threshold=1.0,
                 distance={},
                 expression={},
                 uncertainty={},
                 overlap=0,
                 k=10,
                 k_taa=0,
                 solver="glpk",
                 verbosity=0,
                 include=[]):
        """
        :param results: Epitope prediction result object from which the epitope selection should be performed
        :type results: :class:`~Fred2.Core.Result.EpitopePredictionResult`
        :param dict(str,float) threshold: A dictionary storing the binding threshold for each HLA
                                          :class:`~Fred2.Core.Allele.Allele`; key = allele name, value = the threshold
        :param float dist_threshold: Distance threshold: an epitope is excluded if its distance-to-self score is
                                     smaller than or equal to this threshold for any HLA allele
        :param dict((str,str),float) distance: A dictionary with key: (peptide sequence, HLA name)
                                               and value the distance2self
        :param dict(str, float) expression: A dictionary with key: gene ID, and value: Gene expression
                                            in FPKM/RPKM or TPM
        :param dict((str,str),float) uncertainty: A dictionary with key (peptide seq, HLA name), and value the
                                                  associated uncertainty of the immunogenicity prediction
        :param int k: The number of epitopes to select
        :param int k_taa: The number of TAA epitopes to select
        :param str solver: The solver to be used (default glpk)
        :param int verbosity: Integer defining whether additional debug output is printed (>0 enables debug mode)
        :param int overlap: The minimum length of a shared substring for two epitopes to be considered
                            overlapping (0 disables the overlap constraint)
        :param list(str) include: A list of peptide sequences that must be part of the final selection
        """

        # check input data
        if not isinstance(results, EpitopePredictionResult):
            raise ValueError(
                "first input parameter is not of type EpitopePredictionResult")

        _alleles = results.columns.values.tolist()

        # generate an abundance dictionary for the HLA alleles; the default is 2.0 because the values are log2 transformed later
        probs = {
            a.name: 2.0 if a.get_metadata("abundance", only_first=True) is None
            else a.get_metadata("abundance", only_first=True)
            for a in _alleles
        }
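        # the default abundance of 2.0 yields a weight of roughly 1 after the log2 transform applied in model.p below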

        # start constructing model
        self.__solver = SolverFactory(solver)
        self.__verbosity = verbosity
        self.__changed = True
        self.__alleleProb = _alleles
        self.__k = k
        self.__k_taa = k_taa
        self.__result = None
        self.__thresh = {} if threshold is None else threshold
        self.__included = include
        self.overlap = overlap

        # variable, set and parameter preparation
        alleles_I = {}
        variations = []
        epi_var = {}
        imm = {}
        peps = {}
        taa = []
        var_epi = {}
        cons = {}

        for a in _alleles:
            alleles_I.setdefault(a.name, set())

        # unstack multiindex df to get normal df based on first prediction method
        # and filter for binding epitopes
        method = results.index.values[0][1]
        res_df = results.xs(results.index.values[0][1], level="Method")

        # if predictions are not available for some peptide/allele pairs, replace them by 0
        res_df.fillna(0, inplace=True)

        res_df = res_df[res_df.apply(
            lambda x: any(x[a] > self.__thresh.get(a.name, -float("inf"))
                          for a in res_df.columns),
            axis=1)]

        res_df.fillna(0, inplace=True)
        # transform scores to 1-log50k(IC50) scores if necessary
        # and generate mapping dictionaries for Set definitions
        for tup in res_df.itertuples():
            p = tup[0]
            seq = str(p)
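            # skip peptides whose distance-to-self is at or below dist_threshold for any allele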

            if any(
                    distance.get((seq, a.name), 1.0) <= dist_threshold
                    for a in _alleles):
                continue
            peps[seq] = p
            if p.get_metadata("taa", only_first=True):
                taa.append(seq)
            for a, s in zip(res_df.columns, tup[1:]):
                if method in ["smm", "smmpmbec", "arb", "comblibsidney"]:
                    try:
                        thr = min(
                            1.,
                            max(
                                0.0, 1.0 -
                                math.log(self.__thresh.get(a.name), 50000))
                        ) if a.name in self.__thresh else -float("inf")
                    except (ValueError, TypeError):
                        thr = 0

                    if s >= thr:
                        alleles_I.setdefault(a.name, set()).add(seq)
                    imm[seq, a.name] = min(1.,
                                           max(0.0, 1.0 - math.log(s, 50000)))
                else:
                    if s > self.__thresh.get(a.name, -float("inf")):
                        alleles_I.setdefault(a.name, set()).add(seq)
                    imm[seq, a.name] = s

            prots = set(pr for pr in p.get_all_proteins())
            cons[seq] = len(prots)
            for prot in prots:
                variations.append(prot.gene_id)
                epi_var.setdefault(prot.gene_id, set()).add(seq)
                var_epi.setdefault(str(seq), set()).add(prot.gene_id)
        self.__peptideSet = peps

        # calculate conservation
        variations = set(variations)
        total = len(variations)
        for e, v in cons.items():
            try:
                cons[e] = v / total
            except ZeroDivisionError:
                cons[e] = 1
        model = ConcreteModel()

        ######################################
        #
        # MODEL DEFINITIONS
        #
        ######################################

        # set definition
        model.Q = Set(initialize=variations)
        model.E = Set(initialize=set(peps.keys()))
        model.TAA = Set(initialize=set(taa))
        model.A = Set(initialize=alleles_I.keys())
        model.G = Set(model.E, initialize=lambda model, e: var_epi[e])
        model.E_var = Set(model.Q, initialize=lambda model, v: epi_var[v])
        model.A_I = Set(model.A, initialize=lambda model, a: alleles_I[a])

        if self.__included is not None:
            if len(self.__included) > k:
                raise ValueError(
                    "More epitopes to include than epitopes to select! "
                    "Either raise k or reduce epitopes to include.")
        model.Include = Set(within=model.E, initialize=self.__included)

        if overlap > 0:

            def longest_common_substring(model):
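                # collect pairs of epitopes that are contained in one another or share a common
                # substring of at least `overlap` residues; such pairs may not be selected together
                # (enforced by OverlappingConstraint below)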
                result = []
                for s1, s2 in itr.combinations(model.E, 2):
                    if s1 != s2:
                        if s1 in s2 or s2 in s1:
                            result.append((s1, s2))
                        m = [[0] * (1 + len(s2)) for i in range(1 + len(s1))]
                        longest, x_longest = 0, 0
                        for x in range(1, 1 + len(s1)):
                            for y in range(1, 1 + len(s2)):
                                if s1[x - 1] == s2[y - 1]:
                                    m[x][y] = m[x - 1][y - 1] + 1
                                    if m[x][y] > longest:
                                        longest = m[x][y]
                                        x_longest = x
                                else:
                                    m[x][y] = 0
                        if len(s1[x_longest - longest:x_longest]) >= overlap:
                            result.append((s1, s2))
                return set(result)

            model.O = Set(dimen=2, initialize=longest_common_substring)

        # parameter definition
        model.k = Param(initialize=self.__k,
                        within=PositiveIntegers,
                        mutable=True)
        model.k_taa = Param(initialize=self.__k_taa,
                            within=NonNegativeIntegers,
                            mutable=True)
        model.p = Param(
            model.A,
            initialize=lambda model, a: max(0, math.log(probs[a] + 0.001, 2)))
        model.c = Param(model.E,
                        initialize=lambda model, e: cons[e],
                        mutable=True)
        model.sigma = Param(model.E,
                            model.A,
                            initialize=lambda model, e, a: uncertainty.get(
                                (e, a), 0))
        model.i = Param(model.E,
                        model.A,
                        initialize=lambda model, e, a: imm[e, a])
        model.t_allele = Param(initialize=0,
                               within=NonNegativeIntegers,
                               mutable=True)
        model.t_var = Param(initialize=0,
                            within=NonNegativeIntegers,
                            mutable=True)
        model.t_c = Param(initialize=0.0,
                          within=NonNegativeReals,
                          mutable=True)
        model.abd = Param(model.Q,
                          initialize=lambda model, g: max(
                              0, math.log(expression.get(g, 2) + 0.001, 2)))
        model.eps1 = Param(initialize=1e6, mutable=True)
        model.eps2 = Param(initialize=1e6, mutable=True)

        # variable Definition
        model.x = Var(model.E, within=Binary)
        model.y = Var(model.A, within=Binary)
        model.z = Var(model.Q, within=Binary)

        # objective definition
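        # Obj1: maximize the abundance- and allele-weighted immunogenicity of the selection (written as minimizing its negative)
        # Obj2: minimize the total prediction uncertainty of the selected epitopes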
        model.Obj1 = Objective(rule=lambda model: -sum(model.x[e] * sum(
            model.abd[g] for g in model.G[e]) * sum(model.p[a] * model.i[e, a]
                                                    for a in model.A)
                                                       for e in model.E),
                               sense=minimize)
        model.Obj2 = Objective(
            rule=lambda model: sum(model.x[e] * sum(model.sigma[e, a]
                                                    for a in model.A)
                                   for e in model.E),
            sense=minimize)

        # constraints
        # obligatory Constraint (number of selected epitopes)
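        # Cov1 and Cov2 together force exactly k epitopes to be selected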
        model.NofSelectedEpitopesCov1 = Constraint(
            rule=lambda model: sum(model.x[e] for e in model.E) >= model.k)
        model.NofSelectedEpitopesCov2 = Constraint(
            rule=lambda model: sum(model.x[e] for e in model.E) <= model.k)
        model.NofSelectedTAACov = Constraint(rule=lambda model: sum(
            model.x[e] for e in model.TAA) <= model.k_taa)

        # optional constraints (in basic model they are disabled)
        model.IsAlleleCovConst = Constraint(
            model.A,
            rule=lambda model, a: sum(model.x[e]
                                      for e in model.A_I[a]) >= model.y[a])

        model.MinAlleleCovConst = Constraint(rule=lambda model: sum(
            model.y[a] for a in model.A) >= model.t_allele)

        model.IsAntigenCovConst = Constraint(
            model.Q,
            rule=lambda model, q: sum(model.x[e]
                                      for e in model.E_var[q]) >= model.z[q])
        model.MinAntigenCovConst = Constraint(
            rule=lambda model: sum(model.z[q] for q in model.Q) >= model.t_var)

        model.EpitopeConsConst = Constraint(
            model.E,
            rule=lambda model, e:
            (1 - model.c[e]) * model.x[e] <= 1 - model.t_c)

        if overlap > 0:
            model.OverlappingConstraint = Constraint(
                model.O,
                rule=lambda model, e1, e2: model.x[e1] + model.x[e2] <= 1)

        # constraints for Pareto optimization
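        # epsilon-constraint bounds: while one objective is optimized, the other term is capped
        # by its epsilon parameter (eps1 for the immunogenicity term, eps2 for the uncertainty term)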
        model.ImmConst = Constraint(rule=lambda model: sum(model.x[e] * sum(
            model.abd[g] for g in model.G[e]) * sum(model.p[a] * model.i[
                e, a] for a in model.A) for e in model.E) <= model.eps1)
        model.UncertaintyConst = Constraint(
            rule=lambda model: sum(model.x[e] * sum(model.sigma[
                e, a] for a in model.A) for e in model.E) <= model.eps2)
        self.__objectives = [model.Obj1, model.Obj2]
        self.__constraints = [model.UncertaintyConst, model.ImmConst]
        self.__epsilons = [model.eps2, model.eps1]

        # include constraint
        model.IncludeEpitopeConstraint = Constraint(
            model.Include, rule=lambda model, e: model.x[e] >= 1)

        # generate instance
        self.instance = model
        if self.__verbosity > 0:
            print "MODEL INSTANCE"
            self.instance.pprint()

        # constraints
        self.instance.Obj2.deactivate()
        self.instance.ImmConst.deactivate()
        self.instance.UncertaintyConst.deactivate()
        self.instance.IsAlleleCovConst.deactivate()
        self.instance.MinAlleleCovConst.deactivate()
        self.instance.IsAntigenCovConst.deactivate()
        self.instance.MinAntigenCovConst.deactivate()
        self.instance.EpitopeConsConst.deactivate()
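
Below is a minimal usage sketch for the first constructor (the basic OptiTope model). It is not part of the original file: the peptides, alleles, predictor name, threshold values and the solve() call are illustrative assumptions; only the constructor arguments correspond to the code shown above.

from Fred2.Core import Allele, Peptide
from Fred2.EpitopePrediction import EpitopePredictorFactory
from Fred2.EpitopeSelection.OptiTope import OptiTope

# hypothetical input peptides and HLA alleles
peptides = [Peptide("SYFPEITHI"), Peptide("KLLPRLPGV")]
alleles = [Allele("HLA-A*02:01"), Allele("HLA-B*07:02")]
alleles[0].prob = 0.3  # optional HLA frequencies; missing ones are inferred uniformly per locus (see above)
alleles[1].prob = 0.2

# run an installed epitope prediction method (assumption: Syfpeithi is available)
results = EpitopePredictorFactory("Syfpeithi").predict(peptides, alleles=alleles)

# threshold keys must match Allele.name; the value 1.0 is a placeholder
thresholds = {a.name: 1.0 for a in alleles}

opti = OptiTope(results, threshold=thresholds, k=5, solver="glpk", verbosity=0)
selection = opti.solve()  # assumption: OptiTope exposes a solve() method returning the selected epitopes
print(selection)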