示例#1
0
文件: SVM.py 项目: SteffenK12/Fred2
    def predict(self, peptides, alleles=None, **kwargs):
        """
        Returns predictions for the given peptides and alleles. If no alleles are given, every allele in
        ``self.supportedAlleles`` is tried.

        :param peptides: A single Peptide or an iterable of Peptide objects
        :param alleles: A single Allele or an iterable of Allele objects (optional)
        :param kwargs: optional parameters (not used here)
        :return: The object built by ``EpitopePredictionResult.from_dict``, re-indexed by ('Seq', 'Method')
        :raises ValueError: if an input has the wrong type or if no prediction could be made
        """
        if isinstance(peptides, Peptide):
            pep_seqs = {str(peptides): peptides}
        else:
            # validate and collect in one pass so that one-shot iterables work as well
            pep_seqs = {}
            for p in peptides:
                if not isinstance(p, Peptide):
                    raise ValueError("Input is not of type Protein or Peptide")
                pep_seqs[str(p)] = p

        if alleles is None:
            alleles = [Allele("HLA-"+a) for a in self.supportedAlleles]
        else:
            alleles = [alleles] if isinstance(alleles, Allele) else list(alleles)
            if any(not isinstance(a, Allele) for a in alleles):
                raise ValueError("Input is not of type Allele")
        # converted allele name (used to build the model file name) -> Allele object
        allales_string = {conv_a: a for conv_a, a in itertools.izip(self.convert_alleles(alleles), alleles)}

        result = {}
        # itertools.groupby only merges *adjacent* items, so the sequences must be sorted by length first
        for length, peps in itertools.groupby(sorted(pep_seqs, key=len), key=len):
            if length not in self.supportedLength:
                warnings.warn("Peptide length of %i is not supported by %s"%(length,self.name))
                continue

            encoding = self.encode(list(peps))

            for a in allales_string:
                # one SVM model file per (allele, length) combination
                model_path = pkg_resources.resource_filename("Fred2.Data.svms.%s"%self.name, "%s_%i"%(a,length))
                if not os.path.exists(model_path):
                    warnings.warn("No model exists for peptides of length %i or allele %s."%(length,
                                                                                            allales_string[a].name))
                    continue

                model = svmlight.read_model(model_path)
                pred = svmlight.classify(model, encoding.values())
                # setdefault keeps the scores of previously processed lengths for this allele
                scores = result.setdefault(allales_string[a], {})
                for pep, score in itertools.izip(encoding.keys(), pred):
                    scores[pep_seqs[pep]] = score

        if not result:
            raise ValueError("No predictions could be made for given input. Check your "
                             "epitope length and HLA allele combination.")
        df_result = EpitopePredictionResult.from_dict(result)
        df_result.index = pandas.MultiIndex.from_tuples([tuple((i, self.name)) for i in df_result.index],
                                                        names=['Seq', 'Method'])
        return df_result
示例#2
0
    def predict(self, peptides,  **kwargs):
        """
        Returns predictions for the given peptides, using one SVM model per peptide length.

        :param peptides: A single Peptide or an iterable of Peptide objects
        :param kwargs: optional parameters (not used here)
        :return: The object built by ``TAPPredictionResult.from_dict``
        :raises ValueError: if an input is not a Peptide or if no prediction could be made
        """
        if isinstance(peptides, Peptide):
            pep_seqs = {str(peptides): peptides}
        else:
            # validate and collect in one pass so that one-shot iterables work as well
            pep_seqs = {}
            for p in peptides:
                if not isinstance(p, Peptide):
                    raise ValueError("Input is not of type Protein or Peptide")
                pep_seqs[str(p)] = p

        result = {self.name: {}}
        scores = result[self.name]
        # itertools.groupby only merges *adjacent* items, so the sequences must be sorted by length first
        for length, peps in itertools.groupby(sorted(pep_seqs, key=len), key=len):
            if length not in self.supportedLength:
                warnings.warn("Peptide length of %i is not supported by %s"%(length,self.name))
                continue

            encoding = self.encode(list(peps))

            # one SVM model file per peptide length
            model_path = pkg_resources.resource_filename("Fred2.Data.svms.%s"%self.name, "%s_%i"%(self.name, length))
            model = svmlight.read_model(model_path)

            pred = svmlight.classify(model, encoding.values())
            # accumulate into the shared dict; re-creating it here would drop the other lengths
            for pep, score in itertools.izip(encoding.keys(), pred):
                scores[pep_seqs[pep]] = score

        if not result[self.name]:
            raise ValueError("No predictions could be made with "+self.name+" for given input.")
        df_result = TAPPredictionResult.from_dict(result)

        return df_result
示例#3
0
def ball_only_classifier(circles, color_image, bonus_radius):
    """
    Pick the best-scoring circle among the first six candidates.

    Each candidate is turned into a feature vector and scored with the SVM model
    stored at ./output/best_single_cup_model_for_ball. A candidate only wins if
    its score is strictly greater than the best score so far (initially 0.5).

    :param circles: sequence of candidate circles; only ``circles[:6]`` is examined
    :param color_image: passed through to ``find_pixels``
    :param bonus_radius: passed through to ``find_pixels``
    :return: tuple ``(best_circle, best_classification, best_circle_pixels)``;
             ``best_circle`` is a one-element list or ``None``, ``best_classification``
             is the winning score (0.5 when nothing scored higher) and
             ``best_circle_pixels`` is the pixel data of the winner or ``None``
    """
    model = svmlight.read_model("./output/best_single_cup_model_for_ball")
    ff = find_features()
    # TODO: fix
    label = 0
    best_classification = 0.5
    best_circle = None
    best_circle_pixels = None
    for c in circles[:6]:
        pixels, _ = find_pixels(c, color_image, bonus_radius)
        # create features for that circle
        features = parse_one_line(ff.generate_features(pixels, label))
        print(features)
        # run the classifier on that circle
        classification = svmlight.classify(model, [features])
        print(classification)
        # classify() returns one score per input row; keep the scalar so later
        # comparisons stay number-vs-number
        score = classification[0]
        if score > best_classification:
            best_classification = score
            best_circle = [c]
            best_circle_pixels = pixels

    return best_circle, best_classification, best_circle_pixels
示例#4
0
def test(test_data, fmodel_name):
  """
  Load the model stored in ``fmodel_name``, classify ``test_data`` with it and
  print every prediction with eight decimal places.
  """
  print ('[ test ] ===================')
  loaded = svmlight.read_model(fmodel_name)

  # classify() yields one number per test example
  for value in svmlight.classify(loaded, test_data):
    print('%.8f' % value)
示例#5
0
def test(test_data, fmodel_name):
    """
    Classify ``test_data`` with the model read from ``fmodel_name`` and print
    each resulting score formatted as ``%.8f``.
    """
    print('[ test ] ===================')
    svm = svmlight.read_model(fmodel_name)
    scores = svmlight.classify(svm, test_data)

    # one output line per classified example
    for score in scores:
        print('%.8f' % score)
示例#6
0
    def __init__(self,
                 doc2vec_args=None,
                 doc2vec_model=None,
                 svm_model=None,
                 doc2vec_train_docs=None):
        """
        Store the configuration and load any models that were named.

        :param doc2vec_args: dict stored as ``self.doc2vec_args``; a fresh empty
                             dict is used when omitted
        :param doc2vec_model: name appended to ``MODEL_DIR_PATH`` and loaded with
                              ``Doc2Vec.load``; nothing is loaded when falsy
        :param svm_model: path handed to ``read_model``; nothing is loaded when falsy
        :param doc2vec_train_docs: stored unchanged as ``self.doc2vec_train_docs``
        """
        self.doc2vec_train_docs = doc2vec_train_docs
        self.doc2vec_model = Doc2Vec.load(
            MODEL_DIR_PATH + doc2vec_model) if doc2vec_model else None
        self.svm_model = read_model(svm_model) if svm_model else None
        # a ``{}`` default would be one dict shared by every instance
        self.doc2vec_args = {} if doc2vec_args is None else doc2vec_args
示例#7
0
    def predict(self, peptides, **kwargs):
        """
        Returns TAP predictions for given :class:`~Fred2.Core.Peptide.Peptide`.

        :param peptides: A single :class:`~Fred2.Core.Peptide.Peptide` or a list of :class:`~Fred2.Core.Peptide.Peptide`
        :type peptides: list(:class:`~Fred2.Core.Peptide.Peptide`) or :class:`~Fred2.Core.Peptide.Peptide`
        :param kwargs: ``chunks`` sets how many peptides are encoded and classified per call
                       (defaults to the number of distinct peptide sequences)
        :return: Returns a :class:`~Fred2.Core.Result.TAPPredictionResult` object with the prediction results
        :rtype: :class:`~Fred2.Core.Result.TAPPredictionResult`
        :raises ValueError: if an input is not a Peptide or if no prediction could be made
        """
        if isinstance(peptides, Peptide):
            pep_seqs = {str(peptides): peptides}
        else:
            pep_seqs = {}
            for p in peptides:
                if not isinstance(p, Peptide):
                    raise ValueError("Input is not of type Protein or Peptide")
                pep_seqs[str(p)] = p

        chunksize = kwargs.get('chunks', len(pep_seqs))

        result = {self.name: {}}
        scores = result[self.name]
        # groupby only merges adjacent items, hence the sort by length
        for length, peps in itertools.groupby(sorted(pep_seqs, key=len), key=len):
            if length not in self.supportedLength:
                warnings.warn("Peptide length of %i is not supported by %s" %
                              (length, self.name))
                continue

            # the model depends only on the peptide length, so load it once per
            # length group instead of once per chunk
            model_path = pkg_resources.resource_filename(
                "Fred2.Data.svms.%s" % self.name,
                "%s_%i" % (self.name, length))
            model = svmlight.read_model(model_path)

            peps = list(peps)
            for i in range(0, len(peps), chunksize):
                encoding = self.encode(peps[i:i + chunksize])
                pred = svmlight.classify(model, list(encoding.values()))
                for pep, score in zip(encoding.keys(), pred):
                    scores[pep_seqs[pep]] = score

        if not result[self.name]:
            raise ValueError("No predictions could be made with " + self.name +
                             " for given input.")
        df_result = TAPPredictionResult.from_dict(result)

        return df_result
示例#8
0
文件: SVM.py 项目: FRED-2/Fred2
    def predict(self, peptides,  **kwargs):
        """
        Returns TAP predictions for given :class:`~Fred2.Core.Peptide.Peptide`.

        :param peptides: A single :class:`~Fred2.Core.Peptide.Peptide` or a list of :class:`~Fred2.Core.Peptide.Peptide`
        :type peptides: list(:class:`~Fred2.Core.Peptide.Peptide`) or :class:`~Fred2.Core.Peptide.Peptide`
        :param kwargs: ``chunks`` sets how many peptides are encoded and classified per call
                       (defaults to the number of distinct peptide sequences)
        :return: Returns a :class:`~Fred2.Core.Result.TAPPredictionResult` object with the prediction results
        :rtype: :class:`~Fred2.Core.Result.TAPPredictionResult`
        :raises ValueError: if an input is not a Peptide or if no prediction could be made
        """
        if isinstance(peptides, Peptide):
            pep_seqs = {str(peptides):peptides}
        else:
            pep_seqs = {}
            for p in peptides:
                if not isinstance(p, Peptide):
                    raise ValueError("Input is not of type Protein or Peptide")
                pep_seqs[str(p)] = p

        chunksize = kwargs.get('chunks', len(pep_seqs))

        result = {self.name: {}}
        scores = result[self.name]
        # groupby only merges adjacent items, hence the sort by length
        for length, peps in itertools.groupby(sorted(pep_seqs, key=len), key=len):
            if length not in self.supportedLength:
                warnings.warn("Peptide length of %i is not supported by %s"%(length,self.name))
                continue

            # the model depends only on the peptide length, so load it once per
            # length group instead of once per chunk
            model_path = pkg_resources.resource_filename("Fred2.Data.svms.%s"%self.name, "%s_%i"%(self.name, length))
            model = svmlight.read_model(model_path)

            peps = list(peps)
            for i in xrange(0, len(peps), chunksize):
                encoding = self.encode(peps[i:i+chunksize])
                pred = svmlight.classify(model, encoding.values())
                for pep, score in itertools.izip(encoding.keys(), pred):
                    scores[pep_seqs[pep]] = score

        if not result[self.name]:
            raise ValueError("No predictions could be made with "+self.name+" for given input.")
        df_result = TAPPredictionResult.from_dict(result)

        return df_result
示例#9
0
def rec_char(div_img):
    """
    Recognize a single character image that has already been cut out.

    The image is passed through ``binary`` and ``chformat`` and then scored
    against the ten models "model/0" .. "model/9". Returns the digit (as a
    string) of the last model whose first prediction is positive, or an empty
    string when none is.
    """
    sample = chformat(binary(div_img))
    recognized = ""
    for digit in range(10):
        digit_model = svmlight.read_model("model/" + str(digit))
        scores = svmlight.classify(digit_model, sample)
        # a later positive match overrides an earlier one
        if scores[0] > 0:
            recognized = str(digit)

    return recognized
示例#10
0
def zrank(aids, topic, fmodel_name):
  """
  Re-rank ``aids`` for ``topic`` using the model stored in ``fmodel_name``.

  The (id, score) pairs, sorted by descending score, are pretty-printed to the
  file ``RERANK_RESULT + '_' + topic``; the ids are returned in that order.
  """
  candidates = init_rerank_data(aids, topic)

  print ('[ zrank ] ===================')
  scores = svmlight.classify(svmlight.read_model(fmodel_name), candidates)

  # the first field of every candidate row is its id
  ranked = sorted(zip([row[0] for row in candidates], scores),
                  key=lambda pair: pair[1], reverse=True)

  with open(RERANK_RESULT + '_' + topic, 'w') as out:
    pprint.pprint(ranked, out)

  ZC.dump_cache()

  return [pair[0] for pair in ranked]
示例#11
0
def zrank(aids, topic, fmodel_name):
    """
    Score the re-rank data built for ``aids`` / ``topic`` with the model read
    from ``fmodel_name`` and return the ids ordered by descending score.

    The sorted (id, score) list is also pretty-printed to the file named
    ``RERANK_RESULT + '_' + topic``.
    """
    rows = init_rerank_data(aids, topic)

    print('[ zrank ] ===================')
    svm = svmlight.read_model(fmodel_name)
    predicted = svmlight.classify(svm, rows)

    # pair each row id (first field) with its score, best score first
    ids = [row[0] for row in rows]
    by_score = sorted(zip(ids, predicted), key=lambda item: item[1], reverse=True)

    with open(RERANK_RESULT + '_' + topic, 'w') as handle:
        pprint.pprint(by_score, handle)

    ZC.dump_cache()

    return [item[0] for item in by_score]
示例#12
0
    def load_model(self):
        """
        Load the model file ``<self._modelname>/<self._pref>.model`` into ``self._model``.

        The loader is chosen by ``self._classtype``: "classifier" uses
        ``svmutil.svm_load_model``, "structured" uses ``svmlight.read_model`` and
        "percrank" unpickles the parameters of a ``KernelLBRankPerceptron``.
        Any other class type leaves ``self._model`` untouched.

        :return: ``False`` if the model file does not exist, ``True`` otherwise
        """
        model_file = self._modelname + "/" + self._pref + ".model"
        if not os.path.isfile(model_file):
            return False

        if self._classtype == "classifier":
            self._model = svmutil.svm_load_model(model_file)
        elif self._classtype == "structured":
            self._model = svmlight.read_model(model_file)
        elif self._classtype == "percrank":
            m = KernelLBRankPerceptron(kernel=polynomial_kernel)
            # NOTE(review): unpickling executes code embedded in the file; only
            # load model files from a trusted source.
            # The context manager closes the handle even if unpickling fails.
            with open(model_file, 'rb') as mfile:
                m.sv_a, m.sv_1, m.sv_2, m.bias = cPickle.load(mfile)
            self._model = m

        return True
示例#13
0
def simple_classifier(circles, color_image, bonus_radius):
    """
    Run the SVM stored at ./output/best_single_cup_model on the first six
    circles, printing the features and the classification of each one.

    Returns the list of ``find_pixels`` results for the examined circles; the
    classification itself does not influence the returned list.
    """
    svm = svmlight.read_model("./output/best_single_cup_model")
    extractor = find_features()
    # TODO: fix
    label = 0
    found = []
    for candidate in circles[:6]:
        located = find_pixels(candidate, color_image, bonus_radius)
        found.append(located)
        # build the feature row from the first element of the find_pixels result
        row = parse_one_line(extractor.generate_features(located[0], label))
        print(row)
        # score that single row
        verdict = svmlight.classify(svm, [row])
        print(verdict)

    return found
示例#14
0
文件: SVM.py 项目: koalive/Fred2
    def predict(self, peptides, alleles=None, **kwargs):
        """
        Returns predictions for given peptides and alleles. If no alleles are given, predictions for all
        supported alleles are made.

        :param peptides: A single :class:`~Fred2.Core.Peptide.Peptide` or a list of :class:`~Fred2.Core.Peptide.Peptide`
        :type peptides: list(:class:`~Fred2.Core.Peptide.Peptide`) or :class:`~Fred2.Core.Peptide.Peptide`
        :param alleles: A list of :class:`~Fred2.Core.Allele.Allele`
        :type alleles: list(:class:`~Fred2.Core.Allele.Allele`) or :class:`~Fred2.Core.Allele.Allele`
        :param kwargs: optional parameter (not used yet)
        :return: Returns a :class:`~Fred2.Core.Result.EpitopePredictionResult` object with the prediction results
        :rtype: :class:`~Fred2.Core.Result.EpitopePredictionResult`
        :raises ValueError: if an input has the wrong type or if no prediction could be made
        """
        if isinstance(peptides, Peptide):
            pep_seqs = {str(peptides): peptides}
        else:
            pep_seqs = {}
            for p in peptides:
                if not isinstance(p, Peptide):
                    raise ValueError("Input is not of type Protein or Peptide")
                pep_seqs[str(p)] = p

        if alleles is None:
            alleles = [Allele("HLA-" + a) for a in self.supportedAlleles]
        else:
            alleles = [alleles] if isinstance(alleles, Allele) else list(alleles)
            if any(not isinstance(a, Allele) for a in alleles):
                raise ValueError("Input is not of type Allele")
        # converted allele name (handed to self.encode) -> Allele object
        allales_string = {conv_a: a for conv_a, a in itertools.izip(self.convert_alleles(alleles), alleles)}

        result = {}

        # a single model serves every allele; it is loaded once up front
        model_path = pkg_resources.resource_filename("Fred2.Data.svms.%s" % self.name, "%s" % self.name)
        model = svmlight.read_model(model_path)

        # itertools.groupby only merges *adjacent* items, so the sequences must be sorted by length first
        for length, peps in itertools.groupby(sorted(pep_seqs, key=len), key=len):
            peps = list(peps)
            if length != 9:
                warnings.warn("Peptide length of %i is not supported by UniTope" % length)
                continue

            for a in allales_string:
                if allales_string[a].name in self.supportedAlleles:
                    encoding = self.encode(peps, a)
                    pred = svmlight.classify(model, encoding.values())
                    # setdefault never discards scores that were stored earlier for this allele
                    scores = result.setdefault(allales_string[a], {})
                    for pep, score in itertools.izip(encoding.keys(), pred):
                        scores[pep_seqs[pep]] = score

        if not result:
            # adjacent literals instead of a backslash continuation, which embedded
            # the source indentation in the message
            raise ValueError("No predictions could be made for given input. Check your "
                             "epitope length and HLA allele combination.")
        df_result = EpitopePredictionResult.from_dict(result)
        df_result.index = pandas.MultiIndex.from_tuples([tuple((i, self.name)) for i in df_result.index],
                                                        names=['Seq', 'Method'])
        return df_result
示例#15
0
    testing_data_keys = []
    testing_data_tuples = []
    for key, vector in vectors.iteritems():
        vals = []
        num = 1
        for val in vector:
            vals.append((num, float(val)))
            num += 1
        testing_data_tuples.append((0, vals))
        testing_data_keys.append(key)

    return testing_data_keys, testing_data_tuples


# Script section: load the trained SVM model and list the test images.
print "Loading Model"
model = svmlight.read_model('svm-model.dat')

testing_data = []
directory = "tests"
# Regular files in tests/originals/, skipping names that start with a dot.
filenames = [
    f for f in listdir(directory + "/originals/")
    if isfile(join(directory + "/originals/", f)) and f[0] != "."
]

start_time = time.time()
counter = 0

# NOTE(review): only the header printing of this loop is visible in this excerpt.
for filename in filenames:

    print "\n-----------------------------------"
    print directory + "/originals/" + filename + "\n"
def writeBufferToFile(path, buffer):
	"""Write ``buffer`` to the file at ``path``, replacing any existing content."""
	# the context manager closes the handle even if write() raises
	with open(path, 'w') as outFile:
		outFile.write(buffer)


def getFullTextAsSentencesFromDocModel(document):
	"""
	Return all sentences of ``document`` as one flat list.

	Newlines inside each paragraph are replaced by spaces before the paragraph
	is split with ``sentence_breaker.tokenize``; paragraph order is kept.
	"""
	return [
		sentence
		for paragraph in document.paragraphs
		for sentence in sentence_breaker.tokenize(paragraph.replace('\n', ' '))
	]

# SVM model read once at import time.
model = svmlight.read_model('my_model.dat')

# Module-level counters; testDoc declares both of them as globals.
correct = 0
total = 0
def testDoc(document):
	# NOTE(review): only the beginning of this function is visible in this excerpt.
	# The visible part splits the document into sentences and builds an entity
	# grid from them; `total` and `correct` are the module-level counters.
	global total
	global correct

	testVectors = []
	docIndex = 1
	sentences = getFullTextAsSentencesFromDocModel(document)
	if len(sentences) <= 1:  # early return if no transitions.
		return

	# Grid for the document in its given sentence order.
	goodDoc = DummyDocModel(sentences)
	goodGrid = TextrazorEntityGrid(goodDoc.cleanSentences())
示例#17
0
	
	testing_data_keys = []
	testing_data_tuples = []
	for key, vector in vectors.iteritems():
		vals = []
		num = 1
		for val in vector:
			vals.append((num, float(val)))
			num += 1
		testing_data_tuples.append((0,vals))
		testing_data_keys.append(key)
	
	return testing_data_keys, testing_data_tuples

# Script section: load the trained SVM model and iterate over the test images.
print "Loading Model"
model = svmlight.read_model('svm-model.dat')

testing_data = []
directory = "tests"
# Regular files in tests/originals/, skipping names that start with a dot.
filenames = [ f for f in listdir(directory + "/originals/") if isfile(join(directory + "/originals/",f)) and f[0] != "." ]

start_time = time.time()
counter = 0

# NOTE(review): the loop body is cut off in this excerpt after the two image reads.
for filename in filenames:

	print "\n-----------------------------------"
	print directory + "/originals/" + filename + "\n"
	
	# The same file is read twice: once with as_grey=True and once with as_grey=False.
	img = io.imread(directory + "/originals/" + filename, as_grey=True)
	output = io.imread(directory + "/originals/" + filename, as_grey=False)
示例#18
0
def load_classifier(clf_i):
    """Read and return the svmlight model stored as ``str(clf_i)`` inside ``clf_directory``."""
    model_path = os.path.join(clf_directory, str(clf_i))
    return svmlight.read_model(model_path)
示例#19
0
	def read_model(self, rel_path):
		"""Resolve ``rel_path`` to an absolute path and return the svmlight model read from it."""
		return svmlight.read_model(os.path.abspath(rel_path))
示例#20
0
文件: SVM.py 项目: linusb/Fred2
    def predict(self, peptides, alleles=None, **kwargs):
        """
        Returns predictions for given peptides and alleles. If no alleles are given, predictions for all
        supported alleles are made.

        :param peptides: A single :class:`~Fred2.Core.Peptide.Peptide` or a list of :class:`~Fred2.Core.Peptide.Peptide`
        :type peptides: list(:class:`~Fred2.Core.Peptide.Peptide`) or :class:`~Fred2.Core.Peptide.Peptide`
        :param alleles: A list of :class:`~Fred2.Core.Allele.Allele`
        :type alleles: list(:class:`~Fred2.Core.Allele.Allele`) or :class:`~Fred2.Core.Allele.Allele`
        :param kwargs: optional parameter (not used yet)
        :return: Returns a :class:`~Fred2.Core.Result.EpitopePredictionResult` object with the prediction results
        :rtype: :class:`~Fred2.Core.Result.EpitopePredictionResult`
        :raises ValueError: if an input has the wrong type or if no prediction could be made
        """
        if isinstance(peptides, Peptide):
            pep_seqs = {str(peptides): peptides}
        else:
            pep_seqs = {}
            for p in peptides:
                if not isinstance(p, Peptide):
                    raise ValueError("Input is not of type Protein or Peptide")
                pep_seqs[str(p)] = p

        if alleles is None:
            alleles = [Allele("HLA-" + a) for a in self.supportedAlleles]
        else:
            alleles = [alleles] if isinstance(alleles, Allele) else list(alleles)
            if any(not isinstance(a, Allele) for a in alleles):
                raise ValueError("Input is not of type Allele")
        # converted allele name (handed to self.encode) -> Allele object
        allales_string = {conv_a: a for conv_a, a in itertools.izip(self.convert_alleles(alleles), alleles)}

        result = {}

        # a single model serves every allele; it is loaded once up front
        model_path = pkg_resources.resource_filename("Fred2.Data.svms.%s" % self.name, "%s" % self.name)
        model = svmlight.read_model(model_path)

        # itertools.groupby only merges *adjacent* items, so the sequences must be sorted by length first
        for length, peps in itertools.groupby(sorted(pep_seqs, key=len), key=len):
            peps = list(peps)
            if length != 9:
                warnings.warn("Peptide length of %i is not supported by UniTope" % length)
                continue

            for a in allales_string:
                if allales_string[a].name in self.supportedAlleles:
                    encoding = self.encode(peps, a)
                    pred = svmlight.classify(model, encoding.values())
                    # setdefault never discards scores that were stored earlier for this allele
                    scores = result.setdefault(allales_string[a], {})
                    for pep, score in itertools.izip(encoding.keys(), pred):
                        scores[pep_seqs[pep]] = score

        if not result:
            # adjacent literals instead of a backslash continuation, which embedded
            # the source indentation in the message
            raise ValueError("No predictions could be made for given input. Check your "
                             "epitope length and HLA allele combination.")
        df_result = EpitopePredictionResult.from_dict(result)
        df_result.index = pandas.MultiIndex.from_tuples([tuple((i, self.name)) for i in df_result.index],
                                                        names=['Seq', 'Method'])
        return df_result
# Cache and output locations, all relative to the working directory.
#cachePath = "../cache/asasCache"
goldCachePath = "../cache/asasGoldCache"
summaryOutputPath = "../outputs"
reorderedSummaryOutputPath = summaryOutputPath + "_reordered"
evaluationOutputPath = "../results"
modelSummaryCachePath = "../cache/modelSummaryCache"
documentCachePath = "../cache/documentCache"
idfCachePath = "../cache/idfCache"
meadCacheDir = "../cache/meadCache"
rougeCacheDir = "../cache/rougeCache"
rougeDir = "../ROUGE"
# rougeDir = "/opt/dropbox/14-15/573/code/ROUGE"


# SVM model read once at start-up from the svmlight cache directory.
rankModel = svmlight.read_model('../cache/svmlightCache/svmlightModel.dat')

# NOTE(review): `args` is defined outside this excerpt.
rouge = RougeEvaluator(rougeDir,
					   args.modelSummaryDir,
					   summaryOutputPath,
					   modelSummaryCachePath,
					   rougeCacheDir)

totalClusters = 25
minimumAverageClusterRange = 30
maximumAverageClusterRange = 55
maxWords = 100
topics = []
topicTitles = {}
# Collect every topic produced from the topic XML file.
# NOTE(review): the loop body may continue beyond this excerpt.
for topic in extract.topicReader.Topic.factoryMultiple(args.topicXml):
	topics.append(topic)