Exemplo n.º 1
0
    def __init__(
            self,
            alpha=0,
            decay=1,
            ignore_leaves=True,
            smoothed=True,
            vector='word2vec',
            w2vdim=300,
            lowercase=True,
            tree='tree',
            kernel_path=KERNEL_PATH):
        """Initialise the base ``Semeval`` state, build a ``TreeKernel`` and train.

        Args:
            alpha: Forwarded unchanged to ``TreeKernel``.
            decay: Forwarded unchanged to ``TreeKernel``.
            ignore_leaves: Forwarded unchanged to ``TreeKernel``.
            smoothed: Forwarded unchanged to ``TreeKernel``.
            vector: Forwarded to ``Semeval.__init__``.
            w2vdim: Forwarded to ``Semeval.__init__``.
            lowercase: Forwarded to both ``Semeval.__init__`` and ``TreeKernel``.
            tree: Stored as ``self.tree``.
            kernel_path: Stored as ``self.path``.
        """
        # `stop` and `punctuation` are always passed as False for this model,
        # regardless of the caller's arguments.
        base_options = {
            'vector': vector,
            'stop': False,
            'lowercase': lowercase,
            'punctuation': False,
            'w2vdim': w2vdim,
        }
        Semeval.__init__(self, **base_options)

        self.path, self.tree = kernel_path, tree
        self.memoization = {}
        self.svm = Model()
        self.flat_traindata()

        kernel_options = {
            'alpha': alpha,
            'decay': decay,
            'ignore_leaves': ignore_leaves,
            'smoothed': smoothed,
            'lowercase': lowercase,
        }
        self.treekernel = TreeKernel(**kernel_options)
        self.train()

        # NOTE(review): `additional` is not assigned in this method; presumably
        # it is created by the base initialiser or flat_traindata() and is only
        # needed until training has finished -- confirm.
        del self.additional
Exemplo n.º 2
0
    def __init__(
            self,
            model='svm',
            features='bm25,',
            comment_features='bm25,',
            stop=True,
            vector='word2vec',
            lowercase=True,
            punctuation=True,
            proctrain=True,
            path=FEATURES_PATH,
            alpha=0.1,
            sigma=0.9,
            gridsearch='random'):
        """Initialise the base ``Semeval`` state, the requested scorers and train.

        ``features`` and ``comment_features`` are comma-separated names. Each
        is split on ``','`` and stored as a list, so the default ``'bm25,'``
        yields ``['bm25', '']`` (the trailing empty entry is kept).

        A scorer attribute (``bm25``, ``cosine``, ``softcosine``,
        ``translation``) is instantiated only when its name occurs in either
        list; otherwise the attribute is set to ``None``.

        Args:
            model: Stored as ``self.model``.
            features: Comma-separated feature names.
            comment_features: Comma-separated comment feature names.
            stop: Forwarded to the base class and to every scorer.
            vector: Forwarded to the base class and to the soft-cosine scorer.
            lowercase: Forwarded to the base class and to every scorer.
            punctuation: Forwarded to the base class and to every scorer.
            proctrain: Forwarded to every scorer.
            path: Stored as ``self.path``.
            alpha: Forwarded to the translation scorer.
            sigma: Forwarded to the translation scorer.
            gridsearch: Stored as ``self.gridsearch``.
        """
        Semeval.__init__(
            self,
            stop=stop,
            vector=vector,
            lowercase=lowercase,
            punctuation=punctuation)

        self.path = path
        self.features = features.split(',')
        self.comment_features = comment_features.split(',')
        self.gridsearch = gridsearch
        self.svm = Model()
        self.model = model

        # Every scorer is gated on the union of both feature lists.
        requested = self.features + self.comment_features
        # Pre-processing switches common to all four scorers.
        shared = {
            'stop': stop,
            'lowercase': lowercase,
            'punctuation': punctuation,
            'proctrain': proctrain,
        }

        if 'bm25' in requested:
            self.bm25 = SemevalBM25(**shared)
        else:
            self.bm25 = None

        if 'cosine' in requested:
            self.cosine = SemevalCosine(**shared)
        else:
            self.cosine = None

        if 'softcosine' in requested:
            self.softcosine = SemevalSoftCosine(vector=vector, **shared)
        else:
            self.softcosine = None

        if 'translation' in requested:
            # Unlike the soft-cosine scorer, this one receives `self.vector`
            # (not the `vector` argument).
            self.translation = SemevalTranslation(
                alpha=alpha, sigma=sigma, vector=self.vector, **shared)
        else:
            self.translation = None

        self.train()
Exemplo n.º 3
0
    def __init__(
            self,
            model='svm',
            features='bm25,',
            comment_features='bm25,',
            stop=True,
            vector='word2vec',
            path=FEATURES_PATH,
            alpha=0.1,
            sigma=0.9,
            gridsearch='random'):
        """Initialise the base ``Quora`` state, the requested scorers and train.

        ``features`` and ``comment_features`` are comma-separated names. Each
        is split on ``','`` and stored as a list, so the default ``'bm25,'``
        yields ``['bm25', '']`` (the trailing empty entry is kept).

        A scorer attribute (``bm25``, ``cosine``, ``softcosine``,
        ``translation``) is instantiated only when its name occurs in either
        list; otherwise the attribute is set to ``None``.

        Args:
            model: Stored as ``self.model``.
            features: Comma-separated feature names.
            comment_features: Comma-separated comment feature names.
            stop: Forwarded to the base class and to every scorer.
            vector: Forwarded to the base class and to the soft-cosine scorer.
            path: Stored as ``self.path``.
            alpha: Forwarded to the translation scorer.
            sigma: Forwarded to the translation scorer.
            gridsearch: Stored as ``self.gridsearch``.
        """
        Quora.__init__(self, stop=stop, vector=vector)

        self.path = path
        self.features = features.split(',')
        self.comment_features = comment_features.split(',')
        self.gridsearch = gridsearch
        self.svm = Model()
        self.model = model

        # Every scorer is gated on the union of both feature lists.
        requested = self.features + self.comment_features

        if 'bm25' in requested:
            self.bm25 = QuoraBM25(stop=stop)
        else:
            self.bm25 = None

        if 'cosine' in requested:
            self.cosine = QuoraCosine(stop=stop)
        else:
            self.cosine = None

        if 'softcosine' in requested:
            self.softcosine = QuoraSoftCosine(stop=stop, vector=vector)
        else:
            self.softcosine = None

        if 'translation' in requested:
            # Unlike the soft-cosine scorer, this one receives `self.vector`
            # (not the `vector` argument).
            self.translation = QuoraTranslations(
                alpha=alpha, sigma=sigma, stop=stop, vector=self.vector)
        else:
            self.translation = None

        self.train()
Exemplo n.º 4
0
    def __init__(self, stop=None, lowercase=None, punctuation=None, vector=None, scale=True, alpha=0.9, sigma=0.1):
        """Store the configuration, load the data, train, test and dump the ranking.

        After training, the result of ``self.test()`` is serialised with
        ``p.dump`` to the file ``'reranking'`` inside ``SEMI_PATH``.

        Args:
            stop: Configuration mapping stored as ``self.stop``; a fresh empty
                dict is used when omitted.
            lowercase: Configuration mapping stored as ``self.lowercase``; a
                fresh empty dict is used when omitted.
            punctuation: Configuration mapping stored as ``self.punctuation``;
                a fresh empty dict is used when omitted.
            vector: Configuration mapping stored as ``self.vector``; a fresh
                empty dict is used when omitted.
            scale: Stored as ``self.scale``.
            alpha: Stored as ``self.alpha``.
            sigma: Stored as ``self.sigma``.
        """
        # The defaults used to be `{}` literals, which Python evaluates once at
        # definition time: every instance built with the defaults would have
        # stored the very same dict objects. Use None sentinels so each
        # instance gets its own empty dict.
        self.stop = {} if stop is None else stop
        self.lowercase = {} if lowercase is None else lowercase
        self.punctuation = {} if punctuation is None else punctuation
        self.scale = scale
        self.vector = {} if vector is None else vector
        self.alpha = alpha
        self.sigma = sigma

        self.questions, self.ranking = self.load()
        self.ensemble = Model()
        self.train()

        ranking = self.test()
        # Close the output file deterministically instead of leaking the
        # handle returned by open().
        with open(os.path.join(SEMI_PATH, 'reranking'), 'wb') as handle:
            p.dump(ranking, handle)
Exemplo n.º 5
0
    def __init__(self,
                 stop=None,
                 lowercase=None,
                 punctuation=None,
                 vector=None,
                 scale=True,
                 w2vdim=300,
                 kernel_path='',
                 alpha=0.8,
                 sigma=0.2):
        """Store the configuration, create the ensemble model and train it.

        Args:
            stop: Configuration mapping stored as ``self.stop``; a fresh empty
                dict is used when omitted.
            lowercase: Configuration mapping stored as ``self.lowercase``; a
                fresh empty dict is used when omitted.
            punctuation: Configuration mapping stored as ``self.punctuation``;
                a fresh empty dict is used when omitted.
            vector: Configuration mapping stored as ``self.vector``; a fresh
                empty dict is used when omitted.
            scale: Stored as ``self.scale``.
            w2vdim: Stored as ``self.w2vdim``.
            kernel_path: Stored as ``self.kernel_path``.
            alpha: Stored as ``self.alpha``.
            sigma: Stored as ``self.sigma``.
        """
        # The defaults used to be `{}` literals, which Python evaluates once at
        # definition time: every instance built with the defaults would have
        # stored the very same dict objects. Use None sentinels so each
        # instance gets its own empty dict.
        self.stop = {} if stop is None else stop
        self.lowercase = {} if lowercase is None else lowercase
        self.punctuation = {} if punctuation is None else punctuation
        self.scale = scale
        self.vector = {} if vector is None else vector
        self.alpha = alpha
        self.sigma = sigma
        self.kernel_path = kernel_path
        self.w2vdim = w2vdim
        # Fixed value; not configurable through the constructor.
        self.theta = 0.9

        self.ensemble = Model()
        self.train()