Exemplo n.º 1
0
 def __init__(self, queryFile, candidatePath, mu, corpusFile, sigma, lamda):
     """Initialise empty query/tweet stores, thresholds and scoring helpers.

     Args:
         queryFile: Kept in the signature for callers; this constructor
             does not read it.
         candidatePath: Stored unchanged on ``self.candidate``.
         mu: Stored on ``self.mu`` and also passed to ``Distance``.
         corpusFile: Passed to ``Distance`` together with ``mu``.
         sigma: Similarity threshold.
         lamda: Cluster threshold.
     """
     # Both mappings start empty; nothing is loaded into them here.
     self.query = {}
     self.candidate = candidatePath
     self.tweet = {}

     self.mu = mu
     self.sigma = sigma  # similarity threshold
     self.lamda = lamda  # cluster threshold

     self.jaccInstance = Jaccard()
     # presumably Distance loads the corpus from corpusFile, which is what
     # the progress message below reports -- verify against Distance
     self.klInstance = Distance(mu, corpusFile)

     # Parenthesised single-argument form: prints the same text under
     # Python 2 and is also valid syntax under Python 3.
     print("corpus read done!")
Exemplo n.º 2
0
    def __init__(self, train_path, test_path):
        """Prepare empty data holders, helper objects and the regressors.

        The two paths are only stored on the instance; nothing is read
        from them in this constructor.

        Args:
            train_path: Stored unchanged on ``self.train_path``.
            test_path: Stored unchanged on ``self.test_path``.
        """
        self.train_path, self.test_path = train_path, test_path
        self.preprocessor = Preprocessor()

        # Empty frames that share the Classifier._COLS column layout.
        data_columns = Classifier._COLS
        self.trn = pd.DataFrame(columns=data_columns)
        self.tst = pd.DataFrame(columns=data_columns)

        # Empty frames for the known labels (Classifier._GS_COLS layout).
        label_columns = Classifier._GS_COLS
        self.trn_gs = pd.DataFrame(columns=label_columns)
        self.tst_gs = pd.DataFrame(columns=label_columns)

        # Two independent, initially empty lists.
        self.tok_trn, self.tok_tst = [], []

        self.feature_extractor = FeatureExtractor()
        self.jaccard = Jaccard()
        self.rfr = RFR()

        # NOTE(review): per the scikit-learn docs, validation_fraction is
        # only used when early_stopping=True, which is not set here --
        # confirm whether early stopping was intended.
        mlp_settings = {
            'hidden_layer_sizes': (100, 30, 30),
            'validation_fraction': 0.3,
            'alpha': 0.3,
            'warm_start': False,
            'max_iter': 1000,
            'activation': 'logistic',
        }
        self.nn = MLPRegressor(**mlp_settings)