Exemplo n.º 1
0
def load_best_classifier_conf():
    """Build and return a ClassifiersWrapper holding four MLlib classifiers.

    The wrapper receives, in this order, SVMWithSGD, LogisticRegressionWithSGD
    and NaiveBayes with weight 0.3 each, followed by
    LogisticRegressionWithLBFGS with weight 0.7. Every classifier is
    registered with an empty ``trainParameters`` dict.

    Imports are kept at function scope, exactly as in the original code.

    Returns:
        The populated ``ClassifiersWrapper`` instance.
    """
    from DataClassifierV2 import ClassifiersWrapper
    from pyspark.mllib.classification import (
        SVMWithSGD,
        LogisticRegressionWithSGD,
        LogisticRegressionWithLBFGS,
        NaiveBayes,
    )

    # (classifier class, weight) pairs in registration order.
    weighted_members = (
        (SVMWithSGD, 0.3),
        (LogisticRegressionWithSGD, 0.3),
        (NaiveBayes, 0.3),
        (LogisticRegressionWithLBFGS, 0.7),
    )

    ensemble = ClassifiersWrapper()
    for algorithm, vote_weight in weighted_members:
        # A fresh empty dict is created for each registration.
        ensemble.addClassifier(classifier=algorithm,
                               trainParameters={},
                               weight=vote_weight)
    return ensemble
Exemplo n.º 2
0
def load_best_classifier_conf():
    """Return a ClassifiersWrapper pre-loaded with four weighted classifiers.

    Registration order and weights:
        1. SVMWithSGD                  -> 0.3
        2. LogisticRegressionWithSGD   -> 0.3
        3. NaiveBayes                  -> 0.3
        4. LogisticRegressionWithLBFGS -> 0.7

    Each classifier is added with an empty ``trainParameters`` dict. The
    imports stay local to the function, as in the original code.

    Returns:
        The configured ``ClassifiersWrapper`` instance.
    """
    from DataClassifierV2 import ClassifiersWrapper
    from pyspark.mllib.classification import (
        SVMWithSGD,
        LogisticRegressionWithSGD,
        LogisticRegressionWithLBFGS,
        NaiveBayes,
    )

    wrapper = ClassifiersWrapper()
    # Walk the (classifier, weight) table in registration order.
    for model_cls, model_weight in (
        (SVMWithSGD, 0.3),
        (LogisticRegressionWithSGD, 0.3),
        (NaiveBayes, 0.3),
        (LogisticRegressionWithLBFGS, 0.7),
    ):
        wrapper.addClassifier(classifier=model_cls,
                              trainParameters={},
                              weight=model_weight)
    return wrapper
 # NOTE(review): this is a fragment of a larger routine; `newSource` and the
 # classes used below are defined/imported outside this excerpt.
 # Register each ticker symbol together with a list of keywords
 # (presumably the terms used to match news to the symbol -- confirm
 # against lookingAll's implementation).
 newSource.lookingAll('NASDAQ:GOOGL', ['GOOG', 'GOOGL', 'GOOGLE'])
 newSource.lookingAll('NASDAQ:NVDA', ['NVIDIA'])
 newSource.lookingAll('VTX:NESN', ['NESTLE'])
 newSource.lookingAll('VTX:SCMN', ['SWISSCOM'])
 newSource.lookingAll('VTX:NOVN', ['NOVARTIS'])
 
 # Produce the news collection (presumably a Spark RDD, given the
 # map/cache/count calls below -- confirm against doIt()).
 newsRDD = newSource.doIt()
 # Market source built for the same five symbols registered above.
 marketSource = GoogleFinanceMarketSourceSpark(['NASDAQ:GOOGL', 'NASDAQ:NVDA', 'VTX:NESN', 'VTX:SCMN', 'VTX:NOVN'])
 # Pass every news item through marketSource.addMarketStatusToNews.
 newsRDD = newsRDD.map(lambda x: marketSource.addMarketStatusToNews(x))
 # Disabled: would keep only the first of 15 equally weighted random splits.
 #newsRDD = newsRDD.randomSplit([1,1,1,1,1,1,1,1,1,1,1,1,1,1,1])[0]
 # Cache before count() because newsRDD is used again by processBinary below.
 newsRDD.cache()
 print('nb news : %d' % newsRDD.count())
 # n=200000 is hard-coded here (presumably the feature-vector size -- TODO confirm).
 dataSetMaker = DataSetMakerV2(n=200000)
 fullDataSet = dataSetMaker.processBinary(newsRDD)
 fullDataSet.cache()
 # First wrapper: three classifiers at weight 0.3 and
 # LogisticRegressionWithLBFGS at weight 0.7, all with empty trainParameters.
 myClassifier = ClassifiersWrapper()
 myClassifier.addClassifier(classifier=SVMWithSGD, trainParameters={}, weight=0.3)
 myClassifier.addClassifier(classifier=LogisticRegressionWithSGD, trainParameters={}, weight=0.3)
 myClassifier.addClassifier(classifier=NaiveBayes, trainParameters={}, weight=0.3)
 myClassifier.addClassifier(classifier=LogisticRegressionWithLBFGS, trainParameters={}, weight=0.7)
 
 # Second wrapper, configured identically to the first; a separate instance
 # is handed to the one-vs-many wrapper below.
 myClassifier2 = ClassifiersWrapper()
 myClassifier2.addClassifier(classifier=SVMWithSGD, trainParameters={}, weight=0.3)
 myClassifier2.addClassifier(classifier=LogisticRegressionWithSGD, trainParameters={}, weight=0.3)
 myClassifier2.addClassifier(classifier=NaiveBayes, trainParameters={}, weight=0.3)
 myClassifier2.addClassifier(classifier=LogisticRegressionWithLBFGS, trainParameters={}, weight=0.7)
 
 # Evaluator is built on the full data set; registering the plain wrapper
 # as a model is currently disabled.
 dataClassifierEvaluator = DataClassifierEvaluator(fullDataSet)
 #dataClassifierEvaluator.addModel(myClassifier, 'My Classifier')
 # Wrap each ensemble in a multi-class strategy; the second argument (4) is
 # presumably the number of classes -- TODO confirm against the constructors.
 myClassifierOnevsOne = DataClassifierMultiClassesOneVsOne(myClassifier, 4)
 myClassifierOnevsMany = DataClassifierMultiClassesOneVsMany(myClassifier2, 4)
    #newSource.lookingAll('NASDAQ:NVDA', ['NVIDIA'])
    #newSource.lookingAll('VTX:NESN', ['NESTLE'])
    #newSource.lookingAll('VTX:SCMN', ['SWISSCOM'])
    #newSource.lookingAll('VTX:NOVN', ['NOVARTIS'])

    newsRDD = newSource.doIt()
    marketSource = GoogleFinanceMarketSourceSpark(
        ['NASDAQ:GOOGL', 'NASDAQ:NVDA', 'VTX:NESN', 'VTX:SCMN', 'VTX:NOVN'])
    newsRDD = newsRDD.map(lambda x: marketSource.addMarketStatusToNews(x))
    #newsRDD = newsRDD.randomSplit([1,1,1,1,1,1,1,1,1,1,1,1,1,1,1])[0]
    newsRDD.cache()
    print('nb news : %d' % newsRDD.count())
    dataSetMaker = DataSetMakerV2(n=config.FEATURES_CONF['vecteur_size'])
    fullDataSet = dataSetMaker.process(newsRDD)  # TODO change
    fullDataSet.cache()
    myClassifier = ClassifiersWrapper()
    myClassifier.addClassifier(classifier=SVMWithSGD,
                               trainParameters={},
                               weight=0.4)
    #myClassifier.addClassifier(classifier=LogisticRegressionWithSGD, trainParameters={}, weight=0.3)
    myClassifier.addClassifier(classifier=NaiveBayes,
                               trainParameters={},
                               weight=0.4)
    myClassifier.addClassifier(classifier=LogisticRegressionWithLBFGS,
                               trainParameters={},
                               weight=0.4)

    myClassifier2 = ClassifiersWrapper()
    myClassifier2.addClassifier(classifier=SVMWithSGD,
                                trainParameters={},
                                weight=0.3)