Example #1
  def __init__(self, code ):
    child_row = DataManager.getRow( "SELECT * FROM Child WHERE ChildCode=?", [code] )

    self.code = code
    self.cost = child_row['Cost']
    self.skills = DataManager.getRows( "SELECT * FROM ChildSkill WHERE ChildCode=?", [code] )
    self.needs = DataManager.getRows( "SELECT * FROM ChildNeed WHERE ChildCode=?", [code] )
Example #2
  def __init__(self, code ):
    hobby_row = DataManager.getRow( "SELECT * FROM Hobby WHERE HobbyCode=?", [code] )

    self.code = code
    self.expense = hobby_row['Expense']
    self.skills = DataManager.getRows( "SELECT * FROM HobbySkill WHERE HobbyCode=?", [code] )
    self.needs = DataManager.getRows( "SELECT * FROM HobbyNeed WHERE HobbyCode=?", [code] )
Example #3
class Test_2_DataManagerSyncStart(unittest.TestCase):    
    
    def setUp(self):            
        self.af = FeedRef((FeatureType.ADDRESS,FeedType.FEATURES))
        self.ac = FeedRef((FeatureType.ADDRESS,FeedType.CHANGEFEED))
        self.ar = FeedRef((FeatureType.ADDRESS,FeedType.RESOLUTIONFEED))
        self.aff = FeatureFactory.getInstance(self.af)
        self.afc = FeatureFactory.getInstance(self.ac)
        self.afr = FeatureFactory.getInstance(self.ar)
        self.dm = DataManager()
        
    def tearDown(self):
        self.dm.close()
        del self.afr
        del self.afc
        del self.aff
    
    def test10_validdatastoreTest(self):
        '''Tests whether a valid address object is returned on json decoded arg'''
        initdata = self.dm.pull()
        self.assertEquals(len(initdata),5,'Invalid ADL list length returned')

        
    def test20_refreshTest(self):
        '''Tests whether a valid address object is returned on json decoded arg'''
        initdata = self.dm.pull()
        self.assertTrue(isinstance(initdata[self.af][0],Address),'Invalid address type returned')
        self.assertTrue(isinstance(initdata[self.ac][0],AddressChange),'Invalid address type returned')
        self.assertTrue(isinstance(initdata[self.ar][0],AddressResolution),'Invalid address type returned')
        
    def test30_refreshTest(self):
        pass
        
    def test40_refreshTest(self):
        pass
Example #4
  def __init__(self, code ):
    job_row = DataManager.getRow( "SELECT * FROM Job WHERE JobCode=?", [code] )

    self.code = code
    self.pay = job_row['Pay']
    self.skillRequirements = DataManager.getRows( "SELECT * FROM JobSkillRequirement WHERE JobCode=?", [code] )
    self.needs = DataManager.getRows( "SELECT * FROM JobNeed WHERE JobCode=?", [code] )
Example #5
  def __init__(self, code ):
    partner_row = DataManager.getRow( "SELECT * FROM Partner WHERE PartnerCode=?", [code] )

    self.code = code
    self.finances = partner_row['Finances']
    self.moneyRequirement = partner_row['MoneyRequirement']
    self.skillRequirements = DataManager.getRows( "SELECT * FROM PartnerSkillRequirement WHERE PartnerCode=?", [code] )
    self.needs = DataManager.getRows( "SELECT * FROM PartnerNeed WHERE PartnerCode=?", [code] )
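Examples #1, #2, #4 and #5 all follow the same "load one row by code, then load related rows" pattern. Below is a minimal sketch of that shared pattern as a base class; the CodedEntity class itself is an illustration-only assumption, while DataManager.getRow and DataManager.getRows are taken from the snippets above.

  # Hypothetical helper, not part of the source projects.
  class CodedEntity:
    table = None       # e.g. "Child", "Hobby", "Job", "Partner"
    codeColumn = None  # e.g. "ChildCode", "HobbyCode", ...

    def __init__(self, code):
      self.code = code
      self.row = DataManager.getRow(
        "SELECT * FROM %s WHERE %s=?" % (self.table, self.codeColumn), [code])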
Example #6
 def add(self, task, projectName=None):
     date = Timings.now()
     
     if self.taskType(task) != "work":
         projectName = None
         
     attributes = self.processTask(date, task, projectName)
     DataManager.writeTask(date, task, projectName, firstToday=len(self.tasks) == 1)
     return attributes
Example #7
    def test_experiment_not_transformed_test(self):
        file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"
        loaded_data = FileLoader.load_file(file_path)
        data_manager = DataManager()
        data_manager.set_data(loaded_data)
        data_manager.split_data(test_split=0.19, train_split=0.62)
        learning_model = FakePredictionModel()
        exp = Experiment(data_manager, learning_model)

        exp.run_experiment()

        self.assertEquals(0, exp.get_r2(SplitTypes.Test))
Example #8
    def test_experiment(self):
        output_filename_header = FileLoader.create_output_file()
        time.sleep(1)
        loaded_algorithm_combinations = FileLoader.read_csv_file("../Datasets/test.csv")
        file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"
        loaded_data = FileLoader.load_file(file_path)
        # feature_eliminator = SelectKBest(f_regression,k=k_value)

        print (loaded_algorithm_combinations[0])
        output_filename = FileLoader.create_output_file()

        for i in range(0, 80):
            normalizer = self.getnormalizer(loaded_algorithm_combinations[i][0])

            feature_eliminator = self.getfeature_eliminator(loaded_algorithm_combinations[i][1])
            the_model = self.get_model(loaded_algorithm_combinations[i][2])

            print "taking ", type(normalizer).__name__, "and feature selector ", type(
                feature_eliminator
            ).__name__, "model", type(the_model).__name__
            FileLoader.write_model_in_file(
                output_filename_header,
                type(normalizer).__name__,
                type(feature_eliminator).__name__,
                type(the_model).__name__,
                "",
                "",
                "",
                "",
                "",
            )

            the_data_manager = DataManager(feature_eliminator, normalizer=normalizer)
            the_data_manager.set_data(loaded_data)
            the_data_manager.split_data(test_split=0.15, train_split=0.70)
            exp = Experiment(the_data_manager, the_model)

            exp.run_experiment()
            # arr_selected = feature_eliminator.get_support(indices=True)

            # if(exp.get_r2(SplitTypes.Train) > 0 and exp.get_r2(SplitTypes.Valid) > 0 and exp.get_r2(SplitTypes.Test) >  0):
            FileLoader.write_model_in_file(
                output_filename,
                type(normalizer).__name__,
                type(feature_eliminator).__name__,
                type(the_model).__name__,
                "",
                exp.fitness_matrix[0],
                exp.get_r2(SplitTypes.Train),
                exp.get_r2(SplitTypes.Valid),
                exp.get_r2(SplitTypes.Test),
            )
Example #9
    def calculateNeededGallons():

        result = []

        recentWateringGallons = DataManager.getPreviousWateringAmounts(pymysql.connect(host='localhost',
                user='******',
                password='',
                db='Garden',
                charset='utf8mb4',
                cursorclass=pymysql.cursors.DictCursor))
        sectorTargets = DataManager.getTargetCapacity(pymysql.connect(host='localhost',
                user='******',
                password='',
                db='Garden',
                charset='utf8mb4',
                cursorclass=pymysql.cursors.DictCursor))
        previousRain = DataManager.getLatestRainfall(pymysql.connect(host='localhost',
                user='******',
                password='',
                db='Garden',
                charset='utf8mb4',
                cursorclass=pymysql.cursors.DictCursor))
        predictedRain = DataManager.getPredictedRainfall(pymysql.connect(host='localhost',
                user='******',
                password='',
                db='Garden',
                charset='utf8mb4',
                cursorclass=pymysql.cursors.DictCursor))
        currentMoistures = DataManager.getLatestMoisture(pymysql.connect(host='localhost',
                user='******',
                password='',
                db='Garden',
                charset='utf8mb4',
                cursorclass=pymysql.cursors.DictCursor))

        for x in range(0, 4):

            currentGallons = (previousRain * 280) + recentWateringGallons[x + 1]

            if currentMoistures[x] > sectorTargets[x]:
                result.insert(x, 0)
            elif currentGallons > 280:
                result.insert(x, 0)
            else:
                if (predictedRain[1] * 280) * (predictedRain[0]/Decimal(100)) + currentGallons > 280:
                    result.insert(x, 0)
                else:
                    result.insert(x, 280 - ((predictedRain[1] * 280) * (predictedRain[0]/Decimal(100)) + currentGallons))

            print((predictedRain[1] * 280) * (predictedRain[0]/Decimal(100)) + currentGallons)

        return result
Example #10
    def test_experiment_all_zeros_r2_1(self):
        the_data_manager = DataManager()
        array_all_zeroes = np.zeros((37, 397))
        the_data_manager.set_data(array_all_zeroes)
        the_data_manager.split_data(test_split=0.19, train_split=0.62)

        the_model = svm.SVR()
        exp = Experiment(the_data_manager, the_model)
        exp.run_experiment()

        r2_train = exp.get_r2(SplitTypes.Train)
        expected = 1.0
        self.assertEqual(r2_train, expected)
Example #11
    def test_experiment_svm_svr_37dataset_r2_train(self):
        file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"
        loaded_data = FileLoader.load_file(file_path)
        the_data_manager = DataManager()
        the_data_manager.set_data(loaded_data)
        the_data_manager.split_data(test_split=0.19, train_split=0.62)
        the_model = svm.SVR()
        exp = Experiment(the_data_manager, the_model)
        exp.run_experiment()

        r2_train = exp.get_r2(SplitTypes.Train)
        expected_svm_r2_value = 0.93994377385638073
        self.assertEqual(r2_train, expected_svm_r2_value)
Example #12
    def test_experiment_sum_of_squares_zeros_test(self):
        the_data_manager = DataManager()
        an_array_of_all_ones = np.ones((37, 397))
        the_model = svm.SVR()
        the_data_manager.set_data(an_array_of_all_ones)
        the_data_manager.split_data(test_split=0.19, train_split=0.62)
        exp = Experiment(the_data_manager, the_model)

        exp.run_experiment()
        sum_of_squares_test = exp.get_sum_of_squares(SplitTypes.Test)

        expected = 0
        self.assertEquals(expected, sum_of_squares_test)
Example #13
    def test_experiment_svr_37dataset_r2_test(self):
        file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"
        loaded_data = FileLoader.load_file(file_path)
        the_data_manager = DataManager()
        the_data_manager.set_data(loaded_data)
        the_data_manager.split_data(test_split=0.19, train_split=0.62)
        the_model = svm.SVR()
        exp = Experiment(the_data_manager, the_model)

        exp.run_experiment()

        r2_test = exp.get_r2(SplitTypes.Test)
        expected_svm_r2_value = -0.33005242525900247
        self.assertEqual(r2_test, expected_svm_r2_value)
Example #14
    def test_split_merge_csv_4_25_8(self):
        file_loader = FileLoader()
        data_manager = DataManager()
        file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"
        result = file_loader.load_file(file_path)
        data_manager.set_data(result)
        data_manager.split_data(test_split=0.11,train_split=0.22)

        test_shapes = np.zeros((4, 397)).shape
        valid_shapes = np.zeros((25,397)).shape
        train_shapes = np.zeros((8, 397)).shape
        expected = np.array([test_shapes, valid_shapes, train_shapes])
        result = np.array([data_manager.datum[SplitTypes.Test].shape, data_manager.datum[SplitTypes.Valid].shape, data_manager.datum[SplitTypes.Train].shape])
        self.assertTrue(np.array_equal(result, expected))
Example #15
def on_message(client, userdata, msg):
    print ('Topic: ', msg.topic, '\nMessage: ', str(msg.payload))
    print("Peter:" + str(msg.payload))
    arr = [x.strip() for x in str(msg.payload).split(',')]
    devId = (arr[0])[2:]
    tmStmp = arr[1]
    x = arr[2]
    y = arr[3]
    z = arr[4]
    lat = arr[5]
    long = arr[6]
    dm = DataManager()
    dm.insertDeviceData(devId,tmStmp,x,y,z,lat,long)
    return
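Example #15 is an MQTT message callback; a minimal sketch of how it might be registered with paho-mqtt follows (the broker address and topic are assumptions, not taken from the snippet).

# Hypothetical wiring for the on_message callback above.
import paho.mqtt.client as mqtt

client = mqtt.Client()
client.on_message = on_message            # callback from Example #15
client.connect("localhost", 1883)         # assumed broker host and port
client.subscribe("devices/telemetry")     # assumed topic name
client.loop_forever()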
Example #16
    def test_split_merge_csv_7_7_23(self):

         file_loader = FileLoader()
         data_manager = DataManager()
         file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"
         result = file_loader.load_file(file_path)
         data_manager.set_data(result)
         data_manager.split_data(test_split=0.19,train_split=0.62)


         valid_and_test_shapes = (7, 397)
         train_shapes = (23, 397)
         expected = np.array([valid_and_test_shapes, valid_and_test_shapes, train_shapes])
         result = np.array([data_manager.datum[SplitTypes.Test].shape, data_manager.datum[SplitTypes.Valid].shape, data_manager.datum[SplitTypes.Train].shape])
         self.assertTrue(np.array_equal(result, expected))
Example #17
    def test_experiment_sum_of_squares_real37_test(self):
        file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"
        loaded_data = FileLoader.load_file(file_path)
        the_data_manager = DataManager()
        the_data_manager.set_data(loaded_data)
        the_model = svm.SVR()
        the_data_manager.split_data(test_split=0.19, train_split=0.62)
        exp = Experiment(the_data_manager, the_model)

        exp.run_experiment()
        sum_of_squares_test = exp.get_sum_of_squares(SplitTypes.Test)

        expected = 6.708898437500002

        self.assertAlmostEqual(expected, sum_of_squares_test)
Example #18
class Test_1_DataManagerFunctionTest(unittest.TestCase):
    
    def setUp(self):
        self.dm = DataManager()
        
    def tearDown(self):
        self.dm.close()
    
    def test10_parseAddressTest(self):
        '''Tests whether a valid address object is returned on json decoded arg'''
        assert True
        
    def test20_pullTest(self):
        '''Tests whether we get a valid list[group[address]]'''
        assert True
Example #19
 def test_split_into_target_and_input(self):
     file_loader = FileLoader()
     data_manager = DataManager()
     file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"
     result = file_loader.load_file(file_path)
     data_manager.set_data(result)
     data_manager.split_data(test_split=0.11,train_split=0.22)
     test_shapes_input = np.zeros((4, 396)).shape
     valid_shapes_input = np.zeros((25,396)).shape
     train_shapes_input = np.zeros((8, 396)).shape
     test_shapes_target = np.zeros((4, )).shape
     valid_shapes_target = np.zeros((25,)).shape
     train_shapes_target = np.zeros((8, )).shape
     expected = np.array([test_shapes_input, valid_shapes_input, train_shapes_input, test_shapes_target, valid_shapes_target, train_shapes_target])
     result = np.array([data_manager.inputs[SplitTypes.Test].shape, data_manager.inputs[SplitTypes.Valid].shape, data_manager.inputs[SplitTypes.Train].shape, data_manager.targets[SplitTypes.Test].shape, data_manager.targets[SplitTypes.Valid].shape, data_manager.targets[SplitTypes.Train].shape])
     self.assertTrue(np.array_equal(result, expected))
Example #20
 def __init__(self, config, logger):
     self.config = config
     self.logger = logger
     
     self.start_date = config.get_value('PORTFOLIO','startdate')
     self.end_date = config.get_value('PORTFOLIO', 'enddate')
     self.name = self.config.get_value('PORTFOLIO', 'name')
     self.prevent_overlaps = {}  ## used to disallow same ticker+markout overlapping
     
     ## Get the list of indicators from the config file, then start IndicatorLibrary
     self.list_of_user_indicators = [s.upper() for s in Util.str_to_list(config.get_value('STRATEGY', 'indicators'))]
     if not self.list_of_user_indicators:
         self.logger.critical("Unable to determine list of user indicators")
         sys.exit(1)
     self.strategy = IndicatorLibrary(self.list_of_user_indicators)
     
     self.list_of_markout_periods = []
     try:
         for x in Util.str_to_list(config.get_value('STRATEGY', 'markout_periods')):
             self.list_of_markout_periods.append(int(x))
     except:
         self.logger.critical("Non integer-type value provided in STRATEGY.markout_periods")
         sys.exit(1)
     max_markout_periods = max(self.list_of_markout_periods)
     
     max_historical_periods = self.strategy.periods_required()
     
   
     self.dm = DataManager(logger, self.start_date,self.end_date,max_historical_periods,max_markout_periods)
     self.__trade_log_fn()
Example #21
 def __init__(self):
     """
         Initialize the program and print the program instructions to the user
     """
     self.printProgramInfos()
     self.datamanager = DataManager()
     self.dataexplorer = DataExplorer()
Example #22
 def countTasks(self):
     """Count tasks statistics divided by projects"""
     self._data = DataManager.getByRange(self._fromDate, self._toDate)
     res = {}
     for date, task, projectName in self._data:
         if task == "__start__":
             self.timings.setPrevDate(None)
         
         spentSeconds = self.timings.count(date, Tasks.taskType(task))
         
         if Tasks.taskType(task) != "work":
             continue
             
         if spentSeconds:
             if projectName not in res:
                 res[projectName] = {}
                 
             if task not in res[projectName]:
                 res[projectName][task] = spentSeconds
             else:
                 res[projectName][task] += spentSeconds
     self._countAttrib([v for k in res for v in res[k].values()])
     if res:
         ret = {}
         for k in res.keys():
             ret[k] = sorted(res[k].iteritems(), key=lambda item:item[1], reverse=True)
         return ret
     else:
         return {}
Example #23
 def calc(self, DataManager, ticker, date):
     quote_list = DataManager.get(ticker, date, -20)
     quote = quote_list.pop()
     hist_volume = []
     for q in quote_list[-20:]:
         hist_volume.append(q.volume)
     return quote.volume > mean(hist_volume) * 2
Example #24
    def _countObject(self, objType, targetAction):
        """Generic function for calculating projects data or slacking statistics"""
        self._data = DataManager.getByRange(self._fromDate, self._toDate)
        res = {}

        for date, task, projectName in self._data:
            if task == "__start__":
                self.timings.setPrevDate(None)
            objKey = projectName if objType == "project" else task
            
            spentSeconds = self.timings.count(date, Tasks.taskType(task))

            if Tasks.taskType(task) != targetAction:
                self.timings.setPrevDate(date)
                continue

            if spentSeconds:
                if objKey not in res:
                    res[objKey] = spentSeconds
                else:
                    res[objKey] += spentSeconds
                    
        self._countAttrib(res.values())
        if res:
            return sorted(res.iteritems(), key=lambda item:item[1], reverse=True)
        else:
            return []
Example #25
 def DataExplorerUserInputLoop(self):
     """
         Create a loop asking the user which action he or she wants to take. The loop breaks (and the program ends) whenever the user types "quit".
     """
     userInput=""
     try:
         while userInput != "quit":
             self.printDataExploreOptions()
             userInput = raw_input("\nPlease provide the input : ")
             if userInput == "1":
                 DataExplorer.generalAnalysis(DataManager.cleaned_data,
                                              DataManager.binaryTree(DataManager.cleaned_data))
             elif userInput == "2":
                 DataExplorer.printVideoCategories()
                 userInputVideoCatagory = raw_input("\nPlease provide the number of the Video Catagory : ")
                 DataExplorer.individual_videocatagory_analysis(DataManager.cleaned_data , userInputVideoCatagory)
             elif userInput == "3":
                 DataExplorer.printCategories()
                 userInputfeature = raw_input("\nPlease provide the number of the feature : ")
                 DataExplorer.individual_feature_analysis(DataManager.cleaned_data,userInputfeature)
             elif userInput == "4":
                 self.InitiateFlow()
             elif userInput == "quit":
                 self.ExitProgram()
     except KeyboardInterrupt:
         print "quitting..."
         sys.exit()
Example #26
 def setUp(self):            
     self.af = FeedRef((FeatureType.ADDRESS,FeedType.FEATURES))
     self.ac = FeedRef((FeatureType.ADDRESS,FeedType.CHANGEFEED))
     self.ar = FeedRef((FeatureType.ADDRESS,FeedType.RESOLUTIONFEED))
     self.aff = FeatureFactory.getInstance(self.af)
     self.afc = FeatureFactory.getInstance(self.ac)
     self.afr = FeatureFactory.getInstance(self.ar)
     self.dm = DataManager()
Example #27
 def calc(self, DataManager, ticker, date):
     quote_list = DataManager.get(ticker, date, -200)
     quote_today = quote_list.pop()
     quote_yesterday = quote_list.pop()
     hist_close = []
     for q in quote_list[-200:]:
         hist_close.append(q.close)
     # use 'and' rather than '&': bitwise '&' binds tighter than the comparisons and gives the wrong result
     return quote_today.close > mean(hist_close) and quote_yesterday.close < mean(hist_close)
Example #28
 def calc(self, DataManager, ticker, date):
     quote_list = DataManager.get(ticker, date, -260)
     quote = quote_list.pop()
     is_new_high = True
     for q in quote_list[-260:]:
         if quote.close < q.close:
             is_new_high = False
     return is_new_high
Example #29
  def playGames( pDecisionMaker=None, pPlayerCodes=[], pNumRounds=1000, pOutPath='.' ):

    outPath = pOutPath

    DataManager.initSettings()
    DataManager.settings['gameResultsDbPath'] = "%s/games.db"%(outPath)
    DataManager.createGameDb()

    game = Game()
    if pDecisionMaker != None:
      game.decisionMaker = pDecisionMaker

    playerCodes = pPlayerCodes
    if len(playerCodes) == 0:
      for card in game.playerCardDeck:
        playerCodes.append( card.code )

    playerCodes = sorted(playerCodes)

    scores = []
    playerScores = {}
    DataManager.clearGameLogDb()
    for code in playerCodes:
      playerScores[code] = []
      for j in range(pNumRounds):
        if j%10 == 0:
          print "%s: Round %d"%(code,j)
        game.resetGame()
        game.addPlayer( code )
        while game.isNextStep():
          game.performNextStep( game.decisionMaker.makeDecision( game, game.nextStepAvailableActions() ) )
        DataManager.insertGameLogIntoDb(game.gameLog)
        scores.append( game.players[0].points() )
        playerScores[code].append( game.players[0].points() )

    DataManager.closeConnection("gameConn")

    line = "Avg Score: %.2f"%( sum(scores) / float(len(scores) ) )
    fileOut  = open( "%s/results.txt"%(outPath), 'wb' )
    fileOut.write( line+'\n' )
    print line
    for player in sorted(playerScores):
      line = "%s Avg Score: %.2f"%(player, sum(playerScores[player]) / float(len(playerScores[player]))) 
      print line
      fileOut.write( line+'\n' )
Example #30
 def setUp(self):    
     self.dm = DataManager(ref_int)
     self.af = FeedRef((FeatureType.ADDRESS,FeedType.FEATURES))
     self.ac = FeedRef((FeatureType.ADDRESS,FeedType.CHANGEFEED))
     self.ar = FeedRef((FeatureType.ADDRESS,FeedType.RESOLUTIONFEED))
     
     self.afc = FeatureFactory.getInstance(self.ac)
     self.afr = FeatureFactory.getInstance(self.ar)
     self.addr_r = _getTestAddress(af[FeedType.FEATURES])
Example #31
def easyBuildDataManager(load=False, save=True):
    '''
    method helper for building data manager
    '''
    dm = DataManager()
    if (load):
        dm.load(DataManager.DEFAULT_BACKUP_FILENAME)
    else:
        gfns = GoogleFinanceNewsSource()
        gfms = GoogleFinanceMarketSource()
        rns = ReutersNewsSource('/home/droz/corpus/headlines-docs.csv')
        dm.addNewsSource(gfns)
        dm.addNewsSource(rns)
        dm.setMarketSource(gfms)
        if (save):
            dm.save(DataManager.DEFAULT_BACKUP_FILENAME)
    return dm
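A short usage sketch for the helper above; whether a backup file already exists on disk is an assumption.

# First run: build from the live sources and write a backup (assumed workflow).
dm = easyBuildDataManager(load=False, save=True)

# Later runs: restore the saved backup instead of re-fetching the sources.
dm_cached = easyBuildDataManager(load=True)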
Example #32
conn = psycopg2.connect(
    database=url.path[1:],
    user=url.username,
    password=url.password,
    host=url.hostname,
    port=url.port
)
"""

##########################################
# Init bot.
##########################################

from DataManager import DataManager
data_manager = DataManager(conn)

from CianCianBot import CianCianBot
bot = CianCianBot(data_manager)

##########################################
# Init flask backend and linebot facility.
##########################################

from flask import Flask, request, abort

from linebot import (LineBotApi, WebhookHandler)
from linebot.exceptions import (InvalidSignatureError)
from linebot.models import (
    MessageEvent,
    TextMessage,
Example #33
if FLAGS.model == "ar":
    from Models.AR import Model
elif FLAGS.model == "lm":
    from Models.LM import Model
elif FLAGS.model == "sar":
    from Models.SAR import Model
else:
    raise EOFError

train_dir = FLAGS.train_dir + "/" + FLAGS.model
if not os.path.exists(FLAGS.train_dir):
    os.mkdir(FLAGS.train_dir)
if not os.path.exists(train_dir):
    os.mkdir(train_dir)

dataManager = DataManager()
num_words, num_idioms = dataManager.get_num()
word_embed_matrix, idiom_embed_matrix = dataManager.get_embed_matrix()


def prepare_batch_data(document, candidates, ori_labels, ori_locs):
    # padding docs
    batch_size = len(document)
    doc_length = [len(doc) for doc in document]
    max_length = max(doc_length)
    mask = np.zeros((batch_size, max_length), dtype=np.float32)
    for i in range(batch_size):
        document[i] = document[i] + [0] * (max_length - doc_length[i])
        mask[i, :doc_length[i]] = 1
    document = np.array(document, dtype=np.int32)
    doc_length = np.array(doc_length, dtype=np.int32)
Example #34
def delete(node,row_id):
    return jsonify(DataManager().delete(node,row_id))
Example #35
def create(node):
    if not request.json:
        abort(400)

    return jsonify(DataManager().add_row(node,request.json))
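Examples #34 and #35 are bare route handlers; the sketch below shows one way they might be registered with Flask. The URL patterns and app setup are assumptions, while the handler bodies come from the two snippets above.

# Hypothetical Flask wiring; the real routes are not shown in the snippets.
from flask import Flask, request, jsonify, abort
from DataManager import DataManager  # as in Example #32

app = Flask(__name__)

@app.route('/<node>/<int:row_id>', methods=['DELETE'])
def delete(node, row_id):
    return jsonify(DataManager().delete(node, row_id))

@app.route('/<node>', methods=['POST'])
def create(node):
    if not request.json:
        abort(400)
    return jsonify(DataManager().add_row(node, request.json))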
Example #36
               action="store_true",
               help="Plot %% of active cases of population")  # '%%' keeps argparse's help formatter from choking on a bare '%'
args = p.parse_args()

if args.all:
    args.active = True
    args.recovered = True
    args.deaths = True
    args.population_percent = True

logger = Logger("log", autosave=True)

if not args.summary and not args.summary_only and not (
        args.active or args.recovered or args.deaths
        or args.population_percent):
    logger.warning(
        "No output specified (active/recovered etc.). Use the -h option to get more information."
    )
    exit(0)

manager = DataManager(logger, args.countries, True)

if args.summary_only:
    manager.load_summary()
    print_summary()
    exit(0)
elif args.summary:
    manager.load_summary()
    print_summary()

present_history(args.countries)
Example #37
tf.flags.DEFINE_boolean("allow_soft_placement", True,
                        "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False,
                        "Log placement of ops on devices")

FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")

# Data Preparation
# ==================================================

dataManager = DataManager()

# Load data
print("Loading training data...")
x_text, y, _ = dataManager.load_training_data()
print("Finish loading data")

x = []
for data in x_text:
    a = 100 - len(data)
    if a > 0:
        front = a / 2
        back = a - front
        front_vec = [
            np.zeros(dataManager.wordvector_dim + 2) for j in range(front)
        ]
Example #38
class OperateProcess(multiprocessing.Process):
    def __init__(self, name):
        multiprocessing.Process.__init__(self)  # call the parent Process constructor
        self.name = name
        self.record = {}  # {"ean":[price, count]}

    def exceptHandler(self, info):
        info = time.strftime("%Y-%m-%d %H:%M:%S") + "\n" + info
        print(info)
        self.debug_file.write(info)
        self.debug_file.flush()

    def run(self):  # run() is the fixed entry point; it is called automatically when the process starts
        self.debug_file = open(self.name + ".debuginfo", "a")
        self.database = DataManager(self.name)
        printYellow("启动后台改价任务")
        while 1:
            chrome_dir = "../chrome_url.txt"
            f = open(chrome_dir, "r")  # open the file object
            str = f.read()  # read the whole txt file into the string str
            f.close()  # close the file
            option = webdriver.ChromeOptions()
            option.add_argument("--user-data-dir=" + os.path.abspath(str))
            option.add_argument('--no-sandbox')
            option.add_argument('--disable-dev-shm-usage')
            # option.add_argument("headless")
            option.add_argument('ignore-certificate-errors')
            option.add_argument('log-level=3')
            option.add_argument('lang=zh_CN.UTF-8')
            prefs = {
                'profile.default_content_setting_values': {
                    'images': 2,
                    'stylesheet': 2,
                }
            }
            option.add_experimental_option('prefs', prefs)
            self.chrome = webdriver.Chrome(executable_path=CHROME_DRIVER_PATH, chrome_options=option)
            self.chrome.maximize_window()
            try:
                self.LoginAccount()
            except:
                self.exceptHandler(traceback.format_exc())
                self.chrome.quit()
                self.database.handlerStatus()
                continue

    def LoginAccount(self):
        self.database.handlerStatus()
        printYellow("后台:登录账户")
        account, password = self.database.getAccountAndPassword()

        self.chrome.get("https://uae.souq.com/ae-en/login.php")
        try:
            elemNewAccount = self.chrome.find_element_by_id("email")
            elemNewLoginBtn = self.chrome.find_element_by_id("siteLogin")
            elemNewAccount.send_keys(account)
            print("输入账户:" + account)
            elemNewLoginBtn.click()
            print("点击siteLogin")
            try:
                cssSelectText = "#continue"
                WebDriverWait(self.chrome, 10, 0.5).until(EC.presence_of_element_located((By.CSS_SELECTOR, cssSelectText)))
                print("获取到continue按钮")
                elemContinue = self.chrome.find_element_by_id("continue")
                elemContinue.click()
                print("点击continue")
                cssSelectText = "#ap_password"
                WebDriverWait(self.chrome, 20, 0.5).until(EC.presence_of_element_located((By.CSS_SELECTOR, cssSelectText)))
                print("获取到password输入框")
                elemPassword = self.chrome.find_element_by_id("ap_password")
                elemLoginBtn = self.chrome.find_element_by_id("signInSubmit")
                elemPassword.send_keys(Keys.CONTROL + "a")
                elemPassword.send_keys(password)
                print("输入密码:********")
                elemLoginBtn.click()
                print("点击continue")
            except:
                print("方式一登录失败,尝试方式二登录")
                cssSelectText = "#password"
                WebDriverWait(self.chrome, 20, 0.5).until(EC.presence_of_element_located((By.CSS_SELECTOR, cssSelectText)))
                print("获取到password输入框")
                elemPassword = self.chrome.find_element_by_id("password")
                elemLoginBtn = self.chrome.find_element_by_id("siteLogin")
                elemPassword.clear()
                elemPassword.send_keys(password)
                print("输入密码:********")
                elemLoginBtn.click()
                print("点击登录")

            cssSelectText = "#search_box"
            WebDriverWait(self.chrome, 20, 0.5).until(EC.presence_of_element_located((By.CSS_SELECTOR, cssSelectText)))
        except:
            if str(self.chrome.current_url).find("uae.souq.com/ae-en/account.php") < 0:
                raise
        while 1:
            try:
                ret = self.NewInventory()
                if ret == -1:
                    return -1
            except:
                raise

    def NewInventory(self):
        if not self.database.shopLock():
            printYellow("后台:已经超出店铺数量限制")
            self.database.setStopStatus()
            while True:
                time.sleep(6000)
        printYellow("后台:打开改价页面")
        self.loginHandler = self.chrome.current_window_handle
        unknownHandler = ""
        for handler in self.chrome.window_handles:
            if handler != self.loginHandler:
                unknownHandler = handler
                break
        readyUri = "https://sell.souq.com/fbs-inventory"
        js = 'window.open("' + readyUri + '")'
        self.chrome.execute_script(js)
        handlers = self.chrome.window_handles
        for handler in handlers:
            if handler != self.loginHandler and handler != unknownHandler:
                self.inventoryFbsHandler = handler
                break

        readyUri = "https://sell.souq.com/inventory/inventory-management"
        js = 'window.open("' + readyUri + '")'
        self.chrome.execute_script(js)
        handlers = self.chrome.window_handles
        for handler in handlers:
            if handler != self.loginHandler and handler != unknownHandler and handler != self.inventoryFbsHandler:
                self.inventoryHandler = handler
                break
        printYellow("后台:开始改价")
        while 1:
            try:
                ret = self.OperateProductSelenium()
                if ret == -2:
                    printYellow("后台:未获取到搜索框,将刷新界面")
                    self.chrome.refresh()
                elif ret == -1:
                    return -1
            except:
                self.exceptHandler(traceback.format_exc())
                self.chrome.refresh()
                continue

    def OperateProductSelenium(self):
        while True:
            self.database.handlerStatus()
            time.sleep(1)
            ean, price, variant_name, is_fbs = self.database.getFirstNeedChangeItem()
            if ean == "ean" and price == "price":
                continue

            if is_fbs == 1:
                self.chrome.switch_to.window(self.inventoryFbsHandler)
                out = time.strftime("%Y-%m-%d %H:%M:%S") + " " + ean + "[fbs]\t" + str(round(price, 2))
            else:
                self.chrome.switch_to.window(self.inventoryHandler)
                out = time.strftime("%Y-%m-%d %H:%M:%S") + " " + ean + "[]\t" + str(round(price, 2))

            WebDriverWait(self.chrome, 240, 0.5).until(self.checkPage)
            change_count, flag = self.database.isLowerThanMaxTimes(ean, variant_name)
            if not flag:
                out += "[" + str(change_count) + "次]"
                printRed("后台:" + out + "\t达到最大改价次数")
                self.database.finishOneChangeItem(ean, price, variant_name)
                continue

            try:
                elemInput = self.chrome.find_elements_by_xpath(".//div[@class='row collapse advanced-search-container']//input")
                elemSearch = self.chrome.find_elements_by_xpath(".//a[@class='button postfix']")
                if not (len(elemInput) > 0 or len(elemSearch) > 0):
                    return -2
                oldEan = elemInput[0].get_attribute("value")
                elemInput[0].clear()
                elemInput[0].send_keys(ean)
                self.chrome.execute_script("arguments[0].click()", elemSearch[0])
                count = 0
                if ean != oldEan:
                    while count < 8:
                        elemLoading = self.chrome.find_element_by_xpath(".//div[@class='filterView']/div[3]")
                        if elemLoading.get_attribute("loading") == "1":
                            break
                        time.sleep(0.5)
                        count += 1
                    time.sleep(1)
                    count = 0
                    while count < 14:
                        elemLoading = self.chrome.find_element_by_xpath(".//div[@class='filterView']/div[3]")
                        if elemLoading.get_attribute("loading") == "0":
                            break
                        time.sleep(0.5)
                        count += 1

                time.sleep(1.5)
                elemProduct = self.chrome.find_elements_by_xpath(".//table[@id='table-inventory']/tbody/tr[1]/td[4]")
                if len(elemProduct) <= 0 or count >= 14:
                    printRed("后台:" + out + "\t没找到这个产品")
                    self.database.finishOneChangeItem(ean, price, variant_name)
                    continue
                self.chrome.execute_script("arguments[0].click()", elemProduct[0])

                elemPriceInput = self.chrome.find_elements_by_xpath(".//input[@id='editableInput']")
                while len(elemPriceInput) <= 0:
                    elemPriceInput = self.chrome.find_elements_by_xpath(".//input[@id='editableInput']")
                if len(elemPriceInput) <= 0:
                    printRed("后台:" + out + "\t无法获取产品的价格修改控件")
                    self.database.finishOneChangeItem(ean, price, variant_name)
                    continue
                old_price = price + 1
                elemPriceInput[0].clear()
                elemPriceInput[0].send_keys(str(price))
                elemBtn = self.chrome.find_elements_by_xpath(".//a[@class='tiny accept-btn']")
                if len(elemBtn) <= 0:
                    printRed("后台:" + out + "\t无法修改价格确定按钮")
                    self.database.finishOneChangeItem(ean, price, variant_name)
                    continue
                time_change = time.strftime("%Y-%m-%d %H:%M:%S")
                self.chrome.execute_script("arguments[0].click()", elemBtn[0])
                self.database.addAChange(ean, variant_name, old_price, price)
                self.database.addChangeRecord(ean, variant_name, time_change, price)
                out += "[" + str(change_count + 1) + "次]"
                printYellow("后台:" + out + "\t改价成功")
                self.database.finishOneChangeItem(ean, price, variant_name)
            except:
                self.exceptHandler(traceback.format_exc())
                self.database.finishOneChangeItem(ean, price, variant_name)
                return -2

    def checkPage(self, driver):
        checkPageFinishScript = "try {if (document.readyState !== 'complete') {return false;} if (window.jQuery) { if (" \
                                "window.jQuery.active) { return false; } else if (window.jQuery.ajax && " \
                                "window.jQuery.ajax.active) { return false; } } if (window.angular) { if (!window.qa) { " \
                                "window.qa = {doneRendering: false }; } var injector = window.angular.element(" \
                                "'body').injector(); var $rootScope = injector.get('$rootScope'); var $http = " \
                                "injector.get('$http'); var $timeout = injector.get('$timeout'); if ($rootScope.$$phase " \
                                "=== '$apply' || $rootScope.$$phase === '$digest' || $http.pendingRequests.length !== 0) " \
                                "{ window.qa.doneRendering = false; return false; } if (!window.qa.doneRendering) { " \
                                "$timeout(function() { window.qa.doneRendering = true;}, 0); return false;}} return " \
                                "true;} catch (ex) {return false;} "
        return driver.execute_script(checkPageFinishScript)
Example #39
parser.add_argument('--interval', type=int, default=10)

# parse the configured command-line arguments
args, _ = parser.parse_known_args(argv)

# configure the log file format
logging.basicConfig(
    filename=('log/%s.log' % args.name) * (1 - args.screen),
    level=logging.DEBUG,
    format='%(asctime)s %(filename)s[line:%(lineno)d] %(message)s',
    datefmt='%H:%M:%S')

# load the corpus text plus the sentiment, negation and intensifier word lists
dm = DataManager(
    args.dataset, {
        'negation': 'negation.txt',
        'intensifier': 'intensifier.txt',
        'sentiment': 'sentiment.txt'
    })

# extract the words of each category from the raw corpus
dm.gen_word_list()
# convert the words to index lists and build the training, validation and test sets
dm.gen_data()

# build the model
model = Model(dm.words, dm.grained, argv)
# instantiate the evaluator
Evaluator = EvaluatorList[dm.grained]
Evaluator = EvaluatorList[dm.grained]


def do_train(label, data):
Example #40
import Logger
from DataManager import DataManager
import os

logger_name = "UpdateDBClasses"
Logger.setup(logger_name)
file_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), "Generated/DatabaseClasses.py")
manager = DataManager(logger_name, email=False)
manager.update_classes_file(file_path)
Example #41
    def load_dataset(self):
        dataManager = DataManager(self.dataset_path)

        self.data = dataManager.GetData()
Example #42
def validate():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    TAG_EMBEDDING_DIM = 64
    VAL_EMBEDDING_DIM = 64
    HIDDEN_DIM = 1500
    NUM_EPOCHS = 2  # 8
    LAYER_NUM = 1
    BATCH_SIZE = 256

    data_manager = DataManager(TRAIN)

    tag_to_idx, idx_to_tag = data_manager.get_tag_dicts()
    val_to_idx, idx_to_val = data_manager.get_val_dicts()

    # ad hoc addition of the UNKNOWN token
    val_to_idx["UNK"] = len(val_to_idx)
    idx_to_val[len(val_to_idx) - 1] = "UNK"

    train_split_idx = int(len(data_manager.get_data()) * 0.05)
    validate_split_idx = int(len(data_manager.get_data()) * 0.07)

    data_val = torch.Tensor([(
        tag_to_idx[(tag, have_children, have_sibling)],
        val_to_idx.get(val, val_to_idx["UNK"]),
    ) for tag, val, have_children, have_sibling in (
        data_manager.get_data()[train_split_idx:validate_split_idx])])

    val_data = torch.utils.data.DataLoader(
        Dataset(data_val),
        BATCH_SIZE,
        shuffle=False,
        drop_last=True,
        num_workers=0,
        pin_memory=True,
    )

    model_tag = torch.load("D://data//budala_16.pickle")

    # model_val = LSTMValue(
    #     VAL_EMBEDDING_DIM, HIDDEN_DIM, len(val_to_idx), len(val_to_idx), LAYER_NUM
    # )
    loss_function = nn.NLLLoss()
    optimizer_tag = optim.Adam(model_tag.parameters())
    # optimizer_val = optim.Adam(model_val.parameters())

    # -----------putting everything on GPU---------
    model_tag.cuda()

    for epoch in range(NUM_EPOCHS):
        start_time = time.time()
        summary_writer = SummaryWriter()

        model_tag.eval()
        # model_val.eval()
        correct_tag = 0
        # correct_val = 0

        loss_sum_tag = 0
        # loss_sum_val = 0

        cnt = 0

        ep_cnt = 0
        with torch.no_grad():
            for i, (sentence, y) in tqdm(
                    enumerate(val_data),
                    total=len(val_data),
                    desc=f"Epoch: {epoch}",
                    unit="batches",
            ):

                global_step_val = epoch * len(val_data) + i
                sentence_tag = sentence[:, :, 0].to(device)
                y_tag = y[:, 0].to(device)
                y_pred_tag = model_tag(sentence_tag)

                # sentence_val = sentence[:, :, 1].to(device)
                # y_val = y[:, 1].to(device)
                # y_pred_val = model_val(sentence_val)

                correct_tag += (y_pred_tag.argmax(dim=1) == y_tag).sum().item()
                # correct_val += (y_pred_val.argmax(dim=1) == y_val).sum().item()

                loss_tag = loss_function(y_pred_tag, y_tag.long())
                # loss_val = loss_function(y_pred_val, y_val.long())

                summary_writer.add_scalar("validation_loss_tag", loss_tag,
                                          global_step_val)
                # summary_writer.add_scalar("validation_loss_val", loss_val, global_step_val)
                loss_sum_tag += loss_tag
                # loss_sum_val += loss_val

                ep_cnt += 1
                cnt += y_tag.size(0)

            print(
                f"Validation tag: loss {loss_sum_tag/ep_cnt}, accuracy:{100*correct_tag/cnt}"
            )
            # print(
            #     f"Validation val: loss {loss_sum_val/ep_cnt}, accuracy:{100*correct_val/cnt}"
            # )
        print(f"Epoch ended, time taken {time.time()-start_time}s")
Example #43
def train():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    TAG_EMBEDDING_DIM = 100
    VAL_EMBEDDING_DIM = 64
    HIDDEN_DIM = 1500
    NUM_EPOCHS = 2  # 8
    LAYER_NUM = 1
    BATCH_SIZE = 256

    data_manager = DataManager(TRAIN)

    d = data_manager.get_data()

    tag_to_idx, idx_to_tag = data_manager.get_tag_dicts()
    val_to_idx, idx_to_val = data_manager.get_val_dicts()

    # ad hoc addition of the UNKNOWN token
    val_to_idx["UNK"] = len(val_to_idx)
    idx_to_val[len(val_to_idx) - 1] = "UNK"

    train_split_idx = int(len(data_manager.get_data()) * 0.9)
    validate_split_idx = int(len(data_manager.get_data()) * 0.92)
    data_train = torch.Tensor([(
        tag_to_idx[(tag, have_children, have_sibling)],
        val_to_idx.get(val, val_to_idx["UNK"]),
    ) for tag, val, have_children, have_sibling in (
        data_manager.get_data()[:train_split_idx])])
    data_val = torch.Tensor([(
        tag_to_idx[(tag, have_children, have_sibling)],
        val_to_idx.get(val, val_to_idx["UNK"]),
    ) for tag, val, have_children, have_sibling in (
        data_manager.get_data()[train_split_idx:validate_split_idx])])

    training_data = torch.utils.data.DataLoader(Dataset(data_train),
                                                BATCH_SIZE,
                                                shuffle=True,
                                                drop_last=True,
                                                num_workers=0)

    # test_data = None

    val_data = torch.utils.data.DataLoader(Dataset(data_val),
                                           BATCH_SIZE,
                                           shuffle=False,
                                           drop_last=True,
                                           num_workers=0)

    model_tag = LSTMTagger(TAG_EMBEDDING_DIM, HIDDEN_DIM, len(tag_to_idx),
                           len(tag_to_idx), LAYER_NUM)
    # model_val = LSTMValue(
    #     VAL_EMBEDDING_DIM, HIDDEN_DIM, len(val_to_idx), len(val_to_idx), LAYER_NUM
    # )
    loss_function = nn.NLLLoss()
    optimizer_tag = optim.SGD(model_tag.parameters(), 0.001)
    # optimizer_val = optim.Adam(model_val.parameters())

    # -----------putting everything on GPU---------
    model_tag.cuda()
    # model_val.cuda()
    # ---------------------------------------------

    model_iter = 1

    for epoch in range(NUM_EPOCHS):

        summary_writer = SummaryWriter()

        model_tag.train()
        # model_val.train()
        start_time = time.time()
        cnt = 0
        for i, (sentence, y) in tqdm(
                enumerate(training_data),
                total=len(training_data),
                desc=f"Epoch: {epoch}",
                unit="batches",
        ):
            global_step = epoch * len(training_data) + i
            size = int(sentence.size(0))

            model_tag.zero_grad()
            # model_val.zero_grad()

            sentence_tag = sentence[:, :, 0].to(device)
            y_tag = y[:, 0].to(device)

            # sentence_val = sentence[:, :, 1].to(device)
            # y_val = y[:, 1].to(device)

            y_pred_tag = model_tag(sentence_tag)
            # y_pred_val = model_val(sentence_val)

            correct_tag = (y_pred_tag.argmax(dim=1) == y_tag).sum().item()
            # correct_val = (y_pred_val.argmax(dim=1) == y_val).sum().item()

            # .long() is needed because y_tag has to hold the target class indices
            loss_tag = loss_function(y_pred_tag, y_tag.long())
            # loss_val = loss_function(y_pred_val, y_val.long())

            summary_writer.add_scalar("Tag train loss", loss_tag, global_step)
            summary_writer.add_scalar("Tag accuracy",
                                      100 * (correct_tag / size), global_step)
            # summary_writer.add_scalar("Val train loss", loss_val, global_step)
            # summary_writer.add_scalar(
            # "Val accuracy", 100 * (correct_val / size), global_step
            # )

            loss_tag.backward()
            # loss_val.backward()

            nn.utils.clip_grad_value_(model_tag.parameters(), 5.0)
            # nn.utils.clip_grad_value_(model_val.parameters(), 5.0)

            optimizer_tag.step()
            # optimizer_val.step()

            if i % 5000 == 0:
                torch.save(model_tag, f"{DATA_ROOT}budala_{model_iter}.pickle")
                model_iter += 1

        model_tag.eval()
        # model_val.eval()

        correct_tag = 0
        correct_val = 0

        loss_sum_tag = 0
        loss_sum_val = 0

        cnt = 0

        ep_cnt = 0
        with torch.no_grad():
            for i, (sentence, y) in tqdm(
                    enumerate(val_data),
                    total=len(val_data),
                    desc=f"Epoch: {epoch}",
                    unit="batches",
            ):
                global_step_val = epoch * len(val_data) + i

                sentence_tag = sentence[:, :, 0].to(device)
                y_tag = y[:, 0].to(device)
                y_pred_tag = model_tag(sentence_tag)

                # sentence_val = sentence[:, :, 1].to(device)
                # y_val = y[:, 1].to(device)
                # y_pred_val = model_val(sentence_val)

                correct_tag += (y_pred_tag.argmax(dim=1) == y_tag).sum().item()
                # correct_val += (y_pred_val.argmax(dim=1) == y_val).sum().item()

                loss_tag = loss_function(y_pred_tag, y_tag.long())
                # loss_val = loss_function(y_pred_val, y_val.long())

                summary_writer.add_scalar("validation_loss_tag", loss_tag,
                                          global_step_val)
                # summary_writer.add_scalar("validation_loss_val", loss_val, global_step_val)
                loss_sum_tag += loss_tag
                # loss_sum_val += loss_val

                ep_cnt += 1
                cnt += y_tag.size(0)

            print(
                f"Validation tag: loss {loss_sum_tag/ep_cnt}, accuracy:{100*correct_tag/cnt}"
            )
            # print(
            #     f"Validation val: loss {loss_sum_val/ep_cnt}, accuracy:{100*correct_val/cnt}"
            # )
        print(f"Epoch ended, time taken {time.time()-start_time}s")

    torch.save(model_tag, "D://data//first_model_tag.pickle")
Example #44
            return self.predictions[0][now_time]
        else:
            return self.predictions[now_time]


if __name__ == '__main__':
    import yaml
    import sys

    sys.path.insert(0, '..')
    from DataManager import DataManager
    from xbos import get_client

    with open("../config_file.yml", 'r') as ymlfile:
        cfg = yaml.load(ymlfile)

    with open("../Buildings/ciee/ZoneConfigs/HVAC_Zone_Eastzone.yml", 'r') as ymlfile:
        advise_cfg = yaml.load(ymlfile)

    if cfg["Server"]:
        c = get_client(agent=cfg["Agent_IP"], entity=cfg["Entity_File"])
    else:
        c = get_client()

    dm = DataManager(cfg, advise_cfg, c, "HVAC_Zone_Eastzone")

    occ = Occupancy(dm.preprocess_occ(), 15, 4, 4, advise_cfg["Advise"]["Occupancy_Sensors"])
    for i in range(10):
        print "Intervals ahead: " + str(i)
        print occ.occ(i)
Example #45
def train():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    TAG_EMBEDDING_DIM = 64
    VAL_EMBEDDING_DIM = 128
    HIDDEN_DIM = 1500
    NUM_EPOCHS = 2
    LAYER_NUM = 1
    BATCH_SIZE = 256

    data_manager_train = DataManager(TRAIN)
    data_manager_eval = DataManager(TEST)
    warnings.filterwarnings("ignore")

    tag_to_idx, idx_to_tag = data_manager_train.get_tag_dicts()
    val_to_idx, idx_to_val = data_manager_train.get_val_dicts()

    validate_split_idx = int(len(data_manager_eval.get_data()) *
                             0.04)  # 2000 samples for evaluation

    data_train = torch.Tensor([(
        tag_to_idx.get((tag, have_children, have_sibling), tag_to_idx["UNK"]),
        val_to_idx.get(val, val_to_idx["UNK"]),
    ) for tag, val, have_children, have_sibling in (
        data_manager_train.get_data())])

    data_eval = torch.Tensor([(
        tag_to_idx.get((tag, have_children, have_sibling), tag_to_idx["UNK"]),
        val_to_idx.get(val, val_to_idx["UNK"]),
    ) for tag, val, have_children, have_sibling in (
        data_manager_eval.get_data()[:validate_split_idx])])

    train_data_loader = torch.utils.data.DataLoader(Dataset(data_train),
                                                    BATCH_SIZE,
                                                    shuffle=True,
                                                    drop_last=True,
                                                    num_workers=8)

    eval_data_loader = torch.utils.data.DataLoader(Dataset(data_eval),
                                                   BATCH_SIZE,
                                                   shuffle=False,
                                                   drop_last=True,
                                                   num_workers=8)

    model_tag = nn.DataParallel(
        AtentionModel(len(tag_to_idx), len(val_to_idx), TAG_EMBEDDING_DIM,
                      VAL_EMBEDDING_DIM, HIDDEN_DIM, LAYER_NUM, False))

    model_val = nn.DataParallel(
        AtentionModel(len(tag_to_idx), len(val_to_idx), TAG_EMBEDDING_DIM,
                      VAL_EMBEDDING_DIM, HIDDEN_DIM, LAYER_NUM, True))

    #model = torch.load(f"D://data//model_attention_1.pickle")
    loss_function = nn.NLLLoss()
    optimizer_tag = optim.Adam(model_tag.parameters())
    optimizer_val = optim.Adam(model_val.parameters())

    # -----------putting models on GPU-------------
    model_tag.cuda()
    model_val.cuda()
    # ---------------------------------------------

    model_iter = 1

    # used for TensorBoard logging
    summary_writer = SummaryWriter()

    for epoch in range(NUM_EPOCHS):

        model_tag.train()
        model_val.train()

        for i, (sentence, y) in tqdm(
                enumerate(train_data_loader),
                total=len(train_data_loader),
                desc=f"Epoch: {epoch}",
                unit="batches",
        ):
            global_step = epoch * len(train_data_loader) + i
            size = int(sentence.size(0))

            model_tag.zero_grad()
            model_val.zero_grad()
            model_tag.train()
            model_val.train()

            unk_idx = val_to_idx["UNK"]
            mask_unk = y[:,
                         1] != unk_idx  # mask for all y val that are not UNK

            sentence_tag = sentence.to(device)
            y_pred_tag = model_tag(sentence_tag)
            y = y.to(device)

            correct_tag = (y_pred_tag.argmax(dim=1) == y[:, 0]).sum().item()
            loss_tag = loss_function(y_pred_tag, y[:, 0].long())

            summary_writer.add_scalar("model_tag: train loss", loss_tag,
                                      global_step)
            summary_writer.add_scalar("model_tag: accuracy",
                                      100 * (correct_tag / size), global_step)

            loss_tag.backward()
            nn.utils.clip_grad_value_(model_tag.parameters(), 5.0)
            optimizer_tag.step()

            loss_val = 0
            if mask_unk.sum() > 0:
                # do forward for val_model
                sentence_val = sentence[mask_unk, :, :].to(device)
                y_pred_val = model_val(sentence_val)
                y = y.to(device)

                correct_val = (y_pred_val.argmax(dim=1) == y[mask_unk,
                                                             1]).sum().item()
                loss_val = loss_function(y_pred_val, y[mask_unk, 1].long())

                summary_writer.add_scalar("model_value: train loss", loss_val,
                                          global_step)
                summary_writer.add_scalar("model_value: train accuracy",
                                          100 * (correct_val / size),
                                          global_step)

                loss_val.backward()
                nn.utils.clip_grad_value_(model_val.parameters(), 5.0)
                optimizer_val.step()

            if (i + 1) % 200 == 0:
                tag = f"TRAIN tag accuracy: {100 * (correct_tag / size)}, tag loss: {loss_tag}, "
                val = f"val accuracy: {100 * (correct_val / size)}, val loss: {loss_val}\n"

                with open(f'{DATA_ROOT}log.txt', 'a') as log:
                    log.write(tag)
                    log.write(val)

            TIME_FOR_EVAL = 2500
            if (i + 1) % TIME_FOR_EVAL == 0:
                #evaluation
                torch.save(
                    model_tag,
                    f"D://data//models//tag//budala_{model_iter}.pickle")
                torch.save(
                    model_val,
                    f"D://data//models//val//budala_{model_iter}.pickle")
                model_iter += 1

                model_tag.eval()
                model_val.eval()

                correct_sum_tag = 0
                correct_sum_val = 0
                loss_sum_tag = 0
                loss_sum_val = 0
                size_sum_eval = 0

                with torch.no_grad():

                    for i_eval, (sentence_eval, y_eval) in tqdm(
                            enumerate(eval_data_loader),
                            total=len(eval_data_loader),
                            desc=f"Epoch eval: {global_step//TIME_FOR_EVAL}",
                            unit="batches",
                    ):
                        global_step_eval = (global_step // TIME_FOR_EVAL
                                            ) * len(eval_data_loader) + i_eval
                        size_eval = int(sentence_eval.size(0))
                        size_sum_eval += size_eval
                        sentence_eval = sentence_eval.to(device)

                        unk_idx = val_to_idx["UNK"]
                        mask_unk = y_eval[:, 1] != unk_idx

                        # evaluate the tag model on this batch
                        sentence_tag = sentence_eval.to(device)
                        y_pred_tag = model_tag(sentence_tag)
                        y_eval = y_eval.to(device)

                        correct_tag = (y_pred_tag.argmax(dim=1) ==
                                       y_eval[:, 0]).sum().item()
                        loss_tag = loss_function(y_pred_tag, y_eval[:, 0].long())

                        correct_sum_tag += correct_tag
                        loss_sum_tag += loss_tag

                        summary_writer.add_scalar("model_tag: evaluation loss",
                                                  loss_tag, global_step_eval)
                        summary_writer.add_scalar(
                            "model_tag: evaluation accuracy",
                            100 * (correct_tag / size_eval), global_step_eval)

                        if mask_unk.sum() > 0:
                            sentence_eval = sentence_eval[mask_unk].to(device)
                            y_pred_val = model_val(sentence_eval)
                            y_eval = y_eval.to(device)

                            correct_val = (y_pred_val.argmax(dim=1) ==
                                           y_eval[mask_unk, 1]).sum().item()
                            loss_val = loss_function(
                                y_pred_val, y_eval[mask_unk, 1].long())

                            correct_sum_val += correct_val
                            loss_sum_val += loss_val

                            summary_writer.add_scalar(
                                "model_value: evaluation loss", loss_val,
                                global_step_eval)
                            summary_writer.add_scalar(
                                "model_value: evaluation accuracy",
                                100 * (correct_val / size_eval),
                                global_step_eval)

                    summary_writer.add_scalar(
                        "model_tag: average evaluation loss",
                        loss_sum_tag / len(eval_data_loader),
                        global_step // TIME_FOR_EVAL)
                    summary_writer.add_scalar(
                        "model_tag: average evaluation accuracy",
                        100 * (correct_sum_tag / size_sum_eval),
                        global_step // TIME_FOR_EVAL)

                    summary_writer.add_scalar(
                        "model_value: average evaluation loss",
                        loss_sum_val / len(eval_data_loader),
                        global_step // TIME_FOR_EVAL)
                    summary_writer.add_scalar(
                        "model_value: average evaluation accuracy",
                        100 * (correct_sum_val / size_sum_eval),
                        global_step // TIME_FOR_EVAL)

                    tag = f"EVAL: tag accuracy: {100 * (correct_sum_tag / size_sum_eval)}, tag loss: {loss_sum_tag/len(eval_data_loader)}, "
                    val = f"val accuracy: {100 * (correct_sum_val / size_sum_eval)}, val loss: {loss_sum_val/len(eval_data_loader)}\n"

                    with open(f'{DATA_ROOT}log.txt', 'a') as log:
                        log.write(tag)
                        log.write(val)
Пример #46
0
from PIL import Image
#torch
import torch
import torch.nn.functional as F
import torchvision
from torchvision import datasets, transforms, models
from torch import nn

#parameters Loading
from AppParametersLoader import AppParametersLoader
parameters = AppParametersLoader()
parameters.print_all()

#Data Loading
from DataManager import DataManager
data_manager = DataManager()
data_manager.load_TrainTestValid(parameters.data_dir())

#model definition
from ModelManager import ModelManager
if parameters.arch() == 'vgg16':
    model = models.vgg16(pretrained=True)
    input_nodes = 25088
elif parameters.arch() == 'densenet121':
    model = models.densenet121(pretrained=True)
    input_nodes = 1024
else:
    raise ValueError("Unsupported architecture: {}".format(parameters.arch()))

classifier = nn.Sequential(
    nn.Linear(input_nodes, parameters.hidden_units()), nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(parameters.hidden_units(),
Пример #47
0
    parser.add_argument('--fast', type=int, choices=[0, 1], default=0)
    parser.add_argument('--screen', type=int, choices=[0, 1], default=0)
    parser.add_argument('--optimizer', type=str, default='ADAGRAD')
    parser.add_argument('--grained', type=int, default=2)
    parser.add_argument('--lr', type=float, default=0.0001)
    parser.add_argument('--lr_word_vector', type=float, default=0.000007)
    parser.add_argument('--epoch', type=int, default=25)
    parser.add_argument('--batch', type=int, default=10)
    parser.add_argument('--doc_num', type=int, default=50000)
    #parser.add_argument('--reload', type=str, default=True)
    parser.add_argument('--saveto', type=str, default='best_model17.pkl')
    parser.add_argument('--reload_dic', type=str, default=False)
    #parser.add_argument('--reload_dic', type=str, default='dic.pkl')
    args, _ = parser.parse_known_args(argv)
    random.seed(args.seed)
    data = DataManager(args.dataset)
    if args.reload_dic:
        print('reloading dictionary...')
        wordlist = data.load_word(args.reload_dic)

    else:
        print('building dictionary...')
        wordlist = data.gen_word()
        print('saving dictionary...')
        pkl.dump(wordlist, open('dic.pkl', 'wb'), -1)
    print('%d unique words in total' % len(wordlist))
    train_data, test_data = data.gen_data(args.grained)
    random.shuffle(train_data)
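    # hold out 11% of the shuffled training data as a dev set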
    num = int(len(train_data) * 0.11)
    dev_data = train_data[:num]
    train_data_new = train_data[num:]
Пример #48
0
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense, BatchNormalization, Flatten, Reshape
from keras import regularizers
from keras.preprocessing.image import ImageDataGenerator
from keras import utils
from DataManager import DataManager

print("Loading training data...")
dm = DataManager(random_state=0)
training_data, training_labels = dm.loadTrainingData()
testing_data, testing_labels = dm.loadTestingData()
validation_data, validation_labels = dm.loadValidationData()

print('Loaded shapes')
for i in training_data, training_labels, testing_data, testing_labels, validation_data, validation_labels:
    print(i.shape)

input_shape = tuple(training_data.shape[1:])
num_classes = len(np.unique(training_labels))
print("input_shape: {}".format(input_shape))
print("num_classes: {}".format(num_classes))

# Convert to categorical classes
training_labels = utils.to_categorical(training_labels, num_classes)
testing_labels = utils.to_categorical(testing_labels, num_classes)
validation_labels = utils.to_categorical(validation_labels, num_classes)

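# on-the-fly normalization and augmentation for the training images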
data_generator = ImageDataGenerator(featurewise_center=True,
                                    featurewise_std_normalization=True,
                                    rotation_range=20,
Пример #49
0
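        # concatenate the remaining per-pair return frames; combined_returns is
        # presumably seeded with the first pair's frame earlier (not shown here)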
        skip_first = 0
        for pair, df in self.returns_dict.items():
            if skip_first == 0:
                skip_first = 1
                continue
            combined_returns = pd.concat([combined_returns, df],
                                         ignore_index=True,
                                         axis=0)
        self.combined_returns = combined_returns
        self.total_returns = 1
        for returns in combined_returns['returns'].values:
            self.total_returns = self.total_returns * (1 + returns)


if __name__ == "__main__":
    dm = DataManager()
    # This code will just do it for one sector
    # x.data = x.getOneSector(sector="Energy", fromDate="2015-01-01", toDate="2016-09-21")
    dm.getOneSector(sector="Energy",
                    fromDate="2013-01-01",
                    toDate="2015-01-01")
    # x.calcReturns()

    strat = CointStrategyStopLoss
    bt = Backtester(strat, dm.data)
    bt.backtest()
    bt.plot_stuff()
    # bt.strat.CA.plot_pair(['MA','V'], fromDate="2014-01-01", toDate="2018-01-01")
    print(bt.total_returns)
    plt.show()
Пример #50
0
 def destination_value(self) -> int:
     if self.__destination_value == -1 and self.__destination is not None:
         self.__destination_value = self.__rate_dest[
             DataManager.transfer_airport_cod_names_to_all(
                 self.__destination.code)]
     return self.__destination_value
Пример #51
0
        print("epoch ", e, ": dev F1: ", devF1, ", test F1: ", testF1)
        f.write("epoch "+ str(e)+ ": dev F1: "+ str(devF1)+ ", test F1: "+ str(testF1)+ "\n")
        f.close()
        torch.save(model, "checkpoints/model_"+args.logfile+"_"+str(e))

if __name__ == "__main__":
    torch.manual_seed(1)
    if not os.path.exists('checkpoints'):
        os.mkdir('checkpoints')

    argv = sys.argv[1:]
    parser = Parser().getParser()
    args, _ = parser.parse_known_args(argv)

    print("Load data start...")
    dm = DataManager(args.datapath, args.testfile)
    wv = dm.vector

    train_data, test_data, dev_data = dm.data['train'], dm.data['test'], dm.data['dev']
    print("train_data count: ", len(train_data))
    print("test_data  count: ", len(test_data))
    print("dev_data   count: ", len(dev_data))

    model = Model(args.lr, args.dim, args.statedim, wv, dm.relation_count)
    model.cuda()
    if args.start != '':
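        # warm-start from a saved model: copy over only the parameters whose names match the current model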
        pretrain_model = torch.load(args.start) 
        model_dict = model.state_dict() 
        pretrained_dict = pretrain_model.state_dict() 
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} 
        model_dict.update(pretrained_dict) 
Пример #52
0
def get_names():
    image_list = 'images.txt'
    return DataManager.get('images/{}'.format(image_list))
Пример #53
0
def update(node,row_id):
    if not request.json:
        abort(400)
    
    return jsonify(DataManager().edit_row(node,row_id,request.json))
Пример #54
0
import gensim
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from concurrent.futures import ThreadPoolExecutor
from DataManager import DataManager

datamanager = DataManager()

sentences = datamanager.sentences
POSes = datamanager.parses
POS_id = datamanager.POS_id
entitypairs = datamanager.training_entitypairs
testing_entitypairs = datamanager.testing_entitypairs
relations = datamanager.relations
document = []


def check_entity_in_words(entity, words):
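    # treat an entity as present if it appears verbatim, or if it has length 3
    # and its last two characters appear in the words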
    if entity in words:
        return True
    elif len(entity) == 3 and entity[1:] in words:
        return True
    else:
        return False


def search_relation_sentence(entitypair):
    context = []
    context_pos = []
    e1_first_sentence = []
    e1_first_pos = []
Пример #55
0
def mainTF(options):

    import tensorflow as tf
    from CreateModel import CreateModel
    from DataManager import DataManager
    from DataSet import DataSet

    print "PROCESSING VALIDATION DATA"

    dgSig = DataGetter.DefinedVariables(options.netOp.vNames, signal=True)
    dgBg = DataGetter.DefinedVariables(options.netOp.vNames, background=True)

    validDataSig = [
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6p1/trainingTuple_0_division_1_TTbarSingleLepT_validation_0.h5",
          ), 1),
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6p1/trainingTuple_0_division_1_TTbarSingleLepTbar_validation_0.h5",
          ), 1)
    ]

    validDataBgTTbar = [
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_TTbarSingleLepT_validation_0.h5",
          ), 1),
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_TTbarSingleLepTbar_validation_0.h5",
          ), 1),
    ]

    validDataBgQCDMC = [
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT100to200_validation_0.h5",
          ), 1),
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT200to300_validation_0.h5",
          ), 1),
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT300to500_validation_0.h5",
          ), 1),
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT500to700_validation_0.h5",
          ), 1),
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT700to1000_validation_0.h5",
          ), 1),
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT1000to1500_validation_0.h5",
          ), 1),
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT1500to2000_validation_0.h5",
          ), 1),
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT2000toInf_validation_0.h5",
          ), 1)
    ]

    validDataBgQCDData = [((
        "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_Data_JetHT_2016_validation_0.h5",
    ), 1)]

    print "Input Variables: ", len(dgSig.getList())

    # Import data
    #print options.runOp.validationSamples

    validDataSig = getValidData(dgSig, validDataSig, options)
    validDataBgTTbar = getValidData(dgBg, validDataBgTTbar, options)
    validDataBgQCDMC = getValidData(dgBg, validDataBgQCDMC, options)
    validDataBgQCDData = getValidData(dgBg, validDataBgQCDData, options)

    validDataTTbar = combineValidationData(validDataSig, validDataBgTTbar)
    validDataQCDMC = combineValidationData(validDataSig, validDataBgQCDMC)
    validDataQCDData = combineValidationData(validDataSig, validDataBgQCDData)

    #get input/output sizes
    #print validData["data"].shape
    nFeatures = validDataTTbar["data"].shape[1]
    nLabels = validDataTTbar["labels"].shape[1]
    nWeights = validDataTTbar["weights"].shape[1]

    #Training parameters
    l2Reg = options.runOp.l2Reg
    MiniBatchSize = options.runOp.minibatchSize
    nEpoch = options.runOp.nepoch
    ReportInterval = options.runOp.reportInterval
    validationCount = min(options.runOp.nValidationEvents,
                          validDataTTbar["data"].shape[0])

    #scale data inputs to mean 0, stddev 1
    categories = numpy.array(options.netOp.vCategories)
    mins = numpy.zeros(categories.shape, dtype=numpy.float32)
    ptps = numpy.zeros(categories.shape, dtype=numpy.float32)
    for i in xrange(categories.max()):
        selectedCategory = categories == i
        mins[selectedCategory] = validDataTTbar["data"][:, selectedCategory].mean()
        ptps[selectedCategory] = validDataTTbar["data"][:, selectedCategory].std()
    ptps[ptps < 1e-10] = 1.0

    ##Create data manager, this class controls how data is fed to the network for training
    #                 DataSet(fileGlob, xsec, Nevts, kFactor, sig, prescale, rescale)
    signalDataSets = [
        DataSet(
            "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6p1/trainingTuple_*_division_0_TTbarSingleLepT_training_*.h5",
            365.4, 61878989, 1.0, True, 0, 1.0, 1.0, 8),
        DataSet(
            "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6p1/trainingTuple_*_division_0_TTbarSingleLepTbar_training_*.h5",
            365.4, 61901450, 1.0, True, 0, 1.0, 1.0, 8),
    ]

    #pt reweighting histograms
    ttbarRatio = (numpy.array([
        0.7976347, 1.010679, 1.0329635, 1.0712056, 1.1147588, 1.0072196,
        0.79854023, 0.7216115, 0.7717652, 0.851551, 0.8372917
    ]),
                  numpy.array([
                      0., 50., 100., 150., 200., 250., 300., 350., 400., 450.,
                      500., 1e10
                  ]))
    QCDDataRatio = (numpy.array([
        0.50125164, 0.70985824, 1.007087, 1.6701245, 2.5925348, 3.6850858,
        4.924969, 6.2674766, 7.5736594, 8.406105, 7.7529635
    ]),
                    numpy.array([
                        0., 50., 100., 150., 200., 250., 300., 350., 400.,
                        450., 500., 1e10
                    ]))
    QCDMCRatio = (numpy.array([
        0.75231355, 1.0563549, 1.2571484, 1.3007764, 1.0678109, 0.83444154,
        0.641499, 0.49130705, 0.36807108, 0.24333349, 0.06963781
    ]),
                  numpy.array([
                      0., 50., 100., 150., 200., 250., 300., 350., 400., 450.,
                      500., 1e10
                  ]))

    backgroundDataSets = [
        DataSet(
            "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_TTbarSingleLepT_training_*.h5",
            365.4, 61878989, 1.0, False, 0, 1.0, 1.0, 8, ttbarRatio),
        DataSet(
            "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_TTbarSingleLepTbar_training_*.h5",
            365.4, 61901450, 1.0, False, 0, 1.0, 1.0, 8, ttbarRatio),
        DataSet(
            "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_Data_JetHT_2016_training_*.h5",
            1.0,
            1,
            1.0,
            False,
            1,
            1.0,
            1.0,
            8,
            include=False),  #QCDDataRatio),
        DataSet(
            "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT100to200_training_*.h5",
            27990000,
            80684349,
            0.0,
            False,
            2,
            1.0,
            1.0,
            1,
            include=False),  #QCDMCRatio), 
        DataSet(
            "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT200to300_training_*.h5",
            1712000,
            57580393,
            0.0,
            False,
            2,
            1.0,
            1.0,
            1,
            include=False),  #QCDMCRatio),
        DataSet(
            "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT300to500_training_*.h5",
            347700,
            54537903,
            0.0,
            False,
            2,
            1.0,
            1.0,
            1,
            include=False),  #QCDMCRatio),
        DataSet(
            "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT500to700_training_*.h5",
            32100,
            62271343,
            0.0,
            False,
            2,
            1.0,
            1.0,
            1,
            include=False),  #QCDMCRatio),
        DataSet(
            "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT700to1000_training_*.h5",
            6831,
            45232316,
            0.0,
            False,
            2,
            1.0,
            1.0,
            1,
            include=False),  #QCDMCRatio),
        DataSet(
            "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT1000to1500_training_*.h5",
            1207,
            15127293,
            0.0,
            False,
            2,
            1.0,
            1.0,
            1,
            include=False),  #QCDMCRatio),
        DataSet(
            "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT1500to2000_training_*.h5",
            119.9,
            11826702,
            0.0,
            False,
            2,
            1.0,
            1.0,
            1,
            include=False),  #QCDMCRatio),
        DataSet(
            "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT2000toInf_training_*.h5",
            25.24,
            6039005,
            0.0,
            False,
            2,
            1.0,
            1.0,
            1,
            include=False),  #QCDMCRatio),
    ]

    dm = DataManager(options.netOp.vNames, nEpoch, nFeatures, nLabels, 2,
                     nWeights, options.runOp.ptReweight, signalDataSets,
                     backgroundDataSets)

    # Build the graph
    denseNetwork = [nFeatures] + options.netOp.denseLayers + [nLabels]
    convLayers = options.netOp.convLayers
    rnnNodes = options.netOp.rnnNodes
    rnnLayers = options.netOp.rnnLayers
    mlp = CreateModel(options, denseNetwork, convLayers, rnnNodes, rnnLayers,
                      dm.inputDataQueue, MiniBatchSize, mins, 1.0 / ptps)

    #summary writer
    summary_writer = tf.summary.FileWriter(options.runOp.directory +
                                           "log_graph",
                                           graph=tf.get_default_graph())

    print "TRAINING NETWORK"

    with tf.Session(config=tf.ConfigProto(
            intra_op_parallelism_threads=8)) as sess:
        sess.run(tf.global_variables_initializer())

        #start queue runners
        dm.launchQueueThreads(sess)

        print "Reporting validation loss every %i batches with %i events per batch for %i epochs" % (
            ReportInterval, MiniBatchSize, nEpoch)

        #preload the first data into staging area
        sess.run([mlp.stagingOp],
                 feed_dict={
                     mlp.reg: l2Reg,
                     mlp.keep_prob: options.runOp.keepProb
                 })

        i = 0
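        # write a TensorBoard training summary once every N_TRAIN_SUMMARY batches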
        N_TRAIN_SUMMARY = 10

        #flush queue until the sample fraction is approximately equal
        flushctr = 200
        while dm.continueTrainingLoop():
            result = sess.run(dm.inputDataQueue.dequeue_many(MiniBatchSize))
            signalCount = result[1][:, 0].sum()
            bgCount = result[1][:, 1].sum()
            signalFraction = signalCount / (signalCount + bgCount)
            #once this fraction drops below 0.5 we are close enough to an equal signal/bg fraction
            if signalFraction < 0.5:
                flushctr -= 1
                if flushctr <= 0:
                    break

        try:
            while dm.continueTrainingLoop():

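                # gradient-reversal weight: ramps smoothly from 0 toward 1 as the batch index i grows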
                grw = 2 / (1 + exp(-i / 10000.0)) - 1

                #run validation operations
                if i == 0 or not i % ReportInterval:
                    #run validation operations
                    validation_loss, accuracy, summary_vl = sess.run(
                        [
                            mlp.loss_ph, mlp.accuracy,
                            mlp.merged_valid_summary_op
                        ],
                        feed_dict={
                            mlp.x_ph: validDataTTbar["data"][:validationCount],
                            mlp.y_ph_:
                            validDataTTbar["labels"][:validationCount],
                            mlp.p_ph_:
                            validDataTTbar["domain"][:validationCount],
                            mlp.reg: l2Reg,
                            mlp.gradientReversalWeight: grw,
                            mlp.wgt_ph:
                            validDataTTbar["weights"][:validationCount]
                        })
                    summary_writer.add_summary(summary_vl, i / N_TRAIN_SUMMARY)

                    print(
                        'Interval %d, validation accuracy %0.6f, validation loss %0.6f'
                        % (i / ReportInterval, accuracy, validation_loss))

                    validation_loss, accuracy, summary_vl_QCDMC = sess.run(
                        [
                            mlp.loss_ph, mlp.accuracy,
                            mlp.merged_valid_QCDMC_summary_op
                        ],
                        feed_dict={
                            mlp.x_ph: validDataQCDMC["data"][:validationCount],
                            mlp.y_ph_:
                            validDataQCDMC["labels"][:validationCount],
                            mlp.p_ph_:
                            validDataQCDMC["domain"][:validationCount],
                            mlp.reg: l2Reg,
                            mlp.gradientReversalWeight: grw,
                            mlp.wgt_ph:
                            validDataQCDMC["weights"][:validationCount]
                        })
                    summary_writer.add_summary(summary_vl_QCDMC,
                                               i / N_TRAIN_SUMMARY)

                    validation_loss, accuracy, summary_vl_QCDData = sess.run(
                        [
                            mlp.loss_ph, mlp.accuracy,
                            mlp.merged_valid_QCDData_summary_op
                        ],
                        feed_dict={
                            mlp.x_ph:
                            validDataQCDData["data"][:validationCount],
                            mlp.y_ph_:
                            validDataQCDData["labels"][:validationCount],
                            mlp.p_ph_:
                            validDataQCDData["domain"][:validationCount],
                            mlp.reg:
                            l2Reg,
                            mlp.gradientReversalWeight:
                            grw,
                            mlp.wgt_ph:
                            validDataQCDData["weights"][:validationCount]
                        })
                    summary_writer.add_summary(summary_vl_QCDData,
                                               i / N_TRAIN_SUMMARY)

                #run training operations
                if i % N_TRAIN_SUMMARY == 0:
                    _, _, summary = sess.run(
                        [
                            mlp.stagingOp, mlp.train_step,
                            mlp.merged_train_summary_op
                        ],
                        feed_dict={
                            mlp.reg: l2Reg,
                            mlp.keep_prob: options.runOp.keepProb,
                            mlp.training: True,
                            mlp.gradientReversalWeight: grw
                        })
                    summary_writer.add_summary(summary, i / N_TRAIN_SUMMARY)
                else:
                    sess.run(
                        [mlp.stagingOp, mlp.train_step],
                        feed_dict={
                            mlp.reg: l2Reg,
                            mlp.keep_prob: options.runOp.keepProb,
                            mlp.training: True
                        })
                i += 1

            while dm.continueFlushingQueue():
                sess.run(dm.inputDataQueue.dequeue_many(MiniBatchSize))

        except Exception, e:
            # Report exceptions to the coordinator.
            dm.requestStop(e)
        finally:
Пример #56
0
class QuotesSpider(scrapy.Spider):
    name = "goldcar"

    def __init__(self, shop_name=None, *args, **kwargs):
        super(QuotesSpider, self).__init__(*args, **kwargs)
        self.database = DataManager(shop_name)
        self.shop_name = shop_name.lower()
        self.start_urls = self.database.getScrapyUrl()
        self.page_index = 1
        # out = "Scraping: " + self.start_urls[0]
        # print(out)

    def parse(self, response):
        for quote in response.xpath(
                ".//div[@class='column column-block block-grid-large single-item']"
        ):
            self.database.handlerStatus()
            time.sleep(random.randint(1, 3))
            data_id = quote.xpath(
                ".//a[@class='img-link quickViewAction sPrimaryLink']/@data-id"
            ).extract()[0] + "/u/"
            data_img = str(
                quote.xpath(
                    ".//a[@class='img-link quickViewAction sPrimaryLink']/@data-img"
                ).extract()[0]).split("item_L_")[-1].split(
                    "_")[0] + "/i/?ctype=dsrch"
            uri = str(
                quote.xpath(
                    ".//a[@class='img-link quickViewAction sPrimaryLink']/@href"
                ).extract()[0]).replace(data_id, data_img)
            if uri is not None:
                yield response.follow(uri, callback=self.parseHandler1)

        # get the URL of the next page (DEL: if there is none, start over from the beginning)
        next_page = response.xpath(
            ".//li[@class='pagination-next goToPage']/a/@href").extract()
        if next_page is not None and len(next_page) > 0:
            next_page = next_page[0].replace("page=", "section=2&page=")
            yield response.follow(next_page, callback=self.parse)

    def parseHandler1(self, response):
        if not response.text:
            print("parseHandler_b: empty response")
            return
        gold_shop = str(
            response.xpath(".//span[@class='unit-seller-link']//b//text()").
            extract()[0]).lower()
        ean = str(
            response.xpath(
                ".//div[@id='productTrackingParams']/@data-ean").extract()[0])
        url = response.xpath(
            ".//a[@class='show-for-medium bold-text']/@href").extract()
        if url is not None and len(url) > 0:
            yield response.follow(url[0],
                                  callback=self.parseHandler2,
                                  meta={
                                      "ean": ean,
                                      "gold_shop": gold_shop
                                  })

    def parseHandler2(self, response):
        infos = self.getAllPirce(
            response
        )  # collect every seller's price, returned as {shop_name: [price, rating, fullfilled], ...}
        self.solutionNoon(response.meta["ean"], infos,
                          response.meta["gold_shop"])

    def getAllPirce(self, response):
        infos = {}
        rows = response.xpath(".//div[@id='condition-all']/div[@class='row']")
        for row in rows:
            price = row.xpath(
                ".//div[@class='field price-field']//text()").extract()[0]
            price = round(float(price.strip().split('\n')[-1].split("SAR")[0]),
                          2)
            shop_name = row.xpath(
                ".//div[@class='field seller-name']//a//text()").extract(
                )[0].lower()
            ret = row.xpath(
                ".//div[@class='field clearfix labels']//div[@class='fullfilled']"
            )
            fullfilled = False
            rating = 100
            if ret:
                fullfilled = True
            else:
                rating = row.xpath(
                    ".//div[@class='field seller-rating']//a//text()").extract(
                    )
                if rating:
                    rating = round(
                        float(rating[0].split('%')[0].split("(")[-1]), 2)
                else:
                    rating = 0  # no rating yet
            infos[shop_name] = [price, rating, fullfilled]
        return infos

    def solutionNoon(self, ean, infos, gold_shop, variant_name=""):
        if not self.database.isInWhiteList(ean, variant_name):
            out = "前台:不在白名单 " + time.strftime("%Y-%m-%d %H:%M:%S") + "   " + ean + "[" + variant_name + "]\t本店铺[" + str(infos[self.shop_name][0]) + "]\t" + \
              "购物车[" + str(infos[gold_shop][0]) + "][" + gold_shop + "]"
            print(out)
            return

        attr = self.database.getAttr(ean)
        out = time.strftime("%Y-%m-%d %H:%M:%S") + "   " + ean + "[" + variant_name + "]\t本店铺[" + str(infos[self.shop_name][0]) + "]\t" + \
              "购物车[" + str(infos[gold_shop][0]) + "][" + gold_shop + "]"
        self.database.spiderRecord(ean, infos[gold_shop][0], gold_shop,
                                   variant_name)
        if gold_shop in attr["my_shop"]:  # the buy box (gold cart) belongs to one of our own shops
            out = "情况A " + out + "\t不修改"
        else:
            if infos[self.shop_name][2]:  # our listing is an FBN product
                diff1 = abs(infos[gold_shop][0] - infos[self.shop_name][0]
                            ) / infos[self.shop_name][0]
                if infos[gold_shop][2]:  # the buy-box seller is also FBN
                    if diff1 > attr["percent"]:
                        out = "情况B " + out + "\t不修改"
                    else:
                        price = round(
                            min(infos[gold_shop][0], infos[self.shop_name][0])
                            - attr["lowwer"], 2)
                        if price < attr["self_least_price"]:
                            out = "情况C " + out + "\t不修改"
                        else:
                            self.database.needToChangePrice(
                                ean, price, gold_shop, variant_name, 1)
                            out = "情况C " + out + "\t差价比[" + str(
                                round(diff1 * 100,
                                      2)) + "%]\t改价为[" + str(price) + "]"
                else:
                    price = round(infos[self.shop_name][0] - attr["lowwer"], 2)
                    if price < max(infos[gold_shop][0],
                                   attr["self_least_price"]):
                        out = "情况D " + out + "\t不修改"
                    else:
                        self.database.needToChangePrice(
                            ean, price, gold_shop, variant_name, 1)
                        out = "情况D " + out + "\t改价为[" + str(price) + "]"

            else:
                least_price = 999999
                for info in infos.values():
                    if least_price > info[0]:
                        least_price = info[0]
                diff2 = abs(
                    min(infos[gold_shop][0], least_price) -
                    infos[self.shop_name][0]) / infos[self.shop_name][0]
                if diff2 > attr["percent"]:
                    out = "情况E " + out + "\t最低价[" + str(least_price) + "]\t" + "差价比[" + \
                          str(round(diff2 * 100, 2)) + "%]" + "不修改"
                else:
                    price = round(
                        min(infos[gold_shop][0], least_price) - attr["lowwer"],
                        2)
                    if price < attr["self_least_price"]:
                        out = "情况F " + out + "\t最低价[" + str(
                            least_price) + "]\t" + "不修改"
                    else:
                        self.database.needToChangePrice(
                            ean, price, gold_shop, variant_name, 0)
                        out = "情况F " + out + "\t最低价[" + str(least_price) + "]\t" + "差价比[" + \
                              str(round(diff2 * 100, 2)) + "%]\t改价为[" + str(price) + "]"
        out = "前台:" + out
        print(out)
Пример #57
0
#! /usr/bin/env python

import tensorflow as tf
import numpy as np
import os
import time
import datetime
from DataManager import DataManager
from RE_CNN import TextCNN
from tensorflow.contrib import learn

dataManager = DataManager()

# Parameters
# ==================================================

# Eval Parameters
tf.flags.DEFINE_integer("batch_size", 64, "Batch Size (default: 64)")
tf.flags.DEFINE_string("checkpoint_dir", "./runs/1481741016/checkpoints", "Checkpoint directory from training run")
tf.flags.DEFINE_boolean("eval_test", True, "Evaluate on all testing data")

# Misc Parameters
tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")


FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))

Пример #58
0
def process_frames(infos, threads=2):
    print("Processing with {0} thread(s)".format(threads))

    with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
        for info in infos:
            executor.submit(process_frame, info)


if __name__ == "__main__":
    # process command line arguments
    options = process_args()

    # load data
    data_manager = DataManager()
    data_manager.load("../db/g2x-1479064727.db")

    # make svg generator
    generator = SVGGenerator('./overlay.svg.mustache')

    # build list of frames of interest, and their associated metadata
    frames = filter(
        lambda f: f.in_range(options["start"], options["end"]),
        map(
            lambda f: Frame(options["input"], options["output"], f),
            os.listdir(options["input"])
        )
    )

    # process all frames
Пример #59
0
# ========== Create NetManager ==========
net_cls = NetManager()

# ========== Create PathManager ==========
path_cls = PathManager(tfrecord_folder=TFRECORD_FOLDER,
                       output_rootfolder=OUT_ROOT_FOLDER,
                       epoch_output_rootfolder=EPOCH_OUT_ROOT_FOLDER)
path_cls.all_makedirs()  # create the output folders for results

# ========== Create DataSets ==========
# load the property data
df = pd.read_csv(path_cls.get_property_path())
shuf_train_ds_cls = DataManager(
    tfrecord_path=path_cls.get_train_ds_path(),
    img_root=IMAGE_ROOT_PATH,
    batch_size=SHUF_LEARN_BATCH_SIZE,
    net_cls=net_cls,
    data_n=df.at[0, 'total_learn_data'],
    suffle_buffer=SUFFLE_BUFFER_SIZE,
)
train_ds_cls = DataManager(
    tfrecord_path=path_cls.get_train_ds_path(),
    img_root=IMAGE_ROOT_PATH,
    batch_size=LEARN_BATCH_SIZE,
    net_cls=net_cls,
    data_n=df.at[0, 'total_learn_data'],
)
test_ds_cls = DataManager(
    tfrecord_path=path_cls.get_test_ds_path(),
    img_root=IMAGE_ROOT_PATH,
    batch_size=TEST_BATCH_SIZE,
    net_cls=net_cls,
Пример #60
0
 def __init__(self, shop_name=None, *args, **kwargs):
     super(QuotesSpider, self).__init__(*args, **kwargs)
     self.database = DataManager(shop_name)
     self.shop_name = shop_name.lower()
     self.start_urls = self.database.getScrapyUrl()
     self.page_index = 1