def main():
    global TIMER
    TIMER = Timer()

    # Command-line arguments
    oparser = argparse.ArgumentParser(description="intelligent crawling with q-learning")
    oparser.add_argument("--config-file", dest="configFile", required=True,
                         help="Path to config file (containing MySQL login etc.)")
    oparser.add_argument("--language-pair", dest="langPair", required=True,
                         help="The 2 languages we're interested in, separated by ','")
    oparser.add_argument("--save-dir", dest="saveDir", default=".",
                         help="Directory that model WIP is saved to. If an existing model exists then load it")
    oparser.add_argument("--save-plots", dest="saveDirPlots", default="plot",
                         help="Directory that plots are saved to")
    oparser.add_argument("--num-train-hosts", dest="numTrainHosts", type=int, default=1,
                         help="Number of domains to train on")
    oparser.add_argument("--num-test-hosts", dest="numTestHosts", type=int, default=3,
                         help="Number of domains to test on")
    oparser.add_argument("--max-crawl", dest="maxCrawl", type=int, default=sys.maxsize,
                         help="Maximum number of pages to crawl")
    oparser.add_argument("--gamma", dest="gamma", type=float, default=0.999,
                         help="Reward discount")
    options = oparser.parse_args()

    np.random.seed()
    np.set_printoptions(formatter={'float': lambda x: "{0:0.1f}".format(x)}, linewidth=666)

    # Output directories
    if not os.path.exists(options.saveDir):
        os.makedirs(options.saveDir, exist_ok=True)
    if not os.path.exists("pickled_domains"):
        os.makedirs("pickled_domains", exist_ok=True)

    languages = GetLanguages(options.configFile)
    params = LearningParams(languages, options, languages.maxLangId, languages.GetLang("None"))
    print("options.numTrainHosts", options.numTrainHosts)

    # Training and test domains
    #hosts = ["http://vade-retro.fr/"]
    hosts = ["http://telasmos.org/"]
    #hosts = ["http://www.buchmann.ch/", "http://telasmos.org/", "http://tagar.es/"]
    #hosts = ["http://www.visitbritain.com/"]

    #hostsTest = ["http://vade-retro.fr/"]
    #hostsTest = ["http://www.visitbritain.com/"]
    hostsTest = ["http://www.visitbritain.com/", "http://chopescollection.be/", "http://www.bedandbreakfast.eu/"]

    envs = GetEnvs(options.configFile, languages, hosts[:options.numTrainHosts])
    envsTest = GetEnvs(options.configFile, languages, hostsTest[:options.numTestHosts])

    # Build the Q-network, then train it on the chosen domains
    tf.reset_default_graph()
    qn = Qnetwork(params)
    init = tf.global_variables_initializer()
    saver = None  #tf.train.Saver()

    with tf.Session() as sess:
        sess.run(init)
        Train(params, sess, saver, qn, envs, envsTest)
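# Illustration only: the --gamma flag above is the per-step reward discount used
# when scoring a crawl episode. A minimal sketch of how a discounted return is
# accumulated (a hypothetical helper, not the Train() implementation in this repo):
def _example_discounted_return(rewards, gamma=0.999):
    # Fold rewards back-to-front: ret_t = r_t + gamma * ret_{t+1}
    ret = 0.0
    for r in reversed(rewards):
        ret = r + gamma * ret
    return ret

# e.g. _example_discounted_return([1.0, 0.0, 1.0])  ->  1.0 + 0.999**2 ≈ 1.998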
def main():
    global TIMER
    TIMER = Timer()

    # Command-line arguments
    oparser = argparse.ArgumentParser(description="intelligent crawling with q-learning")
    oparser.add_argument("--config-file", dest="configFile", required=True,
                         help="Path to config file (containing MySQL login etc.)")
    oparser.add_argument("--language-pair", dest="langPair", required=True,
                         help="The 2 languages we're interested in, separated by ','")
    oparser.add_argument("--save-dir", dest="saveDir", default=".",
                         help="Directory that model WIP is saved to. If an existing model exists then load it")
    oparser.add_argument("--save-plots", dest="saveDirPlots", default="plot",
                         help="Directory that plots are saved to")
    oparser.add_argument("--delete-duplicate-transitions", dest="deleteDuplicateTransitions", default=False,
                         help="If True then only unique transitions are used in each batch")
    oparser.add_argument("--num-train-hosts", dest="numTrainHosts", type=int, default=1,
                         help="Number of domains to train on")
    oparser.add_argument("--num-test-hosts", dest="numTestHosts", type=int, default=3,
                         help="Number of domains to test on")
    options = oparser.parse_args()

    np.random.seed()
    np.set_printoptions(formatter={'float': lambda x: "{0:0.1f}".format(x)}, linewidth=666)

    languages = GetLanguages(options.configFile)
    params = LearningParams(languages, options.saveDir, options.saveDirPlots,
                            options.deleteDuplicateTransitions, options.langPair,
                            languages.maxLangId, languages.GetLang("None"))

    # Output directory for plots
    if not os.path.exists(options.saveDirPlots):
        os.mkdir(options.saveDirPlots)

    # Training and test domains
    #hostName = "http://vade-retro.fr/"
    hosts = ["http://www.buchmann.ch/"]  #, "http://telasmos.org/", "http://tagar.es/"
    #hostName = "http://www.visitbritain.com/"

    #hostNameTest = "http://vade-retro.fr/"
    #hostNameTest = "http://www.buchmann.ch/"
    hostsTest = ["http://www.visitbritain.com/", "http://chopescollection.be/", "http://www.bedandbreakfast.eu/"]

    envs = GetEnvs(options.configFile, languages, hosts[:options.numTrainHosts])
    envsTest = GetEnvs(options.configFile, languages, hostsTest[:options.numTestHosts])

    # Build the Q-networks, then train them on the chosen domains
    tf.reset_default_graph()
    qns = Qnets(params)
    init = tf.global_variables_initializer()
    saver = None  #tf.train.Saver()

    with tf.Session() as sess:
        sess.run(init)
        totRewards, totDiscountedRewards = Train(params, sess, saver, qns, envs, envsTest)
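# Example invocation (a sketch: the script filename and the "en,fr" language codes
# are assumptions, not taken from this repo; the flags are the ones defined above):
#
#   python main.py --config-file config.cfg --language-pair en,fr \
#       --save-dir model --save-plots plot --num-train-hosts 1 --num-test-hosts 3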