def buildStat(self, objnames = []):
    """Register the yield and efficiency-map outputs, then build the collection.

    Output objects of type ``evyield``, ``obyield`` and ``effmap`` are read
    from the configuration.  When ``objnames`` is non-empty the list is
    narrowed with ``lib.getElmAttrAllOr`` on the ``name`` attribute.  An
    ``obyield`` entry without an ``obj`` argument is skipped with a warning.
    """
    self.vb.call("output", "buildStat", [self], "Building the stat module.")

    outputs = self.mypaf.input.cfg.getObjs("region=='output' and (type=='evyield' or type=='obyield' or type=='effmap')")
    if len(objnames) > 0:
        outputs = lib.getElmAttrAllOr(outputs, "name", objnames)

    # [name, definition] pairs of the selections; handed to sel.sel below.
    # NOTE(review): this reads self.input while everything else reads
    # self.mypaf.input -- confirm both refer to the same input object.
    selection_pairs = []
    for selection in self.input.cfg.getObjs("region=='selection' and (type=='none' or type=='tree')"):
        selection_pairs.append([selection.name, selection.definition])

    for out in outputs:
        parsed = args.args(out.argstring)
        kind = out.type

        if kind == "obyield" and not parsed.has("obj"):
            self.vb.warning("Physics object (argument 'obj') is not given for object of type ObYield. ObYield is ignored.")
            continue

        matched = self.mypaf.findSelections(["tree"], parsed)
        categories = [entry.name for entry in matched]

        if kind == "evyield":
            self.objcoll.addEvYield(
                out.name,
                out.definition.split(":")[0],
                out.argstring,
                self.mypaf.input.sources,
                categories,
            )
        elif kind == "obyield":
            self.objcoll.addObYield(
                parsed.get("obj"),
                out.name,
                out.definition.split(":")[0],
                out.argstring,
                self.mypaf.input.sources,
                categories,
            )
        elif kind == "effmap":
            # One sel.sel object per matched selection; only its string is passed on.
            resolved = [sel.sel(entry.definition, selection_pairs) for entry in matched]
            self.objcoll.addEffMap(
                out.name,
                [item.string for item in resolved],
                out.argstring,
                self.mypaf.input.sources,
                categories,
            )

    self.objcoll.build()
def buildDraw(self, objnames = []):
    """Open the output file and register one histogram per ``file``/``plot`` output.

    When ``objnames`` is non-empty the configured outputs are narrowed with
    ``lib.getElmAttrAllOr`` on the ``name`` attribute.  Entries that carry
    neither an ``obs`` nor an ``obsx`` argument are skipped with a warning.
    Outputs of type ``plot`` are additionally passed to ``setHistP``.
    """
    self.vb.call("output", "buildDraw", [self], "Building the draw module.")
    self.openFile()

    outputs = self.mypaf.input.cfg.getObjs("region=='output' and (type=='file' or type=='plot')")
    if len(objnames) > 0:
        outputs = lib.getElmAttrAllOr(outputs, "name", objnames)

    # Collect the histogram info and register each histogram.
    for out in outputs:
        parsed = args.args(out.argstring)

        if not (parsed.has("obs") or parsed.has("obsx")):
            self.vb.warning("Physics observable (argument 'obs') is not given for histogram. Histogram is ignored.")
            continue

        matched = self.mypaf.findSelections(["tree"], parsed)
        categories = [entry.name for entry in matched]

        self.objcoll.addHist(
            out.name,
            lib.getHistDim(out.definition),
            out.argstring,
            self.mypaf.input.sources,
            categories,
        )
        if out.type == "plot":
            self.objcoll.setHistP(out.name)
def progargs():
    """Declare and parse the options of the catalog/deployment update tool.

    The parser comes from ``args(description, 21212)``; the return value is
    whatever its ``parse_args()`` produces.
    """
    # (flags, keyword arguments) in the order they are registered.
    option_table = (
        (('--catalog', '-c'), {
            'metavar': 'JAR',
            'help': 'Catalog file to be given to the VoltDB cluster. '
                    'Defaults to catalog.$SITE.jar',
        }),
        (('--deployment', '-d'), {
            'metavar': 'XML',
            'help': 'Deployment file to be given to the VoltDB cluster. '
                    'Defaults to deployment.$SITE.xml',
        }),
        (('--no-suspend', '-n'), {
            'action': 'store_false',
            'dest': 'suspend',
            'help': 'Do not suspend the server. Updates catalog only.',
        }),
        (('--quiet', '-q'), {
            'action': 'store_false',
            'dest': 'verbose',
            'help': "Be quiet, don't output status messages.",
        }),
        (('--snapshot', '-s'), {
            'help': 'Perform a manual snapshot',
        }),
        (('--snappath',), {
            'metavar': 'PATH',
            'help': 'Path to save the snapshot in. [%(default)s]',
        }),
    )

    parser = args('Update VoltDB catalog and deployment', 21212)
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)
    return parser.parse_args()
def buildScan(self, objnames = []):
    """Register the event/object list outputs and build the collection.

    Every ``evlist`` output is registered both as an event list (all
    colon-separated fields of its definition) and as an event yield (first
    field only); ``oblist`` outputs are handled the same way through the
    object variants.  An ``oblist`` entry without an ``obj`` argument is
    skipped with a warning.  When ``objnames`` is non-empty the outputs are
    narrowed with ``lib.getElmAttrAllOr`` on the ``name`` attribute.
    """
    self.vb.call("output", "buildScan", [self], "Building the scan module.")

    outputs = self.mypaf.input.cfg.getObjs("region=='output' and (type=='evlist' or type=='oblist')")
    if len(objnames) > 0:
        outputs = lib.getElmAttrAllOr(outputs, "name", objnames)

    for out in outputs:
        parsed = args.args(out.argstring)
        kind = out.type

        if kind == "oblist" and not parsed.has("obj"):
            self.vb.warning("Physics object (argument 'obj') is not given for object of type ObList. ObList is ignored.")
            continue

        matched = self.mypaf.findSelections(["tree"], parsed)
        categories = [entry.name for entry in matched]

        if kind == "evlist":
            self.objcoll.addEvList(
                out.name,
                out.definition.split(":"),
                out.argstring,
                self.mypaf.input.sources,
                categories,
            )
            self.objcoll.addEvYield(
                out.name,
                out.definition.split(":")[0],
                out.argstring,
                self.mypaf.input.sources,
                categories,
            )
        elif kind == "oblist":
            self.objcoll.addObList(
                parsed.get("obj"),
                out.name,
                out.definition.split(":"),
                out.argstring,
                self.mypaf.input.sources,
                categories,
            )
            self.objcoll.addObYield(
                parsed.get("obj"),
                out.name,
                out.definition.split(":")[0],
                out.argstring,
                self.mypaf.input.sources,
                categories,
            )

    self.objcoll.build()
def main():
    """Search for alarms and dump them grouped into buckets.

    Console options and the configuration file drive the run: a connection is
    created for the configured hosts, a ``Searcher`` is built from the console
    options, every alarm it pages through is offered to every bucket, and
    finally each bucket is dumped.
    """
    # Console options first, then the configuration file they point at.
    options = args()
    config = get_conf(options['config_file'])

    # Connection to the configured hosts.
    es_settings = config['elasticsearch']
    connections.create_connection(hosts=es_settings['hosts'], timeout=30)

    # The searcher finds alarms for the window/query given on the console.
    searcher = Searcher(
        options['from'],
        options['query'],
        ttime=options['to'],
        per_page=500,
        min_priority=options['min_priority'],
    )

    kibana_url = utils.build_url(config['kibana']['host'],
                                 config['kibana']['secure'])
    buckets = [PathClassBucket(kibana_url)]

    # Feed every fetched alarm to every bucket.
    for alarm in searcher.pages():
        for bucket in buckets:
            bucket.cherry_pick(alarm)

    # Print out the content of all buckets.
    for bucket in buckets:
        bucket.dump()
def __init__(self, mypaf, name, binargs, labels, argstring = ""):
    """Store the histogram description and run the initial setup steps.

    ``argstring`` is parsed into ``self.alist``; ``self.dlist`` starts as an
    empty argument list.  After logging the call through ``mypaf.vb`` the
    instance runs ``setParent``, ``setLabels(labels)`` and ``setD``.
    """
    # Identity and binning.
    self.name, self.binargs = name, binargs

    # State flags.
    self.p = False
    self.built = False

    # Argument lists (empty one first, then the parsed argstring).
    self.dlist = args.args("")
    self.alist = args.args(argstring)

    # Shortcuts into the owning paf object.
    self.mypaf = mypaf
    self.db = mypaf.db
    self.vb = mypaf.vb

    self.vb.call(
        "hist",
        "__init__",
        [self, mypaf, name, binargs, labels, argstring],
        "Initializing the hist class.",
    )

    self.setParent()
    self.setLabels(labels)
    self.setD()
def progargs():
    """Declare and parse the options of the log4j configuration update tool.

    Registers a single optional positional, ``log4jxml``, on the parser
    obtained from ``args(description, 21212)`` and returns the result of its
    ``parse_args()``.
    """
    positional = {
        'nargs': '?',
        'help': 'log4j config file to update to. Defaults to log4j.$SITE.xml',
    }

    parser = args('Update VoltDB log4j configuration', 21212)
    parser.add_argument('log4jxml', **positional)
    return parser.parse_args()
def remove_rf(options, arguments):
    """Remove the file or the whole directory tree named by ``arguments``.

    ``arguments`` is turned into a path with ``join_listlike``.  When
    ``file_isdir`` reports a directory, the directory is removed together
    with its contents; otherwise the path is removed as a single file.

    ``options`` is accepted for interface compatibility and is not used.

    Raises:
        OSError: propagated from ``shutil.rmtree`` / ``os.remove`` when the
            path cannot be removed.
    """
    # Local import so the fix does not depend on the file's import block.
    import shutil

    path = join_listlike({}, arguments)
    if file_isdir({}, args(path)):
        # os.rmdir only removes *empty* directories and raises on anything
        # else, which contradicts the recursive ("rf") contract of this
        # function; rmtree removes the directory and everything below it.
        shutil.rmtree(path)
    else:
        os.remove(path)
def progargs():
    """Declare and parse the options of the snapshot restore tool.

    The parser comes from ``args(description, 21211)``; the return value is
    whatever its ``parse_args()`` produces.
    """
    # (flags, keyword arguments) in the order they are registered.
    option_table = (
        (('--path',), {
            'default': '/var/voltdb/snapshot/',
            'help': 'Path to restore the snapshot from. [%(default)s]',
        }),
        (('snapshotname',), {
            'help': 'Snapshot filename prefix to restore from',
        }),
    )

    parser = args('Restore a VoltDB cluster from snapshot.', 21211)
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)
    return parser.parse_args()
def __init__(self):
    """Set up the initial state and immediately hand control to ``main``.

    Attributes set here:
        args: result of the module-level ``args()`` call.
        text_data: starts as ``None``.
        global_step: starts at 0.
        SENTENCES_PREFIX: the two prefixes ``'Q: '`` and ``'A: '``.
    """
    self.args = args()

    # Nothing loaded and no step taken yet.
    self.text_data, self.global_step = None, 0

    self.SENTENCES_PREFIX = [
        'Q: ',
        'A: ',
    ]

    # Construction runs the whole program.
    self.main()
def __init__(self, mypaf, obj, name, variable, argstring = ""):
    """Store the description of an object-level quantity.

    ``obj`` and ``name`` are stripped of surrounding whitespace, ``variable``
    is stored as given, and ``argstring`` is parsed into ``self.alist``.
    The instance starts out as not built.
    """
    # Shortcuts into the owning paf object.
    self.mypaf = mypaf
    self.db, self.vb = mypaf.db, mypaf.vb

    # Cleaned-up identifiers.
    self.obj = obj.strip()
    self.name = name.strip()

    self.variable = variable
    self.alist = args.args(argstring)

    self.built = False
def __init__(self, mypaf, name, definition, argstring = ""):
    """Store the description of an efficiency map.

    The call is logged through ``mypaf.vb`` before the remaining attributes
    are set.  ``name`` is stripped, ``definition`` is kept as ``self.defs``
    and ``argstring`` is parsed into ``self.alist``.  The instance starts out
    as not built.
    """
    # Shortcuts into the owning paf object.
    self.mypaf = mypaf
    self.db, self.vb = mypaf.db, mypaf.vb

    self.vb.call(
        "effmap",
        "__init__",
        [self, mypaf, name, definition, argstring],
        "Initializing the effmap class.",
    )

    self.name = name.strip()
    self.alist = args.args(argstring)
    self.defs = definition

    self.built = False
def __init__(self, mypaf, name, variable, argstring = ""):
    """Store the description of an event yield.

    The call is logged through ``mypaf.vb`` before the remaining attributes
    are set.  ``name`` and ``variable`` are stripped of surrounding
    whitespace and ``argstring`` is parsed into ``self.alist``.  The instance
    starts out as not built.
    """
    # Shortcuts into the owning paf object.
    self.mypaf = mypaf
    self.db, self.vb = mypaf.db, mypaf.vb

    self.vb.call(
        "evyield",
        "__init__",
        [self, mypaf, name, variable, argstring],
        "Initializing the evyield class.",
    )

    # Cleaned-up identifiers.
    self.name = name.strip()
    self.variable = variable.strip()

    self.alist = args.args(argstring)
    self.built = False
def __init__(self, mypaf, obj, name, variables, argstring = ""):
    """Store the description of an object list.

    ``obj`` and ``name`` are stripped of surrounding whitespace and
    ``argstring`` is parsed into ``self.alist``.  ``self.vars`` always starts
    with ``"Row"`` and ``"Instance"``, followed by the entries of
    ``variables``.  The instance starts out as not built.
    """
    # Shortcuts into the owning paf object.
    self.mypaf = mypaf
    self.db, self.vb = mypaf.db, mypaf.vb

    # Cleaned-up identifiers.
    self.obj = obj.strip()
    self.name = name.strip()

    self.alist = args.args(argstring)

    # Two fixed leading columns, then the requested variables.
    self.vars = ["Row", "Instance"] + list(variables)

    self.built = False
def __init__(self):
    """Set up the initial state and immediately hand control to ``main``.

    ``self.args`` holds the result of the module-level ``args()`` call.  The
    data, model, writer, saver, session and training op all start as ``None``
    and the step counter starts at 0.
    """
    self.args = args()

    # Nothing is loaded or built yet.
    self.text_data = None
    self.seq2seq_model = None

    # Bookkeeping objects, all unset at this point.
    self.writer = self.saver = None
    self.sess = None
    self.train_op = None

    self.global_step = 0
    self.SENTENCE_PREFIX = [
        'Q:',
        'A:',
    ]

    # Construction runs the whole program.
    self.main()
def _load_graph(input_path, feature_file):
    """Build a ``Graph`` from an edge-list file and attach its node features."""
    options = args.args()
    options.input = input_path
    options.feature_file = feature_file

    # The adjacency matrix of the networkx graph seeds the Graph object.
    nx_graph = nx.read_edgelist(options.input, nodetype=int, comments="%")
    adjacency = nx.adjacency_matrix(nx_graph).todense()

    graph = Graph(adjacency)
    graph.read_edgelist(filename=options.input,
                        weighted=options.weighted,
                        directed=options.directed)
    graph.read_node_features(options.feature_file)
    return graph


def main(
        arg_one_input="D:\\workspace\\pycharm\\paper_algorithm\\FindSimilarityCommunity\\src\\contrast\\data\\paper\\synthetic\\football_1.net",
        arg_one_feature_file="D:\\workspace\\pycharm\\paper_algorithm\\FindSimilarityCommunity\\src\\contrast\\data\\paper\\synthetic\\football_info_115_1",
        arg_two_input="D:\\workspace\\pycharm\\paper_algorithm\\FindSimilarityCommunity\\src\\contrast\\data\\paper\\synthetic\\football_1-0.05.net",
        arg_two_feature_file="D:\\workspace\\pycharm\\paper_algorithm\\FindSimilarityCommunity\\src\\contrast\\data\\paper\\synthetic\\football_info_115_2"
):
    """Load two graphs and run SCAN community detection on each.

    Args:
        arg_one_input: edge-list file of the first graph.
        arg_one_feature_file: node-feature file of the first graph.
        arg_two_input: edge-list file of the second graph.
        arg_two_feature_file: node-feature file of the second graph.

    Returns:
        A 4-tuple ``(communities_one, communities_two, g_one, g_two)`` where
        the communities are the results of ``SCAN(...).execute()`` and the
        last two items are the loaded ``Graph`` objects.
    """
    warnings.filterwarnings("ignore", category=FutureWarning)

    # Both inputs go through exactly the same loading steps.
    g_one = _load_graph(arg_one_input, arg_one_feature_file)
    g_two = _load_graph(arg_two_input, arg_two_feature_file)

    # Community detection with SCAN, same parameters for both graphs.
    communities_one = SCAN(g_one.G, 0.5, 3).execute()
    communities_two = SCAN(g_two.G, 0.5, 3).execute()

    return communities_one, communities_two, g_one, g_two
def get_iplist_from_unofficial(self):
    """Expand the ranges returned by ``args.args().test_args()`` into addresses.

    ``test_args()`` yields two parallel sequences; element ``k`` of each is
    combined as ``'0x<first>/<second>'`` and handed to ``IPy.IP``.  Every
    address of every resulting range is collected as a string and duplicates
    are dropped through dictionary keys.
    """
    print('-> get ip list from UNOFFICIAL source...\n\tbe patient...')

    import args
    hex_parts, suffixes = args.args().test_args()

    collected = []
    for idx in xrange(0, len(hex_parts)):
        network = IPy.IP('0x%s/%s' % (hex_parts[idx], suffixes[idx]))
        collected.extend(str(address) for address in network)

    # Dictionary keys are unique, so this removes repeated addresses.
    collected = {}.fromkeys(collected).keys()

    print('\tget %s IPs' % len(collected))
    return collected
def main():
    """Find tweets that quote or mention the account, post replies, record them.

    Set ``hashTagStr`` below before running.  The account is searched for
    quoting tweets and mentions, already recorded users are filtered out,
    ``screenShotAndUpload`` prepares the posts, each prepared post is sent,
    and the handled users are written to ``<envName>_quotedIds.csv``.
    """
    hashTagStr = ""

    # Console arguments.
    search_words, envName, slugid = args()

    # Twitter auth.
    api = auth_api(envName)

    # Search query built around the authenticated account.
    userName = api.me().screen_name
    word = "twitter.com/" + userName + ' /-from:' + userName

    # Users recorded by earlier runs (second column of the CSV).
    csvname = envName + "_quotedIds.csv"
    removeIdsList = [row[1] for row in csvToListMulti(csvname)]

    # Search results followed by the mentions timeline.
    set_count = "100"
    results = api.search(q=word, count=set_count)
    results += api.mentions_timeline()

    attckIdList = []
    for tweet in results:
        author = tweet.user.screen_name
        # NOTE(review): these are substring tests with the tweet text / screen
        # name on the LEFT ("text in userName", "text in 'RT'"); the operands
        # may be reversed from what was intended -- confirm before changing.
        if (tweet.text in userName
                or tweet.text in "RT"
                or author in removeIdsList
                or author in userName):
            continue

        print([tweet.id_str, tweet.user.screen_name, tweet.text])
        attckIdList.append([
            tweet.user.screen_name,
            tweet.id,
            tweet.user.name,
            urlReplyRemove(tweet.user.description),
            tweet.user.statuses_count,
        ])
    pprint(attckIdList)

    # Create the tweets and upload the binaries.
    print(
        "----------------------------------------------------------------upload"
    )
    uploadList = screenShotAndUpload(attckIdList, envName, hashTagStr)

    print(
        "----------------------------------------------------------------post")
    for item in uploadList:
        # Nothing to post when both the status and the media list are empty.
        if item[0] == [] and item[1] == []:
            continue
        try:
            post = api.update_status(status=item[0], media_ids=item[1])
            print([post.created_at, post.text])
        except Exception as e:
            print(f'{item} is {e}')
    pprint(uploadList)

    # Record the handled users as [id, screen_name] rows.
    recordList = [[row[1], row[0]] for row in attckIdList]
    listToCsvMulti(csvname, recordList)
def run(scheme, name, schemes = None, alist = None):
    """Dispatch to the scheme function selected by ``scheme``.

    Args:
        scheme: name of the scheme to run (``"add"``, ``"bins"``, ``"card"``,
            ``"comp"``, ``"div"``, ``"ffit"``, ``"mult"``, ``"pack"``,
            ``"proj"``, ``"stack"``, ``"sub"`` or ``"tfit"``).
        name: forwarded as first argument to the scheme function.
        schemes: forwarded as second argument; a fresh empty list when omitted.
        alist: forwarded as third argument; a fresh ``args.args("")`` when
            omitted.

    Returns:
        The return value of the selected scheme function, or ``None`` when
        ``scheme`` matches none of the known names.
    """
    # The defaults used to be created once at definition time and were then
    # shared by every call, so a scheme function mutating them would leak
    # state into later calls.  Build fresh objects per call instead.
    if schemes is None:
        schemes = []
    if alist is None:
        alist = args.args("")

    if   scheme == "add"   : return add   (name, schemes, alist)
    elif scheme == "bins"  : return bins  (name, schemes, alist)
    elif scheme == "card"  : return card  (name, schemes, alist)
    elif scheme == "comp"  : return comp  (name, schemes, alist)
    elif scheme == "div"   : return div   (name, schemes, alist)
    elif scheme == "ffit"  : return ffit  (name, schemes, alist)
    elif scheme == "mult"  : return mult  (name, schemes, alist)
    elif scheme == "pack"  : return pack  (name, schemes, alist)
    elif scheme == "proj"  : return proj  (name, schemes, alist)
    elif scheme == "stack" : return stack (name, schemes, alist)
    elif scheme == "sub"   : return sub   (name, schemes, alist)
    elif scheme == "tfit"  : return tfit  (name, schemes, alist)
    # Unknown scheme names (including the disabled "datamc" and "roc").
    return None
def progargs():
    """Declare and parse the options of the snapshot tool.

    The parser comes from ``args(description, 21211)``; the return value is
    whatever its ``parse_args()`` produces.
    """
    # (flags, keyword arguments) in the order they are registered.
    option_table = (
        (('--path',), {
            'default': '/var/voltdb/snapshot/',
            'help': 'Path to save the snapshot in. [%(default)s]',
        }),
        (('--no-blocking', '-n'), {
            'dest': 'blocking',
            'action': 'store_false',
            'help': 'Perform a non-blocking snapshot.',
        }),
        (('snapshotname',), {
            'help': 'Snapshot filename prefix to save with',
        }),
    )

    parser = args('Snapshot a VoltDB cluster.', 21211)
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)
    return parser.parse_args()
def exportAsHist(self, var = "pt"):
    """Fill a histogram of ``var`` from the list files and return it.

    The list is closed first.  A ``hist.hist`` is created from the binning
    information that ``lib.prepareHistInfo`` returns for ``var`` and built
    for all sources and categories.  Each line of every per-(source,
    category) file in ``self.paths`` is split on ``":="``; the field at the
    position ``lib.findElm`` reports for ``var`` in ``self.vars`` is
    converted to ``float`` and filled.

    Args:
        var: name of the variable to histogram.

    Returns:
        The filled histogram object.

    Raises:
        ValueError: if a selected field cannot be converted to ``float``.
        IndexError: if a line has fewer fields than the position of ``var``.
    """
    self.close()

    alist = args.args("var=" + var)
    column = lib.findElm(self.vars, var)
    binargs, names = lib.prepareHistInfo(self.db, alist)

    h = hist.hist(self.mypaf, self.name, binargs, names)
    h.build(self.sources, self.categs)

    for sidx in range(len(self.sources)):
        for cidx in range(len(self.categs)):
            # The context manager closes the file even when a line fails to
            # parse; iterating the handle avoids loading the file at once.
            with open(self.paths[sidx][cidx], "r") as listfile:
                for entry in listfile:
                    value = float(entry.split(":=")[column].strip())
                    h.fill(sidx, cidx, value)

    return h
def __init__(self, mypaf, name, dim = 1, argstring = ""):
    """Store the histogram description and run the initial setup steps.

    ``argstring`` is parsed into ``self.alist``.  ``self.salist`` is built by
    ``styleargs.styleargs`` from the ``style`` argument, the literal ``"1"``
    and the value ``lib.useVal`` returns for ``"ROOT.kBlack"`` and the
    ``color`` argument.  After logging the call through ``mypaf.vb`` the
    instance runs ``setBinArgs``, ``setParent`` and ``setLabels``.
    """
    # Identity and dimensionality.
    self.name, self.dim = name, dim

    # State flags.
    self.p = False
    self.built = False

    # Parsed arguments and the style derived from them.
    self.alist = args.args(argstring)
    style = self.alist.get("style")
    color = lib.useVal("ROOT.kBlack", self.alist.get("color"))
    self.salist = styleargs.styleargs(style, "1", color)

    # Shortcuts into the owning paf object.
    self.mypaf = mypaf
    self.db = mypaf.db
    self.vb = mypaf.vb

    self.vb.call(
        "hist",
        "__init__",
        [self, mypaf, name, dim, argstring],
        "Initializing the hist class.",
    )

    self.setBinArgs()
    self.setParent()
    self.setLabels()
def __init__(self, mypaf, type, name, definition = "", argstring = ""):
    """Store the description of a histogram scheme and validate it.

    ``definition`` is stripped and split on whitespace into ``self.dargs``;
    ``argstring`` is kept verbatim and also parsed into ``self.alist``.  The
    instance starts with no error and not executed.  After logging the call
    through ``mypaf.vb`` the instance runs ``check``.
    """
    # Identity.
    self.type, self.name = type, name

    # Definition tokens and arguments.
    tokens = definition.strip().split()
    self.dargs = tokens
    self.alist = args.args(argstring)
    self.argstring = argstring

    # State flags.
    self.errorstate = False
    self.executed = False

    # Shortcuts into the owning paf object.
    self.mypaf = mypaf
    self.db = mypaf.db
    self.vb = mypaf.vb

    self.vb.call(
        "hscheme",
        "__init__",
        [self, mypaf, type, name, definition, argstring],
        "Initializing the hscheme class.",
    )

    self.check()
def build(self):
    """Fill ``self.alist`` with the drawing attributes for this style entry.

    The list starts from ``color=<self.color>``.  Only three attributes vary:

    * ``draw1mode`` is ``"pe"`` for ``self.i == 1`` and ``"hist"`` otherwise;
    * ``fillstyle`` is ``"1001"`` for the ``"mult"`` style with
      ``self.i == 2`` and ``"0"`` otherwise;
    * ``markersize`` is ``"1.0"`` for the ``"mult"`` style and ``"1.8"``
      otherwise.

    Line style, line width and marker style are the same in every case.
    """
    self.alist = args.args("color=" + self.color)

    is_mult = self.style == "mult"
    index = self.i

    draw_mode = "pe" if index == 1 else "hist"
    fill_style = "1001" if (is_mult and index == 2) else "0"
    marker_size = "1.0" if is_mult else "1.8"

    settings = (
        ("draw1mode", draw_mode),
        ("fillstyle", fill_style),
        ("linestyle", "1"),
        ("linewidth", "2"),
        ("markerstyle", "8"),
        ("markersize", marker_size),
    )
    for key, value in settings:
        self.alist.set(key, value)
def progargs():
    """Declare and parse the options of the PostgreSQL import tool.

    The parser comes from ``args(description, 21212)``; the return value is
    whatever its ``parse_args()`` produces.
    """
    # (flags, keyword arguments) in the order they are registered.
    option_table = (
        (('--pgport',), {
            'default': 5432,
            'type': int,
            'metavar': 'PORT',
            'help': 'Port to connect to PostgreSQL on. [%(default)s]',
        }),
        (('--pguser',), {
            'default': 'voltdb',
            'metavar': 'USER',
            'help': 'User to connect to PostgreSQL as. [%(default)s]',
        }),
        (('--pgpassword',), {
            # Workaround Python 2.4's psycopg2 not supporting None
            'default': '',
            'metavar': 'PASSWD',
            'help': 'Password to connect to PostgreSQL with. [%(default)s]',
        }),
        (('--pgdb',), {
            'default': 'voltdb',
            'metavar': 'DB',
            'help': 'PostgreSQL database to use. [%(default)s]',
        }),
        (('--pgschema',), {
            'default': 'voltdb',
            'metavar': 'SCHEMA',
            'help': 'PostgreSQL schema to use. [%(default)s]',
        }),
        (('--quiet', '-q'), {
            'action': 'store_false',
            'dest': 'verbose',
            'help': "Be quiet, don't output status messages",
        }),
        (('pgsqlserver',), {
            'help': 'PostgreSQL database to dump from.',
        }),
    )

    parser = args('Import database from PostgreSQL to VoltDB', 21212)
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)
    return parser.parse_args()
def buildPlot(self, objnames = []):
    """Open the output file and reserve one single-source histogram per output.

    Outputs of type ``file`` or ``plot`` are read from the configuration and,
    when ``objnames`` is non-empty, narrowed with ``lib.getElmAttrAllOr`` on
    the ``name`` attribute.  Entries that carry neither an ``obs`` nor an
    ``obsx`` argument are skipped with a warning.  Each histogram is
    registered with the one source named by its ``source`` argument; outputs
    of type ``plot`` are additionally passed to ``setHistP``.
    """
    self.vb.call("output", "buildPlot", [self], "Building the plot module.")
    self.openFile()

    outputs = self.mypaf.input.cfg.getObjs("region=='output' and (type=='file' or type=='plot')")
    if len(objnames) > 0:
        outputs = lib.getElmAttrAllOr(outputs, "name", objnames)

    # Reserve histograms with one source per histogram.
    for out in outputs:
        parsed = args.args(out.argstring)

        if not (parsed.has("obs") or parsed.has("obsx")):
            self.vb.warning("Physics observable (argument 'obs') is not given for histogram. Histogram is ignored.")
            continue

        matched = self.mypaf.findSelections(["tree"], parsed)
        categories = [entry.name for entry in matched]
        single_source = parsed.get("source")

        self.objcoll.addHist(out.name, out.argstring, [single_source], categories)
        if out.type == "plot":
            self.objcoll.setHistP(out.name)
def progargs():
    """Declare and parse the options of the cluster shutdown tool.

    The parser comes from ``args(description, 21211)``; the return value is
    whatever its ``parse_args()`` produces.
    """
    # (flags, keyword arguments) in the order they are registered.
    option_table = (
        (('--path',), {
            'default': '/var/voltdb/snapshot/',
            'help': 'Path to save the snapshot in. [%(default)s]',
        }),
        (('--no-quiesce',), {
            'action': 'store_true',
            'help': 'Do not quiesce the database before shutdown. DANGER! '
                    'Exports may not be complete with this option.',
        }),
        (('--snapshot',), {
            'default': None,
            'metavar': 'NAME',
            'help': 'Perform a snapshot before shutting down.',
        }),
        (('--snapshot-path',), {
            'default': '/var/voltdb/snapshot/',
            'metavar': 'PATH',
            'help': 'Path to save the snapshot in. [%(default)s]',
        }),
    )

    parser = args('Shutdown a VoltDB cluster', 21211)
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)
    return parser.parse_args()
fname = os.path.join(summary_folder, run_type + '.dat') if result.parallel: c = parallel.Client(profile=result.profile) view = c.load_balanced_view() else: view = None #load arguments sys.path.insert(0, main_folder) try: del sys.modules['args'] except KeyError: pass import args exp_kwargs = args.args() #run if result.run: #run experiment pprint.pprint(exp_kwargs) exp = run_experiments(view=view, action=action, single_runs_folder=single_runs_folder, **exp_kwargs) #collect data if action == 'collect': data = merge(exp) if not run_regress:
if result.parallel: c = parallel.Client(profile=result.profile) view = c.load_balanced_view() else: view = None #load arguments sys.path.insert(0, main_folder) try: del sys.modules['args'] except KeyError: pass import args exp_kwargs = args.args() #run if result.run: #run experiment pprint.pprint(exp_kwargs) exp = run_experiments(view=view, action=action, single_runs_folder=single_runs_folder, **exp_kwargs) #collect data if action == 'collect': data = merge(exp) if not run_regress: estimators=('HDDM2Single', 'Quantiles_subj', 'ML')
def __init__(self, type, training_steps_per_epoch, vocabSize): self.type = type self.args = args(type) print("build model") print("vocabSize", vocabSize) # ============================================================================== # Set necessary parameters # ============================================================================== if (type == "train"): self.global_step = tf.Variable(0, trainable=False) self.learning_rate = tf.maximum( tf.train.exponential_decay( self.args.learning_rate, self.global_step, training_steps_per_epoch, self.args.learning_rate_decay_factor, staircase=True), self.args.min_learning_rate) START = tf.constant(value=[self.args.GO] * self.args.batch_size) # ============================================================================== # define placeholder # ============================================================================== with tf.name_scope("placeholder"): self.personas_ph = tf.placeholder( tf.int32, shape=[ self.args.batch_size, self.args.max_num_persona, self.args.max_num_personalength ], name="personas") self.personas_len_ph = tf.placeholder( tf.int32, shape=[self.args.batch_size, self.args.max_num_persona], name="persona_lengths") self.persona_turn = tf.placeholder(tf.int32, shape=[self.args.batch_size], name="persona_turn") self.historys_ph = tf.placeholder( tf.int32, shape=[ self.args.batch_size, 2 * self.args.max_num_history_turns + 1, self.args.max_num_Qlength ], name="historys") self.historys_len_ph = tf.placeholder( tf.int32, shape=[ self.args.batch_size, 2 * self.args.max_num_history_turns + 1 ], name="history_lengths") self.historys_turn = tf.placeholder(tf.int32, shape=[self.args.batch_size], name="history_turn") self.answers_ph = tf.placeholder( tf.int32, shape=[self.args.batch_size, self.args.max_num_Alength], name="answers") self.answer_len_ph = tf.placeholder(tf.int32, shape=[self.args.batch_size], name="answer_lengths") self.answer_targets_ph = tf.placeholder( tf.int32, shape=[self.args.batch_size, 
self.args.max_num_Alength + 1], name="answer_targets") personas_turn_mask = tf.sequence_mask(self.persona_turn, self.args.max_num_persona) self.att_persona_sentence_mask = tf.cast(personas_turn_mask, dtype=tf.float32) print("self.att_persona_sentence_mask :", self.att_persona_sentence_mask) personas_len_mask = tf.sequence_mask(self.personas_len_ph, self.args.max_num_personalength) personas_len_mask = tf.reshape(personas_len_mask, [self.args.batch_size, -1]) self.att_persona_mask = tf.cast(personas_len_mask, dtype=tf.float32) print("self.att_persona_mask:", self.att_persona_mask) historys_len_mask = tf.sequence_mask(self.historys_len_ph, self.args.max_num_Qlength) historys_len_mask = tf.reshape(historys_len_mask, [self.args.batch_size, -1]) self.att_message_mask = tf.cast(historys_len_mask, dtype=tf.float32) print("self.att_message_mask:", self.att_message_mask) # Because EOS is added at the end, the length is increased by 1 self.answer_len_ph_ = self.answer_len_ph + 1 self.topic_words_emb_ph = tf.placeholder(tf.float32, shape=[ self.args.batch_size, self.args.num_topic_words, self.args.num_topics ], name="topic_words_emb") # --------------------------------------------------- # bow-loss # --------------------------------------------------- self.answers_in_persona_label = tf.placeholder( tf.int32, shape=[self.args.batch_size, vocabSize], name="answers_in_persona_label") # 0/1 标签 answers_in_persona_label = tf.cast(self.answers_in_persona_label, tf.float32) # --------------------------------------------------- # persona attention label # --------------------------------------------------- self.answer_attention_ph = tf.placeholder( tf.float32, shape=[self.args.batch_size, self.args.max_num_persona], name="answer_attention") # ============================================================================== # Embedding (share) and other variable # ============================================================================== with ops.device("/cpu:0"): if 
variable_scope.get_variable_scope().initializer: initializer = variable_scope.get_variable_scope().initializer else: File = h5py.File("../Data/glove_train.h5", 'r') initializer = np.array(File["embedding"]) embedding = variable_scope.get_variable(name="embedding", initializer=initializer, dtype=tf.float32) # Weights self.W_p_key = self.random_weight(self.args.rnnHiddenSize * 2, self.args.rnnHiddenSize * 2, name="W_p_key") self.W_p_value = self.random_weight(self.args.rnnHiddenSize * 2, self.args.rnnHiddenSize * 2, name="W_p_value") START_EMB = embedding_ops.embedding_lookup(embedding, START) # ============================================================================== # split placeholders and embed # ============================================================================== personas = embedding_ops.embedding_lookup(embedding, self.personas_ph) personas = tf.transpose(personas, [1, 0, 2, 3]) personas_lengths = tf.transpose(self.personas_len_ph, [1, 0]) historys = embedding_ops.embedding_lookup(embedding, self.historys_ph) historys = tf.transpose(historys, [1, 0, 2, 3]) historys_lengths = tf.transpose(self.historys_len_ph, [1, 0]) # questions = embedding_ops.embedding_lookup(embedding, self.questions_ph) answers = embedding_ops.embedding_lookup(embedding, self.answers_ph) # ============================================================================== # make RNN cell # ============================================================================== def single_cell(hidden_size, in_keep_prob): if self.args.use_lstm: cell = tf.contrib.rnn.BasicLSTMCell(hidden_size, forget_bias=1.0, state_is_tuple=True) else: cell = tf.contrib.rnn.GRUCell(hidden_size) cell = tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=in_keep_prob) return cell def make_cell(hidden_size, in_keep_prob): if self.args.rnnLayers > 1: return tf.contrib.rnn.MultiRNNCell([ single_cell(hidden_size, in_keep_prob) for _ in range(hidden_size) ]) else: return single_cell(hidden_size, in_keep_prob) 
fw_encoder_cell_persona = make_cell(self.args.rnnHiddenSize, self.args.keep_prob) bw_encoder_cell_persona = make_cell(self.args.rnnHiddenSize, self.args.keep_prob) fw_encoder_cell_history_1 = make_cell(self.args.rnnHiddenSize, self.args.keep_prob) bw_encoder_cell_history_1 = make_cell(self.args.rnnHiddenSize, self.args.keep_prob) fw_encoder_cell_history_2 = make_cell(self.args.rnnHiddenSize, self.args.keep_prob) bw_encoder_cell_history_2 = make_cell(self.args.rnnHiddenSize, self.args.keep_prob) persona_word_enc = [] message_word_enc = [] # ============================================================================== # encode persona # ============================================================================== print("encode personas...") personas_enc = [] for i in range(self.args.max_num_persona): with tf.variable_scope('persona_sentence_EncoderRNN', reuse=tf.AUTO_REUSE) as varscope: persona_sentence_Output, persona_sentence_State = tf.nn.bidirectional_dynamic_rnn( cell_fw=fw_encoder_cell_persona, cell_bw=bw_encoder_cell_persona, inputs=personas[i], sequence_length=personas_lengths[i], dtype=tf.float32, scope=varscope) # [batch_size, encoder_cell.state_size] persona_sentence_Output = tf.concat( [persona_sentence_Output[0], persona_sentence_Output[1]], -1) persona_sentence_State = tf.concat( [persona_sentence_State[0], persona_sentence_State[1]], -1) # print("persona_sentence_State.h:",persona_sentence_State.h) persona_sentence_State = tf.reshape(persona_sentence_State, [self.args.batch_size, 1, -1]) if i == 0: personas_enc = persona_sentence_State persona_word_enc = persona_sentence_Output else: personas_enc = tf.concat( [personas_enc, persona_sentence_State], 1) # sentenses memory persona_word_enc = tf.concat( [persona_word_enc, persona_sentence_Output], 1) # words memory print("personas_enc:", personas_enc) # [batch_size, max_num_persona, hiddensize] print("persona_word_enc:", persona_word_enc ) # [batch_size, max_num_persona*persona_length, hiddensize] # 
============================================================================== # encode history (HRED) # ============================================================================== print("encode history...") historys_sentence_enc = [] for i in range(2 * self.args.max_num_history_turns + 1): with tf.variable_scope('history_sentence_EncoderRNN', reuse=tf.AUTO_REUSE) as varscope: history_sentence_Output, history_sentence_State = tf.nn.bidirectional_dynamic_rnn( cell_fw=fw_encoder_cell_history_1, cell_bw=bw_encoder_cell_history_1, inputs=historys[i], sequence_length=historys_lengths[i], dtype=tf.float32, scope=varscope) # [batch_size, encoder_cell.state_size] history_sentence_Output = tf.concat( [history_sentence_Output[0], history_sentence_Output[1]], -1) history_sentence_State = tf.concat( [history_sentence_State[0], history_sentence_State[1]], -1) # print("history_sentence_State:",history_sentence_State) history_sentence_State = tf.reshape(history_sentence_State, [self.args.batch_size, 1, -1]) if i == 0: historys_sentence_enc = history_sentence_State message_word_enc = history_sentence_Output else: historys_sentence_enc = tf.concat( [historys_sentence_enc, history_sentence_State], 1) message_word_enc = tf.concat( [message_word_enc, history_sentence_Output], 1) print("historys_sentence_enc:", historys_sentence_enc) # [batch_size, h_turn, hidden_size*2] print("message_word_enc:", message_word_enc) with tf.variable_scope('history_sequence_EncoderRNN', reuse=tf.AUTO_REUSE) as varscope: history_sequence_Output, history_sequence_State = tf.nn.bidirectional_dynamic_rnn( cell_fw=fw_encoder_cell_history_2, cell_bw=bw_encoder_cell_history_2, inputs=historys_sentence_enc, sequence_length=self.historys_turn, dtype=tf.float32, scope=varscope) # [batch_size, encoder_cell.state_size] history_sequence_Output = tf.concat( [history_sequence_Output[0], history_sequence_Output[1]], -1) history_sequence_State = tf.concat( [history_sequence_State[0], history_sequence_State[1]], -1) 
print("history_sequence_output:", history_sequence_Output) # [batch_size, h_turn, hidden_size] history_sequence_Output = tf.transpose(history_sequence_Output, [1, 0, 2]) print("history_sequence_output:", history_sequence_Output) # [h_turn, batch_size, hidden_size] print("history_sequence_State:", history_sequence_State) # [batch_size, hidden_size] # ==================================================== # context retrieve persona sentense memory # ==================================================== print("query --> persona_sentense-memory") personas_enc_key = tf.matmul(personas_enc, self.W_p_key) personas_enc_value = tf.matmul(personas_enc, self.W_p_value) persona_memory_enc = [] persona_a_t_all = [] query = 0 for i in range(2 * self.args.max_num_history_turns + 1): query = query + history_sequence_Output[i] s = tf.reduce_sum( tf.multiply(tf.expand_dims(query, 1), personas_enc_key), 2) # [batch_size, len] a_t = tf.nn.softmax(s) # print("a_t:", a_t) persona_a_t_all.append(a_t) v_P = tf.reduce_sum( tf.multiply(tf.expand_dims(a_t, -1), personas_enc_value), 1) query = v_P # query = query + v_P persona_memory_enc.append(query) # [len, batch, hiden_size] # Select the final memory result according to the context length a = tf.range(self.args.batch_size) b = self.historys_turn - 1 index = tf.concat([tf.expand_dims(b, 1), tf.expand_dims(a, 1)], 1) print("index:", index) persona_memory = tf.gather_nd(persona_memory_enc, index) print("persona_memory:", persona_memory) self.persona_a_t = tf.gather_nd(persona_a_t_all, index) # ==================================================== # Merge information,get s0 # ==================================================== encoder_State = tf.concat( values=[history_sequence_State, persona_memory], axis=1) # [batch_size, (2*hidden_size)*2] encoder_State = tf.layers.dense(encoder_State, self.args.rnnHiddenSize) print("encoder_State:", encoder_State) # attention sentences persona_sentence_attention_State = personas_enc 
print("persona_sentence_attention_State:", persona_sentence_attention_State) # attention words persona_attention_State = persona_word_enc message_attention_State = message_word_enc print("persona_attention_State:", persona_attention_State) print("message_attention_State:", message_attention_State) # ============================================================================== # decode # ============================================================================== print("decode ...") with tf.variable_scope('DecoderRNN'): # att_persona_sentence_mask = self.att_persona_sentence_mask att_persona_mask = self.att_persona_mask att_message_mask = self.att_message_mask topic_words_emb_ph = self.topic_words_emb_ph encoder_State_s0 = encoder_State if (self.type != "train") and self.args.beam_search: print("use beamsearch decoding.. num_BeamSearch=", self.args.num_BeamSearch) persona_attention_State = tf.contrib.seq2seq.tile_batch( persona_attention_State, multiplier=self.args.num_BeamSearch) att_persona_mask = tf.contrib.seq2seq.tile_batch( self.att_persona_mask, multiplier=self.args.num_BeamSearch) message_attention_State = tf.contrib.seq2seq.tile_batch( message_attention_State, multiplier=self.args.num_BeamSearch) att_message_mask = tf.contrib.seq2seq.tile_batch( self.att_message_mask, multiplier=self.args.num_BeamSearch) topic_words_emb_ph = tf.contrib.seq2seq.tile_batch( self.topic_words_emb_ph, multiplier=self.args.num_BeamSearch) encoder_State_s0 = tf.contrib.seq2seq.tile_batch( encoder_State, multiplier=self.args.num_BeamSearch) encoder_State = nest.map_structure( lambda s: tf.contrib.seq2seq.tile_batch( s, self.args.num_BeamSearch), encoder_State) # mask message_mask_inf = 1 - att_message_mask mask = np.zeros(att_message_mask.shape) for i in range(att_message_mask.shape[0]): for j in range(att_message_mask.shape[1]): if message_mask_inf[i][j] == 1: mask[i][j] = -np.inf att_message_mask_inf = message_mask_inf * mask persona_mask_inf = 1 - att_persona_mask mask = 
np.zeros(att_persona_mask.shape) for i in range(att_persona_mask.shape[0]): for j in range(att_persona_mask.shape[1]): if persona_mask_inf[i][j] == 1: mask[i][j] = -np.inf att_persona_mask_inf = persona_mask_inf * mask self.decoder_cell_ = MyCell(self.args.rnnHiddenSize, persona_attention_State, att_persona_mask_inf, message_attention_State, att_message_mask_inf, encoder_State_s0, topic_words_emb_ph) # ------------- dropout ------------------ self.decoder_cell = tf.contrib.rnn.DropoutWrapper( self.decoder_cell_, input_keep_prob=self.args.keep_prob) # The decoder used by train and test is different, for the variable name correspondence if (self.type == "train"): output_layer = tf.compat.v1.layers.Dense( vocabSize, kernel_initializer=tf.truncated_normal_initializer( mean=0.0, stddev=0.1), name='decoder/dense') else: output_layer = tf.compat.v1.layers.Dense( vocabSize, kernel_initializer=tf.truncated_normal_initializer( mean=0.0, stddev=0.1)) if (self.type == "train"): answers = [ tf.squeeze(input=word, axis=1) for word in tf.split( value=answers, num_or_size_splits=self.args.max_num_Alength, axis=1) ] # print("answer:",answer) answers = [START_EMB] + answers # answers = tf.transpose(answers, [1, 0, 2]) # [batch_size, A_length+1, embedding_size] print("answers:", answers) decoder_Outputs, decoder_State = static_rnn( cell=self.decoder_cell, inputs=answers, initial_state=encoder_State, sequence_length=self.answer_len_ph_, dtype=tf.float32, scope="decoder") decoder_Outputs = tf.stack(decoder_Outputs, 1) print("decoder_Outputs:", decoder_Outputs) self.decoder_logits_train = output_layer( decoder_Outputs) # [batch_size, A_len, vocab] print("self.decoder_logits_train:", self.decoder_logits_train) # result self.answers_predict = tf.argmax(self.decoder_logits_train, axis=-1, name='answers_predict') print("self.answers_predict:", self.answers_predict) mask = tf.cast(x=tf.not_equal(x=self.answer_targets_ph, y=self.args.PAD), dtype=tf.float32) # [batch_size, Alength+1] 
self.loss1 = tf.contrib.seq2seq.sequence_loss( logits=self.decoder_logits_train, targets=self.answer_targets_ph, weights=mask) self.ppl = tf.reduce_mean(tf.exp(self.loss1)) # ---------------------------------------------------------------------------------------------- # P-BoWs loss # ----------------------------------------------------------------------------------------------- print("bow-loss-sigmoid-weight") print("lamba_loss1:", self.args.lamba_loss1) bow_state = tf.reduce_sum(self.decoder_logits_train, 1) # [batch_size, vocab] self.bow_prediction = tf.nn.sigmoid(bow_state) print("self.bow_prediction:", self.bow_prediction) # [batch_size, vocab] target_one_hot_bow = tf.one_hot( indices=self.answer_targets_ph, depth=vocabSize, dtype=tf.float32) # [batch_size, Alength+1, vocab] target_bow = tf.reduce_max(input_tensor=target_one_hot_bow, axis=1) # [batch_size, vocab] # mask2 remove pad, eos, etc. m1 = [1.0 for _ in range(vocabSize - 4)] m2 = [0.0 for _ in range(4)] m3 = tf.reshape(tf.concat([m2, m1], 0), [1, -1]) self.mask2 = tf.concat([m3] * self.args.batch_size, axis=0) print("mask2:", self.mask2) self.target_bow = target_bow * self.mask2 + answers_in_persona_label * self.args.lamba_persona_weight print("self.target_bow:", self.target_bow) # sigmoid loss ylogy+(1-y)log(1-y) self.loss2 = -tf.reduce_mean( input_tensor=self.target_bow * tf.log(self.bow_prediction + eps) + (1 - self.target_bow) * tf.log( (1 - self.bow_prediction) + eps), axis=1) # ---------------------------------------------------------------------------------------------- # P-Match loss # ----------------------------------------------------------------------------------------------- print("lamba_loss2:", self.args.lamba_loss2) persona_a_t = tf.log(self.persona_a_t + eps) print("answer_attention_ph:", self.answer_attention_ph) print("persona_sentence_a_t:", persona_a_t) self.loss3 = -tf.reduce_sum( input_tensor=persona_a_t * self.answer_attention_ph, axis=1) # [batch_size] print("self.loss3:", 
self.loss3) # total loss self.loss1 = tf.reduce_mean(self.loss1) self.loss2 = tf.reduce_mean(self.loss2) self.loss3 = tf.reduce_mean(self.loss3) self.loss = self.loss1 + self.args.lamba_loss1 * self.loss2 + self.args.lamba_loss2 * self.loss3 # self.loss = tf.reduce_mean(self.loss1) # -----------tersonborad ------------------ # tf.summary.scalar('loss', self.loss) params = tf.trainable_variables() gradients = tf.gradients(self.loss, params) clipped_gradients, norm = tf.clip_by_global_norm( gradients, self.args.max_gradient_norm) self.opt_op = tf.compat.v1.train.AdamOptimizer( self.learning_rate).apply_gradients( zip(clipped_gradients, params), global_step=self.global_step) else: start_tokens = tf.ones([ self.args.batch_size, ], tf.int32) * self.args.GO end_token = self.args.EOS if self.args.beam_search: print("decoder_cell:", self.decoder_cell) inference_decoder = tf.contrib.seq2seq.BeamSearchDecoder( cell=self.decoder_cell, embedding=embedding, start_tokens=start_tokens, end_token=end_token, initial_state=encoder_State, beam_width=self.args.num_BeamSearch, output_layer=output_layer, length_penalty_weight=0.5) else: decoding_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper( embedding=embedding, start_tokens=start_tokens, end_token=end_token) inference_decoder = tf.contrib.seq2seq.BasicDecoder( cell=self.decoder_cell, helper=decoding_helper, initial_state=encoder_State, output_layer=output_layer) decoder_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode( decoder=inference_decoder, maximum_iterations=self.args.max_num_Alength + 1, scope="decoder") if self.args.beam_search: self.decoder_predict_decode = decoder_outputs.predicted_ids else: self.decoder_predict_decode = tf.expand_dims( decoder_outputs.sample_id, -1) print("self.decoder_predict_decode:", self.decoder_predict_decode) # 取第一个结果 self.answers_predict = self.decoder_predict_decode[:, :, 0] print("answers_predict:", self.answers_predict) variable = [v for v in tf.trainable_variables()] for v in variable: 
print(v) self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=99999999) print("build model finish")
def main():
    """Train a network on noisy labels with negative learning and log progress.

    Steps, in order:

    1. Parse options with ``args.args()`` and create the save directory
       (the load directory is reused when resuming).
    2. Compute the per-channel mean of the training images and build the
       train/test transforms around it.
    3. Load the pickled clean and noisy label arrays from ``noise/<type>/``
       and build the noisy training set and the test set.
    4. Build the model, the class-weighted losses and the SGD optimizer,
       optionally restoring them from a checkpoint.
    5. For every epoch: train with randomly drawn complementary labels,
       evaluate on the test set, rank samples by per-sample loss to report
       noise-detection recall/precision, save a checkpoint and (every 10
       epochs) histograms of the averaged predicted probability of the
       given label.

    Raises:
        ValueError: if ``opt.dataset`` or ``opt.model`` is not supported.
    """
    opt = args.args()
    if opt.load_dir:
        assert os.path.isdir(opt.load_dir)
        opt.save_dir = opt.load_dir
    else:
        opt.save_dir = '{}/{}_{}_{}_{}'.format(opt.save_dir, opt.dataset,
                                               opt.model, opt.noise_type,
                                               int(opt.noise * 100))
    # exist_ok only tolerates "already exists"; permission problems and the
    # like surface here instead of at the first write.
    os.makedirs(opt.save_dir, exist_ok=True)
    cudnn.benchmark = True

    logger = logging.getLogger("ydk_logger")
    fileHandler = logging.FileHandler(opt.save_dir + '/train.log')
    streamHandler = logging.StreamHandler()
    logger.addHandler(fileHandler)
    logger.addHandler(streamHandler)
    logger.setLevel(logging.INFO)
    logger.info(opt)

    ###########################################################################
    if opt.dataset == 'cifar10_wo_val':
        num_classes = 10
        in_channels = 3
    else:
        # Fail now rather than with a NameError after the mean computation.
        logger.info('There exists no data')
        raise ValueError('unsupported dataset: %r' % (opt.dataset,))
    if opt.model != 'resnet34':
        logger.info('no model exists')
        raise ValueError('unsupported model: %r' % (opt.model,))

    ##
    # Computing mean
    trainset = dset.ImageFolder(root='{}/{}/train'.format(
        opt.dataroot, opt.dataset), transform=transforms.ToTensor())
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=opt.batchSize,
                                              shuffle=False,
                                              num_workers=opt.workers)
    mean = 0
    for i, data in enumerate(trainloader, 0):
        imgs, labels = data
        # Mean over the spatial axes per image, summed over the batch.
        mean += torch.from_numpy(
            np.mean(np.asarray(imgs), axis=(2, 3))).sum(0)
    mean = mean / len(trainset)
    ##

    # Only the mean is subtracted; the std is left at 1.
    transform_train = transforms.Compose([
        transforms.Resize(opt.imageSize),
        transforms.RandomCrop(opt.imageSize, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((mean[0], mean[1], mean[2]), (1.0, 1.0, 1.0))
    ])
    transform_test = transforms.Compose([
        transforms.Resize(opt.imageSize),
        transforms.ToTensor(),
        transforms.Normalize((mean[0], mean[1], mean[2]), (1.0, 1.0, 1.0))
    ])
    logger.info(transform_train)
    logger.info(transform_test)

    # NOTE(review): pickle.load executes arbitrary code from the file; these
    # label files must come from a trusted source.
    # The clean labels are stored as the "0 % noise" file.
    with open(
            'noise/%s/train_labels_n%02d_%s' %
            (opt.noise_type, 0, opt.dataset), 'rb') as fp:
        clean_labels = pickle.load(fp)
    with open(
            'noise/%s/train_labels_n%02d_%s' %
            (opt.noise_type, opt.noise * 100, opt.dataset), 'rb') as fp:
        noisy_labels = pickle.load(fp)
    # Fraction of labels that differ between the clean and the noisy file.
    logger.info(
        float(np.sum(clean_labels != noisy_labels)) / len(clean_labels))

    trainset = noisy_folder.ImageFolder(root='{}/{}/train'.format(
        opt.dataroot, opt.dataset),
                                        noisy_labels=noisy_labels,
                                        transform=transform_train)
    testset = dset.ImageFolder(root='{}/{}/test'.format(
        opt.dataroot, opt.dataset), transform=transform_test)

    clean_labels = list(clean_labels.astype(int))
    noisy_labels = list(noisy_labels.astype(int))

    # Indices of the noisy samples (given label differs from the clean one).
    # dtype=int keeps the array usable as an index even when it is empty.
    inds_noisy = np.asarray([
        ind for ind in range(len(trainset))
        if trainset.imgs[ind][-1] != clean_labels[ind]
    ], dtype=int)
    inds_clean = np.delete(np.arange(len(trainset)), inds_noisy)
    print(len(inds_noisy))

    # Labels the training set actually serves. trainset.imgs is not modified
    # anywhere in this function, so the array is built once.
    train_labels = np.array(trainset.imgs)[:, 1].astype(int)

    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=opt.batchSize,
                                              shuffle=True,
                                              num_workers=opt.workers)
    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=opt.batchSize,
                                             shuffle=False,
                                             num_workers=opt.workers)

    net = resnet.resnet34(in_channels=in_channels, num_classes=num_classes)

    # Inverse-frequency class weights, normalised so the most frequent class
    # has weight 1.
    weight = torch.FloatTensor(num_classes).zero_() + 1.
    for i in range(num_classes):
        weight[i] = (torch.from_numpy(train_labels) == i).sum()
    weight = 1 / (weight / weight.max())

    criterion = nn.CrossEntropyLoss(weight=weight)
    criterion_nll = nn.NLLLoss()
    # Per-sample loss; reduction='none' replaces the deprecated reduce=False.
    criterion_nr = nn.CrossEntropyLoss(reduction='none')
    # net
    # criterion
    # criterion_nll
    # criterion_nr

    optimizer = optim.SGD(net.parameters(),
                          lr=opt.lr,
                          momentum=opt.momentum,
                          weight_decay=opt.weight_decay)

    # -1 marks "not yet filled this epoch"; checked by the asserts below.
    train_preds = torch.zeros(len(trainset), num_classes) - 1.
    num_hist = 10
    train_preds_hist = torch.zeros(len(trainset), num_hist, num_classes)
    pl_ratio = 0.
    nl_ratio = 1. - pl_ratio
    train_losses = torch.zeros(len(trainset)) - 1.

    if opt.load_dir:
        ckpt = torch.load(opt.load_dir + '/' + opt.load_pth)
        net.load_state_dict(ckpt['state_dict'])
        optimizer.load_state_dict(ckpt['optimizer'])
        train_preds_hist = ckpt['train_preds_hist']
        pl_ratio = ckpt['pl_ratio']
        nl_ratio = ckpt['nl_ratio']
        epoch_resume = ckpt['epoch']
        logger.info('loading network SUCCESSFUL')
    else:
        # Also reached when no checkpoint was requested at all.
        epoch_resume = 0
        logger.info('loading network FAILURE')

    ###########################################################################
    # Start training
    best_test_acc = 0.0
    for epoch in range(epoch_resume, opt.max_epochs):
        train_loss = train_loss_neg = train_acc = 0.0
        pl = 0.
        nl = 0.
        if epoch in opt.epoch_step:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.1
                opt.lr = param_group['lr']

        for i, data in enumerate(trainloader, 0):
            net.zero_grad()
            imgs, labels, index = data
            # Complementary labels: shift each label by a random offset in
            # [1, num_classes) so it never equals the given label.
            labels_neg = (labels.unsqueeze(-1).repeat(1, opt.ln_neg) +
                          torch.LongTensor(len(labels), opt.ln_neg).random_(
                              1, num_classes)) % num_classes

            assert labels_neg.max() <= num_classes - 1
            assert labels_neg.min() >= 0
            assert (labels_neg != labels.unsqueeze(-1).repeat(
                1, opt.ln_neg)).sum() == len(labels) * opt.ln_neg

            imgs = Variable(imgs)
            labels = Variable(labels)
            labels_neg = Variable(labels_neg)

            logits = net(imgs)

            ##
            # log(1 - p), clamped away from log(0), weighted per sample by
            # the class weight of its given label.
            s_neg = torch.log(
                torch.clamp(1. - F.softmax(logits, -1), min=1e-5, max=1.))
            s_neg *= weight[labels].unsqueeze(-1).expand(s_neg.size())

            _, pred = torch.max(logits.data, -1)
            acc = float((pred == labels.data).sum())
            train_acc += acc

            train_loss += imgs.size(0) * criterion(logits, labels).data
            train_loss_neg += imgs.size(0) * criterion_nll(
                s_neg, labels_neg[:, 0]).data
            train_losses[index] = criterion_nr(logits, labels).cpu().data
            ##

            if epoch >= opt.switch_epoch:
                if epoch == opt.switch_epoch and i == 0:
                    logger.info('Switch to SelNL')
                # Drop negative learning for samples whose averaged
                # probability of the given label is below chance.
                labels_neg[train_preds_hist.mean(1)[index, labels] <
                           1 / float(num_classes)] = -100
                labels = labels * 0 - 100
            else:
                labels = labels * 0 - 100

            # -100 is the losses' default ignore_index. A mean-reduced loss
            # over only ignored targets is 0/0, so a term is evaluated only
            # when it has at least one active target.
            num_pos = float((labels >= 0).sum())
            num_neg = float((labels_neg >= 0).sum())
            num_neg_rows = float((labels_neg[:, 0] >= 0).sum())

            loss = criterion(logits, labels) * num_pos if num_pos > 0 else 0.
            loss_neg = (criterion_nll(
                s_neg.repeat(opt.ln_neg, 1),
                labels_neg.t().contiguous().view(-1)) * num_neg
                        if num_neg > 0 else 0.)

            # A batch without any active target has nothing to learn from.
            if num_pos > 0 or num_neg > 0:
                ((loss + loss_neg) / (num_pos + num_neg_rows)).backward()
                optimizer.step()

            train_preds[index.cpu()] = F.softmax(logits, -1).cpu().data

            pl += num_pos
            nl += num_neg_rows

        train_loss /= len(trainset)
        train_loss_neg /= len(trainset)
        train_acc /= len(trainset)
        pl_ratio = pl / float(len(trainset))
        nl_ratio = nl / float(len(trainset))
        noise_ratio = 1. - pl_ratio

        noise = (train_labels != np.array(clean_labels)).sum()
        logger.info(
            '[%6d/%6d] loss: %5f, loss_neg: %5f, acc: %5f, lr: %5f, noise: %d, pl: %5f, nl: %5f, noise_ratio: %5f'
            % (epoch, opt.max_epochs, train_loss, train_loss_neg, train_acc,
               opt.lr, noise, pl_ratio, nl_ratio, noise_ratio))

        #######################################################################
        if epoch == 0:
            # Undo the mean subtraction on the last batch and save it as a
            # sample image grid.
            for i in range(in_channels):
                imgs.data[:, i] += mean[i]
            img = vutils.make_grid(imgs.data)
            vutils.save_image(img, '%s/x.jpg' % (opt.save_dir))
            logger.info('%s/x.jpg saved' % (opt.save_dir))

        net.eval()
        test_loss = test_acc = 0.0
        with torch.no_grad():
            for i, data in enumerate(testloader, 0):
                imgs, labels = data
                imgs = Variable(imgs)
                labels = Variable(labels)

                logits = net(imgs)
                loss = criterion(logits, labels)
                test_loss += imgs.size(0) * loss.data

                _, pred = torch.max(logits.data, -1)
                acc = float((pred == labels.data).sum())
                test_acc += acc

        test_loss /= len(testset)
        test_acc /= len(testset)

        # Flag the `rnge` samples with the largest per-sample loss as noisy
        # and compare against the true noisy indices.
        inds = np.argsort(np.array(train_losses))[::-1]
        rnge = int(len(trainset) * noise_ratio)
        inds_filt = inds[:rnge]
        num_hits = float(len(np.intersect1d(inds_filt, inds_noisy)))
        recall = num_hits / float(len(inds_noisy)) if len(inds_noisy) else 0.
        precision = num_hits / float(rnge) if rnge else 0.
        #######################################################################

        logger.info(
            '\tTESTING...loss: %5f, acc: %5f, best_acc: %5f, recall: %5f, precision: %5f'
            % (test_loss, test_acc, best_test_acc, recall, precision))
        net.train()

        #######################################################################
        # Every sample must have been visited exactly this epoch before the
        # buffers are reset to the -1 sentinel.
        assert train_preds[train_preds < 0].nelement() == 0
        train_preds_hist[:, epoch % num_hist] = train_preds
        train_preds = train_preds * 0 - 1.
        assert train_losses[train_losses < 0].nelement() == 0
        train_losses = train_losses * 0 - 1.
        #######################################################################

        is_best = test_acc > best_test_acc
        best_test_acc = max(test_acc, best_test_acc)
        state = ({
            'epoch': epoch,
            'state_dict': net.state_dict(),
            'optimizer': optimizer.state_dict(),
            'train_preds_hist': train_preds_hist,
            'pl_ratio': pl_ratio,
            'nl_ratio': nl_ratio,
        })
        logger.info('saving model...')
        fn = os.path.join(opt.save_dir, 'checkpoint.pth.tar')
        torch.save(state, fn)

        if epoch % 100 == 0 or epoch == opt.switch_epoch - 1 or epoch == opt.max_epochs - 1:
            fn = os.path.join(opt.save_dir,
                              'checkpoint_epoch%d.pth.tar' % (epoch))
            torch.save(state, fn)
        # if is_best:
        #     fn_best = os.path.join(opt.save_dir, 'model_best.pth.tar')
        #     logger.info('saving best model...')
        #     shutil.copyfile(fn, fn_best)

        if epoch % 10 == 0:
            # Averaged (over the history window) probability that each
            # sample is assigned to its given label.
            label_probs = train_preds_hist.mean(1)

            logger.info('saving histogram...')
            plt.hist(label_probs[torch.arange(len(trainset)), train_labels],
                     bins=20,
                     range=(0., 1.),
                     edgecolor='black',
                     color='g')
            plt.xlabel('probability')
            plt.ylabel('number of data')
            plt.grid()
            plt.savefig(opt.save_dir + '/histogram_epoch%03d.jpg' % (epoch))
            plt.clf()

            logger.info('saving separated histogram...')
            plt.hist(label_probs[torch.arange(len(trainset))[inds_clean],
                                 train_labels[inds_clean]],
                     bins=20,
                     range=(0., 1.),
                     edgecolor='black',
                     alpha=0.5,
                     label='clean')
            plt.hist(label_probs[torch.arange(len(trainset))[inds_noisy],
                                 train_labels[inds_noisy]],
                     bins=20,
                     range=(0., 1.),
                     edgecolor='black',
                     alpha=0.5,
                     label='noisy')
            plt.xlabel('probability')
            plt.ylabel('number of data')
            plt.grid()
            plt.savefig(opt.save_dir +
                        '/histogram_sep_epoch%03d.jpg' % (epoch))
            plt.clf()
def setSources(self):
    """Rebuild ``self.sources`` from the input and output cfg objects.

    A source can be one of: sample, dataset, group, gengroupbasic,
    gengroupraw, gengroupfine, custom.  The ``source`` variable in the cfg
    header only applies to tree inputs and can take: sample, dataset, group.

    Every input tree object is given (via ``setSource``) the index of its
    source within ``self.sources``.  Sources named by output objects are
    added to the list as well, but no index is stored on those objects.
    """
    self.vb.call("input", "setSources", [self], "Setting the sources of this instance.")
    self.sources = []

    if not self.cfg.hasVar("source"):
        self.vb.error("Default source is not specified in the header of the cfg file.")
    default_kind = self.cfg.getVar("source")

    def register(source):
        # Index of `source` in self.sources; appended first when unknown.
        idx = lib.findElm(self.sources, source)
        if idx == -1:
            self.sources.append(source)
            idx = len(self.sources) - 1
        return idx

    for iobj in self.cfg.getObjs("region=='input' and type=='tree'"):
        alist = args.args(iobj.argstring)
        if alist.has("source"):
            ## source given to this input object
            source = alist.get("source")
        else:
            ## source not given, take default defined in header
            if not iobj.type == "tree":
                self.vb.error("No source is specified for input object " + iobj.name + ".")
            if default_kind in ("dataset", "group"):
                # Look the dataset/group of this sample up in the database.
                source = self.db.getVar("samples", iobj.name, default_kind)
            else:
                source = iobj.name
        iobj.setSource(register(source))

    for iobj in self.cfg.getObjs("region=='output'"):
        alist = args.args(iobj.argstring)
        if alist.has("source"):
            register(alist.get("source"))
# print(self.features.T) # print(self.features.T.shape) return self.features.T def preprocessFeature(self): if self.features.shape[1] > 200: U, S, VT = la.svd(self.features) Ud = U[:, 0:200] Sd = S[0:200] self.features = np.array(Ud) * Sd.reshape(200) if __name__ == "__main__": warnings.filterwarnings("ignore", category=FutureWarning) rep_method = RepMethod(max_layer=2) arg_1 = args.args() arg_1.input = "data/test/karate.edgelist_1" arg_1.feature_file = "data/test/cora.features_1" t1 = time.time() nx_graph_1 = nx.read_edgelist(arg_1.input, nodetype=int, comments="%") adj_matrix_1 = nx.adjacency_matrix(nx_graph_1).todense() g_1 = Graph(adj_matrix_1) g_1.read_edgelist(filename=arg_1.input, weighted=arg_1.weighted, directed=arg_1.directed) g_1.read_node_features(arg_1.feature_file) xTawd_1 = XTADW(g_1, arg_1.representation_size) structure_feature_1 = xTawd_1.get_features(rep_method) # print(structure_feature_1) # print(structure_feature_1.shape)
# 运行主程序前用来预处理数据 import glob import os from args import args hparams = args() parser = hparams.parser hp = parser.parse_args() files = glob.glob("raw-data/" + hp.dataset + "/*.txt") print("共有 %d 个图" % len(files)) for k, file in enumerate(files): print("处理第 %d 个图" % (k + 1)) f = open(file, 'r') edge_cnt = 0 node_set = set() biao = {} for line in f: temp = line[:-1].split(' ') # print(tem) if (len(temp)) < 3: break x = int(temp[0]) y = int(temp[1]) edge_cnt += 1 node_set.add(x) node_set.add(y) f.close() biao = {} cn = 1 for i in node_set:
import sys

# Make the parent directory importable before the project imports below.
sys.path.append("..")
import math
import os  # used below for the save path and CUDA_VISIBLE_DEVICES
import random
import tensorflow as tf
import numpy as np
import datetime
from data_util import data_preprocess3
from model import Model as Model
from args import args

# ==============================================================================
# Loading dataset
# ==============================================================================
# NOTE: from here on `args` is the parsed option object, not the factory.
args = args("train")
# args = args("test")
data_preprocess = data_preprocess3(args)
num_sample = data_preprocess.num_sample
print("num_sample:", num_sample)

# exist_ok avoids the check-then-create race of a separate exists() test.
os.makedirs(args.savePath, exist_ok=True)

os.environ["CUDA_VISIBLE_DEVICES"] = '0'
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
# gpu_options = tf.GPUOptions(allow_growth=True)

# Iterations per epoch
numIterPerEpoch = int(math.ceil(num_sample / args.batch_size))
print('%d iter per epoch.\n' % numIterPerEpoch)
from tweetUtil import listToCsvMulti, csvToList, simple_tweet_search_j, auth_api, auth_api2, uploadVideo, csvToListMulti, urlReplyRemove, listToCsv
from args import args
from pprint import pprint
import functools

# speedy log: every print in this module flushes immediately
print = functools.partial(print, flush=True)

# get args
search_words, envName, slugid = args()

# Twitter auth: two API handles, both built from the environment name
api = auth_api(envName)
api2 = auth_api2(envName)


def main():
    """Search tweets and load the per-environment exclusion data.

    Runs a search for ``search_words``, then reads the CSV files of already
    handled tweet ids and user ids and fetches the follower ids of the
    authenticated account.
    """
    # Per-environment bookkeeping files.
    csvname = envName + "_tweeted_movie.csv"
    uidName = envName + "_user_id_tweeted_movie.csv"

    # Extracting text and original URL
    copyIdAndImege = []
    rawJsonList = simple_tweet_search_j(search_words, envName)
    print("---------------------search target")

    # Exclude what has already been posted
    tweetedIdList = csvToListMulti(csvname)
    # Keep only the first column of each row, converted to int.
    tweetedIdList = list(map(lambda x: int(x[0]), tweetedIdList))
    uidList = csvToList(uidName)
    meId = api.me().screen_name
    followerIdsInt = api.followers_ids(meId)
    followerIds = [str(i) for i in followerIdsInt]
    print(
        "----------------------------------------------------------------Exclusion target (posted)"
    )
# NOTE: docstrings and annotations below are left untouched; they are
# presumably read by the `args` command-line helper -- confirm before editing.
def equal(a, b):
    """Tests both items on equality"""
    outcome = a == b
    print(outcome)


def join(c: str, *a):
    """Joins the arguments with the first one"""
    joined = c.join(a)
    print(joined)


def add(*i: lambda x: list(map(float, x))):
    """Adds the given numbers"""
    # Left fold with float addition, starting from 0.0.
    total = 0.0
    for value in i:
        total = float.__add__(total, value)
    print(total)


def echo(i: str):
    """echoes the given string :param i: the string to print """
    text = i
    print(text)


if __name__ == "__main__":
    import args

    args.args()
from tensorflow.keras.models import Sequential from tensorflow.keras.layers import Dense from tensorflow.keras.optimizers import Adam from collections import deque from tensorflow.losses import huber_loss import args import shutil import subprocess import sys import threading import numpy as np import env_data args = args.args() def softmax(x): # 入力値の中で最大値を取得 c = np.max(x) # オーバーフロー対策として、最大値cを引く。こうすることで値が小さくなる exp_a = np.exp(x - c) sum_exp_a = np.sum(exp_a, axis=0) y = exp_a / sum_exp_a return y def readable_size(size): for unit in ['K', 'M']:
#import matplotlib.pyplot as plt import torch import torchvision from torchvision import datasets, transforms import torch.nn.functional as F import torch.nn as nn import torch.optim as optim import torch.backends.cudnn as cudnn from resnet import ResNet56, ResNetNoShort56, ResNet110, ResNetNoShort110, ResNet20, ResNetNoShort20 from args import args from utils import progress_bar args = args() if args.instance is None: args.instance = '{}_optim_{}_lr_{}_batch-size_{}_seed_{}'.format( args.arch, args.optim, args.lr, args.batch_size, args.seed) print(args.instance) torch.manual_seed(args.seed) print('==> Preparing data..') transform_train = transforms.Compose([ transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), ]) transform_test = transforms.Compose([
import args import os import numpy as np import torchvision.datasets as dset import torchvision.transforms as transforms import random import pickle opt = args.args() try: os.makedirs('noise/%s' % (opt.noise_type)) except OSError: pass ################################################################################################### if opt.dataset == 'cifar10_wo_val': num_classes = 10 else: print('There exists no data') trainset = dset.ImageFolder(root='{}/{}/train'.format(opt.dataroot, opt.dataset), transform=transforms.ToTensor()) clean_labels = np.array(trainset.imgs)[:, 1] for n in range(10): trainset = dset.ImageFolder(root='{}/{}/train'.format( opt.dataroot, opt.dataset), transform=transforms.ToTensor()) noisy_idx = [] for c in range(num_classes):
print("TAWD", "begin...") print("Reading...") if args.graph_format == 'adjlist': g.read_adjlist(filename=args.input) elif args.graph_format == 'edgelist': g.read_edgelist(filename=args.input, weighted=args.weighted, directed=args.directed) g.read_node_label(args.label_file) g.read_node_features(args.feature_file) model = xtadw.TADW(graph=g, dim=args.representation_size, lamb=args.lamb) t2 = time.time() print(t2 - t1) print("Saving embeddings...") model.save_embeddings(args.output) vectors = model.vectors X, Y = read_node_label(args.label_file) print("Training classifier using {:.2f}% nodes...".format(args.clf_ratio * 100)) clf = Classifier(vectors=vectors, clf=LogisticRegression()) clf.split_train_evaluate(X, Y, args.clf_ratio) if __name__ == "__main__": warnings.filterwarnings("ignore", category=FutureWarning) random.seed(32) np.random.seed(32) agrs = args.args() main(agrs)
def main(arg_one_input="data/paper/truedata/facebook.net",
         arg_one_feature_file="data/paper/truedata/facebook_info",
         arg_two_input="data/paper/truedata/twitter.net",
         arg_two_feature_file="data/paper/truedata/twitter_info"):
    """Match communities between two networks and print precision/recall.

    Both graphs are loaded from an edge list plus a node-feature file,
    communities are detected with GN, structural features are computed with
    XTADW, joint node embeddings are trained with RECM, and the community
    pairs returned by ``computer_pair`` are scored.  A returned pair counts
    as correct when both of its community ids are equal.

    Args:
        arg_one_input: edge-list file of the first network.
        arg_one_feature_file: node-feature file of the first network.
        arg_two_input: edge-list file of the second network.
        arg_two_feature_file: node-feature file of the second network.

    Other input sets used in the experiments:
        synthetic: data/paper/synthetic/football_1.net,
                   data/paper/synthetic/football_info_115_1,
                   data/paper/synthetic/football_1-0.01.net,
                   data/paper/synthetic/football_info_115_2
        disturb:   data/paper/disturb/football_1-0.1.net,
                   data/paper/synthetic/football_info_115_1,
                   data/paper/disturb/football_1-0.1.net,
                   data/paper/synthetic/football_info_115_2
    """
    warnings.filterwarnings("ignore", category=FutureWarning)
    t1 = time.time()

    # init graph
    # max_layer is the number of layers K, alpha the discount factor for
    # higher layers
    rep_method = RepMethod(max_layer=4, alpha=0.1)

    def load_graph(input_path, feature_file):
        # Parse default options, point them at the given files, and build
        # the Graph with its edges and node features.
        arg = args.args()
        arg.input = input_path
        arg.feature_file = feature_file
        nx_graph = nx.read_edgelist(arg.input, nodetype=int, comments="%")
        adj_matrix = nx.adjacency_matrix(nx_graph).todense()
        graph = Graph(adj_matrix)
        graph.read_edgelist(filename=arg.input,
                            weighted=arg.weighted,
                            directed=arg.directed)
        graph.read_node_features(arg.feature_file)
        return arg, graph

    arg_one, g_one = load_graph(arg_one_input, arg_one_feature_file)
    arg_two, g_two = load_graph(arg_two_input, arg_two_feature_file)

    # Community detection. GN is the algorithm the other experiments were
    # run with. Alternatives that were tried (each exposes execute()):
    #   SCAN(g.G, 0.5, 3)
    #   Louvain(load_graph_LV(path))
    #   CPM().execute(g.G, 4)
    #   EM(g_one.G, 9) / EM(g_two.G, 2)
    #   LFM(g.G, 0.8)
    #   LPA(g.G)   -- cannot be used here
    G_one = load_graph_GN(arg_one_input)
    algorithm_one = GN(G_one)
    communities_one = algorithm_one.execute()
    G_two = load_graph_GN(arg_two_input)
    algorithm_two = GN(G_two)
    communities_two = algorithm_two.execute()
    # print(communities_one)
    # print(communities_two)

    # node embed
    x_tawd_one = XTADW(g_one, arg_one.representation_size)
    structure_feature_one = x_tawd_one.get_features(rep_method)
    x_tawd_two = XTADW(g_two, arg_two.representation_size)
    structure_feature_two = x_tawd_two.get_features(rep_method)
    structure_feature_one, structure_feature_two = completion_vec(
        structure_feature_one, structure_feature_two)
    combined_features = np.vstack(
        (structure_feature_one, structure_feature_two))

    # first parameter S is the dimension, second parameter lamb is the
    # penalty factor
    recm = RECM(5, 0.1, g_one, g_two)
    recm.getT()
    g_one_node_embeding, g_two_node_embeding = recm.train(
        5, rep_method, combined_features)
    # print("shape", g_one_node_embeding.shape)

    res, len_community_pair = computer_pair(communities_one, communities_two,
                                            g_one_node_embeding,
                                            g_two_node_embeding)
    # print(res)

    # TP_FP: number of predicted pairs; TP_FN: number of ground-truth pairs.
    TP_FP = len(res)
    TP_FN = len_community_pair
    # The first element of each result holds the two matched community ids.
    TP = sum(1 for entry in res if entry[0][0] == entry[0][1])

    # Report 0.0 instead of raising when there is nothing to score.
    pre = TP / TP_FP if TP_FP else 0.0
    recall = TP / TP_FN if TP_FN else 0.0
    print("pre: ", pre, "recall: ", recall)
    print("sum time ", time.time() - t1)
import sys
import os
import args
import shutil
from config import const

# Run everything relative to the directory this script lives in.
script_dir = os.path.dirname(os.path.realpath(__file__))
print(script_dir)
os.chdir(script_dir)

cmd = args.args()
print(cmd)

if cmd.get('c'):
    # Start the client.
    os.system('python client/main.py')
elif cmd.get('s'):
    # Start a celery worker named after this host's address.
    import socket

    myname = socket.getfqdn(socket.gethostname())
    myaddr = socket.gethostbyname(myname)
    worker_cmd = 'celery worker -A server.main -l info -n {0}'.format(myaddr)
    os.system(worker_cmd)
elif cmd.get('renew_c'):
    ## Planned steps for refreshing the client:
    ##   delete folder: client/config
    ##   delete folder: client/task
    ##   copy ./config to client/config
    ##   new folder: client/task
    ##   new file: client/task/__init__.py
    ##   new file: client/task/task_api.py
    ##   read ./task/task_api.py | filter | client/task/task_api.py

    # client.config
    client_config = 'client/config'
    if os.path.exists(client_config):
        shutil.rmtree(client_config)
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 1 13:27:46 2019

@author: Chenghai Li
"""
import csv
from args import args
import numpy as np
from matplotlib import pyplot as plt

# Hyper-parameters from the project's argument helper.
arg = args()
step = arg.step
input_length = arg.input_length
predict_length = arg.predict_length
split_ratio = arg.split_ratio

# Read columns 2..5 of every row as floats. The file is closed as soon as
# it has been consumed; newline='' is what the csv module expects.
data = []
with open(r'data/EURUSD.csv', newline='') as file:
    file_csv = csv.reader(file)
    for row in file_csv:
        data.append(
            [float(row[2]), float(row[3]), float(row[4]), float(row[5])])

# The leading split_ratio share of the rows is the training part.
train = np.array(data[:int(len(data) * split_ratio)], dtype=np.float32)