示例#1
0
文件: output.py 项目: cheidegg/MyPAF
	def buildStat(self, objnames = []):
		"""Register the yield and efficiency-map objects of the output region, then build them.

		objnames -- names of the output objects to restrict the build to;
		            an empty list keeps every matching object.
		"""

		self.vb.call("output", "buildStat", [self], "Building the stat module.")

		selected = self.mypaf.input.cfg.getObjs("region=='output' and (type=='evyield' or type=='obyield' or type=='effmap')")
		if len(objnames) > 0:
			selected = lib.getElmAttrAllOr(selected, "name", objnames)

		## [name, definition] pairs of the selections; handed to sel.sel for effmap objects
		## NOTE(review): this reads self.input.cfg while the line above reads self.mypaf.input.cfg -- confirm both exist
		selpairs = []
		for s in self.input.cfg.getObjs("region=='selection' and (type=='none' or type=='tree')"):
			selpairs.append([s.name, s.definition])

		for obj in selected:
			arglist = args.args(obj.argstring)
			kind    = obj.type

			## an ObYield without the 'obj' argument is skipped with a warning
			if kind == "obyield" and not arglist.has("obj"):
				self.vb.warning("Physics object (argument 'obj') is not given for object of type ObYield. ObYield is ignored.")
				continue

			treesels = self.mypaf.findSelections(["tree"], arglist)
			categs   = [t.name for t in treesels]

			if kind == "effmap":
				resolved = [sel.sel(t.definition, selpairs) for t in treesels]
				self.objcoll.addEffMap(obj.name, [r.string for r in resolved], obj.argstring, self.mypaf.input.sources, categs)
			elif kind == "obyield":
				self.objcoll.addObYield(arglist.get("obj"), obj.name, obj.definition.split(":")[0], obj.argstring, self.mypaf.input.sources, categs)
			elif kind == "evyield":
				self.objcoll.addEvYield(obj.name, obj.definition.split(":")[0], obj.argstring, self.mypaf.input.sources, categs)
			del arglist

		self.objcoll.build()
示例#2
0
文件: output.py 项目: cheidegg/MyPAF
	def buildDraw(self, objnames = []):
		"""Call openFile() and register one histogram per 'file'/'plot' output object.

		objnames -- names of the output objects to restrict the registration to;
		            an empty list keeps every matching object.
		"""

		self.vb.call("output", "buildDraw", [self], "Building the draw module.")
		self.openFile()

		selected = self.mypaf.input.cfg.getObjs("region=='output' and (type=='file' or type=='plot')")
		if len(objnames) > 0:
			selected = lib.getElmAttrAllOr(selected, "name", objnames)

		for obj in selected:
			arglist = args.args(obj.argstring)

			## a histogram needs an observable: either 'obs' or 'obsx'
			if not (arglist.has("obs") or arglist.has("obsx")):
				self.vb.warning("Physics observable (argument 'obs') is not given for histogram. Histogram is ignored.")
				continue

			categs = [s.name for s in self.mypaf.findSelections(["tree"], arglist)]
			self.objcoll.addHist(obj.name, lib.getHistDim(obj.definition), obj.argstring, self.mypaf.input.sources, categs)

			## objects of type 'plot' are additionally flagged through setHistP
			if obj.type == "plot":
				self.objcoll.setHistP(obj.name)
			del arglist
示例#3
0
def progargs():
    """Declare the options of the catalog/deployment update tool and parse the command line.

    The parser comes from ``args(description, 21212)``; the result of its
    ``parse_args()`` is returned unchanged.
    """
    # (flags, add_argument keyword settings), registered in this order
    option_table = (
        (('--catalog', '-c'),
         {'metavar': 'JAR',
          'help': 'Catalog file to be given to the VoltDB cluster. Defaults to catalog.$SITE.jar'}),
        (('--deployment', '-d'),
         {'metavar': 'XML',
          'help': 'Deployment file to be given to the VoltDB cluster. Defaults to deployment.$SITE.xml'}),
        (('--no-suspend', '-n'),
         {'action': 'store_false',
          'dest': 'suspend',
          'help': 'Do not suspend the server.  Updates catalog only.'}),
        (('--quiet', '-q'),
         {'action': 'store_false',
          'dest': 'verbose',
          'help': "Be quiet, don't output status messages."}),
        (('--snapshot', '-s'),
         {'help': 'Perform a manual snapshot'}),
        (('--snappath',),
         {'metavar': 'PATH',
          'help': 'Path to save the snapshot in. [%(default)s]'}),
    )

    parser = args('Update VoltDB catalog and deployment', 21212)
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)

    return parser.parse_args()
示例#4
0
文件: output.py 项目: cheidegg/MyPAF
	def buildScan(self, objnames = []):
		"""Register the event/object lists of the output region (plus one yield each) and build them.

		objnames -- names of the output objects to restrict the build to;
		            an empty list keeps every matching object.
		"""

		self.vb.call("output", "buildScan", [self], "Building the scan module.")

		selected = self.mypaf.input.cfg.getObjs("region=='output' and (type=='evlist' or type=='oblist')")
		if len(objnames) > 0:
			selected = lib.getElmAttrAllOr(selected, "name", objnames)

		for obj in selected:
			arglist = args.args(obj.argstring)
			kind    = obj.type

			## an ObList without the 'obj' argument is skipped with a warning
			if kind == "oblist" and not arglist.has("obj"):
				self.vb.warning("Physics object (argument 'obj') is not given for object of type ObList. ObList is ignored.")
				continue

			categs = [s.name for s in self.mypaf.findSelections(["tree"], arglist)]

			## every list gets a yield of the same name; the yield only uses the first ':'-separated field
			if kind == "oblist":
				self.objcoll.addObList(arglist.get("obj"), obj.name, obj.definition.split(":"), obj.argstring, self.mypaf.input.sources, categs)
				self.objcoll.addObYield(arglist.get("obj"), obj.name, obj.definition.split(":")[0], obj.argstring, self.mypaf.input.sources, categs)
			elif kind == "evlist":
				self.objcoll.addEvList(obj.name, obj.definition.split(":"), obj.argstring, self.mypaf.input.sources, categs)
				self.objcoll.addEvYield(obj.name, obj.definition.split(":")[0], obj.argstring, self.mypaf.input.sources, categs)
			del arglist

		self.objcoll.build()
示例#5
0
def main():
    """Fetch the alarms requested on the command line, sort them into buckets and dump the buckets."""
    opts = args()
    conf = get_conf(opts['config_file'])

    # connect to the Elasticsearch hosts named in the configuration file
    connections.create_connection(hosts=conf['elasticsearch']['hosts'], timeout=30)

    # the search window, query and minimum priority all come from the console options
    alarm_search = Searcher(
        opts['from'],
        opts['query'],
        ttime=opts['to'],
        per_page=500,
        min_priority=opts['min_priority'],
    )

    kibana_url = utils.build_url(conf['kibana']['host'], conf['kibana']['secure'])
    buckets = [PathClassBucket(kibana_url)]

    # every alarm is offered to every bucket
    for alarm in alarm_search.pages():
        for collector in buckets:
            collector.cherry_pick(alarm)

    # finally let each bucket print what it collected
    for collector in buckets:
        collector.dump()
示例#6
0
	def __init__(self, mypaf, name, binargs, labels, argstring = ""):
		"""Store the histogram settings, log the call and run the setParent/setLabels/setD setup steps.

		mypaf     -- framework object; its 'db' and 'vb' attributes are kept on the instance
		name      -- name of the histogram
		binargs   -- binning arguments, stored as given
		labels    -- forwarded to setLabels
		argstring -- argument string parsed into self.alist
		"""

		self.name, self.binargs = name, binargs
		self.p = self.built = False

		## dlist starts from an empty argument string, alist from the caller's one
		self.dlist = args.args("")
		self.alist = args.args(argstring)

		self.mypaf = mypaf
		self.db, self.vb = mypaf.db, mypaf.vb
		self.vb.call("hist", "__init__", [self, mypaf, name, binargs, labels, argstring], "Initializing the hist class.")

		self.setParent()
		self.setLabels(labels)
		self.setD()
示例#7
0
def progargs():
    """Declare the single optional positional of the log4j update tool and parse the command line.

    The parser comes from ``args(description, 21212)``; the result of its
    ``parse_args()`` is returned unchanged.
    """
    cli = args('Update VoltDB log4j configuration', 21212)

    # nargs='?' makes the positional optional
    cli.add_argument('log4jxml',
                     nargs='?',
                     help='log4j config file to update to. Defaults to log4j.$SITE.xml')

    parsed = cli.parse_args()
    return parsed
示例#8
0
def remove_rf(options, arguments):
    """Delete the path assembled from *arguments*; *options* is not used.

    Directories go through os.rmdir, everything else through os.remove.
    """
    # NOTE(review): os.rmdir only removes EMPTY directories, which is less
    # than the "_rf" in the name suggests -- confirm this is intended.
    target = join_listlike({}, arguments)
    remover = os.rmdir if file_isdir({}, args(target)) else os.remove
    remover(target)
示例#9
0
def remove_rf(options, arguments):
    """Delete the path assembled from *arguments*; *options* is not used.

    Directories go through os.rmdir, everything else through os.remove.
    """
    # NOTE(review): os.rmdir only removes EMPTY directories, which is less
    # than the "_rf" in the name suggests -- confirm this is intended.
    target = join_listlike({}, arguments)
    remover = os.rmdir if file_isdir({}, args(target)) else os.remove
    remover(target)
示例#10
0
def progargs():
    """Declare the options of the snapshot restore tool and parse the command line.

    The parser comes from ``args(description, 21211)``; the result of its
    ``parse_args()`` is returned unchanged.
    """
    cli = args('Restore a VoltDB cluster from snapshot.', 21211)
    add = cli.add_argument

    add('--path',
        default='/var/voltdb/snapshot/',
        help='Path to restore the snapshot from. [%(default)s]')
    # required positional: the prefix of the snapshot files
    add('snapshotname',
        help='Snapshot filename prefix to restore from')

    return cli.parse_args()
示例#11
0
    def __init__(self):
        """Initialise the instance state and immediately hand control to ``self.main()``."""
        # options object produced by the args() callable
        self.args = args()

        # nothing loaded and no step taken yet
        self.text_data, self.global_step = None, 0

        # constant prefixes kept on the instance
        self.SENTENCES_PREFIX = ['Q: ', 'A: ']

        # NOTE: constructing the object runs the whole program
        self.main()
示例#12
0
文件: obyield.py 项目: cheidegg/MyPAF
	def __init__(self, mypaf, obj, name, variable, argstring = ""):
		"""Store the settings of an object yield; nothing is built yet.

		mypaf     -- framework object; its 'db' and 'vb' attributes are kept on the instance
		obj, name -- stored with surrounding whitespace stripped
		variable  -- stored as given
		argstring -- argument string parsed into self.alist
		"""

		self.mypaf, self.db, self.vb = mypaf, mypaf.db, mypaf.vb

		self.obj, self.name = obj.strip(), name.strip()
		self.variable = variable
		self.alist = args.args(argstring)
		self.built = False
示例#13
0
文件: effmap.py 项目: cheidegg/MyPAF
	def __init__(self, mypaf, name, definition, argstring = ""):
		"""Store the settings of an efficiency map and log the call; nothing is built yet.

		mypaf      -- framework object; its 'db' and 'vb' attributes are kept on the instance
		name       -- stored with surrounding whitespace stripped
		definition -- stored as given in self.defs
		argstring  -- argument string parsed into self.alist
		"""

		self.mypaf, self.db, self.vb = mypaf, mypaf.db, mypaf.vb
		self.vb.call("effmap", "__init__", [self, mypaf, name, definition, argstring], "Initializing the effmap class.")

		self.name = name.strip()
		self.alist = args.args(argstring)
		self.defs = definition
		self.built = False
示例#14
0
文件: evyield.py 项目: cheidegg/MyPAF
	def __init__(self, mypaf, name, variable, argstring = ""):
		"""Store the settings of an event yield and log the call; nothing is built yet.

		mypaf          -- framework object; its 'db' and 'vb' attributes are kept on the instance
		name, variable -- stored with surrounding whitespace stripped
		argstring      -- argument string parsed into self.alist
		"""

		self.mypaf, self.db, self.vb = mypaf, mypaf.db, mypaf.vb
		self.vb.call("evyield", "__init__", [self, mypaf, name, variable, argstring], "Initializing the evyield class.")

		self.name, self.variable = name.strip(), variable.strip()
		self.alist = args.args(argstring)
		self.built = False
示例#15
0
文件: oblist.py 项目: cheidegg/MyPAF
	def __init__(self, mypaf, obj, name, variables, argstring = ""):
		"""Store the settings of an object list; nothing is built yet.

		mypaf     -- framework object; its 'db' and 'vb' attributes are kept on the instance
		obj, name -- stored with surrounding whitespace stripped
		variables -- iterable appended to the two fixed entries of self.vars
		argstring -- argument string parsed into self.alist
		"""

		self.mypaf, self.db, self.vb = mypaf, mypaf.db, mypaf.vb

		self.obj, self.name = obj.strip(), name.strip()
		self.alist = args.args(argstring)

		## "Row" and "Instance" always come first, followed by the requested variables
		self.vars = ["Row", "Instance"] + list(variables)
		self.built = False
示例#16
0
    def __init__(self):
        """Initialise the instance state and immediately hand control to ``self.main()``."""
        # options object produced by the args() callable
        self.args = args()

        # all of these start out empty
        self.text_data = self.seq2seq_model = self.writer = None
        self.saver = self.sess = self.train_op = None

        self.global_step = 0
        self.SENTENCE_PREFIX = ['Q:', 'A:']

        # NOTE: constructing the object runs the whole program
        self.main()
示例#17
0
def _load_graph(input_path, feature_path):
    """Build a Graph from an edge-list file and attach the node features from *feature_path*."""
    arg = args.args()
    arg.input = input_path
    arg.feature_file = feature_path

    # the adjacency matrix of the networkx graph seeds the project Graph object
    nx_graph = nx.read_edgelist(arg.input, nodetype=int, comments="%")
    graph = Graph(nx.adjacency_matrix(nx_graph).todense())

    # 'weighted' and 'directed' keep whatever defaults args.args() provides
    graph.read_edgelist(filename=arg.input,
                        weighted=arg.weighted,
                        directed=arg.directed)
    graph.read_node_features(arg.feature_file)
    return graph


def main(
    arg_one_input="D:\\workspace\\pycharm\\paper_algorithm\\FindSimilarityCommunity\\src\\contrast\\data\\paper\\synthetic\\football_1.net",
    arg_one_feature_file="D:\\workspace\\pycharm\\paper_algorithm\\FindSimilarityCommunity\\src\\contrast\\data\\paper\\synthetic\\football_info_115_1",
    arg_two_input="D:\\workspace\\pycharm\\paper_algorithm\\FindSimilarityCommunity\\src\\contrast\\data\\paper\\synthetic\\football_1-0.05.net",
    arg_two_feature_file="D:\\workspace\\pycharm\\paper_algorithm\\FindSimilarityCommunity\\src\\contrast\\data\\paper\\synthetic\\football_info_115_2"
):
    """Load two graphs and run SCAN community detection on each.

    Args:
        arg_one_input: edge-list file of the first graph.
        arg_one_feature_file: node-feature file of the first graph.
        arg_two_input: edge-list file of the second graph.
        arg_two_feature_file: node-feature file of the second graph.

    Returns:
        ``(communities_one, communities_two, g_one, g_two)`` -- the result of
        ``SCAN(...).execute()`` for each graph, followed by the two Graph objects.
    """
    warnings.filterwarnings("ignore", category=FutureWarning)

    # both graphs are loaded the same way, first graph first
    g_one = _load_graph(arg_one_input, arg_one_feature_file)
    g_two = _load_graph(arg_two_input, arg_two_feature_file)

    # community detection with SCAN; 0.5 and 3 are the fixed SCAN parameters
    # (presumably epsilon and mu -- confirm against the SCAN class)
    communities_one = SCAN(g_one.G, 0.5, 3).execute()
    communities_two = SCAN(g_two.G, 0.5, 3).execute()
    return communities_one, communities_two, g_one, g_two
示例#18
0
 def get_iplist_from_unofficial(self):
     print '-> get ip list from UNOFFICIAL source...\n\tbe patient...'
     import args
     a = args.args()
     iplist = []
     i1, i2 = a.test_args()
     for i in xrange(0, len(i1)):
         p = '0x%s/%s' % (i1[i], i2[i])
         ip = IPy.IP(p)
         for x in ip:
             iplist.append(str(x))
     iplist = {}.fromkeys(iplist).keys()
     print '\tget %s IPs' % len(iplist)
     return iplist
 def get_iplist_from_unofficial(self):
     print '-> get ip list from UNOFFICIAL source...\n\tbe patient...'
     import args
     a = args.args()
     iplist = []
     i1, i2 = a.test_args()
     for i in xrange(0, len(i1)):
         p = '0x%s/%s' % (i1[i], i2[i])
         ip = IPy.IP(p)
         for x in ip:
             iplist.append(str(x))
     iplist = {}.fromkeys(iplist).keys()
     print '\tget %s IPs' % len(iplist)
     return iplist
示例#20
0
def main():
    """Find tweets that link to or mention the authenticated account and post a status with media for them.

    Set ``hashTagStr`` below to the hashtag text handed to screenShotAndUpload.
    """
    hashTagStr = ""

    # only the environment name is used from the parsed arguments
    _search_words, envName, _slugid = args()
    api = auth_api(envName)

    # query: links to the account's tweets, excluding the account's own posts
    userName = api.me().screen_name
    word = "twitter.com/" + userName + ' /-from:' + userName

    # screen names already recorded in the CSV (second column) are skipped below
    csvname = envName + "_quotedIds.csv"
    removeIdsList = [row[1] for row in csvToListMulti(csvname)]

    # search results first, then the mentions timeline
    results = api.search(q=word, count="100")
    results += api.mentions_timeline()

    attckIdList = []
    for tweet in results:
        # NOTE(review): these two tests ask whether the tweet TEXT is a substring
        # of the user name / of "RT"; the operands look swapped -- confirm intent.
        if tweet.text in userName or tweet.text in "RT":
            continue
        author = tweet.user.screen_name
        if author in removeIdsList or author in userName:
            continue
        print([tweet.id_str, author, tweet.text])
        attckIdList.append([
            author,
            tweet.id,
            tweet.user.name,
            urlReplyRemove(tweet.user.description),
            tweet.user.statuses_count,
        ])
    pprint(attckIdList)

    # create the tweets and upload the binaries
    print("----------------------------------------------------------------upload")
    uploadList = screenShotAndUpload(attckIdList, envName, hashTagStr)
    print("----------------------------------------------------------------post")

    # post every entry that has a status text or media ids
    for item in uploadList:
        if not (item[0] != [] or item[1] != []):
            continue
        try:
            post = api.update_status(status=item[0], media_ids=item[1])
            print([post.created_at, post.text])
        except Exception as e:
            # best effort: report the failure and keep posting the rest
            print(f'{item} is {e}')
    pprint(uploadList)

    # record [id, screen name] of everything handled in this run
    recordList = [[entry[1], entry[0]] for entry in attckIdList]
    listToCsvMulti(csvname, recordList)
示例#21
0
文件: schemes.py 项目: cheidegg/MyPAF
def run(scheme, name, schemes = None, alist = None):
	"""Dispatch to the scheme function called 'scheme' and return its result.

	scheme  -- one of "add", "bins", "card", "comp", "div", "ffit", "mult",
	           "pack", "proj", "stack", "sub", "tfit"
	name    -- forwarded as first argument to the scheme function
	schemes -- forwarded as second argument; a fresh empty list when omitted
	alist   -- forwarded as third argument; a fresh args.args("") when omitted

	Returns None when 'scheme' is not one of the names above.

	The defaults used to be '[]' and 'args.args("")' evaluated once at
	definition time, so every call without explicit arguments shared the
	same two objects; they are now created per call.
	"""

	if   scheme == "add"   : handler = add
	elif scheme == "bins"  : handler = bins
	elif scheme == "card"  : handler = card
	elif scheme == "comp"  : handler = comp
	#elif scheme == "datamc": return datamc(name, schemes[0].getHist(), [s.getHist() for s in schemes[1:]])
	elif scheme == "div"   : handler = div
	elif scheme == "ffit"  : handler = ffit
	elif scheme == "mult"  : handler = mult
	elif scheme == "pack"  : handler = pack
	elif scheme == "proj"  : handler = proj
	#elif scheme == "roc"   : return roc   ()
	elif scheme == "stack" : handler = stack
	elif scheme == "sub"   : handler = sub
	elif scheme == "tfit"  : handler = tfit
	else                   : return None

	## build the per-call defaults only once a handler is known
	if schemes is None: schemes = []
	if alist   is None: alist   = args.args("")

	return handler(name, schemes, alist)
示例#22
0
def progargs():
    """Declare the options of the snapshot tool and parse the command line.

    The parser comes from ``args(description, 21211)``; the result of its
    ``parse_args()`` is returned unchanged.
    """
    cli = args('Snapshot a VoltDB cluster.', 21211)
    add = cli.add_argument

    add('--path',
        default='/var/voltdb/snapshot/',
        help='Path to save the snapshot in. [%(default)s]')
    # store_false: 'blocking' becomes False when the flag is given
    add('--no-blocking', '-n',
        dest='blocking',
        action='store_false',
        help='Perform a non-blocking snapshot.')
    # required positional: the prefix of the snapshot files
    add('snapshotname',
        help='Snapshot filename prefix to save with')

    return cli.parse_args()
示例#23
0
文件: oblist.py 项目: cheidegg/MyPAF
	def exportAsHist(self, var = "pt"):
		"""Fill a histogram of 'var' from the list files and return it.

		var -- name of the variable to histogram; its position in self.vars
		       selects the ':='-separated field read from every line.

		Calls self.close() first, then reads the file at self.paths[sidx][cidx]
		for every source/category index pair and fills the value of each line
		into the histogram at those indices.
		Raises whatever float() raises when a field is not a number.
		"""

		self.close()

		alist = args.args("var=" + var)
		## position of the variable within a line
		## NOTE(review): what lib.findElm returns for an unknown 'var' is not visible here -- confirm
		i = lib.findElm(self.vars, var)
		binargs, names = lib.prepareHistInfo(self.db, alist)
		h = hist.hist(self.mypaf, self.name, binargs, names)
		h.build(self.sources, self.categs)
		for sidx in range(len(self.sources)):
			for cidx in range(len(self.categs)):
				## 'with' closes the file even when a line fails to parse
				with open(self.paths[sidx][cidx], "r") as f:
					for entry in f:
						h.fill(sidx, cidx, float(entry.split(":=")[i].strip()))
		return h
示例#24
0
文件: hist.py 项目: cheidegg/MyPAF
	def __init__(self, mypaf, name, dim = 1, argstring = ""):
		"""Store the histogram settings, log the call and run the setBinArgs/setParent/setLabels setup steps.

		mypaf     -- framework object; its 'db' and 'vb' attributes are kept on the instance
		name      -- name of the histogram
		dim       -- stored as given in self.dim
		argstring -- argument string parsed into self.alist
		"""

		self.name, self.dim = name, dim
		self.p = self.built = False

		self.alist = args.args(argstring)

		## style arguments: the 'style' argument, a fixed "1", and the 'color'
		## argument combined with "ROOT.kBlack" through lib.useVal
		style = self.alist.get("style")
		color = lib.useVal("ROOT.kBlack", self.alist.get("color"))
		self.salist = styleargs.styleargs(style, "1", color)

		self.mypaf = mypaf
		self.db, self.vb = mypaf.db, mypaf.vb
		self.vb.call("hist", "__init__", [self, mypaf, name, dim, argstring], "Initializing the hist class.")

		self.setBinArgs()
		self.setParent()
		self.setLabels()
示例#25
0
文件: hscheme.py 项目: cheidegg/MyPAF
	def __init__(self, mypaf, type, name, definition = "", argstring = ""):
		"""Store the scheme settings, log the call and run self.check().

		mypaf      -- framework object; its 'db' and 'vb' attributes are kept on the instance
		type, name -- stored as given
		definition -- stripped and split on whitespace into self.dargs
		argstring  -- kept verbatim in self.argstring and parsed into self.alist
		"""

		self.type, self.name = type, name
		self.dargs = definition.strip().split()
		self.alist = args.args(argstring)
		self.argstring = argstring

		## neither failed nor executed yet
		self.errorstate = self.executed = False

		self.mypaf = mypaf
		self.db, self.vb = mypaf.db, mypaf.vb

		self.vb.call("hscheme", "__init__", [self, mypaf, type, name, definition, argstring], "Initializing the hscheme class.")
		self.check()
示例#26
0
	def build(self):
		"""Fill self.alist with the drawing arguments for style self.style and index self.i.

		Only three values depend on style/index:
		  draw1mode  -- "pe" for index 1, "hist" otherwise
		  fillstyle  -- "1001" for index 2 of the "mult" style, "0" otherwise
		  markersize -- "1.0" for the "mult" style, "1.8" for every other style
		linestyle, linewidth and markerstyle are always "1", "2" and "8".
		"""

		self.alist = args.args("color=" + self.color)

		mult  = self.style == "mult"
		first = self.i == 1

		## same keys and same order as they were set before
		settings = (
			("draw1mode"  , "pe" if first else "hist"),
			("fillstyle"  , "1001" if mult and self.i == 2 else "0"),
			("linestyle"  , "1"),
			("linewidth"  , "2"),
			("markerstyle", "8"),
			("markersize" , "1.0" if mult else "1.8"),
		)
		for key, value in settings:
			self.alist.set(key, value)
示例#27
0
def progargs():
    """Declare the options of the PostgreSQL-to-VoltDB import tool and parse the command line.

    The parser comes from ``args(description, 21212)``; the result of its
    ``parse_args()`` is returned unchanged.
    """
    # (flags, add_argument keyword settings), registered in this order
    option_table = (
        (('--pgport',),
         {'default': 5432,
          'type': int,
          'metavar': 'PORT',
          'help': 'Port to connect to PostgreSQL on. [%(default)s]'}),
        (('--pguser',),
         {'default': 'voltdb',
          'metavar': 'USER',
          'help': 'User to connect to PostgreSQL as. [%(default)s]'}),
        (('--pgpassword',),
         {'default': '',  # Workaround Python 2.4's psycopg2 not supporting None
          'metavar': 'PASSWD',
          'help': 'Password to connect to PostgreSQL with. [%(default)s]'}),
        (('--pgdb',),
         {'default': 'voltdb',
          'metavar': 'DB',
          'help': 'PostgreSQL database to use. [%(default)s]'}),
        (('--pgschema',),
         {'default': 'voltdb',
          'metavar': 'SCHEMA',
          'help': 'PostgreSQL schema to use. [%(default)s]'}),
        (('--quiet', '-q'),
         {'action': 'store_false',
          'dest': 'verbose',
          'help': "Be quiet, don't output status messages"}),
        (('pgsqlserver',),
         {'help': 'PostgreSQL database to dump from.'}),
    )

    parser = args('Import database from PostgreSQL to VoltDB', 21212)
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)

    return parser.parse_args()
示例#28
0
文件: output.py 项目: cheidegg/MyPAF
	def buildPlot(self, objnames = []):
		"""Call openFile() and register one single-source histogram per 'file'/'plot' output object.

		objnames -- names of the output objects to restrict the registration to;
		            an empty list keeps every matching object.
		"""

		self.vb.call("output", "buildPlot", [self], "Building the plot module.")
		self.openFile()

		selected = self.mypaf.input.cfg.getObjs("region=='output' and (type=='file' or type=='plot')")
		if len(objnames) > 0:
			selected = lib.getElmAttrAllOr(selected, "name", objnames)

		for obj in selected:
			arglist = args.args(obj.argstring)

			## a histogram needs an observable: either 'obs' or 'obsx'
			if not (arglist.has("obs") or arglist.has("obsx")):
				self.vb.warning("Physics observable (argument 'obs') is not given for histogram. Histogram is ignored.")
				continue

			categs = [s.name for s in self.mypaf.findSelections(["tree"], arglist)]

			## the histogram is registered with the single source named by the 'source' argument
			src = arglist.get("source")
			self.objcoll.addHist(obj.name, obj.argstring, [src], categs)

			## objects of type 'plot' are additionally flagged through setHistP
			if obj.type == "plot":
				self.objcoll.setHistP(obj.name)
			del arglist
示例#29
0
def progargs():
    """Declare the options of the cluster shutdown tool and parse the command line.

    The parser comes from ``args(description, 21211)``; the result of its
    ``parse_args()`` is returned unchanged.
    """
    # (flags, add_argument keyword settings), registered in this order
    option_table = (
        (('--path',),
         {'default': '/var/voltdb/snapshot/',
          'help': 'Path to save the snapshot in. [%(default)s]'}),
        (('--no-quiesce',),
         {'action': 'store_true',
          'help': 'Do not quiesce the database before shutdown.  DANGER! Exports may not be complete with this option.'}),
        (('--snapshot',),
         {'default': None,
          'metavar': 'NAME',
          'help': 'Perform a snapshot before shutting down.'}),
        (('--snapshot-path',),
         {'default': '/var/voltdb/snapshot/',
          'metavar': 'PATH',
          'help': 'Path to save the snapshot in. [%(default)s]'}),
    )

    parser = args('Shutdown a VoltDB cluster', 21211)
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)

    return parser.parse_args()
示例#30
0
        fname = os.path.join(summary_folder, run_type + '.dat')

    if result.parallel:
        c = parallel.Client(profile=result.profile)
        view = c.load_balanced_view()
    else:
        view = None

    #load arguments
    sys.path.insert(0, main_folder)
    try:
        del sys.modules['args']
    except KeyError:
        pass
    import args
    exp_kwargs = args.args()

    #run
    if result.run:

        #run experiment
        pprint.pprint(exp_kwargs)
        exp = run_experiments(view=view,
                              action=action,
                              single_runs_folder=single_runs_folder,
                              **exp_kwargs)

        #collect data
        if action == 'collect':
            data = merge(exp)
            if not run_regress:
示例#31
0
    if result.parallel:
        c = parallel.Client(profile=result.profile)
        view = c.load_balanced_view()
    else:
        view = None


    #load arguments
    sys.path.insert(0, main_folder)
    try:
        del sys.modules['args']
    except KeyError:
        pass
    import args
    exp_kwargs = args.args()


    #run
    if result.run:

        #run experiment
        pprint.pprint(exp_kwargs)
        exp = run_experiments(view=view, action=action, single_runs_folder=single_runs_folder,
                              **exp_kwargs)

        #collect data
        if action == 'collect':
            data = merge(exp)
            if not run_regress:
                estimators=('HDDM2Single', 'Quantiles_subj', 'ML')
    def __init__(self, type, training_steps_per_epoch, vocabSize):
        self.type = type
        self.args = args(type)

        print("build model")
        print("vocabSize", vocabSize)

        # ==============================================================================
        #                               Set necessary parameters
        # ==============================================================================
        if (type == "train"):
            self.global_step = tf.Variable(0, trainable=False)
            self.learning_rate = tf.maximum(
                tf.train.exponential_decay(
                    self.args.learning_rate,
                    self.global_step,
                    training_steps_per_epoch,
                    self.args.learning_rate_decay_factor,
                    staircase=True), self.args.min_learning_rate)

        START = tf.constant(value=[self.args.GO] * self.args.batch_size)
        # ==============================================================================
        #                               define placeholder
        # ==============================================================================
        with tf.name_scope("placeholder"):
            self.personas_ph = tf.placeholder(
                tf.int32,
                shape=[
                    self.args.batch_size, self.args.max_num_persona,
                    self.args.max_num_personalength
                ],
                name="personas")
            self.personas_len_ph = tf.placeholder(
                tf.int32,
                shape=[self.args.batch_size, self.args.max_num_persona],
                name="persona_lengths")
            self.persona_turn = tf.placeholder(tf.int32,
                                               shape=[self.args.batch_size],
                                               name="persona_turn")

            self.historys_ph = tf.placeholder(
                tf.int32,
                shape=[
                    self.args.batch_size,
                    2 * self.args.max_num_history_turns + 1,
                    self.args.max_num_Qlength
                ],
                name="historys")
            self.historys_len_ph = tf.placeholder(
                tf.int32,
                shape=[
                    self.args.batch_size,
                    2 * self.args.max_num_history_turns + 1
                ],
                name="history_lengths")
            self.historys_turn = tf.placeholder(tf.int32,
                                                shape=[self.args.batch_size],
                                                name="history_turn")

            self.answers_ph = tf.placeholder(
                tf.int32,
                shape=[self.args.batch_size, self.args.max_num_Alength],
                name="answers")
            self.answer_len_ph = tf.placeholder(tf.int32,
                                                shape=[self.args.batch_size],
                                                name="answer_lengths")
            self.answer_targets_ph = tf.placeholder(
                tf.int32,
                shape=[self.args.batch_size, self.args.max_num_Alength + 1],
                name="answer_targets")

        personas_turn_mask = tf.sequence_mask(self.persona_turn,
                                              self.args.max_num_persona)
        self.att_persona_sentence_mask = tf.cast(personas_turn_mask,
                                                 dtype=tf.float32)
        print("self.att_persona_sentence_mask :",
              self.att_persona_sentence_mask)
        personas_len_mask = tf.sequence_mask(self.personas_len_ph,
                                             self.args.max_num_personalength)
        personas_len_mask = tf.reshape(personas_len_mask,
                                       [self.args.batch_size, -1])
        self.att_persona_mask = tf.cast(personas_len_mask, dtype=tf.float32)
        print("self.att_persona_mask:", self.att_persona_mask)
        historys_len_mask = tf.sequence_mask(self.historys_len_ph,
                                             self.args.max_num_Qlength)
        historys_len_mask = tf.reshape(historys_len_mask,
                                       [self.args.batch_size, -1])
        self.att_message_mask = tf.cast(historys_len_mask, dtype=tf.float32)
        print("self.att_message_mask:", self.att_message_mask)

        # Because EOS is added at the end, the length is increased by 1
        self.answer_len_ph_ = self.answer_len_ph + 1

        self.topic_words_emb_ph = tf.placeholder(tf.float32,
                                                 shape=[
                                                     self.args.batch_size,
                                                     self.args.num_topic_words,
                                                     self.args.num_topics
                                                 ],
                                                 name="topic_words_emb")
        # ---------------------------------------------------
        # bow-loss
        # ---------------------------------------------------
        self.answers_in_persona_label = tf.placeholder(
            tf.int32,
            shape=[self.args.batch_size, vocabSize],
            name="answers_in_persona_label")  # 0/1 标签
        answers_in_persona_label = tf.cast(self.answers_in_persona_label,
                                           tf.float32)

        # ---------------------------------------------------
        # persona attention label
        # ---------------------------------------------------
        self.answer_attention_ph = tf.placeholder(
            tf.float32,
            shape=[self.args.batch_size, self.args.max_num_persona],
            name="answer_attention")

        # ==============================================================================
        # Embedding (share) and other variable
        # ==============================================================================
        with ops.device("/cpu:0"):
            if variable_scope.get_variable_scope().initializer:
                initializer = variable_scope.get_variable_scope().initializer
            else:
                File = h5py.File("../Data/glove_train.h5", 'r')
                initializer = np.array(File["embedding"])
            embedding = variable_scope.get_variable(name="embedding",
                                                    initializer=initializer,
                                                    dtype=tf.float32)

            # Weights
            self.W_p_key = self.random_weight(self.args.rnnHiddenSize * 2,
                                              self.args.rnnHiddenSize * 2,
                                              name="W_p_key")
            self.W_p_value = self.random_weight(self.args.rnnHiddenSize * 2,
                                                self.args.rnnHiddenSize * 2,
                                                name="W_p_value")

        START_EMB = embedding_ops.embedding_lookup(embedding, START)

        # ==============================================================================
        # split placeholders and embed
        # ==============================================================================
        personas = embedding_ops.embedding_lookup(embedding, self.personas_ph)
        personas = tf.transpose(personas, [1, 0, 2, 3])
        personas_lengths = tf.transpose(self.personas_len_ph, [1, 0])
        historys = embedding_ops.embedding_lookup(embedding, self.historys_ph)
        historys = tf.transpose(historys, [1, 0, 2, 3])
        historys_lengths = tf.transpose(self.historys_len_ph, [1, 0])
        # questions = embedding_ops.embedding_lookup(embedding, self.questions_ph)
        answers = embedding_ops.embedding_lookup(embedding, self.answers_ph)

        # ==============================================================================
        # make RNN cell
        # ==============================================================================
        def single_cell(hidden_size, in_keep_prob):
            """Create one recurrent cell wrapped with input dropout.

            Args:
                hidden_size: Number of units passed to the cell constructor.
                in_keep_prob: Value forwarded as ``input_keep_prob`` to the
                    dropout wrapper.

            Returns:
                A ``DropoutWrapper`` around a ``BasicLSTMCell`` when
                ``self.args.use_lstm`` is truthy, otherwise around a
                ``GRUCell``.
            """
            rnn = tf.contrib.rnn
            if self.args.use_lstm:
                base_cell = rnn.BasicLSTMCell(
                    hidden_size, forget_bias=1.0, state_is_tuple=True)
            else:
                base_cell = rnn.GRUCell(hidden_size)
            # Dropout is applied to the cell inputs only.
            return rnn.DropoutWrapper(base_cell, input_keep_prob=in_keep_prob)

        def make_cell(hidden_size, in_keep_prob):
            """Build the recurrent cell used by the encoders.

            Args:
                hidden_size: Number of units in each layer.
                in_keep_prob: Input keep probability forwarded to
                    ``single_cell``.

            Returns:
                A ``MultiRNNCell`` stacking ``self.args.rnnLayers`` cells
                produced by ``single_cell`` when more than one layer is
                configured, otherwise one cell from ``single_cell``.
            """
            num_layers = self.args.rnnLayers
            if num_layers > 1:
                # The stack depth must follow the configured layer count.
                # Iterating over hidden_size (as before) would create one
                # layer per hidden unit.
                return tf.contrib.rnn.MultiRNNCell([
                    single_cell(hidden_size, in_keep_prob)
                    for _ in range(num_layers)
                ])
            return single_cell(hidden_size, in_keep_prob)

        fw_encoder_cell_persona = make_cell(self.args.rnnHiddenSize,
                                            self.args.keep_prob)
        bw_encoder_cell_persona = make_cell(self.args.rnnHiddenSize,
                                            self.args.keep_prob)
        fw_encoder_cell_history_1 = make_cell(self.args.rnnHiddenSize,
                                              self.args.keep_prob)
        bw_encoder_cell_history_1 = make_cell(self.args.rnnHiddenSize,
                                              self.args.keep_prob)
        fw_encoder_cell_history_2 = make_cell(self.args.rnnHiddenSize,
                                              self.args.keep_prob)
        bw_encoder_cell_history_2 = make_cell(self.args.rnnHiddenSize,
                                              self.args.keep_prob)

        persona_word_enc = []
        message_word_enc = []

        # ==============================================================================
        # encode persona
        # ==============================================================================
        print("encode personas...")
        personas_enc = []
        for i in range(self.args.max_num_persona):
            with tf.variable_scope('persona_sentence_EncoderRNN',
                                   reuse=tf.AUTO_REUSE) as varscope:
                persona_sentence_Output, persona_sentence_State = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw=fw_encoder_cell_persona,
                    cell_bw=bw_encoder_cell_persona,
                    inputs=personas[i],
                    sequence_length=personas_lengths[i],
                    dtype=tf.float32,
                    scope=varscope)  # [batch_size, encoder_cell.state_size]
                persona_sentence_Output = tf.concat(
                    [persona_sentence_Output[0], persona_sentence_Output[1]],
                    -1)
                persona_sentence_State = tf.concat(
                    [persona_sentence_State[0], persona_sentence_State[1]], -1)
            # print("persona_sentence_State.h:",persona_sentence_State.h)

            persona_sentence_State = tf.reshape(persona_sentence_State,
                                                [self.args.batch_size, 1, -1])
            if i == 0:
                personas_enc = persona_sentence_State
                persona_word_enc = persona_sentence_Output
            else:
                personas_enc = tf.concat(
                    [personas_enc, persona_sentence_State],
                    1)  # sentenses memory
                persona_word_enc = tf.concat(
                    [persona_word_enc, persona_sentence_Output],
                    1)  # words memory

        print("personas_enc:",
              personas_enc)  # [batch_size, max_num_persona, hiddensize]
        print("persona_word_enc:", persona_word_enc
              )  # [batch_size, max_num_persona*persona_length, hiddensize]

        # ==============================================================================
        # encode history (HRED)
        # ==============================================================================
        print("encode history...")
        historys_sentence_enc = []
        for i in range(2 * self.args.max_num_history_turns + 1):
            with tf.variable_scope('history_sentence_EncoderRNN',
                                   reuse=tf.AUTO_REUSE) as varscope:
                history_sentence_Output, history_sentence_State = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw=fw_encoder_cell_history_1,
                    cell_bw=bw_encoder_cell_history_1,
                    inputs=historys[i],
                    sequence_length=historys_lengths[i],
                    dtype=tf.float32,
                    scope=varscope)  # [batch_size, encoder_cell.state_size]
                history_sentence_Output = tf.concat(
                    [history_sentence_Output[0], history_sentence_Output[1]],
                    -1)
                history_sentence_State = tf.concat(
                    [history_sentence_State[0], history_sentence_State[1]], -1)
            # print("history_sentence_State:",history_sentence_State)

            history_sentence_State = tf.reshape(history_sentence_State,
                                                [self.args.batch_size, 1, -1])
            if i == 0:
                historys_sentence_enc = history_sentence_State
                message_word_enc = history_sentence_Output
            else:
                historys_sentence_enc = tf.concat(
                    [historys_sentence_enc, history_sentence_State], 1)
                message_word_enc = tf.concat(
                    [message_word_enc, history_sentence_Output], 1)
        print("historys_sentence_enc:",
              historys_sentence_enc)  # [batch_size, h_turn, hidden_size*2]
        print("message_word_enc:", message_word_enc)
        with tf.variable_scope('history_sequence_EncoderRNN',
                               reuse=tf.AUTO_REUSE) as varscope:
            history_sequence_Output, history_sequence_State = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=fw_encoder_cell_history_2,
                cell_bw=bw_encoder_cell_history_2,
                inputs=historys_sentence_enc,
                sequence_length=self.historys_turn,
                dtype=tf.float32,
                scope=varscope)  # [batch_size, encoder_cell.state_size]
            history_sequence_Output = tf.concat(
                [history_sequence_Output[0], history_sequence_Output[1]], -1)
            history_sequence_State = tf.concat(
                [history_sequence_State[0], history_sequence_State[1]], -1)
        print("history_sequence_output:",
              history_sequence_Output)  # [batch_size, h_turn, hidden_size]
        history_sequence_Output = tf.transpose(history_sequence_Output,
                                               [1, 0, 2])
        print("history_sequence_output:",
              history_sequence_Output)  # [h_turn, batch_size, hidden_size]
        print("history_sequence_State:",
              history_sequence_State)  # [batch_size, hidden_size]

        # ====================================================
        # context retrieve persona sentense memory
        # ====================================================
        print("query -->  persona_sentense-memory")
        personas_enc_key = tf.matmul(personas_enc, self.W_p_key)
        personas_enc_value = tf.matmul(personas_enc, self.W_p_value)
        persona_memory_enc = []
        persona_a_t_all = []

        query = 0
        for i in range(2 * self.args.max_num_history_turns + 1):
            query = query + history_sequence_Output[i]
            s = tf.reduce_sum(
                tf.multiply(tf.expand_dims(query, 1), personas_enc_key),
                2)  # [batch_size, len]
            a_t = tf.nn.softmax(s)
            # print("a_t:", a_t)
            persona_a_t_all.append(a_t)
            v_P = tf.reduce_sum(
                tf.multiply(tf.expand_dims(a_t, -1), personas_enc_value), 1)
            query = v_P
            # query = query + v_P
            persona_memory_enc.append(query)  # [len, batch, hiden_size]

        # Select the final memory result according to the context length
        a = tf.range(self.args.batch_size)
        b = self.historys_turn - 1
        index = tf.concat([tf.expand_dims(b, 1), tf.expand_dims(a, 1)], 1)
        print("index:", index)
        persona_memory = tf.gather_nd(persona_memory_enc, index)
        print("persona_memory:", persona_memory)
        self.persona_a_t = tf.gather_nd(persona_a_t_all, index)

        # ====================================================
        # Merge information,get s0
        # ====================================================
        encoder_State = tf.concat(
            values=[history_sequence_State,
                    persona_memory], axis=1)  # [batch_size, (2*hidden_size)*2]
        encoder_State = tf.layers.dense(encoder_State, self.args.rnnHiddenSize)
        print("encoder_State:", encoder_State)

        # attention sentences
        persona_sentence_attention_State = personas_enc
        print("persona_sentence_attention_State:",
              persona_sentence_attention_State)
        # attention words
        persona_attention_State = persona_word_enc
        message_attention_State = message_word_enc
        print("persona_attention_State:", persona_attention_State)
        print("message_attention_State:", message_attention_State)

        # ==============================================================================
        # decode
        # ==============================================================================
        print("decode ...")
        with tf.variable_scope('DecoderRNN'):
            # att_persona_sentence_mask = self.att_persona_sentence_mask
            att_persona_mask = self.att_persona_mask
            att_message_mask = self.att_message_mask
            topic_words_emb_ph = self.topic_words_emb_ph
            encoder_State_s0 = encoder_State
            if (self.type != "train") and self.args.beam_search:
                print("use beamsearch decoding..  num_BeamSearch=",
                      self.args.num_BeamSearch)
                persona_attention_State = tf.contrib.seq2seq.tile_batch(
                    persona_attention_State,
                    multiplier=self.args.num_BeamSearch)
                att_persona_mask = tf.contrib.seq2seq.tile_batch(
                    self.att_persona_mask, multiplier=self.args.num_BeamSearch)
                message_attention_State = tf.contrib.seq2seq.tile_batch(
                    message_attention_State,
                    multiplier=self.args.num_BeamSearch)
                att_message_mask = tf.contrib.seq2seq.tile_batch(
                    self.att_message_mask, multiplier=self.args.num_BeamSearch)
                topic_words_emb_ph = tf.contrib.seq2seq.tile_batch(
                    self.topic_words_emb_ph,
                    multiplier=self.args.num_BeamSearch)
                encoder_State_s0 = tf.contrib.seq2seq.tile_batch(
                    encoder_State, multiplier=self.args.num_BeamSearch)
                encoder_State = nest.map_structure(
                    lambda s: tf.contrib.seq2seq.tile_batch(
                        s, self.args.num_BeamSearch), encoder_State)

            # mask
            message_mask_inf = 1 - att_message_mask
            mask = np.zeros(att_message_mask.shape)
            for i in range(att_message_mask.shape[0]):
                for j in range(att_message_mask.shape[1]):
                    if message_mask_inf[i][j] == 1:
                        mask[i][j] = -np.inf
            att_message_mask_inf = message_mask_inf * mask

            persona_mask_inf = 1 - att_persona_mask
            mask = np.zeros(att_persona_mask.shape)
            for i in range(att_persona_mask.shape[0]):
                for j in range(att_persona_mask.shape[1]):
                    if persona_mask_inf[i][j] == 1:
                        mask[i][j] = -np.inf
            att_persona_mask_inf = persona_mask_inf * mask

            self.decoder_cell_ = MyCell(self.args.rnnHiddenSize,
                                        persona_attention_State,
                                        att_persona_mask_inf,
                                        message_attention_State,
                                        att_message_mask_inf, encoder_State_s0,
                                        topic_words_emb_ph)

            # ------------- dropout ------------------
            self.decoder_cell = tf.contrib.rnn.DropoutWrapper(
                self.decoder_cell_, input_keep_prob=self.args.keep_prob)

            # The decoder used by train and test is different, for the variable name correspondence
            if (self.type == "train"):
                output_layer = tf.compat.v1.layers.Dense(
                    vocabSize,
                    kernel_initializer=tf.truncated_normal_initializer(
                        mean=0.0, stddev=0.1),
                    name='decoder/dense')
            else:
                output_layer = tf.compat.v1.layers.Dense(
                    vocabSize,
                    kernel_initializer=tf.truncated_normal_initializer(
                        mean=0.0, stddev=0.1))

            if (self.type == "train"):
                answers = [
                    tf.squeeze(input=word, axis=1) for word in tf.split(
                        value=answers,
                        num_or_size_splits=self.args.max_num_Alength,
                        axis=1)
                ]
                # print("answer:",answer)
                answers = [START_EMB] + answers
                # answers = tf.transpose(answers, [1, 0, 2])  # [batch_size, A_length+1, embedding_size]
                print("answers:", answers)

                decoder_Outputs, decoder_State = static_rnn(
                    cell=self.decoder_cell,
                    inputs=answers,
                    initial_state=encoder_State,
                    sequence_length=self.answer_len_ph_,
                    dtype=tf.float32,
                    scope="decoder")
                decoder_Outputs = tf.stack(decoder_Outputs, 1)
                print("decoder_Outputs:", decoder_Outputs)

                self.decoder_logits_train = output_layer(
                    decoder_Outputs)  # [batch_size, A_len, vocab]
                print("self.decoder_logits_train:", self.decoder_logits_train)

                # result
                self.answers_predict = tf.argmax(self.decoder_logits_train,
                                                 axis=-1,
                                                 name='answers_predict')
                print("self.answers_predict:", self.answers_predict)

                mask = tf.cast(x=tf.not_equal(x=self.answer_targets_ph,
                                              y=self.args.PAD),
                               dtype=tf.float32)  # [batch_size, Alength+1]

                self.loss1 = tf.contrib.seq2seq.sequence_loss(
                    logits=self.decoder_logits_train,
                    targets=self.answer_targets_ph,
                    weights=mask)

                self.ppl = tf.reduce_mean(tf.exp(self.loss1))

                # ----------------------------------------------------------------------------------------------
                #  P-BoWs loss
                # -----------------------------------------------------------------------------------------------
                print("bow-loss-sigmoid-weight")
                print("lamba_loss1:", self.args.lamba_loss1)
                bow_state = tf.reduce_sum(self.decoder_logits_train,
                                          1)  # [batch_size, vocab]
                self.bow_prediction = tf.nn.sigmoid(bow_state)
                print("self.bow_prediction:",
                      self.bow_prediction)  # [batch_size, vocab]

                target_one_hot_bow = tf.one_hot(
                    indices=self.answer_targets_ph,
                    depth=vocabSize,
                    dtype=tf.float32)  # [batch_size, Alength+1, vocab]
                target_bow = tf.reduce_max(input_tensor=target_one_hot_bow,
                                           axis=1)  # [batch_size, vocab]
                # mask2 remove pad, eos, etc.
                m1 = [1.0 for _ in range(vocabSize - 4)]
                m2 = [0.0 for _ in range(4)]
                m3 = tf.reshape(tf.concat([m2, m1], 0), [1, -1])
                self.mask2 = tf.concat([m3] * self.args.batch_size, axis=0)
                print("mask2:", self.mask2)
                self.target_bow = target_bow * self.mask2 + answers_in_persona_label * self.args.lamba_persona_weight
                print("self.target_bow:", self.target_bow)
                # sigmoid loss ylogy+(1-y)log(1-y)
                self.loss2 = -tf.reduce_mean(
                    input_tensor=self.target_bow *
                    tf.log(self.bow_prediction + eps) +
                    (1 - self.target_bow) * tf.log(
                        (1 - self.bow_prediction) + eps),
                    axis=1)

                # ----------------------------------------------------------------------------------------------
                # P-Match loss
                # -----------------------------------------------------------------------------------------------
                print("lamba_loss2:", self.args.lamba_loss2)
                persona_a_t = tf.log(self.persona_a_t + eps)
                print("answer_attention_ph:", self.answer_attention_ph)
                print("persona_sentence_a_t:", persona_a_t)
                self.loss3 = -tf.reduce_sum(
                    input_tensor=persona_a_t * self.answer_attention_ph,
                    axis=1)  # [batch_size]
                print("self.loss3:", self.loss3)

                # total loss
                self.loss1 = tf.reduce_mean(self.loss1)
                self.loss2 = tf.reduce_mean(self.loss2)
                self.loss3 = tf.reduce_mean(self.loss3)

                self.loss = self.loss1 + self.args.lamba_loss1 * self.loss2 + self.args.lamba_loss2 * self.loss3

                # self.loss = tf.reduce_mean(self.loss1)

                # -----------tersonborad ------------------
                # tf.summary.scalar('loss', self.loss)

                params = tf.trainable_variables()
                gradients = tf.gradients(self.loss, params)
                clipped_gradients, norm = tf.clip_by_global_norm(
                    gradients, self.args.max_gradient_norm)
                self.opt_op = tf.compat.v1.train.AdamOptimizer(
                    self.learning_rate).apply_gradients(
                        zip(clipped_gradients, params),
                        global_step=self.global_step)
            else:
                start_tokens = tf.ones([
                    self.args.batch_size,
                ], tf.int32) * self.args.GO
                end_token = self.args.EOS
                if self.args.beam_search:
                    print("decoder_cell:", self.decoder_cell)
                    inference_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                        cell=self.decoder_cell,
                        embedding=embedding,
                        start_tokens=start_tokens,
                        end_token=end_token,
                        initial_state=encoder_State,
                        beam_width=self.args.num_BeamSearch,
                        output_layer=output_layer,
                        length_penalty_weight=0.5)
                else:
                    decoding_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                        embedding=embedding,
                        start_tokens=start_tokens,
                        end_token=end_token)
                    inference_decoder = tf.contrib.seq2seq.BasicDecoder(
                        cell=self.decoder_cell,
                        helper=decoding_helper,
                        initial_state=encoder_State,
                        output_layer=output_layer)
                decoder_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                    decoder=inference_decoder,
                    maximum_iterations=self.args.max_num_Alength + 1,
                    scope="decoder")

                if self.args.beam_search:
                    self.decoder_predict_decode = decoder_outputs.predicted_ids
                else:
                    self.decoder_predict_decode = tf.expand_dims(
                        decoder_outputs.sample_id, -1)
                print("self.decoder_predict_decode:",
                      self.decoder_predict_decode)

                # Take the first result
                self.answers_predict = self.decoder_predict_decode[:, :, 0]
                print("answers_predict:", self.answers_predict)

        variable = [v for v in tf.trainable_variables()]
        for v in variable:
            print(v)

        self.saver = tf.train.Saver(tf.global_variables(),
                                    max_to_keep=99999999)
        print("build model finish")
示例#33
0
def main():
    opt = args.args()

    if opt.load_dir:
        assert os.path.isdir(opt.load_dir)
        opt.save_dir = opt.load_dir
    else:
        opt.save_dir = '{}/{}_{}_{}_{}'.format(opt.save_dir, opt.dataset,
                                               opt.model, opt.noise_type,
                                               int(opt.noise * 100))
    try:
        os.makedirs(opt.save_dir)
    except OSError:
        pass
    cudnn.benchmark = True

    logger = logging.getLogger("ydk_logger")
    fileHandler = logging.FileHandler(opt.save_dir + '/train.log')
    streamHandler = logging.StreamHandler()

    logger.addHandler(fileHandler)
    logger.addHandler(streamHandler)

    logger.setLevel(logging.INFO)
    logger.info(opt)
    ###################################################################################################
    if opt.dataset == 'cifar10_wo_val':
        num_classes = 10
        in_channels = 3
    else:
        logger.info('There exists no data')

    ##
    # Computing mean
    trainset = dset.ImageFolder(root='{}/{}/train'.format(
        opt.dataroot, opt.dataset),
                                transform=transforms.ToTensor())
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=opt.batchSize,
                                              shuffle=False,
                                              num_workers=opt.workers)
    mean = 0
    for i, data in enumerate(trainloader, 0):
        imgs, labels = data
        mean += torch.from_numpy(np.mean(np.asarray(imgs), axis=(2, 3))).sum(0)
    mean = mean / len(trainset)
    ##

    transform_train = transforms.Compose([
        transforms.Resize(opt.imageSize),
        transforms.RandomCrop(opt.imageSize, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((mean[0], mean[1], mean[2]), (1.0, 1.0, 1.0))
    ])

    transform_test = transforms.Compose([
        transforms.Resize(opt.imageSize),
        transforms.ToTensor(),
        transforms.Normalize((mean[0], mean[1], mean[2]), (1.0, 1.0, 1.0))
    ])

    logger.info(transform_train)
    logger.info(transform_test)

    with open(
            'noise/%s/train_labels_n%02d_%s' %
        (opt.noise_type, opt.noise * 000, opt.dataset), 'rb') as fp:
        clean_labels = pickle.load(fp)
    with open(
            'noise/%s/train_labels_n%02d_%s' %
        (opt.noise_type, opt.noise * 100, opt.dataset), 'rb') as fp:
        noisy_labels = pickle.load(fp)
    logger.info(
        float(np.sum(clean_labels != noisy_labels)) / len(clean_labels))

    trainset = noisy_folder.ImageFolder(root='{}/{}/train'.format(
        opt.dataroot, opt.dataset),
                                        noisy_labels=noisy_labels,
                                        transform=transform_train)
    testset = dset.ImageFolder(root='{}/{}/test'.format(
        opt.dataroot, opt.dataset),
                               transform=transform_test)

    clean_labels = list(clean_labels.astype(int))
    noisy_labels = list(noisy_labels.astype(int))

    # Indices of the noisy samples
    inds_noisy = np.asarray([
        ind for ind in range(len(trainset))
        if trainset.imgs[ind][-1] != clean_labels[ind]
    ])
    inds_clean = np.delete(np.arange(len(trainset)), inds_noisy)
    print(len(inds_noisy))

    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=opt.batchSize,
                                              shuffle=True,
                                              num_workers=opt.workers)
    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=opt.batchSize,
                                             shuffle=False,
                                             num_workers=opt.workers)

    if opt.model == 'resnet34':
        net = resnet.resnet34(in_channels=in_channels, num_classes=num_classes)
    else:
        logger.info('no model exists')

    weight = torch.FloatTensor(num_classes).zero_() + 1.
    for i in range(num_classes):
        weight[i] = (torch.from_numpy(
            np.array(trainset.imgs)[:, 1].astype(int)) == i).sum()
    weight = 1 / (weight / weight.max())

    criterion = nn.CrossEntropyLoss(weight=weight)
    criterion_nll = nn.NLLLoss()
    criterion_nr = nn.CrossEntropyLoss(reduce=False)

    # net
    # criterion
    # criterion_nll
    # criterion_nr

    optimizer = optim.SGD(net.parameters(),
                          lr=opt.lr,
                          momentum=opt.momentum,
                          weight_decay=opt.weight_decay)

    train_preds = torch.zeros(len(trainset), num_classes) - 1.
    num_hist = 10
    train_preds_hist = torch.zeros(len(trainset), num_hist, num_classes)
    pl_ratio = 0.
    nl_ratio = 1. - pl_ratio
    train_losses = torch.zeros(len(trainset)) - 1.

    if opt.load_dir:
        ckpt = torch.load(opt.load_dir + '/' + opt.load_pth)
        net.load_state_dict(ckpt['state_dict'])
        optimizer.load_state_dict(ckpt['optimizer'])
        train_preds_hist = ckpt['train_preds_hist']
        pl_ratio = ckpt['pl_ratio']
        nl_ratio = ckpt['nl_ratio']
        epoch_resume = ckpt['epoch']
        logger.info('loading network SUCCESSFUL')
    else:
        epoch_resume = 0
        logger.info('loading network FAILURE')
    ###################################################################################################
    # Start training

    best_test_acc = 0.0
    for epoch in range(epoch_resume, opt.max_epochs):
        train_loss = train_loss_neg = train_acc = 0.0
        pl = 0.
        nl = 0.
        if epoch in opt.epoch_step:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.1
                opt.lr = param_group['lr']

        for i, data in enumerate(trainloader, 0):

            net.zero_grad()
            imgs, labels, index = data
            labels_neg = (labels.unsqueeze(-1).repeat(1, opt.ln_neg) +
                          torch.LongTensor(len(labels), opt.ln_neg).random_(
                              1, num_classes)) % num_classes

            assert labels_neg.max() <= num_classes - 1
            assert labels_neg.min() >= 0
            assert (labels_neg != labels.unsqueeze(-1).repeat(
                1, opt.ln_neg)).sum() == len(labels) * opt.ln_neg

            imgs = Variable(imgs)
            labels = Variable(labels)
            labels_neg = Variable(labels_neg)

            logits = net(imgs)

            ##
            s_neg = torch.log(
                torch.clamp(1. - F.softmax(logits, -1), min=1e-5, max=1.))
            s_neg *= weight[labels].unsqueeze(-1).expand(s_neg.size())

            _, pred = torch.max(logits.data, -1)
            acc = float((pred == labels.data).sum())
            train_acc += acc

            train_loss += imgs.size(0) * criterion(logits, labels).data
            train_loss_neg += imgs.size(0) * criterion_nll(
                s_neg, labels_neg[:, 0]).data
            train_losses[index] = criterion_nr(logits, labels).cpu().data
            ##

            if epoch >= opt.switch_epoch:
                if epoch == opt.switch_epoch and i == 0:
                    logger.info('Switch to SelNL')
                labels_neg[train_preds_hist.mean(1)[index, labels] < 1 /
                           float(num_classes)] = -100
                labels = labels * 0 - 100
            else:
                labels = labels * 0 - 100

            loss = criterion(logits, labels) * float((labels >= 0).sum())
            loss_neg = criterion_nll(
                s_neg.repeat(opt.ln_neg, 1),
                labels_neg.t().contiguous().view(-1)) * float(
                    (labels_neg >= 0).sum())

            ((loss + loss_neg) / (float((labels >= 0).sum()) + float(
                (labels_neg[:, 0] >= 0).sum()))).backward()
            optimizer.step()

            train_preds[index.cpu()] = F.softmax(logits, -1).cpu().data
            pl += float((labels >= 0).sum())
            nl += float((labels_neg[:, 0] >= 0).sum())

        train_loss /= len(trainset)
        train_loss_neg /= len(trainset)
        train_acc /= len(trainset)
        pl_ratio = pl / float(len(trainset))
        nl_ratio = nl / float(len(trainset))
        noise_ratio = 1. - pl_ratio

        noise = (np.array(trainset.imgs)[:, 1].astype(int) !=
                 np.array(clean_labels)).sum()
        logger.info(
            '[%6d/%6d] loss: %5f, loss_neg: %5f, acc: %5f, lr: %5f, noise: %d, pl: %5f, nl: %5f, noise_ratio: %5f'
            % (epoch, opt.max_epochs, train_loss, train_loss_neg, train_acc,
               opt.lr, noise, pl_ratio, nl_ratio, noise_ratio))
        ###############################################################################################
        if epoch == 0:
            for i in range(in_channels):
                imgs.data[:, i] += mean[i]
            img = vutils.make_grid(imgs.data)
            vutils.save_image(img, '%s/x.jpg' % (opt.save_dir))
            logger.info('%s/x.jpg saved' % (opt.save_dir))

        net.eval()
        test_loss = test_acc = 0.0
        with torch.no_grad():
            for i, data in enumerate(testloader, 0):
                imgs, labels = data
                imgs = Variable(imgs)
                labels = Variable(labels)

                logits = net(imgs)
                loss = criterion(logits, labels)
                test_loss += imgs.size(0) * loss.data

                _, pred = torch.max(logits.data, -1)
                acc = float((pred == labels.data).sum())
                test_acc += acc

        test_loss /= len(testset)
        test_acc /= len(testset)

        inds = np.argsort(np.array(train_losses))[::-1]
        rnge = int(len(trainset) * noise_ratio)
        inds_filt = inds[:rnge]
        recall = float(len(np.intersect1d(inds_filt, inds_noisy))) / float(
            len(inds_noisy))
        precision = float(len(np.intersect1d(inds_filt,
                                             inds_noisy))) / float(rnge)
        ###############################################################################################
        logger.info(
            '\tTESTING...loss: %5f, acc: %5f, best_acc: %5f, recall: %5f, precision: %5f'
            % (test_loss, test_acc, best_test_acc, recall, precision))
        net.train()
        ###############################################################################################
        assert train_preds[train_preds < 0].nelement() == 0
        train_preds_hist[:, epoch % num_hist] = train_preds
        train_preds = train_preds * 0 - 1.
        assert train_losses[train_losses < 0].nelement() == 0
        train_losses = train_losses * 0 - 1.
        ###############################################################################################
        is_best = test_acc > best_test_acc
        best_test_acc = max(test_acc, best_test_acc)
        state = ({
            'epoch': epoch,
            'state_dict': net.state_dict(),
            'optimizer': optimizer.state_dict(),
            'train_preds_hist': train_preds_hist,
            'pl_ratio': pl_ratio,
            'nl_ratio': nl_ratio,
        })
        logger.info('saving model...')
        fn = os.path.join(opt.save_dir, 'checkpoint.pth.tar')
        torch.save(state, fn)
        if epoch % 100 == 0 or epoch == opt.switch_epoch - 1 or epoch == opt.max_epochs - 1:
            fn = os.path.join(opt.save_dir,
                              'checkpoint_epoch%d.pth.tar' % (epoch))
            torch.save(state, fn)
        # if is_best:
        # 	fn_best = os.path.join(opt.save_dir, 'model_best.pth.tar')
        # 	logger.info('saving best model...')
        # 	shutil.copyfile(fn, fn_best)

        if epoch % 10 == 0:
            logger.info('saving histogram...')
            plt.hist(train_preds_hist.mean(1)[
                torch.arange(len(trainset)),
                np.array(trainset.imgs)[:, 1].astype(int)],
                     bins=20,
                     range=(0., 1.),
                     edgecolor='black',
                     color='g')
            plt.xlabel('probability')
            plt.ylabel('number of data')
            plt.grid()
            plt.savefig(opt.save_dir + '/histogram_epoch%03d.jpg' % (epoch))
            plt.clf()

            logger.info('saving separated histogram...')
            plt.hist(train_preds_hist.mean(1)[
                torch.arange(len(trainset))[inds_clean],
                np.array(trainset.imgs)[:, 1].astype(int)[inds_clean]],
                     bins=20,
                     range=(0., 1.),
                     edgecolor='black',
                     alpha=0.5,
                     label='clean')
            plt.hist(train_preds_hist.mean(1)[
                torch.arange(len(trainset))[inds_noisy],
                np.array(trainset.imgs)[:, 1].astype(int)[inds_noisy]],
                     bins=20,
                     range=(0., 1.),
                     edgecolor='black',
                     alpha=0.5,
                     label='noisy')
            plt.xlabel('probability')
            plt.ylabel('number of data')
            plt.grid()
            plt.savefig(opt.save_dir + '/histogram_sep_epoch%03d.jpg' %
                        (epoch))
            plt.clf()
示例#34
0
文件: input.py 项目: cheidegg/MyPAF
	def setSources(self):
		"""Rebuild self.sources and give every tree input object its source index.

		self.sources is reset to an empty list. For each cfg object matching
		region=='input' and type=='tree' the source name is taken from the
		object's 'source' argument when present; otherwise it is derived from
		the header variable 'source': 'dataset' and 'group' are looked up in the
		'samples' table of self.db under the object's name, any other value
		falls back to the object's own name. Names not yet known are appended to
		self.sources and the resulting index is stored on the object through
		setSource(). Output-region objects that carry a 'source' argument get
		that name registered in self.sources as well.

		NOTE(review): code keeps running after every vb.error() call, so
		whether vb.error() aborts cannot be told from this method alone.
		"""
		## source can be
		## - sample
		## - dataset
		## - group
		## - gengroupbasic
		## - gengroupraw
		## - gengroupfine
		## - custom
		## source variable in header can take, only applies to tree input!
		## - sample
		## - dataset
		## - group

		self.vb.call("input", "setSources", [self], "Setting the sources of this instance.")

		self.sources = []

		if not self.cfg.hasVar("source"):
			self.vb.error("Default source is not specified in the header of the cfg file.")

		## NOTE(review): hs is assigned but not read anywhere in the visible code
		hs = self.cfg.hasVar("source")
		## default source kind from the cfg header
		sn = self.cfg.getVar("source")
		
		for i, iobj in enumerate(self.cfg.getObjs("region=='input' and type=='tree'")):
			alist = args.args(iobj.argstring)

			## NOTE(review): the query above asks for type=='tree' only, so this
			## check presumably never fires - confirm against getObjs
			if not iobj.type == "tree" and not alist.has("source"): 
				self.vb.error("No source is specified for input object " + iobj.name + ".")

			## source given to this input object
			if alist.has("source"): 
				s = alist.get("source")

			## source not given, take default defined in header
			else:
				if not iobj.type == "tree":			
					self.vb.error("No source is specified for input object " + iobj.name + ".")

				if sn == "dataset":
					s = self.db.getVar("samples", iobj.name, "dataset")
				elif sn == "group":
					s = self.db.getVar("samples", iobj.name, "group")
				else:
					s = iobj.name

			## register the source once; -1 is treated as "not found" here
			sidx = lib.findElm(self.sources, s)
			if sidx == -1:
				self.sources.append(s)
				sidx = len(self.sources) - 1

			iobj.setSource(sidx)


		## output objects may name additional sources explicitly
		for i, iobj in enumerate(self.cfg.getObjs("region=='output'")):
			alist = args.args(iobj.argstring)
			if alist.has("source"):
				sidx = lib.findElm(self.sources, alist.get("source"))
				if sidx == -1:
					self.sources.append(alist.get("source"))
					## NOTE(review): sidx is not used after this point in the
					## visible code - the listing may be cut off here
					sidx = len(self.sources) - 1
示例#35
0
        # print(self.features.T)
        # print(self.features.T.shape)
        return self.features.T

    def preprocessFeature(self, max_dim=200):
        """Reduce ``self.features`` to at most ``max_dim`` columns via truncated SVD.

        When the feature matrix has more than ``max_dim`` columns it is replaced
        by ``U_k * S_k``: the leading ``k`` left singular vectors, each scaled by
        its singular value, with ``k = min(max_dim, number of singular values)``.
        Matrices with ``max_dim`` columns or fewer are left untouched.

        :param max_dim: maximum number of feature columns to keep (default 200,
            the value that used to be hard-coded).
        :return: None; ``self.features`` is updated in place.
        """
        if self.features.shape[1] <= max_dim:
            return

        # The reduced SVD is enough here: only the leading columns of U are
        # used, and it avoids materialising an n x n U for tall matrices.
        U, S, _ = la.svd(self.features, full_matrices=False)

        # A matrix with fewer than max_dim rows yields fewer than max_dim
        # singular values; clamp k so the scaling below cannot fail on shape.
        k = min(max_dim, len(S))
        # np.array() turns a possible np.matrix into an ndarray so that `*`
        # broadcasts element-wise (column j scaled by S[j]).
        self.features = np.array(U[:, :k]) * np.asarray(S[:k]).reshape(k)


if __name__ == "__main__":
    # Ad-hoc run: build a single graph from an edge list, attach node features
    # and extract its structural features through XTADW.
    warnings.filterwarnings("ignore", category=FutureWarning)
    rep_method = RepMethod(max_layer=2)
    arg_1 = args.args()
    # Override the input paths on the object returned by args.args().
    arg_1.input = "data/test/karate.edgelist_1"
    arg_1.feature_file = "data/test/cora.features_1"
    # NOTE(review): t1 is not read anywhere in the visible part of this script.
    t1 = time.time()
    # Edge list -> networkx graph -> dense adjacency matrix -> Graph wrapper.
    nx_graph_1 = nx.read_edgelist(arg_1.input, nodetype=int, comments="%")
    adj_matrix_1 = nx.adjacency_matrix(nx_graph_1).todense()
    g_1 = Graph(adj_matrix_1)

    # The same edge list is read a second time, now through the Graph object,
    # with the weighted/directed flags taken from the args object.
    g_1.read_edgelist(filename=arg_1.input,
                      weighted=arg_1.weighted,
                      directed=arg_1.directed)
    g_1.read_node_features(arg_1.feature_file)
    xTawd_1 = XTADW(g_1, arg_1.representation_size)
    structure_feature_1 = xTawd_1.get_features(rep_method)
    # print(structure_feature_1)
    # print(structure_feature_1.shape)
示例#36
0
# Preprocesses the raw data; run this before the main program.
import glob
import os
from args import args

# Parse the command line; hp.dataset selects the sub-directory of raw-data/.
hparams = args()
parser = hparams.parser
hp = parser.parse_args()

# Every .txt file in the dataset directory is handled as one graph.
files = glob.glob("raw-data/" + hp.dataset + "/*.txt")
# Runtime message below reads "%d graphs in total".
print("共有 %d 个图" % len(files))
for k, file in enumerate(files):
    # Runtime message below reads "processing graph number %d".
    print("处理第 %d 个图" % (k + 1))
    f = open(file, 'r')
    edge_cnt = 0
    node_set = set()
    biao = {}
    for line in f:
        # Drop the last character (presumably the trailing newline) and split
        # on single spaces.
        temp = line[:-1].split(' ')
        # print(tem)
        if (len(temp)) < 3:
            # Stop reading this file at the first line with fewer than 3 fields.
            break
        # The first two fields are the integer endpoints of an edge.
        x = int(temp[0])
        y = int(temp[1])
        edge_cnt += 1
        node_set.add(x)
        node_set.add(y)
    # NOTE(review): the handle is closed manually, so it stays open if int()
    # raises on a malformed line.
    f.close()
    # NOTE(review): biao was already set to {} above and is untouched since.
    biao = {}
    cn = 1
    for i in node_set:
import sys

sys.path.append("..")
import math
import random
import tensorflow as tf
import numpy as np
import datetime
from data_util import data_preprocess3
from model import Model as Model
from args import args

# ==============================================================================
#                               Loading dataset
# ==============================================================================
# Build the run configuration in "train" mode.
# NOTE(review): this rebinds the imported name `args` to the returned object,
# so args(...) cannot be called again after this line.
args = args("train")
# args = args("test")
data_preprocess = data_preprocess3(args)
num_sample = data_preprocess.num_sample
print("num_sample:", num_sample)

# Create the output directory on first use.
# NOTE(review): `os` is not among the imports visible directly above this
# block - confirm it is imported elsewhere in the file.
if not os.path.exists(args.savePath):
    os.makedirs(args.savePath)
# Restrict CUDA to device 0 and let TensorFlow grow GPU memory on demand.
os.environ["CUDA_VISIBLE_DEVICES"] = '0'
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
# gpu_options = tf.GPUOptions(allow_growth=True)

# Iterations per epoch: number of batches, rounded up so the last partial
# batch is counted.
numIterPerEpoch = int(math.ceil(num_sample / args.batch_size))
print('%d iter per epoch.\n' % numIterPerEpoch)
from tweetUtil import listToCsvMulti, csvToList, simple_tweet_search_j, auth_api, auth_api2, uploadVideo, csvToListMulti, urlReplyRemove, listToCsv
from args import args
from pprint import pprint
import functools

# Flush on every print so log lines show up immediately; this shadows the
# builtin print for the rest of the module.
print = functools.partial(print, flush=True)
# args() returns three values: the search words, the environment name and a slug id.
search_words, envName, slugid = args()
# Twitter auth: two API handles built from the same environment name.
api = auth_api(envName)
api2 = auth_api2(envName)


def main():
    """Run the tweet search and load the lists used to exclude results.

    NOTE(review): this listing appears to be cut off after the exclusion
    banner; several locals below are not read in the visible part.
    """
    # Per-environment CSV files: already tweeted movies and their user ids.
    csvname = envName + "_tweeted_movie.csv"
    uidName = envName + "_user_id_tweeted_movie.csv"
    # Extracting text and original URL
    copyIdAndImege = []
    rawJsonList = simple_tweet_search_j(search_words, envName)
    print("---------------------search target")
    # Load what has already been posted so it can be excluded.
    tweetedIdList = csvToListMulti(csvname)
    # Keep only the first column of every row, converted to int.
    tweetedIdList = list(map(lambda x: int(x[0]), tweetedIdList))
    uidList = csvToList(uidName)
    # Screen name of the authenticated account and the ids of its followers,
    # the latter converted to strings.
    meId = api.me().screen_name
    followerIdsInt = api.followers_ids(meId)
    followerIds = [str(i) for i in followerIdsInt]
    print(
        "----------------------------------------------------------------Exclusion target (posted)"
    )
示例#39
0
文件: test.py 项目: chronus7/args.py

def equal(a, b):
    """Tests both items on equality"""
    # The docstring is kept verbatim: args.args() presumably shows it as the
    # command's help text.
    outcome = (a == b)
    print(outcome)


def join(c: str, *a):
    """Joins the arguments with the first one"""
    # The docstring is kept verbatim: args.args() presumably shows it as the
    # command's help text.
    joined = c.join(a)
    print(joined)


def add(*i: lambda x: list(map(float, x))):
    """Adds the given numbers"""
    # The annotation and docstring are kept verbatim: args.args() presumably
    # uses them as converter and help text.
    # Left fold with float.__add__ starting from 0.0, written as an explicit
    # loop; sum() is avoided because it may add floats differently.
    total = 0.0
    for value in i:
        total = float.__add__(total, value)
    print(total)


def echo(i: str):
    """echoes the given string
    :param i: the string to print
    """
    # The docstring is kept verbatim: args.args() presumably parses it for the
    # help text.
    text = i
    print(text)


if __name__ == "__main__":
    # Imported only when run as a script.
    import args

    # Hand control to the args library; presumably it dispatches to the
    # functions defined above based on the command line - confirm against args.py.
    args.args()
示例#40
0
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from collections import deque
from tensorflow.losses import huber_loss

import args
import shutil
import subprocess
import sys
import threading
import numpy as np

import env_data

# Call args.args() once at module level and keep its result.
# NOTE(review): this rebinds the name `args` from the imported module to the
# returned object, so the module itself is no longer reachable as `args`.
args = args.args()


def softmax(x):
    """Return the softmax of ``x``, normalised along axis 0.

    The global maximum of ``x`` is subtracted before exponentiating so that
    large inputs do not overflow.
    """
    shifted = x - np.max(x)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0)


def readable_size(size):
    for unit in ['K', 'M']:
示例#41
0
#import matplotlib.pyplot as plt

import torch
import torchvision
from torchvision import datasets, transforms
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn

from resnet import ResNet56, ResNetNoShort56, ResNet110, ResNetNoShort110, ResNet20, ResNetNoShort20

from args import args
from utils import progress_bar

# NOTE(review): rebinds `args` from the imported callable to its return value.
args = args()
# Derive a default run name from the main hyper-parameters when none was given.
if args.instance is None:
    args.instance = '{}_optim_{}_lr_{}_batch-size_{}_seed_{}'.format(
        args.arch, args.optim, args.lr, args.batch_size, args.seed)
print(args.instance)
# Seed PyTorch's random number generator with the configured seed.
torch.manual_seed(args.seed)

print('==> Preparing data..')
# Training pipeline: random 32-pixel crop with 4 pixels of padding, random
# horizontal flip, tensor conversion, then per-channel normalisation with the
# mean / std tuples given below.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
示例#42
0
import args
import os
import numpy as np
import torchvision.datasets as dset
import torchvision.transforms as transforms
import random
import pickle

opt = args.args()

# Make sure the output directory for this noise type exists; any OSError
# (for instance because the directory is already there) is ignored.
try:
    os.makedirs('noise/%s' % (opt.noise_type))
except OSError:
    pass
###################################################################################################
# NOTE(review): for any other dataset only a message is printed and
# num_classes stays undefined, so the range(num_classes) loop further down
# would fail with NameError.
if opt.dataset == 'cifar10_wo_val': num_classes = 10
else: print('There exists no data')

# Load the training folder once to record the original labels: column 1 of
# trainset.imgs, taken before any modification.
trainset = dset.ImageFolder(root='{}/{}/train'.format(opt.dataroot,
                                                      opt.dataset),
                            transform=transforms.ToTensor())
clean_labels = np.array(trainset.imgs)[:, 1]

# Ten rounds; every round starts from a freshly loaded dataset object.
for n in range(10):

    trainset = dset.ImageFolder(root='{}/{}/train'.format(
        opt.dataroot, opt.dataset),
                                transform=transforms.ToTensor())

    noisy_idx = []
    for c in range(num_classes):
    print("TAWD", "begin...")
    print("Reading...")
    if args.graph_format == 'adjlist':
        g.read_adjlist(filename=args.input)
    elif args.graph_format == 'edgelist':
        g.read_edgelist(filename=args.input,
                        weighted=args.weighted,
                        directed=args.directed)

    g.read_node_label(args.label_file)
    g.read_node_features(args.feature_file)
    model = xtadw.TADW(graph=g, dim=args.representation_size, lamb=args.lamb)
    t2 = time.time()
    print(t2 - t1)
    print("Saving embeddings...")
    model.save_embeddings(args.output)
    vectors = model.vectors
    X, Y = read_node_label(args.label_file)
    print("Training classifier using {:.2f}% nodes...".format(args.clf_ratio *
                                                              100))
    clf = Classifier(vectors=vectors, clf=LogisticRegression())
    clf.split_train_evaluate(X, Y, args.clf_ratio)


if __name__ == "__main__":
    warnings.filterwarnings("ignore", category=FutureWarning)
    # Fix the seeds of Python's and NumPy's random generators.
    random.seed(32)
    np.random.seed(32)
    # `agrs` (sic) holds the object returned by args.args() and is passed to main().
    agrs = args.args()
    main(agrs)
示例#44
0
def _load_attributed_graph(edgelist_path, feature_file):
    """Build a Graph with edges and node features loaded from the given files.

    Returns a pair ``(options, graph)``: the object returned by ``args.args()``
    with ``input`` / ``feature_file`` overridden, and the populated graph.
    """
    options = args.args()
    options.input = edgelist_path
    options.feature_file = feature_file
    # Edge list -> networkx graph -> dense adjacency matrix -> Graph wrapper.
    nx_graph = nx.read_edgelist(options.input, nodetype=int, comments="%")
    adjacency = nx.adjacency_matrix(nx_graph).todense()
    graph = Graph(adjacency)
    graph.read_edgelist(filename=options.input,
                        weighted=options.weighted,
                        directed=options.directed)
    graph.read_node_features(options.feature_file)
    return options, graph


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


def main(arg_one_input="data/paper/truedata/facebook.net",
         arg_one_feature_file="data/paper/truedata/facebook_info",
         arg_two_input="data/paper/truedata/twitter.net",
         arg_two_feature_file="data/paper/truedata/twitter_info"):
    """Match the communities of two graphs and print precision and recall.

    Both graphs are loaded with their node features, communities are detected
    on each with GN, node embeddings are trained with RECM on the stacked
    structural features, and the community pairs returned by
    ``computer_pair`` are scored. A pair counts as a true positive when both
    elements of its first entry are equal.

    :param arg_one_input: edge-list file of the first graph.
    :param arg_one_feature_file: node-feature file of the first graph.
    :param arg_two_input: edge-list file of the second graph.
    :param arg_two_feature_file: node-feature file of the second graph.
    :return: None; precision, recall and the elapsed time are printed.
        Precision / recall are reported as 0.0 when there is nothing to
        divide by, instead of raising ZeroDivisionError.
    """
    # Alternative input sets used in the experiments:
    #
    # synthetic data
    #   arg_one_input="data/paper/synthetic/football_1.net",
    #   arg_one_feature_file="data/paper/synthetic/football_info_115_1",
    #   arg_two_input="data/paper/synthetic/football_1-0.01.net",
    #   arg_two_feature_file="data/paper/synthetic/football_info_115_2"
    #
    # true data (the defaults)
    #   arg_one_input="data/paper/truedata/facebook.net",
    #   arg_one_feature_file="data/paper/truedata/facebook_info",
    #   arg_two_input="data/paper/truedata/twitter.net",
    #   arg_two_feature_file="data/paper/truedata/twitter_info"
    #
    # disturb factor
    #   arg_one_input="data/paper/disturb/football_1-0.1.net",
    #   arg_one_feature_file="data/paper/synthetic/football_info_115_1",
    #   arg_two_input="data/paper/disturb/football_1-0.1.net",
    #   arg_two_feature_file="data/paper/synthetic/football_info_115_2"

    warnings.filterwarnings("ignore", category=FutureWarning)
    t1 = time.time()
    # init graph
    # K is maxLayer
    # alpha is discount factor for higher layers
    rep_method = RepMethod(max_layer=4, alpha=0.1)
    arg_one, g_one = _load_attributed_graph(arg_one_input,
                                            arg_one_feature_file)
    arg_two, g_two = _load_attributed_graph(arg_two_input,
                                            arg_two_feature_file)

    # Community detection. GN is the algorithm the other experiments run on;
    # the alternatives that were tried are kept below for reference.
    #
    # igraph.Graph.community_infomap()
    # SCAN
    #   algorithm_one = SCAN(g_one.G, 0.5, 3)
    #   communities_one = algorithm_one.execute()
    #   algorithm_two = SCAN(g_two.G, 0.5, 3)
    #   communities_two = algorithm_two.execute()
    # LV
    #   G_one = load_graph_LV(arg_one_input)
    #   algorithm_one = Louvain(G_one)
    #   communities_one = algorithm_one.execute()
    #   G_two = load_graph_LV(arg_two_input)
    #   algorithm_two = Louvain(G_two)
    #   communities_two = algorithm_two.execute()
    # CPM
    #   algorithm_one = CPM()
    #   communities_one = algorithm_one.execute(g_one.G, 4)
    #   algorithm_two = CPM()
    #   communities_two = algorithm_two.execute(g_two.G, 4)
    # LPA (can not use this algorithm)
    #   algorithm_one = LPA(g_one.G)
    #   communities_one = algorithm_one.execute()
    #   algorithm_two = LPA(g_two.G)
    #   communities_two = algorithm_two.execute()
    # EM
    #   algorithm_one = EM(g_one.G, 9)
    #   communities_one = algorithm_one.execute()
    #   algorithm_two = EM(g_two.G, 2)
    #   communities_two = algorithm_two.execute()
    # LFM
    #   algorithm_one = LFM(g_one.G, 0.8)
    #   communities_one = algorithm_one.execute()
    #   algorithm_two = LFM(g_two.G, 0.8)
    #   communities_two = algorithm_two.execute()
    # demo
    #   algorithm = SCAN(g_one.G)
    #   communities = algorithm.execute()
    #   print(communities)
    G_one = load_graph_GN(arg_one_input)
    algorithm_one = GN(G_one)
    communities_one = algorithm_one.execute()
    G_two = load_graph_GN(arg_two_input)
    algorithm_two = GN(G_two)
    communities_two = algorithm_two.execute()

    # print(communities_one)
    # print(communities_two)

    # Node embedding: structural features per graph, aligned by
    # completion_vec and stacked into one matrix.
    x_tawd_one = XTADW(g_one, arg_one.representation_size)
    structure_feature_one = x_tawd_one.get_features(rep_method)

    x_tawd_two = XTADW(g_two, arg_two.representation_size)
    structure_feature_two = x_tawd_two.get_features(rep_method)

    structure_feature_one, structure_feature_two = completion_vec(
        structure_feature_one, structure_feature_two)
    combine_future = np.vstack((structure_feature_one, structure_feature_two))
    # S is dim the first para
    # lamb is the second para penalty factor
    recm = RECM(5, 0.1, g_one, g_two)
    recm.getT()
    g_one_node_embeding, g_two_node_embeding = recm.train(
        5, rep_method, combine_future)
    # print("shape", g_one_node_embeding.shape)
    res, len_community_pair = computer_pair(communities_one, communities_two,
                                            g_one_node_embeding,
                                            g_two_node_embeding)
    # print(res)

    # Scoring: every returned pair is a prediction (TP + FP);
    # len_community_pair is the number of pairs that should be found (TP + FN).
    TP_FP = len(res)
    TP_FN = len_community_pair
    TP = 0
    for tuple_ele in res:
        matched = tuple_ele[0]
        if matched[0] == matched[1]:
            TP = TP + 1
    # Guard against an empty result / empty ground truth.
    pre = _safe_ratio(TP, TP_FP)
    recall = _safe_ratio(TP, TP_FN)
    print("pre: ", pre, "recall: ", recall)
    print("sum time ", time.time() - t1)
示例#45
0
import sys
import os
import args
import shutil 
from config import const

# Print this script's directory, then make it the working directory so the
# relative paths below resolve against it.
print(os.path.split(os.path.realpath(__file__))[0])
os.chdir(os.path.split(os.path.realpath(__file__))[0])

cmd = args.args()

print(cmd)

# Dispatch on the option found in cmd: 'c', 's' or 'renew_c'.
if cmd.get('c'):
    # Start the client as a child process.
    os.system('python client/main.py')
elif cmd.get('s'):
    # Start a celery worker named after this host's resolved address.
    import socket
    myname = socket.getfqdn(socket.gethostname())
    myaddr = socket.gethostbyname(myname)
    os.system('celery worker -A server.main -l info -n {0}'.format(myaddr))
elif cmd.get('renew_c'):
    ## Planned steps for refreshing the client copy:
    ## delete  folder: client/config
    ## delete  folder: client/task
    ## copy ./config to client/config
    ## new     folder: client/task
    ## new       file: client/task/__init__.py
    ## new       file: client/task/task_api.py
    ## read ./task/task_api.py | filter | client/task/task_api.py
    # client.config
    if os.path.exists('client/config'):
        shutil.rmtree('client/config')
# -*- coding: utf-8 -*-
"""
Created on Fri Nov  1 13:27:46 2019

@author: Chenghai Li
"""

import csv
from args import args
import numpy as np
from matplotlib import pyplot as plt

arg = args()

# Copy the options used below into module-level names.
step = arg.step
input_length = arg.input_length
predict_length = arg.predict_length
split_ratio = arg.split_ratio

# NOTE(review): the file handle is not closed in the visible part of the script.
file = open(r'data/EURUSD.csv')
file_csv = csv.reader(file)
data = []

# Keep columns 2..5 of every row as floats.
# NOTE(review): a header row would make float() fail here - presumably the
# CSV has none; confirm against the data file.
for row in file_csv:
    row[2] = float(row[2])
    row[3] = float(row[3])
    row[4] = float(row[4])
    row[5] = float(row[5])
    data.append(row[2:6])

# The first split_ratio share of the rows forms the training array.
train = np.array(data[:int(len(data) * split_ratio)], dtype=np.float32)