Example #1
def test_filter_dict():
    d = {
        'name': 'eleme',
        '_name': 'e',
    }

    def f(k, v):
        return not k.startswith('_')

    assert utils.filter_dict(f, d) == {'name': 'eleme'}

    f = lambda k, v: not k.startswith('_') # noqa
    assert utils.filter_dict(f, d) == {'name': 'eleme'}
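The test above pins down one contract for filter_dict: it takes a predicate over (key, value) pairs first and the dict second, and returns a new dict holding only the accepted entries. A minimal sketch consistent with that contract (the project's actual utils.filter_dict may differ):

def filter_dict(predicate, d):
    # Sketch only: keep the (key, value) pairs the predicate accepts.
    return {k: v for k, v in d.items() if predicate(k, v)}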
Example #2
def mineApyori(gs, **kwargs):
    """
    Mine association rules using the apyori library.
    min_support, min_confidence, min_lift and max_length are passed through to apyori.
    show_rules is a boolean flag deciding whether the rules are printed to the screen.
    samples may be given to select which subgraphs to keep; the rest are filtered out.
    If samples is None, all subgraphs are considered.
    """
    apyoriKwargs = {}
    min_support = kwargs.get('min_support', 1e-4)
    min_confidence = kwargs.get('min_confidence', 0.0)
    min_lift = kwargs.get('min_lift', 0.0)
    max_length = kwargs.get('max_length', None)
    show_rules = kwargs.get('show_rules', True)
    samples = kwargs.get('samples', None)

    apyoriKwargs['min_support'] = min_support
    apyoriKwargs['min_confidence'] = min_confidence
    apyoriKwargs['min_lift'] = min_lift
    apyoriKwargs['max_length'] = max_length

    support_where = {}
    for sg in gs.subgraphs.values():
        support_where[sg.gid] = gs.support_where[sg.gid]
    if samples:
        support_where = utils.filter_dict(support_where, samples)
    records = getTransactions(support_where, len(gs.graphs))
    gen = apyori.apriori(records, **apyoriKwargs)

    if show_rules:
        print_rules(gen)

    return gen
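Note that this utils.filter_dict takes the dict first and a collection of keys second, i.e. a key-whitelist variant rather than the predicate variant from Example #1. A plausible sketch of that variant, assuming samples is an iterable of subgraph IDs:

def filter_dict(d, keys):
    # Whitelist sketch: keep only the entries whose key is in keys.
    keep = set(keys)
    return {k: v for k, v in d.items() if k in keep}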
Example #3
    def filter_model_fields(self):
        """
        Filter the data down to the fields this model defines.

        Adds a new attribute containing all fields.

        NOTE: How to cope with nested models?
        """
        return utils.filter_dict(self.data, self._model_fields)
Example #4
def save_docs(idxs, docs, params, name):
    '''Save the idxs and docs with pickle.
       They can be loaded again by calling get_docs with the same name.'''
    docs = np.array(docs).astype(int)
    idxs = np.array(idxs).astype(int)
    # only save the parameters relevant to the preproc
    params = filter_dict(params, prec.default_params.keys())
    params["docs_id"] = random_id()
    file_path = get_docs_path(name)
    with open(file_path, "wb") as file:
        pickle.dump((idxs, docs, params), file)
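The docstring refers to a companion loader, get_docs, which is not shown here. A minimal sketch of what it could look like, assuming the same get_docs_path helper and the pickled (idxs, docs, params) tuple above:

def get_docs(name):
    # Hypothetical loader mirroring save_docs; the real get_docs may differ.
    file_path = get_docs_path(name)
    with open(file_path, "rb") as file:
        idxs, docs, params = pickle.load(file)
    return idxs, docs, params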
Example #5
def getSequences(gs, samples=None, days=1):
    """
    Return sequences of subgraphs.
    Each sequence consists of subgraph IDs encoded as chars (pymining works on chars).
    gs is a gSpan object.
    samples specifies which subgraphs should be considered (the rest are filtered out).
    The days attribute specifies how much time each sequence should cover.
    For example, days=7 means a sequence holds the subgraphs seen within one week, in order.
    """
    support_where = {}
    for sg in gs.subgraphs.values():
        support_where[sg.gid] = gs.support_where[sg.gid]
    if samples:
        support_where = utils.filter_dict(support_where, samples)
    records = getTransactions(support_where, len(gs.graphs))

    # Need to modify the following arguments for different months:
    # jan: 0, feb: 895, mar: 1726, apr: 2578, may: 3533,
    # jun: 4549, jul: 5626, aug: 6575, sep: 7449,
    # oct: 8277, nov: 9292, dec: 10686
    # TODO: Embed a date attribute on the nodes in the database
    #       and read them directly instead of hard-coding these offsets.
    dates = _getDates(0, 0 + len(gs.graphs))
    # group by days
    group_count = math.ceil((max(dates) / days) + 1)
    groups = {x: [] for x in range(group_count)}

    for i in range(len(dates)):
        groups[math.floor(dates[i] / days)].append(i)

    sequences = []
    for k, v in groups.items():
        seq = []
        for trans_id in v:
            trans = records[trans_id]
            for subgid in trans:
                seq.append(subgid)

        # convert to chars for pymining
        sequences.append("".join([chr(x) for x in seq]))
        # print("Sequence {}: {}".format(k, seq))

    return sequences
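The char encoding exists so the result can be fed straight to pymining's sequence miner. A short usage sketch (assuming the pymining package is installed; the support threshold is illustrative, and gs/samples come from the surrounding pipeline):

from pymining import seqmining

sequences = getSequences(gs, samples=samples, days=7)
# Enumerate subsequences occurring in at least 2 of the sequences.
freq_seqs = seqmining.freq_seq_enum(sequences, 2)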
Example #6
def get_user(sysNam, usrDict):
    try:
        usr = _usrs.by_name(sysNam, usrDict['username'])
    except KeyError:
        # Merge the first identity into the top level of the user dict,
        # because the POST API call to /users accepts only one extern_uid.
        merged = dict(usrDict)
        if usrDict.get('identities'):
            merged.update(usrDict['identities'][0])
        dictWithUid = filter_dict(merged,
            'admin',
            'bio',
            'can_create_group',
            'extern_uid',
            'linkedin',
            'password',
            'projects_limit',
            'provider',
            'skype',
            'twitter',
            'website_url')
        usr = _usrs.add(sysNam, usrDict['username'], usrDict['name'], usrDict['email'], confirm=False, **dictWithUid)
        # rebuild the cache after adding the new user
        _usrs.clr_cache(sysNam)
    return usr['id']
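Here filter_dict is called variadically: the dict first, then any number of key names. A sketch of that signature, tolerating keys missing from the dict (again, the project's real helper may differ):

def filter_dict(d, *keys):
    # Variadic whitelist sketch: keep only the listed keys present in d.
    return {k: d[k] for k in keys if k in d}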
Example #7
    def update_layout(self, child, properties):
        child.widget.grid(**filter_dict(properties, Table.grid_rule_map))
Example #8
    def post_setup(self, child):
        child.widget.grid(**filter_dict(child.layout_properties,
                                        Table.grid_rule_map))
Example #9
    def update_font(self):
        font_info = filter_dict(self.widget_properties, {"font_size": "size"})
        font_info = {k: v for k, v in font_info.items() if v}
        font = tkFont.Font(**font_info)
        self.widget["font"] = font
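In Examples #7 to #9 the second argument is a mapping such as {"font_size": "size"}, which suggests a variant that both selects and renames keys (tkFont.Font expects a size keyword, not font_size). A plausible sketch of that variant:

def filter_dict(d, key_map):
    # Mapping sketch: select the keys in key_map and rename them to the mapped names.
    return {new: d[old] for old, new in key_map.items() if old in d}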
Example #10
    # Cluster the subgraphs and sample them so that we get a single representative of each group of similar subgraphs
    # gedObj = ged.GraphEditDistance(False, gs.subgraphs, node_subst_cost=2, node_del_cost=2, node_ins_cost=2, reduce_graphs=False)
    # clusters = gedObj.get_clusters(0.1)
    clusters = gohe.get_clusters(gs.subgraphs, 0.9)
    samples = utils.sample_clusters(clusters)

    # Uncomment the print lines to dump the queries
    support_where = {}
    for sg in gs.subgraphs.values():
        # print("")
        # print_graph(sg)
        support_where[sg.gid] = gs.support_where[sg.gid]
        # print(gSpan2query(sg))
        # print("")

    # Filter support_where according to samples
    support_where = utils.filter_dict(support_where, samples)
    print("Reduced Subgraph Count :", len(samples))
    print("Mining frequent sequences...")

    # Mine frequent sequences
    freq_seqs = rm.frequentSequences(gs, samples, 3, 7, 1, 1)

    # reID the variables. This is needed because running this script for different
    # months yields the same IDs (they always start from 0), but we need them to be
    # distinct, so we change start_ID and reID them.
    subgraphs, samples, freq_seqs, support_where = utils.reID(gs.subgraphs,
                                                              samples,
                                                              freq_seqs,
                                                              support_where,
                                                              start_ID=0)

    # Mine rules from frequent sequences
Example #11
            try:

                # Create friendship edges
                userfeed = self._read_json_data("%s_feed" % fuid)
                logger.info("Now processing feed of user %s" % fuid)

                # For each activity
                for activity in userfeed:

                    try:
                        # Create Node
                        node = DSLSerializable("Artifact", activity['id'])
                        node.add_attr("time", activity['created_time'])
                        node.add_attrs(
                            filter_dict(activity, [
                                'likes', 'shares', 'to', 'from',
                                'created_time', 'comments'
                            ]))
                        self.write_dsl(node)

                        if activity.get("from"):
                            post_from = activity["from"]["id"]
                            self.create_person_node_if_not_exists(
                                activity["from"]["id"], activity["from"])
                        else:
                            post_from = fuid

                        if activity.get("to"):
                            post_to = [i["id"] for i in activity["to"]["data"]]
                            for i in activity["to"]["data"]:
                                self.create_person_node_if_not_exists(
                                    i["id"], i)
                        else:
                            post_to = [fuid]
Example #12
    usage()

try:
    srcSys = opts['-s']
    dstSys = opts['-d']
    grpNam = opts['-g']
except KeyError:
    usage()

# create the group itself
grp = Groups()
try:
    srcGid = grp.by_name(srcSys, grpNam)['id']
except KeyError:
    usage("Group with name '%s' doesn't exist in the source system" % grpNam)
dstGid = grp.add(dstSys, grpNam)['id']

# add members to the group
add_members(grp, srcGid, dstGid)

# copy projects, with their members, from the source group to the destination one
prj = Projects()
for p in prj.by_namespace(srcSys, srcGid):
    add_members(prj, p['id'], prj.add(dstSys, p['name'], namespace_id=dstGid, **filter_dict(p,
        'description',
        'issues_enabled',
        'merge_requests_enabled',
        'wiki_enabled',
        'snippets_enabled',
        'visibility_level'))['id'])
Example #13
File: train.py Project: xingyu321/BiKCCA
def Predeal_dicts(src_wc_path, tgt_wc_path, dict_path, threthold):
    src_wc, tgt_wc = utils.load_word_count(src_wc_path), utils.load_word_count(
        tgt_wc_path)
    print('Src_wc:{} tgt_wc:{}'.format(len(src_wc), len(tgt_wc)))
    utils.filter_dict(dict_path, src_wc, tgt_wc, threthold=threthold)