Example #1
import sys

# utils, relevant, graph and postprocessing are project-local modules


def run():
    task = sys.argv[1]
    f = open(sys.argv[2])

    # parameters
    kwLimit = utils.get_parameter('kwLimit')
    edge_th = utils.get_parameter('edge_th')
    dispth = utils.get_parameter('dispth')
    ethunit = utils.get_parameter('ethunit')

    for years in f.readlines():
        currentyears = years.replace('\n', '').split(";")
        print('--> running ' + task + ' for ' + str(currentyears))
        if task == '--raw-network':
            # keywords relevance
            relevant.relevant_full_corpus(currentyears, kwLimit, edge_th)
            # full network
            graph.construct_graph(currentyears, kwLimit, edge_th)
            # sensitivity
            graph.sensitivity(currentyears, kwLimit, edge_th)

        elif task == '--classification':
            # construct communities
            graph.construct_communities(currentyears, kwLimit, edge_th, dispth,
                                        ethunit)
            # post processing
            postprocessing.export_classification(currentyears, kwLimit,
                                                 edge_th, dispth, ethunit)

        elif task == '--custom':
            print("custom")
Example #2
# method of a SpectralClustering class; numpy is imported as np
def train(self, X, w=None, **kwargs):
    # affinity matrix W, degree matrix D and Laplacian L built from the data
    W, D, L = construct_graph(X, **kwargs)
    # binarize the affinities and recompute the Laplacian
    W[W > 1E-50] = 1
    L = D - W
    # spectral embedding: the self._k eigenvectors of L with the
    # smallest eigenvalues become the new coordinates
    eD, eV = np.linalg.eig(L)
    eidx = np.argsort(eD)
    Y = np.real(eV[:, eidx[:self._k]].T)
    return super(SpectralClustering, self).train(Y.T, w=w)
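
For context, the embedding step above can be reproduced on a toy affinity matrix with numpy alone; in this sketch the matrix and k = 2 are assumptions for illustration, not values from the source project.

import numpy as np

# toy symmetric 0/1 affinity matrix: three mutually connected points
# plus one isolated point
W = np.array([[0., 1., 1., 0.],
              [1., 0., 1., 0.],
              [1., 1., 0., 0.],
              [0., 0., 0., 0.]])
D = np.diag(W.sum(axis=1))   # degree matrix
L = D - W                    # unnormalized graph Laplacian
eD, eV = np.linalg.eigh(L)   # eigh: L is symmetric, eigenvalues ascend
Y = eV[:, :2]                # k = 2 smallest eigenvectors as coordinates
# rows of Y are the embedded points handed to the downstream clusterer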
Example #3
def integration_test():
    grammar = {
        'root': {
            'type': 'rule',
            'oneOrMore': False,
            'value': [['diagnosis'], ['examination']]
        },
        'diagnosis': {
            'type': 'rule',
            'oneOrMore': False,
            'value': ['diagnosed', 'patient', 'with', 'disease']
        },
        'examination': {
            'type': 'rule',
            'oneOrMore': False,
            'value': ['performed', 'exams', 'on', 'patient']
        },
        'patient': {
            'type': 'variable',
            'oneOrMore': False,
            'value': '[a-zA-Z]+'
        },
        'disease': {
            'type': 'rule',
            'oneOrMore': False,
            'value': [['cancer'], ['diabetes'], ['aids']]
        },
        'exams': {
            'type': 'rule',
            'oneOrMore': True,
            'value': [['colonoscopy'], ['mri'], ['catscan']]
        }
    }

    graph = construct_graph(grammar)
    assert len(graph) == 5
    assert graph['root'] == ['diagnosis', 'examination']
    assert graph['diagnosis'] == ['disease']
    assert graph['examination'] == ['exams']
    assert graph['disease'] == []
    assert graph['exams'] == []

    valid, reason, rules = verify_structure(graph)
    assert valid

    keywords, variables = generate_keywords_and_variables(grammar)
    assert 'with' in keywords
    assert 'mri' in keywords
    assert variables == ['patient']
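
The assertions above pin down the expected output of construct_graph: a dict mapping each rule to the rules its value references. A minimal sketch consistent with those assertions (inferred from the test, not the project's actual implementation):

def construct_graph(grammar):
    graph = {}
    for name, spec in grammar.items():
        if spec['type'] != 'rule':
            continue  # variables such as 'patient' do not become nodes
        value = spec['value']
        # value is either one token sequence or a list of alternatives
        alternatives = value if value and isinstance(value[0], list) else [value]
        # keep only tokens that name other rules; plain keywords drop out
        graph[name] = [tok for alt in alternatives for tok in alt
                       if grammar.get(tok, {}).get('type') == 'rule']
    return graph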
Example #4
def process_event(prefix, pt_min, n_phi_sectors, select_phi_sector,
                  phi_slope_max, phi_slope_mid_max, phi_slope_outer_max,
                  z0_max, no_missing_hits, n_tracks):
    # Load the data
    evtid = int(prefix[-9:])
    logging.info('Event %i, loading data' % evtid)
    hits, particles, truth = dataset.load_event(
        prefix, parts=['hits', 'particles', 'truth'])

    # Apply hit selection
    logging.info('Event %i, selecting hits' % evtid)
    hits = select_hits(hits,
                       truth,
                       particles,
                       pt_min=pt_min,
                       no_missing_hits=no_missing_hits).assign(evtid=evtid)
    hits_sectors = split_phi_sectors(hits,
                                     n_phi_sectors=n_phi_sectors,
                                     select_phi_sector=select_phi_sector)

    # Graph features and scale
    feature_names = ['r', 'phi', 'z']
    feature_scale = np.array([1000., np.pi / n_phi_sectors, 1000.])

    # Define adjacent layers
    n_det_layers = 10
    layers = np.arange(n_det_layers)
    layer_pairs = np.stack([layers[:-1], layers[1:]], axis=1)

    # Construct the graph
    logging.info('Event %i, constructing graphs' % evtid)
    graphs = [
        construct_graph(sector_hits,
                        layer_pairs=layer_pairs,
                        phi_slope_max=phi_slope_max,
                        phi_slope_mid_max=phi_slope_mid_max,
                        phi_slope_outer_max=phi_slope_outer_max,
                        z0_max=z0_max,
                        feature_names=feature_names,
                        feature_scale=feature_scale,
                        max_tracks=n_tracks,
                        no_missing_hits=no_missing_hits)
        for sector_hits in hits_sectors
    ]

    return graphs
Example #5
File: prepareGraphs.py  Project: bishk/q
def process_event(prefix, pt_min, n_phi_sectors, phi_slope_max, z0_max,
                  n_tracks):
    # Load the data
    evtid = int(prefix[-9:])
    logging.info('Event %i, loading data' % evtid)
    hits, particles, truth = dataset.load_event(
        prefix, parts=['hits', 'particles', 'truth'])

    blacklist_particles = pd.read_csv(
        prefix + "-blacklist_particles.csv").particle_id.values
    # Apply hit selection
    logging.info('Event %i, selecting hits' % evtid)
    hits = select_hits(hits, truth, particles,
                       pt_min=pt_min).assign(evtid=evtid)
    hits_sectors = split_phi_sectors(hits,
                                     blacklist_particles,
                                     n_tracks,
                                     n_phi_sectors=n_phi_sectors)

    # Graph features and scale
    feature_names = ['x', 'y', 'z']

    # Define adjacent layers
    n_det_layers = 10
    layers = np.arange(n_det_layers)
    layer_pairs = np.stack([layers[:-1], layers[1:]], axis=1)

    # Construct the graph
    logging.info('Event %i, constructing graphs' % evtid)
    graphs = [
        construct_graph(sector_hits,
                        layer_pairs=layer_pairs,
                        phi_slope_max=phi_slope_max,
                        z0_max=z0_max,
                        feature_names=feature_names)
        for sector_hits in hits_sectors
    ]
    return graphs
Example #6
def process_grammar(grammar_id, grammar):
    # Produce a graph representing the grammar.
    graph = construct_graph(grammar)
    
    # Verify the structural integrity of the graph.
    # Rules are returned in topological order of dependency.
    valid, reason, rules = verify_structure(graph)
    if not valid:
        return valid, reason

    # Verify that there is only one user_id rule.
    user_id_rules = get_user_id_rules(grammar)
    if len(user_id_rules) > 1:
        return False, 'Only one of {} can be the user id'.format(', '.join(user_id_rules))

    # Parse to generate the keywords and variables from the grammar.
    keywords, variables = generate_keywords_and_variables(grammar)

    # Hash the grammar deterministically
    hsh = hash_grammar(grammar)

    # Attempt to persist the grammar to mongo.
    return persist_grammar(grammar_id, grammar, hsh, rules, keywords, variables)
Example #7
# -*- coding: UTF-8 -*-

# Program to do a breadth first traversal of a graph

import graph


def bfs(graph, start):

    visited = []

    queue = [start]
    visited.append(start)

    while queue:
        # pop the first element of the queue
        s = queue.pop(0)
        print(s)

        for k in graph.edges[s]:
            if k not in visited:
                visited.append(k)
                queue.append(k)


if __name__ == '__main__':
    graph = graph.construct_graph()
    bfs(graph, 2)
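
Deleting from the front of a Python list is O(n). A drop-in variant of the same traversal (not part of the original program; graph.edges is assumed as above) pops in O(1) via collections.deque:

from collections import deque

def bfs_deque(graph, start):
    visited = [start]
    queue = deque([start])
    while queue:
        s = queue.popleft()   # O(1) instead of del queue[0]
        print(s)
        for k in graph.edges[s]:
            if k not in visited:
                visited.append(k)
                queue.append(k)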
Example #8

# Program to find the strongly connected components of a directed graph
# (Kosaraju's algorithm). The snippet is truncated at the top: the head of
# topological_sort below is a minimal reconstruction, and dfs/transpose are
# assumed to be defined in the missing portion.

import graph


def topological_sort(g1, start, visited, stack):
    # DFS from start; a vertex is pushed only after all of its
    # descendants, so the stack holds decreasing finish times
    visited.append(start)
    for i in g1.edges[start]:
        if i not in visited:
            topological_sort(g1, i, visited, stack)

    stack.append(start)


def SCC(g1):

    visited = []
    stack = []

    for i in range(g1.V):
        if i not in visited:
            topological_sort(g1, i, visited, stack)

    # transpose the current graph
    gr = transpose(g1)

    # apply dfs on the transposed graph
    visited = []
    while stack:

        s = stack[-1]
        del stack[-1]

        if s not in visited:
            print("Next SCC")
            dfs(gr, s, visited)


g1 = graph.construct_graph()

SCC(g1)
Example #9
def test_construct_graph():
    years = ['1976', '1977', '1978', '1979', '1980']
    kwLimit = 100000.0
    min_edge_th = 10.0

    graph.construct_graph(years, kwLimit, min_edge_th)
Example #10
for key in clusters:
    outf_1.write('  Cluster ' + str(key) + '\n' +
                 '  NB:        ' + str(clusters[key][2]['NB']) + '\n' +
                 '  BRCA:      ' + str(clusters[key][2]['BRCA']) + '\n' +
                 '  KIRC:      ' + str(clusters[key][2]['KIRC']) + '\n' +
                 '  COAD/READ: ' + str(clusters[key][2]['COAD/READ']) + '\n')

outf_1.close()

##############################################################################
################### test hcs algorithm #######################################

# construct graph with approximately 0.1*n(n-1)/2 edges
dist = 1
perc = 0.1
G, threshold = construct_graph(data, None, perc, dist)

# make singleton set
G, S = singleton(G)
# retrieve sets of connected nodes
cc = connectedComponents(G)

# HCS algorithm
mincut_trials = 100
subgraphs = []
singles = []

# printing statements can be used to keep track of results
for i in range(len(cc)):
    # print("Connected component", i)
    G = cc[i]