def related(
        name=None,
        sample_count=loom.preql.SAMPLE_COUNT,
        debug=False,
        profile='time'):
    '''
    Run the relate query over every feature named in a dataset's schema.

    Arguments:
        name: dataset name, forwarded to loom.store.require and get_paths.
        sample_count: forwarded to preql.relate as sample_count.
        debug: forwarded positionally to loom.preql.get_server.
        profile: forwarded positionally to loom.preql.get_server.

    The value returned by preql.relate is discarded; progress messages
    are printed to stdout.
    '''
    required_files = [
        'ingest.schema',
        'ingest.encoding',
        'samples.0.config',
        'samples.0.model',
        'samples.0.groups',
    ]
    # NOTE(review): presumably this fails fast when any listed artifact is
    # missing from the store -- confirm against loom.store.require.
    loom.store.require(name, required_files)

    # Only the input paths are used; the second element is ignored.
    paths, _ = get_paths(name, 'related')

    # An empty config is dumped to the query config path before the
    # server is started.
    loom.config.config_dump({}, paths['query']['config'])

    root = paths['root']
    ingest_paths = paths['ingest']
    encoding = ingest_paths['encoding']
    schema = json_load(ingest_paths['schema'])
    feature_names = sorted(schema.keys())

    print('starting server')
    with loom.preql.get_server(root, encoding, debug, profile) as preql:
        message = 'querying {} features'.format(len(feature_names))
        print(message)
        preql.relate(feature_names, sample_count=sample_count)
def test_relate(root, encoding, **unused):
    # Smoke test: run a relate query through a PreQL object built on top of
    # a query server, write the result to a CSV file, and check that the
    # file can be read back row by row with the csv module.
    # Extra keyword arguments are accepted and ignored.
    output_path = 'related_out.csv'
    with loom.query.get_server(root, debug=True) as query_server:
        # NOTE(review): the relative output path suggests tempdir switches
        # the working directory to a scratch location -- confirm.
        with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
            preql = loom.preql.PreQL(query_server, encoding)
            preql.relate(preql.feature_names, output_path, sample_count=10)
            with open(output_path, 'r') as csv_file:
                # Row contents are not inspected; iterating is the check.
                for _row in csv.reader(csv_file):
                    pass
def test_relate(root, **unused):
    # Run a relate query over all features, write the result to a CSV file,
    # then verify that:
    #   * the header (minus its first cell) equals preql.feature_names,
    #   * the first cell of data row i equals preql.feature_names[i],
    #   * the parsed score matrix is close to its own transpose.
    # Extra keyword arguments are accepted and ignored.
    output_path = 'related_out.csv'
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        with loom.preql.get_server(root, debug=True) as preql:
            preql.relate(preql.feature_names, output_path, sample_count=10)
            with open(output_path, 'r') as csv_file:
                rows = csv.reader(csv_file)

                # The first header cell is skipped (presumably it labels
                # the row-name column); the rest are the column names.
                column_names = next(rows)[1:]
                assert_equal(column_names, preql.feature_names)

                size = len(column_names)
                scores = numpy.zeros((size, size))
                for i, cells in enumerate(rows):
                    # Leading cell is the row's feature name; the remaining
                    # cells are that row's scores.
                    assert_equal(cells[0], preql.feature_names[i])
                    for j, cell in enumerate(cells[1:]):
                        scores[i, j] = float(cell)

                assert_close(scores, scores.T)
def test_relate_pandas(root, rows_csv, schema, **unused):
    # Run a relate query without an output file, parse the returned string
    # as CSV into a pandas DataFrame (first column as the index), and check
    # that the frame is 2-dimensional with one row and one column per entry
    # in the schema.
    # rows_csv and extra keyword arguments are accepted but not used here.
    expected_size = len(json_load(schema))
    with loom.preql.get_server(root, debug=True) as preql:
        csv_text = preql.relate(preql.feature_names)
        frame = pandas.read_csv(StringIO(csv_text), index_col=0)

        # Show the parsed frame in the test output.
        print('result_df =')
        print(frame)

        assert_equal(frame.ndim, 2)
        row_count, column_count = frame.shape
        assert_equal(row_count, expected_size)
        assert_equal(column_count, expected_size)