def run_experiment(self):
    """Run specified experiments.

    Reads train/test data, preprocesses both splits, trains the
    configured models and writes one CSV of regression metrics per model.

    :return: dict with the metrics of the last evaluated model.
    """
    pre = Preprocessing()
    # Reuse one DataSource for both reads (was instantiated twice).
    data_source = DataSource()

    print('Reading Data')
    train_df = data_source.read_data(etapa_treino=True)
    test_df, y_test = data_source.read_data(etapa_treino=False)
    y_test = y_test['SalePrice']

    print('Preprocessing Data')
    X_train, y_train = pre.process(train_df, etapa_treino=True)

    print('Processing Test Data')
    # Restrict test columns to the features seen during training.
    X_test = pre.process(test_df[pre.train_features], etapa_treino=False)

    print('Training Model')
    models = Experiments().train_model(X_train, y_train)

    print('Running Metrics')
    for name, model in models.items():
        print(name)
        y_pred = model.predict(X_test)
        # Compute metrics once (was computed twice: once to print,
        # once to save).
        metrics = Metrics().calculate_regression(y_test, pd.Series(y_pred))
        print(metrics)
        pd.DataFrame.from_dict(metrics, orient='index').to_csv(
            '../output/' + name + '.csv')
    return metrics
def post(self):
    """Execute GraphQL queries and mutations.

    Use this endpoint to send http request to the GraphQL API.
    """
    payload = request.json

    # Forward the raw query to the GraphQL API.
    status, data = utils.execute_graphql_request(payload['query'])

    # Trigger execution of a batch of indicators when requested.
    if status == 200 and 'executeBatch' in payload['query']:
        batch_node = data['data']['executeBatch']['batch']
        if 'id' in batch_node:
            Batch().execute(str(batch_node['id']))
        else:
            message = "Batch Id attribute is mandatory in the payload to be able to trigger the batch execution. Example: {'query': 'mutation{executeBatch(input:{indicatorGroupId:1}){batch{id}}}'"
            abort(400, message)

    # Verify connectivity to the referenced data source when requested.
    if status == 200 and 'testDataSource' in payload['query']:
        source_node = data['data']['testDataSource']['dataSource']
        if 'id' in source_node:
            data = DataSource().test(str(source_node['id']))
        else:
            message = "Data Source Id attribute is mandatory in the payload to be able to test the connectivity. Example: {'query': 'mutation{testDataSource(input:{dataSourceId:1}){dataSource{id}}}'"
            abort(400, message)

    if status == 200:
        return jsonify(data)
    abort(500, data)
def generate_db_summary_file(generated_files_folder, config_info, upload_flag,
                             validate_flag):
    """Generate the DB summary file: entity counts per label combination."""
    db_summary_query = '''MATCH (entity) WITH labels(entity) AS entityTypes RETURN count(entityTypes) AS frequency, entityTypes'''
    debug = config_info.config["DEBUG"]
    if debug:
        logger.info("DB Summary Query")
        logger.info(db_summary_query)
        start_time = time.time()
        logger.info("Start time: %s",
                    time.strftime("%H:%M:%S", time.gmtime(start_time)))

    source = DataSource(get_neo_uri(config_info), db_summary_query)
    generator = db_summary_file_generator.DbSummaryFileGenerator(
        source, generated_files_folder, config_info)
    generator.generate_file(upload_flag=upload_flag,
                            validate_flag=validate_flag)

    if debug:
        end_time = time.time()
        logger.info("Created DB Summary file - End time: %s",
                    time.strftime("%H:%M:%S", time.gmtime(end_time)))
        logger.info("Time Elapsed: %s",
                    time.strftime("%H:%M:%S",
                                  time.gmtime(end_time - start_time)))
def main():
    """Entry point for our simple vlc player."""
    app = QtWidgets.QApplication(sys.argv)

    source = DataSource(
        "https://my-json-server.typicode.com/ochkarik05/jsonservers/videos")
    player = Player()
    play_list = PlayList(player)
    scanner = PlayListScanner(source)

    # Wire the data flow: the scanner fills the playlist, and the playlist
    # advances when the current track finishes.
    scanner.dataLoaded.connect(play_list.set_track_list)
    player.trackFinished.connect(play_list.next)

    gui = MainWindow(player)
    gui.show()
    gui.resize(640, 480)
    gui.setWindowState(Qt.WindowFullScreen)
    sys.exit(app.exec_())
def run_experiment(self):
    """Run specified experiments.

    Reads train/test data, preprocesses both splits, trains the
    configured models and writes one CSV of classification metrics
    per model.

    :return: dict with the metrics of the last evaluated model.
    """
    pre = PreProcessing()
    ds = DataSource()
    met = Metrics()

    print('Reading Data')
    train_df = ds.read_data(train=True)
    test_df = ds.read_data(train=False)
    y_test = test_df['y']

    print('Preprocessing train data')
    X_train, y_train = pre.preprocess(train_df, train=True)

    print('Preprocessing test data')
    # Restrict test columns to the features seen during training.
    X_test = pre.preprocess(test_df[pre.train_features], train=False)

    print('Training model')
    models = Experiments().train_model(X_train, y_train)

    print('Running metrics')
    for name, model in models.items():
        print(name)
        y_pred = model.predict(X_test)
        # Compute metrics once (was computed twice: once to print,
        # once to save).
        metrics = met.calculate_classification(name, y_test,
                                               pd.Series(y_pred))
        print(metrics)
        pd.DataFrame.from_dict(metrics, orient='index').to_csv(
            '../output/' + name + '.csv')
    return metrics
def generate_variant_allele_files(generated_files_folder, skip_chromosomes,
                                  config_info, upload_flag, validate_flag):
    """Generate one Variant Allele file per non-human species."""
    species_query = """MATCH (s:Species) WHERE s.primaryKey <> "NCBITaxon:9606" RETURN s.primaryKey as speciesID"""
    species_source = DataSource(get_neo_uri(config_info), species_query)
    debug = config_info.config["DEBUG"]
    if debug:
        start_time = time.time()
        logger.info("Start time for generating Variant Alleles files: %s",
                    time.strftime("%H:%M:%S", time.gmtime(start_time)))

    for record in species_source:
        generate_variant_allele_species_file(record["speciesID"],
                                             generated_files_folder,
                                             skip_chromosomes, config_info,
                                             upload_flag, validate_flag)

    if debug:
        end_time = time.time()
        logger.info("Created Variant Allele files - End time: %s",
                    time.strftime("%H:%M:%S", time.gmtime(end_time)))
        logger.info("Time Elapsed: %s",
                    time.strftime("%H:%M:%S",
                                  time.gmtime(end_time - start_time)))
def generate_human_genes_interacting_with(generated_files_folder, config_info,
                                          upload_flag, validate_flag):
    """Generate the file of human genes interacting with SARS-CoV-2 genes.

    Queries Neo4j for human genes (NCBITaxon:9606) joined through an
    interaction to SARS-CoV-2 (NCBITaxon:2697049) genes.
    """
    query = '''MATCH (s:Species)-[:FROM_SPECIES]-(g:Gene)--(i:InteractionGeneJoin)--(g2:Gene)-[:FROM_SPECIES]-(s2:Species) WHERE s.primaryKey ='NCBITaxon:2697049' AND s2.primaryKey = 'NCBITaxon:9606' RETURN DISTINCT g2.primaryKey AS GeneID, g2.symbol AS Symbol, g2.name AS Name'''
    if config_info.config["DEBUG"]:
        logger.info("Human Genes Interacts With query")
        logger.info(query)
        start_time = time.time()
        logger.info("Start time: %s",
                    time.strftime("%H:%M:%S", time.gmtime(start_time)))

    data_source = DataSource(get_neo_uri(config_info), query)
    hgiw = human_genes_interacting_with_file_generator.HumanGenesInteractingWithFileGenerator(
        data_source, config_info, generated_files_folder)
    hgiw.generate_file(upload_flag=upload_flag, validate_flag=validate_flag)

    if config_info.config["DEBUG"]:
        end_time = time.time()
        # Fixed typo in the log message ("Genees" -> "Genes").
        logger.info(
            "Created Human Genes Interacting with file - End time: %s",
            time.strftime("%H:%M:%S", time.gmtime(end_time)))
        logger.info(
            "Time Elapsed: %s",
            time.strftime("%H:%M:%S", time.gmtime(end_time - start_time)))
def generate_allele_gff(generated_files_folder, config_info, upload_flag,
                        validate_flag):
    """Generate an Allele GFF file for every non-ignored assembly."""
    assembly_query = """MATCH (a:Assembly) RETURN a.primaryKey AS assemblyID"""
    assemblies = DataSource(get_neo_uri(config_info), assembly_query)
    debug = config_info.config["DEBUG"]
    if debug:
        start_time = time.time()
        logger.info("Start time for generating Allele GFF files: %s",
                    time.strftime("%H:%M:%S", time.gmtime(start_time)))

    for record in assemblies:
        assembly = record["assemblyID"]
        # `ignore_assemblies` is a module-level exclusion list.
        if assembly in ignore_assemblies:
            continue
        generate_allele_gff_assembly(assembly, generated_files_folder,
                                     config_info, upload_flag, validate_flag)

    if debug:
        end_time = time.time()
        logger.info("Created Allele GFF files - End time: %s",
                    time.strftime("%H:%M:%S", time.gmtime(end_time)))
        logger.info("Time Elapsed: %s",
                    time.strftime("%H:%M:%S",
                                  time.gmtime(end_time - start_time)))
def predict(self):
    """Predict values using model trained.

    :return: predicted values for the test set.
    """
    # Load the trained model bundle (preprocessing + estimator) from disk.
    print('Loading the model')
    self.modelo = load('../output/modelo.pkl')

    # Read the TEST data.
    print('Loading Data')
    test_df, y_test = DataSource().read_data(etapa_treino=False)

    print('Preprocessing Data')
    X_test = self.modelo['preprocessing'].process(test_df,
                                                  etapa_treino=False)
    # Show remaining NaN counts per column as a sanity check.
    print(X_test.isna().sum())

    print('Predicting')
    predictions = self.modelo['model_obj'].predict(X_test)

    # Persist the predictions.
    print('Saving Files')
    pd.DataFrame(predictions).to_csv('../output/predito.csv')
    return predictions
def test_get_index_data(self):
    """The generated inverted index must map the keyword to its book id."""
    data_source = DataSource(self.books_file)
    inverted_index = InvertedIndex(self.index_file)
    inverted_index.get_inverted_index(data_source.read_file())
    # Pickle files are binary: opening with 'r' fails on Python 3
    # (UnicodeDecodeError / TypeError); 'rb' is required.
    with open(inverted_index.file_name, 'rb') as f:
        keywords_dict = pickle.load(f)
    self.assertEqual(keywords_dict[self.keyword], ['B000UZNREG'])
def generate_gene_cross_reference_file(generated_files_folder, config_info,
                                       upload_flag, validate_flag):
    """Generate the gene cross-reference file from Neo4j."""
    gene_cross_reference_query = '''MATCH (g:Gene)--(cr:CrossReference) RETURN g.primaryKey as GeneID, cr.globalCrossRefId as GlobalCrossReferenceID, cr.crossRefCompleteUrl as CrossReferenceCompleteURL, cr.page as ResourceDescriptorPage, g.taxonId as TaxonID'''
    debug = config_info.config["DEBUG"]
    if debug:
        logger.info("Gene Cross Reference query")
        logger.info(gene_cross_reference_query)
        start_time = time.time()
        logger.info("Start time: %s",
                    time.strftime("%H:%M:%S", time.gmtime(start_time)))

    source = DataSource(get_neo_uri(config_info), gene_cross_reference_query)
    generator = gene_cross_reference_file_generator.GeneCrossReferenceFileGenerator(
        source, generated_files_folder, config_info)
    generator.generate_file(upload_flag=upload_flag,
                            validate_flag=validate_flag)

    if debug:
        end_time = time.time()
        logger.info("Gene Cross Reference file - End time: %s",
                    time.strftime("%H:%M:%S", time.gmtime(end_time)))
        logger.info("Time Elapsed: %s",
                    time.strftime("%H:%M:%S",
                                  time.gmtime(end_time - start_time)))
def generate_uniprot_cross_reference(generated_files_folder, config_info,
                                     upload_flag, validate_flag):
    """Generate the UniProtKB cross-reference file from Neo4j."""
    uniprot_cross_reference_query = '''MATCH (g:Gene)--(cr:CrossReference) WHERE cr.prefix = "UniProtKB" RETURN g.primaryKey as GeneID, cr.globalCrossRefId as GlobalCrossReferenceID'''
    debug = config_info.config["DEBUG"]
    if debug:
        logger.info("UniProt Cross Reference query")
        logger.info(uniprot_cross_reference_query)
        start_time = time.time()
        logger.info("Start time: %s",
                    time.strftime("%H:%M:%S", time.gmtime(start_time)))

    source = DataSource(get_neo_uri(config_info),
                        uniprot_cross_reference_query)
    generator = uniprot_cross_reference_generator.UniProtGenerator(
        source, config_info, generated_files_folder)
    generator.generate_file(upload_flag=upload_flag,
                            validate_flag=validate_flag)

    if debug:
        end_time = time.time()
        logger.info("Created UniProt Cross Reference file - End time: %s",
                    time.strftime("%H:%M:%S", time.gmtime(end_time)))
        logger.info("Time Elapsed: %s",
                    time.strftime("%H:%M:%S",
                                  time.gmtime(end_time - start_time)))
def get_data_frame(self, data_source: str, request: str, dimensions: str,
                   measures: str):
    """Get data from data source.

    Return a formatted data frame according to dimensions and measures
    parameters.

    :param data_source: name of the data source to query (annotation was
        incorrectly ``pandas.DataFrame``).
    :param request: SQL request to execute on the data source.
    :param dimensions: dimension column names.
    :param measures: measure column names.
    :raises Exception: if the data source or its password does not exist,
        or if the request returns no data.
    """
    # Keep the name: the `data_source` variable is reassigned to a
    # DataSource object below, but error messages must show the name.
    data_source_name = data_source

    # Get data source credentials
    query = '{dataSourceByName(name:"data_source"){id,connectionString,login,dataSourceTypeId}}'
    query = query.replace('data_source', data_source)
    response = utils.execute_graphql_request(query)

    # Get connection object
    if response['data']['dataSourceByName']:
        data_source_id = response['data']['dataSourceByName']['id']
        data_source_type_id = response['data']['dataSourceByName'][
            'dataSourceTypeId']
        connection_string = response['data']['dataSourceByName'][
            'connectionString']
        login = response['data']['dataSourceByName']['login']

        # Get data source password
        query = 'query{allDataSourcePasswords(condition:{id:data_source_id}){nodes{password}}}'
        # Use replace() instead of format() because of curly braces
        query = query.replace('data_source_id', str(data_source_id))
        response = utils.execute_graphql_request(query)
        nodes = response['data']['allDataSourcePasswords']['nodes']
        if nodes and nodes[0]:
            password = nodes[0]['password']
            log.info('Connect to data source.')
            data_source = DataSource()
            connection = data_source.get_connection(data_source_type_id,
                                                    connection_string, login,
                                                    password)
        else:
            # Previously this path fell through with `connection`
            # undefined, raising a NameError further down.
            error_message = f'Password for data source {data_source_name} does not exist.'
            log.error(error_message)
            raise Exception(error_message)
    else:
        error_message = f'Data source {data_source_name} does not exist.'
        log.error(error_message)
        raise Exception(error_message)

    # Get data frame
    log.info('Execute request on data source.')
    data_frame = pandas.read_sql(request, connection)
    connection.close()

    if data_frame.empty:
        # Use the saved name: `data_source` now holds a DataSource object.
        error_message = f'Request on data source {data_source_name} returned no data.'
        log.error(error_message)
        log.debug('Request: %s.', request)
        raise Exception(error_message)

    # Format data frame
    log.debug('Format data frame.')
    column_names = dimensions + measures
    data_frame.columns = column_names
    for column in dimensions:
        # Convert dimension values to string
        data_frame[column] = data_frame[column].astype(str)
    return data_frame
def data_get_distinct_values():
    """Returns all the distinct values of a data column."""
    payload = request.get_json(force=True)
    source = DataSource(payload['data_file'], config['export_folder'])
    distinct_values = source.get_distinct_values(payload['parameters'])
    return Response(json.dumps(distinct_values),
                    mimetype='application/json')
def __init__(self):
    """Initialise the data source, optional localization, and path matchers."""
    self.data_source = DataSource()
    # Localization is an opt-in plugin controlled by the loaded settings.
    if self.data_source.settings.loaded['use_localization']:
        localization.start_plugin(self.data_source)

    # Mount point of the virtual filesystem under the user's home.
    self.root = (os.environ['HOME'] +
                 "/Documentos/UFG-CDC/PFC/PFC2/Sistema/virtual_filesystem/mountpoint/")

    def entry_pattern(directory):
        # Matches "<directory>/<name>" with an optional trailing slash.
        return re.compile(f"^{directory}/[^/]*(/$|$)")

    self.event_pattern = entry_pattern(event_directory)
    self.localization_pattern = entry_pattern(localization_directory)
def get_ordered_species_dict(config_info, taxon_ids):
    """Return an OrderedDict of taxon id -> species name.

    Species are kept in phylogenetic order and restricted to the given
    taxon ids.
    """
    species_query = """MATCH (s:Species) RETURN s ORDER BY s.phylogeneticOrder"""
    wanted = set(taxon_ids)
    species = OrderedDict()
    for record in DataSource(get_neo_uri(config_info), species_query):
        node = record["s"]
        if node["primaryKey"] in wanted:
            species[node["primaryKey"]] = node["name"]
    return species
def __init__(self, csv_file="/Users/jamesjirgens/PycharmProjects/mine_graphed/resources/portfolio.csv"):
    """Load portfolio positions from a CSV file.

    :param csv_file: path to the portfolio CSV. Defaults to the original
        hard-coded location for backward compatibility.
    """
    self.positions = []
    # `with` guarantees the file handle is closed (it was leaked before:
    # open() was passed to DictReader and never closed).
    with open(csv_file, newline='') as handle:
        for row in csv.DictReader(handle):
            stock = DataSource(row["instrument"])
            self.positions.append(
                Position(row["instrument"], row["quantity"],
                         stock.getName(), stock.getPrice()))
def test_search(self):
    """Search results must be ranked by match count, highest first."""
    searcher = Searcher(DataSource(self.books_file),
                        InvertedIndex(self.index_file))
    expected = [
        ('B000UZNREG',
         'Deceptively Delicious: Simple Secrets to Get Your Kids Eating Good Food',
         2),
        ('B00N2A6HLG', 'Good and Cheap: Eat Well on $4/Day', 1),
        ('B00BATL11W',
         "The Southerner's Handbook: A Guide to Living the Good Life", 1),
    ]
    self.assertEqual(searcher.search(self.query), expected)
def main() -> None:
    """Run quality-control checks over the configured data sources.

    Parses CLI flags, builds a QCConfig, and runs the working/daily/history
    checks (all three when none is explicitly selected).
    """
    # pylint: disable=no-member
    config = util.read_config_file("quality-control")
    parser = load_args_parser(config)
    # BUG FIX: parse_args(sys.argv) handed the program name (argv[0]) to
    # argparse as if it were a real argument; argparse expects argv[1:].
    args = parser.parse_args(sys.argv[1:])

    if not args.check_working and not args.check_daily and not args.check_history:
        logger.info(" [default to all sources]")
        args.check_working = True
        args.check_daily = True
        args.check_history = True

    config = QCConfig(
        results_dir=args.results_dir,
        save_results=args.save_results,
        enable_experimental=args.enable_experimental,
        images_dir=args.images_dir,
        plot_models=args.plot_models,
    )
    if config.save_results:
        logger.warning(f" [save results to {args.results_dir}]")
    if config.plot_models:
        logger.warning(f" [save forecast curves to {args.images_dir}]")
    if len(args.state) != 0:
        logger.error(" [states filter not implemented]")

    ds = DataSource()

    if args.check_working:
        logger.info(
            "--| QUALITY CONTROL --- GOOGLE WORKING SHEET |---------------------------------------------------"
        )
        log = check_working(ds, config=config)
        log.print()

    if args.check_daily:
        logger.info(
            "--| QUALITY CONTROL --- CURRENT |-----------------------------------------------------------"
        )
        log = check_current(ds, config=config)
        log.print()

    if args.check_history:
        logger.info(
            "--| QUALITY CONTROL --- HISTORY |-----------------------------------------------------------"
        )
        log = check_history(ds)
        log.print()
def get_data_frame(self, data_source, request, dimensions, measures):
    """Get data from data source.

    Return a formatted data frame according to dimensions and measures
    parameters.

    :param data_source: name of the data source to query.
    :param request: SQL request to execute on the data source.
    :param dimensions: dimension column names.
    :param measures: measure column names.
    :raises Exception: if the data source does not exist or the request
        returns no data.
    """
    # Keep the name: `data_source` is reassigned to a DataSource object
    # below, but messages must show the original name.
    data_source_name = data_source

    # Get data source credentials
    query = '''{dataSourceByName(name:"data_source"){connectionString,login,password,dataSourceTypeId}}'''
    query = query.replace('data_source', data_source)
    response = utils.execute_graphql_request(query)

    # Get connection object
    if response['data']['dataSourceByName']:
        data_source_type_id = response['data']['dataSourceByName'][
            'dataSourceTypeId']
        connection_string = response['data']['dataSourceByName'][
            'connectionString']
        login = response['data']['dataSourceByName']['login']
        password = response['data']['dataSourceByName']['password']
        log.info('Connect to data source {data_source}.'.format(
            data_source=data_source_name))
        data_source = DataSource()
        connection = data_source.get_connection(data_source_type_id,
                                                connection_string, login,
                                                password)
    else:
        error_message = 'Data source {data_source} does not exist.'.format(
            data_source=data_source_name)
        log.error(error_message)
        raise Exception(error_message)

    # Get data frame
    # The original called .format() on a placeholder-free string; the call
    # had no effect and is removed.
    log.info('Execute request on data source.')
    data_frame = pandas.read_sql(request, connection)
    connection.close()
    if data_frame.empty:
        # BUG FIX: the message previously interpolated the DataSource
        # object (the variable was shadowed), not the name.
        error_message = 'Request on data source {data_source} returned no data.'.format(
            data_source=data_source_name)
        log.error(error_message)
        log.debug('Request: {request}.'.format(request=request))
        raise Exception(error_message)

    # Format data frame
    log.debug('Format data frame.')
    column_names = dimensions + measures
    data_frame.columns = column_names
    for column in dimensions:
        # Convert dimension values to string
        data_frame[column] = data_frame[column].astype(str)
    return data_frame
def get_data_frame(self, authorization: str, data_source: str, request: str,
                   dimensions: str, measures: str):
    """Get data from data source.

    Return a formatted data frame according to dimensions and measures
    parameters.

    :param authorization: authorization header forwarded to the GraphQL API.
    :param data_source: name of the data source to query (annotation was
        incorrectly ``pandas.DataFrame``).
    :param request: SQL request to execute on the data source.
    :param dimensions: dimension column names.
    :param measures: measure column names.
    :raises Exception: if the data source does not exist or the request
        returns no data.
    """
    # Keep the name: `data_source` is reassigned to a DataSource object
    # below, but messages must show the original name.
    data_source_name = data_source

    # Get data source credentials
    query = 'query getDataSource($name: String!){dataSourceByName(name: $name){id, connectionString, login, dataSourceTypeId}}'
    variables = {'name': data_source}
    payload = {'query': query, 'variables': variables}
    response = utils.execute_graphql_request(authorization, payload)

    # Get connection object
    if response['data']['dataSourceByName']:
        data_source_id = response['data']['dataSourceByName']['id']
        data_source_type_id = response['data']['dataSourceByName'][
            'dataSourceTypeId']
        connection_string = response['data']['dataSourceByName'][
            'connectionString']
        login = response['data']['dataSourceByName']['login']

        # Get data source password
        data_source = DataSource()
        password = data_source.get_password(authorization, data_source_id)

        # Connect to data source
        log.info('Connect to data source.')
        connection = data_source.get_connection(data_source_type_id,
                                                connection_string, login,
                                                password)
    else:
        # Previously there was no failure path here, so `connection` was
        # left undefined; raise explicitly like the sibling implementations.
        error_message = f'Data source {data_source_name} does not exist.'
        log.error(error_message)
        raise Exception(error_message)

    # Get data frame
    log.info('Execute request on data source.')
    data_frame = pandas.read_sql(request, connection)
    connection.close()
    if data_frame.empty:
        # BUG FIX: the message previously interpolated the DataSource
        # object (the variable was shadowed), not the name.
        error_message = f'Request on data source {data_source_name} returned no data.'
        log.error(error_message)
        log.debug('Request: %s.', request)
        raise Exception(error_message)

    # Format data frame
    log.debug('Format data frame.')
    column_names = dimensions + measures
    data_frame.columns = column_names
    for column in dimensions:
        # Convert dimension values to string
        data_frame[column] = data_frame[column].astype(str)
    return data_frame
def reset(self):
    """Drop cached results and rebuild the config and data source."""
    self._working = None
    self._current = None
    self._history = None

    cfg = util.read_config_file("quality-control")
    checks, model = cfg["CHECKS"], cfg["MODEL"]
    # Config values are strings; boolean options compare against "True".
    self.config = QCConfig(
        results_dir=checks["results_dir"],
        enable_experimental=checks["enable_experimental"] == "True",
        save_results=checks["save_results"] == "True",
        images_dir=model["images_dir"],
        plot_models=model["plot_models"] == "True",
    )
    self.ds = DataSource()
def data_get_columns():
    """Initialized the experiment data object and parses the experiment file
    if required. Returns the column names in the experiment and the graph
    plugins."""
    requested_path = request.get_json(force=True)
    app.logger.debug("Requested path: %s", requested_path)
    source = DataSource(requested_path, config['export_folder'])
    columns = source.column_names
    if not columns:
        raise Exception("No data in file.")
    return Response(json.dumps(columns), mimetype='application/json')
def generate_allele_gff_assembly(assembly, generated_files_folder,
                                 config_info, upload_flag, validate_flag):
    """Generate the Allele GFF file for one assembly.

    Collects alleles with more than one variant on the assembly, together
    with their gene-level consequences, and writes them via
    AlleleGffFileGenerator.
    """
    # NOTE(review): the assembly id is concatenated directly into the
    # Cypher query; assumed to come from the trusted Assembly query
    # upstream — confirm before exposing to user input.
    query = '''MATCH (v:Variant)-[:ASSOCIATION]->(gl:GenomicLocation)-[:ASSOCIATION]->(:Assembly {primaryKey: "''' + assembly + '''"}), (a:Allele)<-[:VARIATION]-(v:Variant)-[:LOCATED_ON]->(c:Chromosome), (v:Variant)-[:VARIATION_TYPE]->(so:SOTerm), (v:Variant)<-[:COMPUTED_GENE]-(g:Gene)-[:ASSOCIATION]->(glc:GeneLevelConsequence)<-[:ASSOCIATION]-(v:Variant) WITH c,a,v,gl,so, COLLECT({geneID: g.primaryKey, geneSymbol: g.symbol, geneLevelConsequence: glc.geneLevelConsequence, impact: glc.impact}) AS glcs WITH c.primaryKey AS chromosome, a.primaryKey AS ID, a.symbol AS symbol, a.symbolText AS symbol_text, COLLECT(DISTINCT {ID: v.primaryKey, genomicVariantSequence: v.genomicVariantSequence, genomicReferenceSequence: v.genomicReferenceSequence, soTerm: so.name, start: gl.start, end: gl.end, chromosome: gl.chromosome, geneLevelConsequences: glcs}) AS variants, COUNT(DISTINCT v.primaryKey) AS num WHERE num > 1 RETURN chromosome, ID, symbol, symbol_text, variants ORDER BY chromosome'''
    debug = config_info.config["DEBUG"]
    if debug:
        logger.info("Allele GFF query")
        logger.info(query)
        start_time = time.time()
        logger.info("Start time: %s",
                    time.strftime("%H:%M:%S", time.gmtime(start_time)))

    source = DataSource(get_neo_uri(config_info), query)
    generator = allele_gff_file_generator.AlleleGffFileGenerator(
        assembly, source, generated_files_folder, config_info)
    generator.generate_assembly_file(upload_flag=upload_flag,
                                     validate_flag=validate_flag)

    if debug:
        end_time = time.time()
        logger.info("Created Allele GFF file - End time: %s",
                    time.strftime("%H:%M:%S", time.gmtime(end_time)))
        logger.info("Time Elapsed: %s",
                    time.strftime("%H:%M:%S",
                                  time.gmtime(end_time - start_time)))
def data_plot():
    """Plot a data file."""
    payload = request.get_json(force=True)
    data_file = payload['data_file']
    graph_type = payload['graph_type']
    parameters = payload['parameters']

    source = DataSource(data_file, config['export_folder'])
    if not source.column_names:
        raise Exception("No data in file.")

    # Delegate rendering to the plugin registered for this graph type.
    js, div = plugins[graph_type].plot(source, **parameters)
    return Response(json.dumps({'div': div, 'js': js}),
                    mimetype='application/json')
def load_sources(self):
    """Discover and register data-source plugins from the user plugins dir."""
    log.warning("Loading sources...")
    plugins_dir = self.get_user_plugins_dir()
    for entry in listdir(plugins_dir):
        module_path = join(plugins_dir, entry)
        # Only plain .py files are treated as plugins.
        if isdir(module_path) or not module_path.endswith(".py"):
            continue
        module_name = entry
        log.warning(f"\tloading plugin {module_name} ({module_path})...")
        # The runtime id is a stable hash of the module name.
        runtime_id = get_string_sha1(module_name)
        self._plugins[runtime_id] = DataSource(
            source_code=Path(module_path).read_text(),
            runtime_id=runtime_id,
            name=module_name,
            module_path=module_path,
        )
def __init__(self, _config):
    """Build a switch: arrival buffers, processing buffer, and the
    service-rate / computation-cost generators configured in _config."""
    srv_gen = PROCS[_config["SRV_PROC"]](_config["SRV_RATE"], None)
    comp_gen = PROCS[_config["COMP_PROC"]](_config["COMP_COST"], None)

    self.__id = id_manager.get_next_id("Switch")

    # Arrival buffers are pre-filled at construction time.
    self.__arr_buffers = DataSource(_config)
    self.__arr_buffers.init_fillup()
    self.__proc_buffer = Buffer()  # processing buffer
    self.__srv_gen = srv_gen
    self.__srv_cap = 0

    # The switch is associated with itself at its local address.
    self.__assoc_elements = {self.LOCAL_ADDR: self}
    self.__cost_table = {self.LOCAL_ADDR: comp_gen}
    self.__next_costs = {}
    self.__comm_cost = 0
    self.__comp_cost = 0
    self.generate_srv_rate()
def __init__(self, part, count):
    """Initialize the plot class.

    Instantiate a DataSource and load data.

    Args:
        part (int): index sequence of video parts.
        count (int): amount of data to be plotted in this video part;
            -1 means "use the full data size".
    """
    self.data_source = DataSource()
    self.data_source.load()
    self.x_points = np.arange(self.size)
    self.part = part
    # -1 is a sentinel meaning "all of the data".
    self.count = self.size if count == -1 else count
    self.start = self.part * self.count
    # Clamp the end index to the available data.
    self.end = min((self.part + 1) * self.count, self.size) - 1
    self.count = self.end - self.start + 1
def generate_orthology_file(generated_files_folder, config_info, upload_flag,
                            validate_flag):
    """Generate the orthology file of strict-filter gene pairs.

    Queries Neo4j for orthologous gene pairs with matched/not-matched
    algorithm counts and writes them via OrthologyFileGenerator.
    """
    orthology_query = '''MATCH (species1)<-[sa:FROM_SPECIES]-(gene1:Gene)-[o:ORTHOLOGOUS]->(gene2:Gene)-[sa2:FROM_SPECIES]->(species2:Species) WHERE o.strictFilter OPTIONAL MATCH (algorithm:OrthoAlgorithm)-[m:MATCHED]-(ogj:OrthologyGeneJoin)-[association:ASSOCIATION]-(gene1) WHERE ogj.primaryKey = o.primaryKey OPTIONAL MATCH (algorithm2:OrthoAlgorithm)-[m2:NOT_MATCHED]-(ogj2:OrthologyGeneJoin)-[ASSOCIATION]-(gene1) WHERE ogj2.primaryKey = o.primaryKey RETURN gene1.primaryKey AS gene1ID, gene1.symbol AS gene1Symbol, gene2.primaryKey AS gene2ID, gene2.symbol AS gene2Symbol, COLLECT(DISTINCT algorithm.name) as Algorithms, count(DISTINCT algorithm.name) AS numAlgorithmMatch, count(DISTINCT algorithm2.name) AS numAlgorithmNotMatched, toString(o.isBestScore) AS best, toString(o.isBestRevScore) AS bestRev, species1.primaryKey AS species1TaxonID, species1.name AS species1Name, species2.primaryKey AS species2TaxonID, species2.name AS species2Name'''
    if config_info.config["DEBUG"]:
        logger.info("Orthology query")
        logger.info(orthology_query)
        start_time = time.time()
        logger.info("Start time: %s",
                    time.strftime("%H:%M:%S", time.gmtime(start_time)))

    data_source = DataSource(get_neo_uri(config_info), orthology_query)
    of = orthology_file_generator.OrthologyFileGenerator(
        data_source, generated_files_folder, config_info)
    of.generate_file(upload_flag=upload_flag, validate_flag=validate_flag)

    if config_info.config["DEBUG"]:
        end_time = time.time()
        # Fixed copy-paste log message (said "VCF file").
        logger.info("Created Orthology file - End time: %s",
                    time.strftime("%H:%M:%S", time.gmtime(end_time)))
        logger.info(
            "Time Elapsed: %s",
            time.strftime("%H:%M:%S", time.gmtime(end_time - start_time)))
def main():
    """CLI entry point: acquire or replay data, optionally filter/save, plot."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '-i', '--input', type=str, default=None,
        help='Path to the input file. '
        'If provided, the data will be replayed '
        'from this file instead of being acquired from the board. '
        'Ex: --input "data/output-2018-08-22.csv"')
    arg_parser.add_argument(
        '-f', '--filter', action='count', default=False,
        help='Whether to filter the input data.')
    arg_parser.add_argument(
        '-s', '--save', action='count', default=False,
        help='Whether to save the data to a CSV file. '
        'The CSV file will be saved to data/ and its '
        'name will be timestamped.')
    arg_parser.add_argument(
        '-b', '--buffer', type=int, default=500,
        help='Size of the data history to display in the charts. This is also '
        'the window of points used for PSD computation.')
    options = arg_parser.parse_args()

    data_source = DataSource(input_file=options.input,
                             filter_data=options.filter,
                             to_csv=options.save)
    data_source.start()
    try:
        run_plot(data_source)
    except KeyboardInterrupt:
        # Stop acquisition cleanly on Ctrl-C.
        data_source.stop()