Exemplo n.º 1
0
def parse_semantic_scholar_corpus_file(path, database_path="aip.db"):
    """Parse one Semantic Scholar corpus file and load its papers into the database.

    Skips the file entirely if the database records it as already parsed.
    Returns True when the file has been processed (or was processed before).

    :param path: Path to the corpus file (plain or gzip-compressed stacked JSON).
    :param database_path: Location passed to DatabaseManager.
    """
    database = DatabaseManager(location=database_path)

    # file_hash (not `hash`, to avoid shadowing the builtin) identifies the file
    # so we can mark it as parsed once we are done.
    file_hash, parsed = database.did_parse_file(path)
    if parsed:
        return True

    file_iterator_func = iterload_file_lines_gzip if path.endswith("gz") else iterload_file_lines
    # The json files contain stacked json objects, which is bad practice. It should be wrapped in a JSON array.
    # Libraries will throw errors if you attempt to load the file, so now we lazy load each object line by line.
    for publication in file_iterator_func(path):
        if publication is None:  # Corrupt JSON line possibly. Skip it.
            continue

        # Try to match the publication to a venue we are interested in.
        # Wrap in str() as the venue sometimes is an int (???); records
        # missing the key entirely are skipped via the empty default.
        venue_string = str(publication.get('venue', ''))
        if not venue_string:
            continue

        publication_id = publication.get('id')
        if publication_id is None:  # Without an id we cannot store the record.
            continue

        publication_title = publication.get('title', '')
        publication_abstract = publication.get('paperAbstract', '')
        publication_year = publication.get('year', -1)
        # journalVolume is empty for conferences and occasionally missing or
        # None; normalize to "" and replace spaces for storage.
        publication_journal_volume = str(publication.get('journalVolume') or '').replace(" ", "_")
        # publication_keywords = publication.get('entities')

        num_citations = len(publication.get("inCitations", []))

        publication_doi = publication.get('doi')
        # Sometimes the doi field is empty but a doi.org URL is present; extract
        # the DOI from the URL in that case.
        if not publication_doi:
            publication_doi_url = publication.get('doiUrl', '')
            marker = "doi.org/"
            if marker in publication_doi_url:
                publication_doi = publication_doi_url[publication_doi_url.index(marker) + len(marker):]

        database.update_or_insert_paper(id=publication_id, doi=publication_doi, title=publication_title,
                                        abstract=publication_abstract, raw_venue_string=venue_string,
                                        year=publication_year, volume=publication_journal_volume,
                                        num_citations=num_citations)
    # database.flush_missing_venues()
    database.add_parsed_file(file_hash)
    database.close()
    return True
Exemplo n.º 2
0
def parse_aminer_corpus_file(path, database_path="aip", logger_disabled=False):
    """Parse one AMiner corpus file and store its publications in the database.

    Files already recorded as parsed are skipped. Returns True once the file
    has been handled.

    :param path: Path to the corpus file (plain or gzip-compressed stacked JSON).
    :param database_path: Location passed to DatabaseManager.
    :param logger_disabled: When True, suppress logger output for this run.
    """
    logger.disabled = logger_disabled
    database = DatabaseManager(location=database_path)

    file_hash, parsed = database.did_parse_file(path)
    if parsed:
        return True

    # The files hold stacked JSON objects (bad practice — they should be a JSON
    # array), so each line is lazily loaded as its own object.
    line_loader = iterload_file_lines_gzip if path.endswith("gz") else iterload_file_lines
    for record in tqdm(line_loader(path)):
        if record is None:  # Corrupt JSON line possibly. Skip it.
            continue

        # Warning: contrary to the documentation, the key is "venue" NOT "venue.raw"!
        if 'venue' not in record:
            logger.warning("Skipping line missing venue: %s in %s.",
                           record, path)
            continue

        if 'title' not in record:
            logger.warning("Skipping line missing title: %s in %s.",
                           record, path)
            continue

        venue_string = record['venue']
        # Sometimes the venue string is yet another dict...
        if isinstance(venue_string, dict) and "raw" in venue_string:
            venue_string = venue_string["raw"]

        paper_title = str(record['title']).rstrip(".")
        paper_abstract = record.get('abstract', "")
        paper_year = record.get('year')
        paper_volume = record.get('volume')
        # paper_keywords = record.get('keywords')
        paper_id = record['id']
        # citation_count = int(record['n_citation']) if "n_citation" in record else None

        paper_doi = record.get('doi')
        # Sometimes in the urls, a doi link is used. If there is, we attempt to
        # extract the doi from the link.
        if paper_doi is None or len(paper_doi) == 0:
            for candidate_url in record.get('url', []):
                if "doi.org/" in candidate_url:
                    start = candidate_url.index("doi.org/") + len("doi.org/")
                    paper_doi = candidate_url[start:]
                    break

        database.update_or_insert_paper(id=paper_id,
                                        doi=paper_doi,
                                        title=paper_title,
                                        abstract=paper_abstract,
                                        raw_venue_string=venue_string,
                                        year=paper_year,
                                        volume=paper_volume)
    # database.flush_missing_venues()
    database.add_parsed_file(file_hash)
    database.close()
    return True
Exemplo n.º 3
0
def parse(dblp_file, database_path="aip.db"):
    """Parse a DBLP XML dump and load articles, proceedings and authors.

    Files already recorded as parsed are skipped. Returns True once the file
    has been handled.

    :param dblp_file: Path to the DBLP XML file (DTD-validated while parsing).
    :param database_path: Location passed to DatabaseManager.
    """
    database = DatabaseManager(location=database_path)

    file_hash, parsed = database.did_parse_file(dblp_file)
    if parsed:
        return True

    counter = 0  # Fallback counter for records that lack a 'key' attribute.

    # dtd = etree.DTD(file="/media/lfdversluis/datastore/dblp.dtd")
    for event, element in etree.iterparse(dblp_file,
                                          load_dtd=True,
                                          dtd_validation=True):
        if element.tag not in ['article', 'inproceedings', 'proceedings']:
            continue

        # `article_id` instead of `id` to avoid shadowing the builtin.
        if 'key' in element.attrib:
            article_id = str(element.attrib['key'])
        else:
            article_id = "id" + str(counter)
            counter += 1

        title = element.find('title')  # type: Optional[str]
        if title is not None:
            title = str(title.text).rstrip(".")

        year = element.find('year')  # type: Optional[int]
        if year is not None:
            try:
                year = int(re.search(r'\d+', str(year.text)).group())
                # Normalize two-digit years: weird cases like "92-93" or "'12".
                if 20 < year < 100:
                    year += 1900
                elif year < 20:
                    year += 2000
            except (AttributeError, ValueError):
                # AttributeError: no digits found (re.search returned None);
                # ValueError: digits did not form a valid int.
                year = None

        volume = element.find('volume')  # type: Optional[int]
        if volume is not None:
            try:
                volume = int(volume.text)
            except (TypeError, ValueError):
                # TypeError: element had no text; ValueError: non-numeric text.
                volume = None

        # authors = element.find('author')  # type: Optional[str]
        # Prefer the booktitle (conferences); fall back to the journal.
        # find() returns None when absent, so no pre-scan with findall is needed.
        venue = element.find('booktitle')  # type: Optional[str]
        if venue is None:
            venue = element.find('journal')
        if venue is not None and venue.text is not None:
            venue = str(venue.text)
        else:
            venue = None

        # Extract a DOI from the first ee link that contains a doi.org path.
        # Check for "doi.org/" (with slash) — the substring that index() looks
        # for — so a URL containing bare "doi.org" cannot raise ValueError.
        doi = None
        for ee in element.findall('ee'):
            ee_str = str(ee.text)
            if "doi.org/" in ee_str:
                doi = ee_str[ee_str.index("doi.org/") + len("doi.org/"):]
                break

        if title is not None and year is not None and venue is not None:
            # Clean the title which may have HTML elements
            database.update_or_insert_paper(id=article_id,
                                            doi=doi,
                                            title=title,
                                            abstract="",
                                            raw_venue_string=venue,
                                            year=year,
                                            volume=volume,
                                            num_citations=-1)

            # Get the authors for this paper and add them to the database
            authors = []  # tuples of (name, orcid)
            for author_element in element.findall('author'):
                orcid = None
                if "orcid" in author_element.attrib:
                    orcid = str(author_element.attrib['orcid'])

                authors.append((author_element.text, orcid))

            database.add_authors_for_article(authors=authors, article_id=article_id)

        # Free the element's memory; iterparse would otherwise keep the whole
        # tree alive for a multi-GB dump.
        element.clear()

        # database.flush_missing_venues()
    database.add_parsed_file(file_hash)
    database.close()
    return True