def clean(db_loc):
    """Delete orphan ATOM / PSEUDO_ATOM / HERITAGE records in place.

    Some fragments are duplicates, or are not included in the db, but
    there are still atom and pseudoatom records that refer to them.
    This is not ideal, so this function deletes those orphan records.

    :param db_loc: path of the SQLite fragment-library database to clean
    :return: None
    """
    # Open the database with the same pragmas used when it was written.
    db = SqliteExtDatabase(
        db_loc,
        pragmas={
            'cache_size': -1024 * 64,  # 64MB page-cache.
            'journal_mode': 'wal',  # Use WAL-mode (you should always use this!).
            'foreign_keys': 0,
            'wal_autocheckpoint': 10,
        })
    db.connect()
    try:
        # Bind the models to this database handle and make sure the
        # tables exist (no-ops when they already do).
        Fragment, Heritage, PseudoAtoms, Atoms = lib_read(db)
        Fragment.create_table(safe=True)
        Heritage.create_table(safe=True)
        PseudoAtoms.create_table(safe=True)
        Atoms.create_table(safe=True)

        logger.debug("Trying to clean up the database:")
        # Purge every dependent table of rows whose `frag` foreign key no
        # longer resolves to an existing Fragment row.  The three delete
        # stanzas were identical, so they are driven by one loop.
        for label, model in (("ATOM", Atoms),
                             ("PSEUDO_ATOM", PseudoAtoms),
                             ("HERITAGE", Heritage)):
            logger.debug("Deleting missing %s records" % label)
            with db.atomic():
                model.delete().where(
                    model.frag.not_in(Fragment.select())).execute()
        logger.info("Done.")
    finally:
        # BUG FIX: the connection was previously never closed, leaving the
        # WAL/SHM sidecar files open; always release it.
        db.close()
# NOTE(review): this chunk has been flattened onto a single physical line by a
# paste/extraction step; the code below is kept byte-identical.  What it does:
# connect the peewee database and create the PLOS article tables, then iterate
# over a Corpus of article files (islice(all_files, args.random) takes either a
# random-sample-sized prefix or, when args.random is None, the whole corpus),
# and for each article get-or-create the Journal, ArticleType and JATSType rows
# using the create / IntegrityError / rollback / get idiom inside db.atomic().
# The final `try:` is truncated here -- its `except` handler continues outside
# this view, so the chunk is intentionally left unmodified.
db.connect() db.create_tables([ Journal, PLOSArticle, ArticleType, CoAuthorPLOSArticle, CorrespondingAuthor, JATSType, Affiliations, Country, SubjectsPLOSArticle, Subjects ]) corpus_dir = starterdir if args.starter else None all_files = Corpus(corpus_dir) num_files = len(all_files) if args.random is None else args.random for article in tqdm(islice(all_files, args.random), total=num_files): journal_name = journal_title_dict[article.journal.upper()] with db.atomic() as atomic: try: journal = Journal.create(journal=journal_name) except IntegrityError: db.rollback() journal = Journal.get(Journal.journal == journal_name) with db.atomic() as atomic: try: article_type = ArticleType.create(article_type=article.plostype) except IntegrityError: db.rollback() article_type = ArticleType.get( ArticleType.article_type == article.plostype) with db.atomic() as atomic: try: j_type = JATSType.create(jats_type=article.type_)
# NOTE(review): flattened single-line chunk, kept byte-identical.  It starts
# mid-class (the Tweet model header is outside this view): the trailing field
# declarations and a `class Meta: primary_key = False`.  The script part then
# loads AFINN word costs, creates the Tweet table, and bulk-inserts tweets in
# chunks of 100, scoring the English-language ones via get_tweet_value before
# each Tweet.insert_many.
# NOTE(review): the bare `"""` after db.close() opens (or closes) a triple-
# quoted string whose counterpart is outside this view -- it is impossible to
# tell from here whether the trailing select/score loop is live code or has
# been commented out.  Confirm against the full file before touching it; the
# chunk is deliberately left unmodified.
lang = CharField() created_at = DateTimeField() location = CharField() tweet_sentiment = IntegerField(default = 0) class Meta: primary_key = False word_costs = parse_afinn_file("AFINN-111.txt") db.connect() db.create_tables([Tweet]) items = parse_tweets_file("three_minutes_tweets.json.txt") with db.atomic(): for i in chunk(items, 100): sentiments = [t for t in i if t.get("lang") == "en"] for s in sentiments: s.update(tweet_sentiment = get_tweet_value(s.get("tweet_text"), word_costs)) Tweet.insert_many(i).execute() db.close() """ db.connect() all_tweets = Tweet.select() for tweet in all_tweets: tweet_sentiment = get_tweet_value(tweet.tweet_text, word_costs) if tweet_sentiment != 0:
def libgen(mol_list, output_name):
    """Generate a database-format library of fragments.

    Accepts a mol, a list of mol objects, or a .smi / .sdf filename.

    :param mol_list: list of molecules, a single molecule, or a filename
        of molecules to read
    :type mol_list: str|Chem.Mol|[Chem.Mol]
    :param output_name: name of the database to write
    :type output_name: str
    :return: 1 on success
    :raises TypeError: if mol_list is not one of the accepted input types
        (TypeError subclasses Exception, so existing callers still catch it)
    """
    # Normalise the input into an iterable of Chem.Mol objects.
    if isinstance(mol_list, str) and mol_list.endswith(".smi"):
        mol_list = Chem.SmilesMolSupplier(mol_list, delimiter="\t",
                                          titleLine=False)
    elif isinstance(mol_list, str) and mol_list.endswith(".sdf"):
        mol_list = Chem.SDMolSupplier(mol_list)
    elif isinstance(mol_list, Chem.Mol):
        mol_list = [mol_list]
    elif isinstance(mol_list, list):
        # BUG FIX: validation used `assert` (stripped under -O) and raised a
        # bare IndexError on an empty list; reject both cases explicitly.
        if not mol_list or not isinstance(mol_list[0], Chem.Mol):
            raise TypeError(
                "Did you provide a list of mol objects? Input type error.")
    else:
        raise TypeError(
            "Did you provide a list of mol objects? Input type error.")

    fragment_dict_deque = deque()
    heritage_dict_deque = deque()
    atoms_dict_deque = deque()
    pseudoatoms_dict_deque = deque()

    logger.info("Fragmenting:")
    n = len(mol_list)
    t0 = time.time()
    for i, mol in enumerate(mol_list, start=1):
        re_mol = RecomposerMol.fromMol(mol=mol)
        frag_list, heritage_list, atoms_list, pseudo_atoms_list = \
            re_mol.get_all_fragments(7)
        fragment_dict_deque.extend(frag_list)
        heritage_dict_deque.extend(heritage_list)
        atoms_dict_deque.extend(atoms_list)
        pseudoatoms_dict_deque.extend(pseudo_atoms_list)
        # BUG FIX: the counter used to be logged before it was incremented,
        # so the first molecule was reported as "0/n".  The ms-per-molecule
        # figure is unchanged (was divided by i + 1 with a 0-based i).
        logger.info("DONE: %d/%d %.f" % (i, n, 1000 * (time.time() - t0) / i))
    logger.info("Done")

    logger.info("Saving:")
    # create the database for the output
    db = SqliteExtDatabase(
        output_name,
        pragmas={
            'cache_size': -1024 * 64,  # 64MB page-cache.
            'journal_mode': 'wal',  # Use WAL-mode (you should always use this!).
            'foreign_keys': 0,
            'wal_autocheckpoint': 10,
        })
    db.connect()
    try:
        # get the models and make sure their tables exist
        Fragment, Heritage, PseudoAtoms, Atoms = lib_read(db)
        Fragment.create_table(safe=True)
        Heritage.create_table(safe=True)
        PseudoAtoms.create_table(safe=True)
        Atoms.create_table(safe=True)

        # Upsert everything in one transaction, 200 rows per statement
        # (keeps each INSERT under SQLite's bound-variable limit).
        with db.atomic():
            if len(fragment_dict_deque) > 0:
                for ents in chunked(fragment_dict_deque, 200):
                    Fragment.replace_many(ents).execute()
                for ents in chunked(heritage_dict_deque, 200):
                    Heritage.replace_many(ents).execute()
                for ents in chunked(pseudoatoms_dict_deque, 200):
                    PseudoAtoms.replace_many(ents).execute()
                for ents in chunked(atoms_dict_deque, 200):
                    Atoms.replace_many(ents).execute()
    finally:
        # BUG FIX: db.close() was skipped if the bulk insert raised;
        # always release the connection.
        db.close()
    clean(output_name)
    return 1
# NOTE(review): flattened single-line chunk, kept byte-identical.  The leading
# `#` text is a run of commented-out model fields (uuid/title/content + Meta)
# that were collapsed together.  The live code creates the Note/NoteIndex/Tags
# tables, defines load_json, and walks a Quiver library: for each
# *.qvnotebook it reads meta.json for the notebook name, then for each note's
# content.json it loads the paired meta.json, joins the cell bodies into
# full_content, collects the code cells into `snippets`, and creates a Note
# row.  The enclosing loop body appears to continue past this view
# (full_content/tagset/snippets are built but not yet consumed here), so the
# chunk is left unmodified.
# NOTE(review): load_json never closes the file handle it opens -- it should
# use `with open(f) as fh: return json.load(fh)`; fix where the full file is
# editable.
# uuid = CharField() # title = SearchField() # content = SearchField() # # class Meta: # database = db db.create_tables([Note, NoteIndex, Tags], safe = True) def load_json(f): return json.loads(open(f, 'r').read()) # Store notes with db.atomic(): for notebook in iglob(libpath + "/*.qvnotebook"): meta = load_json(list(iglob(notebook + "/meta.json"))[0]) nb_name = meta["name"] for c in iglob(notebook + "/*.qvnote/content.json"): meta = load_json(c.replace("content.json", "meta.json")) content = load_json(c) tagset = meta["tags"] full_content = ' '.join([x["data"] for x in content["cells"]]) snippets = [x for x in content["cells"] if x["type"] == "code"] # Store Notes n = Note.create(uuid = meta["uuid"], title = meta["title"], notebook = nb_name, last_modified = datetime.datetime.fromtimestamp(meta["updated_at"]))