def db(cls):
    if cls._database is not None:
        return cls._database
    if bool(os.environ.get(_UNQLITE_READ_ONLY_ENV, False)):
        # 0x00000001 is UNQLITE_OPEN_READONLY in the unqlite bindings.
        cls._database = UnQLite(cls.database_file, flags=0x00000001)
    else:
        cls._database = UnQLite(cls.database_file)
    atexit.register(cls._database.close)
    return cls._database
def __init__(self, location=None):
    self.location = location
    if isinstance(self.location, str) and len(self.location) > 0:
        logger.debug("Connecting to database at {}".format(
            os.path.abspath(location)))
        self.db = UnQLite(self.location)
    else:
        # in-memory database
        logger.debug("Creating an in-memory database.")
        self.db = UnQLite()
    self.collections = dict()
def dump_info(dirinfo, out_path):
    """Dump FileInfo entries into an UnQLite database."""
    out_path.unlink(missing_ok=True)
    with UnQLite(out_path.as_posix()) as db:
        with db.transaction():
            for info in dirinfo:
                db[info.name] = json.dumps(info.__dict__)
def write():
    db = UnQLite(DB_FILE)
    with db.transaction():
        db.collection('users').create()
    for line in FIRST_LINES:
        with db.transaction():
            stories = db.collection('stories')
            stories.create()
            # Are there stories that have the same first line?
            same_first_line = stories.filter(
                lambda story: story['lines'][0].get('text') == line)
            if same_first_line:
                continue
            stories.store([{
                "max_lines": MAX_LINES,
                "locked": False,
                "locked_by": None,
                "locked_at": None,
                "lines": [{"text": line}],
            }])
def create_demo_db(db_file, data_path, user_count, truncate=True):
    if truncate and os.path.isfile(db_file):
        os.remove(db_file)
    db = UnQLite(db_file)
    create_users(db, data_path, user_count)
    create_tags(db, data_path)
def get_user(user_uuid):
    db = UnQLite(db_file)
    user_col = get_collection('user')
    try:
        return user_col.fetch(db[user_uuid])
    except KeyError:
        return None
def get_bug_report(data_prefix, vectorized_data, bug_report_id):
    bug_report_index_collection = UnQLite(
        data_prefix + "_bug_report_index_collection_index_db")
    bug_report = pickle.loads(bug_report_index_collection[bug_report_id])
    bug_report_index_collection.close()
    index = bug_report['report']
    return vectorized_data[index, :]
def __init__(self, db_path=''):
    self.db_path = db_path
    # Loading the db from an on-disk file freezes the process, so use
    # an in-memory database instead.
    self.store = UnQLite()
    self.hits = 0
    self.misses = 0
    self.uncommited_recs = 0
def get_unqlite(name="default", path=None):
    '''Get an unqlite object.

    :param name: unqlite db name; used to look up the path from
        settings.UNQLITE_DB_NAMES
    :param path: unqlite db file path; if None, the path configured
        for `name` is used
    '''
    from uliweb import settings
    from unqlite import UnQLite

    if not path:
        path = settings.UNQLITE_DB_NAMES.get(name)
    if path == ':mem:':
        # Always reuse the same ':mem:' unqlite instance.
        if not _v.mem_unqlite:
            _v.mem_unqlite = UnQLite(path)
        return _v.mem_unqlite
    return UnQLite(path)
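# A minimal usage sketch for get_unqlite above. The 'cache' name and the
# stored key/value are illustrative assumptions; it presumes a matching
# entry exists in settings.UNQLITE_DB_NAMES of the uliweb project.
db = get_unqlite('cache')
db['last_run'] = '2024-01-01'   # UnQLite acts as a plain key/value store
print(db['last_run'])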
def setUp(self):
    self.app = bottle.Bottle(catchall=False)
    _, filename = tempfile.mkstemp(suffix='.unqlite')
    self.plugin = self.app.install(unqlite.Plugin(filename=filename))
    self.conn = UnQLite(filename)
    self.conn.collection('todo').create()
    self.conn.close()
def __getitem__(self, entity) -> UnQLite:
    """Get the UnQLite index for an entity name.

    :param entity: entity name
    :return: read-only UnQLite handle for that entity's index file
    """
    return UnQLite(self.uri_index_files_by_entity[entity],
                   flags=UNQLITE_OPEN_READONLY)
def __init__(self, location=None):
    try:
        from unqlite import UnQLite
    except ImportError:
        raise ImportError(
            "The unqlite library is required for this feature.")
    self.location = location
    if isinstance(self.location, str) and len(self.location) > 0:
        logger.debug("Connecting to database at {}".format(
            os.path.abspath(location)))
        self.db = UnQLite(self.location)
    else:
        # in-memory database
        logger.debug("Creating an in-memory database.")
        self.db = UnQLite()
    self.collections = dict()
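# Hypothetical usage of the constructor above, assuming it belongs to a
# `Database` wrapper class (the class name is an assumption, not from the
# source): a non-empty path opens a file-backed store, no path creates an
# in-memory one.
on_disk = Database("/tmp/example.unqlite")   # file-backed database
ephemeral = Database()                       # in-memory database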
async def test_performance_local_delivery_in_pure_memory_queue(
        self, unused_tcp_port: int):
    TEST_MAIL_NUMBER = 100
    virtual_box = []
    database = UnQLite(":mem:")

    async def delivery_handler(email: EmailMessage):
        virtual_box.append(email)

    ta = TransferAgent(
        mydomains=["localhost"],
        hostname="localhost",
        database=database,
        local_delivery_handler=delivery_handler,
        smtpd_auth_handler=smtpd_auth_rejectall,
        custom_queue=MemoryEmailQueue(),
        smtpd_port=unused_tcp_port,
    )
    try:
        ta.start()
        email = EmailMessage()
        email["Message-Id"] = "<test1@localhost>"
        email["To"] = "user@localhost"
        email["From"] = "qa@localhost"
        t1 = perf_counter()
        lost_mail_count = 0
        for x in range(0, TEST_MAIL_NUMBER):
            try:
                await aiosmtplib.send(email,
                                      hostname="localhost",
                                      port=unused_tcp_port)
            except Exception:
                lost_mail_count += 1

        async def wait_virtual_box():
            while len(virtual_box) < TEST_MAIL_NUMBER:
                await asyncio.sleep(0)

        await asyncio.wait_for(wait_virtual_box(), 12)
        t2 = perf_counter()
        result = t2 - t1
        logging.warning(
            "MemoryEmailQueue: %f sec./%smails, lost=%d",
            result,
            TEST_MAIL_NUMBER,
            lost_mail_count,
        )
        assert result < (
            TEST_MAIL_NUMBER / 100 * 4
        ), "the MTA should handle at least 25 mails per second"
    finally:
        ta.destory()
        if len(virtual_box) != TEST_MAIL_NUMBER:
            pytest.fail("expected {} mails, got {}".format(
                TEST_MAIL_NUMBER, len(virtual_box)))
def __init__(self, filename=None):
    assert filename is not None
    if not os.path.exists(filename):
        # Create the parent directory if needed. The original sliced the
        # string at rfind('/'), which breaks for bare filenames.
        directory = Path(filename).parent
        directory.mkdir(parents=True, exist_ok=True)
    self.__db = UnQLite(filename)
    if self.__db_get('master_password_hash'):
        current_app.config['INIT_STATE'] = 2
def insert_unqlite_items(number):
    db = UnQLite('tmp.unqlite')
    items = db.collection('items')
    items.create()
    for x in range(number):  # xrange in the Python 2 original
        items.store([{
            'a': str(x),
            'b': '2',
            'c': '3',
        }])
def get_diff(dirinfo, db_path):
    """Yield entries that differ from the database (or are not in it)."""
    with UnQLite(db_path.as_posix()) as db:
        for info in dirinfo:
            if info.name in db:
                dbinfo = json.loads(db[info.name])
                # An entry is "different" if any attribute changed; the
                # original used `and`, which only reported entries where
                # all three changed at once.
                if (info.size != dbinfo['size']
                        or info.mtime != dbinfo['mtime']
                        or info.md5 != dbinfo['md5']):
                    yield info
            else:
                yield info
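# A round-trip sketch for dump_info/get_diff above. The FileInfo class is
# a stand-in (an assumption; the real one is defined elsewhere) modeling
# only the attributes the two functions touch.
from dataclasses import dataclass
from pathlib import Path

@dataclass
class FileInfo:
    name: str
    size: int
    mtime: float
    md5: str

db_path = Path("fileinfo.unqlite")
dump_info([FileInfo("a.txt", 3, 1.0, "d41d8cd9")], db_path)
changed = [FileInfo("a.txt", 4, 2.0, "9e107d9d")]
print(list(get_diff(changed, db_path)))  # yields the changed entry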
def get_db(source='orders', suffix='', directory=''):
    col_path = source + suffix
    base_path = directory
    if not base_path.endswith('/'):
        base_path = base_path + '/'
    if not os.path.isdir(base_path):
        os.mkdir(base_path)
    # Open (or create) a file-backed database for this collection.
    db = UnQLite(base_path + col_path + '.db')
    col = db.collection(col_path)
    col.create()
    return col, db
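# Usage sketch for get_db above; the directory and document fields are
# illustrative. The function returns both the collection and the
# underlying handle so the caller can close the database.
col, db = get_db(source='orders', directory='data')
col.store({'order_id': 1, 'total': 9.99})
print(col.all())
db.close()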
def extract_enriched_api(data_prefix, bug_report_full_sha):
    data = sparse.load_npz(data_prefix + '_raw_count_data.npz')
    # 0x00000100 | 0x00000001 == UNQLITE_OPEN_MMAP | UNQLITE_OPEN_READONLY
    bug_report_files_collection_db = UnQLite(
        data_prefix + "_bug_report_files_collection_db",
        flags=0x00000100 | 0x00000001)
    current_files = pickle.loads(
        bug_report_files_collection_db[bug_report_full_sha])
    bug_report_files_collection_db.close()
    bug_report_id = bug_report_full_sha[0:7]
    shas = current_files['shas']
    class_name_lookup = current_files['class_name_to_sha']
    bug_report_data = []
    bug_report_lookup = {}
    n_rows = 0
    for ast_sha in shas:
        ast_data, lookup, current_ast_sha = add_types_source_to_bug_report_data(
            data, data_prefix, class_name_lookup, ast_sha)
        current_index = n_rows
        bug_report_data.append(ast_data)
        # Shift the per-AST lookup offsets by the rows already stacked.
        for k in lookup:
            lookup[k] += current_index
        bug_report_lookup[current_ast_sha] = lookup
        n_rows += ast_data.shape[0]
    bug_report_row = get_bug_report(data_prefix, data, bug_report_id)
    bug_report_data.append(bug_report_row)
    bug_report_data_matrix = sparse.vstack(bug_report_data)
    sparse.save_npz(
        data_prefix + '_' + bug_report_id + '_partial_enriched_api',
        bug_report_data_matrix)
    with open(data_prefix + '_' + bug_report_id +
              '_partial_enriched_api_index_lookup', 'w') as outfile:
        json.dump(bug_report_lookup, outfile)
    transformer = TfidfTransformer()
    tf_idf_data = transformer.fit_transform(bug_report_data_matrix)
    sparse.save_npz(data_prefix + '_' + bug_report_id + '_tfidf_enriched_api',
                    tf_idf_data)
    return bug_report_id
def process(bug_reports, repository_path, data_prefix):
    ast_cache = prepare_ast_cache(repository_path)
    ast_cache_collection_db = UnQLite(data_prefix + "_ast_cache_collection_db")
    before = default_timer()
    for k, v in ast_cache.items():
        ast_cache_collection_db[k] = pickle.dumps(v, -1)
    after = default_timer()
    total = after - before
    print("total ast cache saving time ", total)
    bug_report_files = prepare_bug_report_files(repository_path, bug_reports,
                                                ast_cache)
    before = default_timer()
    bug_report_files_collection_db = UnQLite(
        data_prefix + "_bug_report_files_collection_db")
    for k, v in bug_report_files.items():
        bug_report_files_collection_db[k] = pickle.dumps(v, -1)
    after = default_timer()
    total = after - before
    print("total bug report files saving time ", total)
def index_by(index_dir: str, index_extension: str, data_iter: iter,
             key_fn: Callable, value_fn: Callable, checkpoint: int,
             object_name: str):
    """Generate UnQLite data indices for each entity.

    :param index_dir: index directory
    :param index_extension: index file extension
    :param data_iter: iterable over data
    :param key_fn: function applied to a datum to get the index key
    :param value_fn: function applied to a datum to get the index value
    :param checkpoint: commit the indices every `checkpoint` data
    :param object_name: name used in progress logs
    :return: dict of index paths by entity name
    """
    i = 0
    index_path_by_entity = {}
    index_by_entity = {}
    for data in data_iter:
        entity = data['@type']
        if entity not in index_path_by_entity:
            index_path = get_file_path([index_dir, entity],
                                       ext=index_extension)
            index_path_by_entity[entity] = index_path
            index = UnQLite(index_path_by_entity[entity])
            index.begin()
            index_by_entity[entity] = index
        index = index_by_entity[entity]
        # Index
        index[str(key_fn(data))] = value_fn(data)
        i += 1
        # Log
        if i % 50000 == 0:
            print(f'checkpoint: {i} {object_name}')
        # Checkpoint
        if i % checkpoint == 0:
            # Flush indices
            for index in index_by_entity.values():
                index.commit()
                index.begin()
            print(f'checkpoint: {i} {object_name}')
    # Close indices
    for index in index_by_entity.values():
        index.commit()
        index.close()
    # Output all indices
    return index_path_by_entity
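# A usage sketch for index_by above. The records, key/value functions,
# and directory are illustrative; get_file_path is the module's own path
# helper, assumed to join the parts and append the extension.
import json

records = [
    {'@type': 'Person', 'id': 'p1', 'name': 'Ada'},
    {'@type': 'Place', 'id': 'pl1', 'name': 'Paris'},
]
paths = index_by(index_dir='indices', index_extension='.unqlite',
                 data_iter=iter(records),
                 key_fn=lambda d: d['id'],
                 value_fn=lambda d: json.dumps(d),
                 checkpoint=100000, object_name='records')
with UnQLite(paths['Person']) as idx:
    print(idx['p1'])   # the JSON-serialized Person record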
def __init__(self, path, db_path, options=dict()):
    self.board = Board()
    self.db = UnQLite(db_path)
    self.engine = Popen(path, universal_newlines=True,
                        stdin=PIPE, stdout=PIPE)
    self._put('uci')
    self._ready()
    for option, val in options.items():
        self._set_option(option, val)
    self.num_games = 1
    # Train indefinitely: reset the board and learn on each iteration.
    while True:
        self.board.reset()
        self.learn(200)
def create_user(name, mail, description, tags, active=False, is_admin=False):
    user_col = get_collection('user')
    user_uuid = gen_uuid()
    user = {
        'id': user_uuid,
        'name': name,
        'mail': mail,
        'description': description,
        'tags': tags,
        'active': active,
        'is_admin': is_admin
    }
    user_id = user_col.store(user)
    # Map the user's UUID to its internal collection record id.
    db = UnQLite(db_file)
    db[user_uuid] = user_id
    return user_uuid
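# Round-trip sketch pairing create_user with get_user above. The field
# values are illustrative; db_file and get_collection come from the
# surrounding module.
uuid = create_user('alice', 'alice@example.com', 'demo user',
                   tags=['demo'], active=True)
print(get_user(uuid))   # the stored user document, or None if missing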
def main():
    print("Start", datetime.datetime.now().isoformat())
    before = default_timer()
    bug_report_file_path = sys.argv[1]
    print("bug report file path", bug_report_file_path)
    data_prefix = sys.argv[2]
    print("data prefix", data_prefix)
    bug_reports = load_bug_reports(bug_report_file_path)
    ast_cache_db = UnQLite(data_prefix + "_ast_cache_collection_db")
    vectorize(ast_cache_db, bug_reports, data_prefix)
    after = default_timer()
    total = after - before
    print("End", datetime.datetime.now().isoformat())
    print("total time ", total)
    ast_cache_db.close()
def __init__(self, brpath, dbpath):
    self.br = Engine(brpath)
    self.br.write('setoption name Threads value 3')
    self.br.write('setoption name Hash value 4096')
    self.db = UnQLite(dbpath)
    self.board = Board()
    self.func_book = {
        'uci': self.uci,
        'isready': self.isready,
        'go': self.go,
        'quit': self.quit,
        'position': self.position
    }
    self.hello()
    try:
        self.start()
    except KeyboardInterrupt:
        self.quit(None)
async def test_local_delivery_in_unqlite_memory_queue(
        self, unused_tcp_port: int):
    virtual_box = []
    database = UnQLite(":mem:")

    async def delivery_handler(email: EmailMessage):
        virtual_box.append(email)

    ta = TransferAgent(
        mydomains=["localhost"],
        hostname="localhost",
        database=database,
        local_delivery_handler=delivery_handler,
        smtpd_auth_handler=smtpd_auth_rejectall,
        smtpd_port=unused_tcp_port,
    )
    try:
        ta.start()
        email = EmailMessage()
        email["Message-Id"] = "<test1@localhost>"
        email["To"] = "user@localhost"
        email["From"] = "qa@localhost"
        await aiosmtplib.send(email, hostname="localhost",
                              port=unused_tcp_port)

        async def wait_virtual_box():
            while len(virtual_box) < 1:
                await asyncio.sleep(0)

        await asyncio.wait_for(wait_virtual_box(), 1)
        assert isinstance(virtual_box[0], EmailMessage)
        message = virtual_box[0]
        assert message['message-id'] == '<test1@localhost>'
        assert message['to'] == "user@localhost"
        assert message['from'] == 'qa@localhost'
    finally:
        ta.destory()
def __init__(self, *, hostname: str, mydomains: List[str],
             database_path: str, smtpd_port: Optional[int] = None) -> None:
    if not smtpd_port:
        smtpd_port = 8025
    self.mydomains = mydomains
    self.hostname = hostname
    self.database_path = database_path
    self.database = UnQLite(database_path)
    self.storage_hub = StorageHub(self.database)
    self.transfer_agent = TransferAgent(
        mydomains=mydomains,
        local_delivery_handler=self.handle_local_delivering,
        database=self.storage_hub.database,
        smtpd_auth_handler=self.handle_smtpd_auth,
        hostname=self.hostname,
        self_name="transfer_agent.{}".format(self.hostname),
        smtpd_port=smtpd_port)
    super().__init__()
def add_types_source_to_bug_report_data(data, data_prefix, class_name_lookup,
                                        ast_sha):
    # 0x00000100 | 0x00000001 == UNQLITE_OPEN_MMAP | UNQLITE_OPEN_READONLY
    asts = UnQLite(data_prefix + "_ast_index_collection_index_db",
                   flags=0x00000100 | 0x00000001)
    types = UnQLite(data_prefix + "_ast_types_collection_index_db",
                    flags=0x00000100 | 0x00000001)
    current_type = pickle.loads(types[ast_sha])
    types_per_method = current_type['methodVariableTypes']
    cl = data.shape[1]
    current_index = 0
    start = current_index
    enriched_apis = []
    for method_types in types_per_method:
        method_type_shas = []
        for method_type in method_types:
            if method_type in class_name_lookup:
                method_type_shas.append(class_name_lookup[method_type])
        supertypes_shas_per_type = [
            set(find_types_shas(types, class_name_lookup, s))
            for s in method_type_shas
        ]
        indexes = []
        for supertypes in supertypes_shas_per_type:
            indexes.extend(get_indexes(asts, supertypes))
        if indexes == []:
            method_enriched_api = sparse.coo_matrix(
                np.zeros(cl).reshape(1, cl))
        else:
            method_enriched_api = sparse.coo_matrix(
                np.sum((data[indexes, :]), axis=0))
        enriched_apis.append(method_enriched_api)
    # Append one class-level row summing the per-method rows.
    if enriched_apis == []:
        class_enriched_api = sparse.coo_matrix(np.zeros(cl).reshape(1, cl))
    else:
        class_enriched_api = sparse.coo_matrix(np.sum(enriched_apis, axis=0))
    enriched_apis.append(class_enriched_api)
    current_index += len(enriched_apis)
    asts.close()
    types.close()
    lookup = {
        'enrichedApiStart': start,
        'enrichedApiEnd': current_index - 1,
    }
    enriched_apis_matrix = sparse.vstack(enriched_apis)
    return (enriched_apis_matrix, lookup, ast_sha)
def setUp(self):
    super(BaseTestCase, self).setUp()
    self.db = UnQLite(':mem:')
    self._filename = 'test.db'
    self.file_db = UnQLite(self._filename)
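# A matching tearDown sketch (an assumption; not in the source): close
# both handles and remove the on-disk file so test runs stay independent.
# Assumes `os` is imported in the test module.
def tearDown(self):
    self.db.close()
    self.file_db.close()
    if os.path.exists(self._filename):
        os.remove(self._filename)
    super(BaseTestCase, self).tearDown()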
def calculate_graph_features(data_prefix, bug_report_id, bug_report_full_sha,
                             repository_path, sha_to_note):
    # 0x00000100 | 0x00000001 == UNQLITE_OPEN_MMAP | UNQLITE_OPEN_READONLY
    bug_report_files_collection_db = UnQLite(
        data_prefix + "_bug_report_files_collection_db",
        flags=0x00000100 | 0x00000001)
    current_files = pickle.loads(
        bug_report_files_collection_db[bug_report_full_sha])
    bug_report_files_collection_db.close()
    shas = current_files['shas']
    sha_to_file_name = current_files['sha_to_file_name']
    graph_features_data_list = []
    graph_features_lookup = {}
    sha_to_imports = {}
    sha_to_class_name = {}
    for sha in shas:
        note_sha = sha_to_note[sha]
        note_content = cat_file_blob(repository_path, note_sha)
        imports = json.loads(note_content)
        sha_to_imports[sha] = imports
        if ('className' in imports and imports['className'] is not None
                and imports['className'] != ""):
            class_name = imports['className'].replace(".", "")
            sha_to_class_name[sha] = class_name
    graph_data = process_graph_results(sha_to_imports)
    current_index = 0
    for sha in shas:
        current_file_name = sha_to_file_name[sha]
        try:
            current_node_name = sha_to_class_name[sha]
            values = graph_data.loc[current_node_name]
            feature_15 = values['in']
            feature_16 = values['out']
            feature_17 = values['pr']
            feature_18 = values['a']
            feature_19 = values['h']
        except KeyError:
            # Files without a resolvable graph node get zero features.
            feature_15 = 0.0
            feature_16 = 0.0
            feature_17 = 0.0
            feature_18 = 0.0
            feature_19 = 0.0
        current_features = sparse.coo_matrix(
            [feature_15, feature_16, feature_17, feature_18, feature_19])
        graph_features_data_list.append(current_features)
        graph_features_lookup[sha] = current_index
        current_index += 1
    graph_features_data = sparse.vstack(graph_features_data_list)
    sparse.save_npz(
        data_prefix + '_' + bug_report_id[0:7] + '_graph_features_data',
        graph_features_data)
    with open(data_prefix + '_' + bug_report_id[0:7] +
              '_graph_features_index_lookup', 'w') as outfile:
        json.dump(graph_features_lookup, outfile)
    return bug_report_id
def convert_tf_idf(data_prefix, bug_report_full_sha):
    # 0x00000100 | 0x00000001 == UNQLITE_OPEN_MMAP | UNQLITE_OPEN_READONLY
    bug_report_files_collection_db = UnQLite(
        data_prefix + "_bug_report_files_collection_db",
        flags=0x00000100 | 0x00000001)
    current_files = pickle.loads(
        bug_report_files_collection_db[bug_report_full_sha])
    bug_report_files_collection_db.close()
    bug_report_id = bug_report_full_sha[0:7]
    shas = current_files['shas']
    class_name_lookup = current_files['class_name_to_sha']
    ast_index_collection = UnQLite(
        data_prefix + "_ast_index_collection_index_db",
        flags=0x00000100 | 0x00000001)
    data = sparse.load_npz(data_prefix + '_raw_count_data.npz')
    data_to_tf_idf = []
    lookups = {}
    n_rows = 0
    for sha in shas:
        current_indexes = pickle.loads(ast_index_collection[sha])
        (matrix, lookup) = extract_ast(data, current_indexes)
        current_index = n_rows
        data_to_tf_idf.append(matrix)
        # Shift the per-AST lookup offsets by the rows already stacked.
        for k in lookup:
            lookup[k] += current_index
        lookups[sha] = lookup
        n_rows += matrix.shape[0]
    ast_index_collection.close()
    bug_report_index_collection = UnQLite(
        data_prefix + "_bug_report_index_collection_index_db",
        flags=0x00000100 | 0x00000001)
    current_bug_report_indexes = pickle.loads(
        bug_report_index_collection[bug_report_id])
    bug_report_index_collection.close()
    bug_report_matrix, bug_report_lookup = extract_bug_report(
        data, current_bug_report_indexes)
    current_index = n_rows
    data_to_tf_idf.append(bug_report_matrix)
    for k in bug_report_lookup:
        bug_report_lookup[k] += current_index
    lookups[bug_report_id] = bug_report_lookup
    n_rows += bug_report_matrix.shape[0]
    data_matrix = sparse.vstack(data_to_tf_idf)
    transformer = TfidfTransformer()
    tf_idf_data = transformer.fit_transform(data_matrix)
    sparse.save_npz(data_prefix + '_' + bug_report_id + '_tf_idf_data',
                    tf_idf_data)
    with open(data_prefix + '_' + bug_report_id + '_tf_idf_index_lookup',
              'w') as outfile:
        json.dump(lookups, outfile)