def __init__(self, database_path=None, is_port_used_func=None):
    self.database = dataset.connect("sqlite:///:memory:")
    if database_path is not None:
        self.database = dataset.connect("sqlite:///" + database_path)
    self.port_table = self.database[self.PORT_TABLE_NAME]
    self.issuer_contact_table = self.database[self.ISSUER_CONTACT_TABLE_NAME]
    self.is_port_used_func = is_port_used_func
def bulk_upload(self):
    items_to_upload = []
    append = items_to_upload.append
    credentials = get_db_credentials(self.settings)
    if 'sqlite3' in credentials['ENGINE']:
        db = dataset.connect("sqlite:///" + os.path.basename(credentials['NAME']))
    if 'postgresql' in credentials['ENGINE']:
        db = dataset.connect('postgresql://' + credentials['USER'] + ':' +
                             credentials['PASSWORD'] + '@' + credentials['HOST'] + ':' +
                             credentials['PORT'] + '/' + credentials['NAME'])
    table = db['visitors_visitor']
    print("Starting checks to see if we have this item in our database.")
    if len(self.items) == 0:
        print("Nothing to upload")
    else:
        for i in pyprind.prog_bar(range(len(self.items))):
            item = self.items[i]
            try:
                item['date'] = datetime.datetime.strptime(item['date'], '%Y-%m-%d')
            except ValueError:
                item['date'] = None
            append(item)
        print("uploading %i records for table %s" % (len(items_to_upload), self.mytable))
        table.insert_many(items_to_upload)
def init(bot, testing=False):
    ''' Initialize updater '''
    global DATABASE
    global config
    global botref
    global updater
    global logger
    global api_key
    global shortener_service
    global shortener

    if testing:
        DATABASE = dataset.connect('sqlite:///:memory:')
    else:
        DATABASE = dataset.connect('sqlite:///databases/rss.db')

    logger.info('RSS module initialized')
    botref = bot
    config = bot.config.get('module_rss', {})
    api_key = config.get('api_key', api_key)
    if api_key:
        logger.info("Using shortener service {0} with API key {1}".format(shortener_service, api_key))
    else:
        logger.warning("Google API key not found in config!")
    finalize()
    # There's no signal telling us whether this is a rehash or a restart,
    # so update feeds in 30 seconds either way
    updater = callLater(30, update_feeds)
def __init__(self, db_path):
    if db_path is None:
        self.engine = dataset.connect('sqlite:///:memory:')
    else:
        self.engine = dataset.connect('sqlite:///%s' % db_path)
    self.table = self.engine['files']
    self.table.delete()
def run():
    # get commandline args
    args = parse_args()

    initialize_database()

    # store start time & args, plus get an ID for this crawl
    with dataset.connect(DATABASE_URL) as db:
        crawl_id = db["crawl"].insert(dict(
            args=" ".join(sys.argv[1:]),
            start_time=datetime.now()))

    url_queue = Queue()  # (url, num_timeouts) tuples
    result_queue = Queue()

    # read in URLs and populate the job queue
    with args.urls:
        urls = list(args.urls)
    shuffle(urls)  # randomize crawl order
    for url in urls:
        url = url.strip()
        if not urlparse(url).scheme:
            url = "http://" + url
        url_queue.put((url, 0))

    log = Logger().log if not args.quiet else lambda *args, **kwargs: None

    # launch browsers
    crawlers = []
    for i in range(args.num_crawlers):
        crawler = Process(
            target=Crawler,
            args=(i + 1,),
            kwargs={
                "crx": args.crx,
                "headless": args.headless,
                "logger": log,
                "timeout": args.timeout,
                "url_queue": url_queue,
                "result_queue": result_queue,
            },
        )
        crawler.start()
        crawlers.append(crawler)

    # start the collector process
    Process(target=collect, args=(crawl_id, result_queue, log)).start()

    # wait for all browsers to finish
    for crawler in crawlers:
        crawler.join()

    # tell collector we are done
    result_queue.put(None)

    # store completion time
    with dataset.connect(DATABASE_URL) as db:
        db["crawl"].update(dict(id=crawl_id, end_time=datetime.now()), "id")

    log("Main process all done!")
def __init__(self, test=False):
    if test:
        self.db = dataset.connect('sqlite:///:memory:')  # Create a database in RAM
    else:
        self.db = dataset.connect('sqlite:///db.sqlite')  # Connect to a SQLite database file
    # Get the table of user/password
    self.userTable = self.db['user']
def cli():
    # Storages
    warehouse = dataset.connect(settings.WAREHOUSE_URL)
    database = dataset.connect(settings.DATABASE_URL)

    # Mapper
    mapper = Mapper(warehouse, database)
    mapper.map(sys.argv[1], sys.argv[2])
def cli():
    # Storage
    warehouse = dataset.connect(settings.WAREHOUSE_URL)
    database = dataset.connect(settings.DATABASE_URL)

    # Exporter
    exporter = Exporter(warehouse, database)
    exporter.export(sys.argv[1])
def syncdb(filename, date, dbfile=DBFILE):
    '''Sync the date file into the db'''
    print('Working on {}'.format(filename))
    if DEBUG:
        db = dataset.connect('sqlite:///:memory:')
    else:
        # three slashes: sqlite URLs are 'sqlite:///<path>'
        db = dataset.connect('sqlite:///{}'.format(dbfile))
    titles = db.get_table('titles')
    files = db.get_table('files')
    thisdate = db.get_table(tablekey(date))
    if files.find_one(filename=filename) is not None:
        print('File already added: {}'.format(filename))
        return db
    k = 0
    with gzip.open(filename, 'rt') as gz:  # text mode so lines are str
        for i, line in enumerate(gz):
            countrycode, title, requests, contentsize = line.strip().split()
            requests = int(requests)
            if 'en' not in countrycode:
                continue
            if any([b in title for b in BAD]):
                continue
            if DEBUG and (requests < LIMIT):
                continue
            try:
                # decode percent-encoded titles to UTF-8; keep the raw string on failure
                title = urllib.parse.unquote(title, errors='strict')
            except UnicodeDecodeError:
                print('\nBad Title: {}'.format(title))
            # get the title id, adding the row if needed
            if titles.find_one(title=title, code=countrycode) is None:
                titles.insert(dict(title=title, code=countrycode, total=0))
            row = titles.find_one(title=title)
            total = row['total'] + requests
            titles.update(dict(title=title, code=countrycode, total=total), ['title'])
            # add to day
            title_id = row['id']
            if thisdate.find_one(title_id=title_id) is None:
                thisdate.insert(dict(title_id=title_id, count=0))
            count = thisdate.find_one(title_id=title_id)['count'] + requests
            thisdate.update(dict(title_id=title_id, count=count), ['title_id'])
            k += 1
            _update(k)
            if DEBUG and (k > 100):
                break
    print('\nDone!')
    files.insert(dict(filename=filename))
    return thisdate, titles
def home():
    if request.method == 'POST':
        form = dict(request.form)
        table = '<tr> Filtering by: '
        filters = [[1, 1]]
        if form['typefilter'] != [u'None']:
            filters.append(['acct_type', form['typefilter'][0]])
            table += '<td>' + str(form['typefilter'][0]) + '</td>'
        if form['divisionfilter'] != [u'None']:
            filters.append(['division', form['divisionfilter'][0]])
            table += '<td>' + str(form['divisionfilter'][0]) + '</td>'
        table += '</tr>'
        if table == '<tr> Filtering by: </tr>':
            table = ''
        try:
            database = dataset.connect('sqlite:///Skidata.db')
            users = database.get_table('users')
            table = makeList(database, filters, table)
            print(table)
            table = Markup(table)
        except Exception:
            table = Markup('<tr><td>It did not work. Sorry.</td></tr>')
        return render_template('home.html', TABLE=table)
    if 'gtoken' in request.cookies:
        gitkit_user = gitkit_instance.VerifyGitkitToken(request.cookies['gtoken'])
        table = ''
        teamtable = ''
        if gitkit_user:
            key = str(vars(gitkit_user)['user_id'])
            try:
                database = dataset.connect('sqlite:///Skidata.db')
                try:
                    teams = database.query("SELECT * FROM teams WHERE team != '' ORDER BY total_Ks DESC")
                    for team in teams:
                        team = dict(team)
                        print(team['team'])
                        teamtable += '<tr><td>' + team['team'] + '</td><td>' + str(team['total_Ks']) + '</td></tr>'
                except Exception:
                    print("team fail")
                if teamtable != '':
                    teamtable = Markup(teamtable)
                users = database.get_table('users')
                filters = [[1, 1]]
                table = makeList(database, filters, table)
                table = Markup(table)
            except Exception:
                table = Markup('<tr><td>There is nobody in that category. Try a different filter.</td></tr>')
            return render_template('home.html', TABLE=table, TEAMTABLE=teamtable)
        return redirect(url_for('index'))
    else:
        return redirect(url_for('index'))
def __init__(self, salt: str, max_months_valid: int, consent_db_path: str = None):
    """
    Constructor.
    :param consent_db_path: path to the SQLite db. If not specified an in-memory
        database will be used.
    """
    super().__init__(salt, max_months_valid)
    if consent_db_path:
        self.consent_db = dataset.connect(consent_db_path)
    else:
        self.consent_db = dataset.connect('sqlite:///:memory:')
    self.consent_table = self.consent_db[self.CONSENT_TABLE_NAME]
def __init__(self, salt: str, consent_request_path: str = None):
    """
    Constructor.
    :param consent_request_path: path to the SQLite db. If not specified an
        in-memory database will be used.
    """
    super().__init__(salt)
    if consent_request_path:
        self.consent_request_db = dataset.connect(consent_request_path)
    else:
        self.consent_request_db = dataset.connect('sqlite:///:memory:')
    self.consent_request_table = self.consent_request_db['consent_request']
def dbs(request):
    """Connected ETLProcess object that clears the database after each test."""
    dummy_data = [{
        "name": NAMES[randint(0, len(NAMES) - 1)],
        "age": randint(1, 99),
        "last_name": NAMES[randint(0, len(NAMES) - 1)],
    } for i in range(0, 3)]
    dbs = (dataset.connect("mysql+mysqldb://[email protected]:3306/src"),
           dataset.connect("mysql+mysqldb://[email protected]:3306/target"))
    dbs[0]['mytable'].insert_many(dummy_data)
    yield dbs
    for db in dbs:
        for table in db.tables:
            db[table].drop()
def database_preimport_operations():
    global new_database_connection
    global modified_new_folder_path

    original_database_connection_for_migrate = dataset.connect(
        "sqlite:///" + original_database_path)
    backup_increment.do_backup(self.original_folder_path)
    modified_new_folder_path = backup_increment.do_backup(self.new_folder_path)

    original_db_version = original_database_connection_for_migrate["version"]
    original_db_version_dict = original_db_version.find_one(id=1)

    new_database_connection = dataset.connect("sqlite:///" + modified_new_folder_path)
    new_db_version = new_database_connection["version"]
    new_db_version_dict = new_db_version.find_one(id=1)

    if int(new_db_version_dict["version"]) < int(original_db_version_dict["version"]):
        print("db needs upgrading")
        folders_database_migrator.upgrade_database(new_database_connection, None, "Null")
def cli(argv):
    # Prepare conf dict
    conf = helpers.get_variables(config, lambda x: x.isupper())

    # Prepare conn dict
    conn = {
        'database': dataset.connect(config.DATABASE_URL),
        'warehouse': dataset.connect(config.WAREHOUSE_URL),
    }

    # Get and call processor
    process = import_module('processors.%s' % argv[1]).process
    process(conf, conn, *argv[2:])
def parser():
    if os.path.isfile(DB_FILE):
        return dataset.connect(DB_URL, row_type=stuf)

    db = dataset.connect(DB_URL)
    with gzip.open(DATA_FILE, 'rb') as f:
        with db as tx:  # one transaction for the whole load
            for line in f:
                # each line is a Python dict literal; note that eval() trusts the data file
                data = eval(line)
                tx['reviews'].insert(dict(
                    reviewer_id=data['reviewerID'],
                    movie=data['asin'],
                    review_text=data['reviewText'],
                    rating=data['overall'],
                ))
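# Since eval(line) above executes arbitrary expressions from the data file, a
# safer variant is worth sketching. This is an assumption-labeled alternative,
# not part of the original code: ast.literal_eval accepts only Python literals
# (dicts, lists, strings, numbers), so a malicious line raises ValueError
# instead of executing code.
import ast
import gzip

def safe_rows(data_file):
    """Yield review dicts from a gzip of dict literals, without eval()."""
    with gzip.open(data_file, 'rt') as f:
        for line in f:
            yield ast.literal_eval(line)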
def report_cherry():
    # cherry-pick tweets containing forbidden adverts
    # input: a list of keywords
    keywords = os.path.join(config.local_folder, "keywords.txt")

    # build the query
    query = "select * from tuits where "
    for line in codecs.open(keywords, "r", "utf8").readlines():
        line = line.strip()
        query += "status like '%" + line + "%' OR "
    query = re.sub(" OR $", "", query)
    query += " order by tweet_id desc"

    # advertising is forbidden from this date onwards
    DATE_LIMIT = datetime(2014, 1, 24, 0, 0)

    dbfile = os.path.join(config.local_folder, "tuits.db")
    db = dataset.connect("sqlite:///" + dbfile)
    res = db.query(query)

    cherry_tweets = []
    for i in res:
        date = datetime.strptime(i['created_at'], "%a %b %d %H:%M:%S +%f %Y")
        if date > DATE_LIMIT:
            i['created_at'] = date.strftime('%b %d, %Y')
            i['tweet_id'] = str(i['tweet_id'])
            cherry_tweets.append(i)

    f = codecs.open("cherry_tweets.json", "w", "utf-8")
    f.write(json.dumps(cherry_tweets))
    f.close()
def update(self, irc, msg, args):
    """Update the namecheap pricing information."""
    irc.reply("This could take a second....")
    response = self.namecheap('namecheap.users.getPricing', {'ProductType': 'DOMAIN'})
    if response.get('Status') == "ERROR":
        for error in response[0]:
            log.error(error.text)
            irc.reply("Error! %s" % error.text)
    results = response.find("./{http://api.namecheap.com/xml.response}CommandResponse/"
                            "{http://api.namecheap.com/xml.response}UserGetPricingResult")
    db = dataset.connect("sqlite:///%s" % self.dbfile)
    pricing_table = db['pricing']
    pricing_table.delete(provider="Namecheap")
    categories = {}
    if results is not None:
        for product_type in results:
            for category in product_type:
                categories[category.attrib['Name']] = 0
                for product in category:
                    for duration in product:
                        pricing_table.insert(dict(tld=product.attrib['Name'],
                                                  years=duration.attrib['Duration'],
                                                  category=category.attrib['Name'],
                                                  price=duration.attrib['Price'],
                                                  currency=duration.attrib['Currency'],
                                                  provider="Namecheap"))
                        categories[category.attrib['Name']] += 1
                irc.reply("Loaded category %s (%s bits of pricing infoz)" % (
                    category.attrib['Name'], categories[category.attrib['Name']]))
    irc.reply("Done! Results: ")
def __init__(self, dbName="vestfin.db"):
    self.vestfin_db = dataset.connect("sqlite:///%s" % dbName)
    self.client = None
    self.trade = None
    self.portfolio_trades = None
    self.portfolio = None
def check(self, irc, msg, args, domain):
    """<domain>

    Checks if <domain> is available for purchase.
    """
    response = self.namecheap('namecheap.domains.check', {'DomainList': domain})
    if response.get('Status') == "ERROR":
        for error in response[0]:
            log.error(error.text)
            irc.reply("Error! %s" % error.text)
    results = response.find("{http://api.namecheap.com/xml.response}CommandResponse")
    if results is not None:
        for result in results:
            if result.attrib['Available'] == "true":
                db = dataset.connect("sqlite:///%s" % self.dbfile)
                tld = domain.split(".")[-1]
                prices = db['pricing'].find(tld=tld, category="register", years=1)
                no_prices = True
                for price in prices:
                    no_prices = False
                    purchase_url = "https://www.namecheap.com/domains/registration/results.aspx"
                    purchase_url += "?domain=%s&aff=%s" % (domain, self.registryValue('affiliate_id'))
                    irc.reply("[%s] Available from %s for %s $%s (%s)" % (
                        domain, price['provider'], price['currency'], price['price'], purchase_url))
                if no_prices:
                    irc.reply("[%s] Allegedly available (pricing info not found for %s)" % (domain, tld))
            else:
                irc.reply("[%s] Unavailable" % (result.attrib['Domain']))
def settings_remove_device():
    db = dataset.connect('sqlite:///mydatabase.db')
    table = db['Device_table']
    remove_value = request.form
    table.delete(id=remove_value['remove'])
    get_mac_dictionary()
    # Flask's redirect() accepts no template kwargs; the /settings view
    # rebuilds the device dictionary itself
    return redirect('/settings')
def init_db():
    client = pyorient.OrientDB("localhost", 2424)
    session_id = client.connect("admin", "admin")

    # The original left these assignments empty; the values below are
    # placeholders standing in for self.db_uri and self.domain.
    db_uri = 'sqlite:///:memory:'  # placeholder: originally self.db_uri
    domain = 'records'             # placeholder: originally self.domain
    db = dataset.connect(db_uri)
    table = db.get_table(domain, primary_id='key', primary_type='String')
def main():
    db = dataset.connect('sqlite:///reddit.db')

    if not os.path.exists('./deploy'):
        os.mkdir('./deploy')

    def get_items_from_day(date):
        return db.query('SELECT title, link, min(rank) as rank, (upvotes - downvotes) as votes, subreddit FROM status ' +
                        ('JOIN entry ON status.eid=entry.id WHERE rank <= 10 %s GROUP BY eid ORDER BY rank, (upvotes - downvotes) DESC;' % date))

    all_items = {}

    def process_items_from_day(items):
        data = []
        for item in filter(lambda item: item['link'] is not None, list(items)):
            item = process(item)
            if item['link'] not in all_items:
                all_items[item['link']] = True
                data.append(item)
        return data

    collections = ['AND observed > date("now", "start of day", "-1 day") AND observed < date("now", "start of day")',
                   'AND observed > date("now", "start of day", "-2 day") AND observed < date("now", "start of day", "-1 day")',
                   'AND observed > date("now", "start of day", "-3 day") AND observed < date("now", "start of day", "-2 day")']
    collections = [process_items_from_day(get_items_from_day(date)) for date in collections]

    with open('./templates/newsletter.html', 'r') as newspaper:
        template = Template(newspaper.read())

    html = template.render(title="Reddit News Agency",
                           edition=len(os.listdir('./deploy')),
                           collections=collections).encode('utf-8')

    # html is utf-8 bytes, so the file must be opened in binary mode
    with open('./deploy/' + str(int(time())) + '.html', 'wb') as f:
        f.write(html)

    requests.post('http://reddit-snews-agency.herokuapp.com/', data=html, headers={
        'Authorization': '9f9fa431c64a86da8324bb370d05377bbf49dbf9'
    })
def get_mac_dictionary():
    db = dataset.connect('sqlite:///mydatabase.db')
    table = db['Device_special_settings']
    print(table.find_one(setting_info='device_name'))
    addr = hex(get_mac()).rstrip("L").lstrip("0x") or "0"
    Device_info_dic = {
        'Current_MAC_Address': ':'.join(''.join(pair) for pair in zip(*[iter(addr)] * 2)),
        'Device_Name': table.find_one(setting_info='device_name')['data_info'],
        'Current_Master': table.find_one(setting_info='device_name')['data_info'],
    }
    return Device_info_dic
def get_db():
    if not inspect():
        create_db()
    _db = dataset.connect(_db_url)
    return _db
def join_query(db_name, table_name1, table_name2, cond1, cond2, res):
    db = dataset.connect(db_name)
    # the original template kept SQL-reference brackets around the ON clause,
    # which is invalid SQL; spelled out here as a plain join condition
    result = db.query('SELECT * FROM {} JOIN {} ON ({} = {})'.format(
        table_name1, table_name2, cond1, cond2))
    for row in result:
        print(row)
        db[res].insert(row)  # insert into the result table via the connection, not the URL string
def connect_db():
    conn = "mysql://" + mysql_user + ":"
    conn += mysql_pass + "@"
    conn += mysql_host + "/"
    conn += mysql_db
    db = dataset.connect(conn)
    return db
def create_db():
    if not os.path.exists(_db_directory):
        os.mkdir(_db_directory)
    db = dataset.connect(_db_url)
    # db.create_table('groups')
    # db['actions'].create_column('action')
    print("table created: %s" % 'groups')
def crawl_all():
    """
    Main function that goes through all sites, crawls them,
    and then waits a day until it does it again.
    """
    db = dataset.connect('sqlite:///snapshots.sqlite')
    table = db['snapshots']
    while True:
        time1 = time.time()
        to_parse = [Aftonbladet, Expressen, Flamman, ETC, DN, SVT, SVD]
        shuffle(to_parse)
        for parser in to_parse:
            snapshot = get_snapshot(article_parser=parser)
            percent_women = calc_perc_women(snapshot)
            table.insert(dict(percent_women=percent_women,
                              snapshot=" ".join(snapshot),
                              datetime=datetime.datetime.now(),
                              source=parser()[1],
                              n_names=len(snapshot)))
        # Wait a day, counted from when crawling started
        time2 = time.time()
        print("Waiting a day since crawling started...")
        time.sleep(24 * 60 * 60 - (time2 - time1))
def new(url, params=None, **kwargs):
    db = dataset.connect('sqlite:///test.db')
    response = func(url, params, **kwargs)
    row = {'url': url, 'params': params, 'response': response}
    db['requests'].insert(row)
    return response
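# The function above closes over a free variable `func`, which suggests it is
# the inner function of a decorator. A minimal sketch of a plausible enclosing
# decorator follows; the name `log_requests` is hypothetical, not from the
# original, and it assumes func returns a value dataset can store (e.g. text).
import dataset

def log_requests(func):
    def new(url, params=None, **kwargs):
        db = dataset.connect('sqlite:///test.db')
        response = func(url, params, **kwargs)
        db['requests'].insert({'url': url, 'params': params, 'response': response})
        return response
    return new

# usage: fetch = log_requests(lambda url, params=None, **kw: "ok")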
import os
import pickle

from console_logging.console import Console
from kombu import Connection, Exchange, Queue
from kombu.mixins import ConsumerMixin
import dataset

# https://dataset.readthedocs.io/en/latest/ -- set up the database connection
db = dataset.connect('mysql://*****:*****@192.168.0.108:49153/COUNTER_TBL')
tabela = db['EVENTOS']

console = Console()

queue = "contador-carro-exchange"
exchange = "contador-carro-exchange"
routing_key = "contador-carro-exchange"
rabbit_url = "amqp://*****:*****@192.168.0.108:5672//"

# Rabbit config
conn = Connection(rabbit_url)
channel_ = conn.channel()
exchange_ = Exchange(exchange, type="direct", delivery_mode=1)


class Worker(ConsumerMixin):
    def __init__(self, connection, queues):
        self.connection = connection
        self.queues = queues
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import dataset

db = dataset.connect("sqlite:///bot.db")
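# For reference, a minimal sketch of how such a `db` handle is typically used
# with the dataset library; the `users` table and its columns are hypothetical:
table = db['users']                                  # created on first insert
table.insert(dict(name='alice', karma=1))
table.upsert(dict(name='alice', karma=2), ['name'])  # update keyed on name
print(table.find_one(name='alice')['karma'])         # -> 2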
def __init__(self):
    self.db = dataset.connect(config.DATABASE)
    self.table = self.db['healthdata']
import dataset
import time
import logging

db = dataset.connect('sqlite:///twitchdata.db')
tables = {}
logger = logging.getLogger(__name__)


def exists(tbl_name):
    table = tables.get(tbl_name, None)
    if not table:
        table = db[tbl_name]
        tables[tbl_name] = table
    try:
        return table.exists
    except Exception:
        logger.info("Table %s not found", tbl_name)
        return False


def create(tbl_name, primary):
    logger.info("Create table %s", tbl_name)
    tbl = db.create_table(tbl_name, primary_id=primary, primary_type=db.types.text)
    tables[tbl_name] = tbl


def get(tbl_name):
    table = tables.get(tbl_name, None)
    if not table:
        table = db[tbl_name]
        tables[tbl_name] = table  # cache the handle, mirroring exists() above
    return table
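# A short usage sketch of the three helpers above; the table name 'chatters'
# and its columns are hypothetical:
if not exists('chatters'):
    create('chatters', 'username')  # text primary key column 'username'
tbl = get('chatters')
tbl.upsert(dict(username='viewer42', messages=1), ['username'])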
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
import dataset

db = dataset.connect('sqlite:///noticias.db')
noticias = db['noticias']
import settings
import dataset
from datafreeze import freeze

db = dataset.connect(settings.CONNECTION_STRING)
result = db[settings.TABLE_NAME].all()
freeze(result, format='csv', filename=settings.TWEETS_FILE_CSV)
def is_key_match(key):
    logger.debug("Key is: {}".format(key))
    with dataset.connect(config.db_name) as db:
        table = db['keys']
        if table.find_one(key=key):
            return True
    return False
        except KeyError:
            errors.append(row['artist'])
        sys.stdout.write("\rRetrieving artist info...\t{0} of {0}".format(str(totalArtists)))
        sys.stdout.flush()
    print("\rRetrieved artist info.")
    if errors:
        print("\nThe following artists could not be located within the last.fm database: \n " +
              "\n ".join(errors))


if __name__ == '__main__':
    if len(sys.argv) == 1:
        print("[ERROR] No last.fm username specified.")
        quit()
    if sys.argv[1] == '--all':
        with dataset.connect('sqlite:///last-fm.db') as db:
            sql = 'SELECT name as username FROM sqlite_master WHERE type=\'table\''
            usernames = [row['username'] for row in db.query(sql)]
    else:
        usernames = sys.argv[1:]
    for username in usernames:
        scr = Scraper(username)
        with dataset.connect('sqlite:///last-fm.db') as db:
            sql = 'SELECT COUNT(name) as count FROM sqlite_master WHERE type=\'table\' AND name=\'%s\'' % username
            exists = int(next(db.query(sql))['count'])
        if exists:
            scr.update_scrobbles()
        else:
            scr.get_all_scrobbles()
if __name__ == "__main__": config_file = 'config.json' if not os.path.isfile(config_file): raise Exception("config.json is missing") else: with open(config_file) as json_data_file: config_data = json.load(json_data_file) # print(config_data) databaseConnector = config_data["databaseConnector"] databaseConnector2 = config_data["databaseConnector2"] start_prep_time = time.time() db = dataset.connect(databaseConnector) db2 = dataset.connect(databaseConnector2) # Create keyStorage trxStorage = TrxDB(db2) memberStorage = MemberDB(db2) confStorage = ConfigurationDB(db2) accStorage = AccountsDB(db2) keyStorage = KeysDB(db2) accounts = accStorage.get() conf_setup = confStorage.get() last_cycle = conf_setup["last_cycle"] share_cycle_min = conf_setup["share_cycle_min"] sp_share_ratio = conf_setup["sp_share_ratio"]
from os import environ

import dataset
import telebot  # assumed import: the module instantiates telebot.TeleBot below
from cachetools.func import ttl_cache
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

TELEGRAM_TOKEN = environ.get('TELEGRAM_TOKEN')
ADMIN_USERNAMES = environ.get('ADMIN_USERNAMES', default='').split(',')
SENTENCE_COMMAND = environ.get('SENTENCE_COMMAND', default='sentence')
DATABASE_URL = environ.get('DATABASE_URL', default='sqlite:///:memory:')
MODEL_CACHE_TTL = int(environ.get('MODEL_CACHE_TTL', default='300'))
MESSAGE_LIMIT = environ.get('MESSAGE_LIMIT', default=5000)

db = dataset.connect(DATABASE_URL)['messages']
bot = telebot.TeleBot(TELEGRAM_TOKEN)


def is_from_admin(message):
    username = message.from_user.username
    chat_id = str(message.chat.id)
    username_admins = [
        u.user.username for u in bot.get_chat_administrators(chat_id)
    ]
    return username in username_admins + ADMIN_USERNAMES


@ttl_cache(ttl=MODEL_CACHE_TTL)
def get_model(chat):
    logger.info(f'fetching messages for {chat.id}')
import os.path
import urllib.parse

import WebServiceData
import dataset

# Fetch from the network
url = 'https://www.google.co.jp'
data = WebServiceData.WebServiceData()
data.Get(url)
data.Write()  # saved under the filename chosen by the Google side
data.Write('/tmp/icons/favicon.google.svg')  # saved to the specified path

# Fetch from the DB
data2 = WebServiceData.WebServiceData()
db = dataset.connect('sqlite:///' + 'WebServices.sqlite3')
data2.Load(db['Services'].find_one(Url=url))
data2.Write()  # in the DB the filename is "{Classname}.{Extension}"
def filter_transformations(args):
    if os.path.exists(os.path.abspath(args.out_db)):
        print('Found output DB, overwriting...')
        os.system('rm "%s"' % os.path.abspath(args.out_db))
    os.system('cp "%s" "%s"' % (os.path.abspath(args.in_db), os.path.abspath(args.out_db)))

    db = dataset.connect('sqlite:///%s' % args.out_db)
    edges = [x for x in db['edges'].all()]
    transformations = {x['refNum']: x for x in db['transformations']}
    refTransformations = {x['name']: x for x in db['refTransformations']}
    knowns = {x['refNum']: x for x in db['knowns']}
    observations = {x['refNum']: x for x in db['observations']}
    nodes = {x['refNum']: x for x in db['nodes']}

    if not args.no_impossible:
        print('Pruning impossible transformations')
        impossible_count = 0
        for trans in tqdm(transformations.values()):
            obs_from = observations[trans['obs_from']]
            obs_to = observations[trans['obs_to']]
            refTrans = refTransformations[trans['trans']]
            # if ast.literal_eval(str(obs_from['known'])) and ast.literal_eval(str(obs_to['known'])):
            if not isPossible(trans, obs_from, obs_to, refTrans):
                # Remove the transformation and its associated edges
                db['transformations'].delete(refNum=trans['refNum'])
                impossible_count += 1
                for e in edges:
                    if e['trans'] == trans['refNum']:
                        db['edges'].delete(refNum=e['refNum'])
        print('Removed %i of %i total transformations' % (impossible_count, len(transformations)))
    else:
        print('Skipping impossible transformations step')

    if not args.no_optimization:
        optimize_mz_tolerance(db)
    else:
        print('Skipping tolerance optimization step')

    # if not args.no_pid_check:
    #     print('Removing redundant transformations by PID number')
    #     redun_count = 0
    #     edges = [x for x in db['edges'].all()]
    #     transformations = {x['refNum']: x for x in db['transformations']}
    #     refTransformations = {x['name']: x for x in db['refTransformations']}
    #     knowns = {x['refNum']: x for x in db['knowns']}
    #     observations = {x['refNum']: x for x in db['observations']}
    #     nodes = {x['refNum']: x for x in db['nodes']}
    #     checked_from_pids = []
    #     checked_to_pids = []
    #     for trans in tqdm(transformations.values()):
    #         obs_from = observations[trans['obs_from']]
    #         obs_to = observations[trans['obs_to']]
    #         if obs_from['known']:
    #             pid_from = obs_from['pid']
    #             if pid_from not in checked_from_pids:
    #                 transformations_to = [x for x in db.query('select * from transformations where obs_to==\'%s\' and obs_from !=\'%s\'' % (obs_to['refNum'], obs_from['refNum']))]
    #                 for tt in transformations_to:
    #                     check_pid = [x for x in db.query('select pid from observations where refNum==\'%s\'' % tt['obs_from'])][0]['pid']
    #                     if check_pid == pid_from:
    #                         db['transformations'].delete(refNum=tt['refNum'])
    #                 checked_from_pids.append(pid_from)
    #         if obs_to['known']:
    #             pid_to = obs_to['pid']
    #             if pid_to not in checked_to_pids:
    #                 transformations_from = [x for x in db.query('select * from transformations where obs_from==\'%s\' and obs_to !=\'%s\'' % (obs_from['refNum'], obs_to['refNum']))]
    #                 for tf in transformations_from:
    #                     check_pid = [x for x in db.query('select pid from observations where refNum==\'%s\'' % tf['obs_to'])][0]['pid']
    #                     if check_pid == pid_to:
    #                         db['transformations'].delete(refNum=tf['refNum'])
    #                 checked_from_pids.append(pid_to)
    # else:
    #     print('Skipping PID number check')

    print('Done')
import dataset
from datetime import datetime

# current date and time
now = datetime.now()

# set up the database
data_base = dataset.connect('sqlite:///database.db')
acc_info = data_base['accinfo']

print("")
print("# Welcome to Zinderbot E-Bank v1.0 ! #")
print("--------------------------------------")
print("")


def main_program():
    while True:
        try:
            # ask what the user wants to do
            print("Choose any one of the following by entering option no. --")
            print("")
            main_input = int(
                input(
                    "[1] Deposit money  [2] Withdraw money  [3] Know your account info"
                ))
            print("")
def get_db():
    return dataset.connect(db_url, row_type=dict)
def __init__(self, db_path_repo, req_param):
    self.req = req_param
    self.db_path_repo = db_path_repo
    self.db_repo = dataset.connect('sqlite:///' + db_path_repo)
    self.api = database.src.other_repo.insert.github.api.repositories.Repositories.Repositories(req_param)
    self.page = database.src.other_repo.insert.github.api.Pagenation.Pagenation(req_param)
def setUp(self):
    self.db = connect('sqlite:///:memory:')
    self.tbl = self.db['weather']
    for row in TEST_DATA:
        self.tbl.insert(row)
def main(argvs):
    # audio = FLAC(r'D:\Music\14 - Close To You.flac')
    # audio["title"] = u"An --- exam Close To You"
    # audio.pprint()
    # pprint(audio.items())
    # audio.save()
    # exit()
    # =================================================================================================================
    # Buffer  | 32768  | 1048576 | 5242880 | 10485760 | 15728640 | 52428800  | 104857600 | 157286400 | 209715200 |
    # Size    | 32K    | 1Mb     | 5Mb     | 10Mb     | 15Mb     | 51Mb(32)  | 102.4Mb   | 153.6Mb   | 200Mb     | 1Gb
    #         | 0x8000 | 0xFA000 | 0x4E2000| 0x9C4000 | 0xEA6000 | 0x30D4000 | 0x61A8000 | 0x927C000 | 0xC350000 |
    # --------|--------|---------|---------|----------|----------|-----------|-----------------------------------------
    # CPUs'x1 | 137    | 44      | 33.25   | 31       | 29.7     | 27.1(27.6)| 29        | 31        | 35        | 104
    #      x2 | 178    | 101     | 34.41   | 30.68    | 30.47    | 28        | 28.8      | 30.21     | 33        |
    # ========|========|=========|=========|==========|==========|=====================================================
    OS_CPU_N = os.cpu_count()
    BUFF_SIZE = 0x2EE0000  # 49Mb
    totflist = []
    excludes = ('desktop.ini', 'thumbs.db')  # Files excluded from the process
    hashmap = {}  # content signature -> list of file names
    t0 = time.time()  # Start time marker
    path = r''.join(map(str, argvs[0:1]))
    # path = r'K:\alex\Test CDs'
    # path = r'Z:\Audio Music\FLACs\10CC'
    # path = r'Z:\Audio Music\FLACs\1988 Charlie Parker'

    # ------------- Start ------------
    print('Start time:', strftime('%H:%M:%S'))
    print('CPUs : ', OS_CPU_N)
    print(len(argvs), argvs[:])

    def hash_calc(fullnames):
        def hash_calc_in(fullname):
            print('File processing: ', fullname)
            h = hashlib.md5()
            with open(fullname, 'rb') as f:
                d = f.read(BUFF_SIZE)
                while d:
                    h.update(d)
                    d = f.read(BUFF_SIZE)
            filelst = hashmap.setdefault(h.hexdigest(), [])
            filelst.append(fullname)
            print(' Done: ', fullname)

        with futures.ThreadPoolExecutor(OS_CPU_N) as e:
            for name in fullnames:
                e.submit(hash_calc_in, name)

    for path, dirs, files in os.walk(path):
        for filename in files:
            if filename.lower() in excludes:
                continue
            if not os.path.getsize(path + '\\' + filename):  # skip zero-length files
                continue
            fullname_tmp = os.path.join(path, filename)
            if '\xb4' in fullname_tmp:
                fullname = fullname_tmp.replace('\xb4', '\x27')
                os.rename(fullname_tmp, fullname)
                print('File renamed: ' + fullname_tmp + ' ----> ' + fullname)
                totflist.append(fullname)
            else:
                totflist.append(fullname_tmp)

    hash_calc(totflist)
    if not hashmap:
        exit('Path to files not found.')
    print(len(totflist))
    pprint(list(hashmap.items()), depth=3, width=350)
    print('Records/files processed:', len(hashmap))

    with open('AllFiles.lst', 'w') as f:  # write the list of files to one txt(lst) file
        for m in hashmap.keys():
            print(str(hashmap[m]).strip('\'["]').replace('\\\\', '\\'), file=f)

    db = dataset.connect('sqlite:///Allfiles.db')
    if db['Files']:
        print('The previous version of the database was found and will be updated.')
    db.begin()
    for m in db['Files']:  # Clean up stale records
        if m['md5'] not in hashmap.keys():
            # delete only this record; a bare delete() would wipe the table
            db['Files'].delete(md5=m['md5'])
            print('- record deleted: ', m['path'])

    def dup_files(key):  # key - Boolean: True = duplicated
        db['Files'].upsert(
            dict(md5=m,
                 path=str(hashmap[m]).strip('\'["]').replace('\\\\', '\\'),
                 dups=key),
            ['md5'])

    for m in hashmap.keys():  # Update database
        if "', '" in str(hashmap[m]):
            dup_files(True)
        else:
            dup_files(False)
    db.commit()

    def ttm(t):
        msg = '{:.2f} s'
        print('End time:', strftime('%H:%M:%S'), ' ( The program has taken ', msg.format(t), ')')

    ttm(time.time() - t0)
    print('Duplicated record(s) found:')
    for t in db['Files'].find(dups=True):
        print(t['path'])
import dataset
import numpy as np

db = dataset.connect("sqlite:///data/enthalpy_formation_almgsi.db")
mg_concs = np.linspace(0.0, 1.0, 21)
si_concs = np.linspace(0.0, 0.5, 11)

tbl = db["systems"]
for c_mg in mg_concs:
    for c_si in si_concs:
        row = dict(mg_conc=c_mg, si_conc=c_si, status="new")
        tbl.insert(row)
def get_table(table):
    global db
    if db is None:
        db = dataset.connect(app.config['DATABASE_URI'])
    return db[table]
#!/usr/bin/env python
import dataset

db_roaming = dataset.connect('sqlite:///roaming.db')

for message in db_roaming['message'].find():
    if message['published'] is None:
        message['published'] = True
        db_roaming['message'].update(message, ['id'])
import sys

import dataset  # assumed import: the module calls dataset.connect below
import uvicorn
from starlette.applications import Starlette
from starlette.config import Config
from starlette.staticfiles import StaticFiles
from starlette.responses import HTMLResponse
from starlette.responses import JSONResponse
from starlette.responses import RedirectResponse
from starlette.templating import Jinja2Templates

# Configuration from environment variables or '.env' file.
config = Config('.env')
DATABASE_URL = config('DATABASE_URL')

version = f"{sys.version_info.major}.{sys.version_info.minor}"

templates = Jinja2Templates(directory='templates')
db = dataset.connect(DATABASE_URL)


def startup():
    print('Ready to go')


def get_items(table):
    items = []
    for item in db[table]:
        items.append(item)
    return items


app = Starlette(debug=True, on_startup=[startup])
app.mount('/static', StaticFiles(directory='statics'), name='static')
if __name__ == "__main__": config_file = 'config.json' if not os.path.isfile(config_file): raise Exception("config.json is missing!") else: with open(config_file) as json_data_file: config_data = json.load(json_data_file) # print(config_data) accounts = config_data["accounts"] databaseConnector = config_data["databaseConnector"] databaseConnector2 = config_data["databaseConnector2"] other_accounts = config_data["other_accounts"] mgnt_shares = config_data["mgnt_shares"] hive_blockchain = config_data["hive_blockchain"] db2 = dataset.connect(databaseConnector2) db = dataset.connect(databaseConnector) transferStorage = TransferTrx(db) # Create keyStorage trxStorage = TrxDB(db2) memberStorage = MemberDB(db2) accountStorage = MemberHistDB(db) confStorage = ConfigurationDB(db2) conf_setup = confStorage.get() last_cycle = conf_setup["last_cycle"] share_cycle_min = conf_setup["share_cycle_min"] sp_share_ratio = conf_setup["sp_share_ratio"] rshares_per_cycle = conf_setup["rshares_per_cycle"] upvote_multiplier = conf_setup["upvote_multiplier"]
import dataset
from pandas.io.sql import read_sql
from pandas.io.sql import to_sql
import statsmodels.api as sm

db = dataset.connect('sqlite:///:memory:')
table = db['books']
table.insert(dict(title="NumPy Beginner's Guide", author='Ivan Idris'))
table.insert(dict(title="NumPy Cookbook", author='Ivan Idris'))
table.insert(dict(title="Learning NumPy", author='Ivan Idris'))

print(read_sql('SELECT * FROM books', db.executable.raw_connection()))  # errors at runtime

data_loader = sm.datasets.sunspots.load_pandas()
df = data_loader.data
to_sql(df, "sunspots", db.executable.raw_connection())
table = db['sunspots']

for row in table.find(_limit=5):
    print(row)

print("Tables", db.tables)
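# If read_sql keeps failing on the raw DBAPI connection (as the comment above
# notes), the same rows can be fetched through dataset's own query API instead;
# a minimal sketch using the `db` handle defined above:
for row in db.query('SELECT * FROM books'):
    print(row['title'], '-', row['author'])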
def __init__(self, path_gnu_licenses_sqlite3):
    # self.__db.Licenses = dataset.connect('sqlite:///' + path_gnu_licenses_sqlite3)
    self.__db_Licenses = dataset.connect('sqlite:///' + path_gnu_licenses_sqlite3)
def read_from_database(self):
    db = dt.connect(config.DATABASE_CONNECTION_STRING)
    runs_table = db["runs"]
    datadict = runs_table.find_one(hash=self.model_hash)
    return datadict
import logging

import dataset  # assumed imports: the module uses logging and dataset below
import requests

from bot import settings

logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=getattr(logging, settings.LOG_LEVEL),
)
logger = logging.getLogger(__name__)

engine_config = (
    {"connect_args": {"check_same_thread": False}}
    if settings.DATABASE_URL.startswith("sqlite")
    else {}
)
db = dataset.connect(settings.DATABASE_URL, engine_kwargs=engine_config)


def post_client(data):
    url = f"{settings.LOAN_API}/clients/"
    response = requests.post(url, json=data)
    if response.status_code == 201:
        client = data
        client.update(response.json())
        table = db["clients"]
        table.upsert(client, ["client_id"])
        return client
    logger.warning("Error saving client with CPF %s", data.get("cpf", None))
def _update_in_db(self, datadict):
    """Expect a dictionary with the data to insert for the current run."""
    db = dt.connect(config.DATABASE_CONNECTION_STRING)
    runs_table = db["runs"]
    datadict["hash"] = self.model_hash
    runs_table.update(datadict, keys=["hash"])
import wiktextract as wk
import dataset as ds

DB = ds.connect('sqlite:///german.db')
NOUNS = DB['noun']


def add_noun(word, conjugation):
    if "n" in conjugation and conjugation["n"] == 'sg':
        data = NOUNS.find_one(word=word)
        if data:
            data["plural_ending"] = None
            data["plural"] = None
            NOUNS.update(data, ["word"])
            print("found one: " + word)


def word_cb(data):
    if 'conjugation' in data:
        if data['pos'] in ['noun', 'name']:
            for conjugation in data['conjugation']:
                add_noun(data['word'], conjugation)


wk.parse_wiktionary('enwiktionary.xml.bz2', word_cb, languages=['German'])
print("Yay, all parsed")
def setUp(self):
    os.environ.setdefault('DATABASE_URL', 'sqlite:///:memory:')
    self.db = connect(os.environ['DATABASE_URL'])
    self.tbl = self.db['weather']
    self.tbl.insert_many(TEST_DATA)
import requests
import dataset
import re
from datetime import datetime
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse

db = dataset.connect(
    'sqlite:////home/shadowfox/projects/invest_scrapper/data/books.db')
BASE_URL = 'http://books.toscrape.com/'


def scrape_books(html_soup, url):
    for book in html_soup.select('article.product_pod'):
        book_url = book.find('h3').find('a').get('href')
        book_url = urljoin(url, book_url)
        path = urlparse(book_url).path
        book_id = path.split('/')[2]
        db['books'].upsert({
            'book_id': book_id,
            'last_seen': datetime.now()
        }, ['book_id'])


def scrape_book(html_soup, book_id):
    main = html_soup.find(class_='product_main')
    book = {}
    book['book_id'] = book_id
    book['title'] = main.find('h1').get_text(strip=True)
    book['price'] = main.find(class_='price_color').get_text(strip=True)
    book['stock'] = main.find(class_='availability').get_text(strip=True)