def run(self):
    """
    Get duplicates of the article from the sameAs web service,
    create datasources with their resources, and update the resources
    (download content, save it to disk if it is new or was updated).
    """
    self.done = 0
    directoryBaseURL = self.config['directoryURL']
    dbPediaURL = self.config['dbPediaURL']
    directoryURL = "%s%s%s" % (directoryBaseURL, dbPediaURL, self.article)
    page = json.load(urllib2.urlopen(directoryURL))
    duplicates = page[0]["duplicates"]
    self.total = len(duplicates)
    # create resources and append them to their datasources
    for url in duplicates:
        # DEBUG: only list freebase and geonames
        if True or "freebase" in url or "geonames" in url:
            resource = Resource(url)
            if resource.domain not in self.datasources:
                self.datasources[resource.domain] = Datasource(resource.domain, self.lastdate)
            self.datasources[resource.domain].resources.append(resource)
    # update datasources, duplicate detection, creation of json
    for domain, datasource in self.datasources.iteritems():
        if not self._stop.is_set():  # do not proceed if stop is set
            datasource.update()
            self.done += 1
    self.completed = 1
    self.callback(self.datasources)
def init_class_cache(self):
    Datasource.init_classes(self.classes_path)
    logger.debug("init Datasource classes (%d)" % len(Datasource.class_factory))
    Datarecipient.init_classes(self.classes_path)
    logger.debug("init Datarecipient classes (%d)" % len(Datarecipient.class_factory))
    Application.init_classes(self.classes_path)
    logger.debug("init Application classes (%d)" % len(Application.class_factory))
    MonitoringDetail.init_classes(self.classes_path)
    logger.debug("init MonitoringDetail classes (%d)" % len(MonitoringDetail.class_factory))
def test_create_recipe_check_factories_read(self):
    self.print_header()
    self.generator.add_recipe(name="test4", **dict(self.config.items("recipe_TEST4")))
    self.config.set("datasource_SIMPLESAMPLE", "name", "simplesample")
    cfg = self.config.items("datasource_SIMPLESAMPLE")
    ds = Datasource(**dict(cfg))
    self.assert_(hasattr(ds, "only_the_test_simplesample"))
    objects = self.generator.recipes["test4"].objects
    ds.read(objects=objects)
    self.assert_(objects["hosts"]["test_host_0"].my_host == True)
    self.assert_(objects["applications"].values()[0].test4_linux == True)
    self.assert_(objects["applications"].values()[1].test4_windows == True)
def work():
    data = Datasource()
    dirname = sys.argv[1] if len(sys.argv) > 1 else None
    min_current = int(sys.argv[2]) if len(sys.argv) > 2 else 30
    max_temperature = int(sys.argv[3]) if len(sys.argv) > 3 else 40
    for filename in sorted(os.listdir(dirname or '.')):
        if filename.startswith('data') and filename.endswith('.txt'):
            # try:
            #     data += Datasource(filename, min_current=min_current, max_temperature=max_temperature)
            # except:
            #     print("Skip broken file", filename, file=sys.stderr)
            #     continue
            data += Datasource(filename, min_current=min_current, max_temperature=max_temperature)
    for timemark, temperature, current, probe_1, probe_2 in data:
        print('Time = {:3d}, Temperature = {:2d}, Current = {:2d}, Probe_1 = {:2d}, Probe_2 = {:2d}'.format(
            timemark, temperature, current, probe_1, probe_2))
def ranking_eval_5fold(model, split='dev'):
    """
    Evaluate a trained model on either dev or test of the dataset it was trained on.
    Evaluate separately on five 1000-image splits, and average the metrics.
    """
    data = model['options']['data']
    cnn = model['options']['cnn']

    results = []
    for fold in range(5):
        print 'Loading fold ' + str(fold)
        dataset = datasets.load_dataset(data, cnn, load_train=False, fold=fold)
        caps, ims = Datasource(dataset[split], model['worddict']).all()

        print 'Computing results...'
        c_emb = tools.encode_sentences(model, caps)
        i_emb = tools.encode_images(model, ims)

        errs = tools.compute_errors(model, c_emb, i_emb)

        r = t2i(errs)
        print "Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % tuple(r)

        ri = i2t(errs)
        print "Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % tuple(ri)
        results.append(r + ri)

    print("-----------------------------------")
    print("Mean metrics: ")
    mean_metrics = numpy.array(results).mean(axis=0).flatten()
    print "Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % tuple(mean_metrics[:5])
    print "Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % tuple(mean_metrics[5:])
class Users:
    """ User data management class """

    ds = None

    def __init__(self):
        """ initialize the data source """
        self.ds = Datasource()
        self.ds.switch_datasource('users')

    def get_active_accounts(self, fin_id=0):
        """ return a list with the parameters of the user accounts """
        return self.ds.get_accounts({
            'A.disabled': 0,
            'F.disabled': 0,
            'A.fin_id': fin_id
        })
def add_datasource(self, **kwargs):
    for key in [k for k in kwargs.iterkeys() if isinstance(kwargs[k], str)]:
        kwargs[key] = re.sub('%.*?%', substenv, kwargs[key])
    newcls = Datasource.get_class(kwargs)
    if newcls:
        datasource = newcls(**kwargs)
        self.datasources.append(datasource)
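# Note: `substenv` is not shown above. A minimal sketch of what such a
# re.sub() callback could look like, assuming %NAME% tokens are meant to be
# replaced with environment variables (hypothetical helper, not the project's
# actual implementation):
import os
import re

def substenv(match):
    # match.group(0) is e.g. "%HOME%"; strip the percent signs, look the
    # name up in the environment, and keep the token unchanged if unset
    name = match.group(0).strip('%')
    return os.environ.get(name, match.group(0))

# usage sketch
print(re.sub('%.*?%', substenv, '%HOME%/data'))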
class ScheduleSimulator(SingletonInstance):
    def __init__(self, sche_path, log_path='prj2.log', search_path='search.txt'):
        self.search_path = search_path
        self.db = Datasource()
        self.scheduler = ScheduleParser(sche_path)
        self.recovery = RecoveryManagement(log_path)
        self.log_writer = LogWriter(log_path)
        self.search = SearchEngine()
        self.generator = SearchEngineGenerator()

    def __call__(self):
        transaction = {}
        for idx, schedule in enumerate(self.scheduler()):
            groups = schedule.groups()
            command_type = groups[0]
            if 'recover' in command_type:
                transaction = {}
                self.recovery(idx)
                self.log_writer.set_checkpoint([])
                self.generator()
            else:
                self.log_writer(idx, schedule, transaction)
                if command_type.startswith("<T"):
                    if command_type not in transaction.keys():
                        transaction[command_type] = []
                    if groups[1] in 'commit':
                        for sql in transaction[command_type]:
                            self.db.free_sql(sql)
                        transaction[command_type] = []
                        self.generator()
                    elif groups[1] in 'rollback':
                        transaction[command_type] = []
                    else:
                        transaction[command_type].append(groups[1])
                elif 'search' in command_type:
                    query = groups[1].replace("\n", "").strip()
                    with open(self.search_path, "a") as f:
                        f.write(f"search {idx + 1}\n")
                        f.write(f"query {query}\n")
                        results = self.search(query)
                        for doc in results:
                            f.write(f"{self.search.result_formatting(doc)}\n")
class PageRank(object):
    def __init__(self, jump_prob=0.15, e=math.exp(-8)):
        self.db = Datasource()
        self.jump_prob = jump_prob
        self.e = e
        self.doc_ids = sorted([int(v[0]) for v in self.db.get_all_link_docs()])
        self.id2idx = {v: idx for idx, v in enumerate(self.doc_ids)}

    def __call__(self):
        forward_link = self._linkdb_to_dict(self.db.get_forward_link())
        doc_size = len(self.doc_ids)
        link_matrix = np.ones((doc_size, doc_size), dtype=np.float32) * (self.jump_prob / doc_size)
        scores = np.ones(doc_size, dtype=np.float32) / doc_size

        for i in range(doc_size):
            doc_id = self.doc_ids[i]
            if doc_id in forward_link.keys():
                links = [self.id2idx[x] for x in forward_link[doc_id]]
                for link in links:
                    link_matrix[i, link] += (1 - self.jump_prob) / len(links)
            else:
                link_matrix[i, :] += (1 - self.jump_prob) / doc_size

        link_matrix = np.transpose(link_matrix)
        scores = scores[:, np.newaxis]

        delta = 1
        cnt = 0
        while delta >= self.e:
            prev = scores
            scores = np.dot(link_matrix, scores)
            delta = np.sum(np.abs(prev - scores))
            cnt += 1
            print(f"\tPage rank iteration: {cnt}")

        return list(zip(self.doc_ids, scores.tolist()))

    def _linkdb_to_dict(self, data):
        return {
            k: [int(tmp) for tmp in v.split(',')] if v is not None else []
            for k, v in data
        }
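# A minimal, self-contained sketch of the same power-iteration scheme on a
# hand-built three-page link graph (toy data, not from the project), to make
# the roles of jump_prob and the convergence threshold concrete:
import numpy as np

jump_prob, eps = 0.15, 1e-8
# page 0 links to 1 and 2, page 1 links to 2, page 2 has no outgoing links
forward = {0: [1, 2], 1: [2], 2: []}
n = 3
M = np.full((n, n), jump_prob / n)
for i, outs in forward.items():
    if outs:
        for j in outs:
            M[i, j] += (1 - jump_prob) / len(outs)
    else:
        M[i, :] += (1 - jump_prob) / n  # dangling page: jump anywhere
M = M.T                                # column-stochastic transition matrix
scores = np.full((n, 1), 1.0 / n)
delta = 1.0
while delta >= eps:
    prev = scores
    scores = M @ scores
    delta = np.abs(prev - scores).sum()
print(scores.ravel())  # page 2, which both other pages link to, scores highest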
def test_ds_handshake(self):
    self.print_header()
    self.generator.add_recipe(name="test8", **dict(self.config.items("recipe_TEST8")))
    self.config.set("datasource_HANDSH", "name", "handshake")
    cfg = self.config.items("datasource_HANDSH")
    ds = Datasource(**dict(cfg))
    try:
        hosts, applications, contacts, contactgroups, appdetails, dependencies, bps = ds.read()
    except Exception, exp:
        pass
def main():
    """
    run program: preprocess data, train model, validate/test.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = str(FLAGS.gpu_id)
    subpath = 'noise_' + str(FLAGS.noise)

    FLAGS.logdir = os.path.join(FLAGS.logdir, FLAGS.datasource, subpath, FLAGS.exp_id)
    FLAGS.outdir = os.path.join(FLAGS.outdir, FLAGS.datasource, subpath, FLAGS.exp_id)

    if not os.path.exists(FLAGS.logdir):
        os.makedirs(FLAGS.logdir)
    if not os.path.exists(FLAGS.outdir):
        os.makedirs(FLAGS.outdir)

    import json
    with open(os.path.join(FLAGS.outdir, 'config.json'), 'w') as fp:
        json.dump(tf.app.flags.FLAGS.flag_values_dict(), fp, indent=4, separators=(',', ': '))

    if FLAGS.dump:
        import sys
        sys.stdout = open(os.path.join(FLAGS.outdir, 'log.txt'), 'w')

    process_flags()
    gpu_options = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, allow_soft_placement=True))
    datasource = Datasource(sess)
    model_class = load_dynamic(FLAGS.model.upper(), FLAGS.model)
    model = model_class(sess, datasource)

    # run computational graph
    best_ckpt = FLAGS.ckpt
    if best_ckpt is not None:
        print('resuming ckpt supplied; restoring model from {}'.format(best_ckpt))
    if FLAGS.train:
        learning_curves, best_ckpt = model.train(ckpt=best_ckpt)
    if FLAGS.test:
        if best_ckpt is None:
            log_file = os.path.join(FLAGS.outdir, 'log.txt')
            if os.path.exists(log_file):
                for line in open(log_file):
                    if "Restoring ckpt at epoch" in line:
                        best_ckpt = line.split()[-1]
                        break
        model.test(ckpt=best_ckpt)
        model.reconstruct(ckpt=best_ckpt, pkl_file=FLAGS.pkl_file)
        model.markov_chain(ckpt=best_ckpt)
class SearchEngine(object):
    def __init__(self):
        self.db = Datasource()

    def __call__(self, query):
        terms = list(set(word_tokenize(query.strip())))
        return self._calculate_rank(terms)

    def result_formatting(self, doc):
        return "{}, {}, {}, {}".format(*doc[1:])

    def _calculate_rank(self, terms, topk=10):
        tfidf = []
        search_results = {}
        results = self.db.get_document_containing_terms(terms)
        for term, docs in groupby(results, lambda x: x[2]):
            nt = 0
            tmp_tfidf = []
            for doc_id, doc_title, _, ndt, nd, rank in docs:
                if doc_id not in search_results.keys():
                    search_results[doc_id] = {'title': doc_title, 'rank': rank}
                nt += 1
                tmp_tfidf.append((doc_id, self._tf(ndt, nd)))
            tmp_tfidf = map(lambda x: (x[0], x[1] * self._idf(nt)), tmp_tfidf)
            tfidf += tmp_tfidf
        try:
            tfidf = sorted(tfidf, key=lambda x: x[0])
        except:
            print(tfidf)
            return
        for doc_id, scores in groupby(tfidf, lambda x: x[0]):
            total = sum(list(map(lambda x: x[1], scores)))
            search_results[doc_id]['tfidf'] = total
            search_results[doc_id]['score'] = total * search_results[doc_id]['rank']
        docs = [(v['score'], k, v['title'], v['tfidf'], v['rank'])
                for k, v in search_results.items()]
        docs = sorted(docs, key=lambda x: (-x[0], x[1]))
        return docs[:topk]

    def _tf(self, ndt, nd):
        return math.log(1 + (ndt / nd))

    def _idf(self, nt):
        return 1 / nt
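# A small worked example of the scoring used above, with made-up counts (not
# taken from the project's database): tf = log(1 + ndt/nd) where ndt is the
# term frequency in the document and nd the document length, idf = 1/nt where
# nt is the number of matching documents, and the final score is tfidf * rank.
import math

ndt, nd = 3, 120   # term appears 3 times in a 120-term document
nt = 2             # the term occurs in 2 documents
rank = 0.41        # PageRank score of the document

tf = math.log(1 + ndt / nd)
idf = 1 / nt
score = tf * idf * rank
print(tf, idf, score)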
def test_detail_keyvalues(self):
    self.print_header()
    cfg = self.config.items("datasource_CSVDETAILS")
    objects = self.generator.recipes['test6'].objects
    ds = Datasource(**dict(cfg))
    ds.read(objects=objects)
    app1 = objects['applications'].values()[0]
    app1.resolve_monitoring_details()
    app2 = objects['applications'].values()[1]
    app2.resolve_monitoring_details()
    # swap threshold via KEYVALUES detail
    self.assert_(app1.swap_warning == "15%")
    self.assert_(app1.swap_critical == "8%")
    # cron threshold via KEYVALUES detail
    self.assert_(app1.cron_warning == "30")
    self.assert_(app1.cron_critical == "100")
    # swap threshold via class os_linux
    self.assert_(app2.swap_warning == "5%")
    self.assert_(app2.swap_critical == "15%")
    # neither class detail nor csv detail
    self.assert_(not hasattr(app2, "cron_warning"))
    self.assert_(hasattr(app2, "thresholds"))
    self.assert_(hasattr(app2.thresholds, "cron_warning"))
    self.assert_(app2.thresholds.cron_warning == "31")
class LogWriter(SingletonInstance):
    UPDATE_REGEX = r'^(UPDATE) (wiki) SET (title|text) = \'(.+)\' WHERE id = (\d+);$'
    DELETE_WIKI_REGEX = r'^(DELETE) FROM (wiki) WHERE (id) = (\d+);$'
    DELETE_LINK_REGEX = r'^(DELETE) FROM (link) WHERE (id_from|id_to) = (\d+);$'
    COMMIT_REGEX = r'^(commit)$'
    ROLLBACK_REGEX = r'^(rollback)$'

    def __init__(self, path):
        self.path = path
        self.update_pattern = re.compile(self.UPDATE_REGEX)
        self.delete_wiki_pattern = re.compile(self.DELETE_WIKI_REGEX)
        self.delete_link_pattern = re.compile(self.DELETE_LINK_REGEX)
        self.commit_pattern = re.compile(self.COMMIT_REGEX)
        self.rollback_pattern = re.compile(self.ROLLBACK_REGEX)
        self.db = Datasource()
        self.processing_transaction = []

    def __call__(self, line_number, command, transaction):
        line_number += 1
        command_type = command.groups()[0].strip()
        with open(self.path, 'a') as f:
            # transaction command
            if command_type.startswith("<T"):
                if command_type not in self.processing_transaction:
                    f.write(f"{command_type} start\n")
                    self.processing_transaction.append(command_type)
                sql = command.groups()[1].strip()
                for p in [
                        self.update_pattern, self.delete_wiki_pattern,
                        self.delete_link_pattern, self.commit_pattern,
                        self.rollback_pattern
                ]:
                    m = p.match(sql)
                    if m:
                        sql_type = m.groups()[0]
                        if sql_type in 'UPDATE':
                            table, column, new_value, key = m.groups()[1:]
                            old_value = self.db.get_old_value_for_log(table, column, key)
                            f.write(f"{command_type}, <{table}>.<id:{key}>.<{column}>, <{old_value}>, <{new_value}>\n")
                        elif sql_type in 'DELETE':
                            table, target_column, key = m.groups()[1:]
                            old_tuple = self.db.get_all_tuple_for_log(table, target_column, key)
                            f.write(f"{command_type}, <{table}>.<{target_column}:{key}>, <{old_tuple}>, <None>\n")
                        elif sql_type in 'commit':
                            f.write(f"{command_type} commit\n")
                            try:
                                self.processing_transaction.remove(command_type)
                            except ValueError:
                                print(f"{command_type}\tthis transaction was never started in the schedule.")
                        elif sql_type in 'rollback':
                            self.undo_sql(f, command_type, transaction[command_type])
                            f.write(f"{command_type} abort\n")
                            try:
                                self.processing_transaction.remove(command_type)
                            except ValueError:
                                print(f"{command_type}\tthis transaction was never started in the schedule.")
                        else:
                            raise NotImplementedError
                        break
                else:
                    raise NotImplementedError
            # checkpoint command
            elif command_type in 'checkpoint':
                f.write(f"{command_type} {','.join(self.processing_transaction)}\n")
            # recover command
            elif command_type in 'recover':
                f.write(f"{command_type} {line_number}\n")

    def recover(self, recover_line):
        with open(self.path, "a") as f:
            f.write(f"recover {recover_line + 1}\n")

    def checkpoint(self):
        with open(self.path, "a") as f:
            f.write("checkpoint\n")

    def set_checkpoint(self, checkpoints):
        self.processing_transaction = checkpoints

    def undo_sql(self, f, t_id, transaction):
        for t in reversed(transaction):
            log = self.convert_to_undo_log_from_sql(t_id, t)
            f.write(f"{log}\n")

    def convert_to_undo_log_from_sql(self, t_id, t):
        for p in [self.update_pattern, self.delete_link_pattern, self.delete_wiki_pattern]:
            m = p.match(t)
            if m:
                sql_type = m.groups()[0]
                if sql_type in 'UPDATE':
                    table, column, new_value, key = m.groups()[1:]
                    old_value = self.db.get_old_value_for_log(table, column, key)
                    return f"{t_id}, <{table}>.<id:{key}>.<{column}>, <{new_value}>, <{old_value}>"
                elif sql_type in 'DELETE':
                    table, target_column, key = m.groups()[1:]
                    old_tuple = self.db.get_all_tuple_for_log(table, target_column, key)
                    return f"{t_id}, <{table}>.<{target_column}:{key}>, <None>, <{old_tuple}>"
                else:
                    raise NotImplementedError

    def free_write(self, msg):
        with open(self.path, 'a') as f:
            f.write(f"{msg}\n")
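# For reference, the write-ahead-log lines produced by LogWriter above look
# like the following (reconstructed from the f.write format strings; the
# concrete values are illustrative only):
#   <T1> start
#   <T1>, <wiki>.<id:42>.<title>, <Old title>, <New title>
#   <T1>, <link>.<id_from:42>, <(42, 7)>, <None>
#   <T1> commit
#   checkpoint <T2>,<T3>
#   recover 17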
if args.d is not None and args.c is not None:
    raise Exception("database and JSON configurations cannot be used together")
config = None
log = None
if args.d is not None:
    if args.i < 0:
        raise Exception("a job id must be supplied with [-i]")
    with open(args.d, encoding='utf-8') as datasource_context:
        contents = datasource_context.read()
        content = json.loads(contents)
        my_source = Datasource(content['host'], content['port'], content['user'],
                               content['password'], content['db'], content['charset'])
        config = ReadDatabaseConfig(my_source, args.i)
        exec_id = my_source.insert('insert into job_exec (start_time, job_id) values (sysdate(), ' + str(args.i) + ')')
        log = DatasourceLog(config, exec_id)
elif args.c is not None:
    with open(args.c, encoding='utf-8') as json_context:
        contents = json_context.read()
        content = json.loads(contents)
        config = ReadJsonConfig(content)
        log = LocalLog('./log-' + str(int(time.time())) + '.log')
proxy = None
client = None
#!/usr/bin/env python
#-*-coding:utf-8-*-
""" module initialization """

from datasource import Datasource
from datasource import MySQL
from datasource import Users
from datasource import Request
from datasource import Report
from datasource import Stat
from configurator.configurator import get_config

Datasource.register_datasource("mysql", MySQL, get_config("mysql"))
Datasource.register_datasource("users", Users, get_config("mysql"))
Datasource.register_datasource("request", Request, get_config("mysql"))
Datasource.register_datasource("report", Report, get_config("mysql"))
Datasource.register_datasource("stat", Stat, get_config("mysql"))

from reports import Manager
Manager.set_report_configs(get_config("smtp"))
class SearchEngineGenerator(SingletonInstance):
    def __init__(self):
        self.db = Datasource()
        self.pagerank = PageRank()

    def __call__(self):
        start = time.time()
        print("Building tables...")
        self._init()
        self._generate_inverted_index()
        self._generate_pagerank()
        self._update_database()
        print("Total... {:02f}s".format(time.time() - start))
        print("Ready to search")

    def _init(self):
        self.inverted_index = {}
        self.doc_info = {}

    def _generate_inverted_index(self):
        ''' Build the inverted index '''
        start = time.time()
        result = self.db.get_all_text()
        for doc_id, text in result:
            if doc_id not in self.doc_info.keys():
                self.doc_info[doc_id] = {}
            terms = word_tokenize(text.strip())
            self.doc_info[doc_id]['nd'] = len(terms)
            num_terms_in_doc = Counter(terms)
            for term, freq in num_terms_in_doc.items():
                if term not in self.inverted_index.keys():
                    self.inverted_index[term] = list()
                self.inverted_index[term].append((doc_id, freq))
        print("\tGenerate inverted index...{:02f}s".format(time.time() - start))

    def _generate_pagerank(self):
        ''' Compute PageRank '''
        start = time.time()
        ranks = self.pagerank()
        for doc_id, rank in ranks:
            if doc_id not in self.doc_info.keys():
                self.doc_info[doc_id] = {}
            self.doc_info[doc_id]['rank'] = rank
        print("\tGenerate page rank...{:02f}s".format(time.time() - start))

    def _update_database(self):
        ''' Update the database '''
        start = time.time()
        print("\tUpdate database...", end='')
        self.db.bulk_update_inverted_index(self.inverted_index)
        self.db.bulk_update_doc_info(self.doc_info)
        print("{:02f}s".format(time.time() - start))
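# A self-contained toy illustration of the inverted-index structure built
# above (two made-up documents instead of the Datasource text table):
from collections import Counter

docs = {1: "the cat sat on the mat", 2: "the dog sat"}
inverted_index = {}
doc_info = {}
for doc_id, text in docs.items():
    terms = text.split()           # stand-in for word_tokenize
    doc_info[doc_id] = {'nd': len(terms)}
    for term, freq in Counter(terms).items():
        inverted_index.setdefault(term, []).append((doc_id, freq))
print(inverted_index['the'])       # [(1, 2), (2, 1)]
print(doc_info)                    # {1: {'nd': 6}, 2: {'nd': 3}}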
class Manager:
    """ Report management class """

    smtp_config = None
    ds = None
    interface = None

    def __init__(self):
        """ initialize the data source """
        self.ds = Datasource()
        self.ds.switch_datasource('report')

    @classmethod
    def set_report_configs(cls, configs={}):
        """ register the report delivery configuration """
        cls.smtp_config = configs
        # TODO: support storing multiple configurations

    def get_reports(self):
        """ return a list of report objects """
        messages = self.ds.get_messages({'send_ts': None})
        return [SmtpReportClient(self, row) for row in messages]

    def set_report(self, account, event_type):
        """ create a message in the DB """
        recepient = account['user_email']
        subject = account['curr_name'] + ' ' + event_type['type_descr']
        if event_type['type_name'] == 'downtrend':
            body = """
            {:%Y.%m.%d %H:%M}
            Type: {}\tCurrency: {}\tPrice: {}
            This is more than {} on the account {} with volume {}.
            """.format(
                datetime.datetime.now(), event_type['type_descr'], account['curr_name'],
                event_type['buy_price'], account['curr_price'], account['curr_name'],
                account['curr_volume']
            )
        else:
            body = """
            {:%Y.%m.%d %H:%M}
            Type: {}\tCurrency: {}\tPrice: {}
            This is less than {} on the account {} with volume {}.
            """.format(
                datetime.datetime.now(), event_type['type_descr'], account['curr_name'],
                event_type['sell_price'], account['curr_price'], account['curr_name'],
                account['curr_volume']
            )
        return self.ds.insert_message({'recepient': recepient,
                                       'subject': subject,
                                       'body': body,
                                       'created_ts': int(datetime.datetime.now().timestamp())})

    def get_smtp_config(self):
        """ return the SMTP connection parameters """
        return self.smtp_config

    def confirm_report_error(self, message_id: int = None):
        """ confirm that the report was sent """
        return self.ds.update_message({'send_ts': int(datetime.datetime.now().timestamp())},
                                      {'message_id': message_id})

    def set_report_error(self, report_id: int = None, error='unknown error'):
        """ register an error raised while sending the report """
        # =============== debugging output ===============
        # print("set_report_error report_id: {}, error: \"{}\"".format(report_id, error))
        # =============== debugging output ===============
        pass
from covid19api import Covid19Api
from datasource import Datasource

api = Covid19Api()
datasource = Datasource()

print('Downloading stats')
stats = api.get_stats_for_country('sweden')
print('Downloading stats completed')

print('Saving stats')
datasource.save(stats)
print('Saving stats completed')
class RecoveryManagement(object):
    def __init__(self, log_path, path='recovery.txt'):
        self.path = path
        self.db = Datasource()
        self.log_parser = LogParser(log_path)
        self.log_writer = LogWriter(log_path)
        self.generator = SearchEngineGenerator()

    def __call__(self, recover_line):
        with open(self.path, "a") as f:
            save_redo_list = []
            n_line, undo_list = self.log_parser.find_checkpoint()
            for log in self.log_parser.forward(n_line=n_line):
                groups = log.groups()
                command_type = groups[0]
                if 'recover' in command_type:
                    break
                else:
                    if len(groups) == 2:
                        if 'start' in groups[1]:
                            undo_list.append(command_type)
                        elif 'commit' in groups[1]:
                            undo_list.remove(command_type)
                            # transaction = self.log_parser.find_transaction(command_type)
                            # for t in transaction:
                            #     self.execute_recovery(command_type, t, 'new', 'redo')
                            save_redo_list.append(command_type)
                        elif 'abort' in groups[1]:
                            undo_list.remove(command_type)
                            # transaction = self.log_parser.find_transaction(command_type)
                            # for t in transaction:
                            #     self.execute_recovery(command_type, t, 'old', 'redo')
                            save_redo_list.append(command_type)
                    elif command_type.startswith('<T'):
                        self.execute_recovery(command_type, groups[1:], 'new', 'redo')

            save_undo_list = undo_list.copy()
            for log in self.log_parser.backward():
                groups = log.groups()
                command_type = groups[0]
                if command_type in undo_list and 'start' in groups[1]:
                    undo_list.remove(command_type)
                    # transaction = self.log_parser.find_transaction(command_type)
                    # for t in reversed(transaction):
                    #     self.execute_recovery(command_type, t, 'old', 'undo')
                    self.log_writer.free_write(f"{command_type} abort")
                elif command_type in undo_list and command_type.startswith('<T'):
                    self.execute_recovery(command_type, groups[1:], 'old', 'undo')

            self.log_writer.recover(recover_line)
            self.log_writer.checkpoint()
            f.write(f"recover {recover_line + 1}\n")
            f.write(f"redo {', '.join(save_redo_list)}\n")
            f.write(f"undo {', '.join(save_undo_list)}\n")

    def execute_recovery(self, t_id, t, value_type, do_type):
        '''
        t (tuple): transaction tuple
        value_type (enum): whether to apply the old or the new value, one of ['old', 'new']
        do_type (enum): undo or redo, one of ['undo', 'redo']
        '''
        assert value_type in ['old', 'new'], "invalid value_type."
        assert do_type in ['undo', 'redo'], "invalid do_type."
        is_undo = True if do_type == 'undo' else False
        # Update
        if len(t) == 6:
            table, key_field, key, target_field, old_value, new_value = t
            # value = new_value if value_type in 'new' else old_value
            value = new_value if do_type == 'redo' else old_value
            if is_undo:
                self.log_writer.free_write(
                    f"{t_id}, <{table}>.<id:{key}>.<{target_field}>, <{new_value}>, <{old_value}>"
                )
            else:
                self.log_writer.free_write(f"#redo {t_id}_{t}")
            self.db.update_table(table, key_field, key, target_field, value)
        # Delete
        elif len(t) == 5:
            table, key_field, key, old_tuple, _ = t
            if isinstance(old_tuple, str):
                if old_tuple != 'None':
                    old_tuple = old_tuple[1:-1]
                    if old_tuple.endswith(","):
                        old_tuple = old_tuple[:-1]
            if value_type == 'new':
                if is_undo:
                    self.log_writer.free_write(
                        f"{t_id}, <{table}>.<{key_field}:{key}>, <{old_tuple}>, <None>"
                    )
                else:
                    self.log_writer.free_write(f"#redo {t_id}_{t}")
                self.db.delete_table(table, key_field, key)
            else:
                if old_tuple != 'None':
                    if is_undo:
                        self.log_writer.free_write(
                            f"{t_id}, <{table}>.<{key_field}:{key}>, <None>, <{old_tuple}>"
                        )
                    else:
                        self.log_writer.free_write(f"#redo {t_id}_{t}")
                    self.db.insert_table(table, old_tuple)
#!/usr/bin/env python
#-*-coding:utf-8-*-
""" Module that displays the current information """

SECONDS_IN_DAY = 24 * 3600 * 1
DEFAULT_DAYS = 1

import init
import sys
import datetime
from datasource import Datasource

days = DEFAULT_DAYS
if len(sys.argv) > 1:
    try:
        days = int(sys.argv[1])
    except ValueError:
        days = DEFAULT_DAYS

ds = Datasource()
ds.switch_datasource('stat')
for row in ds.get_rates({'event_ts >': datetime.datetime.now().timestamp() - days * SECONDS_IN_DAY}):
    row['event_dt_display'] = row['event_dt'].strftime('%Y.%m.%d %H:%M:%S')
    print('{event_dt_display} {curr_name} \tbuy: {buy_price} \tsell: {sell_price}'.format(**row))
#!/usr/bin/env python
#-*-coding:utf-8-*-
""" initialize the data sources and prepare the modules """

from datasource import Datasource, MySQL  # normal
from configurator.configurator import get_config

Datasource.register_datasource("mysql", MySQL, get_config("mysql"))
#!/usr/bin/env python
#-*-coding:utf-8-*-
""" Show exchange statistics data in the console """

from datasource import Datasource
from misc import TStoStr
import init

if __name__ == "__main__":
    print("Trade statistics stored in the DB")
    ds = Datasource()
    for exchange in ds.get_exchange(disabled=0):
        print()
        print(exchange['exch_name'])
        print('---------------------------------')
        for pair in ds.get_pair(exch_id=exchange['exch_id']):
            start, stop = ds.get_trades_start_ts_range(pair['pair_id'])[0].values()
            if start != stop:
                print("%s (%s): %s - %s" % (pair['pair_name'], pair['pair_id'],
                                            TStoStr(start), TStoStr(stop)))
def main():
    """
    run program: preprocess data, train model, validate/test.
    """
    # print('a debugging version')
    # print(FLAGS.test)
    # tf.reset_default_graph()
    os.environ['CUDA_VISIBLE_DEVICES'] = str(FLAGS.gpu_id)
    # subpath = 'wadv_' + str(FLAGS.wadv) + 'cew_' + str(FLAGS.cew) + 'klw_' + str(FLAGS.klw) + 'noise_' + str(FLAGS.noise)
    subpath = 'miw_' + str(FLAGS.miw) + '_flip_' + str(FLAGS.flip_samples) + \
        '_bits_' + str(FLAGS.n_bits) + '_epochs_' + str(FLAGS.n_epochs)
    print(subpath)

    # subpath = 'noise_' + str(FLAGS.noise)
    FLAGS.logdir = os.path.join(FLAGS.logdir, FLAGS.datasource, subpath, FLAGS.exp_id)
    FLAGS.outdir = os.path.join(FLAGS.outdir, FLAGS.datasource, subpath, FLAGS.exp_id)
    # FLAGS.outdir = '/mnt/cephfs_hl/arnold/vae/mcmc/run1/tasks/102803/log'
    if not os.path.exists(FLAGS.logdir):
        os.makedirs(FLAGS.logdir)
    if not os.path.exists(FLAGS.outdir):
        os.makedirs(FLAGS.outdir)

    import json
    with open(os.path.join(FLAGS.outdir, 'config.json'), 'w') as fp:
        json.dump(tf.app.flags.FLAGS.flag_values_dict(), fp, indent=4, separators=(',', ': '))

    if FLAGS.dump:
        import sys
        sys.stdout = open(os.path.join(FLAGS.outdir, 'log.txt'), 'w')

    process_flags()
    gpu_options = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, allow_soft_placement=True))
    datasource = Datasource(sess)
    model_class = load_dynamic(FLAGS.model.upper(), FLAGS.model)
    model = model_class(sess, datasource)

    # run computational graph
    best_ckpt = FLAGS.ckpt
    if best_ckpt is not None:
        print('resuming ckpt supplied; restoring model from {}'.format(best_ckpt))
    if FLAGS.train:
        learning_curves, best_ckpt = model.train(ckpt=best_ckpt)
    if FLAGS.test:
        if best_ckpt is None:
            log_file = os.path.join(FLAGS.outdir, 'log.txt')
            if os.path.exists(log_file):
                for line in open(log_file):
                    if "Restoring ckpt at epoch" in line:
                        best_ckpt = line.split()[-1]
                        break
        model.test(ckpt=best_ckpt)
        model.reconstruct(ckpt=best_ckpt, pkl_file=FLAGS.pkl_file)
        model.markov_chain(ckpt=best_ckpt)
#!/usr/bin/env python
#-*-coding:utf-8-*-
""" Load the trading history """

import sys
import init
from datasource import Datasource

ds = Datasource()
ds.switch_datasource('request')

if len(sys.argv) != 3:
    print("Specify fin_id and a file with the trade source data: \"update.py fin_id source_file\"")
    for row in ds.get_finances({'disabled =': 0}):
        print("{rate_category_name} \t{curr_name}: \t{fin_id}".format(**row))
    quit()

fin_id = int(sys.argv[1])
if len(ds.get_finances({'disabled =': 0, 'fin_id =': fin_id})) != 1:
    print("No record with fin_id = {} in the DB".format(fin_id))
    quit()

import os
file_name = str(sys.argv[2])
if not os.path.isfile(file_name):
    print("File \"{}\" not found".format(file_name))
    quit()
def main():
    """
    Runs the ML loop. Preprocesses data, trains the model,
    along with regular validation and testing.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = str(FLAGS.gpu_id)
    subpath = 'noise_' + str(FLAGS.noise_std)

    FLAGS.logdir = os.path.join(FLAGS.logdir, FLAGS.datasource, subpath, FLAGS.exp_id)
    FLAGS.outdir = os.path.join(FLAGS.outdir, FLAGS.datasource, subpath, FLAGS.exp_id)

    if FLAGS.transfer > 0:
        if FLAGS.transfer == 2:
            transfer_exp_id = str(int(int(FLAGS.exp_id) / 10))
            source = FLAGS.datasource.split("2")[0]
            FLAGS.transfer_logdir = os.path.join(FLAGS.transfer_logdir, source, subpath, transfer_exp_id)
            FLAGS.transfer_outdir = os.path.join(FLAGS.transfer_outdir, source, subpath, transfer_exp_id)

    if not os.path.exists(FLAGS.logdir):
        os.makedirs(FLAGS.logdir)
    if not os.path.exists(FLAGS.outdir):
        os.makedirs(FLAGS.outdir)

    import json
    with open(os.path.join(FLAGS.outdir, 'config.json'), 'w') as fp:
        json.dump(tf.app.flags.FLAGS.flag_values_dict(), fp, indent=4, separators=(',', ': '))

    if FLAGS.dump:
        import sys
        sys.stdout = open(os.path.join(FLAGS.outdir, 'log.txt'), 'w')

    process_flags()
    gpu_options = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, allow_soft_placement=True))
    datasource = Datasource(sess)
    model_class = load_dynamic(FLAGS.model.upper(), FLAGS.model)
    model = model_class(sess, datasource, vae=FLAGS.vae)

    # run computational graph
    best_ckpt = None
    if FLAGS.train:
        learning_curves, best_ckpt = model.train()
    if FLAGS.test:
        if best_ckpt is None:
            log_file = os.path.join(FLAGS.outdir, 'log.txt')
            if os.path.exists(log_file):
                for line in open(log_file):
                    if "Restoring ckpt at epoch" in line:
                        best_ckpt = line.split()[-1]
                        break
        model.test(ckpt=best_ckpt)
        model.reconstruct(ckpt=best_ckpt, pkl_file=FLAGS.pkl_file)
def ranking_eval_Nfold(model, n_fold=1, subset='val'):
    """
    Evaluate a trained model on either val or test of the dataset it was trained on.
    Evaluate separately on n_fold image splits, and average the metrics.

    Parameters:
    -----------
    model: dict
        Dictionary containing the parameters of the current model.
    n_fold: int
        Number of image splits to be evaluated on. Only n_fold=1 is supported
        with the provided datasets.
    subset: str
        Subset to perform the evaluation on. One of: 'val', 'test'.

    Returns:
    --------
    results_dict: dict
        Dictionary containing the evaluation results. Structured as
        results_dict['cap_ret', 'img_ret']['r1', 'r5', 'r10', 'medr'].
    score: float
        Score obtained: the sum of recalls for both problems, caption
        retrieval and image retrieval.
    """
    results = []

    for fold in range(n_fold):
        print 'Loading fold ' + str(fold)
        dataset = load_dataset(dataset_name=model['options']['data'],
                               embedding=model['options']['embedding'],
                               path_to_data=model['options']['data_path'],
                               test_subset=model['options']['test_subset'],
                               load_train=False,
                               fold=fold)
        caps, ims = Datasource(dataset[subset], model['worddict']).all()

        print 'Computing results...'
        c_emb = tools.encode_sentences(model, caps)
        i_emb = tools.encode_images(model, ims)

        errs = tools.compute_errors(model, c_emb, i_emb)

        r = t2i(errs)
        print "Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % tuple(r)

        ri = i2t(errs)
        print "Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % tuple(ri)
        results.append(r + ri)

    print("-----------------------------------")
    print("Mean metrics: ")
    mean_metrics = numpy.array(results).mean(axis=0).flatten()
    print "Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % tuple(mean_metrics[:5])
    print "Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % tuple(mean_metrics[5:])

    # Join everything in a dict
    results_dict = OrderedDict([('cap_ret', OrderedDict([])),
                                ('img_ret', OrderedDict([]))])

    # Caption retrieval (image to text)
    results_dict["cap_ret"]["r1"] = mean_metrics[5]
    results_dict["cap_ret"]["r5"] = mean_metrics[6]
    results_dict["cap_ret"]["r10"] = mean_metrics[7]
    results_dict["cap_ret"]["medr"] = mean_metrics[8]

    # Image retrieval (text to image)
    results_dict["img_ret"]["r1"] = mean_metrics[0]
    results_dict["img_ret"]["r5"] = mean_metrics[1]
    results_dict["img_ret"]["r10"] = mean_metrics[2]
    results_dict["img_ret"]["medr"] = mean_metrics[3]

    score = mean_metrics[0:3].sum() + mean_metrics[5:8].sum()

    return results_dict, score
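# Usage sketch (assuming a trained `model` dict as described in the docstring;
# the result keys below come straight from the function above):
# results_dict, score = ranking_eval_Nfold(model, n_fold=1, subset='test')
# r1_caption = results_dict['cap_ret']['r1']   # recall@1, image-to-text
# r1_image = results_dict['img_ret']['r1']     # recall@1, text-to-image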
#!/usr/bin/env python
#-*-coding:utf-8-*-
""" Code check module """

import init
from datasource import Datasource

ds = Datasource()
ds.switch_datasource('stat')
for fin_row in ds.get_finances({'disabled =': 0}):
    [top_rate] = ds.get_top_rate({'RL.fin_id': fin_row['fin_id']})
    top_rate['event_dt_display'] = top_rate['event_dt'].strftime('%Y.%m.%d %H:%M:%S')
    top_rate['change_percent'] = ds.get_change_percent(fin_row['fin_id'])
    print("{event_dt_display} {curr_name} \tbuy: {buy_price} \tsell: {sell_price}\tchange percent: {change_percent:+05.2f}%"
          .format(**top_rate))