def _parse_sql_statements(cls, migration_sql):
    """Split *migration_sql* on ';' into complete SQL statements.

    A fragment whose quotes or parentheses are unbalanced (after
    normalisation) is glued back to the next fragment with the ';'
    restored, so separators inside strings or parenthesised bodies do
    not split a statement.  If the input ends while still unbalanced,
    the whole script is treated as unparseable and [] is returned.
    """
    statements = []
    pending = ''
    for fragment in migration_sql.split(';'):
        candidate = fragment if not pending else '%s;%s' % (pending, fragment)
        counts = Utils.count_occurrences(Utils.normalize_sql(candidate))
        balanced = (
            counts.get("'", 0) % 2 == 0
            and counts.get('"', 0) % 2 == 0
            and counts.get('(', 0) == counts.get(')', 0)
        )
        if balanced:
            statements.append(candidate)
            pending = ''
        else:
            pending = candidate
    if pending:
        # trailing statement never balanced: discard everything
        return []
    return [s.strip() for s in statements if s.strip()]
def _parse_sql_statements(self, migration_sql):
    """Break *migration_sql* into individual SQL statements.

    A script matching the object or anonymous-block pattern is split
    into its 'pre', 'principal' and 'pos' sections, the surrounding
    sections being parsed recursively.  Anything else is split on ';',
    re-joining fragments whose quotes/parentheses are unbalanced.
    """
    statements = []
    match = self.__re_objects.match(migration_sql) or self.__re_anonymous.match(migration_sql)
    if match and match.re.groups > 0:
        for part in ('pre', 'principal', 'pos'):
            piece = match.group(part)
            if not piece:
                continue
            if part == 'principal':
                statements.append(piece)
            else:
                statements.extend(self._parse_sql_statements(piece))
    else:
        pending = ''
        for fragment in migration_sql.split(';'):
            candidate = '%s;%s' % (pending, fragment) if pending else fragment
            balanced = (
                Utils.how_many(candidate, "'") % 2 == 0
                and Utils.how_many(candidate, '"') % 2 == 0
                and Utils.how_many(candidate, '(') == Utils.how_many(candidate, ')')
            )
            if balanced:
                statements.append(candidate)
                pending = ''
            else:
                pending = candidate
    return [s.strip() for s in statements if s.strip()]
def init_db(callback): """Instantiate the database, replacing existing tables. We should only need to call this once A table, called 'company_index', of all ticker symbols (from the CSVs) will be created. For each symbol, a table (named after the symbol) of historical data will also be made """ confirm = raw_input("this will erase most tables in the database, do you wish to proceede? [y/n]: ") if confirm != 'y': return ut = Utils() dba = Accessor() all_companies = ut.get_company_list() dba.connect() #pull historical information (since 2009, or the ipo year) for all stocks, and #write them to a table (named after their ticker symbol) failures = [] for company in all_companies: symbol = company['Symbol'] ipo = company['IPOyear'] try: if 'n/a' not in ipo: prices = ut.get_historical_summary(symbol, start="{}-01-01".format(ipo)) else: prices = ut.get_historical_summary(symbol) prices['Raw_Change'] = prices['Close'] - prices['Open'] prices['Percent_Change'] = (prices['Close'] - prices['Open']) / prices['Open'] dba.write(symbol, prices, if_exists='replace') print "wrote {}".format(symbol) except: print "failed on {}".format(symbol) failures.append(symbol) all_companies = filter(lambda x: x not in failures, all_companies) #prepare the dataframe for SQL. This involves reindexing the frame to use #the ticker symbol as it's index. We have to remove the symbol field afterwards, #otherwise it will try to create a table with two Symbol fields, and MySQL will #throw an error. df = pd.DataFrame(all_companies) df.index = df['Symbol'] #make the symbols the index df = df.drop('Symbol', 1) #remove the symbol field as its already stored as the index dba.write('company_index', df[:-1], if_exists='replace') dba.close() with open('failures.csv', 'w') as f: f.write('Symbol,') for sym in failures: f.write(",{}".format(sym))
def _parse_sql_statements(cls, migration_sql):
    """Split a migration script on ';' into balanced SQL statements.

    A fragment is accepted only once its single quotes, double quotes
    and parentheses are balanced; otherwise it is prepended (with the
    ';' restored) to the following fragment.  If the script ends while
    still unbalanced, an empty list is returned.
    """
    def _is_balanced(sql):
        # a statement is complete when quotes pair up and parens match
        occurrences = Utils.count_occurrences(sql)
        return (occurrences.get("'", 0) % 2 == 0
                and occurrences.get('"', 0) % 2 == 0
                and occurrences.get('(', 0) == occurrences.get(')', 0))

    completed = []
    carry = ''
    for piece in migration_sql.split(';'):
        sql = piece if carry == '' else '%s;%s' % (carry, piece)
        if _is_balanced(sql):
            completed.append(sql)
            carry = ''
        else:
            carry = sql
    if carry != '':
        return []
    return [stmt.strip() for stmt in completed if stmt.strip() != '']
def _parse_sql_statements(cls, migration_sql):
    """Parse a MySQL migration script into individual statements.

    Scripts matching the object-definition pattern are decomposed into
    'pre'/'main'/'pos' sections (pre and pos parsed recursively); other
    scripts are split on ';' while keeping quoted or parenthesised ';'
    intact.  Returns [] when the tail never balances.
    """
    collected = []
    carry = ''
    matched = MySQL.__re_objects.match(migration_sql)
    if matched and matched.re.groups > 0:
        if matched.group('pre'):
            collected.extend(MySQL._parse_sql_statements(matched.group('pre')))
        if matched.group('main'):
            collected.append(matched.group('main'))
        if matched.group('pos'):
            collected.extend(MySQL._parse_sql_statements(matched.group('pos')))
    else:
        for piece in migration_sql.split(';'):
            candidate = '%s;%s' % (carry, piece) if carry else piece
            tally = Utils.count_occurrences(candidate)
            if (tally.get("'", 0) % 2 == 0
                    and tally.get('"', 0) % 2 == 0
                    and tally.get('(', 0) == tally.get(')', 0)):
                collected.append(candidate)
                carry = ''
            else:
                carry = candidate
    if carry:
        # incomplete trailing statement invalidates the whole script
        return []
    return [stmt.strip() for stmt in collected if stmt.strip()]
def filter_rec(func, iterable):
    """Recursively filter *iterable* with predicate *func*, preserving
    nested list structure.

    Non-list items are kept when func(item) is truthy; list items are
    filtered recursively (an empty sub-list is kept as an empty list).
    Returns a new list; the input is not modified.
    """
    ret = []
    for item in iterable:
        # BUG FIX (idiom): type(item) == list rejected list subclasses;
        # isinstance is the correct check.
        if isinstance(item, list):
            ret.append(Utils.filter_rec(func, item))
        elif func(item):
            ret.append(item)
    return ret
def gen_replace_all(self):
    """Generator: yield rows of self.filename with phone numbers and URLs
    in column self.nth replaced by the class placeholders.

    Rows whose sentence fails self.__is_valid are skipped entirely.
    Each row whose sentence actually changed (the placeholder pattern is
    found afterwards) is logged as 'original -> replaced'.
    """
    # hoist the pattern compile out of the loop; the old code re-ran
    # re.search with a pattern string on every row
    replaced_pattern = re.compile('%s|%s' % (self.PHONE_REPLACEMENT, self.URL_REPLACEMENT))
    for row in Utils.iter_csv(self.filename):
        sentence = row[self.nth]
        if not self.__is_valid(sentence):
            continue
        replaced = Replacer.replace_url(Replacer.replace_phone(sentence))
        row[self.nth] = replaced
        # BUG FIX (idiom): bool(...) around re.search was redundant in a
        # boolean context; also use lazy logger arguments.
        if replaced_pattern.search(replaced):
            # assumes the CSV holds utf-8 byte strings (Python 2) — TODO confirm
            self.logger.info('%s -> %s', sentence.decode('utf-8'), replaced.decode('utf-8'))
        yield row
def _parse_sql_statements(self, migration_sql):
    """Split *migration_sql* on ';' into individual statements, keeping
    separators that fall inside quotes or unmatched parentheses attached
    to their statement."""
    parsed = []
    buffered = ''
    for chunk in migration_sql.split(';'):
        if buffered:
            # previous fragment was incomplete: restore the ';' and retry
            chunk = '%s;%s' % (buffered, chunk)
        quotes_ok = (Utils.how_many(chunk, "'") % 2 == 0
                     and Utils.how_many(chunk, '"') % 2 == 0)
        parens_ok = Utils.how_many(chunk, '(') == Utils.how_many(chunk, ')')
        if quotes_ok and parens_ok:
            parsed.append(chunk)
            buffered = ''
        else:
            buffered = chunk
    return [stmt.strip() for stmt in parsed if stmt.strip()]
def update_db(callback=None, start=None): """db script to iterate over all operating companies in our database, and update them with the day's historical data.""" ut = Utils() dba = Accessor() all_companies = ut.get_company_list() start = start if start else ut.yesterday() dba.connect() for symbol in all_companies: try: new_row = ut.get_historical_summary(symbol, start=start, end=ut.today()) if callback: new_row = callback(new_row) dba.write(symbol, new_row) print "updated {}".format(symbol) except: print "failed on {}".format(symbol) dba.close()
def _parse_sql_statements(self, migration_sql):
    """Parse a migration script into its individual SQL statements.

    Tries the object-definition pattern first, then the anonymous-block
    pattern; on a match the 'pre' and 'pos' sections are parsed
    recursively around the 'principal' statement.  Otherwise the script
    is split on ';' with quote/parenthesis balancing.
    """
    collected = []
    remainder = ''
    matched = self.__re_objects.match(migration_sql)
    if matched is None:
        matched = self.__re_anonymous.match(migration_sql)
    if matched is not None and matched.re.groups > 0:
        pre, principal, pos = (matched.group(g) for g in ('pre', 'principal', 'pos'))
        if pre:
            collected += self._parse_sql_statements(pre)
        if principal:
            collected.append(principal)
        if pos:
            collected += self._parse_sql_statements(pos)
    else:
        for piece in migration_sql.split(';'):
            candidate = piece if not remainder else '%s;%s' % (remainder, piece)
            if (Utils.how_many(candidate, "'") % 2 == 0
                    and Utils.how_many(candidate, '"') % 2 == 0
                    and Utils.how_many(candidate, '(') == Utils.how_many(candidate, ')')):
                collected.append(candidate)
                remainder = ''
            else:
                remainder = candidate
    return [stmt.strip() for stmt in collected if stmt.strip()]
def execute_change(self, sparql_up, sparql_down, execution_log=None):
    """Final Step. Execute the changes to the Database.

    Runs *sparql_up* through isql; if it reports anything on stderr the
    *sparql_down* script is executed as a rollback.  Raises
    MigrationException when the migration fails (noting whether the
    rollback itself succeeded).  Temporary script files are always
    removed, success or failure.
    """
    file_up = None
    file_down = None
    try:
        file_up = Utils.write_temporary_file(("set echo on;\n%s" % sparql_up), "file_up")
        stdout_value, stderr_value = self._run_isql(file_up, True)
        if len(stderr_value) > 0:
            # migration failed: attempt rollback with the down script
            file_down = Utils.write_temporary_file(("set echo on;\n%s" % sparql_down), "file_down")
            _, stderr_value_rollback = self._run_isql(file_down, True)
            # BUG FIX: the old messages used backslash line-continuations
            # *inside* the string literals, embedding the source file's
            # indentation into the user-visible error text.
            if len(stderr_value_rollback) > 0:
                raise MigrationException(
                    "\nerror executing migration statement: %s\n\n"
                    "Rollback done partially: error executing rollback "
                    "statement: %s" % (stderr_value, stderr_value_rollback))
            else:
                raise MigrationException(
                    "\nerror executing migration statement: %s\n\n"
                    "Rollback done successfully!!!" % stderr_value)
        if execution_log:
            execution_log(stdout_value)
    finally:
        # clean up the temporary isql scripts in every case
        if file_up and os.path.exists(file_up):
            os.unlink(file_up)
        if file_down and os.path.exists(file_down):
            os.unlink(file_down)
def __init__(self, config_file="simple-db-migrate.conf", environment=''):
    """Load configuration from *config_file*.

    When *environment* is given, keys prefixed with '<environment>_'
    override the corresponding unprefixed keys.  'utc_timestamp' is
    coerced to a real bool, and 'database_migrations_dir' is resolved
    relative to the config file's directory.
    """
    settings = Utils.get_variables_from_file(config_file)
    super(FileConfig, self).__init__(inital_config=settings)
    if environment:
        prefix = environment + "_"
        promoted = [k for k in self._config.keys() if k.startswith(prefix)]
        for key in promoted:
            # promote '<env>_key' to plain 'key'
            self.update(key[len(prefix):], self.get(key))
    raw_utc = str(self.get("utc_timestamp", 'False'))
    self.update("utc_timestamp", ast.literal_eval(raw_utc))
    migrations_dir = self.get("database_migrations_dir", None)
    if migrations_dir:
        config_dir = os.path.split(config_file)[0]
        resolved = FileConfig._parse_migrations_dir(migrations_dir, config_dir)
        self.update("database_migrations_dir", resolved)
def __init__(self, config_file="simple-db-migrate.conf", environment=''):
    """Read settings from *config_file*; keys prefixed with the
    lower-cased '<environment>_' override the unprefixed ones.

    Also normalises 'utc_timestamp' to a bool and resolves
    'database_migrations_dir' against the config file's directory.
    """
    super(FileConfig, self).__init__(
        inital_config=Utils.get_variables_from_file(config_file))
    if environment:
        prefix = environment.lower() + "_"
        for key in list(self._config.keys()):
            if not key.startswith(prefix):
                continue
            # '<env>_key' wins over plain 'key'
            self.update(key[len(prefix):], self.get(key))
    self.update("utc_timestamp",
                ast.literal_eval(str(self.get("utc_timestamp", 'False'))))
    migrations_dir = self.get("database_migrations_dir", None)
    if migrations_dir:
        config_dir = os.path.split(config_file)[0]
        self.update("database_migrations_dir",
                    FileConfig._parse_migrations_dir(migrations_dir, config_dir))
def _parse_sql_statements(self, migration_sql):
    """Split an Oracle migration script into individual statements.

    Comments are stripped first.  Scripts matching the object or
    anonymous PL/SQL patterns are decomposed into 'pre', 'principal'
    and 'pos' sections (pre/pos parsed recursively); everything else is
    split on ';' while keeping quoted/parenthesised ';' intact.
    Returns [] when the input ends with an unbalanced statement.
    """
    all_statements = []
    last_statement = ''
    # remove comments
    # BUG FIX: use raw strings for the \g<pre> group references — in a
    # normal string literal "\g" is an invalid escape sequence
    # (DeprecationWarning, and a SyntaxError from Python 3.12).
    migration_sql = Oracle.__re_comments_multi_line.sub(r"\g<pre>", migration_sql)
    migration_sql = Oracle.__re_comments_single_line.sub(r"\g<pre>", migration_sql)
    match_stmt = Oracle.__re_objects.match(migration_sql)
    if not match_stmt:
        match_stmt = Oracle.__re_anonymous.match(migration_sql)
    if match_stmt and match_stmt.re.groups > 0:
        if match_stmt.group('pre'):
            all_statements = all_statements + Oracle._parse_sql_statements(match_stmt.group('pre'))
        if match_stmt.group('principal'):
            all_statements.append(match_stmt.group('principal'))
        if match_stmt.group('pos'):
            all_statements = all_statements + Oracle._parse_sql_statements(match_stmt.group('pos'))
    else:
        for statement in migration_sql.split(';'):
            if len(last_statement) > 0:
                # restore the ';' eaten by split and keep accumulating
                curr_statement = '%s;%s' % (last_statement, statement)
            else:
                curr_statement = statement
            count = Utils.count_occurrences(curr_statement)
            single_quotes = count.get("'", 0)
            double_quotes = count.get('"', 0)
            left_parenthesis = count.get('(', 0)
            right_parenthesis = count.get(')', 0)
            if single_quotes % 2 == 0 and double_quotes % 2 == 0 and left_parenthesis == right_parenthesis:
                all_statements.append(curr_statement)
                last_statement = ''
            else:
                last_statement = curr_statement
    # a non-empty last_statement means the script ended unbalanced
    return [s.strip() for s in all_statements if ((s.strip() != "") and (last_statement == ""))]
def _parse_sql_statements(self, migration_sql):
    """Split an Oracle migration script into individual statements.

    Strips comments, then either decomposes the script via the object /
    anonymous-block regexes (recursing into 'pre' and 'pos') or splits
    on ";" with quote/parenthesis balancing.  Returns [] when the
    script ends with an incomplete (unbalanced) statement.
    """
    all_statements = []
    last_statement = ""
    # remove comments
    # BUG FIX: raw strings for the \g<pre> group references — "\g" in a
    # normal string is an invalid escape (SyntaxError from Python 3.12).
    migration_sql = Oracle.__re_comments_multi_line.sub(r"\g<pre>", migration_sql)
    migration_sql = Oracle.__re_comments_single_line.sub(r"\g<pre>", migration_sql)
    match_stmt = Oracle.__re_objects.match(migration_sql)
    if not match_stmt:
        match_stmt = Oracle.__re_anonymous.match(migration_sql)
    if match_stmt and match_stmt.re.groups > 0:
        if match_stmt.group("pre"):
            all_statements = all_statements + Oracle._parse_sql_statements(match_stmt.group("pre"))
        if match_stmt.group("principal"):
            all_statements.append(match_stmt.group("principal"))
        if match_stmt.group("pos"):
            all_statements = all_statements + Oracle._parse_sql_statements(match_stmt.group("pos"))
    else:
        for statement in migration_sql.split(";"):
            if len(last_statement) > 0:
                # restore the ';' eaten by split and keep accumulating
                curr_statement = "%s;%s" % (last_statement, statement)
            else:
                curr_statement = statement
            count = Utils.count_occurrences(curr_statement)
            single_quotes = count.get("'", 0)
            double_quotes = count.get('"', 0)
            left_parenthesis = count.get("(", 0)
            right_parenthesis = count.get(")", 0)
            if single_quotes % 2 == 0 and double_quotes % 2 == 0 and left_parenthesis == right_parenthesis:
                all_statements.append(curr_statement)
                last_statement = ""
            else:
                last_statement = curr_statement
    # a non-empty last_statement means the script ended unbalanced
    return [s.strip() for s in all_statements if ((s.strip() != "") and (last_statement == ""))]
def _parse_sql_statements(cls, migration_sql):
    """Split *migration_sql* on ';' into complete SQL statements,
    keeping ';' characters that sit inside quotes or parentheses
    attached to their statement."""
    result = []
    incomplete = ""
    for raw_piece in migration_sql.split(";"):
        piece = "%s;%s" % (incomplete, raw_piece) if incomplete else raw_piece
        tally = Utils.count_occurrences(piece)
        if (tally.get("'", 0) % 2 == 0
                and tally.get('"', 0) % 2 == 0
                and tally.get("(", 0) == tally.get(")", 0)):
            result.append(piece)
            incomplete = ""
        else:
            # unbalanced so far: carry over to the next fragment
            incomplete = piece
    return [entry.strip() for entry in result if entry.strip() != ""]
def extract_output(self, out_filename):
    """Write every replaced CSV row (from gen_replace_all) to
    *out_filename*, reporting progress every 10%."""
    total_rows = Utils.file_length(self.filename)
    with open(out_filename, 'w') as out_file:
        writer = csv.writer(out_file, delimiter=',')
        for record in Progress(self.gen_replace_all(), total_rows, 10):
            writer.writerow(record)
def filter_words(words):
    """Recursively keep only words whose trailing '_'-separated suffix
    (presumably a POS tag — confirm) is in LdaUtils.NECESSARIES."""
    def _is_necessary(word):
        return word.split('_')[-1] in LdaUtils.NECESSARIES

    return Utils.filter_rec(_is_necessary, words)
def __init__(self, filename, nth=-1):
    """Remember the CSV *filename* and target column index *nth*
    (default: last column), and open the replacement logger."""
    self.filename = filename
    self.nth = nth
    self.logger = Utils.logger(self.__class__.__name__, './replace.log')
def filter_words(words):
    """Recursively keep only words whose '_'-suffix appears in
    LdaUtils.NECESSARIES."""
    return Utils.filter_rec(
        lambda token: token.split('_')[-1] in LdaUtils.NECESSARIES,
        words)
def set_labels(self, sentences_csv_fname):
    """Load integer labels (first character of each entry) from the
    CSV and store them on self.labels; always returns True.

    NOTE(review): calling .split() on Utils.iter_csv(...)'s result
    assumes it returns a string rather than an iterator — confirm.
    """
    entries = Utils.iter_csv(sentences_csv_fname, 2).split()
    self.labels = [int(entry[0]) for entry in entries]
    return True
def __get_migrations_absolute_dir(self, config_file_path, migrations_dir):
    """Return the absolute path of *migrations_dir*, resolved relative
    to the directory that holds the config file."""
    base_dir = Utils.get_path_without_config_file_name(config_file_path)
    return os.path.abspath(base_dir + "/" + migrations_dir)
def set_labels(self, sentences_csv_fname):
    """Populate self.labels with the integer lead character of each
    whitespace-separated entry from the CSV; always returns True.

    NOTE(review): .split() here assumes Utils.iter_csv returns a
    string — confirm against its definition.
    """
    labels = []
    for entry in Utils.iter_csv(sentences_csv_fname, 2).split():
        labels.append(int(entry[0]))
    self.labels = labels
    return True
def _generate_migration_sparql_commands(self, origin_store, destination_store):
    """Build (forward, backward) SPARQL command strings for the triples
    present in *origin_store* but missing from *destination_store*.

    Forward commands are 'SPARQL INSERT INTO <graph>' statements; the
    backward string holds the matching 'SPARQL DELETE FROM <graph>'
    statements.  Blank-node subjects are serialised together with every
    triple that points at them and every triple they assert, so the
    whole node round-trips as one unit.

    NOTE(review): this block was recovered from a whitespace-mangled
    source; the exact internal layout of the two multi-line string
    literals (the SPARQL prefix query and the DELETE template, which
    contains mojibake characters reproduced verbatim below) could not
    be verified — confirm against the original file.
    """
    # triples in origin but not in destination; rdflib graph difference
    diff = (origin_store - destination_store) or []
    checked = set()  # blank nodes already emitted
    forward_migration = ""
    backward_migration = ""
    for subject, predicate, object_ in diff:
        if isinstance(subject, rdflib.term.BNode) and (
                not subject in checked):
            checked.add(subject)
            # query that identifies this blank node by its surrounding triples
            query_get_blank_node = """\
prefix owl: <http://www.w3.org/2002/07/owl#> prefix xsd: <http://www.w3.org/2001/XMLSchema#> SELECT DISTINCT ?s ?p ?o WHERE {"""
            blank_node_as_an_object = ""
            # triples that *reference* the blank node (it is the object)
            triples_with_blank_node_as_object = sorted(diff.subject_predicates(subject))
            for triple_subject, triple_predicate in triples_with_blank_node_as_object:
                query_get_blank_node = query_get_blank_node + "%s %s ?s . " % (triple_subject.n3(), triple_predicate.n3())
                blank_node_as_an_object = blank_node_as_an_object + "%s %s " % (
                    triple_subject.n3(), triple_predicate.n3())
            blank_node_as_a_subject = ""
            # triples the blank node *asserts* (it is the subject)
            triples_with_blank_node_as_subject = sorted(diff.predicate_objects(subject))
            for triple_predicate, triple_object in triples_with_blank_node_as_subject:
                query_get_blank_node = query_get_blank_node + "?s %s %s . " % (
                    triple_predicate.n3(), triple_object.n3())
                blank_node_as_a_subject = blank_node_as_a_subject + "%s %s ; " % (
                    triple_predicate.n3(), Utils.get_normalized_n3(triple_object))
            query_get_blank_node = query_get_blank_node + " ?s ?p ?o .} "
            # compare how many triples match the node's pattern on each side
            blank_node_existing_triples = len(destination_store.query(query_get_blank_node))
            blank_node_existed_triples = len(origin_store.query(query_get_blank_node))
            blank_node_triples_changed = blank_node_existing_triples != blank_node_existed_triples
            # emit the node only if it is missing or its triples differ
            if not blank_node_existing_triples or blank_node_triples_changed:
                forward_migration = forward_migration + \
                    u"\nSPARQL INSERT INTO <%s> { %s[%s] };" % (
                        self.__virtuoso_graph, blank_node_as_an_object, blank_node_as_a_subject)
                # drop the trailing '; ' before building the DELETE form
                blank_node_as_a_subject = blank_node_as_a_subject[:-2]
                backward_migration = backward_migration + \
                    (u"\nSPARQL DELETE FROM <%s> { %s ?s. ?s %s } WHERE " "{ %s ?s. 
?s %s };") % (self.__virtuoso_graph, blank_node_as_an_object, blank_node_as_a_subject, blank_node_as_an_object, blank_node_as_a_subject)
        # plain (non-blank-node) triples: one INSERT and one DELETE each
        if isinstance(subject, rdflib.term.URIRef) and \
                not isinstance(object_, rdflib.term.BNode):
            forward_migration = forward_migration + \
                u"\nSPARQL INSERT INTO <%s> {%s %s %s . };"\
                % (self.__virtuoso_graph, subject.n3(), predicate.n3(), object_.n3())
            backward_migration = backward_migration + \
                u"\nSPARQL DELETE FROM <%s> {%s %s %s . };" % (
                    self.__virtuoso_graph, subject.n3(), predicate.n3(), Utils.get_normalized_n3(object_))
    return forward_migration, backward_migration