Exemplo n.º 1
0
    def _parse_sql_statements(cls, migration_sql):
        """Split a migration script into individual SQL statements.

        The script is cut at every ';'. A fragment is accepted as a complete
        statement only when its normalized form (``Utils.normalize_sql``)
        holds an even number of single quotes, an even number of double
        quotes and as many '(' as ')'. Otherwise it is glued to the next
        fragment with the ';' restored and checked again.

        Returns the stripped, non-empty statements, or an empty list when the
        script ends while a fragment is still unbalanced.
        """
        complete = []
        pending = ''

        for fragment in migration_sql.split(';'):
            # Re-attach the fragment to whatever is still waiting to be closed.
            candidate = '%s;%s' % (pending, fragment) if pending else fragment

            occurrences = Utils.count_occurrences(Utils.normalize_sql(candidate))
            n_single = occurrences.get("'", 0)
            n_double = occurrences.get('"', 0)
            n_open = occurrences.get('(', 0)
            n_close = occurrences.get(')', 0)

            if n_single % 2 != 0 or n_double % 2 != 0 or n_open != n_close:
                pending = candidate
                continue

            complete.append(candidate)
            pending = ''

        if pending != "":
            # A trailing unbalanced fragment means nothing is reported.
            return []
        return [text for text in (s.strip() for s in complete) if text != ""]
Exemplo n.º 2
0
    def _parse_sql_statements(self, migration_sql):
        """Break a migration script into the statements it contains.

        When the script matches ``__re_objects`` (or, failing that,
        ``__re_anonymous``), the 'principal' group is kept as one statement
        and the 'pre' and 'pos' groups are parsed recursively. Otherwise the
        script is cut at ';' and fragments are merged until single quotes,
        double quotes and parentheses are balanced.

        Returns the stripped, non-empty statements; a trailing fragment that
        never becomes balanced is left out.
        """
        found = self.__re_objects.match(migration_sql)
        if not found:
            found = self.__re_anonymous.match(migration_sql)

        collected = []
        if found and found.re.groups > 0:
            leading = found.group('pre')
            if leading:
                collected.extend(self._parse_sql_statements(leading))
            core = found.group('principal')
            if core:
                collected.append(core)
            trailing = found.group('pos')
            if trailing:
                collected.extend(self._parse_sql_statements(trailing))
        else:
            pending = ''
            for fragment in migration_sql.split(';'):
                candidate = '%s;%s' % (pending, fragment) if pending else fragment

                n_single = Utils.how_many(candidate, "'")
                n_double = Utils.how_many(candidate, '"')
                n_open = Utils.how_many(candidate, '(')
                n_close = Utils.how_many(candidate, ')')

                if n_single % 2 != 0 or n_double % 2 != 0 or n_open != n_close:
                    # Still inside a literal or a parenthesis: keep merging.
                    pending = candidate
                    continue

                collected.append(candidate)
                pending = ''

        return [text for text in (s.strip() for s in collected) if text != ""]
Exemplo n.º 3
0
def init_db(callback):
	"""Instantiate the database, replacing existing tables. We should only need to call this once.

	A table, called 'company_index', of all ticker symbols (from the CSVs) will be created.
	For each symbol, a table (named after the symbol) of historical data will also be made.
	Symbols whose download or write fails are left out of 'company_index' and are
	listed in 'failures.csv'.

	callback -- accepted for interface compatibility; it is not used by this function.
	"""

	confirm = raw_input("this will erase most tables in the database, do you wish to proceede? [y/n]: ")

	if confirm != 'y':
		return

	ut = Utils()
	dba = Accessor()
	all_companies = ut.get_company_list()

	dba.connect()

	failures = []
	try:
		#pull historical information (since the ipo year, when one is known) for all
		#stocks, and write them to a table (named after their ticker symbol)
		for company in all_companies:
			symbol = company['Symbol']
			ipo = company['IPOyear']

			try:
				if 'n/a' not in ipo:
					prices = ut.get_historical_summary(symbol, start="{}-01-01".format(ipo))
				else:
					prices = ut.get_historical_summary(symbol)

				prices['Raw_Change'] = prices['Close'] - prices['Open']
				prices['Percent_Change'] = (prices['Close'] - prices['Open']) / prices['Open']
				dba.write(symbol, prices, if_exists='replace')
				print("wrote {}".format(symbol))
			except Exception:
				#best effort: record the symbol and carry on, but let
				#KeyboardInterrupt / SystemExit propagate
				print("failed on {}".format(symbol))
				failures.append(symbol)

		#failures holds ticker symbols, so the records have to be compared by
		#their 'Symbol' field (comparing whole records never matched anything)
		failed_symbols = set(failures)
		all_companies = [c for c in all_companies if c['Symbol'] not in failed_symbols]

		if all_companies:
			#prepare the dataframe for SQL. This involves reindexing the frame to use
			#the ticker symbol as its index. We have to remove the symbol field afterwards,
			#otherwise it will try to create a table with two Symbol fields, and MySQL will
			#throw an error.
			df = pd.DataFrame(all_companies)
			df.index = df['Symbol'] #make the symbols the index
			df = df.drop('Symbol', axis=1) #remove the symbol field as it's already stored as the index
			#NOTE(review): df[:-1] drops the last row; kept as in the original -- confirm intent
			dba.write('company_index', df[:-1], if_exists='replace')
	finally:
		#always release the connection, even if something above raised
		dba.close()

	#NOTE(review): output is a single line "Symbol,,A,B,..."; format kept as-is
	with open('failures.csv', 'w') as f:
		f.write('Symbol,')
		for sym in failures:
			f.write(",{}".format(sym))
Exemplo n.º 4
0
    def _parse_sql_statements(cls, migration_sql):
        """Cut a migration script into separate SQL statements.

        Fragments produced by splitting on ';' are merged back together (with
        the ';' restored) until the merged text has an even number of single
        quotes, an even number of double quotes and matching '(' / ')' counts,
        as reported by ``Utils.count_occurrences``.

        Returns the stripped, non-empty statements, or an empty list if the
        script ends with a fragment that never became balanced.
        """
        statements = []
        carry = ''

        for piece in migration_sql.split(';'):
            candidate = '%s;%s' % (carry, piece) if carry else piece

            tally = Utils.count_occurrences(candidate)
            n_single = tally.get("'", 0)
            n_double = tally.get('"', 0)
            n_open = tally.get('(', 0)
            n_close = tally.get(')', 0)

            balanced = (n_single % 2 == 0
                        and n_double % 2 == 0
                        and n_open == n_close)
            if balanced:
                statements.append(candidate)
                carry = ''
            else:
                # The ';' was inside a literal or parenthesis: keep merging.
                carry = candidate

        if carry != "":
            return []
        return [text for text in (s.strip() for s in statements) if text != ""]
Exemplo n.º 5
0
    def _parse_sql_statements(cls, migration_sql):
        """Split a MySQL migration script into statements.

        If the script matches ``MySQL.__re_objects``, its 'main' group is kept
        as a single statement while the 'pre' and 'pos' groups are parsed
        recursively. Otherwise the script is cut at ';' and fragments are
        merged until quotes and parentheses are balanced.

        Returns the stripped, non-empty statements, or an empty list when the
        ';'-splitting path ends on an unbalanced fragment.
        """
        collected = []
        pending = ''

        found = MySQL.__re_objects.match(migration_sql)

        if found and found.re.groups > 0:
            leading = found.group('pre')
            if leading:
                collected.extend(MySQL._parse_sql_statements(leading))
            core = found.group('main')
            if core:
                collected.append(core)
            trailing = found.group('pos')
            if trailing:
                collected.extend(MySQL._parse_sql_statements(trailing))
        else:
            for fragment in migration_sql.split(';'):
                candidate = '%s;%s' % (pending, fragment) if pending else fragment

                tally = Utils.count_occurrences(candidate)
                n_single = tally.get("'", 0)
                n_double = tally.get('"', 0)
                n_open = tally.get('(', 0)
                n_close = tally.get(')', 0)

                if n_single % 2 != 0 or n_double % 2 != 0 or n_open != n_close:
                    pending = candidate
                    continue

                collected.append(candidate)
                pending = ''

        if pending != "":
            # Unterminated trailing fragment: report nothing.
            return []
        return [text for text in (s.strip() for s in collected) if text != ""]
Exemplo n.º 6
0
    def _parse_sql_statements(cls, migration_sql):
        """Return the list of statements found in a MySQL migration script.

        A script matching ``MySQL.__re_objects`` contributes its 'main' group
        verbatim; the text before ('pre') and after ('pos') is parsed by
        recursive calls. Any other script is split on ';', re-joining
        fragments while single quotes, double quotes or parentheses remain
        unbalanced.

        The result holds stripped, non-empty statements. It is empty when the
        split path finishes with an unbalanced fragment still pending.
        """
        result = []
        unfinished = ''

        object_match = MySQL.__re_objects.match(migration_sql)

        if not (object_match and object_match.re.groups > 0):
            for part in migration_sql.split(';'):
                if unfinished:
                    current = '%s;%s' % (unfinished, part)
                else:
                    current = part

                counts = Utils.count_occurrences(current)
                singles = counts.get("'", 0)
                doubles = counts.get('"', 0)
                opened = counts.get('(', 0)
                closed = counts.get(')', 0)

                is_whole = singles % 2 == 0 and doubles % 2 == 0 and opened == closed
                if is_whole:
                    result.append(current)
                unfinished = '' if is_whole else current
        else:
            before = object_match.group('pre')
            if before:
                result = result + MySQL._parse_sql_statements(before)
            main_part = object_match.group('main')
            if main_part:
                result.append(main_part)
            after = object_match.group('pos')
            if after:
                result = result + MySQL._parse_sql_statements(after)

        if unfinished != "":
            return []
        return [text for text in (s.strip() for s in result) if text != ""]
Exemplo n.º 7
0
 def filter_rec(func, iterable):
     """Filter a possibly nested list, keeping the nesting.

     Items that are exactly of type ``list`` are filtered recursively and
     always kept (even if they end up empty); any other item is kept only
     when ``func(item)`` is truthy. Returns a new list.
     """
     kept = []
     for element in iterable:
         if type(element) == list:
             # Nested list: recurse and keep the filtered sub-list.
             kept.append(Utils.filter_rec(func, element))
             continue
         if func(element):
             kept.append(element)
     return kept
Exemplo n.º 8
0
 def filter_rec(func, iterable):
     """Recursively apply a predicate to a nested list structure.

     Sub-lists (items whose type is exactly ``list``) are processed with a
     recursive call and their filtered result is appended unconditionally.
     Every other item is appended only if ``func(item)`` is truthy.
     """
     result = []
     for entry in iterable:
         is_sublist = type(entry) == list
         if is_sublist:
             result.append(Utils.filter_rec(func, entry))
         elif func(entry):
             result.append(entry)
     return result
Exemplo n.º 9
0
 def gen_replace_all(self):
     """Yield the rows of ``self.filename`` with column ``self.nth`` rewritten.

     Rows whose sentence fails ``__is_valid`` are skipped. For the others the
     sentence goes through ``Replacer.replace_phone`` and then
     ``Replacer.replace_url``, and is stored back into the row. When the
     stored text contains either replacement marker, the before/after pair
     is logged.
     """
     marker_regex = '%s|%s' % (self.PHONE_REPLACEMENT, self.URL_REPLACEMENT)
     for row in Utils.iter_csv(self.filename):
         original = row[self.nth]
         if self.__is_valid(original):
             without_phone = Replacer.replace_phone(original)
             cleaned = Replacer.replace_url(without_phone)
             row[self.nth] = cleaned
             if re.search(marker_regex, row[self.nth]) is not None:
                 # Something was actually substituted: record the change.
                 self.logger.info('%s -> %s' % (original.decode('utf-8'), cleaned.decode('utf-8')))
             yield row
Exemplo n.º 10
0
    def _parse_sql_statements(self, migration_sql):
        """Split a migration script into SQL statements.

        The script is cut at ';'. A fragment counts as a full statement once
        it has an even number of single quotes, an even number of double
        quotes and equal numbers of '(' and ')' (counted with
        ``Utils.how_many``); until then it is merged with the following
        fragment, restoring the ';'.

        Returns the stripped, non-empty statements. A trailing fragment that
        never becomes balanced is left out.
        """
        finished = []
        buffered = ''

        for chunk in migration_sql.split(';'):
            candidate = '%s;%s' % (buffered, chunk) if buffered else chunk

            n_single = Utils.how_many(candidate, "'")
            n_double = Utils.how_many(candidate, '"')
            n_open = Utils.how_many(candidate, '(')
            n_close = Utils.how_many(candidate, ')')

            if n_single % 2 != 0 or n_double % 2 != 0 or n_open != n_close:
                # The ';' fell inside a literal or parenthesis: keep merging.
                buffered = candidate
                continue

            finished.append(candidate)
            buffered = ''

        return [text for text in (s.strip() for s in finished) if text != ""]
Exemplo n.º 11
0
 def _parse_sql_statements(self, migration_sql):
     """Return the SQL statements contained in ``migration_sql``.

     Splitting happens on ';'. Consecutive fragments are re-joined (with the
     ';' put back) for as long as the joined text has an odd number of
     single or double quotes or differing '(' and ')' counts, as measured by
     ``Utils.how_many``.

     Only stripped, non-empty statements are returned; an unbalanced tail is
     not included.
     """
     statements = []
     carry = ''

     for part in migration_sql.split(';'):
         if carry:
             current = '%s;%s' % (carry, part)
         else:
             current = part

         singles = Utils.how_many(current, "'")
         doubles = Utils.how_many(current, '"')
         opened = Utils.how_many(current, '(')
         closed = Utils.how_many(current, ')')

         is_whole = singles % 2 == 0 and doubles % 2 == 0 and opened == closed
         if is_whole:
             statements.append(current)
         carry = '' if is_whole else current

     return [text for text in (s.strip() for s in statements) if text != ""]
Exemplo n.º 12
0
def update_db(callback=None, start=None):
	"""db script to iterate over all operating companies in our database, and update them with
	the day's historical data.

	callback -- optional function applied to each fetched summary before it is written;
	            its return value is what gets written.
	start    -- optional start of the fetched range; defaults to ut.yesterday().
	            The end of the range is always ut.today().

	A failure on one symbol is reported and does not stop the run.
	"""
	ut = Utils()
	dba = Accessor()
	all_companies = ut.get_company_list()
	start = start if start else ut.yesterday()

	dba.connect()

	try:
		#NOTE(review): each item of get_company_list() is used directly as the
		#symbol / table name here -- confirm that matches what the list holds
		for symbol in all_companies:
			try:
				new_row = ut.get_historical_summary(symbol, start=start, end=ut.today())
				if callback:
					new_row = callback(new_row)
				dba.write(symbol, new_row)
				print("updated {}".format(symbol))
			except Exception:
				#best effort per symbol, but let KeyboardInterrupt / SystemExit through
				print("failed on {}".format(symbol))
	finally:
		#always release the connection, even when the loop is interrupted
		dba.close()
Exemplo n.º 13
0
    def _parse_sql_statements(self, migration_sql):
        """Return the statements contained in a migration script.

        The script is first tried against ``__re_objects`` and then
        ``__re_anonymous``. On a match, the 'principal' group becomes one
        statement and the 'pre' / 'pos' groups are parsed by recursive calls.
        With no match, the script is split on ';' and fragments are re-joined
        while quotes or parentheses are unbalanced (``Utils.how_many``).

        Only stripped, non-empty statements are returned; an unbalanced tail
        is not included.
        """
        result = []

        block_match = self.__re_objects.match(migration_sql)
        if not block_match:
            block_match = self.__re_anonymous.match(migration_sql)

        if not (block_match and block_match.re.groups > 0):
            unfinished = ''
            for part in migration_sql.split(';'):
                if unfinished:
                    current = '%s;%s' % (unfinished, part)
                else:
                    current = part

                singles = Utils.how_many(current, "'")
                doubles = Utils.how_many(current, '"')
                opened = Utils.how_many(current, '(')
                closed = Utils.how_many(current, ')')

                is_whole = singles % 2 == 0 and doubles % 2 == 0 and opened == closed
                if is_whole:
                    result.append(current)
                unfinished = '' if is_whole else current
        else:
            before = block_match.group('pre')
            if before:
                result = result + self._parse_sql_statements(before)
            principal = block_match.group('principal')
            if principal:
                result.append(principal)
            after = block_match.group('pos')
            if after:
                result = result + self._parse_sql_statements(after)

        return [text for text in (s.strip() for s in result) if text != ""]
    def execute_change(self, sparql_up, sparql_down, execution_log=None):
        """Final step: execute the changes against the database.

        ``sparql_up`` is written to a temporary file (prefixed with
        "set echo on;") and run through ``self._run_isql``. If that run
        produces anything on stderr, ``sparql_down`` is executed the same way
        as a rollback and a ``MigrationException`` is raised; its message
        says whether the rollback itself also reported errors.

        On success, ``execution_log`` (if given) is called with the captured
        stdout. The temporary files are removed in every case.
        """

        file_up = None
        file_down = None
        try:
            file_up = Utils.write_temporary_file(("set echo on;\n%s" %
                                                                    sparql_up),
                                                 "file_up")

            #db = self.connect()
            stdout_value, stderr_value = self._run_isql(file_up, True)
            if len(stderr_value) > 0:
                # The forward migration reported errors: run the rollback script.
                file_down = Utils.write_temporary_file(("set echo on;\n%s" %
                                                                sparql_down),
                                                       "file_down")
                _, stderr_value_rollback = self._run_isql(file_down, True)
                # Either way the migration failed; the message only differs on
                # whether the rollback reported errors too.
                if len(stderr_value_rollback) > 0:
                    raise MigrationException("\nerror executing migration\
                                        statement: %s\n\nRollback done\
                                        partially: error executing rollback\
                                        statement: %s" % (stderr_value,
                                                        stderr_value_rollback))
                else:
                    raise MigrationException("\nerror executing migration\
                                             statement: %s\n\nRollback done\
                                             successfully!!!" % stderr_value)

            if execution_log:
                execution_log(stdout_value)
        finally:
            # Remove whichever temporary files were created above.
            if file_up and os.path.exists(file_up):
                os.unlink(file_up)

            if file_down and os.path.exists(file_down):
                os.unlink(file_down)
Exemplo n.º 15
0
    def __init__(self, config_file="simple-db-migrate.conf", environment=''):
        """Build the configuration from a settings file.

        config_file -- path of the file read with
                       ``Utils.get_variables_from_file``.
        environment -- optional environment name; every setting whose key
                       starts with ``<environment>_`` is copied to the same
                       key without that prefix.

        "utc_timestamp" is normalized through ``ast.literal_eval`` (default
        'False'), and "database_migrations_dir", when set, is passed through
        ``FileConfig._parse_migrations_dir`` together with the directory of
        ``config_file``.
        """
        # read configuration
        settings = Utils.get_variables_from_file(config_file)

        super(FileConfig, self).__init__(inital_config=settings)

        if environment:
            prefix = environment + "_"
            # Iterate over a snapshot of the keys: update() is called inside
            # the loop, and a dict that grows while its live key view is being
            # iterated raises RuntimeError on Python 3.
            for key in list(self._config.keys()):
                if key.startswith(prefix):
                    self.update(key[len(prefix):], self.get(key))

        self.update("utc_timestamp", ast.literal_eval(str(self.get("utc_timestamp", 'False'))))

        migrations_dir = self.get("database_migrations_dir", None)
        if migrations_dir:
            config_dir = os.path.split(config_file)[0]
            self.update("database_migrations_dir", FileConfig._parse_migrations_dir(migrations_dir, config_dir))
Exemplo n.º 16
0
    def __init__(self, config_file="simple-db-migrate.conf", environment=''):
        """Build the configuration from a settings file.

        config_file -- path of the file read with
                       ``Utils.get_variables_from_file``.
        environment -- optional environment name; it is lower-cased, and every
                       setting whose key starts with ``<environment>_`` is
                       copied to the same key without that prefix.

        "utc_timestamp" is normalized through ``ast.literal_eval`` (default
        'False'), and "database_migrations_dir", when set, is passed through
        ``FileConfig._parse_migrations_dir`` together with the directory of
        ``config_file``.
        """
        # read configuration
        settings = Utils.get_variables_from_file(config_file)

        super(FileConfig, self).__init__(inital_config=settings)

        if environment:
            prefix = environment.lower() + "_"
            # Iterate over a snapshot of the keys: update() is called inside
            # the loop, and a dict that grows while its live key view is being
            # iterated raises RuntimeError on Python 3.
            for key in list(self._config.keys()):
                if key.startswith(prefix):
                    self.update(key[len(prefix):], self.get(key))

        self.update("utc_timestamp",
                    ast.literal_eval(str(self.get("utc_timestamp", 'False'))))

        migrations_dir = self.get("database_migrations_dir", None)
        if migrations_dir:
            config_dir = os.path.split(config_file)[0]
            self.update(
                "database_migrations_dir",
                FileConfig._parse_migrations_dir(migrations_dir, config_dir))
Exemplo n.º 17
0
    def _parse_sql_statements(self, migration_sql):
        """Split an Oracle migration script into statements.

        Every match of the two comment patterns is first replaced by its
        'pre' group. If the remaining text matches ``Oracle.__re_objects``
        (or, failing that, ``Oracle.__re_anonymous``), the 'principal' group
        is kept as one statement and the 'pre' / 'pos' groups are parsed
        recursively. Otherwise the text is split on ';' and fragments are
        merged until single quotes, double quotes and parentheses are
        balanced.

        Returns the stripped, non-empty statements, or an empty list when the
        ';'-splitting path ends on an unbalanced fragment.
        """
        all_statements = []
        last_statement = ''

        # remove comments; raw strings are used because "\g" is not a valid
        # string escape (the replacement text itself is unchanged)
        migration_sql = Oracle.__re_comments_multi_line.sub(r"\g<pre>", migration_sql)
        migration_sql = Oracle.__re_comments_single_line.sub(r"\g<pre>", migration_sql)

        match_stmt = Oracle.__re_objects.match(migration_sql)
        if not match_stmt:
            match_stmt = Oracle.__re_anonymous.match(migration_sql)

        if match_stmt and match_stmt.re.groups > 0:
            if match_stmt.group('pre'):
                all_statements = all_statements + Oracle._parse_sql_statements(match_stmt.group('pre'))
            if match_stmt.group('principal'):
                all_statements.append(match_stmt.group('principal'))
            if match_stmt.group('pos'):
                all_statements = all_statements + Oracle._parse_sql_statements(match_stmt.group('pos'))

        else:
            for statement in migration_sql.split(';'):
                if last_statement:
                    curr_statement = '%s;%s' % (last_statement, statement)
                else:
                    curr_statement = statement

                count = Utils.count_occurrences(curr_statement)
                single_quotes = count.get("'", 0)
                double_quotes = count.get('"', 0)
                left_parenthesis = count.get('(', 0)
                right_parenthesis = count.get(')', 0)

                if single_quotes % 2 == 0 and double_quotes % 2 == 0 and left_parenthesis == right_parenthesis:
                    all_statements.append(curr_statement)
                    last_statement = ''
                else:
                    # the ';' was inside a literal or parenthesis: keep merging
                    last_statement = curr_statement

        # A pending unbalanced fragment invalidates the whole result; checked
        # once here instead of once per statement.
        if last_statement != "":
            return []
        return [s for s in (stmt.strip() for stmt in all_statements) if s != ""]
Exemplo n.º 18
0
    def _parse_sql_statements(self, migration_sql):
        """Split an Oracle migration script into statements.

        Every match of the two comment patterns is first replaced by its
        "pre" group. If the remaining text matches ``Oracle.__re_objects``
        (or, failing that, ``Oracle.__re_anonymous``), the "principal" group
        is kept as one statement and the "pre" / "pos" groups are parsed
        recursively. Otherwise the text is split on ";" and fragments are
        merged until single quotes, double quotes and parentheses are
        balanced.

        Returns the stripped, non-empty statements, or an empty list when the
        ";"-splitting path ends on an unbalanced fragment.
        """
        all_statements = []
        last_statement = ""

        # remove comments; raw strings are used because "\g" is not a valid
        # string escape (the replacement text itself is unchanged)
        migration_sql = Oracle.__re_comments_multi_line.sub(r"\g<pre>", migration_sql)
        migration_sql = Oracle.__re_comments_single_line.sub(r"\g<pre>", migration_sql)

        match_stmt = Oracle.__re_objects.match(migration_sql)
        if not match_stmt:
            match_stmt = Oracle.__re_anonymous.match(migration_sql)

        if match_stmt and match_stmt.re.groups > 0:
            if match_stmt.group("pre"):
                all_statements = all_statements + Oracle._parse_sql_statements(match_stmt.group("pre"))
            if match_stmt.group("principal"):
                all_statements.append(match_stmt.group("principal"))
            if match_stmt.group("pos"):
                all_statements = all_statements + Oracle._parse_sql_statements(match_stmt.group("pos"))

        else:
            for statement in migration_sql.split(";"):
                if last_statement:
                    curr_statement = "%s;%s" % (last_statement, statement)
                else:
                    curr_statement = statement

                count = Utils.count_occurrences(curr_statement)
                single_quotes = count.get("'", 0)
                double_quotes = count.get('"', 0)
                left_parenthesis = count.get("(", 0)
                right_parenthesis = count.get(")", 0)

                if single_quotes % 2 == 0 and double_quotes % 2 == 0 and left_parenthesis == right_parenthesis:
                    all_statements.append(curr_statement)
                    last_statement = ""
                else:
                    # the ";" was inside a literal or parenthesis: keep merging
                    last_statement = curr_statement

        # A pending unbalanced fragment invalidates the whole result; checked
        # once here instead of once per statement.
        if last_statement != "":
            return []
        return [s for s in (stmt.strip() for stmt in all_statements) if s != ""]
Exemplo n.º 19
0
    def _parse_sql_statements(cls, migration_sql):
        """Split a migration script into SQL statements.

        The script is cut at ";". Fragments are merged back together (with
        the ";" restored) until the merged text has an even number of single
        quotes, an even number of double quotes and matching "(" / ")"
        counts, as reported by ``Utils.count_occurrences``.

        Returns the stripped, non-empty statements. A trailing fragment that
        never becomes balanced is left out.
        """
        finished = []
        buffered = ""

        for chunk in migration_sql.split(";"):
            candidate = "%s;%s" % (buffered, chunk) if buffered else chunk

            tally = Utils.count_occurrences(candidate)
            n_single = tally.get("'", 0)
            n_double = tally.get('"', 0)
            n_open = tally.get("(", 0)
            n_close = tally.get(")", 0)

            if n_single % 2 != 0 or n_double % 2 != 0 or n_open != n_close:
                # Still inside a literal or a parenthesis: keep merging.
                buffered = candidate
                continue

            finished.append(candidate)
            buffered = ""

        return [text for text in (s.strip() for s in finished) if text != ""]
Exemplo n.º 20
0
 def extract_output(self, out_filename):
     """Write every row produced by ``gen_replace_all`` to ``out_filename`` as CSV.

     The rows are wrapped in ``Progress`` together with the length of
     ``self.filename`` and the constant 10.
     """
     with open(out_filename, 'w') as out_file:
         writer = csv.writer(out_file, delimiter=',')
         rows = self.gen_replace_all()
         # Measured after the output file is opened, as in the original order.
         total = Utils.file_length(self.filename)
         writer.writerows(Progress(rows, total, 10))
Exemplo n.º 21
0
 def filter_words(words):
     """Filter ``words`` with ``Utils.filter_rec``, keeping the needed ones.

     A word is kept when the text after its last '_' is a member of
     ``LdaUtils.NECESSARIES``.
     """
     def is_necessary(word):
         suffix = word.split('_')[-1]
         return suffix in LdaUtils.NECESSARIES

     return Utils.filter_rec(is_necessary, words)
Exemplo n.º 22
0
 def __init__(self, filename, nth=-1):
     """Remember the input file and the column to process.

     filename -- stored as ``self.filename``.
     nth      -- stored as ``self.nth``; defaults to -1.

     A logger named after the class is created through ``Utils.logger``
     with './replace.log' as its second argument.
     """
     self.filename, self.nth = filename, nth
     logger_name = self.__class__.__name__
     self.logger = Utils.logger(logger_name, './replace.log')
Exemplo n.º 23
0
 def filter_words(words):
     """Return ``words`` filtered through ``Utils.filter_rec``.

     The predicate accepts a word when the part following its last
     underscore belongs to ``LdaUtils.NECESSARIES``.
     """
     def has_necessary_tag(word):
         # rsplit with maxsplit=1 yields the same final piece as split('_').
         return word.rsplit('_', 1)[-1] in LdaUtils.NECESSARIES

     return Utils.filter_rec(has_necessary_tag, words)
Exemplo n.º 24
0
 def set_labels(self, sentences_csv_fname):
     """Load integer labels into ``self.labels`` and return True.

     The value returned by ``Utils.iter_csv(sentences_csv_fname, 2)`` is
     split on whitespace, and the first character of each piece is
     converted with ``int``.
     NOTE(review): this relies on ``iter_csv`` returning something with a
     ``split`` method when called with two arguments -- confirm.
     """
     collected = []
     for piece in Utils.iter_csv(sentences_csv_fname, 2).split():
         collected.append(int(piece[0]))
     self.labels = collected
     return True
Exemplo n.º 25
0
 def __get_migrations_absolute_dir(self, config_file_path, migrations_dir):
     """Return the absolute form of ``migrations_dir``.

     ``migrations_dir`` is appended, after a "/", to whatever
     ``Utils.get_path_without_config_file_name`` returns for
     ``config_file_path``; the result is passed to ``os.path.abspath``.
     """
     base_dir = Utils.get_path_without_config_file_name(config_file_path)
     combined = "%s/%s" % (base_dir, migrations_dir)
     return os.path.abspath(combined)
Exemplo n.º 26
0
 def set_labels(self, sentences_csv_fname):
     """Fill ``self.labels`` with integer labels; always returns True.

     ``Utils.iter_csv(sentences_csv_fname, 2)`` is split on whitespace and
     the leading character of every piece is turned into an ``int``.
     NOTE(review): assumes the two-argument ``iter_csv`` call returns an
     object offering ``split`` -- verify against ``Utils``.
     """
     pieces = Utils.iter_csv(sentences_csv_fname, 2).split()
     self.labels = list(map(lambda piece: int(piece[0]), pieces))
     return True
Exemplo n.º 27
0
    def _generate_migration_sparql_commands(self, origin_store,
                                            destination_store):
        """Build forward and backward SPARQL commands from a store difference.

        The triples of ``origin_store - destination_store`` are walked once.
        Blank-node subjects are handled as a group (once per node); URI
        subjects whose object is not a blank node are handled triple by
        triple. Each handled item appends an INSERT command to the forward
        script and a DELETE command to the backward script, both targeting
        the graph named by ``self.__virtuoso_graph``.

        Returns a tuple ``(forward_migration, backward_migration)`` of
        strings.
        """
        # An empty difference falls back to [] so the loop below is skipped.
        diff = (origin_store - destination_store) or []
        # Blank nodes already processed, so each one is emitted only once.
        checked = set()
        forward_migration = ""
        backward_migration = ""

        for subject, predicate, object_ in diff:

           # Case 1: the subject is a blank node that has not been seen yet.
           if isinstance(subject, rdflib.term.BNode) and (
                                                    not subject in checked):
                checked.add(subject)

                # Query that locates the blank node (?s) through the triples
                # around it; the patterns are appended by the loops below.
                query_get_blank_node = """\
                prefix owl: <http://www.w3.org/2002/07/owl#>
                prefix xsd: <http://www.w3.org/2001/XMLSchema#>
                SELECT DISTINCT ?s ?p ?o WHERE
                {"""

                # Triples of the diff in which the blank node is the object.
                blank_node_as_an_object = ""
                triples_with_blank_node_as_object = sorted(diff.subject_predicates(subject))
                for triple_subject, triple_predicate in triples_with_blank_node_as_object:
                    query_get_blank_node = query_get_blank_node + "%s %s ?s . " % (triple_subject.n3(),
                                                         triple_predicate.n3())
                    blank_node_as_an_object = blank_node_as_an_object + "%s %s " % (
                                                              triple_subject.n3(),
                                                              triple_predicate.n3())

                # Triples of the diff in which the blank node is the subject.
                # The query uses the plain n3() form of the object, while the
                # generated command text uses Utils.get_normalized_n3().
                blank_node_as_a_subject = ""
                triples_with_blank_node_as_subject = sorted(diff.predicate_objects(subject))
                for triple_predicate, triple_object in triples_with_blank_node_as_subject:
                    query_get_blank_node = query_get_blank_node + "?s %s %s . " % (
                        triple_predicate.n3(), triple_object.n3())
                    blank_node_as_a_subject = blank_node_as_a_subject + "%s %s ; " % (
                        triple_predicate.n3(), Utils.get_normalized_n3(triple_object))

                query_get_blank_node = query_get_blank_node + " ?s ?p ?o .} "

                # Number of results the same query yields in each store.
                blank_node_existing_triples = len(destination_store.query(query_get_blank_node))
                blank_node_existed_triples = len(origin_store.query(query_get_blank_node))

                blank_node_triples_changed = blank_node_existing_triples != blank_node_existed_triples

                # Emit commands only when the destination store has no result
                # for the query, or the two stores disagree on the count.
                if not blank_node_existing_triples or blank_node_triples_changed:
                    forward_migration = forward_migration + \
                        u"\nSPARQL INSERT INTO <%s> { %s[%s] };" % (
                                                            self.__virtuoso_graph,
                                                            blank_node_as_an_object,
                                                            blank_node_as_a_subject)
                    # Drop the last two characters (the end of the final
                    # "; " separator) before reusing the text below.
                    blank_node_as_a_subject = blank_node_as_a_subject[:-2]

                    backward_migration = backward_migration + \
                    (u"\nSPARQL DELETE FROM <%s> { %s ?s. ?s %s } WHERE "
                    "{ %s ?s. ?s %s };") % (self.__virtuoso_graph, blank_node_as_an_object,
                                           blank_node_as_a_subject,
                                           blank_node_as_an_object,
                                           blank_node_as_a_subject)

           # Case 2: a URI subject whose object is not a blank node; the
           # triple is inserted / deleted directly.
           if isinstance(subject, rdflib.term.URIRef) and \
                    not isinstance(object_, rdflib.term.BNode):
                forward_migration = forward_migration + \
                                u"\nSPARQL INSERT INTO <%s> {%s %s %s . };"\
                                % (self.__virtuoso_graph, subject.n3(), predicate.n3(),
                                   object_.n3())
                backward_migration = backward_migration + \
                    u"\nSPARQL DELETE FROM <%s> {%s %s %s . };" % (self.__virtuoso_graph,
                                                            subject.n3(),
                                                            predicate.n3(),
                                                            Utils.get_normalized_n3(object_))

        return forward_migration, backward_migration