def get_company(company: Company, directory_import=False):
    """Look up a company's id, or gather candidate matches when it is unknown.

    The company is standardised in place and then searched for by name,
    ticker and ISIN through ``get_company_id``.

    Args:
        company: Company to resolve; ``standardise()`` is called on it.
        directory_import: When truthy, no candidate search is performed for
            an unknown company.

    Returns:
        A ``(company_id, possible_companies)`` tuple. ``company_id`` is
        ``None`` when the lookup found nothing. ``possible_companies`` is
        empty unless the company is unknown and ``directory_import`` is
        falsy, in which case it holds whatever
        ``company.get_possible_matches`` returned.
    """
    company.standardise()
    found_id = get_company_id(company.name, company.ticker, company.isin)

    # Known company: refresh the stored record with the incoming details.
    if found_id is not None:
        update_company(found_id, company)
        return found_id, []

    # Unknown company, but the caller asked not to search for candidates.
    if directory_import:
        return found_id, []

    known_companies = get_all_companies_info()
    return found_id, company.get_possible_matches(known_companies)
def save_value_to_database(name, isin, value, end_date, overlapping_info: {},
                           unification_info: [], save: bool, override: bool):
    """Store one market value for a company, or queue it for unification.

    Args:
        name: Company name used to build the ``Company`` and reported with
            overlapping values.
        isin: Company ISIN used to build the ``Company``.
        value: Market value to store.
        end_date: Period end; stored as ``str(end_date)``.
        overlapping_info: Collector handed to ``add_overlapping_info`` when
            a plain insert hits an ``IntegrityError``.
        unification_info: List that receives a ``GPWUnificationInfo`` when
            the company is unknown but has possible matches.
        save: Write the row with ``insert_values``.
        override: Write the row with ``replace_values`` (only consulted when
            ``save`` is falsy).

    ``save or override`` is forwarded to ``get_company`` as its second
    argument.
    """
    company = Company(name=name, isin=isin)
    company_id, candidates = get_company(company, save or override)

    # Ambiguous company: defer the decision instead of writing anything.
    if company_id is None and candidates:
        unification_info.append(
            common.Utils.unification_info.GPWUnificationInfo(
                company=company,
                possible_matches=candidates,
                value=value,
                end_date=str(end_date)))
        return

    # Past the guard above, a missing id implies there were no candidates.
    if company_id is None:
        company_id = insert_company(company)

    row = {
        'table_name': 'MarketValues',
        'columns': ['CompanyID', 'Period end', 'Market value'],
        'values': [company_id, str(end_date), value],
    }

    if save:
        insert_values(**row)
        return
    if override:
        replace_values(**row)
        return

    try:
        insert_market_value(company_id, value, end_date)
    except IntegrityError:
        add_overlapping_info(overlapping_info, company_id, name, value,
                             end_date)
def update_company(connection, company_id, company: Company):
    """Fill in the still-empty columns of an existing ``Company`` row.

    Each column is written through ``ifnull(column, ?)``, so a value that is
    already stored is kept and only NULL columns take the incoming value.

    Args:
        connection: Database connection; used as a context manager around
            the statement.
        company_id: ``ID`` of the row to update.
        company: Source of the new values. When both ``ekd_section`` and
            ``ekd_class`` are set they are replaced **in place** by the ids
            returned from ``get_ekd_section_id_from_value`` and
            ``get_ekd_class_id_from_value``.
    """
    ekd_incomplete = company.ekd_section is None or company.ekd_class is None
    if not ekd_incomplete:
        company.ekd_section = get_ekd_section_id_from_value(
            ekd_section=company.ekd_section)
        company.ekd_class = get_ekd_class_id_from_value(
            ekd_class=company.ekd_class)

    command = '''UPDATE OR IGNORE Company SET ISIN = ifnull(ISIN, ?), Ticker = ifnull(Ticker, ?), Bloomberg = ifnull(Bloomberg, ?), EKDSectionID = ifnull(EKDSectionID, ?), EKDClassID = ifnull(EKDClassID, ?) WHERE ID = ?'''
    # Parameter order follows the placeholders above; the row id comes last.
    parameters = (
        company.isin,
        company.ticker,
        company.bloomberg,
        company.ekd_section,
        company.ekd_class,
        company_id,
    )

    with connection:
        connection.execute(command, parameters)
def from_json(ui_json):
    """Rebuild a unification-info object from its JSON text.

    The decoded object must contain a ``'data_type'`` entry (``'gpw'``,
    ``'stooq'`` or ``'notoria'``) and a ``'company'`` entry holding the
    keyword arguments for ``Company``. The whole decoded mapping, including
    ``'data_type'``, is passed as keyword arguments to the selected class.

    Args:
        ui_json: JSON document as accepted by ``json.loads``.

    Returns:
        An instance of the class registered for the document's data type.

    Raises:
        KeyError: If ``'company'`` or ``'data_type'`` is missing, or the data
            type is not one of the three known values.
    """
    class_from_data_type = {
        'gpw': GPWUnificationInfo,
        'stooq': StooqUnificationInfo,
        'notoria': NotoriaUnificationInfo,
    }

    payload = json.loads(ui_json)
    # Swap the plain mapping for a real Company before building the result.
    payload['company'] = Company(**payload['company'])

    info_class = class_from_data_type[payload['data_type']]
    return info_class(**payload)
def insert_company(connection, company: Company):
    """Insert a new row into ``Company`` and return its row id.

    Args:
        connection: Database connection; used as a context manager around
            the insert.
        company: Company to store. It is modified in place: when both
            ``ekd_section`` and ``ekd_class`` are set they are replaced by
            the ids returned from ``get_ekd_section_id_from_value`` and
            ``get_ekd_class_id_from_value``, and ``standardise()`` is called
            afterwards.

    Returns:
        ``lastrowid`` of the cursor that executed the insert.
    """
    ekd_incomplete = company.ekd_section is None or company.ekd_class is None
    if not ekd_incomplete:
        company.ekd_section = get_ekd_section_id_from_value(
            ekd_section=company.ekd_section)
        company.ekd_class = get_ekd_class_id_from_value(
            ekd_class=company.ekd_class)

    # Standardise only after the EKD values have been swapped for ids.
    company.standardise()

    command = '''INSERT INTO Company(Name, Ticker, ISIN, Bloomberg, EKDSectionID, EKDClassID) VALUES (?, ?, ?, ?, ?, ?)'''
    parameters = (
        company.name,
        company.ticker,
        company.isin,
        company.bloomberg,
        company.ekd_section,
        company.ekd_class,
    )

    with connection:
        cursor = connection.cursor()
        cursor.execute(command, parameters)
        new_id = cursor.lastrowid
    return new_id
def parse_company(self, path):
    """Read the company description from the ``Info`` sheet of a workbook.

    Every expected value sits in the cell immediately to the right of a
    fixed label cell (``Nazwa``, ``ISIN``, ``TICKER``, ``Bloomberg`` and
    ``EKD 1``). The cells are checked in that order and the first label that
    is not where it should be aborts parsing.

    Args:
        path: Workbook location, passed to ``get_sheet`` and reported in any
            ``ParseError``.

    Returns:
        A ``Company`` built from the values read. ``ekd_section`` and
        ``ekd_class`` are ``None`` unless the EKD value splits on ``'.'``
        into exactly two parts.

    Raises:
        ParseError: If one of the label cells does not hold the expected
            text.
    """

    def split_ekd(raw_ekd):
        # Only a "<section>.<class>" value is usable; anything else is dropped.
        pieces = raw_ekd.split('.')
        if len(pieces) != 2:
            return None, None
        return pieces[0], pieces[1]

    sheet = get_sheet(path, 'Info')

    def value_next_to(row, label_column, label, complaint):
        # Rows and columns are zero-based; the value is one column to the right.
        if sheet.cell(row, label_column).value == label:
            return sheet.cell(row, label_column + 1).value
        raise ParseError(path, complaint)

    company_name = value_next_to(
        2, 0, 'Nazwa', '(A3=Nazwa) of company should be in B3 cell')
    isin = value_next_to(
        17, 3, 'ISIN', '(D18=ISIN) of company should be in E18 cell')
    company_ticker = value_next_to(
        12, 0, 'TICKER', '(A13=TICKER) of company should be in B13 cell')
    company_bloomberg = value_next_to(
        16, 0, 'Bloomberg',
        '(A17=Bloomberg) of company should be in B17 cell')
    company_ekd = value_next_to(
        25, 0, 'EKD 1', '(A26=EKD 1) of company should be in B26 cell')

    ekd_section, ekd_class = split_ekd(company_ekd)
    if ekd_section and ekd_class:
        insert_ekd_data(ekd_section, ekd_class)

    return Company(name=company_name,
                   ticker=company_ticker,
                   isin=isin,
                   bloomberg=company_bloomberg,
                   ekd_section=ekd_section,
                   ekd_class=ekd_class)
def download_all_companies(self, user_date):
    """Download the daily quotes of all listed companies for one date.

    Two paged listings are scraped: the quote pages (tables with ``Symbol``,
    ``Name`` and ``Last`` columns) and the change pages (tables with
    ``Symbol``, ``Name`` and ``Change`` columns). The ``Change.1`` column of
    the latter is joined onto the former by ``Symbol``; every resulting row
    is then either inserted or queued for unification.

    Args:
        user_date: Object exposing ``day``, ``month`` and ``year``; selects
            the listing to download and dates every stored quote.

    Returns:
        A ``ParsingResult`` when at least one company needs unification (it
        also carries the overlapping quotes, if any); otherwise ``None``.

    Raises:
        ParseError: If either listing yields no usable table, or a
            Volume/Turnover value cannot be converted.
        UniqueError: If conflicting quotes already exist and no company
            needs unification.
    """
    day, month, year = user_date.day, user_date.month, user_date.year
    interval_id = get_interval_id_from_shortcut('d')

    def collect_tables(url_template, value_column, decode):
        """Walk the numbered pages until one has no usable table.

        Returns the accepted tables and the last URL requested.
        """
        required_columns = ('Symbol', 'Name', value_column)
        tables = []
        page = 1
        while True:
            page_url = url_template.format(number=page, day=day,
                                           month=month, year=year)
            page_html = requests.get(page_url).content
            if decode:
                page_html = page_html.decode("utf-8")
            try:
                candidates = pd.read_html(page_html)
            except (ValueError, lxml.etree.ParserError):
                break

            found = False
            for df in candidates:
                if not all(column in df.columns
                           for column in required_columns):
                    continue
                if df.empty:
                    continue
                # Reject a table if any symbol matches the exclusion filter.
                if df.Symbol.apply(lambda x: bool(
                        self._tables_filter.match(str(x)))).any():
                    continue
                tables.append(df)
                found = True

            # An empty candidate list also ends here: nothing was accepted.
            if not found:
                break
            page += 1
        return tables, page_url

    # NOTE(review): quote pages are decoded as UTF-8 before parsing while
    # change pages are handed to pandas as raw bytes. The asymmetry is kept
    # on purpose here; confirm whether it is intentional.
    frames, url = collect_tables(
        self._all_companies_date_ulr_base, 'Last', decode=True)
    frames_change, url_change = collect_tables(
        self._all_companies_date_ulr_change, 'Change', decode=False)

    if not frames:
        raise ParseError(url, "No stock quotes found for given date")
    if not frames_change:
        raise ParseError(url_change, "No stock quotes found for given date")

    result = pd.concat(frames)
    result_change = pd.concat(frames_change)[['Symbol', 'Change.1']]
    result = result.join(result_change.set_index('Symbol'), on='Symbol')
    # Missing values become None so the `is None` check below can see them.
    result = result.where(result.notnull(), None)

    try:
        result['Volume'] = result['Volume'].apply(_convert_kmb)
        result['Turnover'] = result['Turnover'].apply(_convert_kmb)
    except ValueError as err:
        raise ParseError(
            url, 'Wrong data in Volume/Turnover column') from err

    # The quote date is the same for every row, so build it once.
    quote_date = date(year, month, day)
    unification_info = []
    overlapping_stock = {}

    for _, row in result.iterrows():
        ticker = row['Symbol'].upper()
        company = Company(name=row['Name'], ticker=ticker)
        company_id, possible_companies = get_company(company)
        if company_id is None and not possible_companies:
            company_id = insert_company(company)

        if row['Last'] is None:
            continue

        stock_quotes = [
            company_id, str(quote_date), row['Last'], row['Change.1'],
            row['Open'], row['High'], row['Low'], row['Volume'],
            row['Turnover'], interval_id
        ]

        if possible_companies:
            unification_info.append(
                StooqUnificationInfo(company=company,
                                     possible_matches=possible_companies,
                                     data=[stock_quotes]))
            continue

        stock_quotes_to_insert = (company_id, quote_date,
                                  float_or_none(row['Last']),
                                  float_or_none(row['Change.1']),
                                  float_or_none(row['Open']),
                                  float_or_none(row['High']),
                                  float_or_none(row['Low']),
                                  int_or_none(row['Volume']),
                                  int_or_none(row['Turnover']),
                                  interval_id)
        try:
            insert_stock_quotes(stock_quotes_to_insert)
        except IntegrityError:
            # An identical stored quote is not a conflict.
            if exactly_same_stock_quote(stock_quotes_to_insert):
                continue
            if not overlapping_stock:
                self._init_overlapping_info(overlapping_stock)
            overlapping_stock["values"].append(stock_quotes)

    if unification_info:
        if overlapping_stock:
            return ParsingResult(unification_info=unification_info,
                                 overlapping_info=overlapping_stock)
        return ParsingResult(unification_info=unification_info)
    if overlapping_stock:
        raise UniqueError(overlapping_stock)
    return None
def download_company(self, company, start_date, end_date, interval='d'):
    """Download the quotes of one company for a date range and store them.

    The company name is taken from the ``Historical data: <name> (`` text of
    the first result page. The paged result tables (those with ``Date`` and
    ``Close`` columns) are then collected and processed in reversed order.
    Quotes from this source carry no turnover, so that slot is always
    ``None``.

    Args:
        company: Ticker of the company; upper-cased before use.
        start_date: First day of the range (``day``/``month``/``year``).
        end_date: Last day of the range (``day``/``month``/``year``).
        interval: Interval shortcut put into the URL and resolved to an id
            with ``get_interval_id_from_shortcut``.

    Returns:
        A ``ParsingResult`` wrapping one ``StooqUnificationInfo`` when the
        company has possible matches and quotes were collected for it;
        otherwise ``None``.

    Raises:
        ParseError: If the company name cannot be found on the first page,
            no quote table is found, a Volume value cannot be converted, or
            a date cannot be parsed.
        UniqueError: If conflicting quotes already exist.
    """
    start_day, start_month, start_year = (
        start_date.day, start_date.month, start_date.year)
    end_day, end_month, end_year = (
        end_date.day, end_date.month, end_date.year)
    interval_id = get_interval_id_from_shortcut(interval)
    ticker = company.upper()

    def page_url(number):
        # Single place that knows how a result-page URL is put together.
        return self._company_url_base.format(
            number=number, company=ticker,
            day1=start_day, month1=start_month, year1=start_year,
            day2=end_day, month2=end_month, year2=end_year,
            interval=interval)

    url = page_url(1)
    site_html = requests.get(url).content.decode("utf-8")

    # A page without the header (e.g. an unknown ticker) must surface as a
    # ParseError rather than an AttributeError on a missing match.
    name_match = re.search(r'Historical data: (.*) \(', site_html)
    if name_match is None:
        raise ParseError(url, 'Company name not found on the page')

    company_record = Company(name=name_match.group(1), ticker=ticker)
    company_id, possible_companies = get_company(company_record)
    if company_id is None and not possible_companies:
        company_id = insert_company(company_record)
    unification_info = StooqUnificationInfo(
        company=company_record,
        possible_matches=possible_companies,
        data=[])

    frames = []
    page = 1
    # The first page is already downloaded, so it is parsed as-is; only the
    # following pages trigger a new request.
    while True:
        try:
            candidates = pd.read_html(site_html)
        except (ValueError, lxml.etree.ParserError):
            break

        found = False
        for df in candidates:
            if 'Date' not in df.columns or 'Close' not in df.columns:
                continue
            if df.empty or df.Date.isnull().any():
                continue
            frames.append(df)
            found = True

        # An empty candidate list also ends here: nothing was accepted.
        if not found:
            break
        page += 1
        url = page_url(page)
        site_html = requests.get(url).content.decode("utf-8")

    if not frames:
        raise ParseError(url, "No stock quotes found for given data")

    result = pd.concat(frames)[::-1]
    # Missing values become None so the `is None` check below can see them.
    result = result.where(result.notnull(), None)

    try:
        result['Volume'] = result['Volume'].apply(_convert_kmb)
    except ValueError as err:
        raise ParseError(url, 'Wrong data in Volume column') from err

    overlapping_stock = {}
    for _, row in result.iterrows():
        if pd.isnull(row['No.']):
            continue
        if row['Close'] is None:
            continue

        try:
            parsed_date = _parse_date(row['Date'])
        except (ValueError, TypeError) as err:
            raise ParseError(url, 'Wrong date format') from err

        stock_quotes = [
            company_id, str(parsed_date), row['Close'], row['Change.1'],
            row['Open'], row['High'], row['Low'], row['Volume'], None,
            interval_id
        ]

        if possible_companies:
            unification_info.add_data(stock_quotes)
            continue

        stock_quotes_to_insert = (company_id, parsed_date,
                                  float_or_none(row['Close']),
                                  float_or_none(row['Change.1']),
                                  float_or_none(row['Open']),
                                  float_or_none(row['High']),
                                  float_or_none(row['Low']),
                                  int_or_none(row['Volume']),
                                  None,
                                  interval_id)
        try:
            insert_stock_quotes(stock_quotes_to_insert)
        except IntegrityError:
            # An identical stored quote is not a conflict.
            if exactly_same_stock_quote(stock_quotes_to_insert):
                continue
            if not overlapping_stock:
                self._init_overlapping_info(overlapping_stock)
            overlapping_stock["values"].append(stock_quotes)

    if unification_info.data:
        return ParsingResult(unification_info=[unification_info])
    if overlapping_stock:
        raise UniqueError(overlapping_stock)
    return None