def __get_portfolio_url(self, portfolio_name):
    """ Private function used to return the portfolio URL from a given id/name. """

    # If the user has provided an ID (portfolio IDs are always integers)
    if isinstance(portfolio_name, int):
        # Raise an error for an invalid portfolio ID
        if not len(str(portfolio_name)) == PORTFOLIO_DIGIT_COUNT:
            raise InvalidPortfolioID(portfolio_name)
        else:
            return http_request_get(
                url=f"{PORTFOLIO_URL}?pid={portfolio_name}",
                session=self._session,
                parse=False,
            )
    else:  # else the user has passed a name
        # We remove the first element, since it's redundant
        for portfolio in html.fromstring(self._page_content).cssselect("option")[1:]:
            if portfolio.text == portfolio_name:
                return http_request_get(
                    url=f"{PORTFOLIO_URL}?pid={portfolio.get('value')}",
                    session=self._session,
                    parse=False,
                )

        # Raise an error if none of the portfolio names match
        raise NonexistentPortfolioName(portfolio_name)

def get_crypto(pair):
    """
    Returns a dictionary of data for the given crypto pair.

    :param pair: crypto pair
    :return: dictionary
    """

    page_parsed, _ = http_request_get(url=CRYPTO_URL, parse=True)
    page_html, _ = http_request_get(url=CRYPTO_URL, parse=False)
    crypto_headers = page_parsed.cssselect('tr[valign="middle"]')[0].xpath('td//text()')
    crypto_table_data = get_table(page_html, crypto_headers)

    return crypto_table_data[pair]

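# Usage sketch (an assumption, not part of the original source): get_crypto returns the
# row of FinViz's crypto table for one pair, keyed by the scraped column headers.
# "BTCUSD" is only an illustrative pair name; the exact keys depend on the live page.
def _example_get_crypto():
    btc_row = get_crypto("BTCUSD")
    for column, value in btc_row.items():
        print(column, value)
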
def __search_screener(self):
    """ Private function used to return data from the FinViz screener. """

    self._page_content, self._url = http_request_get(
        'https://finviz.com/screener.ashx',
        payload={
            'v': self._table,
            't': ','.join(self._tickers),
            'f': ','.join(self._filters),
            'o': self._order,
            's': self._signal,
            'c': ','.join(self._custom)
        })

    self._rows = self.__check_rows()
    self.headers = self.__get_table_headers()

    page_urls = scrape.get_page_urls(self._page_content, self._rows, self._url)
    pages_data = sequential_data_scrape(scrape.get_table,
                                        page_urls,
                                        self._delay,
                                        self.headers,
                                        self._rows)

    data = []
    for page in pages_data:
        for row in page:
            data.append(row)

    return data

def __init__(self, email, password, portfolio=None):
    """ Logs in to FinViz and sends a GET request to the portfolio. """

    payload = {"email": email, "password": password}

    # Create a session and log in by sending a POST request
    self._session = requests.session()
    auth_response = self._session.post(
        LOGIN_URL, data=payload, headers={"User-Agent": generate_user_agent()}
    )

    if not auth_response.ok:  # If the POST request wasn't successful
        auth_response.raise_for_status()

    # Get the parsed HTML and the URL of the base portfolio page
    self._page_content, self.portfolio_url = http_request_get(
        url=PORTFOLIO_URL, session=self._session, parse=False
    )

    # If the user has not created a portfolio, the request is redirected to <url>?v=2
    self.created = True
    if self.portfolio_url == f"{PORTFOLIO_URL}?v=2":
        self.created = False

    if self.created:
        if portfolio:
            self._page_content, _ = self.__get_portfolio_url(portfolio)

        self.data = get_table(self._page_content, PORTFOLIO_HEADERS)

def __search_screener(self):
    """ Private function used to return data from the FinViz screener. """

    self._page_content, self._url = http_request_get(
        "https://finviz.com/screener.ashx",
        payload={
            "v": self._table,
            "t": ",".join(self._tickers),
            "f": ",".join(self._filters),
            "o": self._order,
            "s": self._signal,
            "c": ",".join(self._custom),
        },
    )

    self._rows = self.__check_rows()
    self.headers = self.__get_table_headers()

    pages_data = sequential_data_scrape(
        scrape.get_table,
        scrape.get_page_urls(self._page_content, self._rows, self._url),
        self._delay,
        self.headers,
        self._rows,
    )

    data = []
    for page in pages_data:
        for row in page:
            data.append(row)

    return data

def get_page(ticker):
    """ Fetches the parsed stock page for a ticker and caches it in STOCK_PAGE. """
    global STOCK_PAGE

    if ticker not in STOCK_PAGE:
        STOCK_PAGE[ticker], _ = http_request_get(
            url=STOCK_URL, payload={"t": ticker}, parse=True
        )

def get_stock(ticker):
    """
    Returns a dictionary containing stock data.

    :param ticker: stock symbol
    :type ticker: str
    :return dict
    """

    data = {}
    page_parsed, _ = http_request_get(url=STOCK_URL, payload={'t': ticker}, parse=True)
    all_rows = [
        row.xpath('td//text()')
        for row in page_parsed.cssselect('tr[class="table-dark-row"]')
    ]

    for row in all_rows:
        for column in range(0, 11, 2):
            data[row[column]] = row[column + 1]

    for link in page_parsed.find_class('tab-link'):
        if "ind" in link.get('href'):
            data['Industry'] = link.text
            data['Industry Filter'] = link.get('href').split("=")[-1]

    return data

def get_all_news():
    """
    Returns a list of tuples containing time, headline and url.

    :return: list
    """

    page_parsed, _ = http_request_get(url=NEWS_URL, parse=True)
    all_dates = [row.text_content() for row in page_parsed.cssselect('td[class="nn-date"]')]
    all_headlines = [row.text_content() for row in page_parsed.cssselect('a[class="nn-tab-link"]')]
    all_links = [row.get('href') for row in page_parsed.cssselect('a[class="nn-tab-link"]')]

    return list(zip(all_dates, all_headlines, all_links))

def create_portfolio(self, name, file, drop_invalid_ticker=False):
    """
    Creates a new portfolio from a .csv file.

    The .csv file must be in the following format:
    Ticker,Transaction,Date,Shares,Price
    NVDA,2,14-04-2018,43,148.26
    AAPL,1,01-05-2019,12
    WMT,1,25-02-2015,20
    ENGH:CA,1,,1,

    (!) For transaction - 1 = BUY, 2 = SELL
    (!) Note that if the price is omitted the function will use today's ticker price
    """

    data = {
        "portfolio_id": "0",
        "portfolio_name": name,
    }

    with open(file, "r") as infile:
        reader = csv.reader(infile)
        next(reader, None)  # Skip the headers

        for row_number, row in enumerate(reader, 0):
            row_number_string = str(row_number)
            data["ticker" + row_number_string] = row[0]
            data["transaction" + row_number_string] = row[1]
            data["date" + row_number_string] = row[2]
            data["shares" + row_number_string] = row[3]

            try:
                # An empty string means no price was given, so fall back to today's price
                assert row[4] != ""
                data["price" + row_number_string] = row[4]
            except (IndexError, AssertionError):
                current_price_page, _ = http_request_get(
                    PRICE_REQUEST_URL, payload={"t": row[0]}, parse=True
                )

                # If the price is not available on FinViz, don't upload that ticker to the portfolio
                if current_price_page.text == "NA":
                    if not drop_invalid_ticker:
                        raise InvalidTicker(row[0])
                    del data["ticker" + row_number_string]
                    del data["transaction" + row_number_string]
                    del data["date" + row_number_string]
                    del data["shares" + row_number_string]
                else:
                    data["price" + row_number_string] = current_price_page.text

    self._session.post(PORTFOLIO_SUBMIT_URL, data=data)

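# Usage sketch (an assumption, not part of the original source): `portfolio` is taken to be
# an already-authenticated instance of the class that defines create_portfolio, and
# "holdings.csv" is a hypothetical file in the documented Ticker,Transaction,Date,Shares,Price
# format. A row without a price falls back to today's FinViz quote.
def _example_create_portfolio(portfolio):
    with open("holdings.csv", "w") as outfile:
        outfile.write("Ticker,Transaction,Date,Shares,Price\n")
        outfile.write("NVDA,1,14-04-2018,43,148.26\n")
        outfile.write("AAPL,1,01-05-2019,12,\n")  # price omitted: today's price is fetched
    portfolio.create_portfolio("My Portfolio", "holdings.csv", drop_invalid_ticker=True)
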
def get_news(ticker):
    """
    Returns a list of tuples containing news headline and url.

    :param ticker: stock symbol
    :return: list
    """

    page_parsed, _ = http_request_get(url=STOCK_URL, payload={'t': ticker}, parse=True)
    all_news = page_parsed.cssselect('a[class="tab-link-news"]')
    headlines = [row.xpath('text()')[0] for row in all_news]
    urls = [row.get('href') for row in all_news]

    return list(zip(headlines, urls))

def get_insider(ticker):
    """
    Returns a list of dictionaries containing all recent insider transactions.

    :param ticker: stock symbol
    :return: list
    """

    page_parsed, _ = http_request_get(url=STOCK_URL, payload={'t': ticker}, parse=True)
    table = page_parsed.cssselect('table[class="body-table"]')[0]
    headers = table[0].xpath('td//text()')
    data = [dict(zip(headers, row.xpath('td//text()'))) for row in table[1:]]

    return data

def get_analyst_price_targets(ticker):
    """
    Returns a list of dictionaries containing all analyst ratings and price targets.
    If 'price_from' or 'price_to' is not available in the data, the value defaults to 0.

    :param ticker: stock symbol
    :return: list
    """
    import datetime

    page_parsed, _ = http_request_get(url=STOCK_URL, payload={'t': ticker}, parse=True)
    table = page_parsed.cssselect('table[class="fullview-ratings-outer"]')[0]
    ratings_list = [row.xpath('td//text()') for row in table[1:]]
    ratings_list = [[val for val in row if val != '\n'] for row in ratings_list]  # remove newline entries

    headers = ['date', 'category', 'analyst', 'rating', 'price_from', 'price_to']  # header names

    analyst_price_targets = []

    for row in ratings_list:
        # Default values for rows with no price information (len(row) == 4)
        price_from, price_to = 0, 0

        if len(row) == 5:
            strings = row[4].split('→')
            if len(strings) == 1:
                # If only ONE price is available, it is the 'price_to' value
                price_to = int(strings[0].strip(' ').strip('$'))
            else:
                # Both 'price_from' and 'price_to' prices are available
                price_from = int(strings[0].strip(' ').strip('$'))
                price_to = int(strings[1].strip(' ').strip('$'))

        elements = row[:4]  # only take the first 4 elements, discard the last element if it exists
        elements.append(price_from)
        elements.append(price_to)
        elements[0] = datetime.datetime.strptime(
            elements[0], '%b-%d-%y').strftime('%Y-%m-%d')  # convert date format

        data = dict(zip(headers, elements))
        analyst_price_targets.append(data)

    return analyst_price_targets

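# Usage sketch (an assumption, not part of the original source): each returned dict uses the
# keys date/category/analyst/rating/price_from/price_to, with prices defaulting to 0 when a
# rating carries no target. "AAPL" is only an illustrative ticker.
def _example_get_analyst_price_targets():
    for rating in get_analyst_price_targets("AAPL"):
        print(rating["date"], rating["analyst"], rating["rating"],
              rating["price_from"], rating["price_to"])
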
def get_stock(ticker):
    """
    Returns a dictionary containing stock data.

    :param ticker: stock symbol
    :type ticker: str
    :return dict
    """

    data = {}
    page_parsed, _ = http_request_get(url=STOCK_URL, payload={'t': ticker}, parse=True)
    all_rows = [
        row.xpath('td//text()')
        for row in page_parsed.cssselect('tr[class="table-dark-row"]')
    ]

    # Snapshot rows alternate label/value cells, so step through the even columns
    for row in all_rows:
        for column in range(0, 11, 2):
            data[row[column]] = row[column + 1]

    return data

def create_portfolio(self, name, file):
    """
    Creates a new portfolio from a .csv file.

    The .csv file must be in the following format:
    Ticker,Transaction,Date,Shares,Price
    NVDA,2,14-04-2018,43,148.26
    AAPL,1,01-05-2019,12
    WMT,1,25-02-2015,20

    (!) For transaction - 1 = BUY, 2 = SELL
    (!) Note that if the price is omitted the function will use today's ticker price
    """

    data = {
        'portfolio_id': '0',
        'portfolio_name': name,
    }

    with open(file, 'r') as infile:
        reader = csv.reader(infile)
        next(reader, None)  # Skip the headers

        for row_number, row in enumerate(reader, 0):
            row_number_string = str(row_number)
            data['ticker' + row_number_string] = row[0]
            data['transaction' + row_number_string] = row[1]
            data['date' + row_number_string] = row[2]
            data['shares' + row_number_string] = row[3]

            try:
                data['price' + row_number_string] = row[4]
            except IndexError:
                # No price given, so fall back to today's price from FinViz
                current_price_page, _ = http_request_get(
                    PRICE_REQUEST_URL, payload={'t': row[0]}, parse=True)

                data['price' + row_number_string] = current_price_page.text

    self._session.post(PORTFOLIO_SUBMIT_URL, data=data)

def get_stock(ticker):
    """
    Returns a dictionary containing stock data.

    :param ticker: stock symbol
    :type ticker: str
    :return dict
    """

    page_parsed, _ = http_request_get(url=STOCK_URL, payload={'t': ticker}, parse=True)
    title = page_parsed.cssselect('table[class="fullview-title"]')[0]
    keys = ['Company', 'Sector', 'Industry', 'Country']
    fields = [f.text_content() for f in title.cssselect('a[class="tab-link"]')]
    data = dict(zip(keys, fields))

    all_rows = [
        row.xpath('td//text()')
        for row in page_parsed.cssselect('tr[class="table-dark-row"]')
    ]

    for row in all_rows:
        for column in range(0, 11, 2):
            data[row[column]] = row[column + 1]

    return data

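# Usage sketch (an assumption, not part of the original source): get_stock returns a flat
# dict of Company/Sector/Industry/Country plus the labels of FinViz's snapshot table, so
# values can be read out by label. "AAPL" and the "P/E" label are only illustrative; the
# exact keys depend on the live page, hence .get() is used.
def _example_get_stock():
    quote = get_stock("AAPL")
    print(quote["Company"], quote["Sector"], quote["Industry"], quote["Country"])
    print("P/E:", quote.get("P/E"))
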
def get_news(ticker):
    """
    Returns a list of tuples containing date, news headline and url.

    :param ticker: stock symbol
    :return: list
    """

    page_parsed, _ = http_request_get(url=STOCK_URL, payload={'t': ticker}, parse=True)
    all_news = page_parsed.cssselect('a[class="tab-link-news"]')

    dates = []
    for i in range(len(all_news)):
        tr = all_news[i].getparent().getparent()
        date_str = tr[0].text.strip()

        if ' ' not in date_str:
            # This is only a time, so grab the date from an earlier news row.
            tbody = tr.getparent()
            previous_date_str = ''
            j = 1
            while ' ' not in previous_date_str:
                try:
                    previous_date_str = tbody[i - j][0].text.strip()
                except IndexError:
                    break
                j += 1

            # Combine the date from the earlier news row with the time from the current one.
            date_str = ' '.join([previous_date_str.split(' ')[0], date_str])

        dates.append(date_str)

    headlines = [row.xpath('text()')[0] for row in all_news]
    urls = [row.get('href') for row in all_news]

    return list(zip(dates, headlines, urls))

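# Usage sketch (an assumption, not part of the original source): this variant of get_news
# yields (date, headline, url) tuples, so the result unpacks directly. "AAPL" is only an
# illustrative ticker.
def _example_get_news():
    for date, headline, url in get_news("AAPL"):
        print(date, headline, url)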