def readsavedpdf(path, link):
    """Extract the text of the PDF at *path* and save it to a sibling
    .txt file, with *link* written on the first line.

    Any failure is logged via writelog() rather than raised.
    """
    try:
        # Strip only a trailing ".pdf" — the original used str.replace,
        # which would also mangle a ".pdf" occurring mid-path.
        base = path[:-len('.pdf')] if path.endswith('.pdf') else path
        # Context managers guarantee both handles close even on error
        # (the original hand-rolled the cleanup in a finally block).
        with open(path, "rb") as pdf_file, open(base + ".txt", 'w') as tfile:
            reader = PdfFileReader(pdf_file)
            data = ""
            for i in range(reader.getNumPages()):
                data += reader.getPage(i).extractText() + "\n"
            # Drop the caron character some PDFs emit and collapse runs
            # of whitespace into single spaces.
            data = " ".join(data.replace(u"\u02c7", " ").strip().split())
            tfile.write(link)
            tfile.write("\n")
            tfile.write(data)
    except Exception as e:
        writelog("Exception in reading: " + path + " " + str(e))
def readpdffromweb(url):
    """Download the PDF at *url* into collection/, hand it to
    readsavedpdf() for text extraction, then delete the temporary PDF.

    Failures are logged via writelog(); nothing is raised to the caller.
    """
    local_path = None
    try:
        response = urlopen(url)
        # Name the temp file after the last URL path segment.  (The
        # original bound this to `tempfile`, shadowing the stdlib module.)
        local_path = "collection/" + url.split('/')[-1]
        with open(local_path, 'wb') as tfile:
            tfile.write(response.read())
        readsavedpdf(local_path, url)
    except Exception as e:
        writelog("Exception in crawling: " + url + " " + str(e))
    finally:
        # The downloaded PDF is only an intermediate artifact — remove it
        # whether or not extraction succeeded (matches the original's
        # close-then-remove dance, without the double-close bookkeeping).
        if local_path is not None and os.path.exists(local_path):
            os.remove(local_path)
# ---- 示例#3 (scraped example separator) ----
def amigo_init(tick, category, freq):
    """(Re)create the MySQL table for *tick*/*category*/*freq*.

    Returns 0 on success; exits the process if the category
    configuration file cannot be read.
    """
    # Column names come from the per-category configuration file.
    cname = getconf(category)
    if not cname:
        p = [tick, category, freq]
        writelog('[CRITICAL] No Configuration File Found', 'amigo_init', p)
        # sys.exit raises SystemExit, so nothing after it runs (the
        # original had an unreachable "return 1" here).
        sys.exit('[CRITICAL] No Configuration File Found')
    tname = 'amigo_' + tick + '_' + category.replace('-', '_') + '_' + freq

    # Drop any stale table before recreating it.
    dberase('DROP TABLE IF EXISTS ' + tname)

    # One VARCHAR(10) column per configured name, plus an auto-increment id.
    cols = ', '.join(str(li) + ' VARCHAR(10)' for li in cname)
    dbquery('CREATE TABLE ' + tname
            + ' ( id INT(3) UNSIGNED AUTO_INCREMENT PRIMARY KEY, '
            + cols + ')')
    return 0
def yql_real(tick, attempts):
    """Scrape realtime quote data for *tick* from Yahoo Finance and
    replace the corresponding row in the yql_real table.

    attempts -- retry counter for incomplete HTTP reads (max 3).
    Returns 0 on success, 1 on failure.
    """
    # Indentation normalized to spaces throughout: the original mixed
    # tabs with space-indented lines, which is a TabError in Python 3.
    p = [tick, attempts]
    # Web scraping
    try:
        req = Request(
            'http://finance.yahoo.com/d/quotes.csv?s=' + tick + '&f=b2b3c6ej3m2r2j1',
            data=None,
            headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
            }
        )
        html = urlopen(req)
        data = html.read()

        # Parsing
        soup = BeautifulSoup(data, 'html.parser')
    # HTTPError is a subclass of URLError, so it must be caught first;
    # the original order made the HTTPError handler unreachable.
    except HTTPError as e:
        writelog('[WARNING] HTTP ERROR Encountered ' + str(e), 'yql_real', p)
        return 1
    except URLError as e:
        writelog('[CRITICAL] URL ERROR Encountered' + str(e), 'yql_real', p)
        return 1
    except http.client.IncompleteRead:
        writelog('[WARNING] HTTP INCOMPLETE ERROR', 'yql_real', p)
        if attempts < 3:
            # Retry this same scrape (the original mistakenly retried
            # yql_growth here).
            r = yql_real(tick, attempts + 1)
        else:
            writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES', 'yql_real', p)
            return 1
        if r == 0:
            return 0
        writelog('[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve', 'yql_real', p)
        return 1

    # Remove subscripts
    for tag in soup.find_all('sup'):
        tag.replaceWith('')

    ts = str(soup).split(',')

    # Delete any previous row for this tick.
    dbquery('DELETE FROM yql_real WHERE tick = \'' + tick + '\'')

    # Insert the freshly scraped row.
    iquery = 'INSERT INTO yql_real (tick, ask, bid, rchange, es, marketcap, dayr, pe, smc) VALUES (\'' + tick + '\','
    for ele in ts:
        iquery = iquery + '\'' + ele + '\', '
    iquery = iquery[:-2] + ')'
    dbquery(iquery)
    return 0
# ---- 示例#5 (scraped example separator) ----
def dberase(query):
    """Execute a destructive statement (e.g. DROP TABLE) and commit.

    Exits the process if the database configuration cannot be read.
    """
    cn = getconf('db')
    if not cn:
        writelog('Unable to Read Database Configuration File!', 'dbquery', 'query')
        sys.exit('Database Configuration Not Found --- Exiting...')
    # cn layout: host, port, user, password, database.
    conn = connect(host=cn[0], port=int(cn[1]), user=cn[2], passwd=cn[3], db=cn[4])
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(query)
            conn.commit()
        finally:
            # Always release the cursor, even if execute/commit raises
            # (the original leaked both cursor and connection on error).
            cursor.close()
    finally:
        conn.close()
# ---- 示例#6 (scraped example separator) ----
def real_populate():
    """Run yql_real() for every enabled tick in the ticklist table,
    logging a success or failure line per tick.
    """
    p = []
    writelog('[INFO] Starting Realtime Population', 'real_populate', p)
    # NOTE(review): credentials are hard-coded here — confirm they should
    # not come from getconf('db') like the other DB helpers.
    conn = pymysql.connect(host='localhost',
                           port=3306,
                           user='******',
                           passwd='password',
                           db='jsong')
    try:
        cur = conn.cursor()
        try:
            cur.execute("SELECT tick FROM ticklist WHERE enabled = 1")
            count = 1
            for row in cur.fetchall():
                tick = row[0]
                print(str(count) + ": " + tick)
                count += 1
                writelog('[INFO] realtime ' + tick, 'real_populate', p)
                if yql_real(tick, 0) == 1:
                    # Log with the failing tick only; the original rebound
                    # the shared `p`, polluting later success logs.
                    writelog('[CRITICAL] Error Occurred During YQL_REALTIME',
                             'real_populate', [tick])
                else:
                    writelog('[SUCCESS] Successfully Retrieved Realtime data',
                             'real_populate', p)
        finally:
            cur.close()
    finally:
        # Close the connection even if an iteration raised (the original
        # leaked it on error).
        conn.close()
# ---- 示例#7 (scraped example separator) ----
def dictreturn(query):
    """Run a SELECT and return its result as {column_name: [values]}.

    Values convertible to Decimal (precision 6) are converted; others
    are kept as the driver returned them.  Exits the process on MySQL
    errors or missing DB configuration; MySQL warnings are swallowed
    and whatever was collected so far is returned.
    """
    getcontext().prec = 6
    # Renamed from `dict`, which shadowed the builtin.
    result = {}
    cn = getconf('db')
    if not cn:
        writelog('[CRITICAL] Unable to Read Database Configuration File!', 'selectdb', 'query')
        sys.exit('Database Configuration Not Found --- Exiting...')
    # Connect before entering the try so the cleanup code can never see
    # unbound `cursor`/`conn` names (a latent NameError in the original).
    conn = connect(host=cn[0], port=int(cn[1]), user=cn[2], passwd=cn[3], db=cn[4])
    cursor = conn.cursor()
    try:
        cursor.execute(query)
        desc = cursor.description
        ncols = len(desc)
        # One accumulator list per result column.
        columns = [[] for _ in desc]
        for row in cursor.fetchall():
            for i in range(ncols):
                try:
                    val = Decimal(row[i])
                except Exception:
                    # Non-numeric value: keep as-is.
                    val = row[i]
                columns[i].append(val)
        for c, col in enumerate(desc):
            result[col[0]] = columns[c]
    except MySQLError:
        print([query])
        sys.exit('MySQL Exception Found --- Exiting...')
    except Warning:
        # Non-fatal; fall through with partial (possibly empty) result.
        pass
    finally:
        # Single cleanup path replaces the original's duplicated closes.
        cursor.close()
        conn.close()
    return result
# ---- 示例#8 (scraped example separator) ----
def getconf(category):
    """Read config/<category>.cfg and return its lines as a list.

    Each line has its trailing newline and all spaces removed.  On any
    I/O error the problem is logged and an empty list is returned.
    """
    try:
        # `with` closes the handle on every path (the original never
        # closed it).
        with open('config/' + category + '.cfg', 'r') as conf:
            li = []
            for var in conf:
                # rstrip('\n') instead of var[:-1]: the slice ate a real
                # character when the file's last line had no newline.
                li.append(var.rstrip('\n').replace(" ", ""))
            return li
    except IOError as e:
        errmsg = '[CRITICAL] Error occured at config.py: %s' % e.strerror
        writelog(errmsg, "getconf", [category])
        return []
# ---- 示例#9 (scraped example separator) ----
def getconf(category):
    """Read config/<category>.cfg and return its lines as a list.

    Each line has its trailing newline and all spaces removed.  On any
    I/O error the problem is logged and an empty list is returned.
    """
    try:
        # `with` closes the handle on every path (the original never
        # closed it).
        with open('config/' + category + '.cfg', 'r') as conf:
            li = []
            for var in conf:
                # rstrip('\n') instead of var[:-1]: the slice ate a real
                # character when the file's last line had no newline.
                li.append(var.rstrip('\n').replace(" ", ""))
            return li
    except IOError as e:
        errmsg = '[CRITICAL] Error occured at config.py: %s' % e.strerror
        writelog(errmsg, "getconf", [category])
        return []
# ---- 示例#10 (scraped example separator) ----
def yql_dividends_init():
    """(Re)create the yql_dividends table from the dividends config.

    Returns 0 on success; exits the process if the configuration file
    cannot be read.
    """
    cn = getconf('dividends')
    if not cn:
        writelog('[CRITICAL] No Configuration File Found', 'yql_dividends_init', [])
        # sys.exit raises SystemExit; the original's trailing "return 1"
        # was unreachable and has been removed.
        sys.exit('[CRITICAL] No Configuration File Found')
    dberase('DROP TABLE IF EXISTS yql_dividends')

    # One VARCHAR(25) column per configured name, plus id and tick.
    cols = ''.join(ele + ' VARCHAR(25), ' for ele in cn)
    dbquery('CREATE TABLE yql_dividends (id INT NOT NULL AUTO_INCREMENT, '
            'tick VARCHAR(10), ' + cols[:-2] + ', PRIMARY KEY(id))')
    return 0
def yql_estimates_init(tick):
    """(Re)create the <tick>_yql_estimates table from the estimates config.

    Returns 0 on success; exits the process if the configuration file
    cannot be read.
    """
    cn = getconf('estimates')
    if not cn:
        writelog('[CRITICAL] No Configuration File Found', 'yql_estimates_init', [tick])
        # sys.exit raises SystemExit; the original's trailing "return 1"
        # was unreachable and has been removed.
        sys.exit('[CRITICAL] No Configuration File Found')
    dberase('DROP TABLE IF EXISTS ' + tick + '_yql_estimates')

    # One VARCHAR(15) column per configured name, plus an auto-increment id.
    cols = ''.join(ele + ' VARCHAR(15), ' for ele in cn)
    dbquery('CREATE TABLE ' + tick + '_yql_estimates (id INT NOT NULL AUTO_INCREMENT, '
            + cols[:-2] + ', PRIMARY KEY(id))')
    return 0
def yql_growth_init(tick):
    """(Re)create the <tick>_yql_growth table from the growth config.

    Returns 0 on success; exits the process if the configuration file
    cannot be read.
    """
    cn = getconf('growth')
    if not cn:
        writelog('[CRITICAL] No Configuration File Found', 'yql_growth_init', [tick])
        # sys.exit raises SystemExit; the original's trailing "return 1"
        # was unreachable and has been removed.
        sys.exit('[CRITICAL] No Configuration File Found')
    dberase('DROP TABLE IF EXISTS ' + tick + '_yql_growth')

    # One VARCHAR(15) column per configured name, plus an auto-increment id.
    cols = ''.join(ele + ' VARCHAR(15), ' for ele in cn)
    dbquery('CREATE TABLE ' + tick + '_yql_growth (id INT NOT NULL AUTO_INCREMENT, '
            + cols[:-2] + ', PRIMARY KEY(id))')
    return 0
# ---- 示例#13 (scraped example separator) ----
def readContent(url):
    """Fetch *url*, strip its HTML to plain text, and save it under
    collection/ with the URL on the first line.

    The global *count* is appended to the file name and incremented on
    success.  On failure the partial file (if any) is deleted and the
    error is logged.
    """
    global count
    filename = ""
    # Renamed from `file`, which shadowed the builtin.
    out = None
    try:
        writelog("Reading content of: " + url)
        response = urlopen(url)
        data = response.read().decode("utf-8", errors='ignore')
        text = BeautifulSoup(data).get_text()
        # Derive the output name from the URL (slashes removed so it is
        # a single path component).
        name = url.split('//')[-1] + str(count) + ".txt"
        filename = "collection/" + name.replace('/', '')
        out = open(filename, 'w')
        out.write(url)
        out.write('\n')
        out.write(text)
        writelog("Successfully read: " + url)
        count += 1
    except Exception as e:
        writelog("Exception: readContent " + url + " " + str(e))
        # Drop the partially written file so the collection holds only
        # complete documents.
        if out is not None:
            out.close()
            os.remove(filename)
    finally:
        # Avoid the original's redundant second close on the error path.
        if out is not None and not out.closed:
            out.close()
# ---- 示例#14 (scraped example separator) ----
def dbquery(query):
    """Execute *query* against the configured database and commit.

    MySQL errors are logged and abort the process; MySQL warnings are
    logged and ignored.  Exits if the DB configuration is missing.
    """
    cn = getconf('db')
    if not cn:
        writelog('[CRITICAL] Unable to Read Database Configuration File!', 'dbquery', 'query')
        sys.exit('Database Configuration Not Found --- Exiting...')
    # Connect outside the try so the handlers and cleanup can never see
    # unbound `cursor`/`conn` names (a latent NameError in the original).
    conn = connect(host=cn[0], port=int(cn[1]), user=cn[2], passwd=cn[3], db=cn[4])
    cursor = conn.cursor()
    try:
        cursor.execute(query)
        conn.commit()
    except MySQLError as e:
        p = [query]
        print(p)
        writelog('[CRITICAL] MYSQL Error Detected' + str(e), 'dbquery', p)
        sys.exit('MySQL Exception Found --- Exiting...')
    except Warning as e:
        # Warnings are non-fatal; log and fall through to cleanup.
        writelog('[CRITICAL] MYSQL Warning Detected' + str(e), 'dbquery', [query])
    finally:
        # Always release resources (the original leaked the connection
        # when execute raised something other than MySQLError/Warning).
        cursor.close()
        conn.close()
# ---- 示例#15 (scraped example separator) ----
def retrivePage(url):
    """Crawl one page: save its content, then return a list of new,
    valid links found on it.

    PDF URLs are handed to readpdffromweb() and yield no links.  Marks
    *url* in the global ``visited`` map.  Returns [] on any fetch error.
    """
    # NOTE(review): strip("/") removes slashes from BOTH ends of the URL;
    # presumably only the trailing slash was intended — confirm.
    url = url.strip("/")
    #file_log.write("now crawling" + url)
    if url.endswith('.pdf'):
        writelog("Reading content of: " + url)
        readpdffromweb(url)
        writelog("Successfully read: " + url)
        return []
    else:
        # Save the page text first, then fetch again below to harvest links.
        readContent(url)
        try:

            response = urlopen(url)
            writelog("retrievePage; Fetching urls from: " + url)
            data = response.read().decode("utf-8",errors="ignore")
            soup = BeautifulSoup(data)
            url_list = []
            # Record this page so other crawl calls skip it.
            visited[url] = 1
            soup.prettify()
            for anchor in soup.findAll('a', href=True):
                # Resolve relative hrefs against the current page URL.
                norm_url = normalizeurl(url,anchor['href'])
                if not isvalidurl(norm_url):#skip rest statements in the loop and continue to remaining iteration
                    continue

                # Deduplicate against this page's list and the global map.
                if norm_url not in url_list and norm_url not in visited:
                    url_list.append(norm_url)


            writelog("retrievePage; Success fetching urls from: " + url)
            return url_list

        except Exception as e:
            writelog("Exception: retrievePage " + url + " " +  str(e))
            #file_log.write("failed: permission denied or link not working")
            #file_log.write("\n")
            return []
# ---- 示例#16 (scraped example separator) ----
def real_populate():
    """Run yql_real() for every enabled tick in the ticklist table,
    logging a success or failure line per tick.
    """
    p = []
    writelog('[INFO] Starting Realtime Population', 'real_populate', p)
    # NOTE(review): credentials are hard-coded here — confirm they should
    # not come from getconf('db') like the other DB helpers.
    conn = pymysql.connect(host='localhost', port=3306, user='******', passwd='password', db='jsong')
    try:
        cur = conn.cursor()
        try:
            cur.execute("SELECT tick FROM ticklist WHERE enabled = 1")
            count = 1
            for row in cur.fetchall():
                tick = row[0]
                print(str(count) + ": " + tick)
                count += 1
                writelog('[INFO] realtime ' + tick, 'real_populate', p)
                if yql_real(tick, 0) == 1:
                    # Log with the failing tick only; the original rebound
                    # the shared `p`, polluting later success logs.
                    writelog('[CRITICAL] Error Occurred During YQL_REALTIME', 'real_populate', [tick])
                else:
                    writelog('[SUCCESS] Successfully Retrieved Realtime data', 'real_populate', p)
        finally:
            cur.close()
    finally:
        # Close the connection even if an iteration raised (the original
        # leaked it on error).
        conn.close()
# ---- 示例#17 (scraped example separator) ----
def yql_day(tick, attempts):
    """Scrape daily quote statistics for *tick*, refresh its yql_day
    row, and append the beta value from yql_beta().

    attempts -- unused here; kept for signature parity with the other
    yql_* scrapers.
    Returns 0 on success, 1 on failure; exits on URL errors or missing
    'day' configuration.
    """
    # Web scraping
    try:
        req = Request(
            'http://finance.yahoo.com/d/quotes.csv?s=' + tick + '&f=a2dghj4vxy',
            data=None,
            headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
            }
        )
        html = urlopen(req)
        data = html.read()
    # HTTPError is a subclass of URLError, so it must be caught first;
    # the original order made the HTTPError handler unreachable.
    except HTTPError as e:
        p = [tick]
        writelog('[WARNING] HTTP ERROR Encountered', 'yql_day', p)
        writelog(e, 'yql_day', p)
        return 1
    except URLError as e:
        p = [tick]
        writelog('[CRITICAL] URL ERROR Encountered', 'yql_day', p)
        writelog(e, 'yql_day', p)
        sys.exit('[CRITICAL] URL ERROR')

    # Parse, dropping <sup> subscript markers.
    soup = BeautifulSoup(data, 'html.parser')
    for tag in soup.find_all('sup'):
        tag.replaceWith('')
    ts = str(soup).split(',')

    # Remove the previous row for this tick.
    dbquery('DELETE FROM yql_day WHERE tick = \'' + tick + '\'')

    iquery = 'INSERT INTO yql_day (tick, '
    cn = getconf('day')
    if not cn:
        writelog('[CRITICAL] No Configuration File Found', 'yql_day', [])
        # sys.exit raises SystemExit; the original's trailing "return 1"
        # was unreachable.
        sys.exit('[CRITICAL] No Configuration File Found')
    for ele in cn:
        iquery = iquery + ele + ', '
    iquery = iquery[:-2] + ') VALUES (\'' + tick + '\', '

    # Expect one config column per scraped field plus one for beta.
    if len(cn) == len(ts) + 1:
        for el in ts:
            el = el.replace("\n", "").replace('\"', '').replace("\\", "")
            iquery = iquery + '\'' + el + '\', '
    else:
        return 1

    beta = yql_beta(tick, 0)
    if beta == 0:
        # yql_beta signalled failure; store a sentinel instead.
        beta = 'NA'
        writelog('Unable to collect beta', 'yql_day', [tick])

    dbquery(iquery + '\'' + str(beta) + '\')')
    return 0
def yql_analyst_trends(tick, attempts):
    """Scrape the analyst-opinion trend table for *tick* and insert the
    counts into <tick>_yql_analyst_trends.

    attempts -- retry counter for incomplete HTTP reads (max 3).
    Returns 0 on success, 1 on failure.
    """
    p = [tick, attempts]
    # Web scraping
    try:
        req = Request(
            'http://finance.yahoo.com/q/ao?s=' + tick + '+Analyst+Opinion',
            data=None,
            headers={
                'User-Agent':
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
            })
        html = urlopen(req)
        data = html.read()

        # Find table & Parse
        soup = BeautifulSoup(data, 'html.parser')

        # Remove subscripts
        for tag in soup.find_all('sup'):
            tag.replaceWith('')
        table = soup.find_all("table", {"class": "yfnc_datamodoutline1"})
    # HTTPError is a subclass of URLError, so it must be caught first;
    # the original order made this handler unreachable.  Log context
    # also corrected from the copy-pasted 'yql_analyst_summary'.
    except HTTPError as e:
        writelog('[WARNING] HTTP ERROR Encountered', 'yql_analyst_trends', p)
        writelog(e, 'yql_analyst_trends', p)
        return 1
    except URLError as e:
        writelog('[CRITICAL] URL ERROR Encountered', 'yql_analyst_trends', p)
        writelog(e, 'yql_analyst_trends', p)
        return 1
    except http.client.IncompleteRead:
        writelog('[WARNING] HTTP INCOMPLETE ERROR', 'yql_analyst_trends', p)
        if attempts < 3:
            r = yql_analyst_trends(tick, attempts + 1)
        else:
            writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES',
                     'yql_analyst_trends', p)
            return 1
        if r == 0:
            return 0
        writelog('[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve',
                 'yql_analyst_trends', p)
        return 1

    # The fourth matched table holds the trend rows; rows after the
    # header are split into one bucket per column.
    i = 0
    cn = []
    count = True
    try:
        for ele in table:
            if i == 3:
                for row in ele.findAll("tr"):
                    if i > 4:
                        if count == True:
                            # First data row: allocate one bucket per cell.
                            for _ in range(0, len(row) - 1):
                                cn.append([])
                            count = False
                        c = 0
                        for col in row.findAll("td"):
                            cn[c].append(col.get_text())
                            c = c + 1
                    i = i + 1
            i = i + 1
    except IndexError as e:
        p = [tick]
        writelog('[WARNING] INDEX ERROR Encountered', 'yql_analyst_trends', p)
        writelog(e, 'yql_analyst_trends', p)
        return 1
    for l in cn:
        s = 'INSERT INTO ' + tick + '_yql_analyst_trends (Strong_Buy, Buy, Hold, Underperform, Sell) VALUES ('
        for x in l:
            s = s + '\'' + x + '\', '
        s = s[:-2] + ')'
        dbquery(s)
    return 0
# ---- 示例#19 (scraped example separator) ----
def yql_highlight(tick, attempts):
    """Scrape the Key Statistics highlight tables for *tick* and replace
    its row in yql_highlight.

    attempts -- retry counter for incomplete HTTP reads (max 3).
    Returns 0 on success, 1 on failure; exits if the highlight
    configuration file is missing.
    """
    p = [tick, attempts]
    # Web scraping
    try:
        req = Request(
            'http://finance.yahoo.com/q/ks?s=' + tick + '+Key+Statistics',
            data=None,
            headers={
                'User-Agent':
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
            })
        html = urlopen(req)
        data = html.read()
        # Find table & Parse
        soup = BeautifulSoup(data, 'html.parser')
    # HTTPError is a subclass of URLError, so it must be caught first;
    # the original order made this handler unreachable.
    except HTTPError as e:
        writelog('[WARNING] HTTP ERROR Encountered', 'yql_highlight', p)
        writelog(e, 'yql_highlight', p)
        return 1
    except URLError as e:
        writelog('[CRITICAL] URL ERROR Encountered', 'yql_highlight', p)
        writelog(e, 'yql_highlight', p)
        return 1
    except http.client.IncompleteRead:
        writelog('[WARNING] HTTP INCOMPLETE ERROR', 'yql_highlight', p)
        if attempts < 3:
            r = yql_highlight(tick, attempts + 1)
        else:
            writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES',
                     'yql_highlight', p)
            return 1
        if r == 0:
            return 0
        writelog('[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve',
                 'yql_highlight', p)
        return 1

    # Remove subscripts
    for tag in soup.find_all('sup'):
        tag.replaceWith('')

    table = soup.find_all("table", {"class": "yfnc_datamodoutline1"})

    cn = getconf('highlight')
    if not cn:
        writelog('[CRITICAL] No Configuration File Found', 'yql_highlight', [])
        # sys.exit raises SystemExit; the original's trailing "return 1"
        # was unreachable.
        sys.exit('[CRITICAL] No Configuration File Found')

    dbquery('DELETE FROM yql_highlight WHERE tick = \'' + tick + '\'')

    s = 'INSERT INTO yql_highlight (tick, '
    for ele in cn:
        s = s + ele + ', '
    s = s[:-2] + ') VALUES (\'' + tick + '\', '

    # Tables 1-4 hold label/value pairs; keep only the value cells
    # (those without a ':'), stripped of spaces and '%'.
    i = 0
    ccl = []
    try:
        for ele in table:
            if (i >= 1) and (i <= 4):
                for row in ele.findAll("tr"):
                    if len(row) == 2:
                        for col in row.findAll("td"):
                            if col.get_text().find(':') == -1:
                                g = col.get_text().replace(' ', '').replace('%', '')
                                ccl.append(g)
            i = i + 1
    except IndexError as e:
        writelog('[WARNING] INDEX ERROR Encountered ' + str(e),
                 'yql_highlight', p)
        return 1

    # Only insert when every configured column has a scraped value.
    if len(ccl) == len(cn):
        for cc in ccl:
            s = s + '\"' + cc + '\"' + ', '
        dbquery(s[:-2] + ')')
        return 0
    writelog('[CRITICAL] No Data Retrieved', 'yql_highlight', p)
    return 1
def yql_dividends(tick, attempts):
    """Scrape dividend statistics for *tick* and replace its row in
    yql_dividends.

    attempts -- retry counter for incomplete HTTP reads (max 3).
    Returns 0 on success, 1 on failure; exits if the dividends
    configuration file is missing.
    """
    # Indentation normalized to spaces: the original mixed tabs with
    # space-indented lines, which is a TabError in Python 3.
    p = [tick, attempts]
    # Web scraping
    try:
        req = Request(
            'http://finance.yahoo.com/q/ks?s=' + tick + '+Key+Statistics',
            data=None,
            headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
            }
        )
        html = urlopen(req)
        data = html.read()
    # HTTPError is a subclass of URLError, so it must be caught first;
    # the original order made this handler unreachable.
    except HTTPError as e:
        writelog('[WARNING] HTTP ERROR Encountered', 'yql_dividends', p)
        writelog(e, 'yql_dividends', p)
        return 1
    except URLError as e:
        writelog('[CRITICAL] URL ERROR Encountered', 'yql_dividends', p)
        writelog(e, 'yql_dividends', p)
        return 1
    except http.client.IncompleteRead:
        writelog('[WARNING] HTTP INCOMPLETE ERROR', 'yql_dividends', p)
        if attempts < 3:
            r = yql_dividends(tick, attempts + 1)
        else:
            writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES', 'yql_dividends', p)
            return 1
        if r == 0:
            return 0
        writelog('[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve', 'yql_dividends', p)
        return 1

    # Find table & Parse
    soup = BeautifulSoup(data, 'html.parser')
    for tag in soup.find_all('sup'):
        tag.replaceWith('')
    table = soup.find_all("table", {"class": "yfnc_datamodoutline1"})

    cn = getconf('dividends')
    if not cn:
        writelog('[CRITICAL] No Configuration File Found', 'yql_dividends', p)
        # sys.exit raises SystemExit; the original's trailing "return 1"
        # was unreachable.
        sys.exit('[CRITICAL] No Configuration File Found')

    dbquery('DELETE FROM yql_dividends WHERE tick = \'' + tick + '\'')

    s = 'INSERT INTO yql_dividends (tick, '
    for ele in cn:
        s = s + ele + ', '
    s = s[:-2] + ') VALUES (\'' + tick + '\', '

    # The tenth matched table holds the dividend rows; keep value cells
    # at position 4+ (skipping index 7), minus '%' and thousands commas.
    ccl = []
    i = 0
    try:
        for ele in table:
            if i == 9:
                filterc = 0
                for row in ele.findAll("tr"):
                    if len(row) == 2:
                        for col in row.findAll("td"):
                            if col.get_text().endswith(':') == False:
                                if (filterc >= 4) and (filterc != 7):
                                    g = col.get_text().replace("%", '').replace(',', '')
                                    ccl.append(g)
                                filterc = filterc + 1
            i = i + 1
    except IndexError as e:
        writelog('[WARNING] INDEX ERROR Encountered ' + str(e), 'yql_dividends', p)
        return 1
    # Only insert when every configured column has a scraped value.
    if len(ccl) == len(cn):
        for cc in ccl:
            s = s + '\"' + cc + '\"' + ', '
        dbquery(s[:-2] + ')')
        return 0
    writelog('[CRITICAL] No Data Retrieved', 'yql_dividends', p)
    return 1
def yql_growth(tick, attempts):
    """Scrape the growth-estimates table for *tick* and insert its four
    data columns into <tick>_yql_growth.

    attempts -- retry counter for incomplete HTTP reads (max 3).
    Returns 0 if every column row inserted, 1 otherwise; exits if the
    growth configuration file is missing.
    """
    p = [tick, attempts]
    # Web scraping
    try:
        req = Request(
            'http://finance.yahoo.com/q/ae?s=' + tick + '+Analyst+Estimates',
            data=None,
            headers={
                'User-Agent':
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
            })
        html = urlopen(req)
        data = html.read()

        # Find table & parse
        soup = BeautifulSoup(data, 'html.parser')
    # HTTPError is a subclass of URLError, so it must be caught first;
    # the original order made this handler unreachable.
    except HTTPError as e:
        writelog('[WARNING] HTTP ERROR Encountered', 'yql_growth', p)
        writelog(e, 'yql_growth', p)
        return 1
    except URLError as e:
        writelog('[CRITICAL] URL ERROR Encountered', 'yql_growth', p)
        writelog(e, 'yql_growth', p)
        return 1
    except http.client.IncompleteRead:
        writelog('[WARNING] HTTP INCOMPLETE ERROR', 'yql_growth', p)
        if attempts < 3:
            r = yql_growth(tick, attempts + 1)
        else:
            writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES',
                     'yql_growth', p)
            return 1
        if r == 0:
            return 0
        writelog('[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve',
                 'yql_growth', p)
        return 1

    # Remove subscripts
    for tag in soup.find_all('sup'):
        tag.replaceWith('')

    table = soup.find_all("table", {"class": "yfnc_tableout1"})

    cn = getconf('growth')
    if not cn:
        writelog('[CRITICAL] No Configuration File Found', 'yql_growth', p)
        # sys.exit raises SystemExit; the original's trailing "return 1"
        # was unreachable.
        sys.exit('[CRITICAL] No Configuration File Found')

    s = 'INSERT INTO ' + tick + '_yql_growth ('
    for ele in cn:
        s = s + ele + ', '
    s = s[:-2] + ') VALUES ('

    # The sixth matched table holds the data: cell columns 1-4 of each
    # row are collected into ccn, one list per output row.
    c = 0
    ccn = [[], [], [], []]
    retval = 0
    try:
        for ele in table:
            if c == 5:
                for row in ele.findAll("tr"):
                    # Strip embedded tables/styles before reading cells.
                    for tag in row.find_all(['table', 'style']):
                        tag.replaceWith('')
                    i = 0
                    for col in row.findAll("td"):
                        if i > 0:
                            ccn[i - 1].append(col.get_text())
                        i = i + 1
            c = c + 1

        for cr in ccn:
            ss = s
            if len(cr) == len(cn):
                for cc in cr:
                    ss = ss + '\'' + cc + '\', '
                dbquery(ss[:-2] + ')')
            else:
                # Column count mismatch: skip the row, report failure.
                retval = 1
    except IndexError as e:
        writelog('[WARNING] INDEX ERROR Encountered', 'yql_growth', p)
        writelog(e, 'yql_growth', p)
        return 1
    return retval
# ---- 示例#22 (scraped example separator) ----
        logger.writelog(str(ex_type.__name__), "Exception Type")
        logger.writelog(str(ex_value), "Exception Message")
        logger.writelog(str(trace_back), "Traceback")
    finally:
        logger.result_close()


# Hyper-parameter settings for the gamma sweep / clustering run below.
rows_toload = 20000   # number of dataset rows to load
gamma_start = 1.0e-5  # first gamma value of the sweep
gamma_end = 10.0      # last gamma value of the sweep
params = 10000        # number of gamma steps between start and end
clusters = 50         # cluster count used by the run

logger.log_open()
# Uniform step so the sweep covers [gamma_start, gamma_end] in `params` steps.
stepsize = round(((gamma_end - gamma_start) / params), 5)
logger.writelog(gamma_start, "Gamma_start")
logger.writelog(gamma_end, "Gamma_end")
logger.writelog(params, "Parameters")
logger.writelog(stepsize, "Step_size")
logger.writelog(clusters, "clusters")
'''
------Below commented code is for loading letters-----------

data,label = load_letters(datafiles_names[0],"letters.csv",rows_toload)
logger.writelog(str(data.shape),"Dataset_dimension")
logger.writelog(str(label.shape),"Groundtruth_dimension")

df = pd.DataFrame(label)
df.to_csv(datafiles_names[0]+"label.csv",index=False,header=None)
del label
gc.collect()
# ---- 示例#23 (scraped example separator) ----
def yql_hist(tick, years, attempts):
    """Scrape ~10 years of weekly history for *tick*, inserting
    (volume, adjclose) rows into <tick>_hist, then walk further back
    via yql_hist_rep() once per additional year.

    attempts -- retry counter for incomplete HTTP reads (max 3).
    Returns 0 on success, 1 on failure.
    """
    # Indentation normalized to spaces: the original mixed tabs with
    # space-indented lines, which is a TabError in Python 3.
    p = [tick, years, attempts]
    now = datetime.datetime.now()
    yeara = now.year
    montha = now.month
    daya = now.day

    # Start the window ten years back; clamp month/day so the URL stays valid.
    yearb = now.year - 10
    monthb = now.month - 1
    dayb = now.day - 2
    if monthb == 0:
        monthb = 1
    if dayb <= 0:
        dayb = 1

    # Web scraping
    try:
        req = Request(
            'https://ca.finance.yahoo.com/q/hp?s=' + tick + '&a=' + str(monthb) + '&b=' + str(dayb) + '&c=' + str(yearb) + '&d=' + str(montha) + '&e=' + str(daya) + '&f=' + str(yeara) + '&g=w',
            data=None,
            headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
            }
        )
        html = urlopen(req)
        data = html.read()

        # Find table & parse
        soup = BeautifulSoup(data, 'html.parser')
    # HTTPError is a subclass of URLError, so it must be caught first;
    # the original order made this handler unreachable.
    except HTTPError as e:
        writelog('[WARNING] HTTP ERROR Encountered', 'yql_hist', p)
        writelog(e, 'yql_hist', p)
        return 1
    except URLError as e:
        writelog('[CRITICAL] URL ERROR Encountered', 'yql_hist', p)
        writelog(e, 'yql_hist', p)
        return 1
    except http.client.IncompleteRead:
        writelog('[WARNING] HTTP INCOMPLETE ERROR', 'yql_hist', p)
        if attempts < 3:
            # Retry this same scrape (the original mistakenly retried
            # yql_growth here, with the wrong argument list).
            r = yql_hist(tick, years, attempts + 1)
        else:
            writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES', 'yql_hist', p)
            return 1
        if r == 0:
            return 0
        writelog('[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve', 'yql_hist', p)
        return 1

    # Remove subscripts
    for tag in soup.find_all('sup'):
        tag.replaceWith('')

    table = soup.find("table", {"class": "yfnc_datamodoutline1"})

    first = True
    # Most recent date cell seen; start point handed to yql_hist_rep.
    # Initialized so an empty table cannot raise NameError below.
    last = None
    try:
        for t in table.findAll("table"):
            for row in t.findAll("tr"):
                iquery = 'INSERT INTO ' + tick + '_hist (volume, adjclose) VALUES ('
                # Skip the header row (first) and any non-data rows.
                if len(row) == 7 and first == False:
                    s = 0
                    for col in row.findAll("td"):
                        if s == 1:
                            last = col.get_text()
                        if (s == 5) | (s == 6):
                            tv = col.get_text().replace(',', '')
                            iquery = iquery + tv + ', '
                        s = s + 1
                    dbquery(iquery[:-2] + ')')
                else:
                    first = False

        # Page back one extra year at a time until a call reports failure.
        for i in range(1, years):
            if yql_hist_rep(tick, i, last, 0) == 1:
                break
    except IndexError as e:
        writelog('[WARNING] INDEX ERROR Encountered', 'yql_hist', p)
        writelog(e, 'yql_hist', p)
        return 1
    return 0
def yql_competitor(tick, attempts):
    """Scrape the Yahoo Finance 'Competitors' page for *tick* and insert one
    database row per competitor into the <tick>_yql_competitor table.

    tick     -- ticker symbol (used in the URL and the target table name)
    attempts -- retry counter; on http.client.IncompleteRead the function
                calls itself with attempts + 1 and gives up after 3 tries
    Returns 0 on success, 1 on any failure.  Exits the whole process via
    sys.exit when the 'competitor' column configuration cannot be loaded.
    """
    p = []  # log context passed to writelog: [tick, attempts]
    p.append(tick)
    p.append(attempts)
    # Web Scraping
    try:
        req = Request(
            'http://finance.yahoo.com/q/co?s=' + tick + '+Competitors',
            data=None,
            headers={
                # Browser User-Agent: Yahoo serves different markup to bots.
                'User-Agent':
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
            })
        html = urlopen(req)
        data = html.read()

        # Find table & Parse
        soup = BeautifulSoup(data, 'html.parser')

        # Remove subscripts (footnote markers) so cell text stays clean.
        for tag in soup.find_all('sup'):
            tag.replaceWith('')

        table = soup.find_all("table", {"class": "yfnc_datamodoutline1"})
    except URLError as e:
        writelog('[CRITICAL] URL ERROR Encountered', 'yql_competitor', p)
        writelog(e, 'yql_competitor', p)
        return 1
    except HTTPError as e:
        writelog('[WARNING] HTTP ERROR Encountered', 'yql_competitor', p)
        writelog(e, 'yql_competitor', p)
        return 1
    except http.client.IncompleteRead as e:
        writelog('[WARNING] HTTP INCOMPLETE ERROR', 'yql_competitor', p)
        if (attempts < 3):
            # Retry recursively; r holds the result of the deeper attempt.
            r = yql_competitor(tick, attempts + 1)
        else:
            writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES',
                     'yql_competitor', p)
            return 1

        if (r == 0):
            return 0
        else:
            writelog(
                '[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve',
                'yql_competitor', p)
            return 1

    # cl collects one list per competitor column: [name, cell1, cell2, ...].
    cl = []
    first = True
    try:
        for ele in table:
            for row in ele.findAll("tr"):
                if (first == True):
                    # Header row: one <th> per competitor.  The very first
                    # <th> (the row-label cell) is skipped by flipping
                    # `first`; a "Label:"-style header terminates the scan.
                    for col in row.findAll("th"):
                        cn = []
                        if (first == True):
                            first = False
                        elif (col.get_text().find(':') > -1):
                            break
                        else:
                            cn.append(col.get_text())
                            cl.append(cn)
                else:
                    # Data row: append the i-th cell to the i-th column list.
                    i = 0
                    for col in row.findAll("td"):
                        cl[i].append(col.get_text())
                        i = i + 1
            break  # only the first matched table is parsed
    except IndexError as e:
        writelog('[WARNING] INDEX ERROR Encountered', 'yql_competitor', p)
        writelog(e, 'yql_competitor', p)
        return 1

    s = 'INSERT INTO ' + tick + '_yql_competitor (tick, '
    cn = getconf('competitor')
    if not cn:
        writelog('[CRITICAL] No Configuration File Found', 'yql_competitors',
                 p)
        sys.exit('[CRITICAL] No Configuration File Found')
        return 1  # NOTE(review): unreachable -- sys.exit above raises SystemExit
    for ele in cn:
        s = s + ele + ', '
    s = s[:-2] + ') VALUES ('

    retval = 0

    # Insert each competitor column whose length (name + values) matches the
    # configured schema; any mismatch flags a partial failure via retval.
    for col in cl:
        gs = s
        if (len(cn) == len(col) - 1):
            for row in col:
                gs = gs + '\'' + str(row) + '\', '
            gs = gs[:-2] + ')'
            dbquery(gs)
        else:
            retval = 1
    return retval
示例#25
0
def dr_cluster(data, method, gamma, params, clusters, stepsize, rows_toload,
               dropped_class_numbers):
    """Sweep the Kernel-PCA `gamma` parameter and cluster each embedding.

    For each of `params` sweep iterations the data is projected with an
    RBF-kernel KernelPCA at the current `gamma` and written to
    KPCA_output_path, then clustered either by an external C thresholding
    program ("Thresholding") or by k-means ("Kmeans1D"/"Kmeans2D").  Every
    iteration logs the silhouette coefficient and the Hungarian-assignment
    purity; the loop runs one extra pass (i == params) that replays the
    best-scoring gamma and writes the final result files.

    data   -- feature matrix accepted by KernelPCA.fit_transform
              (presumably samples x features -- TODO confirm with caller)
    method -- "Kmeans2D" (2 components), "Kmeans1D" or "Thresholding"
              (1 component)
    gamma, params, stepsize -- sweep start value, iteration count, increment
    clusters, rows_toload, dropped_class_numbers -- forwarded to the
              clustering / hungarian helpers

    NOTE(review): `components` (and `flag` for thresholding) are bound only
    for the three method names above; any other `method` raises NameError at
    the first logger call.  `resetflag` is assigned but never read here.
    """
    if (method == "Kmeans2D"):
        components = 2
    if (method == "Kmeans1D" or method == "Thresholding"):
        components = 1
        flag = 0      # 0 until the C thresholding helper has been compiled
        resetflag = 0
    logger.writelog(components, "Components")
    logger.result_open(method)
    print(method)
    # Best-so-far trackers across the gamma sweep.
    max_sc = -100.0
    best_purity = 0.0
    best_gamma = 0.0
    serial_num = 0
    try:
        # One extra iteration (i == params) re-runs the best gamma found.
        for i in range(0, params + 1):
            transformer = KernelPCA(n_components=components,
                                    kernel='rbf',
                                    gamma=gamma)
            data_transformed = transformer.fit_transform(data)
            # The clustering helpers read the embedding back from this CSV.
            df = pd.DataFrame(data_transformed)
            df.to_csv(KPCA_output_path, index=False, header=None)
            del df  # release the frame before the next heavy step
            gc.collect()
            if (method == "Thresholding"):
                if (flag == 0):
                    # Compile the C helper once per call.
                    os.system("cc c_thresholding_new.c")
                    flag = 1
                start = timeit.default_timer()
                os.system("./a.out " + str(clusters) + " " + str(rows_toload))
                end = timeit.default_timer()
                thresholding_time = (end - start)
                sc = silhouette.silhouette(KPCA_output_path,
                                           Thresholding_paths[1])
                groundtruth_distribution, temp_assignment_error_matrix, row_ind, col_ind, class_numbers, purity = hungarian.hungarian(
                    't', Thresholding_paths[0], clusters, rows_toload,
                    dropped_class_numbers)
                logger.writeresult(i + 1, clusters, method, thresholding_time,
                                   gamma, sc, purity)
                #print(i+1,thresholding_time,gamma,sc,purity)
                if (i < params):
                    # Track the gamma with the best silhouette score.
                    if (sc > max_sc):
                        max_sc = sc
                        best_gamma = gamma
                        best_purity = purity
                        serial_num = i + 1
                if (i == (params - 1)):
                    # Last sweep step: switch to the best gamma so the
                    # final (i == params) pass replays it.
                    gamma = best_gamma
                    sc = max_sc
                    purity = best_purity
                if (i == params):
                    print(best_gamma, max_sc, best_purity)
                    logger.writeresult(" ", " ", " ", " ", " ", " ", " ")
                    logger.writeresult(serial_num, clusters, method,
                                       thresholding_time, best_gamma, max_sc,
                                       best_purity)
                    logger.writeresult(" ", " ", " ", " ", " ", " ", " ")
                    logger.writefinalresult(serial_num, clusters, method,
                                            thresholding_time, best_gamma,
                                            max_sc, best_purity)
                    write_hungarian_result(best_gamma, clusters,
                                           groundtruth_distribution,
                                           temp_assignment_error_matrix,
                                           row_ind, col_ind, class_numbers,
                                           best_purity, method, params,
                                           stepsize, dropped_class_numbers)
            else:
                # K-means branch (Kmeans1D / Kmeans2D): same bookkeeping as
                # the thresholding branch, timing done inside the helper.
                kmeans_time = kmeans.kmeans(KPCA_output_path, KMeans_paths[1],
                                            clusters)
                kmeans.groundtruth_distribution(KMeans_paths[1],
                                                KMeans_paths[0],
                                                datafiles_names[0],
                                                datafiles_names[2], clusters)
                sc = silhouette.silhouette(KPCA_output_path, KMeans_paths[1])
                groundtruth_distribution, temp_assignment_error_matrix, row_ind, col_ind, class_numbers, purity = hungarian.hungarian(
                    'k', KMeans_paths[0], clusters, rows_toload,
                    dropped_class_numbers)
                logger.writeresult(i + 1, clusters, method, kmeans_time, gamma,
                                   sc, purity)
                #print(i+1,kmeans_time,gamma,sc,purity)
                if (i < params):
                    if (sc > max_sc):
                        max_sc = sc
                        best_gamma = gamma
                        best_purity = purity
                        serial_num = i + 1
                if (i == (params - 1)):
                    gamma = best_gamma
                    sc = max_sc
                    purity = best_purity
                if (i == params):
                    print(best_gamma, max_sc, best_purity)
                    logger.writeresult(" ", " ", " ", " ", " ", " ", " ")
                    logger.writeresult(serial_num, clusters, method,
                                       kmeans_time, best_gamma, max_sc,
                                       best_purity)
                    logger.writeresult(" ", " ", " ", " ", " ", " ", " ")
                    logger.writefinalresult(serial_num, clusters, method,
                                            kmeans_time, best_gamma, max_sc,
                                            best_purity)
                    write_hungarian_result(best_gamma, clusters,
                                           groundtruth_distribution,
                                           temp_assignment_error_matrix,
                                           row_ind, col_ind, class_numbers,
                                           best_purity, method, params,
                                           stepsize, dropped_class_numbers)
            if (i < (params - 1)):
                gamma = gamma + stepsize
    except (KeyboardInterrupt, SystemExit, Exception) as ex:
        # Broad catch so any crash is logged and the result file still closes.
        ex_type, ex_value, ex_traceback = sys.exc_info()
        trace_back = traceback.extract_tb(ex_traceback)
        logger.writelog(str(ex_type.__name__), "Exception Type")
        logger.writelog(str(ex_value), "Exception Message")
        logger.writelog(str(trace_back), "Traceback")
    finally:
        logger.result_close()
示例#26
0
import time

from logger import writelog

# Minimal logger demo: record start, idle briefly, record completion.
try:
    # "O" presumably selects an overwrite/open mode for the log file --
    # TODO confirm against logger.writelog's signature.
    writelog("logfile1.log", "Program started.", "O")

    time.sleep(5)

    writelog("logfile1.log", "Program finished.")
except Exception as error:
    # Top-level boundary: report the failure instead of crashing silently.
    # BUGFIX: corrected the typo'd message ("And error has occured").
    print("Oh no! An error has occurred")
    print(error)
示例#27
0
import time

from logger import writelog

# Minimal logger smoke test: record start, idle, then record completion.
writelog("logfile1.log", "Program Started")

time.sleep(10)

writelog("logfile1.log", "Program Finished")
def yql_analyst_summary(tick, attempts):
    """Pull the Yahoo Finance analyst-opinion summary for *tick* and store it.

    Deletes any existing row for the ticker in yql_analyst_summary, then
    inserts the freshly scraped values.  Retries up to 3 times when the HTTP
    response arrives incomplete.  Returns 0 on success, 1 on failure; exits
    the process when the 'analyst_summary' configuration is missing.
    """
    log_ctx = [tick, attempts]
    # Fetch and parse the analyst-opinion page.
    try:
        request = Request(
            'http://finance.yahoo.com/q/ao?s=' + tick + '+Analyst+Opinion',
            data=None,
            headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
            })
        page = urlopen(request)
        markup = page.read()
        soup = BeautifulSoup(markup, 'html.parser')
        # Strip footnote superscripts so cell text stays clean.
        for sup_tag in soup.find_all('sup'):
            sup_tag.replaceWith('')
        tables = soup.find_all("table", {"class": "yfnc_datamodoutline1 equaltable"})
    except URLError as err:
        writelog('[CRITICAL] URL ERROR Encountered' + str(err), 'yql_analyst_summary', log_ctx)
        return 1
    except HTTPError as err:
        writelog('[WARNING] HTTP ERROR Encountered ' + str(err), 'yql_analyst_summary', log_ctx)
        return 1
    except http.client.IncompleteRead:
        writelog('[WARNING] HTTP INCOMPLETE ERROR', 'yql_analyst_summary', log_ctx)
        if attempts >= 3:
            writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES', 'yql_analyst_summary', log_ctx)
            return 1
        if yql_analyst_summary(tick, attempts + 1) == 0:
            return 0
        writelog('[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve', 'yql_analyst_summary', log_ctx)
        return 1

    # Drop any stale row for this ticker before re-inserting.
    dbquery("DELETE FROM yql_analyst_summary WHERE tick = '" + tick + "'")

    columns = getconf('analyst_summary')
    if not columns:
        writelog('[CRITICAL] No Configuration File Found', 'yql_analyst_summary', log_ctx)
        sys.exit('[CRITICAL] No Configuration File Found')
        return 1

    insert = ('INSERT INTO yql_analyst_summary (tick, '
              + ', '.join(columns)
              + ") VALUES ('" + tick + "', ")

    # Collect every data cell that is not a "Label:" cell, stripping '%'.
    values = [
        cell.get_text().replace("%", "")
        for tbl in tables
        for row in tbl.findAll("tr")
        for cell in row.findAll("td")
        if cell.get_text().find(':') == -1
    ]

    # Only insert when the page yielded exactly one value per column.
    if len(values) != len(columns):
        return 1
    insert = insert + ', '.join("'" + v + "'" for v in values) + ')'
    dbquery(insert)
    return 0
示例#29
0
def amigo(tick, category, freq, attempts):
    """Scrape an amigobulls.com financial-statement table and insert its rows.

    tick     -- ticker symbol
    category -- statement type, e.g. 'balance-sheet', 'cash-flow',
                'income-statement' (used in the URL and the table name)
    freq     -- reporting frequency, e.g. 'quarterly'
    attempts -- retry counter; the function re-invokes itself up to 3 times
                on URL errors and incomplete HTTP reads
    Returns 0 on success, 1 on failure; exits the process when the column
    configuration for *category* is missing.
    """
    p = []  # log context: [tick, category, freq, attempts]
    p.append(tick)
    p.append(category)
    p.append(freq)
    p.append(attempts)

    try:
        # Web Scraping: fetch the statement page with a browser User-Agent.
        req = Request(
            'http://amigobulls.com/stocks/' + tick + '/' + category + '/' +
            freq,
            data=None,
            headers={
                'User-Agent':
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
            })
        html = urlopen(req)

        soup = BeautifulSoup(html, 'html.parser')
        # Strip footnote superscripts so cell text stays clean.
        for tag in soup.find_all('sup'):
            tag.replaceWith('')
        table = soup.find("table", {"id": "stackinfo"})

    except URLError as e:
        writelog('[CRITICAL] URL ERROR Encountered', 'amigo', p)
        if (attempts < 3):
            # Retry recursively; r holds the result of the deeper attempt.
            r = amigo(tick, category, freq, attempts + 1)
        else:
            writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES', 'amigo',
                     p)
            return 1

        print("Attempt Number: " + str(attempts) + " Result: " + str(r))
        if (r == 0):
            return 0
        else:
            writelog(
                '[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve',
                'amigo', p)
            writelog(e, 'amigo', p)
            return 1
    except HTTPError as e:
        writelog('[WARNING] HTTP ERROR Encountered', 'amigo', p)
        writelog(e, 'amigo', p)
        return 1
    except http.client.IncompleteRead as e:
        writelog('[WARNING] HTTP INCOMPLETE ERROR', 'amigo', p)
        if (attempts < 3):
            r = amigo(tick, category, freq, attempts + 1)
        else:
            writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES', 'amigo',
                     p)
            return 1
        print("Attempt Number: " + str(attempts) + " Result: " + str(r))
        if (r == 0):
            return 0
        else:
            writelog(
                '[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve',
                'amigo', p)
            return 1

    # BUGFIX: soup.find returns None when the page has no #stackinfo table
    # (unknown ticker / layout change); previously this crashed with an
    # uncaught AttributeError below instead of reporting a failure.
    if table is None:
        writelog('[WARNING] No stackinfo table found', 'amigo', p)
        return 1

    # init
    init = False
    cl = []  # row labels (first <td> of each data row); collected, not used
    cd = []  # one list per data column

    # Size cd from the number of cells in the first row.
    for r in table.findAll("tr"):
        for e in r.findAll("td"):
            cd.append([])
        break

    for row in table.findAll("tr"):
        if (init == True):
            s = True
            c = 0
            for col in row.findAll("td"):
                ltd = col.get_text()
                ltd = ltd.replace("-", "")
                ltd = ltd.replace(" ", "")
                if (s == True):
                    # First cell is the row label; the rest are column data.
                    cl.append(ltd)
                    s = False
                else:
                    cd[c].append(ltd)
                    c = c + 1
        else:
            # Skip the header row.
            init = True

    cname = getconf(category)
    if not cname:
        p = []
        p.append(tick)
        p.append(category)
        p.append(freq)
        writelog('[CRITICAL] No Configuration File Found', 'amigo', p)
        sys.exit('[CRITICAL] No Configuration File Found')
    base = 'INSERT INTO ' + 'amigo_' + tick + '_' + category.replace(
        '-', '_') + '_' + freq + ' ('

    for li in cname:
        base = base + li + ', '
    base = base[:-2] + ') VALUES ('

    # Drop the last (typically incomplete) data column.
    # BUGFIX: guard against an empty cd -- pop() on an empty list raised
    # IndexError when the page had no data rows at all.
    if cd:
        cd.pop()
    retval = 0
    for z in cd:
        tinsert = base
        if z:
            if (len(cname) == len(z)):
                for cz in z:
                    tinsert = tinsert + '\'' + cz + '\'' + ', '
                tinsert = tinsert[:-2] + ');'
                dbquery(tinsert)
            else:
                # Cell count does not match the configured schema.
                retval = 1
    return retval
示例#30
0
def _populate_check_init(ok, success_msg, failure_msg, p):
    """Log one table-initialisation outcome; abort the whole run on failure."""
    if ok == 0:
        writelog(success_msg, 'populate', p)
    else:
        writelog(failure_msg, 'populate', p)
        sys.exit(failure_msg)


def populate(list, deletet, start):
    """Populate the per-ticker database tables for one ticklist partition.

    list    -- partition number; the tickers come from config
               "ticklist_p<list>".  (NOTE: shadows the builtin `list`; name
               kept for caller compatibility.)
    deletet -- when 1, drop and recreate the ticklist status table first
    start   -- 1-based position in the tick list at which to (re)start,
               allowing interrupted runs to resume
    For each tick: (re)initialize its tables, run every scraper, record the
    outcome in ticklist, and throttle between tickers.  Exits the process
    when configuration or table initialisation fails.
    """
    p = []  # log context: [list, deletet]
    p.append(list)
    p.append(deletet)
    writelog('[INFO] Populating Ticklist Number :', 'populate', p)
    cname = getconf("ticklist_p" + str(list))
    if not cname:
        writelog('[CRITICAL] No Configuration File Found', 'populate', p)
        sys.exit('[CRITICAL] No Configuration File Found')

    term = "quarterly"
    total = len(cname)
    counter = 0

    if (deletet == 1):
        writelog('[INFO] Dropping and Creating New Table', 'populate', p)
        dberase("DROP TABLE IF EXISTS ticklist")
        dbquery("CREATE TABLE ticklist (id INT NOT NULL AUTO_INCREMENT, tick VARCHAR(10) NOT NULL, enabled VARCHAR(5) NOT NULL, status VARCHAR(5) NOT NULL, manual VARCHAR(5) NOT NULL, marketcap VARCHAR(20), sector VARCHAR(100), industry VARCHAR(100), errorfnc VARCHAR(200), enabledp VARCHAR(100), PRIMARY KEY(id))")
    for tick in cname:
        counter = counter + 1
        if (counter < start):
            continue  # resume support: skip ticks before `start`

        print(str(counter) + ":" + tick)
        writelog('[INFO] Progress: ' + str(counter) + ' / ' + str(total),
                 'populate', p)
        success = 0
        failure = 0
        failed_functions = []

        writelog('[INFO] Re-Initializing Tables Now......(' + tick + ')',
                 'populate', p)
        # BUGFIX: compute tname BEFORE calling amigo_init -- previously the
        # failure branch referenced tname before assignment (NameError).
        for category, sleep_after in (("balance-sheet", True),
                                      ("cash-flow", True),
                                      ("income-statement", False)):
            tname = ('amigo_' + tick + '_' + category.replace('-', '_') +
                     '_' + term)
            _populate_check_init(
                amigo_init(tick, category, term),
                '[SUCCESS] Initialized ' + tname + ' table',
                '[CRITICAL] Unable to create database ' + tname + ' table',
                p)
            if sleep_after:
                time.sleep(1)  # throttle between remote-hitting inits

        for suffix, initfn in (('_yql_analyst_trends', yql_analyst_trends_init),
                               ('_yql_competitor', yql_competitor_init),
                               ('_yql_estimates', yql_estimates_init),
                               ('_yql_growth', yql_growth_init),
                               ('_yql_hist', yql_hist_init)):
            _populate_check_init(
                initfn(tick),
                '[SUCCESS] Initialized ' + tick + suffix,
                '[CRITICAL] Unable to create database ' + tick + suffix +
                ' table',
                p)

        writelog('[INFO] Populating Tables Now......(' + tick + ')',
                 'populate', p)

        # Every scraper returns 0 on success, non-zero on failure.
        steps = (
            ("amigo-balance-sheet", lambda: amigo(tick, "balance-sheet", term, 0)),
            ("amigo-cash-flow", lambda: amigo(tick, "cash-flow", term, 0)),
            ("amigo-income-statement", lambda: amigo(tick, "income-statement", term, 0)),
            ("yql_analyst_summary", lambda: yql_analyst_summary(tick, 0)),
            ("yql_analyst_trends", lambda: yql_analyst_trends(tick, 0)),
            ("yql_competitor", lambda: yql_competitor(tick, 0)),
            ("yql_day", lambda: yql_day(tick, 0)),
            ("yql_dividends", lambda: yql_dividends(tick, 0)),
            ("yql_estimates", lambda: yql_estimates(tick, 0)),
            ("yql_growth", lambda: yql_growth(tick, 0)),
            ("yql_highlight", lambda: yql_highlight(tick, 0)),
        )
        for fname, step in steps:
            if step() == 0:
                success = success + 1
            else:
                # BUGFIX: `failure` is now incremented for every failed step.
                # Previously yql_day..yql_highlight only appended to
                # failed_functions, so a run with failures in those steps
                # was still recorded as clean (failure == 0).
                failure = failure + 1
                failed_functions.append(fname)

        writelog(
            '[INFO] Populating Database Tables Complete! ' + '(' + tick +
            ')', 'populate', p)

        # Remove any previous status row for this tick before re-inserting.
        dbquery('DELETE FROM ticklist WHERE tick = \'' + tick + '\'')

        if (failure > 0):
            flist = functostr(failed_functions)
            wmsg = '[WARNING] Encountered Some Failures While Populating Database for ' + tick + '. \nSuccess: ' + str(
                success) + '  Failure: ' + str(
                    failure) + '\nList of Failed Functions: ' + flist
            writelog(wmsg, 'populate', p)
            s = "INSERT INTO ticklist (tick, enabled, status, manual, errorfnc) VALUES (\'" + tick + "\', \'1\', \'1\', \'1\', \'" + flist + "\')"
        else:
            writelog(
                '[SUCCESS] Populated Database Tables For ' + tick +
                ' Without Error', 'populate', p)
            s = "INSERT INTO ticklist (tick, enabled, status, manual) VALUES (\'" + tick + "\', \'1\', \'0\', \'1\')"
        dbquery(s)
        time.sleep(7)  # throttle between tickers
    writelog('[INFO] Finished Populating Database', 'populate', p)
    writelog('[INFO] Finished Populating Ticklist Number: ' + str(list),
             'populate', p)
def yql_competitor(tick, attempts):
    """Scrape the Yahoo Finance competitors page for *tick* and store one
    row per competitor in the <tick>_yql_competitor table.

    Retries up to three times when the HTTP response arrives incomplete.
    Returns 0 on success, 1 on failure; exits the process when the
    'competitor' column configuration is missing.
    """
    log_ctx = [tick, attempts]
    # Fetch and parse the competitors page.
    try:
        request = Request(
            'http://finance.yahoo.com/q/co?s=' + tick + '+Competitors',
            data=None,
            headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
            })
        page = urlopen(request)
        markup = page.read()
        soup = BeautifulSoup(markup, 'html.parser')
        # Drop footnote superscripts before reading any cell text.
        for sup_tag in soup.find_all('sup'):
            sup_tag.replaceWith('')
        tables = soup.find_all("table", {"class": "yfnc_datamodoutline1"})
    except URLError as err:
        writelog('[CRITICAL] URL ERROR Encountered', 'yql_competitor', log_ctx)
        writelog(err, 'yql_competitor', log_ctx)
        return 1
    except HTTPError as err:
        writelog('[WARNING] HTTP ERROR Encountered', 'yql_competitor', log_ctx)
        writelog(err, 'yql_competitor', log_ctx)
        return 1
    except http.client.IncompleteRead:
        writelog('[WARNING] HTTP INCOMPLETE ERROR', 'yql_competitor', log_ctx)
        if attempts >= 3:
            writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES', 'yql_competitor', log_ctx)
            return 1
        if yql_competitor(tick, attempts + 1) == 0:
            return 0
        writelog('[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve', 'yql_competitor', log_ctx)
        return 1

    # columns[i] holds [competitor name, value1, value2, ...] for column i.
    columns = []
    in_header = True
    try:
        for tbl in tables:
            for row in tbl.findAll("tr"):
                if in_header:
                    for heading in row.findAll("th"):
                        if in_header:
                            # Skip the leading row-label header cell.
                            in_header = False
                        elif heading.get_text().find(':') > -1:
                            # A "Label:"-style header ends the scan.
                            break
                        else:
                            columns.append([heading.get_text()])
                else:
                    for idx, cell in enumerate(row.findAll("td")):
                        columns[idx].append(cell.get_text())
            break  # only the first matching table is parsed
    except IndexError as err:
        writelog('[WARNING] INDEX ERROR Encountered', 'yql_competitor', log_ctx)
        writelog(err, 'yql_competitor', log_ctx)
        return 1

    stmt = 'INSERT INTO ' + tick + '_yql_competitor (tick, '
    fields = getconf('competitor')
    if not fields:
        writelog('[CRITICAL] No Configuration File Found', 'yql_competitors', log_ctx)
        sys.exit('[CRITICAL] No Configuration File Found')
        return 1
    stmt = stmt + ', '.join(fields) + ') VALUES ('

    # Insert each column whose length (name + values) matches the schema.
    retval = 0
    for record in columns:
        if len(fields) == len(record) - 1:
            query = stmt + ', '.join("'" + str(v) + "'" for v in record) + ')'
            dbquery(query)
        else:
            retval = 1
    return retval
def yql_estimates(tick, attempts):
    """Scrape Yahoo Finance analyst estimates for *tick* into <tick>_yql_estimates.

    attempts -- retry counter; retries up to 3 times on an incomplete read.
    Returns 0 when every estimate column was inserted, 1 otherwise; exits
    the process when the 'estimates' column configuration is missing.
    """
    p = []  # log context: [tick, attempts]
    p.append(tick)
    p.append(attempts)
    # Web Scraping
    try:
        req = Request(
            'http://finance.yahoo.com/q/ae?s=' + tick + '+Analyst+Estimates',
            data=None,
            headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
            })
        html = urlopen(req)
        data = html.read()
    except URLError as e:
        writelog('[CRITICAL] URL ERROR Encountered ' + str(e), 'yql_estimates', p)
        return 1
    except HTTPError as e:
        writelog('[WARNING] HTTP ERROR Encountered ' + str(e), 'yql_estimates', p)
        return 1
    except http.client.IncompleteRead as e:
        writelog('[WARNING] HTTP INCOMPLETE ERROR', 'yql_estimates', p)
        if (attempts < 3):
            # BUGFIX: retry THIS function; previously this called
            # yql_competitor, so the estimates page was never re-fetched.
            r = yql_estimates(tick, attempts + 1)
        else:
            writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES', 'yql_estimates', p)
            return 1

        if (r == 0):
            return 0
        else:
            writelog('[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve', 'yql_estimates', p)
            return 1

    # Find table & Parse
    soup = BeautifulSoup(data, 'html.parser')

    # Remove subscripts (footnote markers) so cell text stays clean.
    for tag in soup.find_all('sup'):
        tag.replaceWith('')

    table = soup.find_all("table", {"class": "yfnc_tableout1"})

    cn = getconf('estimates')
    # Guard a missing configuration, consistent with the sibling scrapers;
    # previously a missing config crashed in the column loop below.
    if not cn:
        writelog('[CRITICAL] No Configuration File Found', 'yql_estimates', p)
        sys.exit('[CRITICAL] No Configuration File Found')

    s = 'INSERT INTO ' + tick + '_yql_estimates ('
    try:
        for ele in cn:
            s = s + ele + ', '
        s = s[:-2] + ') VALUES ('

        c = 0
        # One list per data column (four estimate periods).
        ccn = [[], [], [], []]
        for ele in table:
            # Only the first three matched tables hold estimate figures.
            if (c >= 0) and (c <= 2):
                for row in ele.findAll("tr"):
                    # Strip nested tables / inline styles before reading cells.
                    for tag in row.find_all(['table', 'style']):
                        tag.replaceWith('')
                    i = 0
                    for col in row.findAll("td"):
                        if (i > 0):
                            # Skip the first cell (row label) of each row.
                            ccn[i - 1].append(col.get_text())
                        i = i + 1
            c = c + 1
    except IndexError as e:
        p = []
        p.append(tick)
        writelog('[WARNING] INDEX ERROR Encountered', 'yql_estimates', p)
        writelog(e, 'yql_estimates', p)
        return 1
    retval = 0
    # Insert each estimate column whose cell count matches the schema.
    for cr in ccn:
        ss = s
        if (len(cr) == len(cn)):
            for cc in cr:
                ss = ss + '\'' + cc + '\', '
            ss = ss[:-2] + ')'
            dbquery(ss)
        else:
            retval = 1
    return retval
def yql_updatetick(tick, attempts):
    """Fetch sector/industry for *tick* from Yahoo Finance and update ticklist.

    attempts -- retry counter; retries up to 3 times on an incomplete read.
    Returns 0 on success (including when no data was found on the page),
    1 on fetch or parse errors.
    """
    p = []  # log context: [tick, attempts]
    p.append(tick)
    p.append(attempts)
    # Web Scraping
    try:
        req = Request(
            'http://finance.yahoo.com/q/in?s=' + tick,
            data=None,
            headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
            })
        html = urlopen(req)
        data = html.read()

        # Find table & Parse
        soup = BeautifulSoup(data, 'html.parser')

        # Remove subscripts (footnote markers) so cell text stays clean.
        for tag in soup.find_all('sup'):
            tag.replaceWith('')
        table = soup.find_all("table", {"class": "yfnc_datamodoutline1"})
    except URLError as e:
        writelog('[CRITICAL] URL ERROR Encountered', 'updatetick', p)
        writelog(e, 'updatetick', p)
        return 1
    except HTTPError as e:
        writelog('[WARNING] HTTP ERROR Encountered', 'updatetick', p)
        writelog(e, 'updatetick', p)
        return 1
    except http.client.IncompleteRead as e:
        writelog('[WARNING] HTTP INCOMPLETE ERROR', 'updatetick', p)
        if (attempts < 3):
            r = yql_updatetick(tick, attempts + 1)
        else:
            writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES', 'updatetick', p)
            return 1

        if (r == 0):
            return 0
        else:
            writelog('[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve', 'updatetick', p)
            return 1

    # Collect every cell after the first row.  The row counter `i` runs
    # across ALL matched tables, so only the very first row overall is
    # skipped.  (Removed the unused `count` local from the original.)
    i = 0
    cn = []
    try:
        for ele in table:
            for tr in ele.findAll("tr"):
                if i >= 1:
                    for td in tr.findAll("td"):
                        cn.append(td.get_text())
                i = i + 1
    except IndexError as e:
        p = []
        p.append(tick)
        writelog('[WARNING] INDEX ERROR Encountered', 'updatetick', p)
        writelog(e, 'updatetick', p)
        return 1
    # Only update when the page yielded exactly [sector, industry].
    if (len(cn) == 2):
        s = "UPDATE ticklist SET sector = \'" + cn[0] + "\', industry = \'" + cn[1] + "\' WHERE tick = \'" + tick + "\'"
        dbquery(s)
    return 0
def init_db():
	"""Create every database table needed by the crawler.

	Reads the tick list from configuration, creates the shared (single)
	tables, then the per-tick tables.  Logs each step via writelog and
	aborts the process with sys.exit on the first failure.
	"""
	p = []
	writelog('[INFO] Starting Database Initializer', 'initializer',p)

	cname = getconf("ticklist")
	if not cname:
		writelog('[CRITICAL] No Configuration File Found', 'initializer', p)
		sys.exit('[CRITICAL] No Configuration File Found')

	# Single Table
	if (yql_analyst_summary_init() == 0):
		writelog('[SUCCESS] Initialized yql_analyst_summary table', 'initializer', p)
	else:
		writelog('[CRITICAL] Unable to create database yql_analyst_summary', 'initializer', p)
		sys.exit('[CRITICAL] Unable to create database yql_analyst_summary')

	if (yql_day_init() == 0):
		writelog('[SUCCESS] Initialized yql_day table', 'initializer', p)
	else:
		writelog('[CRITICAL] Unable to create database yql_day table', 'initializer', p)
		sys.exit('[CRITICAL] Unable to create database yql_day table')

	# BUG FIX: these messages previously said "yql_day" (copy-paste error).
	if (yql_dividends_init() == 0):
		writelog('[SUCCESS] Initialized yql_dividends table', 'initializer', p)
	else:
		writelog('[CRITICAL] Unable to create database yql_dividends table', 'initializer', p)
		sys.exit('[CRITICAL] Unable to create database yql_dividends table')

	if (yql_highlight_init() == 0):
		writelog('[SUCCESS] Initialized yql_highlight table', 'initializer', p)
	else:
		writelog('[CRITICAL] Unable to create database yql_highlight table', 'initializer', p)
		sys.exit('[CRITICAL] Unable to create database yql_highlight table')

	if (yql_real_init() == 0):
		writelog('[SUCCESS] Initialized yql_real table', 'initializer', p)
	else:
		writelog('[CRITICAL] Unable to create database yql_real table', 'initializer', p)
		sys.exit('[CRITICAL] Unable to create database yql_real table')

	term = "quarterly"
	for tick in cname:
		tick = tick.replace(" ","")

		# Per-tick amigo_* tables.  BUG FIX: tname is now computed before
		# the call, so the failure branch can no longer raise NameError
		# (previously tname was only assigned on success).
		for category in ("balance-sheet", "cash-flow", "income-statement"):
			# Mirrors the table-naming scheme used by amigo_init.
			tname = 'amigo_' + tick + '_' + category.replace('-', '_') + '_' + term
			if (amigo_init(tick, category, term) == 0):
				writelog('[SUCCESS] Initialized ' + tname + ' table', 'initializer', p)
			else:
				writelog('[CRITICAL] Unable to create database ' + tname + ' table', 'initializer', p)
				sys.exit('[CRITICAL] Unable to create database ' + tname + ' table')

		# Per-tick yql_* tables: (initializer function, table-name suffix).
		for init_fn, suffix in (
			(yql_analyst_trends_init, '_yql_analyst_trends'),
			(yql_competitor_init, '_yql_competitor'),
			(yql_estimates_init, '_yql_estimates'),
			(yql_growth_init, '_yql_growth'),
			(yql_hist_init, '_yql_hist'),
		):
			if (init_fn(tick) == 0):
				writelog('[SUCCESS] Initialized ' + tick + suffix, 'initializer', p)
			else:
				writelog('[CRITICAL] Unable to create database ' + tick + suffix + ' table', 'initializer', p)
				sys.exit('[CRITICAL] Unable to create database ' + tick + suffix + ' table')

	writelog('[SUCCESS] Initializing Database Tables Complete!', 'initializer', p)
# 示例#35 (paste-site artifact; commented out so the file parses)
def yql_beta(tick, attempts):
	"""Scrape the Beta statistic for *tick* from Yahoo Finance Key Statistics.

	Returns the beta value as a string on success, the int 1 on failure.
	Retries up to 3 times on http.client.IncompleteRead by recursing with
	attempts + 1.
	"""
	p = []
	p.append(tick)
	p.append(attempts)
	# Web Scraping
	try:
		req = Request(
			'http://finance.yahoo.com/q/ks?s=' + tick + '+Key+Statistics',
			data=None,
			headers={
				'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
			}
		)
		html = urlopen(req)
		data = html.read()

		# Find table & Parse
		soup = BeautifulSoup(data, 'html.parser')
		# Remove subscripts
		for tag in soup.find_all('sup'):
			tag.replaceWith('')
		table = soup.findAll("table", { "class" : "yfnc_datamodoutline1" })
	# BUG FIX: HTTPError is a subclass of URLError, so it must be caught
	# first; catching URLError first made this handler unreachable.
	except HTTPError as e:
		writelog('[WARNING] HTTP ERROR Encountered', 'yql_beta', p)
		writelog(e, 'yql_beta', p)
		return 1
	except URLError as e:
		writelog('[CRITICAL] URL ERROR Encountered', 'yql_beta', p)
		writelog(e, 'yql_beta', p)
		return 1
	except http.client.IncompleteRead as e:
		writelog('[WARNING] HTTP INCOMPLETE ERROR', 'yql_beta', p)
		if (attempts < 3):
			r = yql_beta(tick, attempts + 1)
		else:
			writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES', 'yql_beta', p)
			return 1

		if (r == 0):
			return 0
		else:
			writelog('[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve', 'yql_beta', p)
			return 1

	# The table lays out label/value cell pairs: the cell right after the
	# one labeled "Beta:" carries the value.  (Removed the unused 'beta'
	# flag and renamed 'next', which shadowed the builtin.)
	next_is_beta = False
	try:
		for ele in table:
			for row in ele.findAll("tr"):
				for col in row.findAll("td"):
					if next_is_beta:
						return (str(col.get_text()))
					if re.match(r"^Beta:", col.get_text()):
						next_is_beta = True
	except IndexError as e:
		p = []
		p.append(tick)
		writelog('[WARNING] INDEX ERROR Encountered', 'yql_beta', p)
		writelog(e, 'yql_beta', p)
		return 1
	return 1
# 示例#36 (paste-site artifact; commented out so the file parses)
def yql_analyst_summary(tick, attempts):
    """Scrape the analyst-opinion summary table for *tick* and store it.

    Deletes any existing rows for the tick, then inserts one fresh row into
    yql_analyst_summary using the column list from configuration.  Retries
    up to 3 times on http.client.IncompleteRead.  Returns 0 on success,
    1 on failure.
    """
    p = []
    p.append(tick)
    p.append(attempts)
    # Web Scraping
    try:
        req = Request(
            'http://finance.yahoo.com/q/ao?s=' + tick + '+Analyst+Opinion',
            data=None,
            headers={
                'User-Agent':
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
            })
        html = urlopen(req)
        data = html.read()
        # Find table & Parse
        soup = BeautifulSoup(data, 'html.parser')

        # Remove subscripts
        for tag in soup.find_all('sup'):
            tag.replaceWith('')

        table = soup.find_all("table",
                              {"class": "yfnc_datamodoutline1 equaltable"})
    # BUG FIX: HTTPError is a subclass of URLError, so it must be caught
    # first; catching URLError first made this handler unreachable.
    except HTTPError as e:
        writelog('[WARNING] HTTP ERROR Encountered ' + str(e),
                 'yql_analyst_summary', p)
        return 1
    except URLError as e:
        writelog('[CRITICAL] URL ERROR Encountered' + str(e),
                 'yql_analyst_summary', p)
        return 1
    except http.client.IncompleteRead as e:
        writelog('[WARNING] HTTP INCOMPLETE ERROR', 'yql_analyst_summary', p)
        if (attempts < 3):
            r = yql_analyst_summary(tick, attempts + 1)
        else:
            writelog('[CRITICAL] HTTP INCOMPLETE ERROR AFTER 3 TRIES',
                     'yql_analyst_summary', p)
            return 1

        if (r == 0):
            return 0
        else:
            writelog(
                '[CRITICAL] HTTP INCOMPLETE READ ERROR - Unable to resolve',
                'yql_analyst_summary', p)
            return 1

    # Drop any stale row for this tick before re-inserting.
    # NOTE(review): SQL is built by string concatenation; acceptable for
    # trusted config ticks but parameterized queries would be safer.
    d = 'DELETE FROM yql_analyst_summary WHERE tick = ' + '\'' + tick + '\''
    dbquery(d)

    cn = getconf('analyst_summary')
    if not cn:
        writelog('[CRITICAL] No Configuration File Found',
                 'yql_analyst_summary', p)
        sys.exit('[CRITICAL] No Configuration File Found')
        # (removed unreachable "return 1": sys.exit never returns)

    # Build the INSERT column list from configuration.
    s = 'INSERT INTO yql_analyst_summary (tick, '
    for ele in cn:
        s = s + ele + ', '
    s = s[:-2] + ') VALUES (\'' + tick + '\', '
    # Collect every data cell; cells containing ':' are labels, not values.
    # '%' is stripped so percentages store as bare numbers.
    ccl = []
    for ele in table:
        for row in ele.findAll("tr"):
            for col in row.findAll("td"):
                if (col.get_text().find(':') == -1):
                    ts = col.get_text()
                    ts = ts.replace("%", "")
                    ccl.append(ts)

    # Insert only when the scraped cell count matches the configured columns.
    if (len(ccl) == len(cn)):
        for cc in ccl:
            s = s + '\'' + cc + '\', '
        s = s[:-2] + ')'
        dbquery(s)
        return 0
    else:
        return 1