示例#1
0
def thread_fetch(thread_codes, thread_name, logger):
    """Worker routine: re-crawl the fund pages for *thread_codes*.

    First clears each code's ``updated`` flag in the fund table, then
    fetches and parses every fund page, collecting model tuples for a
    later bulk update.  (The tail of this function -- including the
    ``finally`` body -- is not visible in this chunk.)
    """
    my = MYSQL(thread_name)  # one DB connection per worker thread
    update_sql = "UPDATE fund SET updated=False WHERE code=%s;"
    code_set = thread_codes
    logger.info("%s => START  CLEAR UPDATED FIELD!" % thread_name)
    my.update_many_data(update_sql, code_set)
    logger.info("%s => FINISH CLEAR UPDATED FIELD!" % thread_name)

    logger.info("%s => START CRAWLER URLS DATA!" % thread_name)
    datas = []
    # NOTE(review): counter is never incremented in the visible code, so
    # every log line reports "1" -- confirm against the hidden tail.
    counter = 1
    for code in thread_codes:
        logger.info("%s => %s => ** %s ** START PRASE DATA ..." %
                    (thread_name, counter, code))
        time.sleep(1)  # throttle: at most one request per second
        url = "%s%s.html" % (WEB_URL, code)
        ##value
        try:
            body = urllib2.urlopen(url).read()
            soup = BeautifulSoup(body, "lxml")
        except Exception, e:
            logger.error("%s => %s => ** %s ** URL OPEN FAIL :%s" %
                         (thread_name, counter, code, str(e)))
        else:
            # Parse succeeded pages into a model; collect its tuple for
            # the bulk DB update done after the loop (not visible here).
            model = prase_content(soup, code)
            ## append data
            if model:
                datas.append(model.get_model_tuple())
            else:
                logger.error("%s => %s => ** %s ** START PRASE DATA FAIL ..." %
                             (thread_name, counter, code))
        finally:
示例#2
0
def get_codes(style, c):
    """Return the list of fund codes selected by *style*.

    Parameters:
        style: "all"   -- every code in the fund table;
               "patch" -- only codes whose ``updated`` flag is False;
               "one"   -- just the single code *c*.
        c: the fund code used when ``style == "one"``; ignored otherwise.

    Returns a (possibly empty) list of code strings; an unknown style
    yields [].
    """
    def _query_codes(conn_name, sql):
        # Open a short-lived connection, fetch the first column of every
        # row, and close the connection even if the query raises
        # (the original leaked the connection on failure).
        db = MYSQL(conn_name)
        try:
            rows = db.get_datas(sql)
        finally:
            db.close()
        return [row[0] for row in rows]

    if style == "all":
        return _query_codes("main", "select code from fund;")
    if style == "patch":
        return _query_codes("patch",
                            "select code from fund where updated=False;")
    if style == "one":
        return [c]
    return []
示例#3
0
 def init(self):  # initialize the database connection
     """Connect to the DB backend selected by ``self.cj``.

     On success sets ``self.cv`` (the per-vendor converter module),
     ``self.ms`` (the DB handle) and ``self.flag = 1``.  On failure sets
     ``self.flag = 0`` and emits a vendor-specific error code through
     ``self.sinOut``.  An unrecognised ``self.cj`` leaves everything
     untouched.

     FIX: the bare ``except:`` clauses were narrowed to
     ``except Exception:`` so KeyboardInterrupt / SystemExit are no
     longer swallowed.
     """
     if self.cj == '共济':
         try:
             from mysql import MYSQL
             import xb_1 as cv
             self.cv = cv
             self.ms = MYSQL(host=self.host,
                             port=self.port,
                             user=self.user,
                             pwd=self.pwd,
                             db=self.db)
             self.flag = 1
         except Exception:
             self.flag = 0
             self.sinOut.emit(2000)
     elif self.cj == '中联':
         try:
             from mssql import MSSQL
             import zl_1 as cv
             self.cv = cv
             self.ms = MSSQL(host=self.host,
                             port=self.port,
                             user=self.user,
                             pwd=self.pwd,
                             db=self.db)
             self.flag = 1
         except Exception:
             self.flag = 0
             self.sinOut.emit(2001)
     elif self.cj == '栅格':
         try:
             from mysql import MYSQL
             import sg_1 as cv
             self.cv = cv
             self.ms = MYSQL(host=self.host,
                             port=self.port,
                             user=self.user,
                             pwd=self.pwd,
                             db=self.db)
             self.flag = 1
         except Exception:
             self.flag = 0
             self.sinOut.emit(2000)
示例#4
0
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
import sqlite3
import re
from mysql import MYSQL

WEB_URL = "http://fund.eastmoney.com/"

fo = open("./allfund.html").read()
regex = re.compile("href=\"http://fund.eastmoney.com/\d+.html")
funds = re.findall(regex, fo)
web_codes = [f.split("/")[3][:-5] for f in funds]
web_codes = list(set(web_codes))
print " >> %4s codes pull from web page !" % len(web_codes)

my = MYSQL("pull")
db_codes_sql = '''select code from fund;'''
db_codes = my.get_datas(db_codes_sql)
db_codes = [code[0] for code in db_codes]
print " >> %4s codes in Database fund !" % len(db_codes)

data_update = []
data_insert = []
for code in web_codes:
    url = "%s%s.html" % (WEB_URL, code)
    if code not in db_codes:
        i = (code, url)
        data_insert.append(i)
    else:
        u = ('niuniu', code)
        data_update.append(u)
from sqlalchemy import create_engine
from datetime import datetime, timedelta
sys.path.append('core')  # make the project-local ``core`` package importable
from mysql import MYSQL


# Silence pandas' SettingWithCopyWarning for the chained assignments below.
pd.options.mode.chained_assignment = None
# NOTE(review): database credentials are hard-coded in source -- they should
# be moved to environment variables or a config file, and rotated.
db_connection = 'mysql+pymysql://douzi@traffic110:1qaz!QAZ2wsx@[email protected]/traffic'
# db_connection = 'mysql+pymysql://root:@localhost/accident'
conn = create_engine(db_connection)
# Pull every accident row joined (left) with its node coordinates.
df = pd.read_sql("""
                 SELECT a.ACCIDENT_NO, ACCIDENTDATE, ACCIDENTTIME, n.Lat, n.Long, a.SPEED_ZONE  FROM accident as a
                        LEFT JOIN node as n ON a.ACCIDENT_NO = n.ACCIDENT_NO
                        """, conn)

ms = MYSQL(host="traffic110.mysql.database.azure.com",user="******",pwd="1qaz!QAZ2wsx@WSX",db="traffic")
# ms = MYSQL(host="localhost",user="******",pwd="",db="accident")

# Normalise date/time columns: parse the date, truncate the time to the
# hour, then combine both into a single Datetime column.
df['ACCIDENTDATE'] = pd.to_datetime(df['ACCIDENTDATE'])
df['ACCIDENTDATE_FOMAT'] = df['ACCIDENTDATE'].dt.strftime('%Y%m%d')
df['ACCIDENTTIME'] = pd.to_datetime(df['ACCIDENTTIME']).dt.strftime('%H:00:00')
df['Datetime'] = pd.to_datetime(df['ACCIDENTDATE'].apply(str) + ' ' + df['ACCIDENTTIME'])
# Unix epoch seconds (int64 nanoseconds floor-divided by 1e9).
df['Timestrap'] = df['Datetime'].values.astype(np.int64) // 10 ** 9

#https://api.weather.com/v1/geocode/-37.688/144.841/observations/historical.json?apiKey=6532d6454b8aa370768e63d6ba5a832e&startDate=20060101&endDate=20060102&units=e
# Pieces of the historical-weather API URL assembled elsewhere.
base_url = 'https://api.weather.com/v1/geocode/'
format_url = '/observations/historical.json?units=e'
api_key = '&apiKey=6532d6454b8aa370768e63d6ba5a832e'

def nearest(items, pivot):
    """Return the element of *items* whose value is closest to *pivot*.

    Ties go to the earliest such element; raises ValueError on an empty
    iterable (same as ``min``).
    """
    def distance_to_pivot(item):
        return abs(item - pivot)

    return min(items, key=distance_to_pivot)
示例#6
0
        # Fragment: tail of a crawler worker -- the matching ``try:`` is
        # above this chunk.  Bulk-writes the collected rows and logs the
        # outcome.
        logger.info("%s => START UPDATE DATABSE ..." % thread_name)
        my.update_many_data(SQL, datas)
    except Exception, e:
        logger.error("%s => UPDATE DATABSE FAIL :%s" % (thread_name, str(e)))
    else:
        logger.info("%s => UPDATE DATABSE SUCCESS!" % thread_name)
    finally:
        # Runs on success or failure, so the DB handle is never leaked.
        logger.info("%s => UPDATE DATABSE FINISH!!!!" % thread_name)
        my.close()


if __name__ == '__main__':
    # Entry point: build the list of fund codes to crawl from the CLI
    # style argument ("all" / "patch" / "one"); the thread fan-out that
    # consumes ``codes`` continues past this chunk.
    style = sys.argv[1]
    codes = []
    if style == "all":
        # Every code currently in the fund table.
        main_sl = MYSQL("main")
        codes = main_sl.get_datas("select code from fund;")
        codes = [code[0] for code in codes]
        main_sl.close()
    elif style == "patch":
        # Only codes whose previous update did not complete.
        patch_sl = MYSQL("patch")
        codes = patch_sl.get_datas(
            "select code from fund where updated=False;")
        codes = [code[0] for code in codes]
        patch_sl.close()
    elif style == "one":
        # A single explicit code taken from the command line.
        code = sys.argv[2]
        codes.append(code)

    total = len(codes)
    threads = []
示例#7
0
import re
from settings import *
from utils import *
import pandas as pd
from mysql import MYSQL
import multiprocessing

# Load the repository list exported to CSV.
# NOTE(review): ``data[0:-1]`` silently drops the last CSV row -- confirm
# whether that row is a footer/total line or this is an off-by-one.
data = pd.read_csv('repo.csv', header=0)
repo_names = data[0:-1]['name']
repo_ids = data[0:-1]['id']
user_names = data[0:-1]['owner']
mysql = MYSQL()  # shared project DB handle used by the code below


# remove emoji
def filter_emoji(text):
    """Strip emoji / pictograph code points from *text*.

    The character class below enumerates emoji, symbol, and flag Unicode
    ranges.  (Truncated in this chunk: the class literal and the rest of
    the function continue past the last visible line.)
    """
    try:
        re_emoji = re.compile(
            u'['
            u'\U0001F300-\U0001F64F'
            u'\U0001F680-\U0001F6FF'
            u'\u2600-\u2B55'
            u'\u23cf'
            u'\u23e9'
            u'\u231a'
            u'\u3030'
            u'\ufe0f'
            u"\U0001F600-\U0001F64F"  # emoticons
            u"\U0001F300-\U0001F5FF"  # symbols & pictographs
            u'\U00010000-\U0010ffff'
            u'\U0001F1E0-\U0001F1FF'  # flags (iOS)
示例#8
0
# -*- coding: utf-8 -*-
from mysql import MYSQL
import operator
my = MYSQL("analyse")
FIELDS = ["one_month", "three_month", "six_month", "one_year", "three_year"]


def query_field(field, condition):
    """Fetch *field* from every fund row matching *condition*.

    NOTE: the statement is built by %-interpolation, so both arguments
    must come from trusted code, never user input.
    """
    query = "select %s from fund where %s;" % (field, condition)
    return my.get_datas(query)


def query_infos(type, count, field):
    """Return the top *count* funds of *type*, ordered by *field* desc.

    Each row is (code, owner, name, level).  The query is %-built, so
    the arguments must come from trusted code.
    """
    template = ("select code,owner,name,level from fund where type "
                "like '%s' order by %s desc limit %s;")
    return my.get_datas(template % (type, field, count))


def analyse_percent(end):
    """Rank bond funds over the first *end* return-period fields.

    (Truncated in this chunk: the inner loop body and the return value
    continue past the last visible line.)
    """
    type = "%债%"  # SQL LIKE pattern matching bond ("债") funds
    count = "50"  # top-N funds fetched per period
    codes = {}
    funds = []
    # Label such as "one_month+three_month"; NOTE(review): this local
    # shadows any module named ``time`` imported at file level.
    time = "+".join(FIELDS[0:end])
    result = set()
    for field in FIELDS[0:end]:
        infos = query_infos(type, count, field)
        #funds
        for info in infos:
示例#9
0
# Fragment: ``df_negative`` is created above this chunk.  Seed default
# values for the feature columns that are back-filled below.
df_negative['smoke'] = 0
df_negative['dust'] = 0
df_negative['strong_winds'] = 0
df_negative['wind_dir'] = ''
df_negative['wind_speed'] = 0
df_negative['temperature'] = 0
df_negative['SURFACE_COND'] = 0
df_negative['NODE_TYPE'] = ''
df_negative['Deg_Urban_Name'] = ''
df_negative['target'] = 0
df_negative['accident_counts'] = 0
df_negative['date'] = df_negative['timestamp'].dt.strftime('%Y-%m-%d')

# ms = MYSQL(host="localhost",user="******",pwd="",db="accident")
# NOTE(review): credentials hard-coded in source -- move to config.
ms = MYSQL(host="traffic110.mysql.database.azure.com",
           user="******",
           pwd="1qaz!QAZ2wsx@WSX",
           db="traffic")

# For each negative sample, copy road/weather features from the first
# positive sample recorded on the same date and route.
for i in range(df_negative.shape[0]):
    # for i in range(2):
    #     print(df_negative.iloc[i])
    # NOTE(review): query built via %-interpolation -- acceptable for
    # trusted data, but a parameterised query would be safer.
    getRoad = ms.ExecQuery("""
                   SELECT SPEED_ZONE, Light_Condition, ROAD_TYPE, DIRECTION_LOCATION, 
                   snowing, raining, foggy, smoke, dust, strong_winds, wind_dir, wind_speed,
                   temperature, SURFACE_COND, NODE_TYPE, Deg_Urban_Name
                   FROM positive_feature WHERE ACCIDENTDATE = '%s' AND Route_No = '%s'
                   LIMIT 1
                   """ % (df_negative['date'][i], df_negative['Route_No'][i]))
    if getRoad:
        # NOTE(review): chained assignment (df[col][i] = ...) depends on
        # pandas not copying -- ``.loc[i, col]`` is the safe spelling.
        df_negative['SPEED_ZONE'][i] = getRoad[0][0]
        #         if
示例#10
0
def fetch(start, end, thread_name, logger):
    """Crawl fund pages ``urls[start:end]`` and store parsed fields.

    Uses the module-level ``urls`` list and one DB connection per worker
    thread.  NOTE(review): ``model`` is written below but never
    initialised in the visible code -- presumably created above this
    chunk or shared between iterations; confirm.  (The function body
    continues past the last visible line.)
    """
    my = MYSQL(thread_name)
    for url in urls[start:end]:
        time.sleep(1)  # throttle: at most one request per second
        ##value
        try:
            body = urllib2.urlopen(url).read()
            soup = BeautifulSoup(body, "lxml")
        except Exception, e:
            logger.error("%s => FAIL :%s" % (url, str(e)))
        else:
            try:
                # --- primary page layout: dataItem01/02/03 blocks ---
                title = soup.find("title")
                model['name'] = title.text.split('(')[0]

                item01 = soup.find("dl", class_="dataItem01")
                model['evaluate_value'] = item01.contents[1].contents[0].text
                model['increase_value'] = item01.contents[1].contents[
                    2].contents[0].text
                model['increase_percent'] = item01.contents[1].contents[
                    2].contents[1].text[:-1]
                model['one_month'] = item01.contents[2].contents[1].text[:-1]
                model['one_year'] = item01.contents[3].contents[1].text[:-1]

                item02 = soup.find("dl", class_="dataItem02")
                model['per_value'] = item02.contents[1].contents[0].text
                model['per_value_percent'] = item02.contents[1].contents[
                    1].text[:-1]
                model['three_month'] = item02.contents[2].contents[1].text[:-1]
                model['three_year'] = item02.contents[3].contents[1].text[:-1]

                item03 = soup.find("dl", class_="dataItem03")
                model['total_value'] = item03.contents[1].contents[0].text
                model['six_month'] = item03.contents[2].contents[1].text[:-1]
                model['till_now'] = item03.contents[3].contents[1].text[:-1]

                # Basic-info table: type, size, manager, dates, owner.
                tables = soup.find_all("table")
                model['type'] = tables[2].contents[0].contents[0].text.split(
                    "|")[0]
                model['size'] = tables[2].contents[0].contents[1].contents[1][
                    1:]
                model['manager'] = tables[2].contents[0].contents[2].contents[
                    1].text
                model['start_date'] = tables[2].contents[1].contents[
                    0].contents[1][1:]
                model['owner'] = tables[2].contents[1].contents[1].contents[
                    2].text
                #model['level'] = tables[2].contents[1].contents[2].contents[2].text
                # NOTE(review): the rating appears to be encoded in the
                # element's CSS class name (5th character) -- confirm
                # against the site markup.
                level = tables[2].contents[1].contents[2].contents[2].attrs[
                    'class'][0]
                if len(level) > 4:
                    model['level'] = level[4]
                else:
                    model['level'] = 0
            except IndexError, e:
                # --- fallback layout (fundInfoItem block): reached when
                # the primary selectors raise IndexError; presumably a
                # money-market style page -- TODO confirm.
                infoItem = soup.find("div", class_="fundInfoItem")
                model['wan_get'] = infoItem.contents[0].contents[0].contents[
                    1].text
                model['seven_get'] = infoItem.contents[0].contents[2].contents[
                    1].text[:-1]
                model['fourting_get'] = infoItem.contents[0].contents[
                    4].contents[1].text[:-1]
                model['two_eghit_get'] = infoItem.contents[0].contents[
                    6].contents[1].text[:-1]

                model['one_month'] = infoItem.contents[1].contents[0].contents[
                    0].contents[1].text[:-1]
                model['one_year'] = infoItem.contents[1].contents[0].contents[
                    1].contents[1].text[:-1]
                model['three_month'] = infoItem.contents[1].contents[
                    1].contents[0].contents[1].text[:-1]
                model['three_year'] = infoItem.contents[1].contents[
                    1].contents[1].contents[1].text[:-1]
                model['six_month'] = infoItem.contents[1].contents[2].contents[
                    0].contents[1].text[:-1]
                model['till_now'] = infoItem.contents[1].contents[2].contents[
                    1].contents[1].text[:-1]

                # Same basic-info table as the primary branch.
                tables = soup.find_all("table")
                model['type'] = tables[2].contents[0].contents[0].text.split(
                    "|")[0]
                model['size'] = tables[2].contents[0].contents[1].contents[1][
                    1:]
                model['manager'] = tables[2].contents[0].contents[2].contents[
                    1].text
                model['start_date'] = tables[2].contents[1].contents[
                    0].contents[1][1:]
                model['owner'] = tables[2].contents[1].contents[1].contents[
                    2].text
                #model['level'] = tables[2].contents[1].contents[2].contents[2].text
                level = tables[2].contents[1].contents[2].contents[2].attrs[
                    'class'][0]
                if len(level) > 4:
                    model['level'] = level[4]
                else:
                    model['level'] = 0
            except Exception, e:
                # Any other parse failure: log and move to the next URL.
                logger.error("%s => FAIL :%s" % (url, str(e)))
示例#11
0
                    model['level'], url)
                # Fragment: the statement above completes a SQL string
                # (built from parsed model fields) started above this
                # chunk.
                ## insert data
                try:
                    my.insert_data(sql)  # one INSERT per crawled fund
                except Exception, e:
                    logger.error("%s => FAIL :%s" % (url, str(e)))
                else:
                    logger.debug("%s => OK" % url)
    my.close()  # always release the thread's DB connection


if __name__ == '__main__':
    # Entry point: build the URL work list from the CLI style argument,
    # then (continuing past this chunk) fan the crawl out across threads
    # in fixed-size slices of 100 URLs.
    style = sys.argv[1]
    urls = []
    if style == "all":
        # Every stored fund URL.
        main_sl = MYSQL("main")
        us = main_sl.get_datas("select url from fund;")
        urls = [url[0] for url in us]
        main_sl.close()
    elif style == "patch":
        # URLs flagged for retry, supplied by the project helper check().
        urls = check(True)
    elif style == "one":
        # A single explicit URL from the command line.
        url = sys.argv[2]
        urls.append(url)

    total = len(urls)
    threads = []
    exec_shell_result("rm -rf /var/log/crawler/*.*")  # reset crawler logs
    # NOTE(review): 100 slices x 100 URLs covers at most 10000 URLs --
    # confirm ``total`` never exceeds that.
    for i in range(100):
        my_urls_start = i * 100
        my_urls_end = i * 100 + 100