import sys
import traceback
import datetime
import MySQLdb
sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import set_log  # log_obj.debug(text) "\x1B[1;32;41m (text)\x1B[0m"
import mysql_connecter
mysql_connecter = mysql_connecter.mysql_connecter()
log_obj = set_log.Logger('data_cleaner.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('data_cleaner.log', if_cleanup=True)  # whether to empty the log file before each run


class data_cleaner(object):
    def __init__(self):
        pass

    def get_data(self, length=100):
        """Only 100 rows are read per call; raise this value if the data
        has not been cleaned for a long time."""
        sql = r"SELECT `key`, `detail` FROM `monitor` WHERE `parcel_no` <> ''"  # LIMIT %s [length,],
        data = mysql_connecter.connect(sql, dbname='spider', host='116.62.230.38',
                                       user='******', password='******')
@time: 2017/11/28 13:39
--------------------------------
"""
import sys
import os
import datetime
import pandas as pd
import numpy as np
sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import set_log  # log_obj.debug(text) "\x1B[1;32;41m (text)\x1B[0m"
log_obj = set_log.Logger('calculation.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('calculation.log', if_cleanup=True)  # whether to empty the log file before each run


class calculation(object):
    def __init__(self):
        pass

    def earnings_cal(self, fund_code, df, **kwarg):
        """
        :param fund_code: the series of net values
        :param df:
        :param kwarg:
        :return:
        """
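# earnings_cal()'s body is not shown above. A minimal sketch of one plausible
# implementation, assuming earnings means cumulative return relative to the
# first observation and that df carries an 'accumulative_net_value' column
# (both assumptions borrowed from the SQL queries elsewhere in this repo).
import pandas as pd


def earnings_from_net_value(df):
    """Hypothetical helper: cumulative return of a net-value series."""
    nv = df['accumulative_net_value'].astype(float)
    return nv / nv.iloc[0] - 1  # gain versus the first data point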
import sys
import pandas as pd
import numpy as np
import time
import chardet
import xlwt
from contextlib import closing
import pymysql
sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf-8')
import set_log  # log_obj.debug(text) "\x1B[1;32;41m (text)\x1B[0m"
log_obj = set_log.Logger('stock_capital_flow.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('stock_capital_flow.log', if_cleanup=True)  # whether to empty the log file before each run
import requests_manager
requests_manager = requests_manager.requests_manager()
import xls_manager
xls_manager = xls_manager.xls_manager()
print sys.getdefaultencoding()

token = '1942f5da9b46b069953c873404aad4b5'


class stock_capital_flow(object):
    def __init__(self):
        pass
import sys
import json
import numpy as np
import time
import re
sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import set_log  # log_obj.debug(text) "\x1B[1;32;41m (text)\x1B[0m"
import mysql_connecter
import data_cleaner
data_cleaner = data_cleaner.data_cleaner()
mysql_connecter = mysql_connecter.mysql_connecter()
log_obj = set_log.Logger('DF_reader.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('DF_reader.log', if_cleanup=True)  # whether to empty the log file before each run

with open('df_rename.json') as f:
    df_rename = json.load(f, encoding='utf8')

city_list = [
    u'杭州', u'宁波', u'绍兴', u'湖州', u'嘉兴', u'金华', u'衢州', u'台州', u'丽水', u'舟山'
]

# If the header row contains any of the following column titles, replace them
title_replace = {
    u'杭州萧山': {
        u'编号': u'地块编号'
    },
    u'嘉兴': {
import sys
import os
import datetime
import pandas as pd
import numpy as np
import re
import xlwt
import time
sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import set_log  # log_obj.debug(text) "\x1B[1;32;41m (text)\x1B[0m"
log_obj = set_log.Logger('report_output.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('report_output.log', if_cleanup=True)  # whether to empty the log file before each run
import net_value_cal_display
import compare_net_value
import fund_holdings_display


class report_output(object):
    def __init__(self):
        self.net_value_cal_display = net_value_cal_display.net_value_cal_display()
        self.compare_net_value = compare_net_value.compare_net_value()
        self.fund_holdings_display = fund_holdings_display.fund_holdings_display()
import sys
import re
import time
import json
import copy
sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import requests_manager
requests_manager = requests_manager.requests_manager()
import api51
api51 = api51.api51()
import set_log  # log_obj.debug(text) "\x1B[1;32;41m (text)\x1B[0m"
log_obj = set_log.Logger('cal_fitting_net_value.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('cal_fitting_net_value.log', if_cleanup=True)  # whether to empty the log file before each run

user_key = '1923eae2a3054d4c92a7eb74d7f65396'
with open('stock_blacklist.txt', 'r') as f:
    stock_blacklist = f.read().split('\n')

d = {
    'prod_code': '000001.SS',
    'candle_mode': '0',
    'data_count': '1000',
    'get_type': 'offset',
    'search_direction': '1',
    'candle_period': '6',
}
@time: 2017/4/18 13:54
--------------------------------
"""
import sys
import os
import datetime
import pymail
import time
import csv
sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import set_log  # log_obj.debug(text) "\x1B[1;32;41m (text)\x1B[0m"
log_obj = set_log.Logger('controller.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('controller.log', if_cleanup=False)  # whether to empty the log file before each run

file_name = 'NEW.csv'  # new announcements are stored in this file


class controller(object):
    def __init__(self):
        self.pymail = pymail.pymail()

    def initialize(self):
        """Remove the old announcement data."""
        if os.path.exists(file_name):
            os.remove(file_name)

    def start_spider(self, spider_id):
import sys
import pandas as pd
import numpy as np
from contextlib import closing
import pymysql
import datetime
import xlrd
import xlwt
from xlutils.copy import copy
import re
sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import set_log  # log_obj.debug(text) "\x1B[1;32;41m (text)\x1B[0m"
log_obj = set_log.Logger('fund_holdings_display.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('fund_holdings_display.log', if_cleanup=True)  # whether to empty the log file before each run


class fund_holdings_display(object):
    def __init__(self):
        sql = """
        SELECT `fund_code`, `fund_name` FROM `fund_info`
        """
        with closing(
                pymysql.connect('10.10.10.15', 'spider', 'jlspider', 'spider',
import sys
sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import datetime
import pandas as pd
import numpy as np
import api51
api51 = api51.api51()
from contextlib import closing
import pymysql
import copy
import set_log  # log_obj.debug(text) "\x1B[1;32;41m (text)\x1B[0m"
log_obj = set_log.Logger('mock_trading.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('mock_trading.log', if_cleanup=True)  # whether to empty the log file before each run

user_key = '1923eae2a3054d4c92a7eb74d7f65396'
# d = {
#     'prod_code': '000001.SS',
#     'candle_mode': '0',
#     'data_count': '1000',
#     'get_type': 'offset',
#     'search_direction': '1',
#     'candle_period': '6',
# }
# json_data = api51.connect(user_key, d)
# df = pd.DataFrame(json_data['data']['candle']['000001.SS'])
# date_ser = df[0].apply(lambda num: datetime.datetime.strptime(str(num), '%Y%m%d'))
@time: 2017/11/24 11:35
--------------------------------
"""
import sys
import os
import urllib2
import pandas as pd
import numpy as np
sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import set_log  # log_obj.debug(text) "\x1B[1;32;41m (text)\x1B[0m"
log_obj = set_log.Logger('api51.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('api51.log', if_cleanup=True)  # whether to empty the log file before each run

user_key = '1923eae2a3054d4c92a7eb74d7f65396'


class api51(object):
    def __init__(self):
        pass

    def connect(self, appcode, querys, path='/kline'):
        host = 'http://stock.api51.cn'
        method = 'GET'
        #appcode = '343dff93ee6549daab7f1d6b8e244027'
        #'candle_mode=0&candle_period=1&data_count=10&date=date&end_date=end_date&fields=fields&get_type=offset&min_time=min_time&prod_code=000001.SS&search_direction=1&start_date=start_date'
        print "api51 querys:\n", '\n'.join(
import sys
import requests
import bs4
sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import html_table_reader
import PhantomJS_driver
import requests_manager
requests_manager = requests_manager.requests_manager()
PhantomJS_driver = PhantomJS_driver.PhantomJS_driver()
html_table_reader = html_table_reader.html_table_reader()
import set_log  # log_obj.debug(text) "\x1B[1;32;41m (text)\x1B[0m"
log_obj = set_log.Logger(u'crawler全国青少年科技竞赛获奖名单公示.log',
                         set_log.logging.WARNING, set_log.logging.DEBUG)
log_obj.cleanup(u'crawler全国青少年科技竞赛获奖名单公示.log', if_cleanup=True)  # whether to empty the log file before each run


class crawler0(object):
    def __init__(self):
        pass

    def get_list(self):
        resp = requests.get('http://gs.cyscc.org.cn/')
        bs_obj = bs4.BeautifulSoup(resp.text, 'html.parser')
        e_table = bs_obj.find('table', class_='styledTable')
        e_trs = e_table.find_all('tr')[1:]
""" import pymysql as mysql import sys reload(sys) sys.setdefaultencoding('utf8') import set_log import pandas as pd import numpy as np from itertools import chain #import sqlalchemy #import sqlalchemy.ext.declarative #import sqlalchemy.orm log_obj = set_log.Logger('mysql_connecter.log', set_log.logging.WARNING, set_log.logging.DEBUG) log_obj.cleanup('mysql_connecter.log', if_cleanup=True) # 是否需要在每次运行程序前清空Log文件 class mysql_connecter(object): def __init__(self): pass def connect(self, sql, args=None, host='localhost', user='******', password='******', dbname='spider', charset='utf8'):
import sys
import os
import datetime
import pandas as pd
import numpy as np
import time
from contextlib import closing
import pymysql
sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import set_log  # log_obj.debug(text) "\x1B[1;32;41m (text)\x1B[0m"
log_obj = set_log.Logger('update_excel.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('update_excel.log', if_cleanup=True)  # whether to empty the log file before each run


class update_excel(object):
    def __init__(self):
        pass

    def get_data(self, code, date1, date2):
        date1 = date1.strftime('%Y-%m-%d')
        date2 = date2.strftime('%Y-%m-%d')
        sql = 'SELECT `crawler_key`, `fund_code`, `value_date`, `accumulative_net_value` ' \
              'FROM `eastmoney_daily_data` WHERE `fund_code` = %s ' \
              'AND (`value_date` BETWEEN "%s" AND "%s")' % (code, date1, date2)
        with closing(
                pymysql.connect('10.10.10.15', 'spider',
@time: 2017/2/21 9:38
--------------------------------
"""
import sys
import os
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import set_log  # log_obj.debug(text) "\x1B[1;32;41m (text)\x1B[0m"
log_obj = set_log.Logger('decision_tree.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('decision_tree.log', if_cleanup=True)  # whether to empty the log file before each run

print("""
The decision tree here is built with the ID3 algorithm
""")


class decision_tree(object):
    def __init__(self, target_type):
        print("===> The column to be classified is %s" % target_type)
        self.target_type = target_type
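# ID3 picks the split attribute by information gain: the drop in Shannon
# entropy after partitioning on that attribute. A sketch of the two core
# computations; the helper names are illustrative, not this class's methods.
import math
import pandas as pd


def shannon_entropy(ser):
    """H = -sum(p * log2(p)) over the class frequencies of ser."""
    probs = ser.value_counts(normalize=True)
    return -sum(p * math.log(p, 2) for p in probs)


def information_gain(df, feature_col, target_col):
    """Parent entropy minus the weighted entropy of the partitions."""
    base = shannon_entropy(df[target_col])
    split = sum(len(sub) / float(len(df)) * shannon_entropy(sub[target_col])
                for _, sub in df.groupby(feature_col))
    return base - split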
import pandas as pd
import requests
import selenium.webdriver
import string
import os
import time
import sys
sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import set_log  # log_obj.debug(text) "\x1B[1;32;41m (text)\x1B[0m"
import csv_report
log_obj = set_log.Logger('hnfgw_spider.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('hnfgw_spider.log', if_cleanup=True)  # whether to empty the log file before each run

key_list = [u'项目名称', u'开发公司', u'所在区域', u'容积率',
            u'规划总建筑面积(平方米)', u'未售总套数', u'未售总面积(平方米)']


class hnfgw_spider(object):
    def __init__(self):
        self.csv_report = csv_report.csv_report()
        self.headers = {
            'Accept': '*/*',
            'Accept-Language': 'en-US,en;q=0.8',
            'Cache-Control': 'max-age=0',
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36',
            'Connection': 'keep-alive'
        }
@Contact: [email protected] @file: Logistic_regression.py @time: 2017/12/28 16:32 -------------------------------- """ import sys import os import pandas as pd import numpy as np sys.path.append(sys.prefix + "\\Lib\\MyWheels") reload(sys) sys.setdefaultencoding('utf8') import set_log # log_obj.debug(文本) "\x1B[1;32;41m (文本)\x1B[0m" log_obj = set_log.Logger('Logistic_regression.log', set_log.logging.WARNING, set_log.logging.DEBUG) log_obj.cleanup('Logistic_regression.log', if_cleanup=True) # 是否需要在每次运行程序前清空Log文件 class Logistic_regression(object): def __init__(self): pass def gradient_ascent(self, df, class_col): alpha = 0.001 max_cycles = 500 target_col = df[class_col].copy() df = df.drop([class_col,], axis=1) for i in range(max_cycles):
import sys
import os
import traceback
import json
import pandas as pd
import bs4
sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import set_log  # log_obj.debug(text) "\x1B[1;32;41m (text)\x1B[0m"
import driver_manager
import requests_manager
requests_manager = requests_manager.requests_manager()
driver_manager = driver_manager.driver_manager()
log_obj = set_log.Logger('crawler.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('crawler.log', if_cleanup=True)  # whether to empty the log file before each run


class crawler(object):
    def __init__(self):
        with open('page_count.json', 'r') as f:
            self.page_count = json.load(f)
        self.used_urls = set()

    def main(self):
        #s_list = self.get_urls()
        with open('urls.txt', 'r') as f:
            s = f.read()
        s_list = s.split('\n')
        for s0 in s_list:
@Contact: [email protected] @file: command.py @time: 2017/6/23 13:59 -------------------------------- """ import re import sys import os import sqlite3 sys.path.append(sys.prefix + "\\Lib\\MyWheels") reload(sys) sys.setdefaultencoding('utf8') import set_log # log_obj.debug(文本) "\x1B[1;32;41m (文本)\x1B[0m" log_obj = set_log.Logger('command.log', set_log.logging.WARNING, set_log.logging.DEBUG) log_obj.cleanup('command.log', if_cleanup=True) # 是否需要在每次运行程序前清空Log文件 #'CREAT TABLE Double_Check_URL(`id` INT(10) PRIMARY KEY NOT NULL AUTO_INCREMENT, `insert_time` timestamp not null default current_timestamp, `url` VARCHAR(255))' class command(object): def __init__(self): pass def check_url(self, url): sql = "SELECT * FROM `Double_Check_URL` WHERE `url` = \"%s\"" %url resp = self.connect(sql) m = re.search(r'\([^\(\)]+?\)', str(resp)) if m: return True else:
import sys
sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import json
import time
from contextlib import closing
import cal_fitting_net_value
cal_fitting_net_value = cal_fitting_net_value.cal_fitting_net_value()
import requests_manager
requests_manager = requests_manager.requests_manager()
import calculation
calculation = calculation.calculation()
import set_log  # log_obj.debug(text) "\x1B[1;32;41m (text)\x1B[0m"
log_obj = set_log.Logger('compare_net_value.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('compare_net_value.log', if_cleanup=True)  # whether to empty the log file before each run

user_key = '1923eae2a3054d4c92a7eb74d7f65396'


class compare_net_value(object):
    def __init__(self):
        pass

    def get_net_value(self, fund_code, date_str1, date_str2):
        sql = "SELECT `fund_code`, `value_date`, `accumulative_net_value` FROM `eastmoney_daily_data` " \
              "WHERE `fund_code` = '%s' AND `value_date` BETWEEN '%s' AND '%s'" % (fund_code, date_str1, date_str2)
        with closing(
import sys
import os
import announcements_monitor.items
import re
import traceback
import datetime
import bs4
import json

log_path = r'%s/log/spider_DEBUG(%s).log' % (
    os.getcwd(), datetime.datetime.date(datetime.datetime.today()))
sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import set_log  # log_obj.debug(text) "\x1B[1;32;41m (text)\x1B[0m"
import csv_report
log_obj = set_log.Logger(log_path, set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup(log_path, if_cleanup=False)  # whether to empty the log file before each run
csv_report = csv_report.csv_report()
"""
bs = bs4.BeautifulSoup(s, 'html.parser')
e_trs = bs.find_all('tr')
e_trs[0].get_text()
"""
with open(os.getcwd() + r'\announcements_monitor\spiders\needed_data.txt', 'r') as f:
    s = f.read()
needed_data = s.split(',')
needed_data = [s.encode('utf8') for s in needed_data]

title_type1 = [
    'parcel_no', 'parcel_location', '用地面积(㎡)', 'offer_area_m2', 'purpose',
""" import sys import os import datetime import pandas as pd import numpy as np import matplotlib.pyplot as plt import matplotlib.dates sys.path.append(sys.prefix + "\\Lib\\MyWheels") reload(sys) sys.setdefaultencoding('utf8') import set_log # log_obj.debug(文本) "\x1B[1;32;41m (文本)\x1B[0m" log_obj = set_log.Logger('plot_manager.log', set_log.logging.WARNING, set_log.logging.DEBUG) log_obj.cleanup('plot_manager.log', if_cleanup=True) # 是否需要在每次运行程序前清空Log文件 class plot_manager(object): def __init__(self): pass def two_axis(self, df, ax_cols1, ax_cols2, **kwargs): fig = plt.figure() ax1 = fig.add_subplot(111) if 'x_axis' in kwargs: df = df.set_index(kwargs['x_axis'])