Example #1
def get_country_news(lat1, lon1, lat2, lon2):
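    # Return [countryName, latitude, longitude] for every country whose coordinates fall inside the bounding box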
    # Ensure that (lat1, lon1) is the lower corner and (lat2, lon2) the upper corner
    if lat1 > lat2:
        lat1, lat2 = lat2, lat1
    if lon1 > lon2:
        lon1, lon2 = lon2, lon1
    
    # Find all countries within the boundaries
    db = mysql.Mysql()
    db.query("SELECT * FROM Country WHERE lat != '-1' AND lon != '-1'")
    locations = []
    while True:
        country = db.fetch()
        if country == {}:
            break
        if not (country['lat'] > lat1 and country['lat'] < lat2 and
           country['lon'] > lon1 and country['lon'] < lon2):
            continue
        country_name = country['countryName']
        country_lat = get_real_latlon(country['lat'])
        country_lon = get_real_latlon(country['lon'])
        locations.append([country_name, country_lat, country_lon])
    return locations
Example #2
def getNewsList():
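    # Return one page of FreeNews rows (newest first), selected by the 'index' query argument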

    db = mysql.Mysql()

    try:
        index = request.args.get('index')

        pageIndex = int(pageCount) * int(index)

        sql = 'select title,time,url from FreeNews order by time desc limit %s,%s' % (pageIndex,pageCount)

        db.queryData(sql)
        result_mysql = db.cur.fetchall()

        result = []

        for item in result_mysql:
            result.append({"title":item[0],"time":item[1],"url":item[2]})

        db.conn.close()

        if len(result) == 0:
            return getBaseReturnValue(data=result,msg="没有更多数据!",code=False)
        else:
            return getBaseReturnValue(data=result,msg='OK',code=True)

    except KeyError as e:
        return getBaseReturnValue(data=[],msg="Error",code=False)
Example #3
 def initialize(self):
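     # Reuse the application-wide Mysql handle, creating it on first use, then open a session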
     db = self.application.db
     if not db:
         self.application.db = mysql.Mysql()
         db = self.application.db
     assert (db)
     db.open_session()
Example #4
 def main(self, pageIndex):
     f_html = open('houseUrl.txt', 'w')
     # Process pool with 4 concurrent worker processes
     pool = Pool(processes=4)
     #for page in pageIndex:
     #	print pool.apply_async(self.getAllUrl, (pageIndex,))
     poolurl = pool.map(self.getAllUrl, pageIndex)
     urls = []
     # Flatten the sub-lists returned by the workers into one list
     map(urls.extend, poolurl)
     # join() turns the list into a single newline-separated string
     f_html.write('\n'.join(urls))
     # Close the pool so it will not accept any new work
     pool.close()
     # Wait for all worker processes to finish, so the main process does not exit before the workers
     pool.join()
     f_html.close()
     # Connect to the database
     self.mysql = mysql.Mysql()
     # Store the collected URLs in the database
     for i in range(0, len(urls)):
         url_dict = {
             "url": urls[i],
         }
         self.mysql.insertData('houseurl', url_dict)
Example #5
 def __init__(self):
     # self.PageNum = 0
     # self.payload = {'start' : self.PageNum , 'type' : 'T'}
     self.TitleList = ['小说']
     # self.title = None
     self.BaseUrl = 'https://book.douban.com/tag/'
     self.mysql = mysql.Mysql()
Example #6
    def output(self, data):
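        # Timestamp the record, write it to CSV (current file plus a timestamped backup), then insert it into MySQL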
        time_now = datetime.datetime.now()
        data['datetime'] = '\"' + time_now.strftime('%Y-%m-%d %H:%M:%S') + '\"'

        # to csv
        df = pd.DataFrame(data.items(), columns=['item', 'content'])
        bak_file = './result/output_%s.csv' % time_now.strftime(
            '%Y-%m-%d-%H-%M-%S')
        df.to_csv(bak_file,
                  index=False,
                  header=False,
                  sep='\t',
                  mode='wb',
                  encoding='utf-8')  # for bak_up
        df.to_csv('./result/output.csv',
                  index=False,
                  header=False,
                  sep='\t',
                  mode='wb',
                  encoding='utf-8')
        logger.info('Bak_file: %s' % bak_file)

        # to mysql
        sql = mysql.Mysql()
        sql.insert_data(table_name, data)
        logger.info('Output to mysql is done.')
Example #7
def demo01(request):
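    # Execute a raw SQL statement through the Mysql helper and return the result as a JSON HttpResponse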
    db = mysql.Mysql()
    sql = "show databases sdf;"
    params = {}
    params['sql'] = sql
    res = db.exec_query(params)
    jsonStr = json.dumps(res, cls=MyEncoder)
    return HttpResponse(jsonStr)
Example #8
class ProxyMiddleware(object):

    mysql = mysql.Mysql()

    def process_request(self, request, spider):
        AllPro = self.mysql.selectData()
        length = len(AllPro)
        print length
        print '22222222222222222222'
        n = random.randint(0, length - 1)
        pro = AllPro[n][1]
        print pro
        request.meta['proxy'] = pro
Example #9
def demo02(request):
    # Initialize the db --- a different MySQL instance cannot be specified yet; needs improvement
    db = mysql.Mysql()
    # Write your SQL statement
    #sql = "select * from account_bkuser"
    # Define the parameters
    params = {}
    params['sql'] = ''
    # Use the db's query_all() to fetch all of the results
    res = db.query_all(params)
    # Convert the results to JSON and return them over HTTP
    jsonStr = json.dumps(res, cls=MyEncoder)
    return HttpResponse(jsonStr)
Example #10
def load_subcategories_news(country_id, subcategory_id):
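    # Fetch the news feed for this subcategory and insert each article as a country-level (minZoom 2) row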
    db = mysql.Mysql()
    news_url = news_url_prefix + str(subcategory_id) + news_url_suffix
    json_response = urllib2.urlopen(news_url)
    news = json.loads(json_response.read())
    news = news['articles']
    for article in news:
        title = db.escape_string(article['title'])
        publish_date = db.escape_string(article['publish_date'])
        url = db.escape_string(article['url'])
        query = "INSERT INTO Article (title, minZoom, locId, publishDate, url) \
            VALUES('" + title + "', '2', '" + str(
            country_id) + "', '" + publish_date + "', '" + url + "')"
        db.query(query)
Example #11
def get_country_article_list(country_name):
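    # Resolve the country's id by name, then collect all of its country-level (minZoom 2) articles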
    db = mysql.Mysql()
    country_name = db.escape_string(country_name)
    db.query("SELECT id FROM Country WHERE countryName='"+country_name+"'")
    country = db.fetch()
    country_id = country['id']
    db.query("SELECT * FROM Article WHERE minZoom='2' AND locId='"+str(country_id)+"'")
    article_list = []
    while True:
        article = db.fetch()
        if article == {}:
            break
        article_list.append(article)
    return article_list
Example #12
    def run(self):
        gpioFd = []
        # Create the database object
        gpioDB = mysql.Mysql(cfg.getLocalIp(), int(cfg.getLocalPort()),
                             cfg.getLocalUser(), cfg.getLocalPassword(),
                             cfg.getLocalDatabase(), cfg.getLocalTable())
        # Connect to the database
        gpioDB.connectDatabase()
        epoll = select.epoll()
        # Initialization: refresh the state of every local GPIO once
        for gpioIndex in range(8):
            # Update the state of this GPIO pin
            # Export (claim) the GPIO
            gpio.gpioExport(gpioTuple[gpioIndex])
            # Configure it as an input
            gpio.setInput(gpioTuple[gpioIndex])
            # Trigger the GPIO interrupt on both edges
            gpio.setEdge(gpioTuple[gpioIndex], 'both')
            # Insert the GPIO state into the local database
            gpioDB.insertInto(gpioIndex, '%d' % (gpioIndex + 1),
                              'gpio%d' % (gpioIndex + 1),
                              gpio.getInputValue(gpioTuple[gpioIndex]),
                              gpioIndex)
            # Open the value file and keep it open for later reads, to avoid the cost of repeatedly opening and closing it
            f = gpio.gpioInputFile(gpioTuple[gpioIndex])
            gpioFd.append(f)
            #print ('fileno: %d' % f.fileno())
            # Register the file with epoll
            epoll.register(f, select.EPOLLERR | select.EPOLLPRI)

        while True:
            events = epoll.poll()
            for fileno, event in events:
                # There is data to read
                if event & select.EPOLLPRI:
                    # Loop to find which GPIO's value changed, then read it
                    for i in range(8):
                        if fileno == gpioFd[i].fileno():
                            value = gpioFd[i].read().strip('\n')
                            # print ('f: %s' % value)
                            # Move the file pointer back to the start of the file
                            gpioFd[i].seek(0, 0)
                            # Save the value to MySQL
                            gpioDB.insertInto(1, '%d' % (i + 1),
                                              'gpio%d' % (i + 1), value,
                                              i)
                            break

                if event & select.EPOLLERR:
                    pass
Example #13
    def ready(self):
        self.mysql = mysql.Mysql()
        AllNews = self.mysql.selectData('new')
        content = ''
        for n in AllNews:
            title = n[1].encode('utf-8')
            url = n[2].encode('utf-8')
            each_line = '<a href="%s">%s</a><br><br>' % (url, title)
            content = content + each_line

        sql = "select value from config where field='laoju'"
        laoju = self.mysql.NoemalSelect(sql)
        str_laoju = laoju[0][0].split(",")
        content = content + "<h5>%s<br><br>%s<br><br>%s</h5>" %(str_laoju[0].encode('utf-8'), str_laoju[1].encode('utf-8'), str_laoju[2].encode('utf-8'))

        return content
Example #14
 def run(self):
     gpioDB = mysql.Mysql(cfg.getLocalIp(), int(cfg.getLocalPort()),
                          cfg.getLocalUser(), cfg.getLocalPassword(),
                          cfg.getLocalDatabase(), cfg.getLocalTable())
     gpioDB.connectDatabase()
     #sqlserver = cdll.LoadLibrary(os.getcwd() + '/libsqlserver.so')
     sqlserver = cdll.LoadLibrary('/usr/share/gpio/libsqlserver.so')
     while True:
         if self.first_connect or self.upload_result == False:
             #print ('%s:%s' % (cfg.getRemoteIp(), cfg.getRemotePort()))
             if sqlserver.openSqlserver(
                     cfg.getRemoteUser().encode(),
                     cfg.getRemotePassword().encode(),
                     cfg.getRemoteDatabase().encode(),
                 ('%s:%s' %
                  (cfg.getRemoteIp(), cfg.getRemotePort())).encode(),
                     cfg.getRemoteTable().encode()) == 0:
                 time.sleep(1)
                 continue
             self.first_connect = False
             self.upload_result = True
         # Get the current time
         timeLocal = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
         # Fetch rows from the local database whose update time is earlier than now
         results = gpioDB.selectByUpdateTime(timeLocal)
         # The rows come back in reverse order, so iterate over them reversed
         #            print('upload time ' + configureInotify.get_section_value('remote', 'interval_uploadData'))
         for res in results[::-1]:
             print(res)
             print(res[0])
             # Convert to a Unix timestamp
             t = res[5].timestamp()
             # Upload the row to the remote server
             mutexLock.acquire()
             if sqlserver.insertInto(res[1], res[2].encode(),
                                     res[3].encode(), res[4].encode(),
                                     int(t), res[6]) == 0:
                 mutexLock.release()
                 # Upload failed; reconnect to the SQL Server
                 self.upload_result = False
                 continue
             mutexLock.release()
             # Delete the row locally once it has been uploaded
             gpioDB.deleteByRealDateLogId(res[0])
         time.sleep(int(cfg.getUploadInterval()))
Example #15
def getSearchList():
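    # Search FreeVideo titles with a LIKE query on the 'keyword' argument and return up to 20 matches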

    db = mysql.Mysql()

    try:
        keyword = request.args.get('keyword')

        sql = "select title,url from FreeVideo where title like '%"+ keyword +"%' limit 0,20"

        db.queryData(sql)
        result_mysql = db.cur.fetchall()
        result = []
        for item in result_mysql:
            result.append({"title":item[0],"url":item[1]})

        db.conn.close()

        return getBaseReturnValue(data=result,msg='OK',code=True)
    except KeyError as e:
        return getBaseReturnValue(data=[],msg="Error",code=False)
Example #16
def get_country_categories(url):
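    # Map each country id to the subcategory whose English name mentions that country; also return the unmatched countries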
    db = mysql.Mysql()
    country_categories = {}
    json_response = urllib2.urlopen(url).read()
    subcategories = json.loads(json_response)
    for subcategory in subcategories:
        subcategory_name = subcategory['english_subcategory_name']
        db.query("SELECT id, countryName FROM Country")
        for country in db.iterate_rows():
            if country['countryName'] in subcategory_name:
                country_categories[
                    country['id']] = subcategory['subcategory_id']
                break
    db.query("SELECT id, countryName FROM Country")
    uncategorized_countries = {}
    for country in db.iterate_rows():
        if country['id'] not in country_categories:
            uncategorized_countries[country['id']] = country['countryName']
    return country_categories, uncategorized_countries
Example #17
def load_uncategorized_countries_news(country_id, country_name):
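    # Search the news API by country name and insert each returned article for the given country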
    db = mysql.Mysql()
    country_name = country_name.replace('.', '')
    url = uncategorized_news_url + urllib.quote_plus(country_name)
    try:
        json_response = urllib2.urlopen(url).read()
    except Exception as e:
        print e
        print 'Country Name: ' + country_name
        print 'URL: ' + url
        return
    articles = json.loads(json_response)
    articles = articles['articles']
    for article in articles:
        title = db.escape_string(article['title'])
        publish_date = db.escape_string(article['publish_date'])
        url = db.escape_string(article['url'])
        query = "INSERT INTO Article (title, minZoom, locId, publishDate, url) \
            VALUES('" + title + "', '2', '" + str(
            country_id) + "', '" + publish_date + "', '" + url + "')"
        db.query(query)
Example #18
class DoubanSpider(scrapy.Spider):
    mysql = mysql.Mysql()
    name = "douban"
    headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate, sdch, br",
            "Accept-Language": "zh-CN,zh;q=0.8",
            "Cache-Control": "max-age=0",
            "Connection": "keep-alive",
            "Content-Type":" application/x-www-form-urlencoded; charset=UTF-8",
            'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36",
            'Host': "www.douban.com",
            "Upgrade-Insecure-Requests": '1',
            'Referer': "https://www.douban.com",
            }

    def start_requests(self):
        # AllPro = self.mysql.selectData()
        # print AllPro[0][1]
        # print '111111111111111111111111111111111111111111'
        # print AllPro[0][1]
        urls = [
            'https://www.douban.com/',
        ]
        url = 'https://www.douban.com/'
        url2 = 'https://www.baidu.com/'
        # for url in urls:
        yield scrapy.Request(url=url, headers=self.headers, callback=self.parse, dont_filter = True)
        yield scrapy.Request(url=url2, headers=self.headers, callback=self.parse, dont_filter = True)
        
        # # 手动设置成功
        # for url in urls:
        #     yield scrapy.Request(url=url, meta={'proxy': AllPro[0][1]}, headers=self.headers, callback=self.parse)

        # 尝试用下载中间件
        
    def parse(self, response):
        print response.body
Example #19
def query(zoom, query):
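    # Validate the zoom level, resolve the queried location's id, and return its articles as JSON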
    db = mysql.Mysql()
    if zoom not in zooms:
        abort(404)
    zoom = db.escape_string(zoom)
    query = db.escape_string(query)
    depth = zoom_to_depth[zoom]
    
    # Get the location_ids
    db.query("SELECT id FROM "+depth+" WHERE "+depth.lower()+"Name = '"+query+"'")
    location_id = db.fetch()
    location_id = str(location_id['id'])
    
    # Get the articles
    db.query("SELECT id, title, minZoom, publishDate, url FROM Article WHERE \
        minZoom='"+zoom+"' AND locId='"+location_id+"'")
    articles = []
    while True:
        article = db.fetch()
        if article == {}:
            break
        articles.append(article)
    return json.dumps(articles)
Example #20
def getMediaList():
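    # Page through FreeVideo rows filtered by category leve1 or leve2, depending on which one is supplied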

    db = mysql.Mysql()

    try:
        index = request.args.get('index')
        leve1 = request.args.get('leve1')
        leve2 = request.args.get('leve2')

        result = []

        pageIndex = int(pageCount) * int(index)

        if leve2 == "":
            sql = 'select title,url from FreeVideo where leve1="%s" limit %s,%s' % (leve1,pageIndex,pageCount)
        else:
            sql = 'select title,url from FreeVideo where leve2="%s" limit %s,%s' % (leve2,pageIndex,pageCount)

        db.queryData(sql=sql)

        result_mysql = db.cur.fetchall()

        for item in result_mysql:
            ji = {"title": item[0],"url":item[1]}
            result.append(ji)

        db.conn.close()

        if len(result) == 0:
            return getBaseReturnValue(data=result,msg="没有更多数据!",code=False)
        else:
            return getBaseReturnValue(data=result,msg='OK',code=True)

    except KeyError as e:
        print(e)
        return getBaseReturnValue(data=[],msg="Error",code=False)
Example #21
                        elif quarter == 2:
                            if soup_table[i].a:
                                insert_item = insert_item + "'" + soup_table[i].a.string[7:17] + "'"
                            else:
                                insert_item = insert_item + "'" + soup_table[i].string[8:18] + "'"
                        else:
                            insert_item = insert_item + "'" + soup_table[i].string[8:18] + "'"
                    else:
                        insert_item = insert_item + "'" + soup_table[i].string[8:18] + "'"
                    for j in range(1,8):
                        insert_item = insert_item + ", '"+ soup_table[i+j].string + "'"
                    sql.insert(table_name, insert_columns, insert_item)
                    sql.conn.commit()
    sql.close_conn()
                    
if __name__ == '__main__':
    #id_securities = get_id_security(db_host, db_user, db_passwd, db_name)
    id_securities = ['300388']
    sql = mysql.Mysql(db_host, db_user, db_passwd, db_name, charset_type)
    unit_action(id_securities, year_today, quarter_today, sql)
    #unit_test(600053, year_today, quarter_today, sql)
    
    '''
    002752 300208 300219 3
    Traceback (most recent call last):
  File "dailyprice_create_sql.py", line 160, in <module>
    unit_action(id_securities[2372:], year_today, quarter_today, sql)
  File "dailyprice_create_sql.py", line 128, in unit_action
    year_start = soup_table[0].find_all('option')[-5].string  # 获得年份
IndexError: list index out of range
'''
Example #22
def query():
    db = mysql.Mysql()
    return jsonify(db.queryData())
Example #23
"""
Updates the list of video files on the local computer

"""
import mysql
import os
from comm import Comm

databaseConnect = mysql.Mysql()
databaseConnectDelete = mysql.Mysql()
comminstance = Comm()

#Get the list of local directories
query = "SELECT directory FROM directories WHERE server='localfiles'"
databaseConnect.query(query)
localDirectory = databaseConnect.fetch()
directoryList = []
while localDirectory != {}:
    directoryList.append(localDirectory['directory'])
    localDirectory = databaseConnect.fetch()

#Get the list of local files and make sure they are in the database
for directory in directoryList:
    fileNameList = [
        f for f in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, f))
    ]
    fileNameList.sort()
    for fileName in fileNameList:
        query = "SELECT * FROM localfiles WHERE filedirectory='" + databaseConnect.escape_string(
            directory) + "' AND filename='" + databaseConnect.escape_string(
Example #24
class PoolSpider(scrapy.Spider):
    identity = 0
    tool = Tool()
    mysql = mysql.Mysql()
    name = "pool"
    headers = {
        "Accept":
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate, sdch, br",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Content-Type": " application/x-www-form-urlencoded; charset=UTF-8",
        'User-Agent':
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0",
        'Host': "www.xicidaili.com",
        "Upgrade-Insecure-Requests": '1',
        'Referer': "http://www.xicidaili.com/nn/1",
    }

    def start_requests(self):
        urls = 'http://www.xicidaili.com/nn/'
        # urls = 'http://www.kuaidaili.com/proxylist/1/'
        self.mysql.clearnIp()
        for i in range(1, 11):
            print i
            yield scrapy.Request(url=urls + str(i),
                                 headers=self.headers,
                                 callback=self.parse)

        # return self.test()

        # # urls = 'http://www.xicidaili.com/nn/'
        # # choice = raw_input('(T)est (C)heck (R)un\n')
        # # if choice == 'T':
        # #     # fp = open('usefulIp.txt' , 'a')
        # #     # fp.write('')
        # #     # fp.close()
        # #     self.mysql.clearnUsefulIp()
        # #     return self.test()
        # # elif choice == 'R':
        # #     return self.choice()
        # # elif choice == 'C':
        # #     return self.check()
        # # else:
        # #     print 'Wrong input'

        # for i in range(1,11):
        #     print i
        #     yield scrapy.Request(url=urls + str(i), headers=self.headers, callback=self.parse)

    # def choice(self):
    #     urls = 'http://www.xicidaili.com/nn/'
    #     # urls = 'http://www.kuaidaili.com/proxylist/1/'
    #     self.mysql.clearnIp()
    #     for i in range(1,11):
    #         print i
    #         yield scrapy.Request(url=urls + str(i), headers=self.headers, callback=self.parse)

    def parse(self, response):

        # good
        ip = response.xpath(
            '//td[(((count(preceding-sibling::*) + 1) = 3) and parent::*)] | //td[(((count(preceding-sibling::*) + 1) = 2) and parent::*)]'
        ).extract()
        ips = []
        for i in ip:
            ips.append(self.tool.replace(i))

        # identity = 0
        if ip:
            while ips != []:
                port = ips.pop()
                Anip = ips.pop()
                if self.mysql.insertData(self.identity, Anip, port):
                    pass
                    # print u"保存ip成功"
                else:
                    print u"保存ip失败"
                self.identity += 1

        # filename = 'ipPool.txt'
        # with open(filename, 'w') as f:
        #     count = 1
        #     for eachIp in ips:
        #         f.write(eachIp),
        #         # f.write('\t')
        #         if count%2 == 0:
        #             f.write('\n')
        #         else:
        #             f.write('\t')
        #         count += 1
        #
        # The (commented-out) block above wrote the crawled IPs to a file

    def test(self):
        self.mysql.clearnUsefulIp()
        headers = {
            "Accept":
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate, sdch, br",
            "Accept-Language": "zh-CN,zh;q=0.8",
            "Cache-Control": "max-age=0",
            "Connection": "keep-alive",
            "Content-Type":
            " application/x-www-form-urlencoded; charset=UTF-8",
            'User-Agent':
            "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0",
            'Host': "www.baidu.com",
            "Upgrade-Insecure-Requests": '1',
            # 'Referer': "http://www.xicidaili.com/nn/1",
        }
        url = 'https://www.baidu.com/'

        # fp = open('ipPool.txt' , 'r')
        # AnIp = fp.readlines()
        # fp.close()

        AllIp = self.mysql.selectData()

        proxys = []
        for p in AllIp:
            # TheIp = p.strip('\n').split('\t')
            # # print TheIp[0],TheIp[1]
            pro = 'http://' + str(p[1]) + ':' + str(p[2])
            try:
                print pro
                yield scrapy.Request(url=url,
                                     headers=headers,
                                     meta={
                                         'proxy': pro,
                                         'download_timeout': 10
                                     },
                                     callback=self.test_parse,
                                     dont_filter=True)
            except:
                print 'next'
        # sys.exit(1)
        # The code below tries each proxy and writes the usable ones to a new file

    #
    def test_parse(self, response):
        print '6666666666666666'
        # fp = open('usefulIp.txt' , 'a')
        # fp.write(response.meta['proxy'])
        # fp.write('\n')
        # print response.meta['proxy']
        proxy = response.meta['proxy']
        if self.mysql.usefulIp(self.identity, proxy):
            pass
            # print u"保存ip成功"
        else:
            print u"保存ip失败"
        self.identity += 1

    #
    # @classmethod
    # def from_crawler(cls, crawler, *args, **kwargs):
    #     spider = super(PoolSpider, cls).from_crawler(crawler, *args, **kwargs)
    #     crawler.signals.connect(spider.spider_idle, signal=signals.spider_idle)
    #     return spider
    #
    # def spider_idle(self, spider):
    #     self.test()
    #     print 'here'
    #     self.mysql.clearnUsefulIp()

    def check(self):
        url = 'https://www.baidu.com/'
        headers = {
            "Accept":
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate, sdch, br",
            "Accept-Language": "zh-CN,zh;q=0.8",
            "Cache-Control": "max-age=0",
            "Connection": "keep-alive",
            "Content-Type":
            " application/x-www-form-urlencoded; charset=UTF-8",
            'User-Agent':
            "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0",
            'Host': "www.baidu.com",
            "Upgrade-Insecure-Requests": '1',
            # 'Referer': "http://www.xicidaili.com/nn/1",
        }
        UsefulIp = self.mysql.selectip()
        for eachIp in UsefulIp:
            pro = eachIp[1]
            print pro
            try:
                yield scrapy.Request(url=url,
                                     headers=headers,
                                     meta={'proxy': pro},
                                     callback=self.check_parse,
                                     dont_filter=True)
            except:
                identity = eachIp[0]
                self.mysql.delete(identity)

    def check_parse(self, response):
        print response.meta['proxy']
        print 'OK'
Example #25
def get_article(article_id):
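    # Fetch a single Article row by its (escaped) id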
    db = mysql.Mysql()
    article_id = db.escape_string(article_id)
    db.query("SELECT * FROM Article WHERE id='"+article_id+"';")
    article = db.fetch()
    return article
Example #26
def main():
    print 'Backup starting on %s' % str(datetime.datetime.now())

    # Start the counter
    start = int(time.time())

    # Create temporary directory
    temp_dir = mkdtemp()

    # MySQL
    if config.mysql['enabled']:
        print '\n[MySQL]'

        i = 0

        for settings in config.mysql['servers']:
            instance = mysql.Mysql(settings)

            instance_dir = os.path.join(temp_dir, 'db/mysql/instance', str(i))
            dump_filepath = os.path.join(instance_dir, 'all_databases.sql')

            os.makedirs(instance_dir)

            print '\tInstance %s' % str(i)

            for db in instance.get_db_list():
                print '\t\t%s' % db

            instance.save(dump_filepath)

            print '\n\tDump : %s (%s MB)' % (os.path.basename(dump_filepath), str(os.path.getsize(dump_filepath) / 1000000))

            i += 1

    # ... the rest

    print '\n[Archiving]'

    # Create output_dir if it does not exist
    if not os.path.isdir(config.output_dir):
        try:
            os.makedirs(config.output_dir)
        except:
            print 'The backup directory %s does not exist and could not be created. Exiting.' % config.output_dir
            sys.exit(1)

    # Create the tar archive
    print '\tCreating directory'
    tar_file = os.path.join(config.output_dir, config.archive_prefix + str(datetime.date.today()) + ".tar.gz")
    tar = tarfile.open(tar_file, 'w:gz')

    # Add databases into the archive
    if config.mysql['enabled']:
        print '\tAdding database dumps'
        tar.add(temp_dir)

    # Add all the directories from config into the archive
    if len(config.dirs_to_backup) > 0:
        print '\tAdding directories from configuration'
        for directory in config.dirs_to_backup:
            print '\t\t%s' % directory
            tar.add(directory)

    tar.close()

    shutil.rmtree(temp_dir)

    print '\n\tArchive : %s, size %s MB.' % (os.path.basename(tar_file), str(os.path.getsize(tar_file) / 1000000))

    print '\n[Upload]'

    # FTP
    if config.ftp['enabled']:
        for instance in config.ftp['servers']:
            try:
                print '\tUploading to %s' % instance['host']
                upload_time = ftp.upload(instance, tar_file)

                minutes = upload_time / 60
                seconds = upload_time - minutes * 60
                
                print '\t\tUpload completed in %s min %s s.' % (str(minutes), str(seconds))
            except Exception as e:
                print 'Error during upload : %s\n' % e.message

    # Swift
    if config.swift['enabled']:
        for server in config.swift['servers']:
            try:
                container = swift.SwiftContainer(
                    server['authurl'],
                    server['auth_version'],
                    server['user'],
                    server['key'],
                    server['tenant_name'],
                    server['container_name'],
                    server['autocreate'])

                print '\tUploading to Swift server %s' % server['name']
                swift_upload_start = int(time.time())

                container.upload(tar_file)

                swift_upload_end = int(time.time())

                minutes = (swift_upload_end - swift_upload_start) / 60
                seconds = (swift_upload_end - swift_upload_start) - minutes * 60
                print '\t\tUpload completed in %s min %s s.' % (str(minutes), str(seconds))
                
            except Exception as e:
                print 'Error during upload : %s\n' % e.message


    print '\n[Delete old backups]'

    archives_deleted = False

    for archive in os.listdir(config.output_dir):
        archive_info = os.stat(os.path.join(config.output_dir, archive))
        
        # If archive is older than keeptime (in days)...
        if time.mktime(time.gmtime()) - archive_info.st_mtime > (config.keeptime * 24 * 60 * 60):
            print '\tDeleting %s' % archive
            
            # Remove local copy
            print '\t\tLocal copy'
            try:
                os.remove(config.output_dir + '/' + archive)
            except Exception as e:
                print 'Error during removal : %s\n' % e.message

            # Remove FTP copy
            if config.ftp['enabled']:
                for instance in config.ftp['servers']:
                    try:
                        print '\t\tFrom FTP server %s' % instance['host']
                        ftp.delete_remote(instance, archive)
                    except Exception as e:
                        print 'Error during deletion : %s\n' % e.message
            
            if config.swift['enabled']:
                for server in config.swift['servers']:
                    try:
                        print '\t\tFrom Swift server %s' % server['name']
                        container = swift.SwiftContainer(
                            server['authurl'],
                            server['auth_version'],
                            server['user'],
                            server['key'],
                            server['tenant_name'],
                            server['container_name'])

                        container.delete(archive)
                    except Exception as e:
                        print 'Error during deletion : %s\n' % e.message


            archives_deleted = True

    if not archives_deleted:
        print '\tNo archives were deleted.'

    # Stop the counter
    end = int(time.time())

    print '\nBackup ending on %s' % str(datetime.datetime.now())

    minutes = (end - start) / 60
    seconds = (end - start) - minutes * 60
    print 'Time elapsed : %s min %s s.' % (str(minutes), str(seconds))
Example #27
    * ISO 3166 Country Code
    * Country Name
    * Latitude
    * Longitude 
    
e.g.:
"AF","Afghanistan",33,65

@author: Albert Wang
@date: 10/24/2010
"""

import csv
import mysql

mysql_connection = mysql.Mysql()

csv_file_location = "./average-latitude-longitude-countries.csv"
csv_file_handle = open(csv_file_location, 'r')
csv_file_reader = csv.reader(csv_file_handle, delimiter=',')

# Read the city data line by line
for line in csv_file_reader:
    country_code = mysql_connection.escape_string(line[0])
    name = mysql_connection.escape_string(line[1])
    latitude = mysql_connection.escape_string(line[2])
    longitude = mysql_connection.escape_string(line[3])

    # Insert data into database
    query = "INSERT INTO countries (code, name, latitude, longitude)\
    VALUES('" + country_code + "', '" + name + "', '" + latitude + "', '" + longitude + "')"
Example #28
#!/usr/bin/python
# -*- coding: utf-8 -*- #
import requests, sys, re
import MySQLdb
import mysql

from bs4 import BeautifulSoup

reload(sys)
sys.setdefaultencoding('utf-8')

mysql = mysql.Mysql()

print '正在从活动家抓取数据......'
headers = {
    'content-type':
    'application/json',
    'Host':
    'www.huodongjia.com',
    'Referer':
    'https://www.huodongjia.com/it/',
    'Upgrade-Insecure-Requests':
    '1',
    'User-Agent':
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36'
}
for page in range(150):
    # url='https://www.huodongjia.com/beijing/it/page-9'
    url = 'https://www.huodongjia.com/beijing/it/page-' + str(page + 1) + '/'
    print '---------------------------正在爬取第' + str(
        page + 1) + '页......--------------------------------'
Example #29
"""
This script loads cities into the database
"""
import csv

import pycountry
import mysql

db = mysql.Mysql()

# Empty all the relevant tables so we can start from scratch
db.query("TRUNCATE TABLE City")
db.query("TRUNCATE TABLE CityAlias")

# Load cities
# Country,City,AccentCity,Region,Population,Latitude,Longitude
csv = csv.reader(open('worldcitiespop.txt', 'rb'))
for row in csv:
    print row[1], " in ", row[0]
    if row[4] == '' or int(row[4]) <= 1000000:
        continue
    try:
        states = pycountry.subdivisions.get(country_code=row[0].upper())
    except:
        continue
    for state in states:
        if state.code[3:] == row[3]:
            break
    db.query("SELECT id FROM State WHERE stateName='" +
             db.escape_string(state.name) + "'")
    state_id = str(db.fetch()['id'])
Example #30
 def __init__(self):
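     # Crawl state: the current page number, the total count (filled in later), plus Page and Mysql helpers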
     self.page_num = 1
     self.total_num = None
     self.page_spider = page.Page()
     self.mysql = mysql.Mysql()