Example #1
def setup():
  commit('''CREATE TABLE unsubs(id INT NOT NULL AUTO_INCREMENT, \
    hash VARCHAR(8), \
    url VARCHAR(1000), \
    email VARCHAR(150), \
    PRIMARY KEY (id))''')
    
  commit('''CREATE TABLE readmail(id INT NOT NULL AUTO_INCREMENT, \
    email INT, \
    PRIMARY KEY (id))''')
    
  commit('''CREATE TABLE usercount(id INT NOT NULL AUTO_INCREMENT, \
    another INT, \
    PRIMARY KEY (id))''')
    
  commit('''CREATE TABLE analytics(id INT NOT NULL AUTO_INCREMENT, \
    email VARCHAR(150), \
    url VARCHAR(1000), \
    success INT, \
    PRIMARY KEY (id))''')
    
  commit('''CREATE TABLE anonymousanalytics(id INT NOT NULL AUTO_INCREMENT, \
    emailhash VARCHAR(64), \
    unsubhash VARCHAR(8), \
    success INT, \
    stamp DATETIME, \
    PRIMARY KEY (id))''')
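
Every snippet in this collection calls thin commit()/fetch() wrappers that the excerpts never define. A minimal sketch of what they might look like, assuming pymysql; the driver, credentials, and database name are illustrative guesses, not part of the original source:

# Hypothetical commit()/fetch() helpers assumed by the examples in this
# collection; pymysql and the connection settings are illustrative guesses.
import pymysql

conn = pymysql.connect(host='localhost', user='app', password='secret',
                       database='unsubs')

def commit(query, params=None):
    # run a statement and persist it immediately
    with conn.cursor() as cur:
        cur.execute(query, params)
    conn.commit()

def fetch(query, params=None):
    # run a query and return all rows as tuples
    with conn.cursor() as cur:
        cur.execute(query, params)
        return cur.fetchall()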
Example #2
def deleteAllUnsubs():
    results = fetch('select hash from unsubs')
    log.info('deleting all unsubs with # unsubs ' + str(len(results)))
    if len(results) < 15:  # only clear the table when it holds fewer than 15 rows
        for r in results:
            hh = r[0]
            commit('delete from unsubs where hash=%s', hh)
Example #3
def handleDB(it):
    for jj in range(10):
        ll, origSet = getFive()
        if not ll:
            if it > 2:
                log.info('empty turning off')
                #time.sleep(120)  # wait for master to finish
                turnOff()
            return
        browser = selenium.getBrowser()
        log.info(str(len(ll)) + str(ll) + str(jj))
        try:
            i = 0
            for uns in ll:
                if i > 6:
                    break
                i += 1
                log.info('hashh %s', uns.hashh)
                res = unsubscribe(uns, browser)
                if not res:
                    log.info('failed confirmation %s', uns.hashh)
                    addEmailToSqlAnalytics(uns, False)
                else:
                    log.info('confirmed unsub')
                    commit('insert into usercount (another) values (1)')
                    addEmailToSqlAnalytics(uns, True)
                #browser = selenium.refreshBrowser(browser)
        except Exception as e:
            log.warn(e)
        log.info('deleting from unsubs ' + str(origSet))
        for ss in origSet:
            commit('delete from unsubs where hash=%s', ss)
        selenium.closeBrowser(browser)
Example #4
 def post(self, ids):
     title = self.get_argument('title')
     fl = self.get_argument('fl')
     tag = self.get_argument('tag')
     content = self.get_argument('comment')
     # bind values instead of %-interpolating them: avoids SQL injection
     # and broken quoting when a field contains an apostrophe
     sql.cur.execute(
         "update blog set title=?,fl=?,tag=?,content=? where id=?",
         (title, fl, tag, content, ids))
     sql.commit()
     self.redirect('/test/%s' % ids)
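
The original handler interpolated request arguments straight into the statement; the rewrite above binds them instead. A quick self-contained sqlite3 illustration of why binding matters (the table and hostile value are invented for the demo):

# Parameter binding leaves quoting to the driver, so hostile input is stored
# verbatim instead of being executed as SQL.
import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute('create table blog(id integer primary key, title text)')
conn.execute('insert into blog(title) values (?)',
             ("Robert'); drop table blog;--",))
print(conn.execute('select title from blog').fetchone()[0])  # printed as-is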
Example #5
def deleteReadEmail17days():
  results = fetch('select email from readmail')
  print results[-10:]

  # readmail.email is an INT column (see Example #1); walk ids upward from
  # 6000, removing one per existing row
  start = 6000
  for _ in results:
    commit('delete from readmail where email=%s', str(start))
    start += 1
  results = fetch('select email from readmail')
  print results[-10:]
Example #6
def fill_proxypool():
    page = get_webpage()  # scrape the Xici proxy site; keep at least 15 minutes between fetches
    # page = test.read('西刺代理.html', 'utf-8')  # for testing: read the saved page to avoid a ban for scraping too often
    counter = 1
    for item in parse_page(page):
        print('updating: {0:^2} - {1:<16}'.format(counter, item[0]), end='\t')
        if sql.insert(item[0], item[1], item[2], item[3], item[4]):
            counter += 1
        else:
            print('duplicate insert')
    sql.commit()
    print('\nupdated %d proxy IPs this round\n' % (counter - 1))
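
sql.insert() evidently returns True for a new proxy and False for a duplicate. A minimal sketch of a helper with that contract, assuming sqlite3 and an invented five-column schema (the real sql module is not shown):

# Hypothetical insert()/commit() pair matching the contract used above:
# False on a duplicate row instead of an exception.
import sqlite3

conn = sqlite3.connect('proxypool.db')
conn.execute('''CREATE TABLE IF NOT EXISTS proxies(
    ip TEXT PRIMARY KEY, port TEXT, anonymity TEXT, protocol TEXT, speed TEXT)''')

def insert(ip, port, anonymity, protocol, speed):
    try:
        conn.execute('INSERT INTO proxies VALUES (?,?,?,?,?)',
                     (ip, port, anonymity, protocol, speed))
        return True
    except sqlite3.IntegrityError:  # primary-key collision: already stored
        return False

def commit():
    conn.commit()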
Example #7
def deleteLastReads():
  results = fetch('select email from readmail')
  print results[-10:]
  
  # delete the 150 most recently added rows
  for r in results[-150:]:
    commit('delete from readmail where email=%s', r[0])
  results = fetch('select email from readmail')
  print results[-10:]
  
#deleteLastReads()
#deleteReadEmail17days()
Example #8
 def post(self):
     # id = timestamp + random suffix; the original format string had %y
     # (two-digit year) where the day of month (%d) was clearly intended
     ids = int(
         time.strftime("%Y%m%d%H%M%S") +
         str(random.randrange(10000, 99999)))
     name = 'admin'
     email = '*****@*****.**'
     title = self.get_argument('title')
     fl = self.get_argument('fl')
     tag = self.get_argument('tag')
     tms = time.strftime('%Y-%m-%d  %H:%M')
     content = self.get_argument('comment')
     res_list = [ids, name, email, title, fl, tag, tms, content]
     sql.cur.execute("insert into blog values (?,?,?,?,?,?,?,?)", res_list)
     sql.commit()
     self.redirect('/test')
Example #9
def main(argv):

    parser = argparse.ArgumentParser(description='')
    parser.add_argument('files', nargs='+')
    parser.add_argument('--standortid', type=int)
    parser.add_argument('--pages', type=int, default=1)
    args = parser.parse_args(argv)
    
    for full_fn in args.files:
        fn = ntpath.basename(full_fn)
        # look up the file id
        file_id = sql.fetch('ufiles', fn, id_key='name')
        if not file_id:
            print "File '%s' not found in DB, skipping" % fn
        else:
            data = []
            headers = []
            for page in xrange(args.pages):
                data, headers = p_xls.read_xls_data(full_fn, page)
                lines = 0  # administration: number of successfully inserted lines
                for row in data:
                    standortid = -1
                    if hasattr(row, 'StandortID'):
                        standortid = getattr(row, 'StandortID')
                    elif args.standortid is not None:
                        standortid = args.standortid
                    else:
                        sys.stderr.write('No StandortID found!\n')
                        sys.exit(1)

                    rs = sql.fetch_all('temps', {
                        'datum': getattr(row, 'Datum'),
                        'location_id': standortid
                    })
                    if rs != False:  # and len(rs) == 1:
                        for i in xrange(len(rs)):
                            if (sql.insert('ufiletemps', {
                                'ufile_id': file_id['id'],
                                'temp_id':  rs[i]['id']
                            })):
                                lines += 1
                            else:
                                print "%d,%d" % (file_id['id'], rs[i]['id'])
                print "Inserted %d/%d of page %d" % (lines, len(data), page)
            sql.commit() # after each file

    return None
Example #10
def ApiList(jmgr, os_target, sql, args):
	sql.connect_table(tables['api_type'])
	sql.connect_table(tables['api_list'])

	api_types = os_target.get_api_types()
	apis = os_target.get_apis()

	sql.delete_record(tables['api_type'])
	for type_id, type_name in api_types.items():
		values = dict()
		values['type'] = type_id
		values['name'] = type_name
		sql.append_record(tables['api_type'], values)

	sql.delete_record(tables['api_list'])
	for values in apis:
		sql.append_record(tables['api_list'], values)

	sql.commit()
Example #11
def anonymousAnalytics(email, unsubhash, success=False):
    digest = hashEmail(email)

    now = str(datetime.datetime.now())
    results = fetch(
        'select unsubhash, success from anonymousanalytics where unsubhash=%s',
        (unsubhash,))  # trailing comma: a one-element tuple, not a bare string
    success = int(success)
    if results:
        if int(results[0][1]) == 0 and success:
            commit(
                'update anonymousanalytics set success=1 where unsubhash=%s',
                (unsubhash,))
        else:
            log.info('unsub hash is still failing, do not update analytics %s',
                     unsubhash)
    else:
        commit(
            'insert into anonymousanalytics (emailhash, unsubhash, success, stamp) values (%s, %s, %s, %s)',
            (digest, unsubhash, str(success), now))
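
The original passed (unsubhash) as the parameter list, but parentheses alone do not make a tuple; only the trailing comma does. sqlite3 rejects a bare string outright, and while MySQL drivers happen to accept a scalar for a single %s, the tuple form is the portable one:

# (x) is not a tuple, (x,) is; some DB-API drivers iterate the params
# sequence, so a bare string can be mis-split into characters.
params = ('abc12345')
print(type(params))    # <class 'str'>
params = ('abc12345',)
print(type(params))    # <class 'tuple'>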
Example #12
def append_api_list(sql, type, id, name):
	sql.connect_table(tables['api_list'])

	retry = True
	while retry:
		res = sql.search_record(tables['api_list'], 'type=' + Table.stringify(type) + ' and id=' + Table.stringify(id), ['name'])
		if len(res) > 0 and res[0][0]:
			if res[0][0] != name:
				raise Error('duplicate key value (' + Table.stringify(type) + ',' + Table.stringify(id) + ')')
			return

		values = dict()
		values['type'] = type
		values['id'] = id
		values['name'] = name
		retry = False
		try:
			sql.append_record(tables['api_list'], values)
			sql.commit()
		except Exception:
			# the append can race a concurrent writer on the same key; re-check
			retry = True
Example #13
import sql
import pickle

with open('result_array.pkl', 'rb') as content:
    data = pickle.load(content)

#sql.create_Table()

def analyze_link(i):
    # build the public object-storage URL for an item, normalizing backslashes
    link = 'https://ctd-1257758577.cos.ap-guangzhou.myqcloud.com/' + i['addr']
    return link.replace('\\', '/')

for i in data:
    try:
        sql.insertValue(i['name'], i['author'], i['age'], analyze_link(i))
    except Exception as e:
        print(str(e))
        print(i)
        input('error......')

sql.commit()
Example #14
 def post(self):
     ids = self.get_argument('id_del')
     # bind the id rather than %-formatting it into the statement
     sql.cur.execute("delete from blog where id=?", (ids,))
     sql.commit()
     self.redirect('/xml')
Example #15
def fullAnalytics(email, url, success):
    s = int(success)
    commit('insert into analytics (email, url, success) values (%s, %s, %s)',
           (email, url, s))
Example #16
def parse_posts_urls(url):
    request = session.get(url, headers=headers)
    if request.status_code == 200:
        print('Get page = ', url)
        soup = bs(request.content, 'html.parser')
        # match both unvisited and visited title links; the original passed the
        # second class name where find_all() expects the `recursive` flag
        posts_list = soup.find_all(
            'a', class_=['story__title-link', 'story__title-link_visited'])
        for post in posts_list:
            post_url = post['href']
            title_name = post.text
            pika_id = get_id_url(post_url)
            category = soup.title.text
            # constructing PostUrl is assumed to register the row for the
            # commit() that runs after each page
            PostUrl(title=title_name,
                    pika_id=pika_id,
                    url=post_url,
                    category=category)
        print('parse complete = ', url)
    else:
        print('Page does not exist')


def get_id_url(url):
    result = re.findall(r'_(\d+)$', url)[0]
    return result


if __name__ == '__main__':
    for url in url_list:
        sleep(1)
        parse_posts_urls(url)
        commit()
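
PostUrl and commit() come from a persistence layer the excerpt does not show; nothing here explicitly adds the constructed objects to a session, so construction itself presumably registers them. One way that could look, assuming SQLAlchemy; the model, engine, and add-on-construction design are all invented for illustration:

# Hypothetical PostUrl model and commit() helper; SQLAlchemy and the
# add-on-construction design are assumptions, not the original code.
from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()
engine = create_engine('sqlite:///posts.db')
session = Session(engine)

class PostUrl(Base):
    __tablename__ = 'post_urls'
    id = Column(Integer, primary_key=True)
    title = Column(String)
    pika_id = Column(String)
    url = Column(String)
    category = Column(String)

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        session.add(self)  # register each parsed post with the session

def commit():
    session.commit()

Base.metadata.create_all(engine)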
Example #17
    attach_text = p.sub('', attach_text)

    t = re.sub('<[^>]*>', ' ', text)
    t = set([i[1:] for i in t.split() if i.startswith("#")])
    for tag in t:
        add_tag(tag)
        tags.append(tag)
    detect_tags(tags, text)

    for tag in tags:
        if tag in all_tags:
            sql.upsert('blog_post_tags', {'post_id': id, 'tag_id': all_tags[tag]})

    act, pid = sql.upsert('blog_post', {'id': id}, {'datetime': date, 'content': text, 'deleted': False, 'showAds': False})
            
    if act == 'insert':
        if not dryRun:
            text = re.sub('<[^>]*>', ' ', text)
            post_twitter.send(text, attach_text, "http://mechanicalbear.ru/" + str(id))
            post_facebook.send(id, text, image, video, attach_text)
            post_delicious.send(id, text + ' ' + attach_text, tags)

            if image > 0:
                post_tumblr.send(id, image, date)
                post_flickr.send(id, image, text)
        #else:
    #break

    sql.commit()
sql.close()
Example #18
def wipe():
  # note: this drops two tables (emailhashespositive, emailhashestotal) that
  # the setup() in Example #1 does not recreate
  commit('''drop table unsubs''')
  commit('''drop table readmail''')
  commit('''drop table usercount''')
  commit('''drop table emailhashespositive''')
  commit('''drop table emailhashestotal''')
  commit('''drop table analytics''')
  setup()
Example #19
def main(argv):
    argparser = argparse.ArgumentParser(description='')
    argparser.add_argument('files', nargs='+')
    args = argparser.parse_args(argv)

    for fn in args.files:
        if isdir(fn): continue

        # read in file
        print 'opening %s' % fn
        f = open(fn, 'r')

        lines_raw = f.readlines()
        raw_id = None
        if sql.insert('raws', { 'data': ''.join(lines_raw), 'filename': basename(fn) }):
            raw_id = sql.lastrowid()
            progress("Added %d to raws" % raw_id)

        # preprocess
        lines = []
        try:
            line_nr = 0
            for line in lines_raw:
                line_nr += 1
                line = line.rstrip('\r\n')
                line = re.split(r'\t|;', line)
                line = preprocess_line(line)
                lines.append(line)
        except:
            print "%d: %s" % (line_nr, line)
            raise

        # add a dummy plant/culture/subspecies, just in case samples can't be connected just yet.
        if not sql.exists('cultures', 1): sql.insert('cultures', {'id': DUMMY_CULTURE_ID, 'name': 'placeholder'})
        if not sql.exists('plants', 1): sql.insert('plants', {'id': DUMMY_PLANT_ID, 'name': 'placeholder', 'culture_id': DUMMY_CULTURE_ID})
        if not sql.exists('subspecies', 1): sql.insert('subspecies', {'id': DUMMY_SUBSPECIES_ID, 'species_id': SPECIES_ID})

        # some lines need to be sent back to LIMS, this is where we store them
        lims_lines = []

        # save!
        line_nr = 0
        try:
            for line in lines:
                line_nr += 1
                program_id = get_program_id(line)
                if is_sample_plant(line):
                    line[8] = int(line[8]) # plant_id is still str
                    #save_sample_plant(sample_id=line[7], plant_id=line[8], date=date) # skipped because made redundant when preloading all samples/plants

                    lims_lines.append("\t".join([ str(item) for item in line ]))
                #elif program_id == 1 and is_freshweight_between(line):
                #    lims_lines.append("\t".join([ str(item) for item in line ]))
                else:
                    phenotype = format_line(line) # create a readable program

                    # add the actual phenotype
                    phenotype_id = None
                    if sql.insert('phenotypes', {
                        'version': phenotype['version'],
                        'object' : phenotype['object'],
                        'program_id': phenotype['program_id'],
                        'date': phenotype['date'],
                        'time': phenotype['time'],
                        'entity_id': phenotype['entity_id'],
                        'value_id': phenotype['value_id'],
                        'number': phenotype['number']
                    }):
                        phenotype_id = sql.lastrowid()
                        progress('Added %d to phenotype' % phenotype_id)

                    # if plant, add it to plants, otherwise to samples
                    if ora_sql.is_plant(phenotype['sample_id']) or ora_sql.was_plant(phenotype['sample_id']):
                        sql.insert('phenotype_plants', { 'phenotype_id': phenotype_id, 'plant_id': phenotype['sample_id'] })
                    elif ora_sql.is_sample(phenotype['sample_id']):
                        sql.insert('phenotype_samples', { 'phenotype_id': phenotype_id, 'sample_id': phenotype['sample_id'] })
                    elif ora_sql.is_aliquot(phenotype['sample_id']):
                        sql.insert('phenotype_aliquots', { 'phenotype_id': phenotype_id, 'aliquot_id': phenotype['sample_id'] })
                    else:
                        print "%s NOT found!!" % phenotype['sample_id']

                    sql.insert('phenotype_raws', { 'phenotype_id': phenotype_id, 'raw_id': raw_id, 'line_nr': line_nr })
                    if program_id > 1:
                        sql.insert('phenotype_bbches', { 'phenotype_id': phenotype_id, 'bbch_id': phenotype['bbch_id']})
        except:
            progress("%d: %s" % (line_nr, line))
            raise
        
        # save the current saved lines for LIMS
        write_lims_lines(lims_lines, fn)

    sql.commit()
Example #20
def main(argv):
    argparser = argparse.ArgumentParser(description='')
    argparser.add_argument('files', nargs='+')
    args = argparser.parse_args(argv)

    for fn in args.files:
        if isdir(fn): continue

        # read in file
        print 'opening %s' % fn
        f = open(fn, 'r')

        lines_raw = f.readlines()
        raw_id = sql.fetch_all('raws', {'filename': basename(fn)})[0]['id']
        progress("Found %d of %s" % (raw_id, fn))

        # preprocess
        lines = []
        try:
            line_nr = 0
            for line in lines_raw:
                line_nr += 1
                line = line.rstrip('\r\n')
                line = re.split(r'\t|;', line)
                line = preprocess_line(line)
                lines.append(line)
        except:
            print "%d: %s" % (line_nr, line)
            raise

        # save!
        line_nr = 0
        try:
            for line in lines:
                line_nr += 1
                phenotype = format_line(line)  # create a readable program

                # add the actual phenotype
                phenotype_id = None
                phenotype_q_params = dict(phenotype.items() + {
                    'entity_id': -12345,
                    'filename': basename(fn)
                }.items())
                del phenotype_q_params['attribute']
                del phenotype_q_params['value']
                phenotype_q_params['pp.plant_id'] = phenotype_q_params.pop(
                    'sample_id')

                q = """
                    select phenotypes.id from phenotypes
                    join phenotype_raws pr on pr.phenotype_id = phenotypes.id
                    join raws r on r.id = pr.raw_id
                    left join phenotype_plants pp on pp.phenotype_id = phenotypes.id
                    where """
                q += ' and '.join(
                    ['%s=%s' % (k, '%s') for k in phenotype_q_params.keys()])
                sql_phenotype = sql.fetch_all(None, phenotype_q_params, q)

                if len(sql_phenotype) == 1:
                    if sql.update('phenotypes', {'id': sql_phenotype[0]['id']},
                                  {'entity_id': phenotype['entity_id']}):
                        phenotype_id = sql.lastrowid()
                        progress('Added %d to phenotype' % phenotype_id)
        except:
            progress("%d: %s" % (line_nr, line))
            raise

    sql.commit()