def get_tracking_by_courier(courier_link):
    """Scrape the most recent checkpoint from a courier-specific tracking page.

    Args:
        courier_link: Full URL of the tracking page to download.

    Returns:
        ``None`` when the page has no ``li.checkpoint`` entries and no
        ``<p id="status-text">`` element; ``0`` when it has no checkpoints
        but does have that element; otherwise a dict with the keys
        ``place``, ``date``, ``time``, ``tag`` and ``tag_th`` describing the
        first checkpoint listed on the page.

    Raises:
        AttributeError: a checkpoint exists but one of the expected child
            elements is missing from the markup.
    """
    # English status tag -> Thai caption; unknown tags get an empty caption.
    tag_captions = {
        "In Transit": u"กำลังจัดส่ง",
        "Delivered": u"ผู้รับได้รับเรียบร้อย",
        "Out For Delivery": u"เตรียมการนำจ่าย",
        "Info Received": u"รับเข้าระบบ",
    }

    response = requests.get(courier_link)
    # NOTE(review): no parser is named, so BeautifulSoup chooses one itself;
    # confirm whether a specific parser should be pinned.
    soup = BeautifulSoup(response.text)

    checkpoints = soup.find_all('li', {'class': 'checkpoint'})
    if not checkpoints:
        status_text = soup.find('p', {'id': 'status-text'})
        # Function-call form prints the same thing on Python 2 and parses on
        # Python 3 as well.
        print(status_text)
        if status_text:
            return 0
        return None

    latest = checkpoints[0]
    place = latest.find('div', {'class': 'checkpoint__content'}) \
                  .find('div', {'class': 'hint'}).get_text()
    when = latest.find('div', {'class': 'checkpoint__time'})
    date = when.find('strong').get_text()
    tag = soup.find('p', {'class': 'tag'}).get_text()
    # Looked up once; the previous version repeated this query needlessly.
    time = when.find('div', {'class': 'hint'}).get_text()

    return {
        "place": place,
        "date": date,
        "time": time,
        "tag": tag,
        "tag_th": tag_captions.get(tag, u""),
    }
def cvt_dt(self, dt):
    """Convert a loosely-typed time specification into a ``datetime``.

    Args:
        dt: One of
            * the string ``"first"`` in any letter case - the Unix epoch;
            * a string containing ``/`` - parsed with ``self.time_fmt``;
            * any other string - read as a Unix timestamp in seconds;
            * an ``int`` or ``float`` - a Unix timestamp in seconds.

    Returns:
        A naive ``datetime.datetime`` (local time for timestamps), or
        ``None`` when ``dt`` is of any other type.

    Raises:
        ValueError: a string does not match ``self.time_fmt`` or is not a
            number.
    """
    # The parameter is called ``dt``, which hides any module-level name
    # ``dt``; the previous code therefore looked up ``.datetime`` on the
    # argument itself.  Import the module under a private name instead.
    import datetime as _datetime

    if isinstance(dt, str):
        if dt.upper() == "FIRST":
            return _datetime.datetime.fromtimestamp(0)
        if "/" in dt:
            return _datetime.datetime.strptime(dt, self.time_fmt)
        # Previously converted an undefined name instead of the argument.
        return _datetime.datetime.fromtimestamp(float(dt))
    if isinstance(dt, (int, float)):
        return _datetime.datetime.fromtimestamp(dt)
    return None
def get_tracking(tracking_id):
    """Scrape the most recent checkpoint for a tracking number.

    Args:
        tracking_id: Tracking number appended to
            ``https://track.aftership.com/``.

    Returns:
        ``None`` when the page has no ``li.checkpoint`` entries and no
        ``<p id="status-text">`` element; ``0`` when it has no checkpoints
        but does have that element; otherwise a dict with the keys
        ``courier``, ``place``, ``date``, ``time``, ``tag`` and ``tag_th``
        describing the first checkpoint listed on the page.

    Raises:
        AttributeError: a checkpoint exists but one of the expected child
            elements is missing from the markup.
    """
    # English status tag -> Thai caption.
    tag_captions = {
        "In Transit": u"กำลังจัดส่ง",
        "Delivered": u"ผู้รับได้รับเรียบร้อย",
        "Out For Delivery": u"เตรียมการนำจ่าย",
        "Info Received": u"รับเข้าระบบ",
    }

    response = requests.get("https://track.aftership.com/" + tracking_id)
    # NOTE(review): no parser is named, so BeautifulSoup chooses one itself;
    # confirm whether a specific parser should be pinned.
    soup = BeautifulSoup(response.text)

    checkpoints = soup.find_all('li', {'class': 'checkpoint'})
    if not checkpoints:
        if soup.find('p', {'id': 'status-text'}):
            return 0
        return None

    latest = checkpoints[0]
    courier = soup.find('div', {'class': 'courier-info'}).find('h2').get_text()
    place = latest.find('div', {'class': 'checkpoint__content'}) \
                  .find('div', {'class': 'hint'}).get_text()
    when = latest.find('div', {'class': 'checkpoint__time'})
    date = when.find('strong').get_text()
    tag = soup.find('p', {'class': 'tag'}).get_text()

    tag_th = tag_captions.get(tag)
    if tag_th is None:
        # Unknown status: report it and fall back to an empty caption.
        # Function-call form prints the same on Python 2 and parses on 3.
        print(tag)
        tag_th = u""

    time = when.find('div', {'class': 'hint'}).get_text()
    return {
        "courier": courier,
        "place": place,
        "date": date,
        "time": time,
        "tag": tag,
        "tag_th": tag_th,
    }
def dateTimeParcer(dt):
    """Split a ``"date time (zone)"`` string into its three parts.

    Anything up to and including the first ``{`` and anything from the first
    remaining ``}`` onward is discarded before splitting.

    Args:
        dt: Text such as ``"{2020-01-01 12:30 (UTC)}"``.

    Returns:
        ``[date, time, timezone]`` where ``time`` has ``":00"`` appended and
        ``timezone`` has surrounding spaces and parentheses stripped.
        ``None`` (after printing ``fail``) when ``dt`` has no space at all.

    Raises:
        ValueError: the text between the braces does not contain two
            space-separated boundaries.
    """
    if " " not in dt:
        print("fail")
        return None

    text = dt
    opening = text.find("{")
    if opening != -1:
        text = text[opening + 1:]
    closing = text.find("}")
    if closing != -1:
        text = text[:closing]

    # ``index`` (not ``find``) on purpose: a missing separator must raise.
    first_gap = text.index(" ")
    date_part = text[:first_gap].strip(" ")

    remainder = text[first_gap + 1:]
    second_gap = remainder.index(" ")
    time_part = remainder[:second_gap].strip(" ") + ":00"
    zone_part = remainder[second_gap:].strip(" ").strip("(").strip(")")

    return [date_part, time_part, zone_part]
def get_tracking_all(tracking_id):
    """Scrape tracking information, or the courier choices, for a number.

    Args:
        tracking_id: Tracking number appended to
            ``https://track.aftership.com/``.

    Returns:
        * a list of ``{"name": ..., "link": ...}`` dicts when the page
          contains ``a.courier-detection__courier-link`` elements, one per
          link, with ``link`` made absolute;
        * ``None`` when there are no such links, no ``li.checkpoint``
          entries and no ``<p id="status-text">`` element;
        * ``0`` when there are no checkpoints but that element exists;
        * otherwise a dict with the keys ``courier``, ``place``, ``date``,
          ``time``, ``tag`` and ``tag_th`` for the first checkpoint listed.

    Raises:
        AttributeError: a checkpoint exists but one of the expected child
            elements is missing from the markup.
    """
    # English status tag -> Thai caption; unknown tags get an empty caption.
    tag_captions = {
        "In Transit": u"กำลังจัดส่ง",
        "Delivered": u"ผู้รับได้รับเรียบร้อย",
        "Out For Delivery": u"เตรียมการนำจ่าย",
        "Info Received": u"รับเข้าระบบ",
    }

    response = requests.get("https://track.aftership.com/" + tracking_id)
    # NOTE(review): no parser is named, so BeautifulSoup chooses one itself;
    # confirm whether a specific parser should be pinned.
    soup = BeautifulSoup(response.text)

    courier_links = soup.find_all(
        'a', {'class': 'courier-detection__courier-link'})
    if courier_links:
        return [
            {
                "name": link.get_text(),
                "link": "https://track.aftership.com" + link['href'],
            }
            for link in courier_links
        ]

    checkpoints = soup.find_all('li', {'class': 'checkpoint'})
    if not checkpoints:
        status_text = soup.find('p', {'id': 'status-text'})
        # Function-call form prints the same thing on Python 2 and parses on
        # Python 3 as well.
        print(status_text)
        if status_text:
            return 0
        return None

    courier = soup.find('div', {'class': 'courier-info'}).find('h2').get_text()
    latest = checkpoints[0]
    place = latest.find('div', {'class': 'checkpoint__content'}) \
                  .find('div', {'class': 'hint'}).get_text()
    when = latest.find('div', {'class': 'checkpoint__time'})
    date = when.find('strong').get_text()
    tag = soup.find('p', {'class': 'tag'}).get_text()
    time = when.find('div', {'class': 'hint'}).get_text()

    return {
        "courier": courier,
        "place": place,
        "date": date,
        "time": time,
        "tag": tag,
        "tag_th": tag_captions.get(tag, u""),
    }
def download_quote_xls():
    """Download the quote workbook and keep it only if it is dated today.

    Calls ``init()`` and ``_download_quote_xls()``, then compares the update
    stamp in the first row of the first sheet with today's date.

    Returns:
        The path returned by ``_download_quote_xls()`` when the workbook's
        update day occurs inside ``str(datetime.date.today())``.  ``None``
        when nothing was downloaded, or when the workbook is stale (in which
        case the file is deleted and a message is printed).
    """
    init()
    xls_path = _download_quote_xls()
    if not xls_path:
        return

    # Note: the xlrd function name ``open_workbook`` is all lower-case.
    workbook = xlrd.open_workbook(xls_path)
    first_sheet = workbook.sheet_by_name(workbook.sheet_names()[0])

    # Row 0 looks like ['<update-time label>', '12-25 15:15:12', '', ...];
    # only the date part of the second cell is needed.
    stamp_cell = first_sheet.row_values(0)[1]
    update_day = stamp_cell.split()[0]

    today = str(datetime.date.today())
    if update_day in today:
        return xls_path

    os.remove(xls_path)
    print('not today, xls removed')
    return
def _mobile_call_cost(record, before_rate, after_rate, threshold):
    """Price one call record; exit the process if its duration is unusable."""
    stamp = record[0]
    first_colon = stamp.find(":")
    # Clock text starts two characters before the first ':' (the hour digits).
    clock = stamp[first_colon - 2:]
    try:
        duration = float(record[3])
    except (ValueError, IndexError):
        # A malformed or missing duration aborts the run, as it did before.
        sys.exit()

    if clock < threshold:
        last_colon = stamp.rfind(":")
        # Minutes plus seconds/60; the hour field is not used here.
        started = (float(stamp[first_colon + 1:last_colon])
                   + float(stamp[last_colon + 1:]) / 60)
        # Time left until the 0:30 boundary.
        remaining = 30.0 - started
        if duration > remaining:
            # The call straddles the boundary: price each part at its rate.
            return remaining * before_rate + (duration - remaining) * after_rate
        return duration * before_rate
    # The call started at or after the threshold.
    return duration * after_rate


def mobile(settings):
    """Compute the total charge for one phone number from a CDR file.

    Each non-blank line of the file is split on commas.  Field 0 is the
    timestamp, fields 1 and 2 are the two phone numbers, field 3 is the call
    duration and the last field is the SMS count.  A record whose field 1
    equals the billed number is charged for the call and its SMS; a record
    whose field 2 equals it is charged for the call only.

    Args:
        settings: Mapping with the keys
            ``BCOST`` - call rate before the 0:30 boundary;
            ``ACOST`` - call rate after the 0:30 boundary;
            ``SMSCOST`` - price per SMS;
            ``TIMECONST`` - clock string compared with each call's start;
            ``DEFAULT_PHONE`` - the number being billed;
            ``input_filename`` - path of the CDR file.

    Returns:
        The total charge rounded to two decimals.

    Raises:
        SystemExit: a matching record has a non-numeric or missing duration.
        ValueError: a matching outgoing record has a non-integer SMS count.
    """
    before_rate = settings["BCOST"]
    after_rate = settings["ACOST"]
    sms_rate = settings["SMSCOST"]
    threshold = settings["TIMECONST"]
    phone = settings["DEFAULT_PHONE"]
    input_filename = settings["input_filename"]

    total = 0
    with open(input_filename) as cdr:
        for raw in cdr:
            if not raw.strip():
                # Blank lines (e.g. a trailing one) carry no record.
                continue
            # Strip only the newline: slicing off the last character would
            # corrupt a final record that has no trailing newline.
            record = raw.rstrip("\n").split(",")

            if record[1] == phone:
                # Outgoing: the call plus the SMS sent by this number.
                total += _mobile_call_cost(
                    record, before_rate, after_rate, threshold)
                total += int(record[-1]) * sms_rate
            elif record[2] == phone:
                # Incoming: the call only.
                total += _mobile_call_cost(
                    record, before_rate, after_rate, threshold)

    return round(total, 2)
def get_quote(xlsfile, dt=None):
    """Read the day's quote rows from a downloaded workbook.

    Args:
        xlsfile: Path handed to ``xlrd.open_workbook``.
        dt: Date string the workbook is expected to belong to.  Defaults to
            ``str(datetime.date.today())``.

    Returns:
        ``None`` (after printing a notice) when the update day found in the
        first row does not occur inside ``dt``.  Otherwise a list of tuples
        ``(code, trade_date, open, high, low, close, volume, turnover)``,
        one per row whose first cell starts with ``'s'`` and whose volume is
        positive.  ``code`` has its first two characters removed and
        ``volume`` is multiplied by 100.

    Raises:
        Exception: ``check_format`` rejects the values of row 1.
    """
    # (output field, source column index), in output order.  Index 13 is the
    # trade date appended to each row below.
    # NOTE(review): assumes sheet rows have exactly 13 cells - confirm.
    columns = (
        ('code', 0),
        ('trade_date', 13),
        ('open', 9),
        ('high', 11),
        ('low', 12),
        ('close', 2),
        ('volume', 7),
        ('turnover', 8),
    )

    # Note: the xlrd function name ``open_workbook`` is all lower-case.
    workbook = xlrd.open_workbook(xlsfile)
    # The first sheet, looked up once by name.
    table = workbook.sheet_by_name(workbook.sheet_names()[0])

    if not check_format(table.row_values(1)):
        raise Exception('xls format changed...')

    # Row 0 looks like ['<update-time label>', '12-25 15:15:12', '', ...];
    # only the date part of the second cell is needed.
    update_day = table.row_values(0)[1].split()[0]
    if dt is None:
        dt = str(datetime.date.today())
    if update_day not in dt:
        print(dt, update_day, 'not today\'s quote')
        return None

    quotes = []
    for rownum in range(table.nrows):
        row = table.row_values(rownum)
        if not row[0].startswith('s'):
            continue
        print(row[0])

        row.append(dt)
        # Drop the two-character prefix from the code.
        row[0] = row[0][2:]

        if int(row[7]) <= 0:
            # No traded volume: skip the row.
            continue
        # NOTE(review): presumably converts lots to shares - confirm.
        row[7] *= 100

        quotes.append(tuple(row[idx] for _, idx in columns))
    return quotes
def get_content(html):
    """Extract competitions and their upcoming games from a listing page.

    Every ``tbody.table__body`` inside ``table.table`` is one competition.
    Its first row supplies the competition title; each later row that has
    neither a ``td.table-complex__wrap`` nor a ``div.table__live`` child is
    read as a game.

    Args:
        html: Markup of the page.

    Returns:
        A flat list.  For each competition with at least one game row there
        is a ``{'type': 'comp', 'name': ...}`` entry followed by one
        ``{'type': 'game', ...}`` entry per game whose three coefficients
        parsed as floats.
    """
    soup = BeautifulSoup(html, 'html.parser')
    table = soup.find('table', class_='table')

    result = []
    for body in table.find_all('tbody', class_='table__body'):
        found_game_row = False
        competition = ""
        games = []
        body_rows = body.find_all('tr', class_='table__row')

        for row in body_rows:
            # Rows are compared by value with the first row, not by position.
            if row == body_rows[0]:
                header = row.find("th")
                if header is not None:
                    competition = header.find(
                        "h2", class_='table__title-text').get_text(strip=True)
                continue

            if row.find('td', class_='table-complex__wrap') is not None:
                continue
            if row.find('div', class_='table__live') is not None:
                continue

            # Counts even if the coefficients below turn out to be unusable.
            found_game_row = True

            title = row.find(
                'h3', class_='table__match-title-text').get_text(strip=True)
            dash = title.find(' — ')
            team1 = title[:dash]
            team2 = title[dash + 3:]

            timescore = row.find('div', class_='table__timescore')
            when = timescore.find(has_no_class).get_text(strip=True)
            at = when.find(' в ')
            date = format_date(when[:at])
            time = format_time(when[at + 3:])

            odds_cells = row.find_all('td', class_='_type_btn', limit=3)
            try:
                win1 = float(odds_cells[0].get_text(strip=True))
                draw = float(odds_cells[1].get_text(strip=True))
                win2 = float(odds_cells[2].get_text(strip=True))
            except ValueError:
                # No usable coefficients: leave this game out.
                continue

            games.append({
                'type': 'game',
                'date': date,
                'time': time,
                'team1': team1,
                'team2': team2,
                'k1': win1,
                'kx': draw,
                'k2': win2
            })

        if found_game_row:
            result.append({'type': 'comp', 'name': competition})
            result.extend(games)

    return result
def get_score_data(url):
    """Scrape one espn game page and return its game and player data.

    Returns a 9-tuple ``(gameId, date, time, location, homeTeam, awayTeam,
    homeFinal, awayFinal, playerRows)``:

    * gameId - the part of ``url`` after its first ``=``.
    * date - utf-8 encoded text following the first comma (and one more
      character) of the game-time paragraph.
    * time - the text before that comma, passed through ``convert_time``.
    * location - text of the next ``<p>`` after the game-time paragraph.
    * homeTeam, awayTeam - int school ids looked up in ``get_schooldict()``
      (schools not found are added with ``add_school`` first); both start
      out as -1.
    * awayFinal, homeFinal - text of the first and the following element
      with class ``ts``.
    * playerRows - one list per kept player line, starting with gameId and
      the team id, followed by player name, position and the stat cells.

    NOTE(review): the original indentation of this function was lost; the
    nesting below (in particular where the ``if html:`` block ends) is a
    reconstruction and should be confirmed.  As laid out here, an empty
    ``html`` would leave ``date``, ``time``, ``location`` and the two final
    scores unbound at the ``return``.
    """
    #initialize variables
    rows = []
    playerRows = []
    html = urllib.urlopen(url).read()
    schoolDict = get_schooldict()
    # -1 means "team not resolved"; rows carrying it are filtered out below
    homeTeam = -1
    awayTeam = -1
    gameId = url[url.find('=')+1:]

    #main data extraction block
    if html:
        print "Getting the score data for game " + gameId
        soup = BeautifulSoup(html)

        #game date
        dateElement = soup.find(class_="game-time-location").find("p")
        datetime = dateElement.get_text()
        # text before the first comma is the time, the rest is the date
        time = (datetime[:datetime.find(',')]).encode('utf-8')
        time = convert_time(time)
        # +2 skips the comma and the character after it
        date = (datetime[datetime.find(',')+2:]).encode('utf-8')

        #game location
        locationElement = dateElement.find_next('p')
        location = locationElement.get_text()

        #score data
        # the first "ts" element is taken as the away score, the next as home
        awayFinalElement = soup.find(class_="ts")
        awayFinal = awayFinalElement.get_text()
        homeFinalElement = awayFinalElement.find_next(class_="ts")
        homeFinal = homeFinalElement.get_text()

        #find school1
        school1 = soup.find(id="my-players-table").find("th")
        school1Name = (school1.get_text()).encode('utf-8')

        #replace school name with schoolid
        if schoolDict.has_key(school1Name):
            awayTeam = int(schoolDict[school1Name])
        else:
            print school1Name + " doesn't exist... adding it..."
            add_school(school1Name)
            # reload the mapping so the newly added school can be looked up
            schoolDict = get_schooldict()
            awayTeam = int(schoolDict[school1Name])

        #Find school1 starter stats
        starters = soup.find(id="my-players-table").find("tbody")
        for row in starters.find_all('tr'):
            # row layout: team id, the cell texts, then 1 to mark a starter
            rows.append([awayTeam] + [val.text.encode('utf-8') for val in row.find_all('td')] + [1])

        #find school1 bench stats
        # two sibling hops from the starters body reach the bench body
        bench = starters.next_sibling.next_sibling
        for row in bench.find_all('tr'):
            # trailing 0 marks a bench player
            rows.append([awayTeam] + [val.text.encode('utf-8') for val in row.find_all('td')] + [0])

        #find school2
        # the second school's header is the third <thead> after the first one
        school2 = soup.find(id="my-players-table").find("thead")
        school2 = school2.find_next_sibling("thead").find_next_sibling("thead").find_next_sibling("thead")
        school2Name = (school2.find("th").get_text()).encode('utf-8')

        #replace school name with schoolid... if the school doesn't exist, create it
        if schoolDict.has_key(school2Name):
            homeTeam = int(schoolDict[school2Name])
        else:
            print school2Name + "doesn't exist... adding it..."
            add_school(school2Name)
            # reload the mapping so the newly added school can be looked up
            schoolDict = get_schooldict()
            homeTeam = int(schoolDict[school2Name])

        #Find school2 starter stats
        starters = school2.find_next_sibling("tbody")
        for row in starters.find_all('tr'):
            rows.append([homeTeam] + [val.text.encode('utf-8') for val in row.find_all('td')] + [1])

        #find school2 bench stats
        bench = starters.next_sibling.next_sibling
        for row in bench.find_all('tr'):
            rows.append([homeTeam] + [val.text.encode('utf-8') for val in row.find_all('td')] + [0])

    #remove the bad rows
    # keep only rows with a resolved team id and more than 10 fields
    for row in rows:
        if (row and row[0] > -1 and len(row) > 10):
            playerRows.append([gameId] + row)

    #make player position its own field and replace dashes in stats
    for row in playerRows:
        #split player and position
        # row[0] is gameId and row[1] the team id, so row[2] is "player, pos"
        playerpos = row[2]
        commaIndex = playerpos.rfind(',')
        if commaIndex == -1:
            # no comma: position is recorded as 'N'
            position = 'N'
            player = playerpos
        else:
            # +2 skips the comma and the character after it
            position = playerpos[commaIndex+2:]
            player = playerpos[:commaIndex]
        row[2] = player
        row.insert(3, position)

        #handle cases where minutes doesn't exist for the line score... make the minutes -1 for that player
        # a '-' in row[4] means that field is already a "made-attempted" stat
        if '-' in row[4]:
            row.insert(4, -1)

        #make 'of' stats two fields
        # splits the fields at indexes 5, 7 and 9 in place; each "a-b" value
        # becomes two adjacent fields, an empty half becoming 0
        i = 5
        while i < 11:
            s1 = row[i][:row[i].index('-')]
            s2 = row[i][row[i].index('-')+1:]
            if s1 == '':
                s1 = 0
            if s2 == '':
                s2 = 0
            row.pop(i)
            row.insert(i, s1)
            row.insert(i+1, s2)
            i = i+2

    return gameId, date, time, location, homeTeam, awayTeam, homeFinal, awayFinal, playerRows