def create_lu(team_abb, player_vals, player_pos):
    """Build every duplicate-free lineup for a team and store it.

    Every combination drawn from ``player_pos`` (one pick per positional
    group) is passed to ``check_dupes``; combinations for which it returns
    ``False`` are expanded into three rows ('l', 'r' and 'all' handedness
    splits) via ``enter_lineup`` and written to ``__team_lineups`` in
    batches of 2500 rows, committing after each batch.
    """
    progress_step = 1000000
    batch_size = 2500

    lineups = list(itertools.product(*player_pos))
    total = len(lineups)
    print('\t' + str(total) + ' total lineups')

    entries = []
    for position, lu in enumerate(lineups, 1):
        # Announce each block of one million candidates as it starts.
        if position % progress_step == 1:
            block_end = min(position + progress_step - 1, total)
            print('\t\tchecking lineups ' + str(position) + ' to ' + str(block_end))
        if check_dupes(lu) is False:
            for vs_hand in ('l', 'r', 'all'):
                entries.append(enter_lineup(team_abb, vs_hand, lu, player_vals))

    valid = len(entries)
    print('\t' + str(valid) + ' valid lineups')

    table = '__team_lineups'
    for start in range(0, valid, batch_size):
        batch_end = min(start + batch_size, valid)
        print('\t\tentering lineups ' + str(start + 1) + ' to ' + str(batch_end))
        db.insertRowDict(entries[start:start + batch_size], table,
                         insertMany=True, replace=True, rid=0, debug=1)
        db.conn.commit()
def process():
    """Rebuild the processed team hitting tables for every available year.

    Both ``processed_team_hitting_basic`` and
    ``processed_team_hitting_advanced`` are truncated first. The year range
    comes from ``processed_league_averages_pitching``; for each year the
    basic and advanced rows are computed and inserted, then committed.
    """
    print("processed_team_hitting")
    for suffix in ('basic', 'advanced'):
        db.query("TRUNCATE TABLE `processed_team_hitting_%s`" % suffix)

    bounds = db.query(
        "SELECT MIN(year), MAX(year) FROM processed_league_averages_pitching"
    )[0]
    yr_min, yr_max = bounds

    year = yr_min
    while year <= yr_max:
        for _type in ('basic', 'advanced'):
            print(str(year) + "\thitting\t" + _type)
            table = 'processed_team_hitting_%s' % (_type)
            if _type == 'basic':
                entries = process_basic(year)
            else:
                entries = process_advanced(year)
            if entries != []:
                db.insertRowDict(entries, table, replace=True,
                                 insertMany=True, rid=0)
                db.conn.commit()
        year += 1
def process_grades(year, grades_id, grades, player_type, prospect_type):
    """Store one prospect's tool grades and return the overall grade.

    ``grades`` is an iterable of dicts with ``key``/``value`` items. The
    "overall" grade is returned (0 when absent) rather than stored.
    Recognised tools are stored under their lower-cased name, with
    speed/throw/defense folded into run/arm/field; any other key is stored
    as "other". The row goes to the hitters table when it has a hit or
    field grade, to the pitchers table when it has a control or fastball
    grade, and is otherwise only printed.
    """
    known_tools = ("fastball", "change", "curve", "slider", "cutter",
                   "splitter", "control", "hit", "power", "run", "arm",
                   "field", "speed", "throw", "defense")
    aliases = {"speed": "run", "throw": "arm", "defense": "field"}

    grade_entry = {"year": year, "grades_id": grades_id,
                   "prospect_type": prospect_type}
    fv = 0
    for g in grades:
        raw_key = g.get("key")
        if raw_key is None:
            continue
        tool = raw_key.lower().strip()
        if tool == "overall":
            fv = g.get("value")
        elif tool in known_tools:
            grade_entry[aliases.get(tool, tool)] = g.get("value")
        else:
            grade_entry["other"] = g.get("value")

    if "hit" in grade_entry or "field" in grade_entry:
        grades_table = "mlb_grades_hitters"
    elif "control" in grade_entry or "fastball" in grade_entry:
        grades_table = "mlb_grades_pitchers"
    else:
        # Nothing identifies the player type: dump the raw grades and bail.
        print("\n\n\n%s \n\n\n" % (grades,))
        return fv

    db.insertRowDict(grade_entry, grades_table, insertMany=False,
                     replace=True, rid=0, debug=1)
    db.conn.commit()
    return fv
def process_draft(year, entry, row_val):
    """Add draft-specific fields to ``entry`` and store it.

    ``row_val['Draft']`` is expected to hold a pick number and a team
    separated by ``/`` in either order; the numeric half becomes
    ``pick_num`` and the remaining text ``pick_team``. Both are ``None``
    when the value cannot be split (for example when it is ``None``).

    ``trend`` is 'UP' or 'DOWN' when the trend text contains ``uarr`` or
    ``darr``, and ``None`` otherwise. Previously a trend string containing
    neither marker left ``trend_val`` unassigned and the function crashed
    with ``UnboundLocalError``.
    """
    try:
        try:
            pick_num = int(row_val['Draft'].split('/')[0])
        except ValueError:
            # First half is not numeric, so the number must be the second.
            pick_num = int(row_val['Draft'].split('/')[1])
        pick_team = row_val['Draft'].replace('/', '').replace(str(pick_num), '')
    except (AttributeError, IndexError):
        pick_num, pick_team = None, None

    entry['college_commit'] = (row_val['CollegeCommit']
                               or row_val['College_Commit']
                               or row_val['cCollegeCommit'])
    entry['draft_rank'] = ifzero(row_val['DraftRank'])
    entry['ovr_rank'] = ifzero(row_val['Ovr_Rank'])
    entry['pick_num'] = pick_num
    entry['pick_team'] = pick_team

    trend = ifzero(row_val['Trend'])
    # Default covers both a non-string trend and a string with no arrow.
    trend_val = None
    try:
        if 'uarr' in trend:
            trend_val = 'UP'
        elif 'darr' in trend:
            trend_val = 'DOWN'
    except TypeError:
        trend_val = None
    entry['trend'] = trend_val

    db.insertRowDict(entry, 'fg_prospects_draft', replace=True, debug=1)
    db.conn.commit()
def comment_updater(boulder_name, area, sub_area, update_comment):
    """Replace the comment on the completed row for one boulder.

    Looks up the ``boulders_tried`` row matching the name/area/sub-area with
    ``completed = "TRUE"``. If exactly one row is found it is rewritten with
    the same values and ``update_comment`` as its comment; otherwise an
    error line is printed and nothing is written.
    """
    lookup = (
        'SELECT * FROM boulders_tried '
        'WHERE boulder_name = "%s" '
        'AND area = "%s" '
        'AND sub_area = "%s" '
        'AND completed = "TRUE";'
    ) % (boulder_name, area, sub_area)
    res = db.query(lookup)

    if len(res) != 1:
        print("\n\n\nERROR %s HAS LENGTH %s" % (boulder_name, str(len(res))))
        return

    (est_date, est_time, row_name, row_area, row_sub_area, v_grade,
     est_attempts, est_minutes, return_interest, session_num,
     completed, first_comment) = res[0]

    entry = {
        "est_date": est_date,
        "est_time": est_time,
        "boulder_name": row_name,
        "area": row_area,
        "sub_area": row_sub_area,
        "v_grade": v_grade,
        "est_attempts": est_attempts,
        "est_minutes": est_minutes,
        "return_interest": return_interest,
        "session_num": session_num,
        "completed": "TRUE",
        "comment": update_comment,
    }
    db.insertRowDict(entry, 'boulders_tried', insertMany=False,
                     replace=True, rid=0, debug=1)
    db.conn.commit()
def process_pitcher_grades(year, entry, row_val):
    """Store one pitcher's scouting grades in ``fg_grades_pitchers``.

    Each source field of ``row_val`` is passed through ``ifzero`` and saved
    under its table column name, alongside ``year`` and the ``grades_id``
    taken from ``entry``.
    """
    # (table column, key in row_val), in the order the values are read.
    column_sources = (
        ('TJ_Date', 'TJDate'),
        ('MaxVelo', 'Vel'),
        ('Fastball_RPM', 'fRPM'),
        ('Breaking_RPM', 'bRPM'),
        ('Command_present', 'pCMD'),
        ('Command_future', 'fCMD'),
        ('Fastball_type', 'FBType'),
        ('Fastball_present', 'pFB'),
        ('Fastball_future', 'fFB'),
        ('Changeup_present', 'pCH'),
        ('Changeup_future', 'fCH'),
        ('Curveball_present', 'pCB'),
        ('Curveball_future', 'fCB'),
        ('Slider_present', 'pSL'),
        ('Slider_future', 'fSL'),
        ('Cutter_present', 'pCT'),
        ('Cutter_future', 'fCT'),
        ('Splitter_present', 'pSPL'),
        ('Splitter_future', 'fSPL'),
    )

    grade_entry = {'year': year, 'grades_id': entry['grades_id']}
    for column, source in column_sources:
        grade_entry[column] = ifzero(row_val[source])

    db.insertRowDict(grade_entry, 'fg_grades_pitchers', replace=True, debug=1)
    db.conn.commit()
def process(season_id):
    """Build yearly shot-distribution rows for players, teams and the league.

    For each entity type the ``shots_<type>_Breakdown`` table is aggregated
    for ``season_id`` into attempts, zone share and eFG per shot zone, and
    the result is written to ``shots_<type>_Distribution_Year`` in batches
    of 1000 rows.

    The season id read back from each result row is kept in its own
    variable; it used to overwrite the ``season_id`` argument, so the
    queries for later entity types were built from row data instead of the
    caller's value.
    """
    query = (
        "SELECT * FROM( "
        "SELECT %s_id, season_id, season_type, shot_zone_basic, shot_zone_area, games, "
        "SUM(attempts) AS attempts, "
        "SUM(attempts)/all_atts AS zone_pct, "
        "SUM(points)/SUM(attempts)/2 AS efg "
        "FROM shots_%s_Breakdown "
        "JOIN( "
        "SELECT %s_id, season_id, season_type, SUM(attempts) AS all_atts "
        "FROM shots_%s_Breakdown "
        "WHERE season_id = '%s' "
        "AND shot_zone_basic = 'all' "
        "AND shot_zone_area = 'all' "
        "GROUP BY %s_id, season_id, season_type "
        ") allatts USING (%s_id, season_id, season_type) "
        "WHERE season_id = '%s' "
        "GROUP BY %s_id, season_id, season_type, shot_zone_basic, shot_zone_area "
        ") a "
        "ORDER BY %s_id ASC, season_id ASC, shot_zone_basic ASC, shot_zone_area ASC, season_type DESC "
    )

    for _type in ('Player', 'Team', 'League'):
        print('\t' + _type)
        q = query % (_type, _type, _type, _type, season_id, _type, _type,
                     season_id, _type, _type)
        res = db.query(q)

        _id = '%s_id' % (_type.lower())
        entries = []
        for row in res:
            (type_id, row_season_id, season_type, shot_zone_basic,
             shot_zone_area, games, attempts, zone_pct, efg) = row
            entries.append({
                _id: type_id,
                "season_id": row_season_id,
                "season_type": season_type,
                "shot_zone_basic": shot_zone_basic,
                "shot_zone_area": shot_zone_area,
                "games": games,
                "attempts": attempts,
                "zone_pct": zone_pct,
                "efg": efg,
            })

        table = "shots_%s_Distribution_Year" % (_type)
        for i in range(0, len(entries), 1000):
            db.insertRowDict(entries[i:i + 1000], table, insertMany=True,
                             replace=True, rid=0, debug=1)
            db.conn.commit()
def process_international(year, entry, row_val):
    """Add the international rank to ``entry`` and store it.

    The rank is read from ``row_val['DraftRank']`` through ``ifzero`` and the
    row is written to ``fg_prospects_international``.
    """
    int_rank = ifzero(row_val['DraftRank'])
    entry['int_rank'] = int_rank
    db.insertRowDict(entry, 'fg_prospects_international',
                     replace=True, debug=1)
    db.conn.commit()
def process(season_id):
    """Build yearly league-relative shooting rows for players and teams.

    Each ``shots_<type>_Distribution_Year`` row for ``season_id`` is compared
    with the league row for the same zone (alias ``b``) and with the league
    'all'/'all' row (alias ``c``); the resulting "plus" and
    points-above-average figures are written to
    ``shots_<type>_Relative_Year`` in batches of 1000 rows.

    The season id read back from each result row is kept in its own
    variable; it used to overwrite the ``season_id`` argument, so the Team
    query was built from Player row data instead of the caller's value.
    """
    query = (
        "SELECT %s_id, season_id, season_type, b.shot_zone_basic, b.shot_zone_area, "
        "a.games, a.attempts, "
        "IFNULL((a.zone_pct/b.zone_pct)*100,0) AS zone_pct_plus, "
        "IFNULL((a.efg/b.efg)*100,0) AS ZONE_efg_plus, "
        "IFNULL(a.attempts*(a.efg-b.efg)*2,0) AS ZONE_paa, "
        "IFNULL((a.attempts*(a.efg-b.efg)*2)/a.games,0) AS ZONE_paa_per_game, "
        "IFNULL((a.efg/c.efg)*100,0) AS efg_plus, "
        "IFNULL(a.attempts*(a.efg-c.efg)*2,0) AS paa, "
        "IFNULL((a.attempts*(a.efg-c.efg)*2)/a.games,0) AS paa_per_game "
        "FROM shots_%s_Distribution_Year a "
        "JOIN shots_League_Distribution_Year b "
        "USING (season_id, season_type, shot_zone_basic, shot_zone_area) "
        "JOIN shots_League_Distribution_Year c "
        "USING (season_id, season_type) "
        "WHERE c.shot_zone_basic = 'all' "
        "AND c.shot_zone_area = 'all' "
        "AND season_id = '%s' "
    )

    for _type in ('Player', 'Team'):
        print('\t' + _type)
        q = query % (_type, _type, season_id)
        res = db.query(q)

        _id = '%s_id' % (_type.lower())
        entries = []
        for row in res:
            (type_id, row_season_id, season_type, z_basic, z_area, games,
             attempts, z_plus, ZONE_efg, ZONE_paa, ZONE_paag, efg, paa,
             paag) = row
            entries.append({
                _id: type_id,
                "season_id": row_season_id,
                "season_type": season_type,
                "shot_zone_basic": z_basic,
                "shot_zone_area": z_area,
                "games": games,
                "attempts": attempts,
                "zone_pct_plus": z_plus,
                "ZONE_efg_plus": ZONE_efg,
                "ZONE_paa": ZONE_paa,
                "ZONE_paa_per_game": ZONE_paag,
                "efg_plus": efg,
                "paa": paa,
                "paa_per_game": paag,
            })

        table = "shots_%s_Relative_Year" % (_type)
        for i in range(0, len(entries), 1000):
            db.insertRowDict(entries[i:i + 1000], table, insertMany=True,
                             replace=True, rid=0, debug=1)
            db.conn.commit()
def initiate():
    """Scan the TV show library on disk and store per-show counts.

    For every entry of the library directory except the first one listed,
    the show name is the directory name (minus a leading ``z``). Seasons are
    the entries of the show directory whose name starts with the show name;
    episodes are the files inside each season entry whose name starts with
    the show name. A season entry that cannot be listed (``OSError``) is
    removed from the season count. One row per show is written to
    ``tv_show_data``.

    Each directory is now listed once per pass. The previous version called
    ``os.listdir`` again for every index lookup, which cost one directory
    scan per entry and could mix results from different listings if the
    directory changed mid-run.
    """
    base_path = "/Volumes/TheWretch/NOT_ON_LAPTOP/TV_Shows/"

    # The first listed entry is skipped, exactly as before -- presumably a
    # hidden metadata file rather than a show; TODO confirm.
    show_dirs = os.listdir(base_path)
    for dir_name in show_dirs[1:]:
        print(dir_name)
        show_name = dir_name[1:] if dir_name[0] == 'z' else dir_name

        seasons_path = base_path + dir_name
        season_listing = os.listdir(seasons_path)
        seasons_cnt = len([name for name in season_listing
                           if name.startswith(show_name)])

        # Season candidates also skip the first listed entry and match the
        # show name anywhere in the entry name.
        seasons_indices = [j for j in range(1, len(season_listing))
                           if show_name in season_listing[j]]
        print(seasons_indices)

        episodes_cnt = 0
        for k in seasons_indices:
            episodes_path = seasons_path + '/' + season_listing[k]
            try:
                episode_names = os.listdir(episodes_path)
            except OSError:
                # Not a listable directory, so it is not a real season.
                seasons_cnt -= 1
                continue
            episodes_cnt += len([name for name in episode_names
                                 if name.startswith(show_name)])

        try:
            avg_eps = float(episodes_cnt) / float(seasons_cnt)
        except ZeroDivisionError:
            avg_eps = 0

        entry = {
            'name': show_name.replace("_", " "),
            'seasons': seasons_cnt,
            'episodes': episodes_cnt,
            'episodes_per_season': avg_eps,
        }
        print(entry)
        db.insertRowDict(entry, 'tv_show_data', insertMany=False,
                         replace=True, rid=0, debug=1)
        db.conn.commit()
def statistics_war(year):
    """Compute defensive WAR components for every non-pitcher fielding row.

    Reads ``statistics_fielding`` for ``year`` and, for each row whose
    position is not ``p``, combines the range/error/arm/passed-ball values
    from ``helper.get_def_values`` and the positional adjustment from
    ``helper.get_pos_adj``, both pro-rated by innings over a 1450-inning
    season. Rows are written to ``processed_compWAR_defensive``.

    Pitcher rows are skipped before any per-row work. They used to trigger
    a team lookup query whose result was thrown away.
    """
    full_season_innings = 1450

    player_qry = (
        "SELECT player_name, team_id, pos, inn "
        "FROM statistics_fielding "
        "WHERE year = %s; "
    ) % (year)
    player_data = db.query(player_qry)

    entries = []
    for player_name, team_id, position, inn in player_data:
        if position.lower() == 'p':
            continue

        # helper.get_def_values receives the name with single quotes doubled.
        search_name = player_name.replace("'", "''")
        team_abb = db.lookupValues("teams", ("team_id", "year",),
                                   (team_id, year), val="team_abb",
                                   operators=("=", "="))[0]

        rn_val, err_val, arm_val, pb_val = helper.get_def_values(
            search_name, position, year)
        defense = float(inn)*(rn_val + err_val + arm_val + pb_val)/full_season_innings

        adj = float(helper.get_pos_adj(position.upper()))
        position_adj = adj*(float(inn)/full_season_innings)

        entries.append({
            'year': year,
            'player_name': player_name,
            'team_abb': team_abb,
            'position': position,
            'age': None,
            'pa': None,
            'inn': inn,
            'defense': defense,
            'position_adj': position_adj,
            'dWAR': (defense+position_adj)/10,
        })

    table = 'processed_compWAR_defensive'
    if entries:
        db.insertRowDict(entries, table, replace=True, insertMany=True, rid=0)
        db.conn.commit()
def process():
    """Build career league-relative shooting rows for players and teams.

    Yearly relative and distribution rows are aggregated per entity, season
    type and shot zone, and written to ``shots_<type>_Relative_Career`` in
    batches of 1000 rows. The stored ``season_id`` is the query's ``career``
    column: for players a concatenation built from ``from_year`` and
    ``to_year`` in the ``players`` table, for teams the constant ``'1'``.
    """
    template = (
        "SELECT %s_id, %s AS career, season_type, c.shot_zone_basic, c.shot_zone_area, "
        "all_games AS games, "
        "SUM(a.attempts) AS attempts, "
        "IFNULL(100*(SUM(b.attempts*b.zone_pct/c.zone_pct)/SUM(b.attempts)),0) AS zone_pct_plus, "
        "IFNULL(100*(SUM(b.attempts*b.efg/c.efg)/SUM(b.attempts)),0) AS ZONE_efg_plus, "
        "IFNULL(SUM(ZONE_paa),0) as ZONE_paa, "
        "IFNULL(SUM(ZONE_paa)/all_games,0) as ZONE_paa_per_game, "
        "IFNULL(100*(SUM(b.attempts*b.efg/d.efg)/SUM(b.attempts)),0) AS efg_plus, "
        "IFNULL(SUM(paa),0) AS paa, "
        "IFNULL(SUM(paa)/all_games,0) AS paa_per_game, "
        "IFNULL(SUM(par),0) AS par, "
        "IFNULL(SUM(par)/all_games,0) AS par_per_game "
        "FROM shots_%s_Relative_Year a "
        "%sJOIN shots_%s_Distribution_Year b "
        "USING (%s_id, season_id, season_type, shot_zone_basic, shot_zone_area) "
        "JOIN shots_League_Distribution_Year c "
        "USING (season_id, season_type, shot_zone_basic, shot_zone_area) "
        "JOIN shots_League_Distribution_Year d "
        "USING (season_id, season_type) "
        "JOIN( "
        "SELECT %s_id, season_type, SUM(games) AS all_games "
        "FROM shots_%s_Breakdown "
        "WHERE shot_zone_basic = 'all' "
        "AND shot_zone_area = 'all' "
        "GROUP BY %s_id, season_type "
        ") g USING (%s_id, season_type) "
        "WHERE d.shot_zone_basic = 'all' "
        "AND d.shot_zone_area = 'all' "
        "GROUP BY %s_id, season_type, shot_zone_basic, shot_zone_area "
    )

    for _type in ('Player', 'Team'):
        print('\t' + _type)
        is_player = _type == 'Player'
        _join = 'JOIN players USING (player_id)\n' if is_player else ''
        _career = ('CONCAT(GREATEST(1996, from_year),to_year)'
                   if is_player else "'1'")

        q = template % (_type, _career, _type, _join, _type, _type, _type,
                        _type, _type, _type, _type)
        res = db.query(q)

        id_column = '%s_id' % (_type.lower())
        entries = []
        for row in res:
            (type_id, career, season_type, z_basic, z_area, games, attempts,
             z_plus, zone_efg, zone_paa, zone_paag, efg, paa, paag, par,
             parg) = row
            entries.append({
                id_column: type_id,
                "season_id": career,
                "season_type": season_type,
                "shot_zone_basic": z_basic,
                "shot_zone_area": z_area,
                "games": games,
                "attempts": attempts,
                "zone_pct_plus": z_plus,
                "ZONE_efg_plus": zone_efg,
                "ZONE_paa": zone_paa,
                "ZONE_paa_per_game": zone_paag,
                "efg_plus": efg,
                "paa": paa,
                "paa_per_game": paag,
                "par": par,
                "par_per_game": parg,
            })

        table = "shots_%s_Relative_Career" % (_type)
        for start in range(0, len(entries), 1000):
            db.insertRowDict(entries[start:start + 1000], table,
                             insertMany=True, replace=True, rid=0, debug=1)
            db.conn.commit()
def initiate():
    """Run the weekly NSBL update when one is due and standings have changed.

    An update is due when no weekly update has been logged, when it is
    Wednesday (weekday 2) and the last one is at least 2 days old, or when
    it is Friday-Sunday (weekdays 4-6) and the last one is at least
    ``weekday - 2`` days old. When due, ``scrape_cur_standings`` decides
    whether to proceed: on success the update is logged, a start email is
    sent, ``./NSBL_weekly_run.sh`` is run and a completion email is sent.
    Every other outcome only prints a status line.
    """
    def timestamp():
        return strftime("%Y-%m-%d %H:%M:%S", localtime())

    cur_weekday = datetime.now().weekday()
    qry = (
        "SELECT DATEDIFF(DATE(NOW()), MAX(update_date)) AS DAYS_SINCE_UPDATE "
        "FROM update_log "
        "WHERE 1 "
        "AND type = 'weekly' "
    )
    days_since_update = db.query(qry)[0][0]

    update_due = (
        days_since_update is None
        or (cur_weekday == 2 and days_since_update >= 2)
        or (cur_weekday in (4, 5, 6)
            and days_since_update >= (cur_weekday - 2))
    )
    if not update_due:
        if days_since_update <= 1:
            print("***")
        else:
            print("--------------\nAlready updated - %s\n--------------"
                  % (timestamp()))
        return

    standings_update = scrape_cur_standings()
    if not (standings_update == True):
        print("--------------\nNo update - %s\n--------------"
              % (timestamp()))
        return

    entry = {'type': 'weekly', 'update_date': datetime.now().date()}
    db.insertRowDict(entry, 'update_log', insertMany=False, replace=True,
                     rid=0, debug=1)
    db.conn.commit()

    standings_note = "Check http://thensbl.com/orgstand.htm for updated standings"
    email("NSBL Update Started [%s]" % (timestamp()), standings_note)

    subprocess.call(['./NSBL_weekly_run.sh'])

    email_sub = "NSBL Updated [%s]" % (timestamp())
    email_msg = "".join([
        standings_note,
        "\n\n\nCheck out recent board activity: https://nsbl2012.boards.net/posts/recent",
        "\n\n\nUpdated Advanced Standings: http://connor-r.github.io/Tables/leaderboard_Standings.html",
        "\nUpdated Leaderboard Changes: http://connor-r.github.io/Tables/leaderboard_Changes.html",
        "\nUpdated Pitching Leaderboard: http://connor-r.github.io/Tables/historical_StatsPitchers.html",
        "\nUpdated Hitting Leaderboard: http://connor-r.github.io/Tables/historical_StatsHitters.html",
    ])
    email(email_sub, email_msg)
    print("DONE")
def process_team_summary(summary, year, season_gp, date):
    """Convert team-summary sheet rows into ``excel_team_summary`` rows.

    The first two cells of a row may carry a league or division heading;
    once seen, that league/division applies to the row and to every row
    after it until replaced. Cells 2-15 hold the fourteen financial/roster
    categories (any ``$`` is stripped), and cells 16 and 17, when present,
    hold the GM name and email.
    """
    league_codes = {'NATIONAL LEAGUE': 'NL', 'AMERICAN LEAGUE': 'AL'}
    division_names = {'EAST': 'East', 'CENTRAL': 'Central', 'WEST': 'West'}
    cats = (
        'team_name', 'team_abb', 'base_cap', 'prev_reserves', 'buyout',
        'traded_cash', 'total_cap', 'payroll', 'cap_room', 'debt_load',
        'active_roster', 'reserve_roster', 'IL', 'Total',
    )

    entries = []
    lg = ''
    div = ''
    for tm in summary:
        # Headings update the running league/division; other rows keep it.
        lg = league_codes.get(tm[0].upper(), lg)
        div = division_names.get(tm[1].upper(), div)

        entry = {
            'year': year,
            'gp': season_gp,
            'date': date,
            'league': lg,
            'division': div,
        }
        for offset, cat in enumerate(cats):
            entry[cat] = tm[offset + 2].replace('$', '')
        entry['GM'] = tm[16] if len(tm) >= 17 else ''
        entry['GM_email'] = tm[17] if len(tm) >= 18 else ''
        entries.append(entry)

    if entries:
        db.insertRowDict(entries, 'excel_team_summary', replace=True,
                         insertMany=True, rid=0)
        db.conn.commit()
def _rescale_grade(text):
    """Return a grade string, multiplying whole numbers below 8 by ten.

    The original code compared the raw string against the integer 8, which
    is never true on Python 2 and raises on Python 3, so the rescale never
    ran. Non-numeric text and values of 8 or more are returned unchanged
    (the ``< 8`` threshold is kept exactly as written).
    """
    try:
        value = int(text)
    except ValueError:
        return text
    if value < 8:
        return str(value * 10)
    return text


def _split_grade_pair(raw):
    """Split a ``"present / future"`` string into two rescaled grades."""
    parts = raw.split(" / ")
    present = parts[0].strip()
    future = parts[1].strip()
    return _rescale_grade(present), _rescale_grade(future)


def process_scouting_grades(reported, fg_id, scouting_dict):
    """Store one player's present/future scouting grades.

    The player is a hitter when ``scouting_dict`` has a "Hit" key and a
    pitcher when it has "Fastball" or "Command"; otherwise nothing is
    stored and ``None`` is returned. Every recognised category is saved as
    ``<category>_present`` / ``<category>_future``. For pitchers any other
    category except "Future Value" is saved as ``Other_*``; for hitters it
    is only reported on stdout. The row is written to
    ``fg_grades_hitters`` or ``fg_grades_pitchers``.
    """
    if "Hit" in scouting_dict:
        player_type = "hitters"
    elif "Fastball" in scouting_dict or "Command" in scouting_dict:
        player_type = "pitchers"
    else:
        return None

    categories = {
        "hitters": ("Hit", "GamePower", "Field", "RawPower", "Speed",
                    "Throws"),
        "pitchers": ("Fastball", "Changeup", "Curveball", "Slider", "Cutter",
                     "Splitter", "Command"),
    }[player_type]

    entry = {"year": reported, "fg_id": fg_id.split("&")[0]}
    for k, v in scouting_dict.items():
        if k in categories:
            prefix = k
        elif k == "Future Value":
            continue
        elif player_type == "hitters":
            print("\n\n**ERROR TAG** NO CATEGORY %s \t%s \n\n" % (k, v))
            continue
        else:
            # Unrecognised pitch types share the single "Other" slot.
            prefix = "Other"
        grade_present, grade_future = _split_grade_pair(v)
        entry[prefix + "_present"] = grade_present
        entry[prefix + "_future"] = grade_future

    table = "fg_grades_%s" % (player_type)
    db.insertRowDict(entry, table, replace=True, debug=1)
    db.conn.commit()
def update_tv_show_rankings():
    """Sync ``tv_show_grades`` with the shows listed in ``tv_show_data``.

    For every show, the existing grades row is re-written with its
    approximate runtime recomputed as ``episodes * episode_length / 60``
    hours. A show with no grades row yet gets a row containing only its
    name, with every grade column empty.

    A grades row with no usable episode length now only loses its runtime.
    Previously the resulting ``TypeError`` was handled like a missing row,
    so every existing grade was overwritten with ``None``.
    """
    for name, seasons, episodes, eps_per_season in db.query(
            "SELECT * FROM tv_show_data;"):
        row_query = 'SELECT * FROM tv_show_grades WHERE name = "%s";' % (name,)

        try:
            grade_row = db.query(row_query)[0]
        except (IndexError, TypeError):
            # No grades row (or no result at all) for this show.
            grade_row = None

        if grade_row is None:
            db.insertRowDict({"name": name}, 'tv_show_grades',
                             insertMany=False, replace=True, rid=0, debug=1)
            db.conn.commit()
            (genre, ep_len, runtime_hrs, peak, consistency, premise, plot,
             information_gain, fx, wit, lng, timelsns, adj,
             grade) = (None,) * 14
        else:
            (foo, genre, ep_len, runtime_hrs, peak, consistency, premise,
             plot, information_gain, fx, wit, lng, timelsns, adj,
             grade) = grade_row
            try:
                runtime_hrs = float(episodes * ep_len) / 60.0
            except TypeError:
                # Episode count or length is missing; keep the grades.
                runtime_hrs = None

        entry = {
            'name': name,
            'genre': genre,
            'episode_length': ep_len,
            'approx_runtime_hours': runtime_hrs,
            'peak': peak,
            'consistency': consistency,
            'premise': premise,
            'plot': plot,
            'information_gain': information_gain,
            'desired_effects': fx,
            'wit': wit,
            'length': lng,
            'timelessness': timelsns,
            'adjustment': adj,
            'overall_grade': grade,
        }
        db.insertRowDict(entry, 'tv_show_grades', insertMany=False,
                         replace=True, rid=0, debug=1)
        db.conn.commit()
def process(year):
    """Download and store the prospect list for every team plus the draft.

    Team names come from ``NSBL.teams`` for ``year`` ("Diamondbacks" is
    requested as "Dbacks"), followed by the pseudo-team "draft". Each list
    is fetched from ``base_url``, parsed with ``parse_prospect`` and written
    to the professional, draft or international prospects table in batches
    of 1000 rows.

    A response without a prospect list is retried every 30 seconds until
    one arrives. The old retry recursed without returning its result, so
    a successful retry was thrown away and fetched again.
    """
    tms = db.query("SELECT mascot_name FROM NSBL.teams WHERE year = %s" % (year,))
    tmlst = ["Dbacks" if tm[0] == "Diamondbacks" else tm[0] for tm in tms]
    tmlst.append("draft")

    def get_team_list(url):
        """Return the prospect list at ``url``, or None after a 30s pause."""
        payload = getter.get_url_data(url, "json")
        try:
            return payload["data"]["prospects"]
        except KeyError:
            print("\tMISSING TEAM - waiting 30 seconds and trying again...")
            sleep(30)
            return None

    tables = {
        "draft": "mlb_prospects_draft",
        "international": "mlb_prospects_international",
    }

    for i, team in enumerate(tmlst):
        url = base_url % (year, team)
        print('\n%s %s' % (i + 1, team))

        prospect_list = get_team_list(url)
        while prospect_list is None:
            print(url)
            prospect_list = get_team_list(url)

        entries = []
        for j, prospect in enumerate(prospect_list):
            player = prospect.get("player")
            print("\t%s %s %s" % (j + 1, player.get("useName"),
                                  player.get("lastName")))
            entries.append(parse_prospect(j + 1, year, prospect, team))

        table = tables.get(team, "mlb_prospects_professional")
        for start in range(0, len(entries), 1000):
            db.insertRowDict(entries[start:start + 1000], table,
                             insertMany=True, replace=True, rid=0, debug=1)
            db.conn.commit()
def update_career_percentiles():
    """Aggregate yearly percentiles into career percentiles.

    For players and teams, yearly percentile rows are combined per entity
    and season type (weighted by games or attempts in the query) and written
    to ``percentiles_<type>_Career`` in batches of 1000 rows. The query
    returns the career span as the first four characters of the earliest
    season id followed by the last two of the latest; those two digits are
    expanded to a four-digit year, reading values below '50' as 20xx and the
    rest as 19xx.
    """
    template = (
        "SELECT %s_id, b.season_id, season_type, "
        "SUM(attempts), SUM(games), "
        "ROUND(SUM(AttemptsPerGame_percentile*games)/SUM(games),1), "
        "ROUND(SUM(EFG_Percentile*attempts)/SUM(attempts),1), "
        "ROUND(SUM(PAAperGame_percentile*attempts)/SUM(attempts),1), "
        "ROUND(SUM(shotSkill_Percentile*attempts)/SUM(attempts),1) "
        "FROM percentiles_%s_Year "
        "JOIN (SELECT %s_id, "
        "CONCAT(LEFT(MIN(season_id),4), RIGHT(MAX(season_id),2)) as season_id "
        "FROM percentiles_%s_Year "
        "GROUP BY %s_id) b USING (%s_id) "
        "GROUP By %s_id, season_type;"
    )

    for _type in ('Player', 'Team'):
        print('\t' + _type)
        res = db.query(template % ((_type,) * 7))

        id_key = _type + '_id'
        entries = []
        for row in res:
            (_id, span, season_type, attempts, games, att_pct, efg_pct,
             paa_pct, skill_pct) = row

            span_text = str(span)
            first_year, last_two = span_text[:4], span_text[4:]
            century = '20' if last_two < '50' else '19'

            entries.append({
                id_key: _id,
                'season_id': int(first_year + century + last_two),
                'season_type': season_type,
                'games': games,
                'attempts': attempts,
                'AttemptsPerGame_percentile': att_pct,
                'EFG_Percentile': efg_pct,
                'PAAperGame_percentile': paa_pct,
                'shotSkill_Percentile': skill_pct,
            })

        table = "percentiles_%s_Career" % (_type)
        for start in range(0, len(entries), 1000):
            db.insertRowDict(entries[start:start + 1000], table,
                             insertMany=True, replace=True, rid=0, debug=1)
            db.conn.commit()
def initiate(yr, _type, player_mapper):
    """Load one historical ZiPS CSV file into its ``zips_<_type>`` table.

    The file ``<yr>_zips_<_type>.csv`` is read, skipping its first row. Each
    remaining row must have exactly the columns of the offense, pitching or
    defense layout (a ``ValueError`` is raised otherwise); any other
    ``_type`` reads the file but stores nothing. Player names found in
    ``player_mapper`` are replaced by their mapped value, all others are
    passed to ``helper.input_name`` and kept as they are. Rows are stored
    with ``yr`` as their year, not the year column of the file.
    """
    path = '/Users/connordog/Dropbox/Desktop_Files/Work_Things/CodeBase/Python_Scripts/Python_Projects/NSBL/ad_hoc/historical_csv_files/'
    csv_file_ext = '%s_zips_%s.csv' % (yr, _type)
    csv_file = path + csv_file_ext
    print("%s %s %s" % (yr, _type, csv_file_ext))

    # Entry key for each CSV column, in file order; None marks a column
    # that is read but not stored.
    layouts = {
        'offense': (
            None, "player_name", "team_abb", "age", "bats", "g", "ab", "r",
            "h", "2b", "3b", "hr", "rbi", "bb", "so", "hbp", "sb", "cs",
            "sh", "sf", "ibb", "zWAR",
        ),
        'pitching': (
            None, "player_name", "team_abb", "age", "throws", "w", "l",
            "era", "g", "gs", "ip", "h", "r", "er", "hr", "bb", "so", "zWAR",
        ),
        'defense': (
            None, "player_name", "c_range", "c_error", "1b_range",
            "1b_error", "2b_range", "2b_error", "3b_range", "3b_error",
            "ss_range", "ss_error", "lf_range", "lf_error", "cf_range",
            "cf_error", "rf_range", "rf_error", "c_arm", "of_arm", "c_pb",
            None,
        ),
    }
    columns = layouts.get(_type)

    def resolve_name(raw_name):
        if raw_name in player_mapper:
            return player_mapper.get(raw_name)
        helper.input_name(raw_name)
        return raw_name

    entries = []
    with codecs.open(csv_file, 'rb', encoding='utf-8', errors='ignore') as f:
        for line_no, row in enumerate(csv.reader(f)):
            if line_no == 0 or columns is None:
                continue
            if len(row) != len(columns):
                raise ValueError("expected %d columns, got %d"
                                 % (len(columns), len(row)))
            entry = {"year": yr}
            for key, cell in zip(columns, row):
                if key is not None:
                    entry[key] = cell
            entry["player_name"] = resolve_name(entry["player_name"])
            entries.append(entry)

    table = 'zips_%s' % (_type)
    if entries:
        db.insertRowDict(entries, table, replace=True, insertMany=True, rid=0)
        db.conn.commit()
def process(group_type, time_type):
    """Compute ShotSkillPlus per entity and season and store it.

    ShotSkillPlus is the attempts-weighted sum of ``zone_efg_plus`` over the
    individual zones of ``shots_<group_type>_Relative_<time_type>``, divided
    by the attempts of the 'all'/'all' row and rounded to 4 places. Results
    go to ``shot_skill_plus_<group_type>_<time_type>`` in batches of 1000
    rows; the Career table is truncated before it is refilled.
    """
    template = (
        "SELECT %s_id, season_id, season_type, attempts, "
        "ROUND(sum_efg_plus/attempts,4) AS ShotSkillPlus "
        "FROM( "
        "SELECT %s_id, season_id, season_type, "
        "SUM(attempts*zone_efg_plus) AS sum_efg_plus "
        "FROM shots_%s_Relative_%s r "
        "WHERE shot_zone_area != 'all' "
        "AND shot_zone_basic != 'all' "
        "GROUP BY %s_id, season_id, season_type "
        ") a "
        "JOIN( "
        "SELECT %s_id, season_id, season_type, attempts "
        "FROM shots_%s_Relative_%s r "
        "WHERE shot_zone_area = 'all' "
        "AND shot_zone_basic = 'all' "
        "GROUP BY %s_id, season_id, season_type "
        ") b USING (%s_id, season_id, season_type); "
    )
    q = template % (group_type, group_type, group_type, time_type,
                    group_type, group_type, group_type, time_type,
                    group_type, group_type)
    res = db.query(q)

    id_column = '%s_id' % (group_type.lower())
    entries = []
    for type_id, season_id, season_type, attempts, shotskillplus in res:
        entries.append({
            id_column: type_id,
            "season_id": season_id,
            "season_type": season_type,
            "attempts": attempts,
            "ShotSkillPlus": shotskillplus,
        })

    table = "shot_skill_plus_%s_%s" % (group_type, time_type)
    if time_type == "Career":
        db.query("TRUNCATE TABLE %s;" % (table))

    for start in range(0, len(entries), 1000):
        db.insertRowDict(entries[start:start + 1000], table, insertMany=True,
                         replace=True, rid=0, debug=1)
        db.conn.commit()
def update_grades():
    """Recompute the overall grade of every row in ``tv_show_grades``.

    The overall grade is the 3:2 weighted mean of the peak and consistency
    grades plus the adjustment when it is positive; the stored overall
    grade is ignored. Each row is written back with the new value.
    """
    for row in db.query("SELECT * FROM tv_show_grades;"):
        (show_name, genre, episode_length, peak, consistency, adj, runtime,
         stored_grade) = row
        overall = (float(peak)*3 + float(consistency)*2)/5 + max(adj, 0)
        entry = {
            "show_name": show_name,
            "genre": genre,
            "episode_length": episode_length,
            "peak_grade": peak,
            "consistency_grade": consistency,
            "adjustment": adj,
            "approx_runtime_hours": runtime,
            "overall_grade": overall,
        }
        db.insertRowDict(entry, 'tv_show_grades', insertMany=False,
                         replace=True, rid=0, debug=1)
        db.conn.commit()
def calculate_war():
    """Derive comparison hitting metrics for every ``zips_offense`` row.

    For each projection row this computes plate appearances, BABIP and the
    park factor of the previous year, asks
    ``helper.get_zips_offensive_metrics`` for the rate stats, and stores
    park factor, PA, BABIP, OPS+, park-adjusted wOBA and wRC+ in
    ``zips_WAR_hitters_comp`` in batches of 1000 rows.
    """
    player_qry = (
        "SELECT year, player_name, team_abb, "
        "ab, h, 2b, 3b, hr, bb, so, hbp, ibb, sh, sf, sb, cs "
        "FROM zips_offense "
    )

    entries = []
    for row in db.query(player_qry):
        (year, player_name, team_abb, ab, h, doubles, triples, hr, bb, so,
         hbp, ibb, sh, sf, sb, cs) = row

        pa = ab + bb + hbp + ibb + sh + sf
        walks = bb + ibb
        singles = h - doubles - triples - hr
        team_abb = team_abb.upper()

        # Park factors and league context come from the preceding year.
        prior_year = year - 1
        pf = float(helper.get_park_factors(team_abb, prior_year)) / float(100)
        babip = float((h - hr)) / float((ab + sh + sf - so - hr))

        (ops, wOBA, park_wOBA, OPS_plus, wrc, wrc27, wRC_plus, raa,
         oWAR) = helper.get_zips_offensive_metrics(
            prior_year, pf, pa, ab, walks, hbp, singles, doubles, triples,
            hr, sb, cs)

        entries.append({
            'year': year,
            'player_name': player_name,
            'team_abb': team_abb,
            'pf': pf,
            'pa': pa,
            'babip': babip,
            'OPS_plus': OPS_plus,
            'park_wOBA': park_wOBA,
            'wRC_plus': wRC_plus,
        })

    table = 'zips_WAR_hitters_comp'
    for start in range(0, len(entries), 1000):
        db.insertRowDict(entries[start:start + 1000], table, insertMany=True,
                         replace=True, rid=0, debug=1)
        db.conn.commit()
def update_podcast_grades():
    """Recompute the overall grade of every row in ``podcasts``.

    The overall grade is the 3:2 weighted mean of the peak and consistency
    grades plus the adjustment when it is positive, or 0 when the
    calculation raises ``TypeError``. The stored overall grade is ignored
    and each row is written back with the new value.
    """
    for row in db.query("SELECT * FROM podcasts;"):
        podcast_name, genre, peak, consistency, adj, stored_overall = row
        try:
            grade = (float(peak)*3 + float(consistency)*2)/5 + max(adj, 0)
        except TypeError:
            grade = 0
        entry = {
            "podcast_name": podcast_name,
            "genre": genre,
            "peak_grade": peak,
            "consistency_grade": consistency,
            "adjustment": adj,
            "overall_grade": grade,
        }
        db.insertRowDict(entry, 'podcasts', insertMany=False, replace=True,
                         rid=0, debug=1)
        db.conn.commit()
def update_career_percentiles():
    """Aggregate yearly percentiles into career percentiles.

    For players and teams, yearly percentile rows are combined per entity
    and season type (weighted by games or attempts in the query), labelled
    with the career ``season_id`` from ``shot_skill_plus_<type>_Career``,
    and written to ``percentiles_<type>_Career`` after truncating it.

    ``PARperGame_percentile`` now stores the PAR value from the query. A
    copy-paste slip used to store the PAA percentile under that key.
    """
    template = (
        "SELECT %s_id, ssp.season_id, season_type, "
        "SUM(attempts), SUM(games), "
        "ROUND(SUM(AttemptsPerGame_percentile*games)/SUM(games),1), "
        "ROUND(SUM(EFG_Percentile*attempts)/SUM(attempts),1), "
        "ROUND(SUM(PAAperGame_percentile*attempts)/SUM(attempts),1), "
        "ROUND(SUM(PARperGame_percentile*attempts)/SUM(attempts),1), "
        "ROUND(SUM(shotSkill_Percentile*attempts)/SUM(attempts),1) "
        "FROM percentiles_%s_Year "
        "JOIN (SELECT %s_id, season_id, season_type "
        "FROM shot_skill_plus_%s_Career) ssp USING (%s_id, season_type) "
        "GROUP By %s_id, season_type;"
    )

    for _type in ('Player', 'Team'):
        print('\t' + _type)
        res = db.query(template % ((_type,) * 6))

        id_key = _type + '_id'
        entries = []
        for row in res:
            (_id, season_id, season_type, attempts, games,
             AttemptsPerGame_percentile, EFG_Percentile,
             PAAperGame_percentile, PARperGame_percentile,
             shotSkill_Percentile) = row
            entries.append({
                id_key: _id,
                'season_id': season_id,
                'season_type': season_type,
                'games': games,
                'attempts': attempts,
                'AttemptsPerGame_percentile': AttemptsPerGame_percentile,
                'EFG_Percentile': EFG_Percentile,
                'PAAperGame_percentile': PAAperGame_percentile,
                'PARperGame_percentile': PARperGame_percentile,
                'shotSkill_Percentile': shotSkill_Percentile,
            })

        table = "percentiles_%s_Career" % (_type)
        db.query("TRUNCATE TABLE %s;" % (table))
        for start in range(0, len(entries), 1000):
            db.insertRowDict(entries[start:start + 1000], table,
                             insertMany=True, replace=True, rid=0, debug=1)
            db.conn.commit()
def process():
    """Build career shot-distribution rows for players, teams and the league.

    ``shots_<type>_Breakdown`` is aggregated per entity, season type and
    shot zone into attempts, zone share and eFG, and written to
    ``shots_<type>_Distribution_Career`` in batches of 1000 rows. The stored
    ``season_id`` is the query's ``career`` column: for players a
    concatenation built from ``from_year`` and ``to_year`` in the
    ``players`` table, otherwise the constant ``'1'``.
    """
    template = (
        "SELECT * FROM( "
        "SELECT %s_id, %s AS career, season_type, shot_zone_basic, shot_zone_area, "
        "all_games AS games, "
        "SUM(attempts) AS attempts, "
        "SUM(attempts)/all_atts AS zone_pct, "
        "SUM(points)/SUM(attempts)/2 AS efg "
        "FROM shots_%s_Breakdown "
        "%sJOIN( "
        "SELECT %s_id, season_type, SUM(games) AS all_games, SUM(attempts) AS all_atts "
        "FROM shots_%s_Breakdown "
        "WHERE shot_zone_basic = 'all' "
        "AND shot_zone_area = 'all' "
        "GROUP BY %s_id, season_type "
        ") allatts USING (%s_id, season_type) "
        "GROUP BY %s_id, shot_zone_basic, shot_zone_area, season_type "
        ") a "
        "ORDER BY %s_id ASC, shot_zone_basic ASC, shot_zone_area ASC, season_type DESC "
    )

    for _type in ('Player', 'Team', 'League'):
        print('\t' + _type)
        is_player = _type == 'Player'
        _join = 'JOIN players USING (player_id)\n\t' if is_player else ''
        _career = ('CONCAT(GREATEST(1996, from_year),to_year)'
                   if is_player else "'1'")

        q = template % (_type, _career, _type, _join, _type, _type, _type,
                        _type, _type, _type)
        res = db.query(q)

        id_column = '%s_id' % (_type.lower())
        entries = []
        for row in res:
            (type_id, career, season_type, zone_basic, zone_area, games,
             attempts, zone_pct, efg) = row
            entries.append({
                id_column: type_id,
                "season_id": career,
                "season_type": season_type,
                "shot_zone_basic": zone_basic,
                "shot_zone_area": zone_area,
                "games": games,
                "attempts": attempts,
                "zone_pct": zone_pct,
                "efg": efg,
            })

        table = "shots_%s_Distribution_Career" % (_type)
        for start in range(0, len(entries), 1000):
            db.insertRowDict(entries[start:start + 1000], table,
                             insertMany=True, replace=True, rid=0, debug=1)
            db.conn.commit()
def log_ids(ids):
    """Record each id in ``_email_ids`` stamped with the current time.

    All ids share one timestamp taken at the start of the call.  Nothing
    is inserted when ``ids`` is empty.  Always returns None.
    """
    stamp = datetime.now()
    rows = [{'cl_id': one_id, 'email_time': stamp} for one_id in ids]

    if rows:
        db.insertRowDict(rows, '_email_ids', replace=True, insertMany=True, rid=0)
    # NOTE(review): the source indentation was lost; the commit is assumed
    # to run whether or not anything was inserted.
    db.conn.commit()
    return None
def initiate_names(team_name, team_id, year, current, url_base):
    """Make sure the team exists in ``teams`` and then process it.

    The name is first normalised through ``invalid_names``.  When no row
    exists for (year, team_name, team_id) the user is prompted for the
    team abbreviation and a new row is stored.  ``process`` is then called
    for the team in every case.
    """
    team_name = invalid_names.get(team_name, team_name)

    existing = db.query(
        "SELECT COUNT(*) FROM teams WHERE year = %s AND team_name = '%s' AND team_id = '%s';"
        % (year, team_name, team_id)
    )[0][0]

    if existing == 0:
        team_abb = raw_input('What is the team_abb for the %s %s? ' % (year, team_name))
        print(" - ".join([str(year), str(team_id), team_name, team_abb]))

        new_team = {
            "year": year,
            "team_id": team_id,
            "team_name": team_name,
            "team_abb": team_abb,
        }
        db.insertRowDict(new_team, "teams", insertMany=False, rid=0, replace=True)
        db.conn.commit()

    process(team_id, year, current, url_base)
def update_sessions():
    """Renumber ``session_num`` for every boulder in ``boulders_tried``.

    For each distinct (boulder_name, area, sub_area) with a real date,
    the attempts are read in chronological order and written back with
    ``session_num`` set to their 1-based position.
    """
    problems_sql = (
        "SELECT boulder_name, area, sub_area "
        "FROM boulders_tried "
        "WHERE est_date > '0000-00-00' "
        "GROUP BY boulder_name, area, sub_area;"
    )
    sessions_sql = (
        'SELECT * FROM boulders_tried '
        'WHERE boulder_name = "%s" AND area = "%s" AND sub_area = "%s" '
        'ORDER BY est_date, est_time;'
    )

    for boulder_name, area, sub_area in db.query(problems_sql):
        sessions = db.query(sessions_sql % (boulder_name, area, sub_area))

        for position, session in enumerate(sessions, 1):
            # The stored session number is read but deliberately replaced.
            (est_date, est_time, row_name, row_area, row_sub_area, v_grade,
             est_attempts, est_minutes, return_interest, _stored_num,
             completed, first_comment) = session

            entry = {
                "est_date": est_date,
                "est_time": est_time,
                "boulder_name": row_name,
                "area": row_area,
                "sub_area": row_sub_area,
                "v_grade": v_grade,
                "est_attempts": est_attempts,
                "est_minutes": est_minutes,
                "return_interest": return_interest,
                "session_num": position,
                "completed": completed,
                "comment": first_comment,
            }
            db.insertRowDict(entry, 'boulders_tried', insertMany=False, replace=True, rid=0, debug=1)
            db.conn.commit()
def initiate(yr, _type, player_mapper): path = '/Users/connordog/Dropbox/Desktop_Files/Work_Things/CodeBase/Python_Scripts/Python_Projects/NSBL/ad_hoc/historical_csv_files/' csv_file = path + '%s_zips_%s_splits.csv' % (yr, _type) print yr, _type entries = [] with codecs.open(csv_file, 'rb', encoding='utf-8', errors='ignore') as f: mycsv = csv.reader(f) i = 0 for row in mycsv: if i == 0: i += 1 continue else: i += 1 year, player_name, vs_hand, ab, h, _2b, _3b, hr, rbi, bb, so, hbp, ibb, sh, sf = row if player_name in player_mapper: player_name = player_mapper.get(player_name) else: helper.input_name(player_name) entry = { "year": yr, "player_name": player_name, "vs_hand": vs_hand, "ab": ab, "h": h, "2b": _2b, "3b": _3b, "hr": hr, "rbi": rbi, "bb": bb, "so": so, "hbp": hbp, "ibb": ibb, "sh": sh, "sf": sf } entries.append(entry) table = 'zips_%s_splits' % (_type) if entries != []: db.insertRowDict(entries, table, replace=True, insertMany=True, rid=0) db.conn.commit()
def initiate():
    """Count seasons and episodes per show folder and store them in ``tv_show_data``.

    Each entry of the TV_Shows folder (except the first one returned by
    ``os.listdir``) is treated as a show.  Season folders and episode files
    are recognised by the show name; a season whose path cannot be listed
    is subtracted from the season count.  One row per show is written with
    the season count, episode count and episodes per season.

    Every directory is listed once per pass instead of once per index
    lookup, so the indices always refer to the same listing.
    """
    base_path = "/Volumes/Daddy/NOT_ON_LAPTOP/TV_Shows/"
    show_dirs = os.listdir(base_path)

    # NOTE(review): index 0 is skipped here and in the season scan below,
    # presumably to step over a hidden file such as .DS_Store -- confirm,
    # since os.listdir does not guarantee any ordering.
    for i in range(1, len(show_dirs)):
        dir_name = show_dirs[i]

        show_name = dir_name
        if show_name[0] == 'z':
            show_name = show_name[1:]

        seasons_path = base_path + dir_name
        season_entries = os.listdir(seasons_path)

        # Seasons are counted over the whole listing (prefix match) ...
        seasons_cnt = len([name for name in season_entries if name.startswith(show_name)])
        episodes_cnt = 0

        # ... while episodes are only gathered from index 1 on (substring match).
        for j in range(1, len(season_entries)):
            season_dir = season_entries[j]
            if show_name not in season_dir:
                continue

            episodes_path = seasons_path + '/' + season_dir
            try:
                episode_files = os.listdir(episodes_path)
            except OSError:
                # Not a listable directory: do not count it as a season.
                seasons_cnt -= 1
                continue
            episodes_cnt += len([name for name in episode_files if name.startswith(show_name)])

        try:
            avg_eps = float(episodes_cnt)/float(seasons_cnt)
        except ZeroDivisionError:
            avg_eps = 0

        # A second leading 'z' is stripped as well before the name is stored.
        if show_name[0] == 'z':
            show_name = show_name[1:]

        entry = {
            'show_name': show_name.replace("_", " "),
            'seasons': seasons_cnt,
            'episodes': episodes_cnt,
            'episodes_per_season': avg_eps,
        }
        db.insertRowDict(entry, 'tv_show_data', insertMany=False, replace=True, rid=0, debug=1)
        db.conn.commit()
def input_data(ratings, sql_table, cats, year):
    """Convert scraped rows into dicts and bulk-insert them into ``sql_table``.

    ``cats`` names the column for each position of a row; positions
    labelled 'foo' are dropped.  Player names lose any '*' and '#' marks.
    Rows with a usable player name and team abbreviation are kept as they
    are; otherwise rows with a usable team name are kept after the name is
    expanded through ``helper.get_team_name`` (``team_standings`` rows also
    get ``games_played`` = w + l).  Summary rows ('Total', 'Other', blank)
    are discarded.
    """
    print('\t' + sql_table)

    placeholder_values = ('Total', None, '', 'Other')
    entries = []

    for player in ratings:
        entry = {'year': year}
        for cat, val in zip(cats, player):
            if cat == 'foo':
                # 'foo' marks a scraped column we do not store
                continue
            if cat == 'player_name' and val is not None:
                val = val.replace('*', '').replace('#', '')
            entry[cat] = val

        is_player_row = (entry.get("player_name") not in placeholder_values
                         and entry.get("team_abb") not in placeholder_values)

        if is_player_row:
            entries.append(entry)
        elif entry.get("team_name") not in placeholder_values:
            entry['team_name'] = helper.get_team_name(entry.get("team_name"), year)
            if sql_table == 'team_standings':
                entry['games_played'] = int(entry.get('w')) + int(entry.get('l'))
            # NOTE(review): the source indentation was lost; the append is
            # assumed to apply to every team row, not only team_standings.
            entries.append(entry)

        # NOTE(review): assumed to run for every row that has a player_name
        # key, including rows that were not kept.
        if 'player_name' in entry:
            helper.input_name(entry.get('player_name'))

    if entries:
        db.insertRowDict(entries, sql_table, insertMany=True, rid=0, replace=True)
    db.conn.commit()
def process_prospect_list(year, list_type, list_key): list_url = base_url + "%s%s" % (year, list_key) print "\n", year, list_type, list_url json = getter.get_url_data(list_url, "json") entries = [] for plr in json: entry = {'prospect_type': list_type} for ky, val in plr.items(): if type(val) in (str, unicode): val2 = "".join([i if ord(i) < 128 else "" for i in val]) if val != val2 and 'name' in ky.lower(): print '\n\n\n\nUNICODE NAME!!!! - \n\t', val print '\t', val2, '\n\n\n\n' if 'playerid' in ky.lower(): val = val2.replace(' ', '') else: val = val2 entry[ky.lower().replace("%", "%%")] = val if ('playername' not in entry or entry['playername'] == ''): continue if 'playerid' not in entry or entry['playerid'] == '': entry['playerid'] = str( entry['playername'].replace(' ', '').replace('*', '').replace( ",", "")) + '_' + str(entry['type'].replace(' ', '')) if 'team' not in entry or entry['team'] == '': entry['team'] = '--empty--' print '\t', year, list_key, entry['playername'] helper2.input_name(entry.get('playername')) db.insertRowDict(entry, 'fg_raw', insertMany=False, replace=True, rid=0, debug=1) db.conn.commit() sleep(sleep_time)
def update_sessions():
    """Rewrite ``boulders_tried`` so sessions are numbered chronologically.

    Every boulder (name, area, sub_area) that has a non-zero date is
    looked up, its rows are ordered by date and time, and each row is
    stored again with ``session_num`` counting up from 1.
    """
    boulders = db.query(
        "SELECT boulder_name, area, sub_area "
        "FROM boulders_tried "
        "WHERE est_date > '0000-00-00' "
        "GROUP BY boulder_name, area, sub_area;"
    )
    history_sql = (
        'SELECT * FROM boulders_tried '
        'WHERE boulder_name = "%s" AND area = "%s" AND sub_area = "%s" '
        'ORDER BY est_date, est_time;'
    )

    for boulder in boulders:
        name, area_name, sub_area_name = boulder
        history = db.query(history_sql % (name, area_name, sub_area_name))

        session_no = 0
        for ind_row in history:
            session_no += 1

            # session_num from the table is unpacked only to keep the
            # 12-column layout check; the recomputed counter is stored.
            (_date, est_time, boulder_name, area, sub_area, v_grade,
             est_attempts, est_minutes, return_interest, session_num,
             completed, first_comment) = ind_row

            entry = dict(
                est_date=_date,
                est_time=est_time,
                boulder_name=boulder_name,
                area=area,
                sub_area=sub_area,
                v_grade=v_grade,
                est_attempts=est_attempts,
                est_minutes=est_minutes,
                return_interest=return_interest,
                session_num=session_no,
                completed=completed,
                comment=first_comment,
            )
            db.insertRowDict(entry, 'boulders_tried', insertMany=False, replace=True, rid=0, debug=1)
            db.conn.commit()
def process():
    """Rebuild ``processed_team_defense`` for every available season.

    The table is truncated, then ``process_defense`` is run for each year
    between the minimum and maximum year found in
    ``processed_league_averages_pitching`` and its rows are bulk-inserted.
    """
    table = 'processed_team_defense'

    print("processed_team_defense")
    db.query("TRUNCATE TABLE `processed_team_defense`")

    year_bounds = db.query(
        "SELECT MIN(year), MAX(year) FROM processed_league_averages_pitching"
    )[0]
    first_year, last_year = year_bounds

    for season in range(first_year, last_year + 1):
        print("%s\tdefense" % (season,))

        season_rows = process_defense(season)
        # Compared against [] (not truthiness) so any other return value
        # is still handed to the insert.
        if season_rows != []:
            db.insertRowDict(season_rows, table, replace=True, insertMany=True, rid=0)
        db.conn.commit()
def process_hitter_grades(year, entry, row_val):
    """Store one hitter's present/future tool grades in ``fg_grades_hitters``.

    ``entry`` supplies the ``grades_id``; ``row_val`` supplies the raw
    grade fields, each of which is passed through ``ifzero`` before being
    stored.  A missing field raises KeyError.
    """
    # (destination column, source field), in the order the values are read
    column_sources = (
        ('Hit_present', 'pHit'),
        ('Hit_future', 'fHit'),
        ('GamePower_present', 'pGame'),
        ('GamePower_future', 'fGame'),
        ('RawPower_present', 'pRaw'),
        ('RawPower_future', 'fRaw'),
        ('Speed_present', 'pSpd'),
        ('Speed_future', 'fSpd'),
        ('Field_present', 'pFld'),
        ('Field_future', 'fFld'),
        ('Throws_present', 'pArm'),
        ('Throws_future', 'fArm'),
        ('Max_EV', 'Max_EV'),
        ('HardHit_Pct', 'HardHit%'),
    )

    grade_entry = {'year': year, 'grades_id': entry['grades_id']}
    for column, source in column_sources:
        grade_entry[column] = ifzero(row_val[source])

    db.insertRowDict(grade_entry, 'fg_grades_hitters', replace=True, debug=1)
    db.conn.commit()
def update_rankings():
    """Refresh ``tv_show_grades`` with the runtime of every show.

    For each row of ``tv_show_data`` the matching grades row is read and
    the approximate runtime in hours (episodes * episode_length / 60) is
    computed.  When no grades row exists, or the runtime cannot be
    computed, a placeholder row is created first and all grade fields are
    stored as 0.
    """
    grades_sql = (
        'SELECT show_name, genre, episode_length, peak_grade, consistency_grade, '
        'adjustment, overall_grade '
        'FROM tv_show_grades '
        'WHERE show_name = "%s";'
    )

    for show_name, seasons, episodes, eps_per_season in db.query("SELECT * FROM tv_show_data;"):
        lookup = grades_sql % (show_name)

        try:
            foo, genre, ep_len, peak, consistency, adj, grade = db.query(lookup)[0]
            runtime_hrs = float(episodes*ep_len)/60.0
        except (IndexError, TypeError):
            # No grades row (IndexError) or a value that cannot be
            # multiplied (TypeError): seed the show and fall back to zeros.
            db.insertRowDict({"show_name": show_name}, 'tv_show_grades', insertMany=False, replace=True, rid=0, debug=1)
            db.conn.commit()
            genre = ep_len = peak = consistency = adj = runtime_hrs = grade = 0

        graded = {
            'show_name': show_name,
            'genre': genre,
            'episode_length': ep_len,
            'peak_grade': peak,
            'consistency_grade': consistency,
            'adjustment': adj,
            'approx_runtime_hours': runtime_hrs,
            'overall_grade': grade,
        }
        db.insertRowDict(graded, 'tv_show_grades', insertMany=False, replace=True, rid=0, debug=1)
        db.conn.commit()
def process_8a(url, table_name):
    """Scrape the ascents table from the scorecard page at `url` and store it.

    The page is fetched and parsed, the table that follows the HTML comment
    "Ascents" is located, and every table row with exactly nine cells is
    converted into one entry (date, name, area, grades, points, flags,
    duration details parsed from the comment).

    url        -- page to download (sent with the module-level `headers`).
    table_name -- destination table.  When it is None the page is still
                  parsed but nothing is written.  Otherwise every existing
                  row is first flagged updated = 'FALSE' and each scraped
                  entry is inserted with updated = 'TRUE'.

    NOTE(review): the original indentation of this function was lost; the
    nesting below is a reconstruction and should be checked against the
    real file.
    """
    html = requests.get(url, headers=headers)
    soup = BeautifulSoup(html.content, "lxml")
    print "\t\tgrabbed html"

    # Mark everything as stale; rows seen on the page are re-flagged below.
    if table_name != None:
        qry = "UPDATE %s SET updated = 'FALSE';" % table_name
        db.query(qry)
        db.conn.commit()

    # Collect the sibling nodes that follow the "Ascents" HTML comment,
    # stopping when the text node "List Options" is reached.
    ascent_data = []
    for comment in soup.find_all(string=lambda text:isinstance(text, Comment)):
        if comment.strip() == "Ascents":
            next_node = comment.next_sibling
            while next_node and next_node.next_sibling:
                ascent_data.append(next_node)
                next_node = next_node.next_sibling
                if not next_node.name and next_node.strip() == "List Options":
                    break;

    # The first non-blank collected node is taken to be the ascents table.
    # NOTE(review): ascents_info is never assigned if nothing was collected,
    # in which case the next statement raises NameError.
    for item in ascent_data:
        if str(item).strip() != "":
            ascents_info = item
            break

    ascents = ascents_info.find_all("tr")

    for i, ascent in enumerate(ascents):
        row = []
        ascent_cells = ascent.findAll("td")

        # Only rows with exactly nine cells are treated as ascents.
        if len(ascent_cells) == 9:
            entry = {}
            ascent_date = ascent_cells[0]
            grade = ascent_cells[1]
            flash = ascent_cells[2]
            boulder_name = ascent_cells[3]
            recommended = ascent_cells[4]
            areas = ascent_cells[5].getText()
            soft_hard_fa = ascent_cells[6].getText()
            comment = ascent_cells[7]
            stars = len(ascent_cells[8].getText())

            # Blank out the contents of the first <span> so that its text
            # does not end up in the name (presumably a hidden helper span).
            for span_tag in boulder_name.find("span"):
                span_tag.replace_with("")
            boulder_name = boulder_name.getText().strip()
            if boulder_name[0] == "*":
                boulder_name = boulder_name[1:]

            # print str(i-1) + " of " + str(len(ascents)-2) + ": " + boulder_name

            # "area / sub area"; without a slash both fields get the whole text.
            try:
                area = areas.split("/")[0].strip()
                sub_area = areas.split("/")[1].strip()
            except IndexError:
                area = areas.strip()
                sub_area = areas.strip()

            for span_tag in ascent_date.find("span"):
                span_tag.replace_with("")
            ascent_date = ascent_date.getText().strip()
            ascent_date = datetime.strptime(ascent_date, "%y-%m-%d").date()

            # Translate the page's grade text into font / hueco grades and
            # base points via the boulders_grades lookup table.
            grade_JS = grade.getText()
            grade_qry = """SELECT font, hueco, 8a_points FROM boulders_grades WHERE 8a_javascript = "%s";"""
            grade_query = grade_qry % (grade_JS)
            euro_grade, v_grade, pts_base = db.query(grade_query)[0]

            # The ascent style is identified by the icon shown in the cell.
            if flash.find("img")["src"] == "/scorecard/images/56f871c6548ae32aaa78672c1996df7f.gif":
                flash = "FLASH"
            elif flash.find("img")["src"] == "/scorecard/images/e37046f07ac72e84f91d7f29f8455b58.gif":
                flash = "ONSIGHT"
            else:
                flash = None

            if "Soft" in soft_hard_fa:
                soft_hard = "SOFT"
            elif "Hard" in soft_hard_fa:
                soft_hard = "HARD"
            else:
                soft_hard = ""

            if "FA" in soft_hard_fa:
                fa = "FA"
            else:
                fa = None

            # Points: base value plus 50 for a flash, 100 for an onsight
            # and 20 for a first ascent.
            if flash == "FLASH":
                pts = pts_base+50
            elif flash == "ONSIGHT":
                pts = pts_base+100
            else:
                pts = pts_base

            if fa == "FA":
                pts += 20

            if recommended.find("img")["src"] == "/scorecard/images/UserRecommended_1.gif":
                recommended = "RECOMMENDED"
            else:
                recommended = None

            for span_tag in comment.find("span"):
                span_tag.replace_with("")
            comment = comment.getText().strip().replace("\n", "")

            # Comments may embed "Total_Duration={...}"; the braces are
            # parsed as a Python dict literal.
            if "Total_Duration" in comment:
                duration_dict = comment.split("Total_Duration")[1].split("}")[0].replace("=","").strip()+"}"
                try:
                    duration_dict = ast.literal_eval(duration_dict)
                    final_time = duration_dict.get('Final Time')
                    attempts = duration_dict.get('Attempts')
                    minutes = duration_dict.get('Minutes')
                    session = duration_dict.get('Sessions')
                except SyntaxError:
                    print '\nERROR:', boulder_name, '\n', duration_dict, '\n----------------\n'
                    final_time = None
                    attempts = 0
                    minutes = 0
                    session = 0
            else:
                final_time = None
                attempts = None
                minutes = None
                session = None

            if "*Bounty Extra Soft*." in comment:
                soft_hard = "BOUNTY EXTRA SOFT"

            # NOTE(review): assumed to be independent of the bounty check
            # above -- two attempts add 2 points.
            if attempts == 2:
                pts += 2

            updated = "TRUE"

            entry_columns = ["ascent_date", "boulder_name", "area", "sub_area", "v_grade", "euro_grade", "8a_pts", "flash", "soft_hard", "stars", "fa", "recommended", "final_time", "est_attempts", "est_minutes", "est_sessions", "comment", "updated"]
            row = [ascent_date, boulder_name, area, sub_area, v_grade, euro_grade, pts, flash, soft_hard, stars, fa, recommended, final_time, attempts, minutes, session, comment, updated]

            # Drop every non-ASCII character from text values before storing.
            for j, item in enumerate(row):
                if type(item) in (str, unicode) and item != '':
                    row[j] = "".join([k if ord(k) < 128 else "" for k in item])

            entry = {}
            for i,j in zip(entry_columns, row):
                entry[i] = j

            if table_name is not None:
                db.insertRowDict(entry, table_name, insertMany=False, replace=True, rid=0, debug=1)
                db.conn.commit()
# print 'nope', header.text pass else: print(header.text) category = header.text sib = header.find_next_sibling('dl') # raw_input(sib) for a in sib.find_all('a'): entry = {} # raw_input(a) _name = a.text.encode('utf-8') # add_date = a['add_date'] # add_date = datetime.date.fromtimestamp(float(add_date)) href = a['href'] try: tags = a['tags'] except KeyError: tags = '' append_csv.writerow(['"' + str(category) + '"', '"' + str(_name) + '"', '"' + str(tags) + '"', '"' + str(href) + '"']) entry['category_name'] = category entry['name'] = _name entry['tags'] = tags entry['link'] = href db.insertRowDict(entry, 'bookmarks', insertMany=False, replace=True, rid=0, debug=1) db.conn.commit()
def enter_completed(entry2):
    """Turn a finished ``boulders_tried`` entry into a ``boulders_completed`` row.

    ``entry2`` is modified in place: the ascent date and final time are
    derived from the session's date, start time and minutes; the font
    grade and points are looked up from the hueco grade; attempts,
    minutes and sessions are totalled with all earlier uncompleted
    sessions on the same boulder; and the comment is prefixed with a
    ``Total_Duration={...}. `` summary.  ``est_time`` and ``est_date`` are
    removed before the row is inserted.

    If the grade lookup does not return exactly one row, or the history
    lookup returns more than one, an error is printed and nothing is
    inserted.  (These branches used to reference an undefined name and
    raised NameError instead of reporting the problem.)
    """
    entry2['ascent_date'] = entry2.get('est_date')

    # Final time = session start time + minutes spent in that session.
    prev_time = entry2.get("est_time")
    prev_dtime = datetime(100, 1, 1, prev_time.hour, prev_time.minute, prev_time.second)
    final_time = prev_dtime + timedelta(minutes=int(entry2.get("est_minutes")))
    entry2["final_time"] = final_time.time()

    grade_query = "SELECT MAX(font) AS font, 8a_points FROM boulders_grades WHERE hueco = '%s' GROUP BY hueco" % (entry2.get("v_grade"))
    grade_res = db.query(grade_query)
    if len(grade_res) != 1:
        print("\n\n\nERROR %s HAS LENGTH %s" % (str(entry2.get("v_grade")), len(grade_res)))
        # Without a grade there are no points to store.
        return

    euro_grade, pts_base = grade_res[0]
    entry2['euro_grade'] = euro_grade
    pts = pts_base

    # Totals of every earlier, uncompleted session on this boulder.
    previous_qry = (
        'SELECT SUM(est_attempts) AS attempts, SUM(est_minutes) AS minutes, COUNT(*) AS sessions '
        'FROM boulders_tried '
        'WHERE boulder_name = "%s" AND area = "%s" AND sub_area = "%s" AND completed != "TRUE" '
        'GROUP BY boulder_name, area;'
    )
    previous_query = previous_qry % (entry2.get("boulder_name"), entry2.get("area"), entry2.get("sub_area"))
    previous_res = db.query(previous_query)

    if len(previous_res) > 1:
        print("\n\n\nERROR %s HAS LENGTH %s" % (str(entry2.get("boulder_name")), len(previous_res)))
        # Ambiguous history: the totals cannot be computed.
        return
    elif len(previous_res) == 0:
        prev_att, prev_min, prev_sess = [0, 0, 0]
    else:
        prev_att, prev_min, prev_sess = previous_res[0]

    est_attempts = int(entry2.get("est_attempts")) + prev_att
    entry2["est_attempts"] = est_attempts

    # Bonus points: +50 for a flash, +2 for a second go, +20 for a first ascent.
    flash = None
    if est_attempts == 1:
        flash = "FLASH"
        pts += 50
    entry2["flash"] = flash

    if est_attempts == 2:
        pts += 2

    # NOTE(review): this reads the key "FA" (upper case) -- confirm that
    # callers really store the first-ascent flag under that key.
    if entry2.get("FA") == "FA":
        pts += 20

    entry2["8a_pts"] = pts

    est_minutes = int(entry2.get("est_minutes")) + prev_min
    entry2["est_minutes"] = est_minutes

    est_sessions = 1 + int(prev_sess)
    entry2["est_sessions"] = est_sessions

    comment = "Total_Duration={'Final Time':'" + str(final_time.hour).zfill(2) + ":" + str(final_time.minute).zfill(2) + "', 'Attempts':" + str(est_attempts) + ", 'Minutes':" + str(est_minutes) + ", 'Sessions':" + str(est_sessions) + "}. " + str(entry2.get("comment"))
    entry2['comment'] = comment

    # These fields are not sent to the boulders_completed insert.
    del entry2['est_time']
    del entry2['est_date']

    db.insertRowDict(entry2, 'boulders_completed', insertMany=False, replace=True, rid=0, debug=1)
    db.conn.commit()
def initiate():
    """Interactively record one attempted-boulder session in ``boulders_tried``.

    The most recent session is loaded as the default values, then the
    prompt steps (1-7 basic fields, 8-9 time, 10 completion) are run; each
    step returns the next step number together with the fields it sets.
    The session number is one more than the count of earlier sessions on
    the same boulder.
    """
    print("\nUpdate Attempted Boulder\n")

    last_qry = "SELECT boulder_name, area, sub_area, v_grade, est_attempts, est_minutes, est_date FROM boulders_tried ORDER BY est_date DESC, est_time DESC LIMIT 1;"
    (last_name, last_area, last_sub, last_grade,
     last_attempts, last_minutes, last_date) = db.query(last_qry)[0]

    entry = {
        'boulder_name': last_name,
        'area': last_area,
        'sub_area': last_sub,
        'v_grade': last_grade,
        'est_attempts': last_attempts,
        'est_minutes': last_minutes,
        'comment': "",
        'est_date': last_date,
        'est_time': datetime.strptime('00:00:00', '%H:%M:%S').time(),
    }

    # Each step decides which step runs next, so the loop can move
    # forwards or backwards.
    i = 1
    while i < 11:
        if 0 < i < 8:
            i, cats, entry_vals = process_basic(i, entry)
        elif 7 < i < 10:
            i, cats, entry_vals = process_time(i)
        elif 9 < i < 11:
            i, cats, entry_vals = process_complete(i, entry)

        entry.update((cat, val) for cat, val in zip(cats, entry_vals) if cat is not None)

    print("\n")

    sub_area = entry.get('sub_area')
    if sub_area is None:
        sa = ""
    else:
        sa = "\nAND sub_area = '%s'" % (sub_area)

    # Sessions on the same boulder that happened strictly before this one.
    prev_qry = (
        'SELECT est_date, est_time '
        'FROM boulders_tried '
        'WHERE boulder_name = "%s" AND area = "%s"%s '
        'AND (est_date < "%s" OR (est_date = "%s" AND est_time < "%s") );'
    )
    earlier = db.query(prev_qry % (
        entry.get('boulder_name'), entry.get('area'), sa,
        entry.get('est_date'), entry.get('est_date'), entry.get('est_time'),
    ))
    entry["session_num"] = len(earlier) + 1

    db.insertRowDict(entry, 'boulders_tried', insertMany=False, replace=True, rid=0, debug=1)
    db.conn.commit()
def process_update(row):
    """Backfill ``boulders_tried`` rows for one completed boulder.

    ``row`` carries the completed boulder's final figures plus how many
    sessions (``u_sessions``) and completed rows (``u_completed``) are to
    be added.  One placeholder session (1 attempt, 1 minute, time
    00:00:NN) is inserted for each non-completing session; when
    ``u_completed`` is 1 the completing session is inserted with the
    remaining attempts and minutes.  Finally ``comment_updater`` is called
    with the comment text that follows the ``Total_Duration`` summary.
    """
    (_date, boulder_name, area, sub_area, v_grade, final_comment,
     final_time, final_minutes, final_attempts, final_sessions,
     u_atts, u_mins, u_sessions, u_completed) = row

    backfill_count = u_sessions - u_completed
    update_comment = "".join(final_comment.split("}.")[1:]).replace("*Bounty Extra Soft*.","").strip()

    # Fields shared by every row written for this boulder.
    common = {
        "est_date": _date,
        "boulder_name": boulder_name,
        "area": area,
        "sub_area": sub_area,
        "v_grade": v_grade,
        "return_interest": None,
    }

    for sess_num in range(1, backfill_count + 1):
        entry = dict(common)
        entry.update({
            # the session number doubles as the seconds field so that
            # every placeholder gets a distinct time
            "est_time": '00:00:%s' % (str(sess_num).zfill(2)),
            "est_attempts": 1,
            "est_minutes": 1,
            "session_num": sess_num,
            "completed": "FALSE",
            "comment": "Backfill Comment #%s" % (sess_num),
        })
        db.insertRowDict(entry, 'boulders_tried', insertMany=False, replace=True, rid=0, debug=1)
        db.conn.commit()

    if u_completed == 1:
        entry = dict(common)
        entry.update({
            "est_time": final_time,
            # each placeholder already accounts for one attempt and one minute
            "est_attempts": final_attempts - backfill_count,
            "est_minutes": final_minutes - backfill_count,
            "session_num": final_sessions,
            "completed": "TRUE",
            "comment": update_comment,
        })
        db.insertRowDict(entry, 'boulders_tried', insertMany=False, replace=True, rid=0, debug=1)
        db.conn.commit()

    # NOTE(review): the source indentation was lost; this call is assumed
    # to run for every row, not only when u_completed == 1 -- confirm.
    comment_updater(boulder_name, area, sub_area, update_comment)