def doDailyClubs(infile, conn, cdate, firsttime=False):
    """Load one day's club list from a CSV export into the ``clubs`` table.

    Every data row is cleaned up, wrapped in a ``Club`` and compared with the
    entry that ``Club.getClubsOn`` returns for the day before ``cdate``:

    * a club that is missing from that history, or whose fields differ, gets
      a new ``clubs`` row plus one ``clubchanges`` row per changed field;
    * an unchanged club only has its ``lastdate`` moved forward to ``cdate``.

    Finally ``cdate`` is recorded in ``loaded`` for the "clubs" table.

    Args:
        infile: file-like object yielding the CSV text; the first row is the
            header.
        conn: database connection; only ``conn.cursor()`` is used here.
        cdate: date of the file as a ``YYYY-MM-DD`` string.
        firsttime: when true, clubs missing from the history are not logged
            as "New Club" in ``clubchanges``.

    Returns:
        None.  Returns before touching the database when the header has no
        ``clubnumber`` column.

    Side effects:
        Adds the value returned by each ``clubs`` INSERT/UPDATE to the
        module-level ``changecount``.  Calls ``sys.exit(3)`` if a club still
        compares as different right after being stored in the history.
    """
    global changecount
    from datetime import datetime, timedelta

    curs = conn.cursor()
    reader = csv.reader(infile)

    hline = next(reader)
    headers = cleanheaders(hline)

    if "clubnumber" not in headers:
        # Not a club file.  Stay quiet when the first cell starts with
        # '{"Message"' (presumably a JSON error reply rather than CSV).
        if not (hline and hline[0].startswith('{"Message"')):
            print("'clubnumber' not in '%s'" % hline)
        return

    # None (not False) marks "no such column": False would index row[0] in
    # the per-row test below and discard every row with a non-empty first cell.
    try:
        prospectiveclubcol = headers.index("prospectiveclub")
    except ValueError:
        prospectiveclubcol = None

    # Find out what fields we have in the database itself
    curs.execute("describe clubs")
    dbfields = set(l[0] for l in curs.fetchall())

    inform("clubs for", cdate, suppress=1)
    dbheaders = list(headers)

    # Convert between Toastmasters' names for address and location and ours;
    # they've changed it a few times.  *sigh*
    if "address1" in dbheaders:
        addrcol1 = dbheaders.index("address1")
    else:
        addrcol1 = dbheaders.index("location")
    if "address2" in dbheaders:
        addrcol2 = dbheaders.index("address2")
    else:
        addrcol2 = dbheaders.index("address")
    dbheaders[addrcol1] = "place"
    dbheaders[addrcol2] = "address"

    expectedheaderscount = len(dbheaders)
    dbheaders.append("firstdate")
    dbheaders.append("lastdate")  # For now...

    # These lookups still raise ValueError when the file lacks the column.
    areacol = dbheaders.index("area")
    divisioncol = dbheaders.index("division")
    statecol = dbheaders.index("state")

    # Now, suppress anything in the file that's not in the database.
    # 'suppress' is kept highest-index-first so that deleting one column from
    # a row never shifts a column that is still waiting to be deleted.
    oldheaders = dbheaders
    kept = [i for i, name in enumerate(oldheaders) if name in dbfields]
    suppress = [i for i, name in enumerate(oldheaders) if name not in dbfields]
    suppress.reverse()
    dbheaders = [oldheaders[i] for i in kept]
    Club.setfieldnames(dbheaders)

    # Removing suppressed columns moves everything to their right, so
    # translate the positions found above into positions in the trimmed row.
    # A column that was itself suppressed maps to None and is left alone.
    newpos = {old: new for new, old in enumerate(kept)}
    addrcol1 = newpos.get(addrcol1)
    addrcol2 = newpos.get(addrcol2)
    areacol = newpos.get(areacol)
    divisioncol = newpos.get(divisioncol)
    statecol = newpos.get(statecol)

    # We need to get clubs for the most recent update so we know whether to
    # update an entry or start a new one.
    yesterday = datetime.strftime(
        datetime.strptime(cdate, "%Y-%m-%d") - timedelta(1), "%Y-%m-%d")
    clubhist = Club.getClubsOn(curs, date=yesterday)

    for row in reader:
        if len(row) < expectedheaderscount:
            break  # we're finished
        if prospectiveclubcol is not None and row[prospectiveclubcol]:
            continue  # Ignore prospective clubs

        for i in suppress:
            del row[i]

        if len(row) > expectedheaderscount:
            # Special case...Millbrae somehow snuck two club websites in!
            # NOTE(review): this test runs after the suppressed columns were
            # removed but compares with the pre-suppression header count, and
            # 16/17 are fixed positions - confirm both match the file layout.
            row[16] = row[16] + "," + row[17]
            del row[17]

        # Now, clean up the address:
        # Address line 1 is "place" information and can be multiple lines.
        # Address line 2 is the real address and should be treated as one
        # line, with spaces normalized.
        if addrcol1 is not None:
            # NOTE(review): the separator is reproduced exactly as it appears
            # here (a single space); confirm it is the intended line delimiter.
            row[addrcol1] = "\n".join(
                [x.strip() for x in row[addrcol1].split(" ")])
        if addrcol2 is not None:
            row[addrcol2] = normalize(row[addrcol2])

        # Toastmasters is currently reversing the "Area" and "Division"
        # items.  "Area" should be a number; if not, swap the two.
        if areacol is not None and divisioncol is not None:
            try:
                int(row[areacol])
            except ValueError:
                row[areacol], row[divisioncol] = row[divisioncol], row[areacol]

        # Collapse state names into their abbreviations
        if statecol is not None and row[statecol] in statelist:
            row[statecol] = statelist[row[statecol]]

        # Get the right number of items into the row by setting today as the
        # tentative first and last date
        row.append(cdate)
        row.append(cdate)

        # And create the object
        club = Club(row)

        # Now, clean up things coming from Toastmasters
        if club.clubstatus.startswith(("Open", "None")):
            club.clubstatus = "Open"
        else:
            club.clubstatus = "Restricted"

        # Clean up the club and district numbers and the area
        club.clubnumber = cleanitem(club.clubnumber)
        club.district = cleanitem(club.district)
        club.area = cleanitem(club.area)

        # If a club is partially unassigned, mark it as completely unassigned.
        if club.area in ("0A", "0D") or club.division in ("0D", "0A"):
            club.area = "0A"
            club.division = "0D"

        # Clean up the charter date
        if not club.charterdate.strip():
            # This is a prospective club that Toastmasters didn't mark
            # properly; ignore it.
            continue
        club.charterdate = cleandate(club.charterdate)

        # Clean up advanced status
        club.advanced = "1" if (club.advanced != "") else "0"

        # Clean up online status
        club.allowsonlineattendance = (
            "1" if (club.allowsonlineattendance != "") else "0")

        # Add missing schemes to any URLs
        club.fixURLSchemes()

        # Now, take care of missing latitude/longitude
        if "latitude" in dbheaders:
            try:
                club.latitude = float(club.latitude)
            except ValueError:
                club.latitude = 0.0
        else:
            club.latitude = 0.0

        if "longitude" in dbheaders:
            try:
                club.longitude = float(club.longitude)
            except ValueError:
                club.longitude = 0.0
        else:
            club.longitude = 0.0

        # Sometimes, Toastmasters gets the latitude and longitude backwards.
        # If that turns out to create an impossible location (which it will
        # in California), let's swap them.
        if abs(club.latitude) > 90.0:
            club.latitude, club.longitude = club.longitude, club.latitude

        # And put it into the database if need be.  The last two headers
        # (firstdate/lastdate) are excluded from the comparison.
        known = club.clubnumber in clubhist
        if known:
            changes = different(club, clubhist[club.clubnumber],
                                dbheaders[:-2])
        else:
            changes = []

        if not known and not firsttime:
            # This is a new (or reinstated) club; note it in the changes
            # database.
            curs.execute(
                'INSERT IGNORE INTO clubchanges (clubnumber, changedate, item, old, new) VALUES (%s, %s, "New Club", "", "")',
                (club.clubnumber, cdate),
            )

        if not known or changes:
            club.firstdate = club.lastdate

            # Encode newlines in the place as double-semicolons for the
            # database, then put the place back into normal form.
            club.place = club.place.replace("\n", ";;")
            values = [club.__dict__[x] for x in dbheaders]
            club.place = club.place.replace(";;", "\n")

            thestr = ("INSERT IGNORE INTO clubs (" + ",".join(dbheaders) +
                      ") VALUES (" + ",".join(["%s"] * len(values)) + ");")
            try:
                changecount += curs.execute(thestr, values)
            except Exception as e:
                # Best effort: report the failure and carry on with the file.
                print(e)

            # Capture changes
            for (item, old, new) in changes:
                if item == "place":
                    # Clean up the place (old and new) for the database
                    old = old.replace("\n", ";;")
                    new = new.replace("\n", ";;")
                try:
                    curs.execute(
                        "INSERT IGNORE INTO clubchanges (clubnumber, changedate, item, old, new) VALUES (%s, %s, %s, %s, %s)",
                        (club.clubnumber, cdate, item, old, new),
                    )
                except Exception as e:
                    print(e)

            clubhist[club.clubnumber] = club
            # Sanity check: the club must now compare equal to its own
            # history entry.
            if different(club, clubhist[club.clubnumber], dbheaders[:-2]):
                print("it's different after being set.")
                sys.exit(3)
        else:
            # update the lastdate
            changecount += curs.execute(
                "UPDATE clubs SET lastdate = %s WHERE clubnumber = %s AND lastdate = %s;",
                (cdate, club.clubnumber, clubhist[club.clubnumber].lastdate),
            )

    # If all the files were processed, today's work is done.
    curs.execute(
        'INSERT IGNORE INTO loaded (tablename, loadedfor) VALUES ("clubs", %s)',
        (cdate, ),
    )
def overrideClubs(clubs, newAlignment, exclusive=True):
    """Update ``clubs`` in place from a "test alignment" CSV and return it.

    Typically used at a TM year boundary or for planning realignment.

    Each CSV row is matched to a club through its ``clubnumber`` column; a
    club number not yet in ``clubs`` is added as a new ``simpleclub.Club``
    built from the row.  Every non-blank column then overrides the club's
    attribute of the same name, with these exceptions:

    * ``clubnumber`` and ``clubname`` are never copied (the name that is
      already on the club wins);
    * ``newarea`` has the form ``Xn`` and is split into ``division`` (first
      character) and ``area`` (the rest); the value ``0D0A`` is ignored.

    Any other column (for example ``likelytoclose``) is simply stored on the
    club.  When the attribute being replaced is numeric, the new value is
    converted to ``float`` (for a float) or ``int`` (otherwise) so the type
    stays the same.

    Args:
        clubs: dict of club objects keyed by club number.  Callers use
            different key types, so the type of an existing key decides how
            the CSV's club numbers are converted (int, float, or left as the
            string read from the file - also the choice for an empty dict).
        newAlignment: path of the CSV file.
        exclusive: when true (default), clubs that do not appear in the file
            are removed from ``clubs``; pass ``False`` to keep them.

    Returns:
        The same ``clubs`` dict, modified in place.
    """
    keepers = set()

    # Sadly, the callers of this routine have different types of key.
    # We have to be resilient against this (and against an empty dict, which
    # gives us no key to inspect).
    samplekey = next(iter(clubs), None)
    if isinstance(samplekey, int):
        fixup = int
    elif isinstance(samplekey, float):
        fixup = float
    else:
        fixup = lambda x: x

    # The 'with' block closes the file even if a row raises.
    with open(newAlignment, 'r') as pfile:
        reader = csv.DictReader(pfile)
        for row in reader:
            clubnum = fixup(row['clubnumber'])
            try:
                club = clubs[clubnum]
            except KeyError:
                from simpleclub import Club
                club = Club(list(row.values()),
                            fieldnames=list(row.keys()),
                            fillall=True)
                if 'district' not in row:
                    club.district = globals.parms.district
                clubs[clubnum] = club
            keepers.add(clubnum)

            # .get(): the column may be absent, and a short row yields None.
            newarea = row.get('newarea')
            if newarea and newarea != '0D0A':
                club.division = newarea[0]
                club.area = newarea[1:]

            for item in reader.fieldnames:
                if item in ('clubnumber', 'clubname', 'newarea'):
                    continue
                value = row[item]
                # DictReader fills the missing cells of a short row with
                # None; treat that like a blank cell.
                if value is None or not value.strip():
                    continue
                # Maintain the same type
                current = club.__dict__.get(item)
                if isinstance(current, numbers.Number):
                    if isinstance(current, float):
                        club.__dict__[item] = float(value)
                    else:
                        club.__dict__[item] = int(value)
                else:
                    club.__dict__[item] = value

    # Remove clubs not in the file:
    if exclusive:
        for c in list(clubs):
            if c not in keepers:
                del clubs[c]

    return clubs
def doDailyClubs(infile, conn, cdate, firsttime=False):
    """Load one day's club list from a CSV export into the ``clubs`` table.

    Every data row is cleaned up, wrapped in a ``Club`` and compared with the
    entry that ``Club.getClubsOn`` returns for the day before ``cdate``.  A
    club that is missing from that history, or whose fields differ, gets a
    new ``clubs`` row plus one ``clubchanges`` row per changed field.

    The body uses only constructs that are valid in both Python 2.7 and
    Python 3 (``next()``, ``print(...)``, ``except ... as``).

    Args:
        infile: file-like object yielding the CSV data; the first row is the
            header.
        conn: database connection; only ``conn.cursor()`` is used here.
        cdate: date of the file as a ``YYYY-MM-DD`` string.
        firsttime: when true, clubs missing from the history are not logged
            as "New Club" in ``clubchanges``.

    Returns:
        None.  Returns before touching the database when the header has no
        ``clubnumber`` column.

    Side effects:
        Adds the value returned by each ``clubs`` INSERT to the module-level
        ``changecount``.  Calls ``sys.exit(3)`` if a club still compares as
        different right after being stored in the history.
    """
    global changecount
    from datetime import datetime, timedelta

    curs = conn.cursor()
    reader = csv.reader(infile)

    hline = next(reader)
    headers = cleanheaders(hline)

    if 'clubnumber' not in headers:
        # Not a club file.  Stay quiet when the first cell starts with
        # '{"Message"' (presumably a JSON error reply rather than CSV).
        if not (hline and hline[0].startswith('{"Message"')):
            print("'clubnumber' not in '%s'" % hline)
        return

    # None (not False) marks "no such column": False would index row[0] in
    # the per-row test below and discard every row with a non-empty first cell.
    try:
        prospectiveclubcol = headers.index('prospectiveclub')
    except ValueError:
        prospectiveclubcol = None

    # Find out what fields we have in the database itself
    curs.execute("describe clubs")
    dbfields = set(l[0] for l in curs.fetchall())

    inform("clubs for", cdate, suppress=1)
    dbheaders = list(headers)

    # Convert between Toastmasters' names for address and location and ours;
    # they've changed it a few times.  *sigh*
    if 'address1' in dbheaders:
        addrcol1 = dbheaders.index('address1')
    else:
        addrcol1 = dbheaders.index('location')
    if 'address2' in dbheaders:
        addrcol2 = dbheaders.index('address2')
    else:
        addrcol2 = dbheaders.index('address')
    dbheaders[addrcol1] = 'place'
    dbheaders[addrcol2] = 'address'

    expectedheaderscount = len(dbheaders)
    dbheaders.append('firstdate')
    dbheaders.append('lastdate')  # For now...

    # These lookups still raise ValueError when the file lacks the column.
    areacol = dbheaders.index('area')
    divisioncol = dbheaders.index('division')
    statecol = dbheaders.index('state')

    # Now, suppress anything in the file that's not in the database.
    # 'suppress' is kept highest-index-first so that deleting one column from
    # a row never shifts a column that is still waiting to be deleted.
    oldheaders = dbheaders
    kept = [i for i, name in enumerate(oldheaders) if name in dbfields]
    suppress = [i for i, name in enumerate(oldheaders) if name not in dbfields]
    suppress.reverse()
    dbheaders = [oldheaders[i] for i in kept]
    Club.setfieldnames(dbheaders)

    # Removing suppressed columns moves everything to their right, so
    # translate the positions found above into positions in the trimmed row.
    # A column that was itself suppressed maps to None and is left alone.
    newpos = {old: new for new, old in enumerate(kept)}
    addrcol1 = newpos.get(addrcol1)
    addrcol2 = newpos.get(addrcol2)
    areacol = newpos.get(areacol)
    divisioncol = newpos.get(divisioncol)
    statecol = newpos.get(statecol)

    # We need to get clubs for the most recent update so we know whether to
    # update an entry or start a new one.
    yesterday = datetime.strftime(
        datetime.strptime(cdate, '%Y-%m-%d') - timedelta(1), '%Y-%m-%d')
    clubhist = Club.getClubsOn(curs, date=yesterday)

    for row in reader:
        if len(row) < expectedheaderscount:
            break  # we're finished
        if prospectiveclubcol is not None and row[prospectiveclubcol]:
            continue  # Ignore prospective clubs

        for i in suppress:
            del row[i]

        # Strip every cell and make sure it is text.  Byte strings (what the
        # csv module yields on Python 2) are decoded: Toastmasters usually
        # uses UTF-8 but occasionally uses Windows CP1252 on the wire.  On
        # Python 3 the csv module already yields text, so only the strip
        # applies.
        row = [t.strip() for t in row]
        if any(isinstance(t, bytes) for t in row):
            try:
                row = [t.decode("utf8") for t in row]
            except UnicodeDecodeError:
                row = [t.decode("CP1252") for t in row]

        if len(row) > expectedheaderscount:
            # Special case...Millbrae somehow snuck two club websites in!
            # NOTE(review): this test runs after the suppressed columns were
            # removed but compares with the pre-suppression header count, and
            # 16/17 are fixed positions - confirm both match the file layout.
            row[16] = row[16] + ',' + row[17]
            del row[17]

        # Now, clean up the address:
        # Address line 1 is "place" information and can be multiple lines.
        # Address line 2 is the real address and should be treated as one
        # line, with spaces normalized.
        if addrcol1 is not None:
            # NOTE(review): the separator is reproduced exactly as it appears
            # here (a single space); confirm it is the intended line delimiter.
            row[addrcol1] = '\n'.join(
                [x.strip() for x in row[addrcol1].split(' ')])
        if addrcol2 is not None:
            row[addrcol2] = normalize(row[addrcol2])

        # Toastmasters is currently reversing the "Area" and "Division"
        # items.  "Area" should be a number; if not, swap the two.
        if areacol is not None and divisioncol is not None:
            try:
                int(row[areacol])
            except ValueError:
                row[areacol], row[divisioncol] = row[divisioncol], row[areacol]

        # Collapse state names into their abbreviations
        if statecol is not None and row[statecol] in statelist:
            row[statecol] = statelist[row[statecol]]

        # Get the right number of items into the row by setting today as the
        # tentative first and last date
        row.append(cdate)
        row.append(cdate)

        # And create the object
        club = Club(row)

        # Now, clean up things coming from Toastmasters
        if club.clubstatus.startswith(('Open', 'None')):
            club.clubstatus = 'Open'
        else:
            club.clubstatus = 'Restricted'

        # Clean up the club and district numbers and the area
        club.clubnumber = cleanitem(club.clubnumber)
        club.district = cleanitem(club.district)
        club.area = cleanitem(club.area)

        # If a club is partially unassigned, mark it as completely unassigned.
        if club.area in ('0A', '0D') or club.division in ('0D', '0A'):
            club.area = '0A'
            club.division = '0D'

        # Clean up the charter date
        club.charterdate = cleandate(club.charterdate)

        # Clean up advanced status
        club.advanced = '1' if (club.advanced != '') else '0'

        # Clean up online status
        club.allowsonlineattendance = (
            '1' if (club.allowsonlineattendance != '') else '0')

        # Now, take care of missing latitude/longitude
        if 'latitude' in dbheaders:
            try:
                club.latitude = float(club.latitude)
            except ValueError:
                club.latitude = 0.0
        else:
            club.latitude = 0.0

        if 'longitude' in dbheaders:
            try:
                club.longitude = float(club.longitude)
            except ValueError:
                club.longitude = 0.0
        else:
            club.longitude = 0.0

        # Sometimes, Toastmasters gets the latitude and longitude backwards.
        # If that turns out to create an impossible location (which it will
        # in California), let's swap them.
        if abs(club.latitude) > 90.0:
            club.latitude, club.longitude = club.longitude, club.latitude

        # And put it into the database if need be.  The last two headers
        # (firstdate/lastdate) are excluded from the comparison.
        known = club.clubnumber in clubhist
        if known:
            changes = different(club, clubhist[club.clubnumber],
                                dbheaders[:-2])
        else:
            changes = []

        if not known and not firsttime:
            # This is a new (or reinstated) club; note it in the changes
            # database.
            curs.execute(
                'INSERT IGNORE INTO clubchanges (clubnumber, changedate, item, old, new) VALUES (%s, %s, "New Club", "", "")',
                (club.clubnumber, cdate))

        if not known or changes:
            club.firstdate = club.lastdate

            # Encode newlines in the place as double-semicolons for the
            # database, then put the place back into normal form.
            club.place = club.place.replace('\n', ';;')
            values = [club.__dict__[x] for x in dbheaders]
            club.place = club.place.replace(';;', '\n')

            thestr = ('INSERT IGNORE INTO clubs (' + ','.join(dbheaders) +
                      ') VALUES (' + ','.join(['%s'] * len(values)) + ');')
            try:
                changecount += curs.execute(thestr, values)
            except Exception as e:
                # Best effort: report the failure and carry on with the file.
                print(e)

            # Capture changes
            for (item, old, new) in changes:
                if item == 'place':
                    # Clean up the place (old and new) for the database
                    old = old.replace('\n', ';;')
                    new = new.replace('\n', ';;')
                try:
                    curs.execute(
                        'INSERT IGNORE INTO clubchanges (clubnumber, changedate, item, old, new) VALUES (%s, %s, %s, %s, %s)',
                        (club.clubnumber, cdate, item, old, new))
                except Exception as e:
                    print(e)

            clubhist[club.clubnumber] = club
            # Sanity check: the club must now compare equal to its own
            # history entry.
            if different(club, clubhist[club.clubnumber], dbheaders[:-2]):
                print('it\'s different after being set.')
                sys.exit(3)
def doDailyClubs(infile, conn, cdate, firsttime=False):
    """Load one day's club list from a CSV export into the ``clubs`` table.

    Every data row is cleaned up, wrapped in a ``Club`` and compared with the
    entry that ``Club.getClubsOn`` returns for the day before ``cdate``:

    * a club that is missing from that history, or whose fields differ, gets
      a new ``clubs`` row plus one ``clubchanges`` row per changed field;
    * an unchanged club only has its ``lastdate`` moved forward to ``cdate``.

    Finally ``cdate`` is recorded in ``loaded`` for the "clubs" table.

    Args:
        infile: file-like object yielding the CSV text; the first row is the
            header.
        conn: database connection; only ``conn.cursor()`` is used here.
        cdate: date of the file as a ``YYYY-MM-DD`` string.
        firsttime: when true, clubs missing from the history are not logged
            as "New Club" in ``clubchanges``.

    Returns:
        None.  Returns before touching the database when the header has no
        ``clubnumber`` column.

    Side effects:
        Adds the value returned by each ``clubs`` INSERT/UPDATE to the
        module-level ``changecount``.  Calls ``sys.exit(3)`` if a club still
        compares as different right after being stored in the history.
    """
    global changecount
    from datetime import datetime, timedelta

    curs = conn.cursor()
    reader = csv.reader(infile)

    hline = next(reader)
    headers = cleanheaders(hline)

    if "clubnumber" not in headers:
        # Not a club file.  Stay quiet when the first cell starts with
        # '{"Message"' (presumably a JSON error reply rather than CSV).
        if not (hline and hline[0].startswith('{"Message"')):
            print("'clubnumber' not in '%s'" % hline)
        return

    # None (not False) marks "no such column": False would index row[0] in
    # the per-row test below and discard every row with a non-empty first cell.
    try:
        prospectiveclubcol = headers.index("prospectiveclub")
    except ValueError:
        prospectiveclubcol = None

    # Find out what fields we have in the database itself
    curs.execute("describe clubs")
    dbfields = set(l[0] for l in curs.fetchall())

    inform("clubs for", cdate, suppress=1)
    dbheaders = list(headers)

    # Convert between Toastmasters' names for address and location and ours;
    # they've changed it a few times.  *sigh*
    if "address1" in dbheaders:
        addrcol1 = dbheaders.index("address1")
    else:
        addrcol1 = dbheaders.index("location")
    if "address2" in dbheaders:
        addrcol2 = dbheaders.index("address2")
    else:
        addrcol2 = dbheaders.index("address")
    dbheaders[addrcol1] = "place"
    dbheaders[addrcol2] = "address"

    expectedheaderscount = len(dbheaders)
    dbheaders.append("firstdate")
    dbheaders.append("lastdate")  # For now...

    # These lookups still raise ValueError when the file lacks the column.
    areacol = dbheaders.index("area")
    divisioncol = dbheaders.index("division")
    statecol = dbheaders.index("state")

    # Now, suppress anything in the file that's not in the database.
    # 'suppress' is kept highest-index-first so that deleting one column from
    # a row never shifts a column that is still waiting to be deleted.
    oldheaders = dbheaders
    kept = [i for i, name in enumerate(oldheaders) if name in dbfields]
    suppress = [i for i, name in enumerate(oldheaders) if name not in dbfields]
    suppress.reverse()
    dbheaders = [oldheaders[i] for i in kept]
    Club.setfieldnames(dbheaders)

    # Removing suppressed columns moves everything to their right, so
    # translate the positions found above into positions in the trimmed row.
    # A column that was itself suppressed maps to None and is left alone.
    newpos = {old: new for new, old in enumerate(kept)}
    addrcol1 = newpos.get(addrcol1)
    addrcol2 = newpos.get(addrcol2)
    areacol = newpos.get(areacol)
    divisioncol = newpos.get(divisioncol)
    statecol = newpos.get(statecol)

    # We need to get clubs for the most recent update so we know whether to
    # update an entry or start a new one.
    yesterday = datetime.strftime(
        datetime.strptime(cdate, "%Y-%m-%d") - timedelta(1), "%Y-%m-%d"
    )
    clubhist = Club.getClubsOn(curs, date=yesterday)

    for row in reader:
        if len(row) < expectedheaderscount:
            break  # we're finished
        if prospectiveclubcol is not None and row[prospectiveclubcol]:
            continue  # Ignore prospective clubs

        for i in suppress:
            del row[i]

        if len(row) > expectedheaderscount:
            # Special case...Millbrae somehow snuck two club websites in!
            # NOTE(review): this test runs after the suppressed columns were
            # removed but compares with the pre-suppression header count, and
            # 16/17 are fixed positions - confirm both match the file layout.
            row[16] = row[16] + "," + row[17]
            del row[17]

        # Now, clean up the address:
        # Address line 1 is "place" information and can be multiple lines.
        # Address line 2 is the real address and should be treated as one
        # line, with spaces normalized.
        if addrcol1 is not None:
            # NOTE(review): the separator is reproduced exactly as it appears
            # here (a single space); confirm it is the intended line delimiter.
            row[addrcol1] = "\n".join(
                [x.strip() for x in row[addrcol1].split(" ")]
            )
        if addrcol2 is not None:
            row[addrcol2] = normalize(row[addrcol2])

        # Toastmasters is currently reversing the "Area" and "Division"
        # items.  "Area" should be a number; if not, swap the two.
        if areacol is not None and divisioncol is not None:
            try:
                int(row[areacol])
            except ValueError:
                row[areacol], row[divisioncol] = row[divisioncol], row[areacol]

        # Collapse state names into their abbreviations
        if statecol is not None and row[statecol] in statelist:
            row[statecol] = statelist[row[statecol]]

        # Get the right number of items into the row by setting today as the
        # tentative first and last date
        row.append(cdate)
        row.append(cdate)

        # And create the object
        club = Club(row)

        # Now, clean up things coming from Toastmasters
        if club.clubstatus.startswith(("Open", "None")):
            club.clubstatus = "Open"
        else:
            club.clubstatus = "Restricted"

        # Clean up the club and district numbers and the area
        club.clubnumber = cleanitem(club.clubnumber)
        club.district = cleanitem(club.district)
        club.area = cleanitem(club.area)

        # If a club is partially unassigned, mark it as completely unassigned.
        if club.area in ("0A", "0D") or club.division in ("0D", "0A"):
            club.area = "0A"
            club.division = "0D"

        # Clean up the charter date
        club.charterdate = cleandate(club.charterdate)

        # Clean up advanced status
        club.advanced = "1" if (club.advanced != "") else "0"

        # Clean up online status
        club.allowsonlineattendance = (
            "1" if (club.allowsonlineattendance != "") else "0"
        )

        # Add missing schemes to any URLs
        club.fixURLSchemes()

        # Now, take care of missing latitude/longitude
        if "latitude" in dbheaders:
            try:
                club.latitude = float(club.latitude)
            except ValueError:
                club.latitude = 0.0
        else:
            club.latitude = 0.0

        if "longitude" in dbheaders:
            try:
                club.longitude = float(club.longitude)
            except ValueError:
                club.longitude = 0.0
        else:
            club.longitude = 0.0

        # Sometimes, Toastmasters gets the latitude and longitude backwards.
        # If that turns out to create an impossible location (which it will
        # in California), let's swap them.
        if abs(club.latitude) > 90.0:
            club.latitude, club.longitude = club.longitude, club.latitude

        # And put it into the database if need be.  The last two headers
        # (firstdate/lastdate) are excluded from the comparison.
        known = club.clubnumber in clubhist
        if known:
            changes = different(
                club, clubhist[club.clubnumber], dbheaders[:-2]
            )
        else:
            changes = []

        if not known and not firsttime:
            # This is a new (or reinstated) club; note it in the changes
            # database.
            curs.execute(
                'INSERT IGNORE INTO clubchanges (clubnumber, changedate, item, old, new) VALUES (%s, %s, "New Club", "", "")',
                (club.clubnumber, cdate),
            )

        if not known or changes:
            club.firstdate = club.lastdate

            # Encode newlines in the place as double-semicolons for the
            # database, then put the place back into normal form.
            club.place = club.place.replace("\n", ";;")
            values = [club.__dict__[x] for x in dbheaders]
            club.place = club.place.replace(";;", "\n")

            thestr = (
                "INSERT IGNORE INTO clubs ("
                + ",".join(dbheaders)
                + ") VALUES ("
                + ",".join(["%s"] * len(values))
                + ");"
            )
            try:
                changecount += curs.execute(thestr, values)
            except Exception as e:
                # Best effort: report the failure and carry on with the file.
                print(e)

            # Capture changes
            for (item, old, new) in changes:
                if item == "place":
                    # Clean up the place (old and new) for the database
                    old = old.replace("\n", ";;")
                    new = new.replace("\n", ";;")
                try:
                    curs.execute(
                        "INSERT IGNORE INTO clubchanges (clubnumber, changedate, item, old, new) VALUES (%s, %s, %s, %s, %s)",
                        (club.clubnumber, cdate, item, old, new),
                    )
                except Exception as e:
                    print(e)

            clubhist[club.clubnumber] = club
            # Sanity check: the club must now compare equal to its own
            # history entry.
            if different(club, clubhist[club.clubnumber], dbheaders[:-2]):
                print("it's different after being set.")
                sys.exit(3)
        else:
            # update the lastdate
            changecount += curs.execute(
                "UPDATE clubs SET lastdate = %s WHERE clubnumber = %s AND lastdate = %s;",
                (cdate, club.clubnumber, clubhist[club.clubnumber].lastdate),
            )

    # If all the files were processed, today's work is done.
    curs.execute(
        'INSERT IGNORE INTO loaded (tablename, loadedfor) VALUES ("clubs", %s)',
        (cdate,),
    )