def __init__(self, layerParams, filterParams):
    """Initialise the filter configuration.

    Each setting is read from layerParams first, falling back to
    filterParams when the key is absent (per-layer override of the
    filter-wide defaults).
    """
    Filter.__init__(self, layerParams, filterParams)

    def _setting(key):
        # Layer-level configuration wins; the filter params are the default.
        try:
            return layerParams[key]
        except KeyError:
            return filterParams[key]

    # The original repeated the try/except fallback six times; the helper
    # above removes the copy-paste. Only time_change is numeric.
    self.time_change = utils.num(_setting('time_change'))
    self.units = _setting('units')
    self.frequency = _setting('frequency')
    self.season = _setting('season')
    self.start_date = _setting('start_date')
    self.end_date = _setting('end_date')
def __init__(self, layerParams, filterParams):
    """Initialise the smoothing-filter parameters.

    Numeric settings come from layerParams, falling back to filterParams
    when a key is missing (per-layer override of filter-wide defaults).
    """
    Filter.__init__(self, layerParams, filterParams)

    def _num_setting(key):
        # Layer-level configuration wins over the filter-wide default.
        try:
            return utils.num(layerParams[key])
        except KeyError:
            return utils.num(filterParams[key])

    # window_length / polyorder: presumably Savitzky-Golay smoothing
    # parameters -- TODO confirm against the filter implementation.
    self.window_length = _num_setting('window_length')
    self.polyorder = _num_setting('polyorder')
def run(self, timeserieData, longitude=None, latitude=None):
    """Apply Hagen's quality filter to a time series.

    Returns the filtered series, or an empty list when the configured
    quality layer is not loaded.
    """
    if not loader.hasLayer(self.quality_layer):
        return []
    # Normalise string-typed configuration values before use.
    self.fill_value = utils.num(self.fill_value)
    self.quality_layer_n_composites = utils.num(self.quality_layer_n_composites)
    quality = self.getQualityValues(longitude, latitude)
    return hagens_filter.run(
        timeserieData,
        quality,
        self.quality_layer_n_composites,
        self.getGoodValues(),
        self.fill_value,
    )
def load_features_csv(self, input_file, file_id):
    """Load Feature objects from a delimited peak file.

    Expects a header row followed by rows of either
    4 columns (id, mz, rt, intensity) or
    5 columns (id, mz, rt, intensity, identification -- unused).
    Rows with any other column count are skipped.
    Returns an empty list when input_file does not exist.
    """
    features = []
    if not os.path.exists(input_file):
        return features
    delim = self.detect_delimiter(input_file)
    with open(input_file, 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter=delim)
        next(reader, None)  # skip the headers
        for elements in reader:
            # BUG FIX: the original appended unconditionally, so a row with
            # an unexpected column count re-appended the previous feature,
            # or raised NameError when it was the first data row.
            if len(elements) not in (4, 5):
                continue
            feature_id = utils.num(elements[0])
            mz = utils.num(elements[1])
            rt = utils.num(elements[2])
            intensity = utils.num(elements[3])
            # column 5 (identification), when present, is intentionally unused
            features.append(Feature(feature_id, mz, rt, intensity, file_id))
    return features
def load_features_txt(self, input_file, file_id):
    """Load Feature objects from a tab-separated text file.

    The first line is a header; feature ids are assigned sequentially
    starting from 1. Returns an empty list when the file does not exist.
    """
    features = []
    if not os.path.exists(input_file):
        return features
    with open(input_file, 'rb') as csvfile:
        rows = csv.reader(csvfile, delimiter='\t')
        next(rows, None)  # skip the headers
        for feature_id, elements in enumerate(rows, start=1):
            features.append(Feature(feature_id=feature_id,
                                    mass=utils.num(elements[0]),
                                    rt=utils.num(elements[1]),
                                    intensity=utils.num(elements[2]),
                                    file_id=file_id))
    return features
def load_features_csv(self, input_file, file_id):
    """Load Feature objects from a delimited peak file.

    Expects a header row followed by rows of either
    4 columns (id, mz, rt, intensity) or
    5 columns (id, mz, rt, intensity, identification -- unused).
    Rows with any other column count are skipped.
    Returns an empty list when input_file does not exist.
    """
    features = []
    if not os.path.exists(input_file):
        return features
    delim = self.detect_delimiter(input_file)
    with open(input_file, 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter=delim)
        next(reader, None)  # skip the headers
        for elements in reader:
            # BUG FIX: the original appended unconditionally, so a row with
            # an unexpected column count re-appended the previous feature,
            # or raised NameError when it was the first data row.
            if len(elements) not in (4, 5):
                continue
            feature_id = utils.num(elements[0])
            mz = utils.num(elements[1])
            rt = utils.num(elements[2])
            intensity = utils.num(elements[3])
            # the optional fifth column (identification) is intentionally unused
            features.append(Feature(feature_id, mz, rt, intensity, file_id))
    return features
def __init__(self, layerParams, datasourceParams):
    """Configure the Earth Engine datasource.

    Normalises numeric settings, resolves the date-parsing method by
    name, and builds the service-account credentials used for API calls.
    """
    Datasource.__init__(self, layerParams)
    self.expression = self.expression  # no-op kept on purpose: fails fast (AttributeError) if the base class did not set it
    self.fill_value = utils.num(self.fill_value)
    self.collection_id = self.collection_id.upper()
    self.pixel_resolution = utils.num(self.pixel_resolution)
    # Resolve the configured name into a bound method, e.g. "foo" -> self.fooDate.
    self.fn_parsedate = getattr(self, self.fn_parsedate + "Date")
    self.nThreads = int(datasourceParams['threads'])
    privateKeyFilepath = os.path.join(datasourceParams['run_path'],
                                      datasourceParams['private_key'])
    # BUG FIX: the credentials object was built twice (exact duplicate
    # statement); building it once is sufficient.
    self.credentials = ee.ServiceAccountCredentials(datasourceParams['account'],
                                                    privateKeyFilepath)
def load_transformation(self, transformation):
    """Load Transformation records from a comma-separated file.

    Each row holds (name, sub, mul, iso); ids are assigned sequentially
    from 1. Returns an empty list when the file does not exist.
    """
    transformations = []
    if not os.path.exists(transformation):
        return transformations
    with open(transformation, 'rb') as csvfile:
        for index, elements in enumerate(csv.reader(csvfile, delimiter=','), start=1):
            transformations.append(Transformation(index,
                                                  elements[0],
                                                  utils.num(elements[1]),
                                                  utils.num(elements[2]),
                                                  utils.num(elements[3])))
    return transformations
def parse_thursday(thday):
    """Scrape one Thursday box-office page and persist films and figures.

    thday: dict with a 'thursday' datetime; its date keys the boxoffice rows.
    """
    print(get_thursday(thday['thursday']))
    time.sleep(4)  # throttle requests to the remote site
    d, e = get_page(get_thursday(thday['thursday']))
    ts = d.select('section.events__table table')
    rs = ts[0].select('tr')
    for r in rs[1:]:  # first row is the table header
        cs = r.select('td')
        film = {}
        boxoffice = {'thursday': str(thday['thursday'].date())}
        for idx, c in enumerate(cs):
            print(idx, c)
            if idx == 0:
                boxoffice['pos'] = c.text
                print('\tpos: ', boxoffice['pos'])
            if idx == 1:
                film['title'] = c.text
                print('\ttitle: ', film['title'])
                film['page'] = c.select_one('a')['href']
                print('\tpage: ', film['page'])
                film['id'] = c.select_one('a')['rel'][0]
                boxoffice['film'] = film['id']
                print('\tname: ', film['id'])
            if idx == 2:
                boxoffice['distributor'] = c.text
                print('\tdistributor: ' + boxoffice['distributor'])
            if idx == 3:
                boxoffice['thursday_rur'] = num(c.text)
                # BUG FIX: num() returns a number, and str + number raised
                # TypeError here; convert explicitly for the log line
                # (sibling parse_weekend prints c.text before converting).
                print('\tthursdayRur: ' + str(boxoffice['thursday_rur']))
        save_film(film)
        save_thursday_boxoffice(boxoffice)
def __init__(self, layerParams, datasourceParams):
    """Configure the Earth Engine datasource with caching.

    Normalises numeric settings, resolves the date-parsing method by
    name, reads the optional ignore_filter flag, and builds the
    service-account credentials used for API calls.
    """
    Datasource.__init__(self, layerParams)
    self.expression = self.expression  # no-op kept on purpose: fails fast (AttributeError) if the base class did not set it
    self.fill_value = utils.num(self.fill_value)
    self.collection_id = self.collection_id.upper()
    self.pixel_resolution = utils.num(self.pixel_resolution)
    # Resolve the configured name into a bound method, e.g. "foo" -> self.fooDate.
    self.fn_parsedate = getattr(self, self.fn_parsedate + "Date")
    self.nThreads = int(datasourceParams['threads'])
    # Optional flag stored as a string like "true"/"false"; None when absent.
    self.ignore_filter = (ast.literal_eval(self.ignore_filter.capitalize())
                          if hasattr(self, "ignore_filter") else None)
    privateKeyFilepath = os.path.join(datasourceParams['run_path'],
                                      datasourceParams['private_key'])
    self.credentials = ee.ServiceAccountCredentials(datasourceParams['account'],
                                                    privateKeyFilepath)
    self.cache = Cache()
def __init__(self):
    """Parse command-line options and run the configured data tests.

    Required flags: -r, -c, -w, -b, -e and -d (data set name);
    -h prints help and exits.
    """
    self.opts = {}
    self.args = {'nrows_lst': [], 'ncols_lst': [], 'winp_lst': []}
    self.begin = 0
    self.end = 10
    self.data_set = None
    self.parse_args(sys.argv[1:])
    if '-h' in self.opts:
        self.print_help()
        exit()
    # All of these flags are mandatory; report the first one missing.
    missing = [o for o in ('-r', '-c', '-w', '-b', '-e') if o not in self.opts]
    if missing:
        raise Exception("Missing cmd line option : %s" % missing[0])
    if '-d' not in self.opts:
        raise Exception("Data set must be specified : %s" % DataSet.list_datasets())
    self.data_set = self.opts['-d']
    # Comma-separated numeric lists for rows / columns / window parameters.
    for flag, key in (('-r', 'nrows_lst'), ('-c', 'ncols_lst'), ('-w', 'winp_lst')):
        if flag in self.opts:
            self.args[key] = [utils.num(token) for token in self.opts[flag].split(',')]
    self.begin = int(self.opts['-b']) if '-b' in self.opts else 0
    self.end = int(self.opts['-e']) if '-e' in self.opts else 10
    self.dt = DataTest(data_set=self.data_set)
    self.dt.make_tests(**self.args)
def __init__(self):
    """Drive the data-test runner from command-line arguments.

    Required flags: -r, -c, -w, -b, -e and -d (data set name);
    -h prints help and exits.
    """
    self.opts = {}
    self.args = dict(nrows_lst=[], ncols_lst=[], winp_lst=[])
    self.begin = 0
    self.end = 10
    self.data_set = None
    self.parse_args(sys.argv[1:])
    if '-h' in self.opts:
        self.print_help()
        exit()
    for required in ('-r', '-c', '-w', '-b', '-e'):
        if required not in self.opts:
            raise Exception("Missing cmd line option : %s" % required)
    if '-d' in self.opts:
        self.data_set = self.opts['-d']
    else:
        raise Exception("Data set must be specified : %s" % DataSet.list_datasets())
    # Parse the comma-separated numeric lists for rows / columns / windows.
    for flag, key in zip(('-r', '-c', '-w'), ('nrows_lst', 'ncols_lst', 'winp_lst')):
        if flag in self.opts:
            self.args[key] = [utils.num(token) for token in self.opts[flag].split(',')]
    self.begin = int(self.opts['-b']) if '-b' in self.opts else 0
    self.end = int(self.opts['-e']) if '-e' in self.opts else 10
    self.dt = DataTest(data_set=self.data_set)
    self.dt.make_tests(**self.args)
def load_database(self, database):
    """Load DatabaseEntry records from a comma-separated molecule database.

    Rows carry (db_id, name, formula, mass[, rt]); 4-column rows default
    rt to 0, and rows with any other column count are ignored. Returns an
    empty list when the file does not exist.
    """
    moldb = []
    if not os.path.exists(database):
        return moldb
    with open(database, 'rb') as csvfile:
        for elements in csv.reader(csvfile, delimiter=','):
            n_cols = len(elements)
            if n_cols not in (4, 5):
                continue  # silently ignore malformed rows, as before
            rt = utils.num(elements[4]) if n_cols == 5 else 0
            moldb.append(DatabaseEntry(db_id=elements[0],
                                       name=elements[1],
                                       formula=elements[2],
                                       mass=utils.num(elements[3]),
                                       rt=rt))
    return moldb
def load_database(self, database):
    """Read molecule DatabaseEntry records from a CSV file.

    Rows carry (db_id, name, formula, mass[, rt]); the retention time
    defaults to 0 for 4-column rows. Missing file yields an empty list.
    """
    entries = []
    if not os.path.exists(database):
        return entries
    with open(database, 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        for row in reader:
            if len(row) == 5:
                entries.append(DatabaseEntry(db_id=row[0], name=row[1],
                                             formula=row[2],
                                             mass=utils.num(row[3]),
                                             rt=utils.num(row[4])))
            elif len(row) == 4:
                entries.append(DatabaseEntry(db_id=row[0], name=row[1],
                                             formula=row[2],
                                             mass=utils.num(row[3]),
                                             rt=0))
    return entries
def getConnection(self):
    """Open an authenticated IMAP4-SSL connection to Gmail.

    Selects the default mailbox, recording the server response in
    self.response and the message count in self.mailCount, then returns
    the live connection.
    """
    connection = imaplib.IMAP4_SSL('imap.googlemail.com')
    connection.login(self.email, self.password)
    self.response, mailCount = connection.select()
    self.mailCount = num(mailCount[0])
    return connection
def run(self, timeserieData, longitude=None, latitude=None):
    """Run Hagen's quality filter over the given time series.

    Yields the filtered series when the configured quality layer is
    available; otherwise an empty list.
    """
    if loader.hasLayer(self.quality_layer):
        # Configuration values arrive as strings; normalise them first.
        self.fill_value = utils.num(self.fill_value)
        self.quality_layer_n_composites = utils.num(self.quality_layer_n_composites)
        quality_values = self.getQualityValues(longitude, latitude)
        good_values = self.getGoodValues()
        return hagens_filter.run(timeserieData,
                                 quality_values,
                                 self.quality_layer_n_composites,
                                 good_values,
                                 self.fill_value)
    return []
def parse_weekend(week):
    """Scrape one weekend box-office page and persist films and figures.

    week: dict with a 'weekend' datetime -- its date keys the boxoffice rows.
    NOTE(review): the column meanings below are inferred from the printed
    labels; confirm against the site's actual table layout.
    """
    print(get_weekend(week['weekend']))
    time.sleep(4)  # throttle requests to the remote site
    d, e = get_page(get_weekend(week['weekend']))
    rs = d.select('table#krestable tr')
    for r in rs[1:]:  # first row is the table header
        cs = r.select('td')
        film = {}
        boxoffice = {'weekend': str(week['weekend'].date())}
        for idx, c in enumerate(cs):
            print(idx, c)
            if idx == 1:
                boxoffice['pos'] = c.text
                print('\tpos: ', boxoffice['pos'])
            if idx == 3:
                # title cell also carries the film page link and its rel id
                print('\ttitle: ', c.text)
                film['title'] = c.text
                print('\tpage: ', c.select_one('a')['href'])
                film['page'] = c.select_one('a')['href']
                print('\tname: ', c.select_one('a')['rel'])
                film['id'] = c.select_one('a')['rel'][0]
                boxoffice['film'] = c.select_one('a')['rel'][0]
            if idx == 4:
                print('\toriginal: ' + c.text)
                film['original'] = c.text
            if idx == 5:
                print('\tdistributor: ' + c.text)
                boxoffice['distributor'] = c.text
            if idx == 6:
                print('\tweekendRur: ' + c.text)
                boxoffice['weekend_rur'] = num(c.text)
            if idx == 8:
                print('\tscreens: ' + c.text)
                boxoffice['screens'] = num(c.text)
            if idx == 10:
                print('\tdays: ' + c.text)
                boxoffice['days'] = num(c.text)
            if idx == 11:
                print('\ttotalRur: ' + c.text)
                boxoffice['total_rur'] = num(c.text)
            if idx == 12:
                print('\tspectaculars: ' + c.text)
                boxoffice['spectaculars'] = num(c.text)
        save_film(film)
        save_weekend_boxoffice(boxoffice)
def load_features_sima(self, input_file, file_id):
    """Load Feature objects from a SIMA tab-separated output file.

    Columns: mass, charge, intensity, rt; m/z is mass divided by charge.
    Synthetic-data files carry three extra ground-truth columns
    (peak id, metabolite id, adduct type) attached for debugging.
    There is no header row. Missing file yields an empty list.
    """
    features = []
    if not os.path.exists(input_file):
        return features
    with open(input_file, 'rb') as csvfile:
        for feature_id, elements in enumerate(csv.reader(csvfile, delimiter='\t'), start=1):
            mass = float(elements[0]) / float(elements[1])  # m/z = mass / charge
            intensity = utils.num(elements[2])
            rt = utils.num(elements[3])
            feature = Feature(feature_id, mass, rt, intensity, file_id)
            if len(elements) > 4:  # for debugging with synthetic data
                gt_peak_id = utils.num(elements[4])  # parsed but not attached
                feature.gt_metabolite = utils.num(elements[5])
                feature.gt_adduct = elements[6]
            features.append(feature)
    return features
def load_features_sima(self, input_file, file_id):
    """Parse a SIMA tab-separated peak file into Feature objects.

    Each row carries mass, charge, intensity and rt; the stored mass is
    mass/charge. Rows longer than four columns also carry ground-truth
    peak/metabolite/adduct columns from synthetic data. No header row.
    """
    features = []
    if not os.path.exists(input_file):
        return features
    with open(input_file, 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter='\t')
        row_id = 1
        for row in reader:
            mz = float(row[0]) / float(row[1])  # divide mass by charge
            intensity = utils.num(row[2])
            rt = utils.num(row[3])
            peak = Feature(row_id, mz, rt, intensity, file_id)
            if len(row) > 4:
                # extra ground-truth columns present in synthetic data
                gt_peak_id = utils.num(row[4])  # read but unused
                gt_metabolite_id = utils.num(row[5])
                gt_adduct_type = row[6]
                peak.gt_metabolite = gt_metabolite_id
                peak.gt_adduct = gt_adduct_type
            features.append(peak)
            row_id += 1
    return features
def load_features_csv(self, input_file, file_id):
    """Load Feature objects from a delimited peak file.

    Supports three row layouts after the header:
      6 columns: id, mz, rt, into, maxo, intb (XCMS-style; maxo is used
                 as the working intensity)
      5 columns: id, mz, rt, intensity, identification (unused)
      4 columns: id, mz, rt, intensity
    Rows with any other column count are skipped.
    Returns an empty list when input_file does not exist.
    """
    features = []
    if not os.path.exists(input_file):
        return features
    delim = self.detect_delimiter(input_file)
    with open(input_file, 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter=delim)
        next(reader, None)  # skip the headers
        for elements in reader:
            n_cols = len(elements)
            # BUG FIX: unknown column counts used to append a stale or
            # undefined 'feature' (duplicate rows / NameError).
            if n_cols not in (4, 5, 6):
                continue
            feature_id = utils.num(elements[0])
            mz = utils.num(elements[1])
            rt = utils.num(elements[2])
            if n_cols == 6:
                # BUG FIX: this branch dropped file_id, unlike the others.
                feature = Feature(feature_id=feature_id, mass=mz, rt=rt,
                                  intensity=0, file_id=file_id)
                feature.into = utils.num(elements[3])  # integrated peak intensity
                feature.maxo = utils.num(elements[4])  # maximum peak intensity
                feature.intb = utils.num(elements[5])  # baseline-corrected integrated intensity
                feature.intensity = feature.maxo  # we will use this for now
            else:
                intensity = utils.num(elements[3])
                # 5-column rows carry an identification string, currently unused
                feature = Feature(feature_id, mz, rt, intensity, file_id)
            features.append(feature)
    return features
#Executa o BFAST result = [] if bfastIndex != -1: result = filters[bfastIndex].run(values, None, None, minimalSegmentSize, frequency, startDate, endDate) else: raise IndexError("Bfast filter could not be found.") #Acrescenta ao resultado as datas, os valores da tendência e os valores originais (agrupados ou não) datesList = [[i] for i in dates] utils.joinArray(datesList, result) result = datesList utils.joinArray(result, values) return { 'series': series, 'values': result } result = []; # for i in xrange(len(argv)): # print("argv["+str(i)+"]: " + argv[i]) if argv[1] == 'TS': result = time_series(argv[2], argv[3], argv[4]); elif argv[1] == 'BFAST': # layerId, startYear, endYear, interpolation, groupData, timeChange, timeChangeUnits, geoJsonGeometry result = trend(argv[2], utils.num(argv[3]), utils.num(argv[4]), argv[5], argv[6], utils.num(argv[7]), argv[8], argv[9]); print(result)
def search_phone(self):
    # Fetch message self.number over IMAP, skip multipart messages, scan the
    # body and Subject for phone-number-shaped digit groups, persist the
    # message plus any number with an area code, and bump self.found_phones.
    # Returns False on any fetch/parse failure; otherwise falls through (None).
    # NOTE(review): indentation was reconstructed from a collapsed paste;
    # confirm the nesting of the persistence section against the original.
    print 'processing message num: ' + str(self.number)
    try:
        response, message_data = self.imap.fetch(self.number, '(BODY.PEEK[HEADER])')
    except:
        print "Exception in HEADER"
        return False
    # message_data, the data structure returned by imaplib, encodes some data re: the request type
    raw_message = message_data[0][1]
    header = HeaderParser().parsestr(raw_message)
    if header['Content-Type'] is not None and 'multipart' in header['Content-Type']:
        print "INcorrect content type"
        # right now we're just skipping any multipart messages. this needs to
        # be rewritten to parse the text parts of said messages.
        return False
    try:
        response, message_data = self.imap.fetch(self.number, '(BODY.PEEK[TEXT])')
    except:
        print "Exception in TEXT"
        return False
    text_payload = message_data[0][1]
    # collect digit groups from both the body and the Subject header
    found_digits = number_re.findall(text_payload)
    found_digits += number_re.findall(header['Subject'])
    if len(found_digits) > 0:
        print "Message %d has numbers." % num(self.number)
        print found_digits
        ### need to cast the Date header into a MySQL object.
        ts = header['Date']
        print 'header date: ' + str(ts)
        if parse(ts) is not None:  # making sure the date header is not empty
            date = parse(ts)
            print 'about to insert into the database'
            ### sometimes it fails due to unicode issues
            print 'about to parse name and email from header'
            print 'header: ' + str(header['From'])
            try:
                name, email = email_re.match(header['From']).groups()[1:3]
            except:
                print "Unexpected error:", sys.exc_info()[0]
                return False
            print 'parsing name and email from FROM header: ' + str(name) + ', ' + str(email)
            try:
                # Truncation limits match the database column sizes.
                m = Message(
                    user=self.user,
                    sender=header['From'][:255],
                    recipient=header['To'][:255],
                    sender_name=str(name)[:255],
                    sender_email=email[:255],
                    subject=header['Subject'][:255],
                    date_add=date,
                    payload=str(text_payload[:65534])
                )
                m.save()
            except Exception as e:
                print "Can't save", "test", e
            # the phone number regexp will create lists like
            # ['','650','555','1212']; this collapses each list into a string.
            pure_digits = uniqify(map(''.join, found_digits))
            print 'We found pure digits: ' + str(pure_digits)
            for phone_number in pure_digits:
                if len(str(phone_number)) > 7:  # for now, we want numbers with area codes only.
                    print phone_number
                    PhoneNumber(value=phone_number, message=m, user=self.user, mailbox=self.mailbox).save()
                    self.found_phones += 1
print('\ttitle: ', cell.text) film['title'] = cell.text print('\tpage: ', cell.select_one('a')['href']) film['page'] = cell.select_one('a')['href'] print('\tname: ', cell.select_one('a')['name']) film['id'] = cell.select_one('a')['name'] boxoffice['film'] = cell.select_one('a')['name'] if index == 2: print('\toriginal: ' + cell.text) film['original'] = cell.text if index == 3: print('\tdistributor: ' + cell.text) boxoffice['distributor'] = cell.text if index == 4: print('\tscreens: ' + cell.text) boxoffice['screens'] = num(cell.text) if index == 5: print('\ttotalRur: ' + cell.text) boxoffice['total_rur'] = num(cell.text) if index == 6: print('\ttotalUsd: ' + cell.text) boxoffice['total_usd'] = num(cell.text) if index == 7: print('\tspectaculars: ' + cell.text) boxoffice['spectaculars'] = num(cell.text) if index == 8: print('\tdays: ' + cell.text) boxoffice['days'] = num(cell.text) save_film(film) save_boxoffice(boxoffice)
def searchPhone(self, message_number):
    # Older variant of search_phone: fetch the given message by number, skip
    # multipart content, find phone-number digit groups in the body, store
    # the message (date parsed via rfc822) and any long numbers found.
    # Returns False on failure; otherwise falls through (None).
    # NOTE(review): indentation was reconstructed from a collapsed paste;
    # confirm the nesting of the persistence section against the original.
    print 'processing message num: ' + str(message_number)
    try:
        response, message_data = self.imap.fetch(message_number, '(BODY.PEEK[HEADER])')
    except:
        print "Exception in HEADER"
        return False
    # message_data, the data structure returned by imaplib, encodes some data re: the request type
    raw_message = message_data[0][1]
    header = HeaderParser().parsestr(raw_message)
    if header['Content-Type'] is not None and 'multipart' in header['Content-Type']:
        print "INcorrect content type"
        # right now we're just skipping any multipart messages. this needs to
        # be rewritten to parse the text parts of said messages.
        return False
    try:
        response, message_data = self.imap.fetch(message_number, '(BODY.PEEK[TEXT])')
    except:
        print "Exception in TEXT"
        return False
    text_payload = message_data[0][1]
    found_digits = number_re.findall(text_payload)
    if found_digits != []:
        print "Message %d has numbers." % num(message_number)
        print found_digits
        ### need to cast the Date header into a MySQL object.
        ts = header['Date']
        print 'header date: ' + str(ts)
        if rfc822.parsedate_tz(ts) is not None:  # making sure the date header is not empty
            ts_tuple = rfc822.parsedate_tz(ts)
            # perhaps in the future we can instead set the ts_tuple to
            # (0,0,0,0,0,0,0) and interpret it in the UI as 'no date header',
            # assuming that is actually the problem. otherwise, we're setting
            # it to the date of the most recently received email... and this
            # could get awkward.
            # TODO: fix this once the UI is ready.
            ts_python_datetime = datetime.datetime(*(ts_tuple[0:6]))
            ts_mysql_datetime = ts_python_datetime.isoformat(' ')
            print 'about to insert into the database'
            ### sometimes it fails due to unicode issues
            print 'about to parse name and email from header'
            print 'header: ' + str(header['From'])
            try:
                name, email = email_re.match(header['From']).groups()[1:3]
            except:
                print "Unexpected error:", sys.exc_info()[0]
                return False
            print 'parsing name and email from FROM header: ' + str(name) + ', ' + str(email)
            try:
                # Truncation limits match the database column sizes.
                m = Message(sender=header['From'][:255],
                            recipient=header['To'][:255],
                            sender_name=str(name)[:255],
                            sender_email=email[:255],
                            subject=header['Subject'][:255],
                            date=ts_mysql_datetime,
                            payload=str(text_payload[:65534]))
                m.save()
            except:
                return False
            # the phone number regexp will create lists like
            # ['','650','555','1212']; this collapses each list into a string.
            pure_digits = uniqify(map(''.join, found_digits))
            print 'We found pure digits: ' + str(pure_digits)
            for phone_number in pure_digits:
                if len(str(phone_number)) > 7:  # for now, we want numbers with area codes only.
                    print phone_number
                    PhoneNumber(value=phone_number, message=m).save()
boxoffice['film'] = film['id'] print('\tname: ', film['id']) if idx == 2: boxoffice['distributor'] = c.text print('\tdistributor: ' + boxoffice['distributor']) if idx == 3: boxoffice['thursday_rur'] = num(c.text) print('\tthursdayRur: ' + boxoffice['thursday_rur']) save_film(film) save_thursday_boxoffice(boxoffice) for row in rows: cells = row.select('td') thursday = {} for index, cell in enumerate(cells): print(index, cell) if index == 0: thursday['title'] = cell.text print('\ttitle: ', thursday['title']) thursday['page'] = cell.select_one('a')['href'] print('\tpage: ', thursday['page']) parts = cell.select_one('a')['href'].split('/') thursday['thursday'] = parse(parts[-2], dayfirst=True) print('\tthursday: ', thursday['thursday']) if index == 1: thursday['total_rur'] = num(cell.text) print('\ttotalRur: ' + thursday['total_rur']) save_thursday(thursday) parse_thursday(thursday)
result = [] if bfastIndex != -1: result = filters[bfastIndex].run(values, None, None, minimalSegmentSize, frequency, startDate, endDate) else: raise IndexError("Bfast filter could not be found.") # Acrescenta ao resultado as datas, os valores da tendência e os valores originais (agrupados ou não) datesList = [[i] for i in dates] utils.joinArray(datesList, result) result = datesList utils.joinArray(result, values) return {'series': series, 'values': result} result = [] # for i in xrange(len(argv)): # print("argv["+str(i)+"]: " + argv[i]) if argv[1] == 'TS': result = time_series(argv[2], argv[3], argv[4]) elif argv[1] == 'BFAST': # layerId, startYear, endYear, interpolation, groupData, timeChange, timeChangeUnits, geoJsonGeometry result = trend(argv[2], utils.num(argv[3]), utils.num(argv[4]), argv[5], argv[6], utils.num(argv[7]), argv[8], argv[9]) print(result)
def getGoodValues(self):
    """Parse the comma-separated quality_layer_good_values setting into a
    list of numbers."""
    return [utils.num(token)
            for token in self.quality_layer_good_values.split(',')]
def getGoodValues(self):
    """Return the configured good quality values as a list of numbers.

    The setting is stored as a comma-separated string; each piece is
    converted with utils.num.
    """
    return list(map(utils.num, self.quality_layer_good_values.split(',')))
def searchPhone(self, message_number):
    # Fetch the given message by number over IMAP, skip multipart content,
    # scan the body for phone-number digit groups, persist the message
    # (date parsed via rfc822) and any number with an area code.
    # Returns False on failure; otherwise falls through (None).
    # NOTE(review): indentation was reconstructed from a collapsed paste;
    # confirm the nesting of the persistence section against the original.
    print 'processing message num: ' + str(message_number)
    try:
        response, message_data = self.imap.fetch(message_number, '(BODY.PEEK[HEADER])')
    except:
        print "Exception in HEADER"
        return False
    # message_data, the data structure returned by imaplib, encodes some data re: the request type
    raw_message = message_data[0][1]
    header = HeaderParser().parsestr(raw_message)
    if header['Content-Type'] is not None and 'multipart' in header['Content-Type']:
        print "INcorrect content type"
        # right now we're just skipping any multipart messages. this needs to
        # be rewritten to parse the text parts of said messages.
        return False
    try:
        response, message_data = self.imap.fetch(message_number, '(BODY.PEEK[TEXT])')
    except:
        print "Exception in TEXT"
        return False
    text_payload = message_data[0][1]
    found_digits = number_re.findall(text_payload)
    if found_digits != []:
        print "Message %d has numbers." % num(message_number)
        print found_digits
        ### need to cast the Date header into a MySQL object.
        ts = header['Date']
        print 'header date: ' + str(ts)
        if rfc822.parsedate_tz(ts) is not None:  # making sure the date header is not empty
            ts_tuple = rfc822.parsedate_tz(ts)
            # perhaps in the future we can instead set the ts_tuple to
            # (0,0,0,0,0,0,0) and interpret it in the UI as 'no date header',
            # assuming that is actually the problem. otherwise, we're setting
            # it to the date of the most recently received email... and this
            # could get awkward.
            # TODO: fix this once the UI is ready.
            ts_python_datetime = datetime.datetime(*(ts_tuple[0:6]))
            ts_mysql_datetime = ts_python_datetime.isoformat(' ')
            print 'about to insert into the database'
            ### sometimes it fails due to unicode issues
            print 'about to parse name and email from header'
            print 'header: ' + str(header['From'])
            try:
                name, email = email_re.match(header['From']).groups()[1:3]
            except:
                print "Unexpected error:", sys.exc_info()[0]
                return False
            print 'parsing name and email from FROM header: ' + str(name) + ', ' + str(email)
            try:
                # Truncation limits match the database column sizes.
                m = Message(
                    sender=header['From'][:255],
                    recipient=header['To'][:255],
                    sender_name=str(name)[:255],
                    sender_email=email[:255],
                    subject=header['Subject'][:255],
                    date=ts_mysql_datetime,
                    payload=str(text_payload[:65534])
                )
                m.save()
            except:
                return False
            # the phone number regexp will create lists like
            # ['','650','555','1212']; this collapses each list into a string.
            pure_digits = uniqify(map(''.join, found_digits))
            print 'We found pure digits: ' + str(pure_digits)
            for phone_number in pure_digits:
                if len(str(phone_number)) > 7:  # for now, we want numbers with area codes only.
                    print phone_number
                    PhoneNumber(value=phone_number, message=m).save()
boxoffice['total_rur'] = num(c.text) if idx == 12: print('\tspectaculars: ' + c.text) boxoffice['spectaculars'] = num(c.text) save_film(film) save_weekend_boxoffice(boxoffice) doc, err = get_page(urls['weekends']) rows = doc.select('table.calendar_year tbody tr') for row in rows: cells = row.select('td') weekend = {} for index, cell in enumerate(cells): print(index, cell) if index == 0: print('\ttitle: ', cell.text) weekend['title'] = cell.text print('\tpage: ', cell.select_one('a')['href']) weekend['page'] = cell.select_one('a')['href'] parts = cell.select_one('a')['href'].split('/') weekend['weekend'] = parse(parts[-2], dayfirst=True) if index == 1: print('\ttotalRur: ' + cell.text) weekend['total_rur'] = num(cell.text) if index == 3: print('\tfilms: ' + cell.text) weekend['films'] = num(cell.text) save_weekend(weekend) parse_weekend(weekend)