def get_all_mails_for_all_days(mails):
    """Bucket mails per calendar day over the period covered by ``mails``.

    The mails are ordered with ``sort_mails``; the leading mails dated
    before 1998 are skipped (if no mail is dated 1998 or later, nothing is
    skipped).

    Returns a list of ``(current_date, mails_of_that_day)`` tuples, one per
    consecutive calendar day from the first remaining mail up to and
    including the day of the last one.  ``current_date`` is the first
    mail's parsed date shifted by a whole number of days; days without any
    mail carry an empty list.  An empty input yields an empty list.
    """
    mails = sort_mails(mails)

    # Parse every Date header once instead of once per day scanned.
    dated = [(utils.dateParser(mail.headers['Date']), mail) for mail in mails]

    # Skip the leading run of pre-1998 mails.
    for position, (parsed, _) in enumerate(dated):
        if parsed.year >= 1998:
            dated = dated[position:]
            break

    if not dated:
        return []

    first_date = dated[0][0]
    last_date = dated[-1][0]

    # Group by (year, month, day); insertion keeps the sorted order in
    # every bucket.
    mails_per_day = {}
    for parsed, mail in dated:
        day_key = (parsed.year, parsed.month, parsed.day)
        mails_per_day.setdefault(day_key, []).append(mail)

    # Count calendar days via ordinals; "+ 1" makes the last day inclusive.
    # (A timedelta's .days truncates partial days and would drop it.)
    day_count = last_date.toordinal() - first_date.toordinal() + 1

    ret_list = []
    for offset in range(day_count):
        current_date = first_date + datetime.timedelta(days=offset)
        day_key = (current_date.year, current_date.month, current_date.day)
        ret_list.append((current_date, mails_per_day.get(day_key, [])))
    return ret_list
def group_mails_by_week(mails):
    """Group mails by ISO calendar week.

    The mails are ordered by the day of their ``Date`` header and then
    assigned to consecutive ISO weeks, starting with the week of the
    earliest mail and ending with the week of the latest one.

    Returns a list of ``(iso_year, iso_week, mails_in_week)`` tuples in
    chronological order.  Weeks inside that span which contain no mail are
    present with an empty list.  An empty input yields an empty list.
    """
    # Local import: only needed to look up how many ISO weeks a year has.
    import datetime

    mails = sorted(mails, key=lambda x: utils.dateParser(x.headers['Date']).toordinal())
    if not mails:
        return []

    start_date = utils.dateParser(mails[0].headers['Date']).isocalendar()
    current_year = start_date[0]
    current_week = start_date[1]

    ret_list = []
    new_mails = []
    for mail in mails:
        iso_date = utils.dateParser(mail.headers['Date']).isocalendar()
        target = (iso_date[0], iso_date[1])

        # Close weeks one at a time until the mail's own week is reached,
        # so the mail that triggers the change is kept, not skipped.
        while (current_year, current_week) != target:
            ret_list.append((current_year, current_week, new_mails))
            new_mails = []
            # 28 December always lies in the last ISO week of its year,
            # which tells us whether the year has 52 or 53 weeks.
            last_week = datetime.date(current_year, 12, 28).isocalendar()[1]
            if current_week >= last_week:
                current_week = 1
                current_year += 1
            else:
                current_week += 1

        new_mails.append(mail)

    # The week still being filled when the input ends must be emitted too.
    ret_list.append((current_year, current_week, new_mails))
    return ret_list
def get_mail_by_day(date, mails):
    """Collect the first contiguous run of mails dated on ``date``.

    ``mails`` is scanned from the start; a mail matches when the year,
    month and day of its parsed ``Date`` header equal those of ``date``.
    Scanning stops at the first non-matching mail after at least one match.

    Returns ``(next_index, matching_mails)`` where ``next_index`` is the
    position in ``mails`` right after the matched run (``len(mails)`` when
    the run reaches the end of the list).  When nothing matches the result
    is ``(0, [])``.
    """
    wanted = (date.year, date.month, date.day)
    ret_list = []
    for i, mail in enumerate(mails):
        mail_date = utils.dateParser(mail.headers['Date'])
        if (mail_date.year, mail_date.month, mail_date.day) == wanted:
            ret_list.append(mail)
        elif ret_list:
            # First mail past the run: i is exactly the resume position.
            return (i, ret_list)
    if ret_list:
        # The run extends to the end, so the resume position is one past
        # the last element rather than the last element's own index.
        return (len(mails), ret_list)
    return (0, ret_list)
def getDataframe(ticker, dateRange=False, dataFields=False, sorting=0, printError=False):
    '''
    Return the dataframe for the passed ticker, optionally filtered.

    The in-memory DatabaseDictionary is consulted first (under
    DatabaseLock).  If the ticker is not there, or the in-memory selection
    fails, the data is loaded from "<ticker>.csv" with
    loadDataframeFromFile (the second element of its return value is used
    as the dataframe).

    ticker: key into DatabaseDictionary and base name of the csv file.
    dateRange: falsy for no date filtering, otherwise a 2 element list
        [<start date>, <end date>]; rows whose index lies between the two
        (inclusive) are kept.  If the first element is a string, both
        elements are converted with dateParser (string format
        "YYYY-MM-DD").
    dataFields: falsy for all columns, otherwise a list of strings naming
        the columns of the dataframe you want returned.
    sorting: currently unused; kept so existing callers keep working.
    printError: if true, print the exception that made the file based
        lookup fail.

    Returns the selected dataframe, or False if the selection is empty,
    the data cannot be loaded, or the requested values are not valid.

    No sorting is applied here; the example in the original notes
    (df.iloc[0] = 2018-01-01, df.iloc[1] = 2018-01-02) shows oldest rows
    first.
    '''
    # Hold the lock only for the dictionary access and release it exactly
    # once.  Releasing in a catch-all handler would also run for failures
    # that happen after the lock was already released.
    inMemory = True
    DatabaseLock.acquire()
    try:
        dataframe = DatabaseDictionary[ticker]
    except Exception:
        # Dataframe has not yet been loaded into memory. Get from file.
        inMemory = False
    finally:
        DatabaseLock.release()

    if inMemory:
        try:
            dateRange = _parseDateRange(dateRange)
            dataframeReturn = _selectFromDataframe(dataframe, dateRange, dataFields)
            if len(dataframeReturn) == 0:
                # Data is not available
                return False
            return dataframeReturn
        except Exception:
            # Deliberate best effort: a failed in-memory selection falls
            # through to the file based lookup below.
            pass

    try:
        dateRange = _parseDateRange(dateRange)
        dataframe = loadDataframeFromFile(ticker + ".csv")[1]
        dataframe = _selectFromDataframe(dataframe, dateRange, dataFields)
        if len(dataframe) == 0:
            # Data is not available
            return False
        return dataframe
    except Exception as e:
        if printError:
            print(e)
        return False


def _parseDateRange(dateRange):
    # Convert a [start, end] pair of strings with dateParser; anything else
    # (falsy, or already parsed) is passed through unchanged.
    if dateRange and isinstance(dateRange[0], str):
        return [dateParser(dateRange[0]), dateParser(dateRange[1])]
    return dateRange


def _selectFromDataframe(dataframe, dateRange, dataFields):
    # Apply the optional inclusive index range and the optional column
    # selection to dataframe and return the result.
    if dateRange:
        mask = (dataframe.index >= dateRange[0]) & (dataframe.index <= dateRange[1])
        if dataFields:
            return dataframe.loc[mask, dataFields]
        return dataframe.loc[mask]
    if dataFields:
        return dataframe[dataFields]
    return dataframe
def sort_mails(mails):
    """Return a new list of ``mails`` ordered by the day of their Date header.

    The sort key is the ordinal of the parsed date, so mails from the same
    day keep their relative input order.
    """
    def day_ordinal(mail):
        return utils.dateParser(mail.headers['Date']).toordinal()

    ordered = list(mails)
    ordered.sort(key=day_ordinal)
    return ordered
def get_mails_by_day(mails, day, WEEK_DAY_dict = WEEK_DAYS):
    """Return the mails that were sent on the weekday ``day``.

    ``WEEK_DAY_dict`` is indexed with the ISO weekday number of each mail's
    parsed Date header; a mail is kept when the looked-up value equals
    ``day``.  The input order is preserved.
    """
    def weekday_of(message):
        iso_number = utils.dateParser(message.headers['Date']).isoweekday()
        return WEEK_DAY_dict[iso_number]

    return [message for message in mails if weekday_of(message) == day]
def filter_by_hour(mails, hour):
    """Return the mails whose parsed Date header has the given ``hour``.

    The input order is preserved.
    """
    return [
        message
        for message in mails
        if utils.dateParser(message.headers['Date']).hour == hour
    ]
def count_mails_in_day(mails, day, WEEK_DAY_dict = WEEK_DAYS):
    """Count the mails that were sent on the weekday ``day``.

    ``WEEK_DAY_dict`` is indexed with the ISO weekday number of each mail's
    parsed Date header; a mail is counted when the looked-up value equals
    ``day`` (``day`` is a value of WEEK_DAYS).
    """
    return sum(
        1
        for message in mails
        if WEEK_DAY_dict[utils.dateParser(message.headers['Date']).isoweekday()] == day
    )