def get_all_mails_for_all_days(mails):
    """Group mails by calendar day, one entry per day between first and last mail.

    Mails whose parsed ``Date`` header has a year before 1998 are discarded.
    Every calendar day from the day of the earliest remaining mail up to and
    including the day of the latest one gets an entry, even when no mail was
    sent on that day.

    Args:
        mails: sequence of mail objects exposing ``headers['Date']``.

    Returns:
        A list of ``(current_date, mails_of_that_day)`` tuples in
        chronological order.  ``current_date`` is the parsed date of the first
        kept mail advanced by a whole number of days.  The list is empty when
        there is no mail dated 1998 or later.
    """
    if not mails:
        return []

    # sort_mails orders by calendar day, so the pre-1998 mails form a prefix
    # and filtering on the year is equivalent to skipping that prefix.
    dated = []
    for mail in sort_mails(mails):
        when = utils.dateParser(mail.headers['Date'])
        if when.year >= 1998:
            dated.append((when, mail))
    if not dated:
        return []

    # Bucket the mails per (year, month, day) in a single pass instead of
    # re-scanning the remaining list for every day.
    by_day = {}
    for when, mail in dated:
        by_day.setdefault((when.year, when.month, when.day), []).append(mail)

    first_date = dated[0][0]
    last_date = dated[-1][0]
    # Count calendar days via ordinals: a timedelta's ``.days`` depends on the
    # time of day and would miss the last day(s).  +1 makes the range inclusive.
    day_count = last_date.toordinal() - first_date.toordinal() + 1

    ret_list = []
    for offset in range(day_count):
        current_date = first_date + datetime.timedelta(offset)
        key = (current_date.year, current_date.month, current_date.day)
        ret_list.append((current_date, by_day.get(key, [])))
    return ret_list
def group_mails_by_week(mails):
    """Group mails by ISO calendar week.

    The mails are ordered by the calendar day of their parsed ``Date`` header
    and then bucketed per ISO ``(year, week)``.  Every week from the week of
    the earliest mail up to and including the week of the latest mail gets an
    entry, so weeks without any mail appear with an empty list.

    Args:
        mails: iterable of mail objects exposing ``headers['Date']``.

    Returns:
        A list of ``(iso_year, iso_week, mails_of_that_week)`` tuples in
        chronological order; an empty list when ``mails`` is empty.
    """
    import datetime  # local import: only needed to step from week to week

    dated = [(utils.dateParser(mail.headers['Date']), mail) for mail in mails]
    if not dated:
        return []
    dated.sort(key=lambda pair: pair[0].toordinal())

    # ``cursor`` is some date inside the week currently being filled; moving
    # it by seven days always lands in the next ISO week, which handles 52- and
    # 53-week years without special cases.
    cursor = dated[0][0]
    current = tuple(cursor.isocalendar()[:2])
    bucket = []
    ret_list = []
    for when, mail in dated:
        target = tuple(when.isocalendar()[:2])
        # Close the current week (and any empty weeks in between) until we
        # reach the week this mail belongs to; the mail itself is never lost.
        while target != current:
            ret_list.append((current[0], current[1], bucket))
            bucket = []
            cursor = cursor + datetime.timedelta(days=7)
            current = tuple(cursor.isocalendar()[:2])
        bucket.append(mail)

    # Flush the week that was still being filled when the mails ran out.
    ret_list.append((current[0], current[1], bucket))
    return ret_list
def get_mail_by_day(date, mails):
    """Collect the first contiguous run of mails sent on the given day.

    The mails are scanned in order.  Mails before the run that do not match
    are skipped; the scan stops at the first non-matching mail after at least
    one match was found.

    Args:
        date: object with ``year``, ``month`` and ``day`` attributes.
        mails: sequence of mail objects exposing ``headers['Date']``.

    Returns:
        A tuple ``(next_index, matching_mails)``.  ``next_index`` is the index
        of the first mail after the matching run (``len(mails)`` when the run
        reaches the end of the list), so a caller can resume from
        ``mails[next_index:]``.  When nothing matches, ``(0, [])`` is returned.
    """
    target = (date.year, date.month, date.day)
    ret_list = []
    for i, mail in enumerate(mails):
        mail_date = utils.dateParser(mail.headers['Date'])
        if (mail_date.year, mail_date.month, mail_date.day) == target:
            ret_list.append(mail)
        elif ret_list:
            # First mail past the run: everything before index i is consumed.
            return (i, ret_list)
    if ret_list:
        # The run extends to the end of the list, so all mails are consumed.
        return (len(mails), ret_list)
    return (0, ret_list)
# Example #4
# 0
def getDataframe(ticker,
                 dateRange=False,
                 dataFields=False,
                 sorting=0,
                 printError=False):
    '''
    Returns the dataframe corresponding to the passed ticker.
    Returns False if the dataframe is not present, if the requested values are
    not valid, or if the resulting selection is empty.

    The dataframe is taken from DatabaseDictionary when it has already been
    loaded into memory; otherwise it is read with
    loadDataframeFromFile(ticker + ".csv").

    dateRange must be a 2 element list [<start date>, <end date>]. Strings
    (date format "YYYY-MM-DD") are converted with dateParser; other values are
    compared against the dataframe index as they are. Both bounds are inclusive.
    dataFields must be a list of strings corresponding to the columns of the
    dataframe you want returned.
    sorting is accepted for backward compatibility but is currently not used;
    rows are returned in the order in which they are stored.
    printError prints the exception that caused a False return.
    '''
    try:
        # Hold the lock only for the dictionary lookup and release it exactly
        # once, whatever happens, so a later failure can never release a lock
        # this call no longer owns.
        DatabaseLock.acquire()
        try:
            dataframe = DatabaseDictionary[ticker]
        except KeyError:
            dataframe = None
        finally:
            DatabaseLock.release()

        if dataframe is None:
            #Dataframe has not yet been loaded into memory. Get from file
            dataframe = loadDataframeFromFile(ticker + ".csv")[1]

        if dateRange:
            start, end = dateRange[0], dateRange[1]
            if isinstance(start, str):
                start, end = dateParser(start), dateParser(end)
            mask = (dataframe.index >= start) & (dataframe.index <= end)
            if dataFields:
                dataframe = dataframe.loc[mask, dataFields]
            else:
                dataframe = dataframe.loc[mask]
        elif dataFields:
            dataframe = dataframe[dataFields]

        if len(dataframe) == 0:
            #Data is not available
            return False
        return dataframe
    except Exception as e:
        # Contract of this accessor: every failure is reported as False.
        if printError:
            print(e)
        return False
def sort_mails(mails):
    """Return a new list of the mails ordered by the day of their Date header.

    The sort key is the proleptic Gregorian ordinal of the parsed date, so
    mails sent on the same day keep their relative input order.
    """
    def day_ordinal(mail):
        return utils.dateParser(mail.headers['Date']).toordinal()

    ordered = list(mails)
    ordered.sort(key=day_ordinal)
    return ordered
def get_mails_by_day(mails, day, WEEK_DAY_dict = WEEK_DAYS):
    """Return the mails whose Date header falls on the weekday ``day``.

    The ISO weekday number of each parsed date is translated through
    ``WEEK_DAY_dict`` and the result is compared with ``day``.
    """
    def weekday_of(mail):
        iso_number = utils.dateParser(mail.headers['Date']).isoweekday()
        return WEEK_DAY_dict[iso_number]

    return [mail for mail in mails if weekday_of(mail) == day]
def filter_by_hour(mails, hour):
    """Return the mails whose parsed Date header has its ``hour`` equal to ``hour``."""
    return [
        mail
        for mail in mails
        if utils.dateParser(mail.headers['Date']).hour == hour
    ]
def count_mails_in_day(mails, day, WEEK_DAY_dict = WEEK_DAYS):
    """Count the mails whose Date header falls on the weekday ``day``.

    ``day`` is a value of WEEK_DAYS; each mail's ISO weekday number is looked
    up in ``WEEK_DAY_dict`` and compared with it.
    """
    return sum(
        1
        for mail in mails
        if WEEK_DAY_dict[utils.dateParser(mail.headers['Date']).isoweekday()] == day
    )