def getstats(filepath, participant_dict, p_type, message_type='sms'):
    """Collect graph statistics around every survey of every participant.

    The message file is reduced to messages of ``message_type``. For each
    participant, the messages in which the participant is the source or the
    target are split, once per survey, into two sets:

    * "between": the 7 days ending at the survey's ``sr.s_time`` timestamp
    * "before": from ``pr.start_datetime`` up to the start of that window

    Both sets are handed to ``graphstats``.

    :param filepath: location of the message file, passed to ``filterfields``
    :param participant_dict: mapping of participant id to a mapping of
        survey key -> sequence whose first element is indexable by
        ``sr.s_time``
    :param p_type: forwarded unchanged to ``graphstats``
    :param message_type: value of the ``pr.m_type`` field to keep
    :return: ``{participant_id: {survey_key: {'between': ..., 'before': ...,
        'pid_dict': ...}}}``; participants without any message are omitted
    """
    ff = filterfields(filepath)
    ff.setdata(ff.filterbyequality(pr.m_type, message_type))
    # The study start does not depend on the participant or the survey, so it
    # is converted once instead of once per survey.
    original_start_date = ff.converttodate(pr.start_datetime)
    participant_stats = {}
    for participant_id in participant_dict:
        surveys = participant_dict[participant_id]
        p_data = ff.filterbyequality(pr.m_source, participant_id) + \
                 ff.filterbyequality(pr.m_target, participant_id)
        if not p_data:
            # %-formatting also copes with ids that are not strings
            print('no data exists for pid: %s' % (participant_id,))
            continue
        pid_dict = hlp.getuniqueparticipants(p_data)
        for survey_key in surveys:
            # single-argument print: same output under Python 2 and Python 3
            print('Participant no. %s  S.no.:  %s' % (participant_id, survey_key))
            survey_record = surveys[survey_key][0]
            end_date = ff.converttodate(survey_record[sr.s_time])
            start_date = end_date - dt.timedelta(days=7)
            data_between_dates = ff.filterbetweendates(start_date, end_date, data_to_work=p_data)
            data_start_to_date = ff.filterbetweendates(original_start_date, start_date, data_to_work=p_data)
            between_stats, before_stats = graphstats(data_start_to_date, data_between_dates, participant_id, p_type,
                                                     original_start_date, start_date, ff, pid_dict)
            # Accumulate per survey; rebinding participant_stats[participant_id]
            # to a new dict here would drop every survey except the last one.
            participant_stats.setdefault(participant_id, {})[survey_key] = {
                'between': between_stats,
                'before': before_stats,
                'pid_dict': pid_dict,
            }
    return participant_stats
def main():
    """Plot per-participant message counts and dump the filtered data.

    Command line arguments:

    * ``-f/--messageFile``: message file handed to ``filterfields``
    * ``-mt/--messageTypes``: message types to keep; when omitted no type
      filtering is applied
    * ``-o/--outputFolder`` and ``-of/--outputFile``: concatenated as-is to
      form the plot location, so the folder should end with a path separator
    * ``-pd/--participantDictionary``: path of a stored participant
      dictionary, loaded with ``hlp.recovervariable``; when omitted the
      dictionary is built from the data
    * ``-i/--ignoreParticipants``: path of a stored collection of participant
      ids whose messages (as source or target) are removed
    * ``-mc/--messageTypeConvert``: ``<old type> <new type>`` pairs used to
      rename message types
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('-f', '--messageFile', type=str, required=True)
    parser.add_argument('-mt', '--messageTypes', type=str, nargs='+')
    parser.add_argument('-o', '--outputFolder', type=str, required=True)
    parser.add_argument('-of', '--outputFile', type=str, required=True)
    parser.add_argument('-pd', '--participantDictionary', type=str)
    parser.add_argument('-i', '--ignoreParticipants', type=str)
    parser.add_argument('-mc', '--messageTypeConvert', type=str, nargs='*')

    args = parser.parse_args()

    message_file = args.messageFile
    message_types = args.messageTypes
    output_folder = args.outputFolder
    output_file = args.outputFile
    pid_dict = args.participantDictionary
    ignore_pids = args.ignoreParticipants
    message_type_conversions = args.messageTypeConvert

    # The conversions are consumed two at a time; reject a dangling value here
    # instead of failing with an IndexError after the data has been loaded.
    if message_type_conversions is not None and len(message_type_conversions) % 2 != 0:
        parser.error('-mc/--messageTypeConvert expects pairs: <old type> <new type> ...')

    ff = filterfields(message_file)
    # drop the first row -- presumably the header line; confirm against the file format
    ff.setdata(ff.getdata()[1:])

    # extract the relevant data; -mt is optional, so only filter when it was given
    if message_types is not None:
        to_set_data = []
        for message_type in message_types:
            to_set_data.extend(ff.filterbyequality(pr.m_type, message_type))
        ff.setdata(to_set_data)

    if ignore_pids is not None:
        ignore_pids = hlp.recovervariable(ignore_pids)
        for pid in ignore_pids:
            ff.removebyequality(pr.m_source, pid)
            ff.removebyequality(pr.m_target, pid)

    # set the pid to normal id dictionary
    if pid_dict is None:
        pid_dict = hlp.getuniqueparticipants(ff.getdata(), mtype='all', separate_pid_npid=True)
    else:
        # The argument is a path; load the stored dictionary the same way the
        # ignore list is loaded, otherwise the lookup below indexes a string.
        pid_dict = hlp.recovervariable(pid_dict)

    # replace the message type names with the ones provided
    if message_type_conversions is not None:
        conversion_pairs = zip(message_type_conversions[0::2], message_type_conversions[1::2])
        for message_to_convert, to_convert_to in conversion_pairs:
            ff.replacebyequality(pr.m_type, message_to_convert, to_convert_to)

    # from here on work with the types that are actually present in the data
    message_types = ff.getuniqueelements(pr.m_type)
    coded_participant_list = pid_dict[pr.participant['all']].values()
    storage_dict = initiatestorage(coded_participant_list, message_types)
    storage_dict = getperparticipantinout(ff.getdata(), storage_dict, pid_dict)
    plotperparticipantbar(storage_dict, 'Participant ID', '# of Messages', message_types, 'Per Participant Messages',
                          output_folder+output_file)
    hlp.dumpvariable(pid_dict, 'pid_dict.dict', output_folder)
    hlp.dumpvariable(ff.getdata(), 'messageData.list', output_folder)
def main():
    """Run VADER sentiment analysis on a message file.

    The messages of the requested types are scored with ``vadersenti``.
    With ``-p`` the separated polarity data is dumped to
    ``<folder><filename>.data``. Without it the scored messages are written to
    ``<folder><filename>.csv`` and, when a survey file is supplied with
    ``-w``, weekly summaries are additionally computed, dumped and plotted
    with a bullying overlay built from the ``-l``/``-lf`` filter data.

    ``-p`` and ``-w`` are mutually exclusive; when both are given a message is
    printed and nothing is processed.
    """
    parser = argparse.ArgumentParser('Script to perform sentiment analysis using VADER')

    parser.add_argument('-m', '-M', type=str, required=True,
                        help='Location of the message file')
    parser.add_argument('-mt', '-MT', type=str, required=True, nargs='+',
                        help='types of messages to filter')
    parser.add_argument('-f', '-F', type=str, required=True,
                        help='filename where data is stored, no extension needed')
    parser.add_argument('-s', '-S', type=str, required=True,
                        help='location of folder to store the file, ends with a /')
    parser.add_argument('-p', '-P', action='store_true',
                        help='flag to store polarities separately')
    parser.add_argument('-w', '-W', type=str, required=False,
                        help='conduct weekly analysis, path to the survey data for '
                             'creating week information')
    parser.add_argument('-l', '-L', type=str, nargs='+', required=True,
                        help='the filters to use, make one or more choices: seenB, wasB, didB')
    parser.add_argument('-lf', '-LF', type=str, nargs='+', required=True,
                        help='location of filtered data, from runSurveyStats.py, in same order as -l/L flag')

    args = parser.parse_args()
    message_file = args.m
    message_types = args.mt
    filename_to_store = args.f
    location_to_store = args.s
    separate_polarity_score = args.p
    survey_file = args.w
    filters_chosen = args.l
    filter_files = args.lf

    weekly_analysis = survey_file is not None

    # Validate the flag combination before any file is touched.
    if separate_polarity_score and weekly_analysis:
        print('Cannot have separate polarity scores and weekly analysis together, '
              'please remove the -p/-P flag')
        return

    catch_all_data = hlp.getfilterdata(filters_chosen, filter_files, catch_all=True)

    if weekly_analysis:
        wi = weeklyinfo()
        week_dates = wi.getweeklyfo(survey_file)
        gh = ghelper()

    ff = filterfields(message_file)
    data = []
    for message_type in message_types:
        data.extend(ff.filterbyequality(pr.m_type, message_type))

    # 'all' when several types were requested, otherwise the single type
    message_type_label = 'all' if len(message_types) > 1 else message_types[0]
    pid_dict = hlp.getuniqueparticipants(data, message_type_label)
    # NOTE(review): the first filtered row is skipped here while pid_dict above
    # is built from all rows -- confirm that the first row really is a header.
    sentiment_analyzer = vadersenti(data[1:])
    returned_data = sentiment_analyzer.compilesentiment(pr.m_content, separate_sentiment_list=separate_polarity_score)

    if separate_polarity_score:
        hlp.dumpvariable(returned_data, filename_to_store + '.data', location_to_store)
        print('done')
        return

    header = pr.message_header + ['pos', 'neg', 'neu', 'compound']
    final_data = [header] + returned_data
    hlp.writecsv(final_data, location_to_store + filename_to_store + '.csv')

    # -w is optional: without a survey file there are no week boundaries (and
    # no graph helper), so the weekly stage below cannot run.
    if not weekly_analysis:
        print('done')
        return

    weekly_data = gh.filterweeklydata(pid_dict, returned_data, week_dates, message_type_label)
    hlp.dumpvariable(weekly_data, 'weekly_data.dict', location_to_store)

    # NOTE(review): the summary has always received the last requested message
    # type (it used the variable left over from the filtering loop), whereas
    # the calls above pass 'all' for multiple types -- confirm which is intended.
    summary_message_type = message_types[-1]
    summarized_sentiment = {}
    for pid in weekly_data:
        summarized_sentiment[pid] = {}
        participant_data = weekly_data[pid]
        for week_no in participant_data:
            summarized_sentiment[pid][week_no] = sentiment_analyzer.summarizesentiment(participant_data[week_no],
                                                                                       separate_in_out=True,
                                                                                       message_type=summary_message_type)
    hlp.dumpvariable(summarized_sentiment, 'weekly_summarized_sentiment.dict', location_to_store)
    plt = plots()
    overlay_data = gh.createbullyingoverlay(catch_all_data, week_dates, ff)
    plt.plotweeklyprogression(summarized_sentiment, location_to_store, 'Sentiment Progress', 'Week',
                              'Sentiment Value', sentiment_legend=['Positive', 'Negative', 'Neutral'],
                              overlay_data=overlay_data)

    print('done')