# Script excerpt; argparse is standard library, while hlp, pr, weeklyinfo,
# filterfields, ghelper and plots are project-local modules assumed to be
# imported at the top of this script.
import argparse


def main():
    parser = argparse.ArgumentParser('Script to generate distribution of edge '
                                     'weights/degrees for all participants')
    parser.add_argument('-m', '-M', type=str, required=True,
                        help='location of the message file')
    parser.add_argument('-mt', '-MT', type=str, default='all',
                        help='types of messages to plot, currently supports '
                             'one of the following: sms, fb, twitter, or all')
    parser.add_argument('-r', '-R', type=str, required=True,
                        help='survey file')
    parser.add_argument('-s', '-S', type=str, required=True,
                        help='folder to store data in, leading / required')
    parser.add_argument('-p', '-P', action='store_true',
                        help='flag to generate plots')
    args = parser.parse_args()

    survey_file = args.r
    message_file = args.m
    m_type = args.mt
    folder_to_store = args.s
    generate_plots = args.p

    # week boundaries derived from the survey data
    wi = weeklyinfo()
    week_info = wi.getweeklyfo(survey_file)

    # filter the messages down to the requested type(s)
    ff = filterfields(message_file)
    filtered_data = []
    if m_type == 'all':
        for message_type in ['sms', 'fb_message']:
            filtered_data.extend(ff.filterbyequality(pr.m_type, message_type))
    else:
        filtered_data = ff.filterbyequality(pr.m_type, m_type)

    _, links_tuple, _, pid_dict = hlp.creategraph(filtered_data, filterType=m_type)

    # weekly degree and edge-weight distributions, dumped to disk
    gh = ghelper()
    plt = plots()
    weekly_deg_dist, _ = gh.getweeklydistributions(pid_dict, filtered_data, message_type=m_type,
                                                   is_degree=True, week_info=week_info)
    hlp.dumpvariable(weekly_deg_dist, 'weekly_deg_dist.dict', folder_to_store)
    weekly_ew_dist, _ = gh.getweeklydistributions(pid_dict, filtered_data, message_type=m_type,
                                                  is_degree=False, week_info=week_info)
    hlp.dumpvariable(weekly_ew_dist, 'weekly_ew_dist.dict', folder_to_store)

    if generate_plots:
        plt.plotweeklyprogression(weekly_deg_dist, folder_to_store + 'deg_',
                                  'No. of friends', 'Week No.', 'Friends')
        plt.plotweeklyprogression(weekly_ew_dist, folder_to_store + 'ew_',
                                  'No. of messages exchanged', 'Week No.', 'Messages')

    print 'done...'
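# Example invocation (a sketch: the script name and file paths are
# hypothetical, the flags are the ones defined in main() above):
#   python weekly_distributions.py -m messages.csv -mt all -r survey.csv -s /data/output/ -p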
# Script excerpt; argparse is standard library, while hlp, pr, weeklyinfo,
# filterfields, ghelper, plots and vadersenti are project-local modules assumed
# to be imported at the top of this script.
import argparse


def main():
    parser = argparse.ArgumentParser('Script to perform sentiment analysis using VADER')
    parser.add_argument('-m', '-M', type=str, required=True,
                        help='Location of the message file')
    parser.add_argument('-mt', '-MT', type=str, required=True, nargs='+',
                        help='types of messages to filter')
    parser.add_argument('-f', '-F', type=str, required=True,
                        help='filename where data is stored, no extension needed')
    parser.add_argument('-s', '-S', type=str, required=True,
                        help='location of folder to store the file, ends with a /')
    parser.add_argument('-p', '-P', action='store_true',
                        help='flag to store polarities separately')
    parser.add_argument('-w', '-W', type=str, required=False,
                        help='conduct weekly analysis, path to the survey data for '
                             'creating week information')
    parser.add_argument('-l', '-L', type=str, nargs='+', required=True,
                        help='the filters to use, make one or more choices: seenB, wasB, didB')
    parser.add_argument('-lf', '-LF', type=str, nargs='+', required=True,
                        help='location of filtered data, from runSurveyStats.py, '
                             'in same order as -l/L flag')
    args = parser.parse_args()

    message_file = args.m
    message_types = args.mt
    filename_to_store = args.f
    location_to_store = args.s
    separate_polarity_score = args.p
    survey_file = args.w
    filters_chosen = args.l
    filter_files = args.lf

    catch_all_data = hlp.getfilterdata(filters_chosen, filter_files, catch_all=True)

    if separate_polarity_score and survey_file is not None:
        print 'Cannot have separate polarity scores and weekly analysis together, ' \
              'please remove the -p/-P flag'
        return

    if survey_file is not None:
        wi = weeklyinfo()
        week_dates = wi.getweeklyfo(survey_file)
        gh = ghelper()

    # gather the messages of the requested types
    ff = filterfields(message_file)
    data = []
    for message_type in message_types:
        data.extend(ff.filterbyequality(pr.m_type, message_type))

    # single label used wherever a message-type name is expected downstream
    mt_label = 'all' if len(message_types) > 1 else message_types[0]
    pid_dict = hlp.getuniqueparticipants(data, mt_label)

    sentiment_analyzer = vadersenti(data[1:])
    returned_data = sentiment_analyzer.compilesentiment(pr.m_content,
                                                        separate_sentiment_list=separate_polarity_score)
    if separate_polarity_score:
        hlp.dumpvariable(returned_data, filename_to_store + '.data', location_to_store)
    else:
        header = pr.message_header + ['pos', 'neg', 'neu', 'compound']
        final_data = [header] + returned_data
        hlp.writecsv(final_data, location_to_store + filename_to_store + '.csv')
        # weekly analysis only applies when a survey file was provided,
        # since week_dates and gh only exist in that case
        if survey_file is not None:
            weekly_data = gh.filterweeklydata(pid_dict, returned_data, week_dates, mt_label)
            hlp.dumpvariable(weekly_data, 'weekly_data.dict', location_to_store)
            summarized_sentiment = {}
            for pid in weekly_data:
                summarized_sentiment[pid] = {}
                participant_data = weekly_data[pid]
                for week_no in participant_data:
                    summarized_sentiment[pid][week_no] = \
                        sentiment_analyzer.summarizesentiment(participant_data[week_no],
                                                              separate_in_out=True,
                                                              message_type=mt_label)
            hlp.dumpvariable(summarized_sentiment, 'weekly_summarized_sentiment.dict', location_to_store)
            plt = plots()
            overlay_data = gh.createbullyingoverlay(catch_all_data, week_dates, ff)
            plt.plotweeklyprogression(summarized_sentiment, location_to_store, 'Sentiment Progress',
                                      'Week', 'Sentiment Value',
                                      sentiment_legend=['Positive', 'Negative', 'Neutral'],
                                      overlay_data=overlay_data)

    print 'done'
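# Example invocation (a sketch: script name and paths are hypothetical, the
# flags are the ones defined in main() above):
#   python vader_sentiment.py -m messages.csv -mt sms fb_message -f sentiment_scores \
#       -s /data/output/ -w survey.csv -l seenB wasB -lf seenB_data.p wasB_data.p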
# Script excerpt; argparse and deepcopy are standard library, while hlp, pr,
# weeklyinfo, filterfields, ghelper and raw_features, along with the helpers
# flip_bullying_overlay, get_pid_level_features and get_week_features, are
# assumed to be imported/defined elsewhere in this script.
import argparse
from copy import deepcopy


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '-M', type=str, required=True,
                        help='Message list file')
    parser.add_argument('-r', '-R', type=str, required=True,
                        help='survey file')
    parser.add_argument('-p', '-P', type=str, required=True,
                        help='PID dict inverted')
    parser.add_argument('-b', '-B', type=str, required=True,
                        help='bullying dictionary')
    parser.add_argument('-o', '-O', type=str, required=True,
                        help='Output folder')
    parser.add_argument('-l', '-L', type=str, nargs='+',
                        help='Filters chosen')
    parser.add_argument('-f', '-F', type=str, nargs='+',
                        help='Filter files')
    args = parser.parse_args()

    output_folder = args.o
    message_data = hlp.recovervariable(args.m)
    pid_dict = hlp.recovervariable(args.p)
    filters_chosen = args.l
    filter_files = args.f
    catch_all_data = hlp.getfilterdata(filters_chosen, filter_files, catch_all=True)

    wi = weeklyinfo()
    weekly_info = wi.getweeklyfo(args.r)
    ff = filterfields()
    gh = ghelper()
    bullying_overlay = gh.createbullyingoverlay(catch_all_data, weekly_info, ff)
    bullying_overlay = flip_bullying_overlay(bullying_overlay, weekly_info.keys())

    pid_list = pid_dict.keys()
    pid_list.sort()

    # leave-one-participant-out: each pid in turn becomes the testing set
    for pid in pid_list:
        training_set_final = []
        testing_set_final = []
        pid_list_training = deepcopy(pid_list)
        pid_list_training.remove(pid)

        # messages sent or received by the held-out pid form the testing data;
        # everything that remains is the training data
        ff.setdata(message_data)
        testing_raw_data = ff.filterbyequality(pr.m_source, pid_dict[pid]) + \
                           ff.filterbyequality(pr.m_target, pid_dict[pid])
        ff.removebyequality(pr.m_source, pid_dict[pid])
        ff.removebyequality(pr.m_target, pid_dict[pid])
        training_raw_data = ff.getdata()

        # scoring factors are learned on the training data only
        fe = raw_features(data=None)
        _, _ = fe.get_scoring_factors(training_raw_data)

        training_weekly_data = {}
        for training_pid in pid_list_training:
            training_weekly_data[training_pid] = {}
            data_to_use = ff.filterbyequality(pr.m_source, pid_dict[training_pid]) + \
                          ff.filterbyequality(pr.m_target, pid_dict[training_pid])
            if 0 == len(data_to_use):
                print 'no data found, probably filtered into the testing set, Training PID: ' + \
                      training_pid + ', Testing PID: ' + pid
                continue
            pid_weekly_w_bullying, global_in_degree, global_out_degree, global_in_ew, global_out_ew, \
                incoming_ss, outgoing_ss = get_pid_level_features(data_to_use, weekly_info, ff,
                                                                  bullying_overlay, pid_dict,
                                                                  training_pid, fe)
            for week_no in pid_weekly_w_bullying:
                fr_in_degree, fr_out_degree, fr_in_ew, \
                    fr_out_ew, fr_in_senti, fr_out_senti, \
                    current_in_ss, current_out_ss = get_week_features(pid_weekly_w_bullying, week_no, fe,
                                                                      global_in_degree, global_out_degree,
                                                                      global_in_ew, global_out_ew,
                                                                      incoming_ss, outgoing_ss,
                                                                      pid_dict[training_pid])
                training_set_final.append(
                    [training_pid, week_no,
                     fr_in_senti[0], fr_in_senti[1], fr_in_senti[2],
                     fr_out_senti[0], fr_out_senti[1], fr_out_senti[2],
                     fr_in_degree, fr_out_degree, fr_in_ew, fr_out_ew,
                     current_in_ss, current_out_ss,
                     pid_weekly_w_bullying[week_no]['label']])

        # testing pid
        pid_weekly_w_bullying, global_in_degree, global_out_degree, \
            global_in_ew, global_out_ew, incoming_ss, outgoing_ss = \
            get_pid_level_features(testing_raw_data, weekly_info, ff, bullying_overlay,
                                   pid_dict, pid, fe)
        for week_no in pid_weekly_w_bullying:
            fr_in_degree, fr_out_degree, fr_in_ew, \
                fr_out_ew, fr_in_senti, fr_out_senti, \
                current_in_ss, current_out_ss = get_week_features(pid_weekly_w_bullying, week_no, fe,
                                                                  global_in_degree, global_out_degree,
                                                                  global_in_ew, global_out_ew,
                                                                  incoming_ss, outgoing_ss, pid_dict[pid])
            testing_set_final.append(
                [pid, week_no,
                 fr_in_senti[0], fr_in_senti[1], fr_in_senti[2],
                 fr_out_senti[0], fr_out_senti[1], fr_out_senti[2],
                 fr_in_degree, fr_out_degree, fr_in_ew, fr_out_ew,
                 current_in_ss, current_out_ss,
                 pid_weekly_w_bullying[week_no]['label']])

        # write one training/testing CSV pair per held-out participant
        header = ['pid', 'wkno',
                  'frWInSenPos', 'frWInSenNeu', 'frWInSenNeg',
                  'frWOutSenPos', 'frWOutSenNeu', 'frWOutSenNeg',
                  'frInDegO', 'frOutDegO', 'frInEdgeO', 'frOutEdgeO',
                  'inSenSc', 'outSenSc', 'label']
        training_set_final = [header] + training_set_final
        testing_set_final = [header] + testing_set_final
        hlp.writecsv(training_set_final, output_folder + pid + '_tr.csv')
        hlp.writecsv(testing_set_final, output_folder + pid + '_ts.csv')
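# The per-pid loop above implements leave-one-participant-out evaluation. A
# minimal, self-contained sketch of that split (illustrative names only, not
# the project's API):
def leave_one_participant_out(pid_list):
    """Yield (training_pids, testing_pid) pairs, one pair per participant."""
    for testing_pid in pid_list:
        training_pids = [p for p in pid_list if p != testing_pid]
        yield training_pids, testing_pid

# e.g. for pid_list = ['P1', 'P2', 'P3'] the first pair is (['P2', 'P3'], 'P1')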
# Script excerpt; argparse and os are standard library, while hlp, pr, e (error
# strings), weeklyinfo, filterfields, ghelper and plots are project-local
# modules assumed to be imported at the top of this script.
import argparse
import os


def main():
    parser = argparse.ArgumentParser('Script to generate a CDF comparing the degrees of our participants')
    parser.add_argument('-l', '-L', type=str, nargs='+', required=True,
                        help='the filters to use, make one or more choices: seenB, wasB, didB')
    parser.add_argument('-f', '-F', type=str, nargs='+', required=True,
                        help='location of filtered data, from runSurveyStats.py, '
                             'in the same order as -l/L flag')
    parser.add_argument('-m', '-M', type=str, required=True,
                        help='location of the message file')
    parser.add_argument('-mt', '-MT', type=str, default='sms',
                        help='type of message we are filtering, default: sms')
    parser.add_argument('-n', '-N', action='store_true',
                        help='flag indicates that processing should include participants which did not '
                             'witness anything mentioned in the values passed for flags -l/L')
    parser.add_argument('-a', '-A', action='store_true',
                        help='flag indicates that processing should include a plot of all participants')
    parser.add_argument('-s', '-S', type=str, required=True,
                        help='folder to store in, leading /')
    parser.add_argument('-r', '-R', type=str, required=True,
                        help='survey file')
    args = parser.parse_args()

    filters_chosen = args.l
    for filter_v in filters_chosen:
        if filter_v not in ['seenB', 'didB', 'wasB']:
            raise Exception('filter value was not from the ones specified')
    filter_files = args.f
    assert len(filter_files) == len(filters_chosen), e.len_filter_file_ne_len_filters_chosen
    include_other_participants = args.n
    include_all_participants = args.a
    location_to_store = args.s
    if not os.path.exists(location_to_store):
        os.mkdir(location_to_store)
    message_file = args.m
    message_type = args.mt
    survey_file = args.r

    wi = weeklyinfo()
    week_info = wi.getweeklyfo(survey_file)
    gh = ghelper()
    plt = plots()

    # get the filtered messages
    ff = filterfields(message_file)
    filtered_data = []
    if message_type == 'all':
        # separate loop variable so message_type still reads 'all' below
        for mtype in ['sms', 'fb', 'twitter']:
            filtered_data.extend(ff.filterbyequality(pr.m_type, mtype))
    else:
        filtered_data = ff.filterbyequality(pr.m_type, message_type)

    # generate the links and the graph for the filtered data
    links, links_tuple, graph_obj, pid_dict = hlp.creategraph(filtered_data, filterType=message_type)

    # get the pids from the chosen filters
    bullying_pid_dict = hlp.getfilterdata(filters_chosen, filter_files)
    cumulative_bully_pid = hlp.getfilterdata(filters_chosen, filter_files, cumulative_list=True)

    # get all the information from the filters
    catch_all_data = hlp.getfilterdata(filters_chosen, filter_files, catch_all=True)

    # generate the distributions for in degree/edge weight and tabulate them
    in_distributions = gh.generatedistributions(graph_obj, bullying_pid_dict, include_all_participants,
                                                include_other_participants, pid_dict, message_type,
                                                cumulative_bully_pid, in_dist=True)
    in_distributions_ew = gh.generatedistributions(graph_obj, bullying_pid_dict, include_all_participants,
                                                   include_other_participants, pid_dict, message_type,
                                                   cumulative_bully_pid, in_dist=True, is_degree=False)
    plt.generatetablehist(in_distributions, location_to_store + 'in_degree_table.csv', generate_totals=True)
    plt.generatetablehist(in_distributions_ew, location_to_store + 'in_edge_weight.csv', generate_totals=True)

    # generate the distributions for out degree/edge weight and tabulate them
    out_distributions = gh.generatedistributions(graph_obj, bullying_pid_dict, include_all_participants,
                                                 include_other_participants, pid_dict, message_type,
                                                 cumulative_bully_pid, in_dist=False)
    out_distributions_ew = gh.generatedistributions(graph_obj, bullying_pid_dict, include_all_participants,
                                                    include_other_participants, pid_dict, message_type,
                                                    cumulative_bully_pid, in_dist=False,
                                                    is_degree=False)  # edge weights, mirroring the in-dist call
    plt.generatetablehist(out_distributions, location_to_store + 'out_degree_table.csv', generate_totals=True)
    plt.generatetablehist(out_distributions_ew, location_to_store + 'out_edge_weight.csv', generate_totals=True)

    # line plot of degrees
    weekly_dist_degrees, _ = gh.getweeklydistributions(pid_dict, filtered_data, message_type=message_type,
                                                       is_degree=True, week_info=week_info)
    overlay_info = gh.createbullyingoverlay(catch_all_data, week_info, ff)
    plt.plotweeklyprogression(weekly_dist_degrees, location_to_store + 'deg_', 'No. of friends',
                              'Week No.', 'Friends', overlay_data=overlay_info)

    # line plot of weights
    weekly_dist_ew, _ = gh.getweeklydistributions(pid_dict, filtered_data, message_type=message_type,
                                                  is_degree=False, week_info=week_info)
    overlay_info = gh.createbullyingoverlay(catch_all_data, week_info, ff)
    plt.plotweeklyprogression(weekly_dist_ew, location_to_store + 'ew_', 'No. of messages exchanged',
                              'Week No.', 'Messages', overlay_data=overlay_info)

    print 'TADAAA!'
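# Example invocation (a sketch: script name and paths are hypothetical, the
# flags are the ones defined in main() above):
#   python degree_cdf.py -l seenB wasB -f seenB_data.p wasB_data.p -m messages.csv \
#       -mt all -s /data/output/ -r survey.csv -n -a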