示例#1
0
def main():
    """Summarise the Q&A export as a Markdown table and a set of bar charts.

    Reads ``../output/QandA.csv``, writes the ``describe()`` summary of the
    whole frame through a ``MarkdownTableWriter``, then calls ``barchart`` for
    the response types, the question askers (by name and by company) and the
    answerers.
    """
    # Load the export and derive the views used below
    responses = pd.read_csv("../output/QandA.csv")
    summary = responses.describe()
    by_type = responses.groupby('responsetype')
    by_type_summary = by_type.describe()  # computed but not used further
    answer_rows = responses[responses.responsetype == 'answer']
    question_rows = responses[responses.responsetype == 'question']

    # Table intended for pasting into the GitHub markdown
    md_writer = MarkdownTableWriter()
    md_writer.from_dataframe(summary, add_index_column=True)
    md_writer.write_table()

    # Bar charts. NOTE(review): the ``rotatation`` keyword is spelled the way
    # barchart (defined elsewhere) is called here -- confirm against its
    # signature before renaming.
    barchart(by_type, 'Response Type', 'responses')

    questions_by_name = question_rows.groupby('name')
    barchart(questions_by_name, 'Who Asked Questions', 'question_names',
             rotatation=90)

    questions_by_company = question_rows.groupby('company')
    barchart(questions_by_company, 'Which Companies Asked Questions',
             'question_companies', rotatation=90)

    answers_by_name = answer_rows.groupby('name')
    barchart(answers_by_name, 'Who Gave Answers', 'answer_names')
示例#2
0
def tab_comp(df_all, df_interpol, varname1, varname2, filename):
    """Tabulate agreement statistics between paired columns of ``df_interpol``.

    For every pair ``(varname1[i], varname2[i])`` the RMSE, the mean bias
    (``varname1 - varname2``), R2 and the sample count N are computed over the
    rows where both values are non-NaN. This is done for three row subsets:
    ``'all'`` (every row), ``'night'`` (``sza > 110``) and ``'day'``
    (``sza < 70``).

    The values are truncated to two decimals, saved to ``filename + '.csv'``,
    written once through the writer's default stream and once more into
    ``filename + '.md'``.

    Args:
        df_all: Not used by this function; kept for interface compatibility.
        df_interpol: DataFrame holding an ``'sza'`` column plus every column
            named in ``varname1`` and ``varname2``.
        varname1: Column names; also the column headers of the result table.
            Their values are passed as the first argument of ``r2_score`` and
            ``mean_squared_error``.
        varname2: Column names paired position-by-position with ``varname1``.
        filename: Output path without extension.
    """
    periods = ('all', 'night', 'day')
    metrics = ('RMSE', 'bias', 'R2', 'N')

    # One row per (metric, period); metrics cycle fastest, as in the CSV/MD
    # layout this function has always produced.
    df = pd.DataFrame(columns=varname1)
    df['metric'] = [metric for _ in periods for metric in metrics]
    df['time'] = [period for period in periods for _ in metrics]
    df.set_index(['metric', 'time'], inplace=True)

    sza = df_interpol['sza']
    subsets = (
        ('all', df_interpol),
        ('night', df_interpol.loc[sza > 110]),
        ('day', df_interpol.loc[sza < 70]),
    )

    for name1, name2 in zip(varname1, varname2):
        for period, subset in subsets:
            x = subset[name1].values
            y = subset[name2].values
            # Keep only rows where both series have a value
            valid = ~np.isnan(x) & ~np.isnan(y)
            x2 = x[valid]
            y2 = y[valid]

            df.loc[('R2', period), name1] = r2_score(x2, y2)
            df.loc[('bias', period), name1] = np.mean(x2 - y2)
            # mean_squared_error returns the MSE; the row is labelled RMSE,
            # so take the square root.
            df.loc[('RMSE', period), name1] = np.sqrt(
                mean_squared_error(x2, y2))
            df.loc[('N', period), name1] = len(x2)

    # Truncate (not round) every value to two decimals
    df = df.applymap(lambda value: math.trunc(100 * value) / 100)
    df = df.reset_index()
    df.to_csv(filename + '.csv')

    writer = MarkdownTableWriter()
    writer.from_dataframe(df)
    writer.write_table()
    # change the output stream to a file
    with open(filename + '.md', "w") as f:
        writer.stream = f
        writer.write_table()
示例#3
0
def Create_mdTable(df):
    """Write ``df`` as a Markdown-style table via a fresh MarkdownTableWriter.

    Returns whatever the writer's ``write_table()`` call returns.
    """
    md_writer = MarkdownTableWriter()
    md_writer.from_dataframe(df)
    outcome = md_writer.write_table()
    return outcome
示例#4
0
def topics_to_md(model,topics_dict):
    """Write ``topics_dict`` as a Markdown table titled with ``model``.

    ``topics_dict`` is converted with ``pd.DataFrame(...)`` and the frame's
    index is emitted as an extra leading column.
    """
    topics_frame = pd.DataFrame(topics_dict)

    md_writer = MarkdownTableWriter()
    md_writer.table_name = model
    md_writer.from_dataframe(topics_frame, add_index_column=True)
    md_writer.write_table()
示例#5
0
def print_format(df, format):
    """Print ``df`` in every output flavour mentioned in ``format``.

    ``format`` is tested with ``in`` for each of ``"csv"``, ``"latex"``,
    ``"text"`` and ``"md"`` (in that order), so several flavours may be
    emitted by one call. ``"md"`` goes through pytablewriter's
    ``MarkdownTableWriter`` instead of ``print``.
    """
    # Flavours that are rendered to a value and handed to print(); the
    # lambdas keep rendering lazy so only requested flavours are computed.
    printed_flavours = (
        ("csv", lambda: df.to_csv(index=False)),
        ("latex", lambda: df.to_latex(index=False, float_format="%.2f")),
        ("text", lambda: df),
    )
    for flavour, render in printed_flavours:
        if flavour in format:
            print(render())

    if "md" in format:
        from pytablewriter import MarkdownTableWriter
        md_writer = MarkdownTableWriter()
        md_writer.from_dataframe(df)
        md_writer.write_table()
def displayMarkdownFeatures(feature_list1, feature_list2, columnName1,
                            columnName2):
    """Compare five classifiers on two feature lists and write a Markdown table.

    Each classifier is passed to ``algo_performance`` together with its label,
    ``my_dataset`` and both feature lists; the returned mappings are merged
    into one dictionary (presumably label -> six scores; verify against
    ``algo_performance``). The merged result becomes a DataFrame with one row
    per entry and accuracy/precision/recall columns for each feature list,
    which is then written through a ``MarkdownTableWriter``.
    """
    # (label, factory) pairs in evaluation order; the factory defers building
    # the estimator until just before its performance run.
    candidates = (
        ("Naive Bayes", lambda: GaussianNB()),
        ("Decision Tree", lambda: tree.DecisionTreeClassifier()),
        ("Random Forest", lambda: RandomForestClassifier(n_estimators=10)),
        ("AdaBoost", lambda: AdaBoostClassifier()),
        ("Support Vector", lambda: svm.SVC(gamma='scale')),
    )

    classifier_dict = {}
    for label, build_classifier in candidates:
        performance = algo_performance(build_classifier(), label, my_dataset,
                                       feature_list1, feature_list2)
        classifier_dict.update(performance)

    # Column headers: the three scores for the first feature list, then the
    # same three for the second one.
    suffixes = (' features accuracy', ' features precision',
                ' features recall')
    headers = [
        prefix + suffix
        for prefix in (columnName1, columnName2)
        for suffix in suffixes
    ]
    scores_frame = pd.DataFrame.from_dict(classifier_dict,
                                          orient='index',
                                          columns=headers)

    # Emit the table (with the row labels as a leading column) for copy/paste
    md_writer = MarkdownTableWriter()
    md_writer.table_name = "Mean Accuracy, Precision and Recall for Features"
    md_writer.from_dataframe(scores_frame, add_index_column=True)
    md_writer.write_table()
示例#7
0
def main():
    """Convert ``dialogue_all.csv`` into a compact Markdown table file.

    The CSV is written to ``dialogue_all.md`` as a Markdown table, the file
    is read back with every space character removed, and it is finally
    rewritten with a top-level heading in front of the compacted table.
    """
    csv_path = 'dialogue_all.csv'
    md_path = "dialogue_all.md"

    table = pd.read_csv(csv_path, sep=',')
    md_writer = MarkdownTableWriter()
    md_writer.from_dataframe(table)

    # First pass: let the writer produce the table file
    with open(md_path, "w") as out:
        md_writer.stream = out
        md_writer.write_table()

    # Second pass: strip all spaces (padding and any spaces inside cells)
    with open(md_path, "r") as src:
        compact = src.read().replace(" ", "")

    # Final pass: heading followed by the compacted table
    with open(md_path, "w") as out:
        out.write("# 一覧表\n\n")
        out.write(compact)
示例#8
0
def plot_bc_results():
    """Tabulate behavioural-cloning vs. expert returns per environment.

    For each environment name, ``<cwd>/expert_data/<env>.pkl`` and
    ``<cwd>/results/bc_evaluation/<env>.pkl`` are unpickled and the mean and
    standard deviation of their ``'returns'`` entries are stored in a
    DataFrame. The finished frame is written as a Markdown table (index
    included as a column).

    Returns:
        Whatever ``MarkdownTableWriter.write_table()`` returns.

    NOTE: the pickle files are loaded with ``pickle.loads``; only run this on
    files you produced yourself.
    """
    # Get environment names
    envs = [
        'Ant-v2', 'HalfCheetah-v2', 'Hopper-v2', 'Humanoid-v2', 'Reacher-v2',
        'Walker2d-v2'
    ]

    # Create pandas dataframe
    df = pd.DataFrame(index=envs,
                      columns=['Mean(BC)', 'Std(BC)', 'Mean(Exp)', 'Std(Exp)'])

    for i, env in enumerate(envs):
        print(i)

        # Get expert data
        with open(os.getcwd() + "/expert_data/" + env + ".pkl", 'rb') as f:
            expert_data = pickle.loads(f.read())
            print('Expert data loaded.')

        # Get bc data
        with open(os.getcwd() + "/results/bc_evaluation/" + env + ".pkl",
                  'rb') as f:
            bc_data = pickle.loads(f.read())
            print('BC data loaded.')

        df.loc[env] = [
            np.mean(bc_data['returns']),
            np.std(bc_data['returns']),
            np.mean(expert_data['returns']),
            np.std(expert_data['returns'])
        ]

    # Build the writer once, after the frame is complete; it used to be
    # recreated on every iteration although only the last one was used.
    writer = MarkdownTableWriter()
    writer.table_name = "add_index_column"
    writer.from_dataframe(df, add_index_column=True)

    return writer.write_table()
示例#9
0
    def writeModelData(self, filename):
        """Write the table from ``NimmerjahnEffectTableAffinities`` as Markdown.

        The table's index is copied into a leading ``'Condition'`` column,
        ``FcgR`` is replaced in the column names (``mFcγR``) and in the
        condition labels (``m`` prefix plus ``FcγR``), every column except
        ``'Effectiveness'`` is formatted in scientific notation, and the
        result is written to ``filename``.
        """
        from pytablewriter import MarkdownTableWriter

        table = self.NimmerjahnEffectTableAffinities()
        table.insert(0, 'Condition', table.index)

        def sci(val):
            """Format a number as ``d.dE+xx``; strings pass through unchanged."""
            if isinstance(val, str):
                return val
            if val == 0:
                return '0.0'
            try:
                return '{:.1E}'.format(val)
            except OverflowError:
                return '0.0'

        # Rename the columns first so the mask below sees the final names
        table.columns = [col.replace('FcgR', 'mFcγR') for col in table.columns]

        not_effectiveness = table.columns != 'Effectiveness'
        formatted = table.loc[:, not_effectiveness].applymap(sci)
        table.loc[:, not_effectiveness] = formatted

        table['Condition'] = table['Condition'].apply(
            lambda label: ('m' + label).replace('FcgR', 'FcγR'))
        table['Effectiveness'] = table['Effectiveness'].apply(str)

        md_writer = MarkdownTableWriter()
        md_writer.from_dataframe(table)

        # Point the writer at the output file for the actual write
        with open(filename, 'w') as out:
            md_writer.stream = out
            md_writer.write_table()

        md_writer.close()
    acc_score, prec_score, rec_score, f1_measure_score = algo_get_scores(
        LogisticRegression(class_weight='balanced'), "Logistic Regression",
        my_dataset, kBest_features)
    # append values to list for printing out later
    k_score_list.append([kparam, prec_score, rec_score, f1_measure_score])
    # print acc_score, prec_score, rec_score

# Build a DataFrame from the collected [k, precision, recall, f1] rows so the
# Markdown writer can render it.
kbest_pd = pd.DataFrame(k_score_list,
                        columns=['k', 'Precision', 'Recall', 'f1'])
# assign writer
writer = MarkdownTableWriter()
# set table name
writer.table_name = "K Values at different values"
# create markdown table
writer.from_dataframe(kbest_pd)
# display markdown table for copy/paste
writer.write_table()

# Based on output we want to use k value of 5
final_k_value = 5
k_scores = run_kbest(my_dataset, new_feature_list, final_k_value)

# map features to scores, making sure to skip the first element which is poi
scores = zip(new_feature_list[1:], k_scores)
# Sort the scores using 2nd element which is the value,
# sort in reverse to get highest values first
scores = sorted(scores, key=choose_2nd_element, reverse=True)
# print out scores
# A parenthesised single-argument print gives the same output under Python 2
# and is also valid Python 3 (the bare print statement is not).
print("Scores sorted by highest first:")
pp.pprint(scores)
示例#11
0
def _md_level_counts(questions, qids):
  """Return ``(easy, medium, hard)`` counts (levels 1, 2, 3) for ``qids``."""
  easy = medium = hard = 0
  for qid in qids:
    level = questions[qid]['level']
    if level == 1:
      easy += 1
    elif level == 2:
      medium += 1
    elif level == 3:
      hard += 1
  return easy, medium, hard


def _md_question_columns(questions, qids):
  """Build the question-table columns for ``qids``, keeping their order."""
  columns = {
    'Number': [],
    'Title': [],
    'Level': [],
    'Accepted': [],
    'Submissions': [],
    'Acceptance': []
  }
  for qid in qids:
    q = questions[qid]
    columns['Number'].append(qid)
    columns['Title'].append(
      '[{}](https://leetcode.com/problems/{})'.format(q['title'], q['title_slug']))
    columns['Level'].append(LEVELS[q['level']])
    columns['Accepted'].append('{:,}'.format(q['accepted']))
    columns['Submissions'].append('{:,}'.format(q['submissions']))
    columns['Acceptance'].append('{:.0f}%'.format(q['acceptance']))
  return columns


def _md_table(columns, aligns):
  """Render a column dict as Markdown text with one alignment per column."""
  writer = MarkdownTableWriter()
  writer.from_dataframe(pandas.DataFrame.from_dict(columns))
  writer.column_styles = [Style(align=align) for align in aligns]
  return writer.dumps()


def _md_summary(total, easy, medium, hard, timestamp):
  """Return the bold one-line totals summary placed under each page title."""
  return '**Total Questions: {}, Easy: {}, Medium: {}, Hard: {}, {}.**\n\n'.format(
    total,
    easy,
    medium,
    hard,
    timestamp)


def generate_markdown(to, topics, questions):
  """Write the Markdown pages for all questions, all topics and each topic.

  Files created inside directory ``to``:

  * ``questions.md`` - every question, ordered by id.
  * ``topics.md`` - one row per topic with its question counts; the summary
    line repeats the overall totals from ``questions.md``.
  * ``<slug>.md`` per topic - a small table of contents, a table of the
    topic's questions that belong to no other topic, then one table per
    entry of ``t['similarities']``.

  Args:
    to: Output directory (must already exist).
    topics: Iterable (walked twice) of dicts with the keys ``slug``,
      ``name``, ``questions``, ``difficulty`` (``easy``/``medium``/``hard``)
      and ``similarities`` (values carrying ``topics`` and ``questions``).
    questions: Mapping of question id to a dict with the keys ``title``,
      ``title_slug``, ``level``, ``accepted``, ``submissions``,
      ``acceptance`` and ``topics``. ``level`` is looked up in ``LEVELS``.
  """
  timestamp = 'latest updated at {}'.format(date.today().strftime("%Y/%m/%d"))
  question_aligns = ("right", "left", "center", "right", "right", "right")
  topic_aligns = ("left", "right", "right", "right", "right")

  # generate to questions.md
  all_qids = sorted(questions.keys())
  all_columns = _md_question_columns(questions, all_qids)
  easy, medium, hard = _md_level_counts(questions, all_qids)
  total = len(questions.keys())
  questions_md = os.path.join(to, 'questions.md')
  with open(questions_md, 'w') as md:
    md.write('# List of All Questions\n\n')
    md.write(_md_summary(total, easy, medium, hard, timestamp))
    md.write(_md_table(all_columns, question_aligns))
    md.write('\n\n')
    print('Created {}.'.format(questions_md))

  # generate to topics.md
  topic_slug_to_name = {}
  topic_columns = {
    'Name': [],
    'Total': [],
    'Easy': [],
    'Medium': [],
    'Hard': []
  }
  for t in topics:
    topic_slug_to_name[t['slug']] = t['name']
    topic_columns['Name'].append('[{}]({}.md)'.format(t['name'], t['slug']))
    topic_columns['Total'].append('{:,}'.format(len(t['questions'])))
    topic_columns['Easy'].append('{:,}'.format(len(t['difficulty']['easy'])))
    topic_columns['Medium'].append('{:,}'.format(len(t['difficulty']['medium'])))
    topic_columns['Hard'].append('{:,}'.format(len(t['difficulty']['hard'])))
  topics_md = os.path.join(to, 'topics.md')
  with open(topics_md, 'w') as md:
    md.write('# List of All Topics\n\n')
    # Overall totals (not per-topic ones) are shown here on purpose: this is
    # what the page has always displayed.
    md.write(_md_summary(total, easy, medium, hard, timestamp))
    md.write(_md_table(topic_columns, topic_aligns))
    md.write('\n\n')
    print('Created {}.'.format(topics_md))

  # generate to each of topics
  for t in topics:
    topic_name_md = os.path.join(to, t['slug'] + '.md')
    with open(topic_name_md, 'w') as md:
      topic_easy, topic_medium, topic_hard = _md_level_counts(questions, t['questions'])
      md.write('# List of All Questions in {}\n\n'.format(t['name']))
      md.write(_md_summary(
        len(t['questions']), topic_easy, topic_medium, topic_hard, timestamp))

      # Resolve every similarity group once: sorted topic names plus the
      # sorted ids of the questions shared by those topics.
      groups = []
      for k in sorted(t['similarities'].keys()):
        similarity = t['similarities'][k]
        names = sorted(topic_slug_to_name[slug] for slug in similarity['topics'])
        groups.append((names, sorted(similarity['questions'])))

      # table of contents
      md.write('- [{}](#{})\n'.format(t['name'], t['name'].replace(' ', '-')))
      for names, _ in groups:
        md.write('- [{}](#{})\n'.format(', '.join(names), '-'.join(names).replace(' ', '-')))
      md.write('\n')

      # questions that belong to this topic only
      own_qids = [
        qid for qid in sorted(t['questions'])
        if len(questions[qid]['topics']) == 1
      ]
      md.write('## {}\n\n'.format(t['name']))
      md.write(_md_table(_md_question_columns(questions, own_qids), question_aligns))
      md.write('\n\n')

      # one section per similarity group
      for names, group_qids in groups:
        md.write('## {}\n\n'.format(', '.join(names)))
        md.write(_md_table(_md_question_columns(questions, group_qids), question_aligns))
        md.write('\n\n')
      print('Created {}.'.format(topic_name_md))