Exemplo n.º 1
0
def plotCondSecond(df, num, color, first):
    """Bar-plot "rating" against "secondMinDist" for rows with minDist == num.

    Only the rows of ``df`` whose "minDist" column equals ``num`` are passed
    on to ``plotBar``. The x values requested are ``range(7)[num:]``, i.e.
    the integers from ``num`` up to 6.

    Args:
        df: frame with "minDist", "secondMinDist" and "rating" columns.
        num: the "minDist" value to condition on; also the first x value.
        color: forwarded unchanged to ``plotBar`` as ``color``.
        first: ``plotBar`` receives ``sameFigure=(not first)`` -- presumably
            the first call opens a figure and later calls draw onto it
            (confirm against plotBar).

    Returns:
        Whatever ``plotBar`` returns.
    """
    has_min_dist = df["minDist"] == num
    rows = df[has_min_dist]
    x_values = range(7)[num:]
    overlay = not first
    return plotBar(
        rows,
        "secondMinDist",
        "rating",
        x_values,
        color=color,
        x_label="Reviewer Distance to Second Closest Author",
        y_label="Avg Rating",
        sameFigure=overlay,
        plotMean=False
    )
Exemplo n.º 2
0


# Replace the space in "United Kingdom" with a newline (presumably so the
# label wraps onto two lines on the axis -- confirm against the plot).
df.loc[df["authorCountryMode"] == "United Kingdom", "authorCountryMode"]\
    = "United\nKingdom"

# Keep only countries that appear as "authorCountryMode" in more than 10 rows.
# dict.items() is used instead of dict.iteritems(): items() exists on both
# Python 2 and Python 3, while iteritems() raises AttributeError on Python 3.
countryCounts = df['authorCountryMode'].value_counts().to_dict()
countries = [c for c, count in countryCounts.items() if count > 10]
# Order the countries by ascending mean of "accepted", then drop the first
# (lowest-mean) entry.
countries.sort(
    key=lambda c: df[df['authorCountryMode'] == c]['accepted'].mean())
countries = countries[1:]

p = plotBar(
    df,
    "authorCountryMode",
    "accepted",
    countries,
    title="Proportion of Papers Accepted by Country",
    x_label="Mode Author Country",
    y_label="Proportion Accepted",
    categorical=True,
    errorBars=True
)

# Recolour the artists on the current axes. j counts children across ALL
# containers; the first four children encountered (j = 0..3) are left as
# drawn. Every later child is coloured from the reversed coolwarm colormap
# according to its height: 0.05 maps to colormap position 0 and 0.25 to 1.
j = 0
for container in plt.gca().containers:
    for i, child in enumerate(container.get_children()):
        if j > 3:
            child.set_color(
                plt.cm.coolwarm_r((child.get_height() - 0.05) / 0.20))
        j += 1
gcf().set_size_inches((40, 10))

# y sits 0.025 below the current upper y-limit; it is consumed by code that
# follows this section.
y = ylim()[1] - 0.025
Exemplo n.º 3
0
    for a in paper.authors:
        authorExpList.append(a.topPastPapers)

# Simulation: for every group size i, repeatedly draw i entries from
# authorExpList, take the largest, and record getAvg() of that maximum.
numRuns = 500        # number of random draws per group size
avgRatings = []      # mean of the simulated values, one entry per group size
errors = []          # standard error of that mean, one entry per group size
for i in range(1, max(numAuthorsList) + 1):
    ratings = np.empty([numRuns, 1])
    for j in range(numRuns):
        # random.sample draws i distinct positions (no replacement).
        sample = random.sample(authorExpList, i)
        maxExp = max(sample)
        ratings[j] = getAvg(maxExp)
    avgRatings.append(ratings.mean())
    # Standard error: std (numpy default, ddof=0) / sqrt(number of runs).
    errors.append(ratings.std()/math.sqrt(ratings.shape[0]))

# Observed data: "avgRating" by "numAuthors" for author counts 1..7.
p1 = plotBar(
    df,
    "numAuthors",
    "avgRating",
    range(1, 8),
    color="green",
    x_label="Number of Authors",
    y_label="Average Rating",
    marker="o",
    plotMean=False,
)
# Simulated data drawn for the same seven x positions.
# NOTE(review): this needs max(numAuthorsList) >= 7; otherwise avgRatings[:7]
# is shorter than np.arange(1, 8) -- confirm the data guarantees it.
p2 = plt.errorbar(np.arange(1, 8), avgRatings[:7], yerr=errors[:7], marker = 'o')
plt.xlim([0,8])
# loc=4 is matplotlib's numeric code for the lower-right corner.
plt.legend((p1, p2), ('Real', 'Simulated'), loc=4)
plt.show()
Exemplo n.º 4
0
           x_percentile=False,
           xlim=[-.1, .1])

# Rows whose "distDiff" is non-negative.
# NOTE(review): df_dist is not used by any statement visible here -- the
# plotBucket call below is given the unfiltered df. Confirm whether the plot
# was meant to use df_dist.
df_dist = df[df["distDiff"] >= 0]
# Bucketed plot of "higherRating" against "distDiff" over all of df.
plotBucket(df,
           "distDiff",
           "higherRating",
           delta=5,
           x_label="Difference in Min Distance to Reviewer",
           y_label="Probability of a Higher Rating",
           color="Green",
           x_percentile=False)

# Bar plot of "paperRatingDiff" for "minDist" values 1 through 8, taken from
# df_original rather than df.
plotBar(df_original,
        "minDist",
        "paperRatingDiff", [1, 2, 3, 4, 5, 6, 7, 8],
        x_label="Min Distance between Reviewer and Paper Authors",
        y_label="Deviation from Paper Average Rating",
        title="Deviation from Paper Average vs. Min Dist to Reviewer",
        color="Blue")

# Disabled exploration (Python 2 print statements): for every pair
# (i, j) with j >= i, the mean and row count of rating - rating2 among rows
# with minDist == i and minDist2 == j.
# df["avgRatingDist"] = df["rating"] - df["rating2"]
# for i in range(7):
#     for j in range(7):
#         if (j >= i):
#             print "(%d, %d)" % (i,j)
#             print df[(df["minDist"] == i) & (df["minDist2"]==j)]["avgRatingDist"].mean()
#             print df[(df["minDist"] == i) & (df["minDist2"]==j)]["avgRatingDist"].shape[0]

show()
Exemplo n.º 5
0
	plotBucket(
		df,
		"paperId",
		"accepted",
		x_label="Submission Order",
		y_label="P(Accept)",
		title="Acceptance vs. Submission Order",
		numBuckets=10
	)

# How subsequent reviews influence each other: bar plot of "rating" against
# "order" for order values 1 through 11 (range(12)[1:]).
plotBar(
	df_review,
	"order",
	"rating",
	range(12)[1:],
	x_label="Review Order",
	y_label="Rating",
	title="Rating vs. Order of Review",
)

# df now names the same object as df_review (no copy is made), so any
# in-place change made through one name is visible through the other.
df = df_review
plots = []

# Five entries with an empty string in the middle -- presumably a placeholder
# so the list lines up with rating offsets -2..2; confirm where it is used.
colors =\
    ["red", "pink", "", "cyan", "blue",]
for i in [-2, -1, 1, 2]:
	df["rating%d" % i] = df["rating"] == i

	plots.append(plotBar(
		df_review,
               "avgRating",
               color="white",
               x_label="Number of Authors",
               y_label="Average Rating",
               marker="None",
               x_percentile=False)
# Handles returned by the plotting helpers, collected in drawing order.
p = []
# Rows selected by slices[0], taken from a copy of df so that the column
# rewrite below does not touch df itself.
df1 = df.copy()[slices[0]]
# Collapse every row with 3 or more authors onto one x value: the mean
# author count of those rows, rounded to two decimals.
ofInterest = df1["numAuthors"] >= 3
newValue = np.round(df1[ofInterest]["numAuthors"].mean(), 2)
df1.loc[ofInterest, "numAuthors"] = newValue
# The bar plot therefore has three x positions: 1, 2 and newValue.
p.append(
    plotBar(df1,
            "numAuthors",
            "avgRating", [1, 2, newValue],
            color="blue",
            sameFigure=True,
            marker="s",
            plotMean=False))

# One bucketed series per remaining slice; slices[i] is paired with
# colors[i - 1], so slices 1, 2, 3 get red, green, purple.
colors = ["red", "green", "purple"]
p.extend([
    plotBucket(df[slices[i]],
               "numAuthors",
               "avgRating",
               numBuckets=4,
               color=colors[i - 1],
               sameFigure=True,
               marker="s",
               plotMean=False) for i in [1, 2, 3]
])
Exemplo n.º 7
0
    plotBucket(df,
               "paperId",
               "accepted",
               x_label="Submission Order",
               y_label="P(Accept)",
               title="Acceptance vs. Submission Order",
               numBuckets=10)


# How subsequent reviews influence each other: bar plot of "rating" against
# "order" for order values 1 through 11 (range(12)[1:]).
plotBar(
    df_review,
    "order",
    "rating",
    range(12)[1:],
    x_label="Review Order",
    y_label="Rating",
    title="Rating vs. Order of Review",
)

# df now names the same object as df_review (no copy is made), so any
# in-place change made through one name is visible through the other.
df = df_review
plots = []

# Five entries with an empty string in the middle -- presumably a placeholder
# so the list lines up with rating offsets -2..2; confirm where it is used.
colors =\
    ["red", "pink", "", "cyan", "blue",]
for i in [-2, -1, 1, 2]:
    df["rating%d" % i] = df["rating"] == i

    plots.append(
        plotBar(df_review,
Exemplo n.º 8
0
# Bucketed plot of "higherRating" against "distDiff" over all of df.
plotBucket(
    df,
    "distDiff",
    "higherRating",
    delta=5,
    x_label="Difference in Min Distance to Reviewer",
    y_label="Probability of a Higher Rating",
    color="Green",
    x_percentile=False,
)

# Bar plot of "paperRatingDiff" for "minDist" values 1 through 8, taken from
# df_original rather than df.
plotBar(
    df_original,
    "minDist",
    "paperRatingDiff",
    [1, 2, 3, 4, 5, 6, 7, 8],
    x_label="Min Distance between Reviewer and Paper Authors",
    y_label="Deviation from Paper Average Rating",
    title="Deviation from Paper Average vs. Min Dist to Reviewer",
    color="Blue",
)


# df["avgRatingDist"] = df["rating"] - df["rating2"]
# for i in range(7):
#     for j in range(7):
#         if (j >= i):
#             print "(%d, %d)" % (i,j)
#             print df[(df["minDist"] == i) & (df["minDist2"]==j)]["avgRatingDist"].mean()
#             print df[(df["minDist"] == i) & (df["minDist2"]==j)]["avgRatingDist"].shape[0]

Exemplo n.º 9
0
# Plot co-author distance: "rating" against the bucketed "avgDist" column,
# then "rating" against "minDist" values 1 through 6 in red with
# sameFigure=True (presumably drawn onto the same figure -- confirm against
# plotBar).
p1 = plotBucket(
    df_merged,
    "avgDist",
    "rating",
    delta=10,
    x_label="Reviewer Author Distance",
    y_label="Avg Rating",
    xlim=[0, 8],
    x_percentile=False
)
p2 = plotBar(
    df_merged,
    "minDist",
    "rating",
    range(7)[1:],
    color="red",
    sameFigure=True,
)

# Legend entries are matched to the two handles by position.
legend([p1, p2],
       ["Average Author Distance",
       "Min Author Distances"])


# Plot min co-author distance vs P(Accept): one row per paperId holding the
# mean of "minDist", the minimum of "minDist" and the median of "accepted".
# NOTE(review): the first two columns of h both carry the name "minDist",
# so selecting h["minDist"] returns both -- presumably the columns are
# renamed further down; confirm.
g = df_merged.groupby('paperId')
h = pd.concat([
    g["minDist"].mean(),
    g["minDist"].min(),
    g["accepted"].median()], axis=1)
Exemplo n.º 10
0
    color="white",
    x_label="Number of Authors",
    y_label="Average Rating",
    marker="None",
    x_percentile=False
)
# Handles returned by the plotting helpers, collected in drawing order.
p = []
# Rows selected by slices[0], taken from a copy of df so that the column
# rewrite below does not touch df itself.
df1 = df.copy()[slices[0]]
# Collapse every row with 3 or more authors onto one x value: the mean
# author count of those rows, rounded to two decimals.
ofInterest = df1["numAuthors"] >= 3
newValue = np.round(df1[ofInterest]["numAuthors"].mean(), 2)
df1.loc[ofInterest, "numAuthors"] = newValue
# The bar plot therefore has three x positions: 1, 2 and newValue.
p.append(plotBar(
    df1,
    "numAuthors",
    "avgRating",
    [1, 2, newValue],
    color="blue",
    sameFigure=True,
    marker="s",
    plotMean=False))

# Colours for the series plotted after this point.
colors = ["red", "green", "purple"]
p.extend([
    plotBucket(
        df[slices[i]],
        "numAuthors",
        "avgRating",
        numBuckets=4,
        color=colors[i-1],
        sameFigure=True,
            marker="s",
Exemplo n.º 11
0


# Replace the space in "United Kingdom" with a newline (presumably so the
# label wraps onto two lines on the axis -- confirm against the plot).
df.loc[df["authorCountryMode"] == "United Kingdom", "authorCountryMode"]\
    = "United\nKingdom"

# Keep only countries that appear as "authorCountryMode" in more than 10 rows.
# dict.items() is used instead of dict.iteritems(): items() exists on both
# Python 2 and Python 3, while iteritems() raises AttributeError on Python 3.
countryCounts = df['authorCountryMode'].value_counts().to_dict()
countries = [c for c, count in countryCounts.items() if count > 10]
# Order the countries by ascending mean of "accepted", then drop the first
# (lowest-mean) entry.
countries.sort(
    key=lambda c: df[df['authorCountryMode'] == c]['accepted'].mean())
countries = countries[1:]

p = plotBar(df,
            "authorCountryMode",
            "accepted",
            countries,
            title="Proportion of Papers Accepted by Country",
            x_label="Mode Author Country",
            y_label="Proportion Accepted",
            categorical=True,
            errorBars=True)

# Recolour the artists on the current axes. j counts children across ALL
# containers; the first four children encountered (j = 0..3) are left as
# drawn. Every later child is coloured from the reversed coolwarm colormap
# according to its height: 0.05 maps to colormap position 0 and 0.25 to 1.
j = 0
for container in plt.gca().containers:
    for i, child in enumerate(container.get_children()):
        if j > 3:
            child.set_color(
                plt.cm.coolwarm_r((child.get_height() - 0.05) / 0.20))
        j += 1
gcf().set_size_inches((40, 10))

# y sits 0.025 below the current upper y-limit; it is consumed by code that
# follows this section.
y = ylim()[1] - 0.025
    delta=10,
    color="white",
    x_label="Number of Authors",
    y_label="Average Rating",
    marker="None",
    x_percentile=False)

# Rows selected by slices[0], taken from a copy of df_paper so that the
# column rewrite below does not touch df_paper itself.
df1 = df_paper.copy()[slices[0]]
# Collapse every row with 3 or more authors onto one x value: the mean
# author count of those rows, rounded to two decimals.
ofInterest = df1["#Authors"] >= 3
newValue = np.round(df1[ofInterest]["#Authors"].mean(), 2)
df1.loc[ofInterest, "#Authors"] = newValue
# Bar series for slices[0] with three x positions: 1, 2 and newValue.
p1 = plotBar(
    df1,
    "#Authors",
    "avgRating",
    [1, 2, newValue],
    color="blue",
    sameFigure=True,
    marker="s",
    plotMean=False)

# Bucketed series for slices[1], read from the unmodified df_paper.
p2 = plotBucket(
    df_paper[slices[1]],
    "#Authors",
    "avgRating",
    delta=20,
    color="red",
    sameFigure=True,
    x_percentile=False,
    marker="s",
    plotMean=False)
Exemplo n.º 13
0
#         ycol,
#         delta=5,
#         color="blue",
#         title="default",
#         x_label="default",
#         y_label="default",
#         x_percentile=True,
#         xlim=None):

# Users with at least one paper. The explicit .copy() makes authorFrame an
# independent frame, so adding a column below is not a write to a slice of
# userFrame (which pandas reports as SettingWithCopyWarning).
authorFrame = userFrame[userFrame["#Papers"] > 0].copy()
# Fraction of each author's papers that were accepted. The "* 1.0" forces
# float division when both columns are integers under Python 2.
authorFrame["acceptanceRate"] = authorFrame["#Accepted"] \
    * 1.0/authorFrame["#Papers"]
# NOTE(review): x value 0 is requested, but authorFrame only holds rows with
# "#Papers" > 0 -- confirm how plotBar treats an x value with no rows.
plotBar(authorFrame,
        "#Papers",
        "acceptanceRate", [0, 1, 2, 3, 4, 5],
        title="Acceptance Rate vs. Number of Submissions",
        x_label="Number of Submissions",
        y_label="Acceptance Rate",
        xlim=[0, 6])

# Number of papers against number of past papers, over all of userFrame
# (including users with zero papers).
plotBucket(userFrame,
           "#PastPapers",
           "#Papers",
           x_label="Number of Past Papers",
           y_label="Number of Submissions",
           x_percentile=False,
           xlim=[0, 200])

plotBucket(authorFrame,
           "#PastPapers",
           "acceptanceRate",
Exemplo n.º 14
0
#         delta=5,
#         color="blue",
#         title="default",
#         x_label="default",
#         y_label="default",
#         x_percentile=True,
#         xlim=None):

# Users with at least one paper. The explicit .copy() makes authorFrame an
# independent frame, so adding a column below is not a write to a slice of
# userFrame (which pandas reports as SettingWithCopyWarning).
authorFrame = userFrame[userFrame["#Papers"] > 0].copy()
# Fraction of each author's papers that were accepted. The "* 1.0" forces
# float division when both columns are integers under Python 2.
authorFrame["acceptanceRate"] = authorFrame["#Accepted"] \
    * 1.0/authorFrame["#Papers"]
# NOTE(review): x value 0 is requested, but authorFrame only holds rows with
# "#Papers" > 0 -- confirm how plotBar treats an x value with no rows.
plotBar(
    authorFrame,
    "#Papers",
    "acceptanceRate",
    [0, 1, 2, 3, 4, 5],
    title="Acceptance Rate vs. Number of Submissions",
    x_label="Number of Submissions",
    y_label="Acceptance Rate",
    xlim=[0, 6]
)


# Number of papers against number of past papers, over all of userFrame
# (including users with zero papers).
plotBucket(
    userFrame,
    "#PastPapers",
    "#Papers",
    x_label="Number of Past Papers",
    y_label="Number of Submissions",
    x_percentile=False,
    xlim=[0, 200])