Exemplo n.º 1
0
def get_vote_outliers(voters):
	"""Flag voters whose vote is far from what a simple model predicts.

	Fits a logistic regression of each voter's vote on two predictors
	(party and ideology score), then sets ``is_outlier`` on every voter
	to whether the coded vote differs from the modelled estimate by more
	than 0.7.

	Args:
		voters: Iterable of objects exposing ``party`` and ``option.key``
			and, optionally, ``ideolog_score``. Each object is mutated in
			place: ``is_outlier`` is always assigned.

	Returns:
		None. When there are no voters, or the regression raises
		``ValueError``, every voter is left with ``is_outlier = False``.
	"""
	# Materialize once: the voters are walked several times below, which
	# would silently skip the marking pass for a one-shot iterable.
	voters = list(voters)

	# Default to "not an outlier" so the attribute exists on every voter
	# even when the regression cannot be fitted.
	for voter in voters:
		voter.is_outlier = False

	# Nothing to model; avoid handing an empty matrix to the regression.
	if not voters:
		return

	import numpy
	from logistic_regression import logistic_regression, calcprob

	party_values = { "Democrat": -1, "Republican": 1 }
	vote_values = { "+": 1, "-": 0 }

	# One row per predictor, one column per voter.
	party_row = [ ]
	ideology_row = [ ]
	y = [ ]
	for voter in voters:
		# Independents and unrecognized parties get 0.
		party_row.append(party_values.get(voter.party, 0))
		# Ideology scores may not be available in a Congress, and are not
		# available for the vice president.
		ideology_row.append(getattr(voter, 'ideolog_score', 0))
		# Present, not voting, etc. => .5
		y.append(vote_values.get(voter.option.key, .5))
	x = numpy.array([party_row, ideology_row])
	y = numpy.array(y)

	# Perform regression. Only the coefficients are needed; the other two
	# returned values are ignored.
	try:
		regression_beta, _, _ = logistic_regression(x, y)
	except ValueError:
		# Something went wrong. No outliers will be reported (every voter
		# keeps the False default assigned above).
		return

	# Predict votes.
	# NOTE(review): the division suggests calcprob reports percentages --
	# confirm against the logistic_regression module.
	estimate = calcprob(regression_beta, x)/100.0

	# Mark voters whose vote is far from the prediction.
	for i, voter in enumerate(voters):
		voter.is_outlier = (abs(y[i]-estimate[i]) > .7)
Exemplo n.º 2
0
def get_vote_outliers(voters):
	"""Flag voters whose vote is far from what a simple model predicts.

	Fits a logistic regression of each voter's vote on two predictors
	(party and ideology score), then sets ``is_outlier`` on every voter
	to whether the coded vote differs from the modelled estimate by more
	than 0.7.

	Args:
		voters: Iterable of objects exposing ``party`` and ``option.key``
			and, optionally, ``ideolog_score``. Each object is mutated in
			place: ``is_outlier`` is always assigned.

	Returns:
		None. When there are no voters, or the regression raises
		``ValueError``, every voter is left with ``is_outlier = False``.
	"""
	# Materialize once: the voters are walked several times below, which
	# would silently skip the marking pass for a one-shot iterable.
	voters = list(voters)

	# Default to "not an outlier" so the attribute exists on every voter
	# even when the regression cannot be fitted.
	for voter in voters:
		voter.is_outlier = False

	# Nothing to model; avoid handing an empty matrix to the regression.
	if not voters:
		return

	import numpy
	from logistic_regression import logistic_regression, calcprob

	party_values = { "Democrat": -1, "Republican": 1 }
	vote_values = { "+": 1, "-": 0 }

	# One row per predictor, one column per voter.
	party_row = [ ]
	ideology_row = [ ]
	y = [ ]
	for voter in voters:
		# Independents and unrecognized parties get 0.
		party_row.append(party_values.get(voter.party, 0))
		# Ideology scores may not be available in a Congress, and are not
		# available for the vice president.
		ideology_row.append(getattr(voter, 'ideolog_score', 0))
		# Present, not voting, etc. => .5
		y.append(vote_values.get(voter.option.key, .5))
	x = numpy.array([party_row, ideology_row])
	y = numpy.array(y)

	# Perform regression. Only the coefficients are needed; the other two
	# returned values are ignored.
	try:
		regression_beta, _, _ = logistic_regression(x, y)
	except ValueError:
		# Something went wrong. No outliers will be reported (every voter
		# keeps the False default assigned above).
		return

	# Predict votes.
	# NOTE(review): the division suggests calcprob reports percentages --
	# confirm against the logistic_regression module.
	estimate = calcprob(regression_beta, x)/100.0

	# Mark voters whose vote is far from the prediction.
	for i, voter in enumerate(voters):
		voter.is_outlier = (abs(y[i]-estimate[i]) > .7)