-
Notifications
You must be signed in to change notification settings - Fork 1
/
20100920a.py
75 lines (63 loc) · 2.13 KB
/
20100920a.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
"""
Compute the variation explained by a linear model.
This uses the R software.
"""
from StringIO import StringIO
import argparse
from SnippetUtil import HandlingError
import Form
import FormOut
import Util
import RUtil
import Carbone
import const
# tags associated with this snippet
# NOTE(review): how the tags are consumed is not visible in this file
g_tags = ['pca:compute']
# table text read through the const module;
# passed to the 'table' form field in get_form (presumably as its default)
g_table = const.read('20100709a')
# independent variable names passed to the 'independent' form field
g_independent_names = ['temperature', 'precipitation']
# dependent variable name passed to the 'dependent' form field
g_dependent_name = 'pc1'
def get_form():
    """
    Assemble the input fields of this snippet.
    The fields are the R table, the independent variable names
    (one per line), and the dependent variable name.
    @return: the body of a form, as a list of form objects
    """
    independent_text = '\n'.join(g_independent_names)
    table_field = Form.MultiLine('table', 'R table', g_table)
    independent_field = Form.MultiLine(
        'independent', 'names of independent variables',
        independent_text)
    dependent_field = Form.SingleLine(
        'dependent', 'name of the dependent variable',
        g_dependent_name)
    return [table_field, independent_field, dependent_field]
def get_form_out():
    """
    @return: a newly constructed FormOut.Report object
    """
    report_format = FormOut.Report()
    return report_format
def get_response_content(fs):
    """
    Check the requested variable names and hand the model over to R.
    @param fs: object with text attributes table, independent, and dependent
    @return: whatever RUtil.run_with_table returns for the generated script
    @raise ValueError: if no independent variable name is given,
    or if a requested name is not a column header of the table
    """
    # get the independent variable names;
    # materialize them because they are inspected more than once
    indep = list(Util.get_stripped_lines(fs.independent.splitlines()))
    # strip the dependent name like the independent names are stripped,
    # so stray whitespace from the form cannot break the header lookup
    dep = fs.dependent.strip()
    # get the r table headers
    rtable = RUtil.RTable(fs.table.splitlines())
    header_row = rtable.headers
    Carbone.validate_headers(header_row)
    # an empty right hand side would give the invalid R formula 'y ~ '
    if not indep:
        raise ValueError(
            'at least one independent variable name is required')
    # check requested variable names as column headers
    bad_indep_names = set(indep) - set(header_row)
    if bad_indep_names:
        raise ValueError(
            'these requested independent variable names '
            'were not found as columns '
            'in the data table: ' + str(bad_indep_names))
    if dep not in header_row:
        raise ValueError(
            'the dependent variable name '
            'was not found as a column in the data table')
    return RUtil.run_with_table(fs.table, (indep, dep), get_script_content)
def get_script_content(data, temp_table_name):
    """
    Write the R script that fits the linear model and prints its summary.
    @param data: the (indep, dep) data pair
    @param temp_table_name: name of the temporary table file
    @return: the R script text, one statement per newline-terminated line
    """
    predictor_names, response_name = data
    # every variable is referenced as a column of the data frame d
    predictor_terms = ['d$' + name for name in predictor_names]
    right_hand_side = ' + '.join(predictor_terms)
    statements = (
        'd <- read.table("%s")' % temp_table_name,
        'myfit <- lm(d$%s ~ %s)' % (response_name, right_hand_side),
        'summary(myfit)',
    )
    return ''.join(statement + '\n' for statement in statements)