def create_summary_ci_tables():
    """Rebuild the two high-level CI summary spreadsheets.

    Runs the "ISP or infra" and "ISP and infra" summary queries, wraps each
    result set in a DataFrame with its matching header, and pushes both
    frames to their spreadsheets via ``update_sheet``.
    """
    # (query, header columns, destination sheet title) for each summary.
    specs = [
        (CI_HIGHLEVEL_QUERY_ISP_OR_INFRA,
         summary_isp_or_infra_header(),
         SPREADSHEET_TITLE_SUMMARY_ISP_OR_INFRA),
        (CI_HIGHLEVEL_QUERY_ISP_AND_INFRA,
         summary_isp_and_infra_header(),
         SPREADSHEET_TITLE_SUMMARY_ISP_AND_INFRA),
    ]
    # Build every frame first, then write them out — same order of side
    # effects as the original (both queries run before any sheet update).
    frames = [
        (title, pd.DataFrame.from_records(get_rows(query), columns=header))
        for query, header, title in specs
    ]
    for title, frame in frames:
        update_sheet(title, frame)
def rows():
    """HTTP endpoint: return the rows of the requested source as JSON.

    Reads ``data.source`` from the request's JSON body, fetches the matching
    rows, and answers with ``{"rows": [...]}`` and a 200 status.
    """
    content = request.get_json()
    source = content['data']['source']
    payload = json.dumps({'rows': list(get_rows(source))})
    return app.response_class(
        response=payload,
        status=200,
        mimetype='application/json',
    )
def get_all_cidrs(
) -> List[Union[ipaddress.IPv4Network, ipaddress.IPv6Network]]:
    """Return every CIDR stored in the whois_data table.

    Each row's first column is parsed with ``ipaddress.ip_network`` so the
    caller gets typed IPv4Network/IPv6Network objects rather than strings.
    """
    networks = []
    for row in get_rows("select cidr from whois_data"):
        networks.append(ipaddress.ip_network(row[0]))
    return networks
def rows(self):
    """Return the rows of this table after validating their shape.

    Every row must split into exactly as many whitespace-separated fields
    as ``self.header`` does.

    Returns:
        The list of row strings from ``get_rows(self.table())``.

    Raises:
        ValueError: if any row's column count differs from the header's.
    """
    rows = get_rows(self.table())
    expected_cols = len(self.header.split())
    # Bug fix: the original assigned the check to a lambda and then tested
    # the lambda object itself (`if check:`), which is always truthy — the
    # validation never actually ran. Evaluate the condition directly.
    if all(len(row.split()) == expected_cols for row in rows):
        return rows
    # Also fixed the implicitly-concatenated message, which previously
    # rendered as "...different numberof columns...".
    raise ValueError("Please check your table. You have different number "
                     "of columns for each row of the table")
def read_whois_cache(cidr: str):
    """Return the cached whois payload for *cidr*.

    Looks the CIDR up in the whois_data table (parameterized query) and
    returns the ``data`` column of the first matching row.  Like the
    original, this raises IndexError when no row matches.
    """
    matches = get_rows("select data from whois_data where cidr = %s",
                       (cidr, ))
    first_match = matches[0]
    return first_match[0]
'''
# NOTE(review): the opening triple-quote of this string lies outside this
# chunk — the lines above here (not visible) appear to be quoted out.
# Top-level driver script (Python 2: note the `print` statements below).
# Parses an --array_spec argument, builds a range of rho values, loads and
# shuffles the dataset, runs five shuffled experiments, and plots them.
parser.add_argument('--array_spec', type=str, required=True)
args, unknown = parser.parse_known_args()
# Copy CLI arguments into module-level constants.
LOSS = args.loss
NUM_WLS = args.num_wls
GAMMA = args.gamma
DATAFILE = args.datafile
MULTIPLIER = args.multiplier
RHORANGESTRING = args.array_spec
# Two spec formats: '_'-separated values are fed to np.arange
# (start_stop_step, labelled 'lin'); ','-separated values are taken as an
# explicit list (labelled 'exp').
if '_' in args.array_spec:
    LIN_OR_EXP = 'lin'
    arange_args = [float(arg) for arg in args.array_spec.split('_')]
    RHORANGE = np.arange(*arange_args)
else:
    LIN_OR_EXP = 'exp'
    arange_args = [float(arg) for arg in args.array_spec.split(',')]
    RHORANGE = np.asarray(arange_args)
filename = os.path.join(DATADIR, DATAFILE)
class_index = 0
training_ratio = 0.8
N = utils.get_num_instances(filename)
# Hold out the non-training fraction for testing; must be non-empty.
test_N = int(N - N*training_ratio)
assert(test_N > 0)
# MULTIPLIER replicates the dataset that many times before shuffling —
# presumably to enlarge the stream; TODO confirm intent.
rows = utils.get_rows(filename) * MULTIPLIER
rows = utils.shuffle(rows, seed = random.randint(1, 2000000))
print 'running ...'
# Five independent reshuffles; keep only the last element of each run's
# result (semantics of run() not visible from here).
rho_runs = [run(utils.shuffle(rows, seed=random.randint(1, 2000000)), test_N)[-1] for _ in range(5)]
plotRun(RHORANGE, rho_runs)
def main():
    """Benchmark four online multiclass boosting setups on balance-scale.

    Python 2 script (note the `print` statements).  Trains and evaluates,
    in order: AdaBoost.OLM (logistic loss), OnlineMBBM (zero-one loss),
    a one-vs-all baseline, and one-vs-all-based boosting, printing each
    model's test accuracy as a percentage.
    """
    # Load data
    filename = 'balance-scale.csv'
    class_index = 0
    training_ratio = 0.8
    N = utils.get_num_instances(filename)
    train_N = int(N*training_ratio)
    rows = utils.get_rows(filename)
    # Fixed seed so the train/test split is reproducible across runs.
    rows = utils.shuffle(rows, seed=357)
    train_rows = rows[:train_N]
    test_rows = rows[train_N:]
    # Set parameters
    num_weaklearners = 10
    gamma = 0.1
    M = 100
    print 'Num weak learners:', num_weaklearners
    # Test Adaboost.OLM
    model = AdaBoostOLM(loss='logistic')
    model.initialize_dataset(filename, class_index, N)
    dataset = model.get_dataset()
    model.gen_weaklearners(num_weaklearners,
                           min_grace=5, max_grace=20,
                           min_tie=0.01, max_tie=0.9,
                           min_conf=0.01, max_conf=0.9,
                           min_weight=5, max_weight=200)
    # Online protocol: predict on X first, then reveal the label via
    # update(Y).  The prediction is discarded during training.
    for i, row in enumerate(train_rows):
        X = row[1:]
        Y = row[0]
        pred = model.predict(X)
        model.update(Y)
    cnt = 0
    for i, row in enumerate(test_rows):
        X = row[1:]
        Y = row[0]
        pred = model.predict(X)
        # The model keeps learning during evaluation (online setting).
        model.update(Y)
        cnt += (pred == Y)*1
    # Accuracy as a percentage, rounded to two decimals.
    result = round(100 * cnt / float(len(test_rows)), 2)
    print 'Adaboost.OLM:', result
    # Test OnlineMBBM
    model = AdaBoostOLM(loss='zero_one', gamma=gamma)
    model.M = M
    model.initialize_dataset(filename, class_index, N)
    model.gen_weaklearners(num_weaklearners,
                           min_grace=5, max_grace=20,
                           min_tie=0.01, max_tie=0.9,
                           min_conf=0.01, max_conf=0.9,
                           min_weight=5, max_weight=200)
    for i, row in enumerate(train_rows):
        X = row[1:]
        Y = row[0]
        pred = model.predict(X)
        model.update(Y)
    cnt = 0
    for i, row in enumerate(test_rows):
        X = row[1:]
        Y = row[0]
        pred = model.predict(X)
        model.update(Y)
        cnt += (pred == Y)*1
    result = round(100 * cnt / float(len(test_rows)), 2)
    print 'OnlineMBBM:', result
    # Test one vs all method
    model = oneVSall()
    # NOTE(review): unlike the boosting models, this initializer also takes
    # the class count (from the earlier AdaBoost.OLM dataset).
    model.initialize_dataset(filename, class_index, dataset.num_classes(), N)
    model.initialize_binary_learners(num_weaklearners,
                                     min_grace=5, max_grace=20,
                                     min_tie=0.01, max_tie=0.9,
                                     min_conf=0.01, max_conf=0.9,
                                     min_weight=5, max_weight=200)
    for i, row in enumerate(train_rows):
        X = row[1:]
        Y = row[0]
        pred = model.predict(X)
        model.update(Y)
    cnt = 0
    for i, row in enumerate(test_rows):
        X = row[1:]
        Y = row[0]
        pred = model.predict(X)
        model.update(Y)
        cnt += (pred == Y)*1
    result = round(100 * cnt / float(len(test_rows)), 2)
    print 'one vs all:', result
    # Test boosting with weak learners built upon one vs all methods
    model = oneVSallBoost()
    model.initialize_dataset(filename, class_index, N)
    model.gen_weaklearners(num_weaklearners,
                           min_grace=5, max_grace=20,
                           min_tie=0.01, max_tie=0.9,
                           min_conf=0.01, max_conf=0.9,
                           min_weight=5, max_weight=200)
    for i, row in enumerate(train_rows):
        X = row[1:]
        Y = row[0]
        pred = model.predict(X)
        model.update(Y)
    cnt = 0
    for i, row in enumerate(test_rows):
        X = row[1:]
        Y = row[0]
        pred = model.predict(X)
        model.update(Y)
        cnt += (pred == Y)*1
    result = round(100 * cnt / float(len(test_rows)), 2)
    print 'one vs all boost:', result
"Author5_Name", "Author5_Title", "Author5_Org", "Author5_OrgID", "Author6_Name", "Author6_Title", "Author6_Org", "Author6_OrgID", "Image1", "File1", "File2", "File3", "File4", ] rows = get_rows('hub/imports/fixtures/2016Presentations.xlsx', '2016') published = datetime(month=11, day=18, year=2016) bucket_prefix = '/uploads/aashe2016/' def add_files(presentation): file_key = "File%d" for i in range(3, 5): key = file_key % i filename = row[columns.index(key)].value if filename: url = "https://hub-media.aashe.org%s%s" % ( bucket_prefix, filename) print presentation.title print "adding %s" % url file = File.objects.create(
cn = ConferenceName.objects.create(name=conf) kwargs['conf_name'] = cn # get the material type _type = row[columns.index("PresType")].value try: pt = PresentationType.objects.get(name=_type) except: print "Creating PresentationType: %s" % _type pt = PresentationType.objects.create(name=_type) kwargs['presentation_type'] = pt return kwargs rows = get_rows('hub/imports/fixtures/2016Presentations.xlsx', '2016') # run the sanity check first skip_index_list = sanity_check(rows, columns, column_mappings) print "importing presentations" rows = get_rows('hub/imports/fixtures/2016Presentations.xlsx', '2016') count = 0 for row in rows: """ openpyxl returns incomplete rows, so I extend them here. """ if len(row) < len(columns): class MockVal:
import matplotlib.pyplot as plt import numpy as np import scipy.stats as stats from openpyxl import load_workbook from sklearn.linear_model import LinearRegression, RANSACRegressor from utils import replace_comma, split_comma, split_slash, get_mean, get_rows WB = load_workbook(filename='input.xlsx', read_only=True) WS = WB.active RANGE = range(3, 49) ROWS = get_rows(RANGE, WS) P_SIGMA = 3000 Q_SIGMA = 5 class Data: def __init__(self, col: str): self.col = col self.row = ROWS self.list = [col + str(item) for item in self.row] def get_values(self): out = [WS[item].value for item in self.list] return out def get_pressure(self): out = self.get_values() out = [replace_comma(item) for item in out] out = [split_slash(item) for item in out]