def course_topic_display_questions_handler(request, course_id='CSC540', homework_id='CSC540_1'):
    args = {}
    args.update(csrf(request))
    args['course'] = course_id
    args['homework'] = homework_id
    if request.method == 'POST':
        topics_selected = request.POST.getlist('topic')
        cursor = connection.cursor()
        # Use parameterized queries instead of string concatenation to avoid SQL injection
        query = ("select homework_mindifficulty, homework_maxdifficulty "
                 "from homework where homework_id = %s")
        cursor.execute(query, [homework_id])
        result = cursor.fetchall()
        mindifficulty = result[0][0]
        maxdifficulty = result[0][1]
        question_ids = []
        question_texts = []
        for topic in topics_selected:
            query = ("select qb.questionbank_qid, qb.questionbank_text "
                     "from questionbank qb "
                     "where questionbank_level >= %s AND questionbank_level <= %s "
                     "AND questionbank_topic = %s")
            cursor.execute(query, [mindifficulty, maxdifficulty, topic])
            result = cursor.fetchall()
            for row in result:
                question_ids.append(row[0])
                question_texts.append(row[1])
        print zip(question_ids, question_texts)
        args['questionsList'] = zip(question_ids, question_texts)
        cursor.close()
    return render_to_response('course_select_questions.html', args)
def __call__(self, track, slice=None):
    master = self.mMaster
    statement = "SELECT cov_mean, cov_median, sum FROM %(master)s_vs_%(track)s_readcoverage" % locals()
    data = [(x[0], x[1], math.log(x[2]))
            for x in self.getAll(statement) if x[2] > 0]
    return odict(zip(("mean coverage", "median coverage", "length"),
                     zip(*data)))
def test_double_deletion(self):
    cobra_model = self.model
    # turn into a double deletion unit test
    initialize_growth_medium(cobra_model, 'LB')
    # Expected growth rates for the salmonella model with deletions in LB medium
    the_loci = ['STM4081', 'STM0247', 'STM3867', 'STM2952']
    the_genes = tpiA, metN, atpA, eno = list(map(cobra_model.genes.get_by_id, the_loci))
    growth_dict = {}
    growth_list = [[2.41, 2.389, 1.775, 1.81],
                   [2.389, 2.437, 1.86, 1.79],
                   [1.775, 1.86, 1.87, 1.3269],
                   [1.81, 1.79, 1.3269, 1.81]]
    for the_gene, the_rates in zip(the_genes, growth_list):
        growth_dict[the_gene] = dict(zip(the_genes, the_rates))
    the_solution = double_deletion(cobra_model,
                                   element_list_1=the_genes,
                                   element_list_2=the_genes)
    # Potential problem if the data object doesn't have a tolist function
    s_data = the_solution['data'].tolist()
    s_x = the_solution['x']
    s_y = the_solution['y']
    for gene_x, rates_x in zip(s_x, s_data):
        for gene_y, the_rate in zip(s_y, rates_x):
            self.assertAlmostEqual(growth_dict[gene_x][gene_y], the_rate, places=2)
def download(self, cameras, path):
    left_dir = os.path.join(path, 'left')
    right_dir = os.path.join(path, 'right')
    target_dir = os.path.join(path, 'raw')
    if not os.path.exists(target_dir):
        os.mkdir(target_dir)
    left_pages = [os.path.join(left_dir, x)
                  for x in sorted(os.listdir(left_dir))]
    right_pages = [os.path.join(right_dir, x)
                   for x in sorted(os.listdir(right_dir))]
    # Write the orientation as a JPEG comment to the end of the file
    if len(left_pages) != len(right_pages):
        logger.warn("The left and right camera produced an unequal"
                    " amount of images, please fix the problem!")
        logger.warn("Will not combine images")
        return
    if (self.config['first_page']
            and not self.config['first_page'].get(str) == 'left'):
        combined_pages = reduce(operator.add, zip(right_pages, left_pages))
    else:
        combined_pages = reduce(operator.add, zip(left_pages, right_pages))
    logger.info("Combining images.")
    for idx, fname in enumerate(combined_pages):
        fext = os.path.splitext(os.path.split(fname)[1])[1]
        target_file = os.path.join(target_dir, "{0:04d}{1}".format(idx, fext))
        shutil.copyfile(fname, target_file)
    shutil.rmtree(right_dir)
    shutil.rmtree(left_dir)
def test5(self):
    convert_nbody = nbody_system.nbody_to_si(5.0 | units.kg, 10.0 | units.m)

    instance = Huayno(convert_nbody)
    instance.initialize_code()

    particles = datamodel.Particles(2)
    self.assertEquals(len(instance.particles), 0)

    particles.mass = [15.0, 30.0] | units.kg
    particles.radius = [10.0, 20.0] | units.m
    particles.position = [[10.0, 20.0, 30.0], [20.0, 40.0, 60.0]] | units.m
    particles.velocity = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]] | units.m / units.s

    instance.particles.add_particles(particles)
    self.assertEquals(len(instance.particles), 2)

    instance.set_state(1, 16 | units.kg, 20.0 | units.m, 40.0 | units.m, 60.0 | units.m,
                       1.0 | units.ms, 1.0 | units.ms, 1.0 | units.ms)

    curr_state = instance.get_state(1)
    for expected, actual in zip((16 | units.kg, 20.0 | units.m, 40.0 | units.m, 60.0 | units.m,
                                 1.0 | units.ms, 1.0 | units.ms, 1.0 | units.ms, 0 | units.m),
                                curr_state):
        self.assertAlmostRelativeEquals(actual, expected)

    instance.set_state(1, 16 | units.kg, 20.0 | units.m, 40.0 | units.m, 60.0 | units.m,
                       1.0 | units.ms, 1.0 | units.ms, 1.0 | units.ms, 20.0 | units.m)

    curr_state = instance.get_state(1)
    for expected, actual in zip((16 | units.kg, 20.0 | units.m, 40.0 | units.m, 60.0 | units.m,
                                 1.0 | units.ms, 1.0 | units.ms, 1.0 | units.ms, 20 | units.m),
                                curr_state):
        self.assertAlmostRelativeEquals(actual, expected)
def parse(self, basefile): # Find out possible skeleton entries by loading the entire # graph of resource references, and find resources that only # exist as objects. # # Note: if we used download_from_triplestore we know that this list # is clean -- we could just iterate the graph w/o filtering g = Graph() self.log.info("Parsing %s" % basefile) g.parse(self.store.downloaded_path(basefile), format="nt") self.log.info("Compiling object set") # create a uri -> True dict mapping -- maybe? objects = dict(zip([str(o).split("#")[0] for (s, p, o) in g], True)) self.log.info("Compiling subject set") subjects = dict(zip([str(s).split("#")[0] for (s, p, o) in g], True)) self.log.info("%s objects, %s subjects. Iterating through existing objects" % (len(objects), len(subjects))) for o in objects: if not o.startswith(self.config.url): continue if '9999:999' in o: continue if o in subjects: continue for repo in otherrepos: skelbase = repo.basefile_from_uri(repo) if skelbase: skel = repo.triples_from_uri(o) # need to impl with self.store.open_distilled(skelbase, "wb") as fp: fp.write(skel.serialize(format="pretty-xml")) self.log.info("Created skel for %s" % o)
def main():
    attrs = ('high', 'low', 'avg', 'vol', 'vol_cur', 'last',
             'buy', 'sell', 'updated', 'server_time')
    # initialize connection
    connection = btceapi.BTCEConnection()
    f = open('/media/Big Daddy/New_Documents/python_data/ltc_btc_depth.pkl', 'ab')
    while 1:
        # sleep for 1 second between samples
        time.sleep(1)
        try:
            # get ticker
            ticker = btceapi.getTicker("ltc_btc", connection)
            # get asks/bids
            asks, bids = btceapi.getDepth("ltc_btc")
            ask_prices, ask_volumes = zip(*asks)
            bid_prices, bid_volumes = zip(*bids)
            # start list with all of the ticker info
            curTrades = trades(coin='ltc', updated=ticker.updated, server_time=ticker.server_time,
                               ask_prices=ask_prices, ask_volumes=ask_volumes,
                               bid_prices=bid_prices, bid_volumes=bid_volumes,
                               buy=ticker.buy, sell=ticker.sell)
            # now we have a huge list with all the info; pickle it as a single record (protocol 0)
            pickle.dump(curTrades, f)
        # if connection is lost, just try to reconnect (this does seem to happen, so this
        # line is actually pretty important for long data collects)
        except:
            connection = btceapi.BTCEConnection()
            pass
def check_simple_str_interpolation(self, specifiers: List[ConversionSpecifier], replacements: Node) -> None: checkers = self.build_replacement_checkers(specifiers, replacements) if checkers is None: return rhs_type = self.accept(replacements) rep_types = [] # type: List[Type] if isinstance(rhs_type, TupleType): rep_types = rhs_type.items elif isinstance(rhs_type, AnyType): return else: rep_types = [rhs_type] if len(checkers) > len(rep_types): self.msg.too_few_string_formatting_arguments(replacements) elif len(checkers) < len(rep_types): self.msg.too_many_string_formatting_arguments(replacements) else: if len(checkers) == 1: check_node, check_type = checkers[0] check_node(replacements) elif isinstance(replacements, TupleExpr): for checks, rep_node in zip(checkers, replacements.items): check_node, check_type = checks check_node(rep_node) else: for checks, rep_type in zip(checkers, rep_types): check_node, check_type = checks check_type(rep_type)
def test_update(self): User, users = self.classes.User, self.tables.users sess = Session() john, jack, jill, jane = sess.query(User).order_by(User.id).all() sess.query(User).filter(User.age > 29).\ update({'age': User.age - 10}, synchronize_session='evaluate') eq_([john.age, jack.age, jill.age, jane.age], [25, 37, 29, 27]) eq_(sess.query(User.age).order_by( User.id).all(), list(zip([25, 37, 29, 27]))) sess.query(User).filter(User.age > 29).\ update({User.age: User.age - 10}, synchronize_session='evaluate') eq_([john.age, jack.age, jill.age, jane.age], [25, 27, 29, 27]) eq_(sess.query(User.age).order_by( User.id).all(), list(zip([25, 27, 29, 27]))) sess.query(User).filter(User.age > 27).\ update( {users.c.age_int: User.age - 10}, synchronize_session='evaluate') eq_([john.age, jack.age, jill.age, jane.age], [25, 27, 19, 27]) eq_(sess.query(User.age).order_by( User.id).all(), list(zip([25, 27, 19, 27]))) sess.query(User).filter(User.age == 25).\ update({User.age: User.age - 10}, synchronize_session='fetch') eq_([john.age, jack.age, jill.age, jane.age], [15, 27, 19, 27]) eq_(sess.query(User.age).order_by( User.id).all(), list(zip([15, 27, 19, 27])))
def scatter_time_vs_s(time, norm, point_labels, title):
    plt.figure()
    size = 100
    for i, l in enumerate(sorted(norm.keys())):
        # compare strings with != rather than identity ("is not")
        if l != "fbpca":
            plt.scatter(time[l], norm[l], label=l, marker='o', c='b', s=size)
            for label, x, y in zip(point_labels, list(time[l]), list(norm[l])):
                plt.annotate(label, xy=(x, y), xytext=(0, -80),
                             textcoords='offset points', ha='right',
                             arrowprops=dict(arrowstyle="->",
                                             connectionstyle="arc3"),
                             va='bottom', size=11, rotation=90)
        else:
            plt.scatter(time[l], norm[l], label=l, marker='^', c='red', s=size)
            for label, x, y in zip(point_labels, list(time[l]), list(norm[l])):
                plt.annotate(label, xy=(x, y), xytext=(0, 30),
                             textcoords='offset points', ha='right',
                             arrowprops=dict(arrowstyle="->",
                                             connectionstyle="arc3"),
                             va='bottom', size=11, rotation=90)
    plt.legend(loc="best")
    plt.suptitle(title)
    plt.ylabel("norm discrepancy")
    plt.xlabel("running time [s]")
def combine_polys(poly_old, poly_new, mode='add'): '''Add, subtract, or intersect two polygons ''' # TODO: handle overlapping segments better if not mode: raise ValueError seg_old = gather_segments(poly_old) seg_new = gather_segments(poly_new) seg_new_all = break_segments(seg_new, seg_old) seg_old_all = break_segments(seg_old, seg_new) in_old = segments_in_polygon(seg_new_all, poly_old) in_new = segments_in_polygon(seg_old_all, poly_new) if mode == 'Add': keep_in_old = False keep_in_new = False elif mode == 'Subtract': keep_in_old = True keep_in_new = False else: keep_in_old = True keep_in_new = True seg_new = [seg for (seg, is_in_old) in zip(seg_new_all, in_old) if (is_in_old == keep_in_old)] seg_old = [seg for (seg, is_in_new) in zip(seg_old_all, in_new) if (is_in_new == keep_in_new)] seg = order_segments(seg_new + seg_old).tolist() return seg
def update(self): if self.plot_gnss and self.gnss != []: plt.figure(1) gnssT = zip(*self.gnss) gnss_plt = plot(gnssT[0],gnssT[1],'black') ref_gnssT = zip(*self.ref_gnss) ref_gnss_plt = plot(ref_gnssT[0],ref_gnssT[1],'b') if self.plot_pose and self.pose_pos != []: plt.figure(1) poseT = zip(*self.pose_pos) pose_plt = plot(poseT[0],poseT[1],'r') if self.plot_odometry and self.odo != []: plt.figure(2) odoT = zip(*self.odo) odo_plt = plot(odoT[0],odoT[1],'b') if self.plot_yaw: if self.odo_yaw != []: plt.figure(3) odo_yaw_plt = plot(self.odo_yaw,'b') if self.ahrs_yaw != []: plt.figure(3) ahrs_yaw_plt = plot(self.ahrs_yaw,'g') if self.gnss_yaw != []: plt.figure(3) gnss_yaw_plt = plot(self.gnss_yaw, 'black') if self.pose_yaw != []: plt.figure(3) pose_yaw_plt = plot(self.pose_yaw,'r') if self.plot_gnss or self.plot_pose or self.plot_odometry or self.plot_yaw: draw()
def nan_check(i, node, fn):
    """
    Runs `fn` while checking its inputs and outputs for NaNs / Infs.

    Parameters
    ----------
    i :
        Currently ignored. TODO: determine why it is here or remove.
    node : theano.gof.Apply
        The Apply node currently being executed.
    fn : callable
        The thunk to execute for this Apply node.
    """
    inputs = fn.inputs
    for x, var in zip(inputs, node.inputs):
        # If the input is the result of computation, then we
        # don't need to check it. It is already done after the
        # computation.
        if (var.owner is None and
                getattr(var.tag, 'nan_guard_mode_check', True)):
            do_check_on(x[0], node, fn, True)
    fn()
    outputs = fn.outputs
    for x, var in zip(outputs, node.outputs):
        if getattr(var.tag, 'nan_guard_mode_check', True):
            do_check_on(x[0], node, fn, False)
def main():
    # THIS CODE IS RUN ONE TIME ONLY (#SAVETHESERVERS)
    # For Champions:
    # api = RiotAPI('3e888957-13a3-4ba2-901c-fae3e421d998')
    # r = api.get_championList()
    # with open('testChampion.json', 'w') as outfile:
    #     json.dump(r, outfile, indent=4)
    # --------------------------------------------------------
    with open('testChampion.json', 'r') as data_file:
        data = json.load(data_file)
    championIds = [data['data'][championName]['id'] for championName in data['data']]
    championNames = [championName for championName in data['data']]
    championTitles = [data['data'][championName]['title'] for championName in data['data']]
    championReference = dict(zip(championIds, zip(championNames, championTitles)))
    with open('championReference.json', 'w') as outfile:
        json.dump(championReference, outfile, indent=4)
def reconstruct(self, t, x, y, z, core_x, core_y): """Reconstruct angles for many detections :param t: arrival times in the detectors in ns. :param x,y,z: positions of the detectors in m. :param core_x,core_y: core position at z = 0 in m. :return: theta as derived by Montanus2014, phi as derived by Montanus2014. """ if not logic_checks(t, x, y, z): return nan, nan regress2d = RegressionAlgorithm() theta, phi = regress2d.reconstruct_common(t, x, y) dtheta = 1. iteration = 0 while dtheta > 0.001: iteration += 1 if iteration > self.MAX_ITERATIONS: return nan, nan nxnz = tan(theta) * cos(phi) nynz = tan(theta) * sin(phi) nz = cos(theta) x_proj = [xi - zi * nxnz for xi, zi in zip(x, z)] y_proj = [yi - zi * nynz for yi, zi in zip(y, z)] t_proj = [ti + zi / (c * nz) - self.time_delay(xpi, ypi, core_x, core_y, theta, phi) for ti, xpi, ypi, zi in zip(t, x_proj, y_proj, z)] theta_prev = theta theta, phi = regress2d.reconstruct_common(t_proj, x_proj, y_proj) dtheta = abs(theta - theta_prev) return theta, phi
def discrepancy(observed, simulated, expected): """Calculates Freeman-Tukey statistics (Freeman and Tukey 1950) as a measure of discrepancy between observed and r replicates of simulated data. This is a convenient method for assessing goodness-of-fit (see Brooks et al. 2000). D(x|\theta) = \sum_j (\sqrt{x_j} - \sqrt{e_j})^2 :Parameters: observed : Iterable of observed values (length n) simulated : Iterable of simulated values (length rxn) expected : Iterable of expected values (length rxn) :Returns: D_obs : Discrepancy of observed values D_sim : Discrepancy of simulated values """ try: simulated = simulated.astype(float) except AttributeError: simulated = simulated.trace().astype(float) try: expected = expected.astype(float) except AttributeError: expected = expected.trace().astype(float) D_obs = np.sum([(np.sqrt(observed)-np.sqrt(e))**2 for e in expected], 1) D_sim = np.sum([(np.sqrt(s)-np.sqrt(e))**2 for s,e in zip(simulated, expected)], 1) # Print p-value count = sum(s>o for o,s in zip(D_obs,D_sim)) print_('Bayesian p-value: p=%.3f' % (1.*count/len(D_obs))) return D_obs, D_sim
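# Hedged usage sketch for discrepancy() above, with toy arrays whose shapes follow
# the docstring (observed of length n, simulated and expected of shape r x n);
# the names and values here are purely illustrative, not from the original source.
import numpy as np
observed = np.array([3.0, 5.0, 2.0])
expected = np.tile([3.0, 5.0, 2.0], (100, 1))                       # r = 100 replicates
simulated = np.random.poisson(lam=observed, size=(100, 3)).astype(float)
# D_obs, D_sim = discrepancy(observed, simulated, expected)
# D_obs and D_sim would each be arrays of length r, one Freeman-Tukey statistic per replicate.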
def save_results(self): """ Saves the bit_error_probability and corresponding variance in a csv file Saves the codewords, their transmissions, and their resulting decoding in files separted by channel noise variance :return: """ self.save_time = epoch_time = str(int(time.time())) if not os.path.exists("./stats/advanced-run"): os.makedirs("./stats/advanced-run") m = "sum-prod" if self.mode == Decoder.SUM_PROD else "max-prod" with open("stats/advanced-run/%(time)s-%(mode)s-%(num_codewords)s-codewords-variance-bit_error_probability.csv" % { "time": epoch_time, "mode": m, "num_codewords": self.iterations}, 'wb') as result_csv: writer = csv.writer(result_csv) writer.writerow(["variance", "bit_error_probability"]) for v, error in zip(self.variance_levels, self.bit_error_probability): writer.writerow([v, error]) for i, v in enumerate(self.variance_levels): with open("stats/advanced-run/%(time)s-%(mode)s-%(num_codewords)s-codewords-variance-%(var)s-codewords-decoded.csv" % { "time": epoch_time, "mode": m, "num_codewords": str(self.iterations), "var": str(v)}, 'wb') as result_csv: writer = csv.writer(result_csv) writer.writerow(["codeword", "decoded", "transmission"]) for codeword, transmission, decoded in zip(self.codewords[i], self.transmissions[i], self.decoded[i]): writer.writerow([''.join(str(elem) for elem in codeword), ''.join(str(elem) for elem in decoded), ' '.join(str(elem) for elem in transmission)])
def test_scale(self): ls = load_wkt('LINESTRING(240 400 10, 240 300 30, 300 300 20)') # test defaults of 1.0 sls = transform.scale(ls) self.assertTrue(sls.equals(ls)) # different scaling in different dimensions sls = transform.scale(ls, 2, 3, 0.5) els = load_wkt('LINESTRING(210 500 5, 210 200 15, 330 200 10)') self.assertTrue(sls.equals(els)) # Do explicit 3D check of coordinate values for a, b in zip(sls.coords, els.coords): for ap, bp in zip(a, b): self.assertEqual(ap, bp) # retest with named parameters for the same result sls = transform.scale(geom=ls, xfact=2, yfact=3, zfact=0.5, origin='center') self.assertTrue(sls.equals(els)) ## other `origin` parameters # around the centroid sls = transform.scale(ls, 2, 3, 0.5, origin='centroid') els = load_wkt('LINESTRING(228.75 537.5, 228.75 237.5, 348.75 237.5)') self.assertTrue(sls.equals(els)) # around the second coordinate tuple sls = transform.scale(ls, 2, 3, 0.5, origin=ls.coords[1]) els = load_wkt('LINESTRING(240 600, 240 300, 360 300)') self.assertTrue(sls.equals(els)) # around some other 3D Point of origin sls = transform.scale(ls, 2, 3, 0.5, origin=Point(100, 200, 1000)) els = load_wkt('LINESTRING(380 800 505, 380 500 515, 500 500 510)') self.assertTrue(sls.equals(els)) # Do explicit 3D check of coordinate values for a, b in zip(sls.coords, els.coords): for ap, bp in zip(a, b): self.assertEqual(ap, bp)
def _testDistribution(self, initial_known): classes = np.random.randint(5, size=(20000,)) # Uniformly sampled target_dist = [0.9, 0.05, 0.05, 0.0, 0.0] initial_dist = [0.2] * 5 if initial_known else None iterator = (dataset_ops.Dataset.from_tensor_slices(classes).shuffle( 200, seed=21).map(lambda c: (c, string_ops.as_string(c))).apply( resampling.rejection_resample( target_dist=target_dist, initial_dist=initial_dist, class_func=lambda c, _: c, seed=27)).make_initializable_iterator()) init_op = iterator.initializer get_next = iterator.get_next() with self.test_session() as sess: sess.run(init_op) returned = [] with self.assertRaises(errors.OutOfRangeError): while True: returned.append(sess.run(get_next)) returned_classes, returned_classes_and_data = zip(*returned) _, returned_data = zip(*returned_classes_and_data) self.assertAllEqual([compat.as_bytes(str(c)) for c in returned_classes], returned_data) total_returned = len(returned_classes) # Subsampling rejects a large percentage of the initial data in # this case. self.assertGreater(total_returned, 20000 * 0.2) class_counts = np.array([ len([True for v in returned_classes if v == c]) for c in range(5)]) returned_dist = class_counts / total_returned self.assertAllClose(target_dist, returned_dist, atol=1e-2)
def add_lines(self, levels, colors, linewidths, erase=True): """ Draw lines on the colorbar. *colors* and *linewidths* must be scalars or sequences the same length as *levels*. Set *erase* to False to add lines without first removing any previously added lines. """ y = self._locate(levels) igood = (y < 1.001) & (y > -0.001) y = y[igood] if cbook.iterable(colors): colors = np.asarray(colors)[igood] if cbook.iterable(linewidths): linewidths = np.asarray(linewidths)[igood] N = len(y) x = np.array([0.0, 1.0]) X, Y = np.meshgrid(x, y) if self.orientation == "vertical": xy = [list(zip(X[i], Y[i])) for i in range(N)] else: xy = [list(zip(Y[i], X[i])) for i in range(N)] col = collections.LineCollection(xy, linewidths=linewidths) if erase and self.lines: for lc in self.lines: lc.remove() self.lines = [] self.lines.append(col) col.set_color(colors) self.ax.add_collection(col)
def getcallargs(func, *positional, **named): """TFDecorator-aware replacement for inspect.getcallargs. Args: func: A callable, possibly decorated *positional: The positional arguments that would be passed to `func`. **named: The named argument dictionary that would be passed to `func`. Returns: A dictionary mapping `func`'s named arguments to the values they would receive if `func(*positional, **named)` were called. `getcallargs` will use the argspec from the outermost decorator that provides it. If no attached decorators modify argspec, the final unwrapped target's argspec will be used. """ argspec = getargspec(func) call_args = named.copy() this = getattr(func, 'im_self', None) or getattr(func, '__self__', None) if ismethod(func) and this: positional = (this,) + positional remaining_positionals = [arg for arg in argspec.args if arg not in call_args] call_args.update(dict(zip(remaining_positionals, positional))) default_count = 0 if not argspec.defaults else len(argspec.defaults) if default_count: for arg, value in zip(argspec.args[-default_count:], argspec.defaults): if arg not in call_args: call_args[arg] = value return call_args
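# Hedged usage sketch for getcallargs() above, using a toy function whose name is
# purely illustrative: positional arguments fill the remaining argspec slots via
# zip(), and unsupplied defaults are backfilled from argspec.defaults.
def _demo(a, b, c=3):
    return a + b + c

# getcallargs(_demo, 1, b=2) would be expected to return {'a': 1, 'b': 2, 'c': 3}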
def dup_add(f, g, K):
    """
    Add dense polynomials in ``K[x]``.

    Examples
    ========

    >>> from sympy.polys import ring, ZZ
    >>> R, x = ring("x", ZZ)

    >>> R.dup_add(x**2 - 1, x - 2)
    x**2 + x - 3

    """
    if not f:
        return g
    if not g:
        return f

    df = dup_degree(f)
    dg = dup_degree(g)

    if df == dg:
        return dup_strip([ a + b for a, b in zip(f, g) ])
    else:
        k = abs(df - dg)

        if df > dg:
            h, f = f[:k], f[k:]
        else:
            h, g = g[:k], g[k:]

        return h + [ a + b for a, b in zip(f, g) ]
def test_add_patch_info(): """Test adding patch info to source space.""" # let's setup a small source space src = read_source_spaces(fname_small) src_new = read_source_spaces(fname_small) for s in src_new: s['nearest'] = None s['nearest_dist'] = None s['pinfo'] = None # test that no patch info is added for small dist_limit try: add_source_space_distances(src_new, dist_limit=0.00001) except RuntimeError: # what we throw when scipy version is wrong pass else: assert all(s['nearest'] is None for s in src_new) assert all(s['nearest_dist'] is None for s in src_new) assert all(s['pinfo'] is None for s in src_new) # now let's use one that works add_source_space_distances(src_new) for s1, s2 in zip(src, src_new): assert_array_equal(s1['nearest'], s2['nearest']) assert_allclose(s1['nearest_dist'], s2['nearest_dist'], atol=1e-7) assert_equal(len(s1['pinfo']), len(s2['pinfo'])) for p1, p2 in zip(s1['pinfo'], s2['pinfo']): assert_array_equal(p1, p2)
def var_fit(var_mean,var_name): size_list = [] throuput_list = [] for var in var_mean: get_per,set_per = normal_ops get_per_s = "%.2f" % get_per set_per_s = "%.2f" % set_per if (var_name=='key'): upperlimit = 64 filename = 'data_collected/'+str(var)+'_'+str(normal_value)+'_'+str(normal_hash)+'_'+get_per_s+'_'+set_per_s+'.out' elif (var_name=='value'): upperlimit = 2048 filename = 'data_collected/'+str(normal_key)+'_'+str(var)+'_'+str(normal_hash)+'_'+get_per_s+'_'+set_per_s+'.out' with open(filename,'r') as f: load, time = zip(*[(int(line.strip().split(',')[0]), float(line.strip().split(',')[1])) for line in f]) z = np.polyfit(time,load,2) p = np.poly1d(z) size = var throuput = p(1) size_list.append(size) throuput_list.append(throuput) #Record raw data point with open ('data_collected/'+var_name+'_throuput','w') as g: for size,throuput in zip(size_list,throuput_list): g.write(str(size)+','+str(throuput)+'\n') #Recide fit data point z = np.polyfit(np.array(size_list),np.array(throuput_list),1) p = np.poly1d(z) size_fit_list = [i for i in range(1,upperlimit)] throuput_fit_list = [p(i) for i in size_fit_list] with open ('data_collected/'+var_name+'_throuput_fit','w') as g: for size,throuput in zip(np.array(size_fit_list),np.array(throuput_fit_list)): g.write(str(size)+','+str(throuput)+'\n') var_plot(var_name,list(z))
def post(self): self._require_admin() additions = json.loads(self.request.get("youtube_additions_json")) match_keys, youtube_videos = zip(*additions["videos"]) matches = ndb.get_multi([ndb.Key(Match, match_key) for match_key in match_keys]) matches_to_put = [] results = {"existing": [], "bad_match": [], "added": []} for (match, match_key, youtube_video) in zip(matches, match_keys, youtube_videos): if match: if youtube_video not in match.youtube_videos: match.youtube_videos.append(youtube_video) match.dirty = True # hacky matches_to_put.append(match) results["added"].append(match_key) else: results["existing"].append(match_key) else: results["bad_match"].append(match_key) MatchManipulator.createOrUpdate(matches_to_put) self.template_values.update({ "results": results, }) path = os.path.join(os.path.dirname(__file__), '../../templates/admin/videos_add.html') self.response.out.write(template.render(path, self.template_values))
def plotStars(data, pos): n = len(data) #pos = [0.6, 0.68, 0.75, 0.82, 0.97] d = -0.001 # side by side for i, j in zip(xrange(1, n), xrange(n-1)): if data[i, j] != 0: ax.plot([j+width-0.15, i+0.15], [pos[0], pos[0]], linewidth = 2, color = 'black') ax.text(j+width+0.15, pos[0]+d, "*"*data[i,j]) # two side for i,j in zip(xrange(2, n), xrange(n-2)): if data[i,j] != 0 and j%2 != 0: ax.plot([j+width-0.15, i+0.15], [pos[1], pos[1]], linewidth = 2, color = 'black') ax.text(j+2*width+0.15, pos[1]+d, "*"*data[i,j]) elif data[i,j] != 0 and j%2 == 0: ax.plot([j+width-0.15, i+0.15], [pos[2], pos[2]], linewidth = 2, color = 'black') ax.text(j+2*width+0.15, pos[2]+d, "*"*data[i,j]) # for i,j in zip(xrange(3, n), xrange(n-3)): if data[i,j] != 0 and j%2 != 0: ax.plot([j+width-0.15, i+0.15], [pos[3], pos[3]], linewidth = 2, color = 'black') ax.text(j+3*width+0.15, pos[3]+d, "*"*data[i,j]) elif data[i,j] != 0 and j%2 == 0: ax.plot([j+width-0.15, i+0.15], [pos[4], pos[4]], linewidth = 2, color = 'black') ax.text(j+3*width+0.15, pos[4]+d, "*"*data[i,j]) # for i,j in zip(xrange(4, n), xrange(n-4)): if data[i,j] != 0 and j%2 == 0: ax.plot([j+width-0.15, i+0.15], [pos[5], pos[5]], linewidth = 2, color = 'black') ax.text(j+4*width+0.15, pos[5]+d, "*"*data[i,j]) elif data[i,j] != 0 and j%2 != 0: ax.plot([j+width-0.15, i+0.15], [pos[6], pos[6]], linewidth = 2, color = 'black') ax.text(j+4*width+0.15, pos[6]+d, "*"*data[i,j])
def train(self, inp, out, training_weight=1.): inp = np.mat(inp).T out = np.mat(out).T deriv = [] val = inp vals = [val] # forward calculation of activations and derivatives for weight,bias in self.__weights: val = weight*val val += bias deriv.append(self.__derivative(val)) vals.append(self.__activation(val)) deriv = iter(reversed(deriv)) weights = iter(reversed(self.__weights)) errs = [] errs.append(np.multiply(vals[-1]-out, next(deriv))) # backwards propagation of errors for (w,b),d in zip(weights, deriv): errs.append(np.multiply(np.dot(w.T, errs[-1]), d)) weights = iter(self.__weights) for (w,b),v,e in zip(\ self.__weights,\ vals, reversed(errs)): e *= self.__learning_rate*training_weight w -= e*v.T b -= e tmp = vals[-1]-out return np.dot(tmp[0].T,tmp[0])*.5*training_weight
def get_mesh(oqparam): """ Extract the mesh of points to compute from the sites, the sites_csv, or the region. :param oqparam: an :class:`openquake.commonlib.oqvalidation.OqParam` instance """ if oqparam.sites: lons, lats = zip(*sorted(oqparam.sites)) return geo.Mesh(numpy.array(lons), numpy.array(lats)) elif 'sites' in oqparam.inputs: csv_data = open(oqparam.inputs['sites'], 'U').read() coords = valid.coordinates( csv_data.strip().replace(',', ' ').replace('\n', ',')) lons, lats = zip(*sorted(coords)) return geo.Mesh(numpy.array(lons), numpy.array(lats)) elif oqparam.region: # close the linear polygon ring by appending the first # point to the end firstpoint = geo.Point(*oqparam.region[0]) points = [geo.Point(*xy) for xy in oqparam.region] + [firstpoint] try: return geo.Polygon(points).discretize(oqparam.region_grid_spacing) except: raise ValueError( 'Could not discretize region %(region)s with grid spacing ' '%(region_grid_spacing)s' % vars(oqparam)) elif 'site_model' in oqparam.inputs: coords = [(param.lon, param.lat) for param in get_site_model(oqparam)] lons, lats = zip(*sorted(coords)) return geo.Mesh(numpy.array(lons), numpy.array(lats))
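# Minimal sketch of the zip(*sorted(...)) unpacking used in get_mesh() above, with
# toy (lon, lat) pairs chosen purely for illustration: sorting the coordinate pairs
# and transposing them yields separate, consistently ordered lon and lat sequences.
coords = [(10.0, 45.0), (10.5, 45.2), (9.8, 44.9)]
lons, lats = zip(*sorted(coords))
assert lons == (9.8, 10.0, 10.5) and lats == (44.9, 45.0, 45.2)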
def test_get_all_records_different_header(self):
    self.sheet.resize(6, 4)
    # put in new values, made from three lists
    rows = [["", "", "", ""],
            ["", "", "", ""],
            ["A1", "B1", "", "D1"],
            [1, "b2", 1.45, ""],
            ["", "", "", ""],
            ["A4", 0.4, "", 4]]
    cell_list = self.sheet.range('A1:D6')
    for cell, value in zip(cell_list, itertools.chain(*rows)):
        cell.value = value
    self.sheet.update_cells(cell_list)

    # first, read empty strings to empty strings
    read_records = self.sheet.get_all_records(head=3)
    d0 = dict(zip(rows[2], rows[3]))
    d1 = dict(zip(rows[2], rows[4]))
    d2 = dict(zip(rows[2], rows[5]))
    self.assertEqual(read_records[0], d0)
    self.assertEqual(read_records[1], d1)
    self.assertEqual(read_records[2], d2)

    # then, read empty strings to zeros
    read_records = self.sheet.get_all_records(empty2zero=True, head=3)
    d1 = dict(zip(rows[2], (0, 0, 0, 0)))
    self.assertEqual(read_records[1], d1)
def dup_sub(f, g, K):
    """
    Subtract dense polynomials in ``K[x]``.

    Examples
    ========

    >>> from sympy.polys import ring, ZZ
    >>> R, x = ring("x", ZZ)

    >>> R.dup_sub(x**2 - 1, x - 2)
    x**2 - x + 1

    """
    if not f:
        return dup_neg(g, K)
    if not g:
        return f

    df = dup_degree(f)
    dg = dup_degree(g)

    if df == dg:
        return dup_strip([ a - b for a, b in zip(f, g) ])
    else:
        k = abs(df - dg)

        if df > dg:
            h, f = f[:k], f[k:]
        else:
            h, g = dup_neg(g[:k], K), g[k:]

        return h + [ a - b for a, b in zip(f, g) ]
def _update_classification_scores(self, configurations): """ Wraps the stacks, buffers and contextualized token data of all given configurations into a tensor which is then passed through the MLP to compute the classification scores in the configurations. :param configurations: List of not finished Configurations :return: Updated Configurations """ stacks = [c.stack_tensor for c in configurations] stacks_padded = pad_tensor_list(stacks) stacks_lengths = torch.tensor( [len(c.stack) for c in configurations], dtype=torch.int64, device=self.device ) buffers = [c.buffer_tensor for c in configurations] buffers_padded = pad_tensor_list(buffers) buffer_lengths = torch.tensor( [len(c.buffer) for c in configurations], dtype=torch.int64, device=self.device ) clf_transitions, clf_relations = self.model( sentences=torch.stack([c.sentence_features for c in configurations]), sentence_lengths=None, sentence_encoding_batch=[c.contextualized_input for c in configurations], buffers=buffers_padded, buffer_lengths=buffer_lengths, stacks=stacks_padded, stack_lengths=stacks_lengths, ) # Isolate the columns for the transitions left_arc = clf_transitions[:, T.LEFT_ARC.value].view(-1, 1) right_arc = clf_transitions[:, T.RIGHT_ARC.value].view(-1, 1) shift = clf_transitions[:, T.SHIFT.value].view(-1, 1) swap = clf_transitions[:, T.SWAP.value].view(-1, 1) # Isolate the columns for the different relations relation_slices = self.model.relations.slices shift_relations = clf_relations[:, relation_slices[T.SHIFT]] swap_relations = clf_relations[:, relation_slices[T.SWAP]] left_arc_relations = clf_relations[:, relation_slices[T.LEFT_ARC]] right_arc_relations = clf_relations[:, relation_slices[T.RIGHT_ARC]] # Add them in one batch shift_score_batch = torch.add(shift, shift_relations) swap_score_batch = torch.add(swap, swap_relations) left_arc_scores_batch = torch.add(left_arc, left_arc_relations) right_arc_scores_batch = torch.add(right_arc, right_arc_relations) # For the left and right arc scores, we're only interested in the # two best entries, so we extract then in one go. 
left_arc_scores_sorted, left_arc_scores_indices = torch.sort( left_arc_scores_batch, descending=True ) right_arc_scores_sorted, right_arc_scores_indices = torch.sort( right_arc_scores_batch, descending=True ) # Only take the best two items left_arc_scores_sorted = left_arc_scores_sorted[:, :2] right_arc_scores_sorted = right_arc_scores_sorted[:, :2] # We need them later in RAM, so retrieve them all at once from the gpu left_arc_scores_indices = left_arc_scores_indices[:, :2].cpu().numpy() right_arc_scores_indices = right_arc_scores_indices[:, :2].cpu().numpy() class Combination(NamedTuple): configuration: Configuration shift_score: torch.Tensor swap_score: torch.Tensor left_arc_scores: torch.Tensor left_arc_scores_indices: np.array left_arc_scores_sorted: torch.Tensor right_arc_scores: torch.Tensor right_arc_scores_indices: np.array right_arc_scores_sorted: torch.Tensor def apply(self): self.configuration.scores = { T.SHIFT: self.shift_score, T.SWAP: self.swap_score, T.LEFT_ARC: self.left_arc_scores, (T.LEFT_ARC, "best_scores"): self.left_arc_scores_sorted, (T.LEFT_ARC, "best_scores_indices"): self.left_arc_scores_indices, T.RIGHT_ARC: self.right_arc_scores, (T.RIGHT_ARC, "best_scores"): self.right_arc_scores_sorted, (T.RIGHT_ARC, "best_scores_indices"): self.right_arc_scores_indices, } combinations = zip( configurations, shift_score_batch, swap_score_batch, left_arc_scores_batch, left_arc_scores_indices, left_arc_scores_sorted, right_arc_scores_batch, right_arc_scores_indices, right_arc_scores_sorted, ) # Update the result of the classifiers in the configurations for combination in combinations: Combination(*combination).apply() return configurations, clf_transitions, clf_relations
def remove_one(model, batch, n_beams, indices, removed_indices, max_beam_size, target_s=None, target_e=None): n_examples = len(n_beams) has_label = False if target_s is not None and target_e is not None: has_label = True onehot_grad = get_onehot_grad(model, batch, target_s, target_e) else: onehot_grad = get_onehot_grad(model, batch) onehot_grad = onehot_grad.data.cpu().numpy() question = batch[5] question_mask = batch[6] question_lengths = [real_length(x) for x in question] new_batch = [] new_n_beams = [] new_indices = [] new_removed_indices = [] if has_label: new_target_s = [] new_target_e = [] start = 0 for example_idx in range(n_examples): if n_beams[example_idx] == 0: new_n_beams.append(0) continue coordinates = [] # i_in_batch, j_in_question # ignore PADs for i in range(start, start + n_beams[example_idx]): if real_length(question[i]) == 1: continue word_order = np.argsort(-onehot_grad[i][:question_lengths[i]]) coordinates += [(i, j) for j in word_order[:max_beam_size]] if len(coordinates) == 0: start += n_beams[example_idx] new_n_beams.append(0) continue coordinates = np.asarray(coordinates) scores = onehot_grad[coordinates[:, 0], coordinates[:, 1]] scores = sorted(zip(coordinates, scores), key=lambda x: -x[1]) coordinates = np.asarray([x for x, _ in scores[:max_beam_size]]) assert all(j < question_lengths[i] for i, j in coordinates) if not all(j < len(indices[i]) for i, j in coordinates): for i, j in coordinates: print('i', i) print('j', j) print('ql', question_lengths[i]) print('len(indices)', len(indices[i])) print(indices[i]) print() cnt = 0 for i, j in coordinates: # because stupid tensor doesn't support proper indexing q, qm = [], [] if j > 0: q.append(question[i][:j]) qm.append(question_mask[i][:j]) if j + 1 < question[i].shape[0]: q.append(question[i][j + 1:]) qm.append(question_mask[i][j + 1:]) if len(q) > 0: new_entry = [x[i] for x in batch] new_entry[5] = torch.cat(q, 0) new_entry[6] = torch.cat(qm, 0) new_batch.append(new_entry) new_removed_indices.append(removed_indices[i] + [indices[i][j]]) new_indices.append(indices[i][:j] + indices[i][j + 1:]) if has_label: new_target_s.append(target_s[i]) new_target_e.append(target_e[i]) cnt += 1 start += n_beams[example_idx] new_n_beams.append(cnt) new_batch = list(map(list, zip(*new_batch))) batch = [torch.stack(c, 0) for c in new_batch[:7]] batch += new_batch[7:] if has_label: new_target_s = torch.cat(new_target_s) new_target_e = torch.cat(new_target_e) return batch, new_n_beams, new_indices, new_removed_indices, \ new_target_s, new_target_e else: return batch, new_n_beams, new_indices, new_removed_indices
def enum(*sequential, **named):
    """Handy way to fake an enumerated type in Python

    http://stackoverflow.com/questions/36932/how-can-i-represent-an-enum-in-python
    """
    enums = dict(zip(sequential, range(len(sequential))), **named)
    return type('Enum', (), enums)
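# Hedged usage sketch for the enum() helper above (the names are purely illustrative):
# sequential names are numbered from 0 via zip(sequential, range(...)), while keyword
# names keep their explicit values.
Numbers = enum('ZERO', 'ONE', 'TWO', THREE=3)
assert Numbers.ZERO == 0 and Numbers.ONE == 1 and Numbers.THREE == 3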
def test_ref_table(self):
    tables = list(self.votable.iter_tables())
    for x, y in zip(tables[0].array.data[0], tables[1].array.data[0]):
        assert_array_equal(x, y)
def getCellWidthData(queryfuel): """Given a queryfuel, return a list of dictionaries of all Cell Width data scraped from the detonation database. **Parameters:** queryfuel string, The text with which the website denotes a fuel you'd like to query. **Returns:** returndict dictionary, structured with a filename as a key, and its associated dataframe as a value. **Example:** :: c2h4data = getCellWidthData(queryfuel="C2H4") for _, (k, v) in enumerate(c2h4data.items()): print(f"This is the filename: | {k} | and here is the data:") print(v) print("") Output: :: ... This is the filename: | at172d.txt | and here is the data: Category Fuel Sub-Category Oxidizer Initial Pressure (kPa) Diluent Equivalence Ratio Initial Temperature (K) Cell Width (mm) 0 cell size C2H4 width Air 101.3 1 298.15 19.5 1 cell size C2H4 width Air 101.3 1 373.15 16.0 **Example:** :: c2h4data = getCellWidthData(queryfuel="C2H4") oxidiserdict = {} for _, (k, v) in enumerate(c2h4data.items()): oxidisers = list(v["Oxidizer"]) for oxidiser in oxidisers: if oxidiser in list(oxidiserdict.keys()): oxidiserdict[oxidiser] += 1 else: oxidiserdict[oxidiser] = 0 print("{'Oxidiser': Number of Hits} ==> ", oxidiserdict) Output: :: {'Oxidiser': Number of Hits} ==> {'O2': 92, 'Air': 53} """ urlext = _getCellWidthURLext(queryfuel=queryfuel) # Get the Cell width page and create a soup object of it page_cellwidths = requests.get(ROOT_URL + "html/" + urlext) soup = BeautifulSoup(page_cellwidths.content, "html.parser") # Create list of all available text files txtfiles = soup.find_all(string=lambda text: ".txt" in text) # Turn the list of discovered textfiles into a list of URL extensions urlext_list = [] for txtfile in txtfiles: urlext = list(soup.find("a", string=txtfile)["href"]) while (urlext.pop(0) != "/"): pass urlext_list.append("".join(urlext)) # Find all the relevant tables in the webpage and clean them up cellwidthdata_list = [] for table in soup.find_all("table"): # The tables we are interested in have no attributes if len(table.attrs) == 0: # Keep all the detected table elements in a list tablestrings = [] for string in table.strings: tablestrings.append(str(string)) # Clean up the table elements tablestrings = list( filter(lambda element: element != "\n", tablestrings)) for i in range(len(tablestrings)): # Remove leading '\n' characters while ("\n" in tablestrings[i]): tablestrings[i] = tablestrings[i][1:] # Remove leading and trailing spaces if len(tablestrings[i]) >= 2: tablestrings[i] = (tablestrings[i][1:] if tablestrings[i][0] == " " else tablestrings[i]) tablestrings[i] = (tablestrings[i][:-1] if tablestrings[i][-1] == " " else tablestrings[i]) # Remove trailing colons tablestrings[i] = (tablestrings[i][:-1] if tablestrings[i][-1] == ":" else tablestrings[i]) # Cleaning up values if i % 2 == 1: # Check if value had units, and move units to the key stringcomponents = tablestrings[i].split(" ") if is_number(stringcomponents[0]) is True: if stringcomponents[-1].isalpha() is True: tablestrings[i - 1] += f" ({stringcomponents[-1]})" tablestrings[i] = float(stringcomponents[0]) # Check if value is a range, and mark for omission if it is val = "".join( stringcomponents[:-1]) if stringcomponents[-1].isalpha( ) == True else "".join(stringcomponents) if "-" in val: if all(is_number(elem) for elem in val.split("-")) == True: tablestrings[i - 1] = "*delete" tablestrings[i] = "*delete" # Delete keys and values marked for deletion tablestrings = list( filter(lambda element: element != "*delete", tablestrings)) # Package every element of 
tablestrings in alternating dictionary keys and values cellwidthdata_list.append( dict(zip(tablestrings[::2], tablestrings[1::2]))) # Package each dictionary of table data collected with the csv data that goes with it for i in range(len(cellwidthdata_list)): pandasdf = _pandascleaner( pd.read_table(f"{ROOT_URL}{urlext_list[i]}", delimiter=",", dtype=float)) cellwidthdata_list[i] = _recastaspandas( basepandadf=pandasdf, dicttorecast=cellwidthdata_list[i]) returndict = dict(zip(txtfiles, cellwidthdata_list)) return returndict
def dEdt_ej(self, r0, v_orb, v_cut=-1, n_kick=N_KICK, correction=np.ones(N_GRID)): """Calculate carried away by particles which are completely unbound. Parameters: - r0 : radial position of the perturbing body [pc] - v_orb: orbital velocity [km/s] - v_cut: optional, only scatter with particles slower than v_cut [km/s] defaults to v_max(r) (i.e. all particles) - n_kick: optional, number of grid points to use when integrating over Delta-eps (defaults to N_KICK = 100). """ if v_cut < 0: v_cut = self.v_max(r0) T_orb = (2 * np.pi * r0 * pc_to_km) / v_orb dE = np.zeros(N_GRID) # Calculate sizes of kicks and corresponding weights for integration if n_kick == 1: # Replace everything by the average if n_kick = 1 delta_eps_list = ( -2 * v_orb ** 2 * np.log(1 + self.Lambda ** 2) / self.Lambda ** 2, ) frac_list = (1,) else: b_list = np.geomspace(self.b_min(v_orb), self.b_max(v_orb), n_kick) delta_eps_list = self.delta_eps_of_b(v_orb, b_list) # Step size for trapezoidal integration step = delta_eps_list[1:] - delta_eps_list[:-1] step = np.append(step, 0) step = np.append(0, step) # Make sure that the integral is normalised correctly renorm = np.trapz(self.P_delta_eps(v_orb, delta_eps_list), delta_eps_list) frac_list = 0.5 * (step[:-1] + step[1:]) / renorm # Sum over the kicks for delta_eps, b, frac in zip(delta_eps_list, b_list, frac_list): # Maximum impact parameter which leads to the ejection of particles b_ej_sq = self.b_90(v_orb) ** 2 * ((2 * v_orb ** 2 / self.eps_grid) - 1) # Define which energies are allowed to scatter mask = ( (self.eps_grid > self.psi(r0) * (1 - b / r0) - 0.5 * v_cut ** 2) & (self.eps_grid < self.psi(r0) * (1 + b / r0)) & (b ** 2 < b_ej_sq) ) r_eps = G_N * self.M_BH / self.eps_grid[mask] r_cut = G_N * self.M_BH / (self.eps_grid[mask] + 0.5 * v_cut ** 2) if np.sum(mask) > 0: L1 = np.minimum((r0 - r0 ** 2 / r_eps) / b, 0.999999) alpha1 = np.arccos(L1) L2 = np.maximum((r0 - r0 ** 2 / r_cut) / b, -0.999999) alpha2 = np.arccos(L2) m = (2 * b / r0) / (1 - (r0 / r_eps) + b / r0) mask1 = (m <= 1) & (alpha2 > alpha1) mask2 = (m > 1) & (alpha2 > alpha1) N1 = np.zeros(len(m)) if np.sum(mask1) > 0: N1[mask1] = ellipe(m[mask1]) - ellipeinc( (np.pi - alpha2[mask1]) / 2, m[mask1] ) if np.sum(mask2) > 0: N1[mask2] = ellipeinc_alt((np.pi - alpha1[mask2]) / 2, m[mask2]) dE[mask] += ( -frac * correction[mask] * self.f_eps[mask] * (1 + b ** 2 / self.b_90(v_orb) ** 2) ** 2 * np.sqrt(1 - r0 / r_eps + b / r0) * N1 * (self.eps_grid[mask] + delta_eps) ) norm = ( 2 * np.sqrt(2 * (self.psi(r0))) * 4 * np.pi ** 2 * r0 * (self.b_90(v_orb) ** 2 / (v_orb) ** 2) ) return norm * np.trapz(dE, self.eps_grid) / T_orb
def dfdt_plus(self, r0, v_orb, v_cut=-1, n_kick=1, correction=1): """Particles to add back into distribution function from E - dE -> E.""" if v_cut < 0: v_cut = self.v_max(r0) T_orb = (2 * np.pi * r0 * pc_to_km) / v_orb df = np.zeros(N_GRID) # Calculate sizes of kicks and corresponding weights for integration if n_kick == 1: # Replace everything by the average if n_kick = 1 delta_eps_list = ( -2 * v_orb ** 2 * np.log(1 + self.Lambda ** 2) / self.Lambda ** 2, ) frac_list = (1,) else: b_list = np.geomspace(self.b_min(v_orb), self.b_max(v_orb), n_kick) delta_eps_list = self.delta_eps_of_b(v_orb, b_list) # Step size for trapezoidal integration step = delta_eps_list[1:] - delta_eps_list[:-1] step = np.append(step, 0) step = np.append(0, step) # Make sure that the integral is normalised correctly renorm = np.trapz(self.P_delta_eps(v_orb, delta_eps_list), delta_eps_list) frac_list = 0.5 * (step[:-1] + step[1:]) / renorm # Sum over the kicks for delta_eps, b, frac in zip(delta_eps_list, b_list, frac_list): # Value of specific energy before the kick eps_old = self.eps_grid - delta_eps # Define which energies are allowed to scatter mask = (eps_old > self.psi(r0) * (1 - b / r0) - 0.5 * v_cut ** 2) & ( eps_old < self.psi(r0) * (1 + b / r0) ) # Sometimes, this mask has no non-zero entries if np.sum(mask) > 0: r_eps = G_N * self.M_BH / eps_old[mask] r_cut = G_N * self.M_BH / (eps_old[mask] + 0.5 * v_cut ** 2) # Distribution of particles before they scatter f_old = self.interpolate_DF(eps_old[mask], correction) L1 = np.minimum((r0 - r0 ** 2 / r_eps) / b, 0.999999) alpha1 = np.arccos(L1) L2 = np.maximum((r0 - r0 ** 2 / r_cut) / b, -0.999999) alpha2 = np.arccos(L2) m = (2 * b / r0) / (1 - (r0 / r_eps) + b / r0) mask1 = (m <= 1) & (alpha2 > alpha1) mask2 = (m > 1) & (alpha2 > alpha1) N1 = np.zeros(len(m)) if np.sum(mask1) > 0: N1[mask1] = ellipe(m[mask1]) - ellipeinc( (np.pi - alpha2[mask1]) / 2, m[mask1] ) if np.sum(mask2) > 0: N1[mask2] = ellipeinc_alt( (np.pi - alpha1[mask2]) / 2, m[mask2] ) # - ellipeinc_alt((np.pi - alpha2[mask2])/2, m[mask2]) df[mask] += ( frac * f_old * (1 + b ** 2 / self.b_90(v_orb) ** 2) ** 2 * np.sqrt(1 - r0 / r_eps + b / r0) * N1 ) T_orb = (2 * np.pi * r0 * pc_to_km) / v_orb norm = ( 2 * np.sqrt(2 * (self.psi(r0))) * 4 * np.pi ** 2 * r0 * (self.b_90(v_orb) ** 2 / (v_orb) ** 2) ) return norm * df / T_orb / self.DoS
"Value".center(12),)) print(25 * "-" + ("|" + "-" * 14) * 1) for key, value in arguments.items(): print("%s \t | %s " % (str(key).ljust(16), str(value).strip().center(12))) print("") print("Sampling algorithm performance:") print("===============================") print("Results are averaged over %s repetition(s)." % opts.n_times) print("") fig = plt.figure('scikit-learn sample w/o replacement benchmark results') plt.title("n_population = %s, n_times = %s" % (opts.n_population, opts.n_times)) ax = fig.add_subplot(111) for name in sampling_algorithm: ax.plot(ratio, time[name], label=name) ax.set_xlabel('ratio of n_sample / n_population') ax.set_ylabel('Time (s)') ax.legend() # Sort legend labels handles, labels = ax.get_legend_handles_labels() hl = sorted(zip(handles, labels), key=operator.itemgetter(1)) handles2, labels2 = zip(*hl) ax.legend(handles2, labels2, loc=0) plt.show()
insert_character = """ INSERT INTO charactercreator_character (name, level, exp, hp, strength, intelligence, dexterity,wisdom) VALUES """ + str(character[1:]) + ";" pg_curs.execute(insert_character) #showing the table we just made in elephent sql pg_curs.execute('SELECT * FROM charactercreator_character;') #example to show everything has been updated to elephentsql! pg_curs.fetchall() #closing and commiting to save changes pg_curs.close() pg_conn.commit() #now reopening the connection to check for errors! pg_curs = pg_conn.cursor() pg_curs.execute('SELECT * from charactercreator_character;') pg_characters = pg_curs.fetchall() #first row in sqlite characters[0] #first row in sqelephant pg_characters[0] #writing to verify that entries all coppied over accurately! for character, pg_character in zip(characters, pg_characters): assert character == pg_character
def generate_genetic_model(self): start_time = time.time() gen_structure = self.gen_structure # All variables v_len = len(self.gen_structure.vertex_var) p_len = len(self.gen_structure.p_var) N = v_len + p_len pop_num = 100 num_of_random_el = 6 max_num_step = 10000 # ======================== # I. Initial population for p and v # ======================== parent_population_v = numpy.zeros(shape=(pop_num, v_len)) parent_population_p = numpy.zeros(shape=(pop_num, p_len)) for i in range(0, pop_num): # parent_population_v[i, :] = self._create_smart_random_solution(v_len, self.T) # parent_population_v[i, :] = self._create_random_solution(v_len, self.T) parent_population_v[i, :] = self._create_fixed_solution() # parent_population_p[i, :] = self._create_random_solution(p_len, 2) num_steps = 0 min_objective = sys.maxint while num_steps < max_num_step: # =========================== # II. Offspring population: uniform crossover of two vectors + mutation # =========================== offspring_population_v = numpy.zeros(shape=(pop_num, v_len)) offspring_population_p = numpy.zeros(shape=(pop_num, p_len)) i = 0 final_population = None # crossover = "uniform" # crossover = "cross" crossover = "part" while i < pop_num: parent_population = numpy.hstack((parent_population_v, parent_population_p)) parent_1 = self._get_parent(parent_population, num_of_random_el) parent_2 = self._get_parent(parent_population, num_of_random_el) parent_v_1 = parent_1[:v_len] parent_v_2 = parent_2[:v_len] parent_p_1 = parent_1[v_len:] parent_p_2 = parent_2[v_len:] if crossover == "cross": offspring_v_1, offspring_v_2 = self._generate_offspring_cross(parent_v_1, parent_v_2, self.T) # offspring_p_1, offspring_p_2 = self._generate_offspring_cross(parent_p_1, parent_p_2, 2) offspring_population_v[i, :] = offspring_v_1 offspring_population_v[i + 1, :] = offspring_v_2 # offspring_population_p[i, :] = offspring_p_1 # offspring_population_p[i + 1, :] = offspring_p_2 i += 2 elif crossover == "uniform": offspring_v_1 = self._generate_offspring_uniform(parent_v_1, parent_v_2, self.T) offspring_p_1 = self._generate_offspring_uniform(parent_p_1, parent_p_2, 2) offspring_population_v[i, :] = offspring_v_1 offspring_population_p[i, :] = offspring_p_1 i += 1 elif crossover == "part": offspring_v_1 = self._generate_offspring_uniform_part(parent_v_1, parent_v_2, self.T) # offspring_p_1 = self._generate_offspring_uniform_part(parent_p_1, parent_p_2, 2) offspring_population_v[i, :] = offspring_v_1 # offspring_population_p[i, :] = offspring_p_1 i += 1 offspring_population = numpy.hstack((offspring_population_v, offspring_population_p)) obj_func_value = numpy.apply_along_axis(self._objective_function, axis=1, arr=offspring_population) min_objective = min(obj_func_value) print min_objective if abs(min_objective) < 5: final_population = offspring_population argmin_obj_value = obj_func_value.argmin() break parent_population_v = offspring_population_v parent_population_p = offspring_population_p num_steps += 1 vp_final = final_population[argmin_obj_value] vertex_var = gen_structure.vertex_var p_var = gen_structure.p_var v_final = vp_final[:v_len] p_final = vp_final[v_len:] print "Objective function: " + str(min_objective) print("--- %s seconds ---" % (time.time() - start_time)) for vertex, val in zip(vertex_var.values(), v_final.tolist()): print vertex + "\t" + str(val) print "\n" for vertex, val in zip(p_var.values(), p_final.tolist()): print vertex + "\t" + str(val) pass
def visualize_dataset(dataset, train_classes=None, train_data=None, train_filenames=None, train_labels=None, test_classes=None, test_data=None, test_filenames=None, test_labels=None, visualize_hexagonal=None, create_h5=False, verbosity_level=2): Hexnet_print(f'Visualizing dataset {dataset}') start_time = time() if create_h5: dataset = f'{dataset}_visualized.h5' create_dataset_h5(dataset, train_classes, train_data, train_filenames, train_labels, test_classes, test_data, test_filenames, test_labels) elif os.path.isfile(dataset) and dataset.lower().endswith('.csv'): dataset_visualized = f'{dataset}_visualized' with open(dataset) as dataset_file: dataset_reader = csv.reader(dataset_file) dataset_data = list(dataset_reader)[1:] for label, filename, data in tqdm(dataset_data): current_output_dir = os.path.join(dataset_visualized, label) os.makedirs(current_output_dir, exist_ok=True) with open(os.path.join(current_output_dir, filename), 'w') as current_data_file: print(data.replace('"', ''), file=current_data_file) else: dataset_visualized = f'{dataset}_visualized' if os.path.isfile(dataset) and dataset.lower().endswith('.h5'): for current_class in train_classes: os.makedirs(os.path.join(dataset_visualized, 'train', current_class), exist_ok=True) for current_class in test_classes: os.makedirs(os.path.join(dataset_visualized, 'test', current_class), exist_ok=True) else: shutil.copytree(dataset, dataset_visualized, ignore=copytree_ignore_files) for current_set, current_data, current_filenames, current_labels in \ zip(('train', 'test'), (train_data, test_data), (train_filenames, test_filenames), (train_labels, test_labels)): if verbosity_level >= 1: Hexnet_print(f'\t> current_set={current_set}') if not current_data.size: continue for file, filename, label in zip(tqdm(current_data), current_filenames, current_labels): filename = os.path.join(dataset_visualized, current_set, label, filename) if verbosity_level >= 3: Hexnet_print(f'\t\t\t> filename={filename}') filename_lower = filename.lower() if filename_lower.endswith('.csv'): np.savetxt(filename, np.reshape(file, newshape=(1, file.shape[0])), delimiter=',') elif filename_lower.endswith('.npy'): np.save(filename, file) else: if not visualize_hexagonal: imsave(filename, file) else: filename = '.'.join(filename.split('.')[:-1]) visualize_hexarray(normalize_array(file), filename) time_diff = time() - start_time Hexnet_print(f'Visualized dataset {dataset} in {time_diff:.3f} seconds')
def select(self, population, discard_percent=0, k=config.evolution.tournament_size): """Select individuals based on fitness sharing""" if config.evolution.evaluation.type == "spatial": return self.select_spatial(population) ### TOURNAMENT TEST # population_size = len(population.phenotypes()) # phenotypes = population.phenotypes() # selected = [] # for i in range(population_size): # p = np.random.choice(phenotypes, 3, replace=False).tolist() # p.sort(key=lambda x: x.fitness()) # selected.append([p[0], p[0]]) # return [selected] ### population_size = len(population.phenotypes()) species_selected = [] species_list = population.species_list average_species_fitness_list = [] for species in species_list[:]: species.remove_invalid() # discard invalid individuals if len(species) > 0: average_species_fitness_list.append(species.average_fitness()) else: species_list.remove(species) total_fitness = np.sum(average_species_fitness_list) # initialize raw sizes with equal proportion raw_sizes = [population_size / len(species_list)] * len(species_list) if total_fitness != 0: # calculate proportional sizes when total fitness is not zero raw_sizes = [ average_species_fitness / total_fitness * population_size for average_species_fitness in average_species_fitness_list ] sizes = tools.round_array(raw_sizes, max_sum=population_size, invert=True) for species_obj, size in zip(species_list, sizes): size = int(size) # discard the lowest-performing individuals species = species_obj.best_percent(1 - discard_percent) # tournament selection inside species selected = [] # ensure that the best was selected if config.evolution.speciation.keep_best and size > 0: selected.append([species[0]]) orig_species = list(species) for i in range(size - len(selected)): parents = [] for l in range(2): winner = None for j in range(k): random_index = np.random.randint(0, len(species)) if winner is None or species[random_index].fitness( ) < winner.fitness(): winner = species[random_index] del species[ random_index] # remove element to emulate draw without replacement if len( species ) == 0: # restore original list when there is no more individuals to draw species = list(orig_species) parents.append(winner) if config.evolution.crossover_rate == 0: # do not draw another individual from the population if there is no probability of crossover break selected.append(parents) species_selected.append(selected) return species_selected
col7.append('nyr')
col8.append('yrb')
col9.append('yre')
col10.append('elev')
col11.append('CT')
col12.append('CN')
col13.append('River_Name')
col14.append('OCN')

for i in range(300):
    col1.append(No[i])
    col2.append(m2s_ratio[i])
    col3.append(lonm[i])
    col4.append(latm[i])
    col5.append(area[i])
    col6.append(Vol[i])
    col7.append(nyr[i])
    col8.append(yrb[i])
    col9.append(yre[i])
    col10.append(elev[i])
    col11.append(CT[i])
    col12.append(CN[i])
    col13.append(River_Name[i])
    col14.append(OCN[i])

rows = zip(col1, col2, col3, col4, col5, col6, col7,
           col8, col9, col10, col11, col12, col13, col14)

with open("top_300_rios.csv", "w") as output:
    # create the writer once, outside the row loop
    writer = csv.writer(output, lineterminator='\n')
    for row in rows:
        writer.writerow(row)
def text_to_instance(self, question_text: str, passage_text: str, passage_tokens: List[Token], passage_sentence_tokens: List[List[Token]], numbers_in_passage: List[Any], number_words : List[str], number_indices: List[int], number_len: List[int], sentence_indices: List[int], question_id: str = None, passage_id: str = None, answer_annotations: List[Dict] = None ) -> Union[Instance, None]: # Tokenize question and passage question_tokens = self.tokenizer.tokenize(question_text) qlen = len(question_tokens) plen = len(passage_tokens) question_passage_tokens = [Token('[CLS]')] + question_tokens + [Token('[SEP]')] + passage_tokens question_passage_sentence_tokens = [[Token('[CLS]')] + question_tokens + [Token('[SEP]')] + sentence_tokens + [Token('[SEP]')] for sentence_tokens in passage_sentence_tokens] passage_sentence_tokens = [[Token('[CLS]')] + sentence_tokens + [Token('[SEP]')] for sentence_tokens in passage_sentence_tokens] question_passage_sentence_mask = [-1] * (qlen + 2) + sentence_indices if len(question_passage_tokens) > self.max_pieces - 1: question_passage_tokens = question_passage_tokens[:self.max_pieces - 1] question_passage_sentence_mask = question_passage_sentence_mask[:self.max_pieces - 1] passage_tokens = passage_tokens[:self.max_pieces - qlen - 3] sentence_indices = sentence_indices[:self.max_pieces - qlen - 3] plen = len(passage_tokens) number_indices, number_len, numbers_in_passage = \ clipped_passage_num(number_indices, number_len, numbers_in_passage, plen) question_passage_tokens += [Token('[SEP]')] question_passage_sentence_mask += [-1] number_indices = [index + qlen + 2 for index in number_indices] + [-1] # Not done in-place so they won't change the numbers saved for the passage number_len = number_len + [1] numbers_in_passage = numbers_in_passage + [0] number_tokens = [Token(str(number)) for number in numbers_in_passage] extra_number_tokens = [Token(str(num)) for num in self.extra_numbers] mask_indices = [0, qlen + 1, len(question_passage_tokens) - 1] fields: Dict[str, Field] = {} # Add feature fields question_passage_field = TextField(question_passage_tokens, self.token_indexers) fields["question_passage"] = question_passage_field fields["sentences_mask"] = ArrayField(np.array(question_passage_sentence_mask), padding_value=-1) fields["sentences_tokens"] = ListField([TextField(sentence_tokens, self.token_indexers) for sentence_tokens in passage_sentence_tokens]) fields["question_sentences_tokens"] = ListField([TextField(q_sentence_tokens, self.token_indexers) for q_sentence_tokens in question_passage_sentence_tokens]) number_token_indices = \ [ArrayField(np.arange(start_ind, start_ind + number_len[i]), padding_value=-1) for i, start_ind in enumerate(number_indices)] fields["number_indices"] = ListField(number_token_indices) numbers_in_passage_field = TextField(number_tokens, self.token_indexers) extra_numbers_field = TextField(extra_number_tokens, self.token_indexers) all_numbers_field = TextField(extra_number_tokens + number_tokens, self.token_indexers) mask_index_fields: List[Field] = [IndexField(index, question_passage_field) for index in mask_indices] fields["mask_indices"] = ListField(mask_index_fields) # Compile question, passage, answer metadata metadata = {"original_passage": passage_text, "original_question": question_text, "original_numbers": numbers_in_passage, "original_number_words": number_words, "extra_numbers": self.extra_numbers, "passage_tokens": passage_tokens, "sentence_indices": sentence_indices, "question_tokens": question_tokens, 
"question_passage_tokens": question_passage_tokens, "sentnces_mask": question_passage_sentence_mask, "passage_id": passage_id, "question_id": question_id} if answer_annotations: for annotation in answer_annotations: tokenized_spans = [[token.text for token in self.tokenizer.tokenize(answer)] for answer in annotation['spans']] annotation['spans'] = [tokenlist_to_passage(token_list) for token_list in tokenized_spans] # Get answer type, answer text, tokenize answer_type, answer_texts = DropReader.extract_answer_info_from_annotation(answer_annotations[0]) tokenized_answer_texts = [] num_spans = min(len(answer_texts), self.max_spans) for answer_text in answer_texts: answer_tokens = self.tokenizer.tokenize(answer_text) tokenized_answer_texts.append(' '.join(token.text for token in answer_tokens)) metadata["answer_annotations"] = answer_annotations metadata["answer_texts"] = answer_texts metadata["answer_tokens"] = tokenized_answer_texts # Find answer text in question and passage valid_question_spans = DropReader.find_valid_spans(question_tokens, tokenized_answer_texts) for span_ind, span in enumerate(valid_question_spans): valid_question_spans[span_ind] = (span[0] + 1, span[1] + 1) valid_passage_spans = DropReader.find_valid_spans(passage_tokens, tokenized_answer_texts) for span_ind, span in enumerate(valid_passage_spans): valid_passage_spans[span_ind] = (span[0] + qlen + 2, span[1] + qlen + 2) # Get target numbers target_numbers = [] for answer_text in answer_texts: number = self.word_to_num(answer_text) if number is not None: target_numbers.append(number) # Get possible ways to arrive at target numbers with add/sub valid_expressions: List[List[int]] = [] exp_strings = None if answer_type in ["number", "date"]: if self.exp_search == 'full': expressions = get_full_exp(list(enumerate(self.extra_numbers + numbers_in_passage)), target_numbers, self.operations, self.op_dict, self.max_depth) zipped = list(zip(*expressions)) if zipped: valid_expressions = list(zipped[0]) exp_strings = list(zipped[1]) elif self.exp_search == 'add_sub': valid_expressions = \ DropReader.find_valid_add_sub_expressions(self.extra_numbers + numbers_in_passage, target_numbers, self.max_numbers_expression) elif self.exp_search == 'template': valid_expressions, exp_strings = \ get_template_exp(self.extra_numbers + numbers_in_passage, target_numbers, self.templates, self.template_strings) exp_strings = sum(exp_strings, []) # Get possible ways to arrive at target numbers with counting valid_counts: List[int] = [] if answer_type in ["number"]: numbers_for_count = list(range(self.max_count + 1)) valid_counts = DropReader.find_valid_counts(numbers_for_count, target_numbers) # Update metadata with answer info answer_info = {"answer_passage_spans": valid_passage_spans, "answer_question_spans": valid_question_spans, "num_spans": num_spans, "expressions": valid_expressions, "counts": valid_counts} if self.exp_search in ['template', 'full']: answer_info['expr_text'] = exp_strings metadata["answer_info"] = answer_info # Add answer fields passage_span_fields: List[Field] = [SpanField(span[0], span[1], question_passage_field) for span in valid_passage_spans] if not passage_span_fields: passage_span_fields.append(SpanField(-1, -1, question_passage_field)) fields["answer_as_passage_spans"] = ListField(passage_span_fields) question_span_fields: List[Field] = [SpanField(span[0], span[1], question_passage_field) for span in valid_question_spans] if not question_span_fields: question_span_fields.append(SpanField(-1, -1, question_passage_field)) 
fields["answer_as_question_spans"] = ListField(question_span_fields) if self.exp_search == 'add_sub': add_sub_signs_field: List[Field] = [] extra_signs_field: List[Field] = [] for signs_for_one_add_sub_expressions in valid_expressions: extra_signs = signs_for_one_add_sub_expressions[:len(self.extra_numbers)] normal_signs = signs_for_one_add_sub_expressions[len(self.extra_numbers):] add_sub_signs_field.append(SequenceLabelField(normal_signs, numbers_in_passage_field)) extra_signs_field.append(SequenceLabelField(extra_signs, extra_numbers_field)) if not add_sub_signs_field: add_sub_signs_field.append(SequenceLabelField([0] * len(number_tokens), numbers_in_passage_field)) if not extra_signs_field: extra_signs_field.append(SequenceLabelField([0] * len(self.extra_numbers), extra_numbers_field)) fields["answer_as_expressions"] = ListField(add_sub_signs_field) if self.extra_numbers: fields["answer_as_expressions_extra"] = ListField(extra_signs_field) elif self.exp_search in ['template', 'full']: expression_indices = [] for expression in valid_expressions: if not expression: expression.append(3 * [-1]) expression_indices.append(ArrayField(np.array(expression), padding_value=-1)) if not expression_indices: expression_indices = \ [ArrayField(np.array([3 * [-1]]), padding_value=-1) for _ in range(len(self.templates))] fields["answer_as_expressions"] = ListField(expression_indices) count_fields: List[Field] = [LabelField(count_label, skip_indexing=True) for count_label in valid_counts] if not count_fields: count_fields.append(LabelField(-1, skip_indexing=True)) fields["answer_as_counts"] = ListField(count_fields) fields["num_spans"] = LabelField(num_spans, skip_indexing=True) fields["metadata"] = MetadataField(metadata) return Instance(fields)
def togpx(self, gpx_version=DEF_GPX_VERSION, human_namespace=False): """Generate a GPX metadata element subtree >>> meta = _GpxMeta(time=(2008, 6, 3, 16, 12, 43, 1, 155, 0)) >>> ET.tostring(meta.togpx()) '<ns0:metadata xmlns:ns0="http://www.topografix.com/GPX/1/1"><ns0:time>2008-06-03T16:12:43+0000</ns0:time></ns0:metadata>' >>> meta.bounds = {"minlat": 52, "maxlat": 54, "minlon": -2, ... "maxlon": 1} >>> ET.tostring(meta.togpx()) '<ns0:metadata xmlns:ns0="http://www.topografix.com/GPX/1/1"><ns0:time>2008-06-03T16:12:43+0000</ns0:time><ns0:bounds maxlat="54" maxlon="1" minlat="52" minlon="-2" /></ns0:metadata>' >>> meta.bounds = [point.Point(52.015, -0.221), ... point.Point(52.167, 0.390)] >>> ET.tostring(meta.togpx()) # doctest: +ELLIPSIS '<ns0:metadata xmlns:ns0="http://www.topografix.com/GPX/1/1"><ns0:time>...</ns0:time><ns0:bounds maxlat="52.167" maxlon="0.39" minlat="52.015" minlon="-0.221" /></ns0:metadata>' :type gpx_version: ``str`` :param gpx_version: GPX version to generate :type human_namespace: ``bool`` :param human_namespace: Whether to generate output using human readable namespace prefixes :rtype: :class:`ET.Element` :return: GPX metadata element """ elementise = partial(create_elem, gpx_version=gpx_version, human_namespace=human_namespace) metadata = elementise("metadata", None) if self.name: metadata.append(elementise("name", None, self.name)) if self.desc: metadata.append(elementise("desc", None, self.desc)) if self.author: element = elementise("author", None) if self.author['name']: element.append(elementise("name", None, self.author['name'])) if self.author['email']: element.append(elementise("email", dict(zip(self.author['email'].split("@"), ("id", "domain"))))) if self.author['link']: element.append(elementise("link", None, self.author['link'])) metadata.append(element) if self.copyright: author = {"author": self.copyright['name']} if self.copyright['name'] else None element = elementise("copyright", author) if self.copyright['year']: element.append(elementise("year", None, self.copyright['year'])) if self.copyright['license']: license = elementise("license", None) element.append(license) metadata.append(element) if self.link: for link in self.link: if isinstance(link, basestring): element = elementise("link", {"href": link}) else: element = elementise("link", {"href": link["href"]}) if link['text']: element.append(elementise("text", None, link["text"])) if link['type']: element.append(elementise("type", None, link["type"])) metadata.append(element) element = elementise("time", None) if isinstance(self.time, (time.struct_time, tuple)): element.text = time.strftime("%Y-%m-%dT%H:%M:%SZ", self.time) # GPX documentation states, the Z on the end should be capital elif isinstance(self.time, utils.Timestamp): element.text = self.time.isoformat() else: element.text = time.strftime("%Y-%m-%dT%H:%M:%SZ") # GPX documentation states, the Z on the end should be capital metadata.append(element) if self.keywords: metadata.append(elementise("keywords", None, self.keywords)) if self.bounds: if not isinstance(self.bounds, dict): latitudes = list(map(attrgetter("latitude"), self.bounds)) longitudes = list(map(attrgetter("longitude"), self.bounds)) bounds = { "minlat": str(min(latitudes)), "maxlat": str(max(latitudes)), "minlon": str(min(longitudes)), "maxlon": str(max(longitudes)), } else: bounds = dict([(k, str(v)) for k, v in self.bounds.items()]) metadata.append(elementise("bounds", bounds)) if self.extensions: element = elementise("extensions") for i in self.extensions: 
element.append(i) metadata.append(element) return metadata
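One detail worth flagging in the author['email'] handling above: dict(zip(...)) takes its keys from the first iterable, so zipping the split address first produces attribute names like 'user' and 'example.com' rather than GPX's id/domain attributes. The short demonstration below shows the difference; the "presumed intent" version is my reading of the GPX schema, not the original code.

email = "user@example.com"

# as written in togpx(): the address parts become the attribute names
dict(zip(email.split("@"), ("id", "domain")))
# -> {'user': 'id', 'example.com': 'domain'}

# presumed intent: id/domain as attribute names, address parts as values
dict(zip(("id", "domain"), email.split("@")))
# -> {'id': 'user', 'domain': 'example.com'}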
def create_dataset_overview(classes, train_labels, test_labels, dataset, output_dir): # Prepare dataset overview table: entries total_string = 'Total' unique, counts = np.unique(train_labels, return_counts=True) train_labels_unique_counts = dict(zip(unique, counts)) unique, counts = np.unique(test_labels, return_counts=True) test_labels_unique_counts = dict(zip(unique, counts)) labels_unique_counts = {key: train_value + test_value for (key, train_value), (_, test_value) in \ zip(train_labels_unique_counts.items(), test_labels_unique_counts.items())} train_labels_unique_counts_total = sum(train_labels_unique_counts.values()) test_labels_unique_counts_total = sum(test_labels_unique_counts.values()) labels_unique_counts_total = sum(labels_unique_counts.values()) entries_max_len = max( np.vectorize(len)(classes).max(), len(total_string), len(str(labels_unique_counts_total))) # Create dataset overview table: rows and columns total_string = total_string.rjust(entries_max_len) header_entries = '|'.join( [f' {c.rjust(entries_max_len)} ' for c in classes]) train_entries = '|'.join([ f' {str(v).rjust(entries_max_len)} ' for v in train_labels_unique_counts.values() ]) test_entries = '|'.join([ f' {str(v).rjust(entries_max_len)} ' for v in test_labels_unique_counts.values() ]) total_entries = '|'.join([ f' {str(v).rjust(entries_max_len)} ' for v in labels_unique_counts.values() ]) train_entries_total = str(train_labels_unique_counts_total).rjust( entries_max_len, ' ') test_entries_total = str(test_labels_unique_counts_total).rjust( entries_max_len, ' ') total_entries_total = str(labels_unique_counts_total).rjust( entries_max_len, ' ') header = '| Set \ Class |' + header_entries + '| ' + total_string + ' |' train = '| Train |' + train_entries + '| ' + train_entries_total + ' |' test = '| Test |' + test_entries + '| ' + test_entries_total + ' |' total = '| Total |' + total_entries + '| ' + total_entries_total + ' |' hline = len(header) * '-' dataset_overview = \ f'{hline}\n' \ f'{header}\n' \ f'{hline}\n' \ f'{train}\n' \ f'{test}\n' \ f'{hline}\n' \ f'{total}\n' \ f'{hline}' # Output dataset overview table Hexnet_print(f'Dataset overview\n{dataset_overview}') if output_dir: filename = os.path.join( output_dir, f'{os.path.basename(dataset)}_dataset_overview.dat') os.makedirs(output_dir, exist_ok=True) with open(filename, 'w') as file: print(dataset_overview, file=file)
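The train/test count merge above zips .items() from two dicts and therefore assumes both splits contain the same classes in the same insertion order; if a class is missing from one split, counts get paired with the wrong labels. A defensive variant, sketched with the same variable names, iterates over the union of keys instead.

# a minimal sketch: merge per-class counts without relying on matching dict order
all_classes = sorted(set(train_labels_unique_counts) | set(test_labels_unique_counts))
labels_unique_counts = {
    c: train_labels_unique_counts.get(c, 0) + test_labels_unique_counts.get(c, 0)
    for c in all_classes
}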
results = face_client.face.identify(face_ids, PERSON_GROUP_ID) names = [] for result in results: candidates = sorted(result.candidates, key=lambda c: c.confidence, reverse=True) if len(candidates) > 0: top_candidate = candidates[0] person = face_client.person_group_person.get( PERSON_GROUP_ID, top_candidate.person_id) if top_candidate.confidence > .8: names.append(person.name) else: names.append('Stranger') post = {'person': []} for name, face in zip(names, faces): emo = best_emotion(face.face_attributes.emotion) post['person'].append([name, emo]) print(f'Name: {name}, Emo: {emo}') if len(names) == 0: post['person'].append(['Stranger', emo]) r = requests.post(url, json=post) ''' img = Image.open('saved_img.jpg') # For each face returned use the face rectangle and draw a red box. draw = ImageDraw.Draw(img) for face in faces: draw.rectangle(getRectangle(face), outline='red') topLeft, botRight = getRectangle(face) tLeft, tTop = getTextLoc(face) draw.text((tLeft, tTop - 40),
def discriminator_2(x_onehot,encoder_state, batch_size, seq_len, vocab_size,_embed_ph,D_with_state = param.D_with_state,filter_dim = 100): # get the embedding dimension for each presentation #assert isinstance(emb_dim_single, int) and emb_dim_single > 0 filter_sizes = [2, 3, 4, 5] num_filters = [300, 300, 300, 300] dropout_keep_prob = 0.75 num_rep = 64 dis_emb_dim = param.INPUT_DIM emb_dim_single = int(dis_emb_dim / num_rep) d_embeddings = tf.get_variable(name='d_emb', shape=[vocab_size,dis_emb_dim], initializer=xavier_initializer())#changed from trainable = False # _embed_init = d_embeddings.assign(_embed_ph) #the embeddings of the discriminator is different with the one for generator input_x_re = tf.reshape(x_onehot, [-1, vocab_size]) emb_x_re = tf.matmul(input_x_re, d_embeddings) emb_x = tf.reshape(emb_x_re, [batch_size, seq_len, dis_emb_dim]) # batch_size x seq_len x dis_emb_dim emb_x_expanded = tf.expand_dims(emb_x, -1) # batch_size x seq_len x dis_emb_dim x 1 print('shape of emb_x_expanded: {}'.format(emb_x_expanded.get_shape().as_list())) # Create a convolution + maxpool layer for each filter size pooled_outputs = [] for filter_size, num_filter in zip(filter_sizes, num_filters): conv = conv2d(emb_x_expanded, num_filter, k_h=filter_size, k_w=emb_dim_single, d_h=1, d_w=emb_dim_single, padding='VALID'#d is stride, k is kernel size ,scope="conv-%s" % filter_size) # batch_size x (seq_len-k_h+1) x num_rep x num_filter out = tf.nn.relu(conv, name="relu") # pooled = tf.nn.max_pool(out, ksize=[1, seq_len - filter_size + 1, 1, 1], # strides=[1, 1, 1, 1], padding='VALID', # name="pool")# batch_size x 1 x num_rep x num_filter pooled = tf.reduce_max(out,axis=1) pooled = tf.reshape(pooled,[BATCH_SIZE,1,num_rep,num_filter]) pooled_outputs.append(pooled) # Combine all the pooled features if D_with_state: num_filters_total = sum(num_filters) else: num_filters_total = sum(num_filters) h_pool = tf.concat(pooled_outputs, 3) # batch_size x 1 x num_rep x num_filters_total print('shape of h_pool: {}'.format(h_pool.get_shape().as_list())) h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total]) if D_with_state: tile_state = tf.reshape(tf.tile(encoder_state, [1, num_rep, 1]), [BATCH_SIZE*num_rep, int(encoder_state.shape[-1])]) state = linear(tile_state, output_size=num_filters_total, use_bias=True, scope='state_fc') h_pool_flat = tf.multiply(state,h_pool_flat) # Add highway h_highway = highway(h_pool_flat, h_pool_flat.get_shape()[1], 1, 0) # (batch_size*num_rep) x num_filters_total # Add dropout # if D_with_state: # h_highway = tf.concat([tf.reshape(tf.tile(encoder_state, [1, num_rep, 1]), [-1, encoder_state.shape[-1]]), h_highway], 1) print('shape of h_highway: {}'.format(h_highway.get_shape().as_list())) h_drop = tf.nn.dropout(h_highway, dropout_keep_prob, name='dropout') # fc fc_out = linear(h_drop, output_size=100, use_bias=True, scope='fc') logits = linear(fc_out, output_size=1, use_bias=True, scope='logits') logits = tf.squeeze(logits, -1) # batch_size*num_rep return logits,h_highway
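The discriminator's first step turns the one-hot tensor into embeddings by flattening it to (batch*seq_len, vocab) and matrix-multiplying with the embedding table; when the rows are one-hot, that matmul is equivalent to an embedding lookup. A small numpy check of that equivalence, with toy shapes that are not the model's.

import numpy as np

batch, seq_len, vocab, emb_dim = 2, 3, 5, 4
d_embeddings = np.random.randn(vocab, emb_dim)

ids = np.random.randint(0, vocab, size=(batch, seq_len))
x_onehot = np.eye(vocab)[ids]                            # (batch, seq_len, vocab)

emb_x_re = x_onehot.reshape(-1, vocab) @ d_embeddings    # (batch*seq_len, emb_dim)
emb_x = emb_x_re.reshape(batch, seq_len, emb_dim)

# one-hot rows @ embedding table == direct row lookup
assert np.allclose(emb_x, d_embeddings[ids])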
print DataFrame(model.labels_, columns=['Label']).groupby('Label').size() print final df_append['cluster_num']=model.labels_ df_append=DataFrame(df_append['cluster_num'],columns=['cluster_num']) df_clus = pd.merge(df_base, df_append, left_index=True, right_index=True, how='inner'); mean_obesity=df_clus.groupby(['cluster_num'])['PCT_OBESE10'].mean().sort_values() #dict.fromkeys(mean_obesity.index) #clus_order={mean_obesity.index.values} clus_dict=dict(zip(mean_obesity.index,[x for x in range(0,len(mean_obesity.index))])) df_clus['cluster']=df_clus['cluster_num'].apply(lambda row: clus_dict[row]) #def clus_name(clus_num): # clus_map = {0: "a", 1: "b", 2: "c",3: "d"} # return clus_map[clus_num] # #df_clus['clus_profile']=df_clus['cluster'].apply(lambda row: clus_name(row)) with open('us_counties.topo.json') as json_data: d = json.load(json_data)
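The clus_dict = dict(zip(...)) step above relabels clusters by the rank of their mean obesity, so cluster numbers become comparable across runs. A toy pandas example of the same relabeling; the data values are fabricated, only the column names come from the snippet.

import pandas as pd

df = pd.DataFrame({'cluster_num': [0, 0, 1, 1, 2, 2],
                   'PCT_OBESE10': [30, 32, 10, 12, 20, 22]})

mean_obesity = df.groupby('cluster_num')['PCT_OBESE10'].mean().sort_values()
clus_dict = dict(zip(mean_obesity.index, range(len(mean_obesity.index))))
# cluster 1 has the lowest mean -> 0, cluster 2 -> 1, cluster 0 -> 2
df['cluster'] = df['cluster_num'].map(clus_dict)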
def g_nonsaturating_loss(fake_pred, weights): loss = 0 for fake, weight in zip(fake_pred, weights): loss += weight * F.softplus(-fake).mean() return loss / len(fake_pred)
def get_divergence(self, source): """Gets the full divergence given the aperture radius.""" ss = [a - b for a, b in zip(self.center, source.center)] return self.r * 2 * (np.dot(ss, ss)**-0.5)
def main(argv): global config global logger """Program entry point. :param argv: command-line arguments :type argv: :class:`list` """ author_strings = [] for name, email in zip(metadata.authors, metadata.emails): author_strings.append('Author: {0} <{1}>'.format(name, email)) epilog = '''{project} {version} {authors} URL: <{url}> '''.format(project=metadata.project, version=metadata.version, authors='\n'.join(author_strings), url=metadata.url) arg_parser = argparse.ArgumentParser( prog=argv[0], formatter_class=argparse.RawDescriptionHelpFormatter, description=metadata.description, epilog=epilog) arg_parser.add_argument('-V', '--version', action='version', version='{0} {1}'.format(metadata.project, metadata.version)) arg_parser.add_argument('configfile', help='kafkatos3 config file to use') args = arg_parser.parse_args(args=argv[1:]) config = parse_config(args.configfile) logger = logging.getLogger('kafkatos3') formatter = logging.Formatter( '%(asctime)s - [%(levelname)s/%(processName)s] - %(message)s') ch = logging.StreamHandler() ch.setFormatter(formatter) logger.setLevel(logging.INFO) logger.addHandler(ch) logger.info( "===============================================================") logger.info(epilog) logger.info( "===============================================================") for x in range(0, int(config.get("consumer", "consumer_processes"))): p = Process(target=consumer_process, args=(str(x), )) p.start() processes.append(p) p = Process(target=compression_process) p.start() processes.append(p) p = Process(target=s3_process) p.start() processes.append(p) setproctitle("[mainprocess] " + getproctitle()) for p in processes: p.join() return 0
def ffunc(f): restype = ftype(f.type.restype) types, names = map(ftype, f.type.argtypes), map(prefix, f.args) args = ajoin(map(sjoin, zip(types, names))) header = sjoin(["function", restype, f.name + parens(args)]) return njoin([header + " {", njoin(map(fblock, f.blocks)), "}"])
def kmean_anchors(path='../coco/train2017.txt', n=12, img_size=(320, 1024), thr=0.10, gen=1000): # Creates kmeans anchors for use in *.cfg files: from utils.utils import *; _ = kmean_anchors() # n: number of anchors # img_size: (min, max) image size used for multi-scale training (can be same values) # thr: IoU threshold hyperparameter used for training (0.0 - 1.0) # gen: generations to evolve anchors using genetic algorithm from utils.datasets import LoadImagesAndLabels def print_results(k): k = k[np.argsort(k.prod(1))] # sort small to large iou = wh_iou(wh, torch.Tensor(k)) max_iou = iou.max(1)[0] bpr, aat = (max_iou > thr).float().mean(), (iou > thr).float().mean() * n # best possible recall, anch > thr print('%.2f iou_thr: %.3f best possible recall, %.2f anchors > thr' % (thr, bpr, aat)) print('n=%g, img_size=%s, IoU_all=%.3f/%.3f-mean/best, IoU>thr=%.3f-mean: ' % (n, img_size, iou.mean(), max_iou.mean(), iou[iou > thr].mean()), end='') for i, x in enumerate(k): print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg return k def fitness(k): # mutation fitness iou = wh_iou(wh, torch.Tensor(k)) # iou max_iou = iou.max(1)[0] return (max_iou * (max_iou > thr).float()).mean() # product # Get label wh wh = [] dataset = LoadImagesAndLabels(path, augment=True, rect=True, cache_labels=True) nr = 1 if img_size[0] == img_size[1] else 10 # number augmentation repetitions for s, l in zip(dataset.shapes, dataset.labels): wh.append(l[:, 3:5] * (s / s.max())) # image normalized to letterbox normalized wh wh = np.concatenate(wh, 0).repeat(nr, axis=0) # augment 10x wh *= np.random.uniform(img_size[0], img_size[1], size=(wh.shape[0], 1)) # normalized to pixels (multi-scale) wh = wh[(wh > 2.0).all(1)] # remove below threshold boxes (< 2 pixels wh) # Darknet yolov3.cfg anchors use_darknet = False if use_darknet and n == 9: k = np.array([[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], [373, 326]]) else: # Kmeans calculation from scipy.cluster.vq import kmeans print('Running kmeans for %g anchors on %g points...' % (n, len(wh))) s = wh.std(0) # sigmas for whitening k, dist = kmeans(wh / s, n, iter=30) # points, mean distance k *= s wh = torch.Tensor(wh) k = print_results(k) # # Plot # k, d = [None] * 20, [None] * 20 # for i in tqdm(range(1, 21)): # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # ax = ax.ravel() # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.') # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh # ax[0].hist(wh[wh[:, 0]<100, 0],400) # ax[1].hist(wh[wh[:, 1]<100, 1],400) # fig.tight_layout() # fig.savefig('wh.png', dpi=200) # Evolve npr = np.random f, sh, mp, s = fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma for _ in tqdm(range(gen), desc='Evolving anchors'): v = np.ones(sh) while (v == 1).all(): # mutate until a change occurs (prevent duplicates) v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) # 98.6, 61.6 kg = (k.copy() * v).clip(min=2.0) fg = fitness(kg) if fg > f: f, k = fg, kg.copy() print_results(k) k = print_results(k) return k
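The fitness function above depends on an IoU between label width/height pairs and anchor width/height pairs (wh_iou, imported from the repo's utils). A minimal numpy sketch of what such a width/height IoU computes, assuming the usual definition where boxes are compared as if they shared a corner; wh_iou_np is my own name, not the repo's function.

import numpy as np

def wh_iou_np(wh1, wh2):
    """IoU of (n,2) box sizes against (m,2) anchor sizes, ignoring position."""
    wh1 = wh1[:, None, :]                  # (n,1,2)
    wh2 = wh2[None, :, :]                  # (1,m,2)
    inter = np.minimum(wh1, wh2).prod(2)   # overlap area if boxes share a corner
    union = wh1.prod(2) + wh2.prod(2) - inter
    return inter / union                   # (n,m)

wh = np.array([[10., 20.], [30., 30.]])
anchors = np.array([[10., 20.], [15., 15.]])
print(wh_iou_np(wh, anchors))              # entry (0, 0) is exactly 1.0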
#Read a list which was stored as a string while saving dataframe import ast df.col_new = df.col_with_list.map(ast.literal_eval) #Find out top 5 key words per document #Refernce: https://stackoverflow.com/questions/38955182/find-names-of-top-n-highest-value-columns-in-each-pandas-dataframe-row nlargest = 5 order = np.argsort(-tfidf_df.values, axis=1)[:,:nlargest] result = pd.DataFrame(tfidf_df.columns[order], columns=['top{}'.format(i) for i in range(1, nlargest+1)], index=tfidf_df.index) result.head() #dataframe as dictionary dict_for_repl = dict(zip(df[col_as_keys].values,df[col_as_values].values)) #Group by then join #Every man should have min five distinct cars c=pd.DataFrame({'is_car_cnt_more5':cleaned_data.groupby(['man_id'])['car_id'].nunique()>=5}).reset_index() c=c[c['is_car_cnt_more5']==True] del c['is_car_cnt_more5'] cleaned_data=pd.merge(cleaned_data,c,on=['man_id']) cleaned_data=cleaned_data.dropna() df1.join(df2,left_on='col_for_joining_of_df1',right_on='col_for_joining_of_df2').drop('col_for_joining_of_df2',axis=1) #groupby then count and rename the count as new_cc gf.groupby(['id'], as_index=False).size().reset_index().rename(columns={0:'new_cc'}).head()
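The "top 5 key words per document" trick above argsorts the negated TF-IDF matrix to get column indices in descending order, then indexes the column names with that index array. A toy check of the indexing step; the DataFrame here is fabricated.

import numpy as np
import pandas as pd

tfidf_df = pd.DataFrame([[0.1, 0.9, 0.3],
                         [0.7, 0.2, 0.4]],
                        columns=['apple', 'banana', 'cherry'])

nlargest = 2
order = np.argsort(-tfidf_df.values, axis=1)[:, :nlargest]
result = pd.DataFrame(np.array(tfidf_df.columns)[order],
                      columns=[f'top{i}' for i in range(1, nlargest + 1)],
                      index=tfidf_df.index)
# row 0 -> banana, cherry ; row 1 -> apple, cherry
print(result)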
def main(): train, test = get_data() train_X = rearrange(train['X']) train_Y = train['y'].flatten()-1 train_X, train_Y = shuffle(train_X, train_Y) test_X = rearrange(test['X']) test_Y = test['y'].flatten()-1 max_iter = 6 print_period = 10 lr = np.float32(0.0001) mu = np.float32(0.99) decay = np.float32(0.9) eps = np.float32(1e-10) reg = np.float32(0.01) N = train_X.shape[0] batch_sz = 500 num_batch = N // batch_sz M = 500 K = 10 poolsz = (2, 2) W1_shape = (20, 3, 5, 5) #(num_feature_maps, num_color_channels, filter_width, filter_height) W1_init = init_filter(W1_shape, poolsz) b1_init = np.zeros(W1_shape[0], dtype=np.float32) W2_shape = (50, 20, 5, 5) #(num_feature_maps, old_num_feature_maps, filter_width, filter_height) W2_init = init_filter(W2_shape, poolsz) b2_init = np.zeros(W2_shape[0], dtype=np.float32) #ANN W3_init = np.random.randn(W2_shape[0]*5*5, M) / np.sqrt(W2_shape[0]*5*5 + M) b3_init = np.zeros(M, dtype=np.float32) W4_init = np.random.randn(M, K) / np.sqrt(M+K) b4_init = np.zeros(K, dtype=np.float32) #init theano variables X = T.tensor4('X', dtype='float32') Y = T.ivector('T') W1 = theano.shared(W1_init, 'W1') b1 = theano.shared(b1_init, 'b1') W2 = theano.shared(W2_init, 'W2') b2 = theano.shared(b2_init, 'b2') W3 = theano.shared(W3_init.astype(np.float32), 'W3') b3 = theano.shared(b3_init, 'b3') W4 = theano.shared(W4_init.astype(np.float32), 'W4') b4 = theano.shared(b4_init, 'b4') #forward Z1 = convpool(X, W1, b1) Z2 = convpool(Z1, W2, b2) Z3 = relu(Z2.flatten(ndim=2).dot(W3) + b3) pY = T.nnet.softmax(Z3.dot(W4) + b4) #test & prediction functions params = [W1, b1, W2, b2, W3, b3, W4, b4] rcost = reg * np.sum((p*p).sum() for p in params) cost = -(T.log(pY[T.arange(Y.shape[0]), Y])).mean() + rcost prediction = T.argmax(pY, axis=1) momentum = [theano.shared( np.zeros_like(p.get_value(), dtype=np.float32)) for p in params] catchs = [theano.shared( np.ones_like(p.get_value(), dtype=np.float32)) for p in params] #RMSProp updates = [] grads = T.grad(cost, params) for p, g, m, c in zip(params, grads, momentum, catchs): updates_c = decay*c + (np.float32(1.0)-decay)*g*g updates_m = mu*m - lr*g / T.sqrt(updates_c + eps) updates_p = p + updates_m updates.append([c, updates_c]) updates.append([m, updates_m]) updates.append([p, updates_p]) #init functions train_op = theano.function(inputs=[X, Y], updates=updates) prediction_op = theano.function(inputs=[X, Y], outputs=[cost, prediction]) costs= [] for i in range(max_iter): shuffle_X, shuffle_Y = shuffle(train_X, train_Y) for j in range(num_batch): x = shuffle_X[j*batch_sz : (j*batch_sz+batch_sz), :] y = shuffle_Y[j*batch_sz : (j*batch_sz+batch_sz)] train_op(x, y) if j % print_period == 0: cost_val, p_val = prediction_op(test_X, test_Y) e = error_rate(p_val, test_Y) costs.append(cost_val) print("Cost / err at iteration i=%d, j=%d: %.3f / %.3f" % (i, j, cost_val, e)) plt.plot(costs) plt.show()
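The update expressions built in the theano graph above are RMSProp with momentum: a running cache of squared gradients scales a momentum step. The same arithmetic in plain numpy for a single parameter, with the hyperparameter values copied from the snippet.

import numpy as np

lr, mu, decay, eps = 1e-4, 0.99, 0.9, 1e-10

def rmsprop_momentum_step(p, g, m, c):
    """One update of cache c, momentum m, and parameter p for gradient g."""
    c = decay * c + (1.0 - decay) * g * g     # running average of squared gradients
    m = mu * m - lr * g / np.sqrt(c + eps)    # momentum step scaled by RMS of gradients
    p = p + m
    return p, m, c

p, m, c = np.zeros(3), np.zeros(3), np.ones(3)   # the snippet initialises the cache to ones
p, m, c = rmsprop_momentum_step(p, np.array([0.5, -1.0, 2.0]), m, c)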
def compare(traces, models, ic='WAIC', method='stacking', b_samples=1000, alpha=1, seed=None, round_to=2): R"""Compare models based on the widely applicable information criterion (WAIC) or leave-one-out (LOO) cross-validation. Read more theory here - in a paper by some of the leading authorities on model selection - dx.doi.org/10.1111/1467-9868.00353 Parameters ---------- traces : list of PyMC3 traces models : list of PyMC3 models in the same order as traces. ic : string Information Criterion (WAIC or LOO) used to compare models. Default WAIC. method : str Method used to estimate the weights for each model. Available options are: - 'stacking' : (default) stacking of predictive distributions. - 'BB-pseudo-BMA' : pseudo-Bayesian Model averaging using Akaike-type weighting. The weights are stabilized using the Bayesian bootstrap. - 'pseudo-BMA': pseudo-Bayesian Model averaging using Akaike-type weighting, without bootstrap stabilization (not recommended). For more information read https://arxiv.org/abs/1704.02030 b_samples: int Number of samples taken by the Bayesian bootstrap estimation. Only useful when method = 'BB-pseudo-BMA'. alpha : float The shape parameter in the Dirichlet distribution used for the Bayesian bootstrap. Only useful when method = 'BB-pseudo-BMA'. When alpha=1 (default), the distribution is uniform on the simplex. A smaller alpha will keep the final weights further away from 0 and 1. seed : int or np.random.RandomState instance If int or RandomState, use it for seeding the Bayesian bootstrap. Only useful when method = 'BB-pseudo-BMA'. Default None, in which case the global np.random state is used. round_to : int Number of decimals used to round results (default 2). Returns ------- A DataFrame, ordered from lowest to highest IC. The index reflects the order in which the models are passed to this function. The columns are: IC : Information Criteria (WAIC or LOO). Smaller IC indicates higher out-of-sample predictive fit ("better" model). Default WAIC. pIC : Estimated effective number of parameters. dIC : Relative difference between each IC (WAIC or LOO) and the lowest IC (WAIC or LOO). It's always 0 for the top-ranked model. weight: Relative weight for each model. This can be loosely interpreted as the probability of each model (among the compared models) given the data. By default the uncertainty in the weights estimation is considered using the Bayesian bootstrap. SE : Standard error of the IC estimate. If method = BB-pseudo-BMA these values are estimated using the Bayesian bootstrap. dSE : Standard error of the difference in IC between each model and the top-ranked model. It's always 0 for the top-ranked model. warning : A value of 1 indicates that the computation of the IC may not be reliable; see http://arxiv.org/abs/1507.04544 for details.
""" if ic == 'WAIC': ic_func = waic df_comp = pd.DataFrame(index=np.arange(len(models)), columns=['WAIC', 'pWAIC', 'dWAIC', 'weight', 'SE', 'dSE', 'warning']) elif ic == 'LOO': ic_func = loo df_comp = pd.DataFrame(index=np.arange(len(models)), columns=['LOO', 'pLOO', 'dLOO', 'weight', 'SE', 'dSE', 'warning']) else: raise NotImplementedError( 'The information criterion {} is not supported.'.format(ic)) if len(set([len(m.observed_RVs) for m in models])) != 1: raise ValueError( 'The number of observed RVs should be the same across all models') if method not in ['stacking', 'BB-pseudo-BMA', 'pseudo-BMA']: raise ValueError('The method {}, to compute weights,' 'is not supported.'.format(method)) warns = np.zeros(len(models)) c = 0 def add_warns(*args): warns[c] = 1 with warnings.catch_warnings(): warnings.showwarning = add_warns warnings.filterwarnings('always') ics = [] for c, (t, m) in enumerate(zip(traces, models)): ics.append((c, ic_func(t, m, pointwise=True))) ics.sort(key=lambda x: x[1][0]) if method == 'stacking': N, K, ic_i = _ic_matrix(ics) exp_ic_i = np.exp(-0.5 * ic_i) Km = K - 1 def w_fuller(w): return np.concatenate((w, [max(1. - np.sum(w), 0.)])) def log_score(w): w_full = w_fuller(w) score = 0. for i in range(N): score += np.log(np.dot(exp_ic_i[i], w_full)) return -score def gradient(w): w_full = w_fuller(w) grad = np.zeros(Km) for k in range(Km): for i in range(N): grad[k] += (exp_ic_i[i, k] - exp_ic_i[i, Km]) / \ np.dot(exp_ic_i[i], w_full) return -grad theta = np.full(Km, 1. / K) bounds = [(0., 1.) for i in range(Km)] constraints = [{'type': 'ineq', 'fun': lambda x: -np.sum(x) + 1.}, {'type': 'ineq', 'fun': lambda x: np.sum(x)}] w = minimize(fun=log_score, x0=theta, jac=gradient, bounds=bounds, constraints=constraints) weights = w_fuller(w['x']) ses = [res[1] for _, res in ics] elif method == 'BB-pseudo-BMA': N, K, ic_i = _ic_matrix(ics) ic_i = ic_i * N b_weighting = dirichlet.rvs(alpha=[alpha] * N, size=b_samples, random_state=seed) weights = np.zeros((b_samples, K)) z_bs = np.zeros_like(weights) for i in range(b_samples): z_b = np.dot(b_weighting[i], ic_i) u_weights = np.exp(-0.5 * (z_b - np.min(z_b))) z_bs[i] = z_b weights[i] = u_weights / np.sum(u_weights) weights = weights.mean(0) ses = z_bs.std(0) elif method == 'pseudo-BMA': min_ic = ics[0][1][0] Z = np.sum([np.exp(-0.5 * (x[1][0] - min_ic)) for x in ics]) weights = [] ses = [] for _, res in ics: weights.append(np.exp(-0.5 * (res[0] - min_ic)) / Z) ses.append(res[1]) if np.any(weights): for i, (idx, res) in enumerate(ics): diff = res[3] - ics[0][1][3] d_ic = np.sum(diff) d_se = np.sqrt(len(diff) * np.var(diff)) se = ses[i] weight = weights[i] df_comp.at[idx] = (round(res[0], round_to), round(res[2], round_to), round(d_ic, round_to), round(weight, round_to), round(se, round_to), round(d_se, round_to), warns[idx]) return df_comp.sort_values(by=ic)
def fixAnnotations(annotations): texts = [] for xt, yt, s in zip(xcen, ycen, annotations): texts.append(plt.text(xt, yt, s)) return texts
def loo(trace, model=None, pointwise=False, progressbar=False): """Calculates leave-one-out (LOO) cross-validation for out of sample predictive model fit, following Vehtari et al. (2015). Cross-validation is computed using Pareto-smoothed importance sampling (PSIS). Parameters ---------- trace : result of MCMC run model : PyMC Model Optional model. Default None, taken from context. pointwise: bool if True the pointwise predictive accuracy will be returned. Default False progressbar: bool Whether or not to display a progress bar in the command line. The bar shows the percentage of completion, the evaluation speed, and the estimated time to completion Returns ------- namedtuple with the following elements: loo: approximated Leave-one-out cross-validation loo_se: standard error of loo p_loo: effective number of parameters loo_i: and array of the pointwise predictive accuracy, only if pointwise True """ model = modelcontext(model) log_py = _log_post_trace(trace, model, progressbar=progressbar) if log_py.size == 0: raise ValueError('The model does not contain observed values.') # Importance ratios r = np.exp(-log_py) r_sorted = np.sort(r, axis=0) # Extract largest 20% of importance ratios and fit generalized Pareto to each # (returns tuple with shape, location, scale) q80 = int(len(log_py) * 0.8) pareto_fit = np.apply_along_axis( lambda x: pareto.fit(x, floc=0), 0, r_sorted[q80:]) if np.any(pareto_fit[0] > 0.7): warnings.warn("""Estimated shape parameter of Pareto distribution is greater than 0.7 for one or more samples. You should consider using a more robust model, this is because importance sampling is less likely to work well if the marginal posterior and LOO posterior are very different. This is more likely to happen with a non-robust model and highly influential observations.""") elif np.any(pareto_fit[0] > 0.5): warnings.warn("""Estimated shape parameter of Pareto distribution is greater than 0.5 for one or more samples. This may indicate that the variance of the Pareto smoothed importance sampling estimate is very large.""") # Calculate expected values of the order statistics of the fitted Pareto S = len(r_sorted) M = S - q80 z = (np.arange(M) + 0.5) / M expvals = map(lambda x: pareto.ppf(z, x[0], scale=x[2]), pareto_fit.T) # Replace importance ratios with order statistics of fitted Pareto r_sorted[q80:] = np.vstack(expvals).T # Unsort ratios (within columns) before using them as weights r_new = np.array([r[np.argsort(i)] for r, i in zip(r_sorted.T, np.argsort(r.T, axis=1))]).T # Truncate weights to guarantee finite variance w = np.minimum(r_new, r_new.mean(axis=0) * S**0.75) loo_lppd_i = - 2. * logsumexp(log_py, axis=0, b=w / np.sum(w, axis=0)) loo_lppd_se = np.sqrt(len(loo_lppd_i) * np.var(loo_lppd_i)) loo_lppd = np.sum(loo_lppd_i) lppd = np.sum(logsumexp(log_py, axis=0, b=1. / log_py.shape[0])) p_loo = lppd + (0.5 * loo_lppd) if pointwise: LOO_r = namedtuple('LOO_r', 'LOO, LOO_se, p_LOO, LOO_i') return LOO_r(loo_lppd, loo_lppd_se, p_loo, loo_lppd_i) else: LOO_r = namedtuple('LOO_r', 'LOO, LOO_se, p_LOO') return LOO_r(loo_lppd, loo_lppd_se, p_loo)
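The truncation step np.minimum(r_new, r_new.mean(axis=0) * S**0.75) caps each importance ratio at the per-observation mean times S^0.75, so no single draw can dominate the weighted average. A tiny numeric illustration of the cap; the ratios below are fabricated.

import numpy as np

r = np.array([0.8, 1.1, 0.9, 1.2, 1.0, 0.7, 1.3, 40.0])  # one observation's ratios, one extreme draw
S = len(r)
cap = r.mean() * S ** 0.75   # ~5.9 * 4.76 -> ~27.9
w = np.minimum(r, cap)       # the extreme 40.0 is truncated to ~27.9; the rest are unchanged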
class WoAiWoJiaSpider(BaseSpider): TypesMap = { '二手房': 'ershoufang', '新房': 'loupan', '成交': 'solds', '租房': 'zufang' } CityMap = { '苏州': 'sz', } Name = '5i5j' PerInPages = {k: v for k, v in zip(TypesMap.values(), [30, 10, 30, 30])} TitleMap = { '二手房': ['房源ID', '行政区', '商圈', '小区', '总价', '单价', '户型', '面积', '楼层', '产权', '年代'], '新房': ['行政区', '楼盘', '均价', '开发商', '交房时间', '销售状态', '产权年限', '绿化率', '容积率'], '成交': ['小区', '户型', '总价', '均价', '成交日期', '楼层', '朝向', '商圈'], '租房': ['租金', '户型', '面积', '支付方式', '年代', '出租方式', '行政区', '商圈', '小区'] } def __init__(self, name='', file_path='./', city='苏州', types='二手房', pages=None, re_connect=5): super(WoAiWoJiaSpider, self).__init__(name=self.Name + name, filepath=file_path, re_connect=re_connect) self.pages = pages if city in self.CityMap.keys(): self.city = self.CityMap[city] else: self.logger.info('初始化时, 未知城市%s' % city) if types in self.TypesMap.keys(): self.title = self.TitleMap[types] self.type_ = self.TypesMap[types] else: self.logger.info('初始化时,未知形式%s' % types) def __get_info_in_per_url_ershoufang(self, soup): detail_0 = soup.select('.rent-top p')[0].text.split('房源ID:')[-1] detail_1 = soup.select('.cur-path a') detail_1 = [d.text.split('二手房')[0] for d in detail_1][-3:] detail_2 = soup.select('.housesty .jlinfo') detail_2 = [d.text for d in detail_2] detail_3 = soup.select('.infocon span') detail_3 = [d.text for i, d in enumerate(detail_3) if i in [1, 3, 4]] information = [ detail_0, ] + detail_1 + detail_2 + detail_3 return information def __get_info_in_per_url_chengjiao(self, soup): detail_1 = soup.select('.house-tit') detail_1 = [d.text.split() for d in detail_1] information = detail_1[0] detail_2 = soup.select('.house-info .cjinfo') detail_2 = [d.text for d in detail_2] information += detail_2[:-1] + [ d for d in detail_2[-1].split() if len(d) > 8 ] detail_3 = soup.select('.detailinfo li') detail_3 = [d.text.split(':') for d in detail_3] information += [d[-1] for d in detail_3][:2] detail_4 = soup.select('.infomain li') detail_4 = [ d.text.split('所在商圈')[-1] for d in detail_4 if d.text.find('所在商圈') >= 0 ] information += detail_4 return information def __get_info_in_per_url_loupan(self, soup): detail_0 = soup.select('.menu li') detail_0 = [d.text.split('楼盘')[0] for d in detail_0][-2:] detail_1 = soup.select('.details_price .clearfix')[0].text.split()[0] price = '' for i in filter(str.isdigit, detail_1): price += i detail_2 = soup.select('.style_list .txtList .txt') detail_2 = [d.text.split(',')[0] for d in detail_2] detail_3 = soup.select('.style_list .txtList label')[2:] detail_3 = [d.text for d in detail_3] wy = detail_2[0].split()[0].split(',')[0] d2 = [ '', ] * 5 for i, key in enumerate(['交房时间', '销售状态', '产权年限', '绿化率', '容积率']): if key in detail_3: idx = detail_3.index(key) d2[i] = detail_2[idx] information = detail_0 + [price, wy] + d2 if sum(len(f) for f in information) <= 0: information = [] return information def __get_info_in_per_url_zufang(self, soup): detail_1 = soup.select('.housesty .jlinfo') detail_1 = [d.text for d in detail_1] detail_2 = soup.select('.zushous li') d2 = ['', ''] for d in detail_2: if d.text.find('年代') >= 0: d2[0] = d.text.split(':')[-1] if d.text.find('出租方式') >= 0: d2[1] = d.text.split(':')[-1] detail_3 = soup.select('.cur-path a') detail_3 = [d.text.split('租房')[0] for d in detail_3][-3:] information = detail_1 + d2 + detail_3 return information def get_urls_in_per_url(self, url, type_='solds'): select_key_map = { k: v for k, v in zip(self.TypesMap.values(), [ '.pList li .listTit a', '.houseList_list .txt1 a', '.pList li a', '.pList li 
.listTit a' ]) } if type_ in select_key_map.keys(): select_key = select_key_map[type_] else: self.logger.info('采集网页时,未知形式%s' % type_) return [] try: data = self.grasp(url) soup = BeautifulSoup(data, 'lxml') url_info = soup.select(select_key) page_url = [pg_url['href'] for pg_url in url_info] if type_ in [self.TypesMap[f] for f in ['成交', '二手房', '租房']]: origin_url = 'https://%s.5i5j.com' % self.city else: origin_url = 'https://fang.5i5j.com' urls = [origin_url + f for f in list(set(page_url))] except Exception as error_info: urls = [] self.logger.error(error_info) return urls def get_num_of_pages(self, url, types_): if types_ not in self.TypesMap.values(): self.logger.info("Unknown %s! Set pages to 1" % types_) return 1 html = self.grasp(url) if html is None: return 1 soup = BeautifulSoup(html, 'lxml') try: if types_ in [self.TypesMap[f] for f in ['成交', '二手房', '租房']]: records = soup.select('.total-box span')[0].text else: records = soup.select('.houseList_total i')[0].text except Exception as error_info: self.logger.info("Something is wrong! Set pages to 1") self.logger.error(error_info) return 1 pages = int((int(records) - 1) / self.PerInPages[types_]) + 1 return [pages if pages < self.LimitPages else self.LimitPages][0] def _get_url_list_for_run(self, area=None, conditions=None): if self.type_ in [self.TypesMap[f] for f in ['成交', '二手房', '租房']]: original_url = 'https://%s.5i5j.com/%s/' % (self.city, self.type_) else: original_url = 'https://fang.5i5j.com/%s/%s/' % (self.city, self.type_) # url = 'https://sz.5i5j.com/solds/' # url = 'https://sz.5i5j.com/ershoufang/' # url = 'https://sz.5i5j.com/zufang/' # url = 'https://fang.5i5j.com/sz/loupan/' temp = original_url if area is not None: temp += '%s/' % area if conditions is not None: temp += '%s/' % conditions if self.pages is None: pages = self.get_num_of_pages(temp, self.type_) else: pages = self.pages ## example_url = 'https://sz.5i5j.com/ershoufang/xiangchengqu/a4p5n3/' page_url_list = [] for i in range(1, pages + 1): temp = original_url if area is None and conditions is None: temp += 'n%s/' % i elif area is None and conditions is not None: temp += '%sn%s/' % (conditions, i) elif area is not None and conditions is None: temp += '%s/n%s/' % (area, i) else: temp += '%s/%sn%s/' % (area, conditions, i) page_url_list += [ temp, ] return page_url_list def _get_save_file_name(self): url_file = self.date_path + '/5i5j_page_urls_%s_%s.txt' % (self.city, self.type_) info_file = self.date_path + "/5i5j_information_%s_%s.csv" % ( self.city, self.type_) return url_file, info_file @property def function_map(self): Maps = { '二手房': self.__get_info_in_per_url_ershoufang, '新房': self.__get_info_in_per_url_loupan, '成交': self.__get_info_in_per_url_chengjiao, '租房': self.__get_info_in_per_url_zufang } return {self.TypesMap[i]: Maps[i] for i in ['二手房', '新房', '成交', '租房']}
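The page count in get_num_of_pages is ceiling division written as (records - 1) // per_page + 1, then clamped by LimitPages. A quick check of that arithmetic with one of the per-page sizes from PerInPages; the record counts are invented.

per_page = 30                       # e.g. the PerInPages value for ershoufang
for records in (1, 30, 31, 895):    # made-up record counts
    pages = (records - 1) // per_page + 1
    print(records, pages)           # 1 -> 1, 30 -> 1, 31 -> 2, 895 -> 30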