def send_day(day):
    # Days since January 1 of the current year (ones digit), separator marker, days (tens digit), reference marker
    # Days (hundreds digit), separator marker, unencoded bit, reference marker
    send_data(day)
    send_bits(bcd(day // 100))
    utils.divide()
    utils.vacancy()
    utils.p_unit()
def test_divide(self):
    self.assertEqual(utils.divide([]), 1)
    self.assertAlmostEqual(utils.divide([2.1]), 0.47619047619047616)
    self.assertAlmostEqual(utils.divide([2.1, 4.3, 2.1]), 0.052734272003374986)
    self.assertAlmostEqual(utils.divide([-0.5, 10.1, 3]), -0.06600660066006601)
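A minimal sketch consistent with the expectations asserted in the test above, assuming utils.divide(values) returns the reciprocal of the product of the values, with 1 for an empty list. This is inferred from the asserted numbers only, not taken from the project's source.

# Hypothetical reconstruction of utils.divide as exercised by the test above:
# the reciprocal of the product of the given values; an empty list yields 1.
from functools import reduce

def divide(values):
    product = reduce(lambda acc, v: acc * v, values, 1)
    return 1.0 / product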
def __init__(self, attention_mask_func, hidden_size, num_attention_heads,
             attention_dropout):
    super(ParallelSelfAttention, self).__init__()
    self.attention_mask_func = attention_mask_func

    # Per attention head and per partition values.
    world_size = torch.distributed.get_world_size()
    self.hidden_size_per_partition = divide(hidden_size, world_size)
    self.hidden_size_per_attention_head = divide(hidden_size,
                                                 num_attention_heads)
    self.num_attention_heads_per_partition = divide(num_attention_heads,
                                                    world_size)

    # Strided linear layer.
    self.query_key_value = ColumnParallelLinear(  # column linear
        hidden_size, 3 * hidden_size, gather_output=False)

    self.norm_factor = math.sqrt(self.hidden_size_per_attention_head)
    self.scale_mask_softmax = ScaleMaskSoftmax(
        mask_func=self.attention_mask_func, scale=None)

    # Dropout. Note that for a single iteration, this layer will generate
    # different outputs on different number of parallel partitions but
    # on average it should not be partition dependent.
    self.attention_dropout = torch.nn.Dropout(attention_dropout)

    # Output.
    self.dense = RowParallelLinear(input_size=hidden_size,
                                   output_size=hidden_size,
                                   input_is_parallel=True)
def send_data(data, digit=2):
    dot = 1
    for i in range(digit):
        dot *= 10
        unit = data % dot * 10 // dot  # digit at this position
        send_bits(bcd(unit))
        if i < digit - 1:
            utils.divide()
    utils.p_unit()
def test_divide(self): """Expectation: divide one number by another using the correct template""" with self.subTest("Testing divide() using integers"): computed = utils.divide(self.i3, self.i4) self.assertIsInstance(computed, int) self.assertEqual(computed, int(self.i3 / self.i4)) with self.subTest("Testing divide() using floats"): computed = utils.divide(self.f3, self.f4) self.assertIsInstance(computed, float) self.assertEqual(computed, self.f3 / self.f4)
def calculate_predictions(true_positive, false_positive, true_negative,
                          false_negative):
    """Calculate prediction metrics from the counts of true/false positives and negatives."""
    sens_denom = true_positive + false_negative
    spec_denom = true_negative + false_positive
    sensitivity = divide(true_positive, sens_denom)
    specificity = divide(true_negative, spec_denom)
    accuracy = divide(true_positive + true_negative, sens_denom + spec_denom)
    tpr = sensitivity      # true positive rate
    fpr = 1 - specificity  # false positive rate
    return accuracy, tpr, fpr
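Several snippets in this section (the metrics above, the group statistics, and the bid-feature extractors) call a two-argument divide(numerator, denominator). A plausible zero-safe version is sketched below; it is an assumption inferred from how the callers use it, not the original helper.

# Assumed zero-safe scalar division: return 0 when the denominator is falsy so
# that ratios over empty counts do not raise ZeroDivisionError.
def divide(numerator, denominator):
    if not denominator:
        return 0
    return numerator / denominator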
def send_control():
    """
    Unencoded bit, separator marker, unencoded bit, reference marker + time-quality marker,
    parity bit, unencoded bit, reference marker
    """
    utils.vacancy()
    utils.divide()
    utils.vacancy()
    utils.p_unit()
    utils.time_quality()
    utils.verify()
    utils.vacancy()
    utils.time_quality()
def compute(self):
    if not self.graph.processed():
        self.graph.process()
    user_count = len(self.graph)
    self.analyze_group()
    self.statistics.update({
        'subgroups': len(self.graph.groups),
        'user_count': user_count,
        'like_avg': divide(self.statistics['likes_total'], user_count),
        'age_avg': divide(self.statistics['age_total'], user_count),
        'friends_avg': divide(len(self.friends), user_count),
        'friends_age_avg': divide(self.statistics['friends_age_total'],
                                  len(self.friends)),
    })
    return self.statistics
def hydrate(self, ids):
    ids = divide(list(ids), 100)
    results = []
    for id in ids:
        result = data_from_id(self.tw, map(str, id))
        results.append(result)
    return results
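hydrate() above, and the paginator commands further down, pass a list and a chunk size to divide(). A likely shape for that helper is a simple chunker, sketched here as an assumption rather than the project's actual code.

# Assumed list-chunking helper: split seq into consecutive chunks of at most
# `size` elements, preserving order.
def divide(seq, size):
    return [seq[i:i + size] for i in range(0, len(seq), size)]

# Example: divide([1, 2, 3, 4, 5], 2) -> [[1, 2], [3, 4], [5]]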
def __init__(self, input_size, output_size, input_is_parallel=False):
    super(RowParallelLinear, self).__init__()

    # Keep input parameters
    self.input_size = input_size
    self.output_size = output_size
    self.input_is_parallel = input_is_parallel

    # Divide the weight matrix along the last dimension.
    world_size = torch.distributed.get_world_size()
    self.input_size_per_partition = divide(input_size, world_size)

    # Parameters.
    # Note: torch.nn.functional.linear performs XA^T + b and as a result
    # we allocate the transpose.
    # Initialize weight.
    self.weight = Parameter(
        torch.empty(self.output_size, self.input_size_per_partition,
                    device=torch.cuda.current_device(), dtype=torch.float))
    torch.nn.init.xavier_normal_(self.weight)
    self.bias = Parameter(
        torch.empty(self.output_size,
                    device=torch.cuda.current_device(), dtype=torch.float))
    # Always initialize bias to zero.
    with torch.no_grad():
        self.bias.zero_()
def __init__(self, pattern = "", seeds_dict = {}): self.pattern = pattern self.strlen = len(pattern) self.num_of_seeds = len(seeds_dict) self.seeds_dict = seeds_dict self.max_seed = max(self.seeds_dict.values()) self.avg_seed = utils.divide(utils.sum(self.seeds_dict.values()), self.num_of_seeds)
def features(self):
    return {
        # Device
        # 'device': self.device,
        # Product
        # 'product': self.merchandise,
        # Country
        'country': self.country,
        # Price
        # 'price': self.price,
        'price_fr': utils.short_float(utils.divide(self.price, self.last_price)),
        # IP
        # 'ip_3oct': self.ip_3oct,
        # 'ip_2oct': self.ip_2oct,
        # Time
        # 'time': self.time,
        # 'time_from_start': self.time_from_start,
        # 'time_to_end': self.time_to_end,
        # format the _perc features to have 2 digits after the decimal point
        'time_from_start_perc': utils.short_float(utils.divide(self.time, self.auc_length)),
        'time_to_end_perc': utils.short_float(utils.divide(self.time_to_end, self.auc_length)),
        # 'time_from_day_start': self.time_from_day_start,
        # 'time_to_prev_bid': self.time_to_prev_bid,
        # Order
        # 'is_first': self.is_first,
        # 'is_last': self.is_last,
        # 'day': self.day,
        'unique': self.prev_unique,
        'unique_50': self.prev_unique_50,  # how many immediate prev bids are made by the same user?
        # Url
        # 'ref': self.url,
    }
def EStep(pose, log_var, log_activation, vote):
    normal_vote = utils.divide(tf.square(vote - pose), 2 * tf.exp(log_var))
    log_probs = normal_vote + utils.log(2 * np.pi) + log_var
    log_probs = -0.5 * tf.reduce_sum(log_probs, axis=-1, keepdims=True)
    log_act_logit = log_activation + log_probs
    log_act_logit = log_probs  # note: this overrides the activation-weighted logit above
    log_R = log_act_logit - tf.reduce_logsumexp(log_act_logit, axis=-2,
                                                keepdims=True)
    return log_R
def use_my_utils(a, b):
    differenceof = utils.differenceof(a, b)
    sumof = utils.sumof(a, b)
    multiplyof = utils.multiplyof(a, b)
    divide = utils.divide(a, b)
    print(differenceof)
    print(sumof)
    print(multiplyof)
    print(divide)
def analyze_group(self):
    for k, group in enumerate(self.graph.groups):
        shared_songs = 0
        friendships = 0
        for edge in self.graph.edges:
            if edge[0] in group or edge[1] in group:
                friendships += 1
                shared_songs += int(edge[2]['songs'])
        self.statistics['shared_songs_total_group'][k] = shared_songs
        self.statistics['shared_songs_avg_group'][k] = \
            divide(shared_songs, friendships) if shared_songs else 0
def migrate_node(self, src_node):
    nodes = [n for n in self.masters if n.name != src_node.name]
    slot_count = len(src_node.slots)
    if slot_count <= 0:
        return
    slots = divide(slot_count, len(nodes))
    nodes.sort(key=lambda x: len(x.slots))
    for node, count in zip(nodes, slots):
        src, dst = (src_node, node)
        self.migrate(src, dst, count)
def fill_slots(self):
    masters = self.masters
    slots = itertools.chain(*[n.slots for n in masters])
    missing = list(set(range(self.CLUSTER_HASH_SLOTS)).difference(slots))
    div = divide(len(missing), len(masters))
    masters.sort(key=lambda x: len(x.slots))
    i = 0
    for count, node in zip(div, masters):
        node.add_slots(*missing[i:count + i])
        i += count
def bind_slots_force(self):
    masters = self.masters
    slots = itertools.chain(*[n.slots for n in masters])
    missing = list(set(range(self.CLUSTER_HASH_SLOTS)).difference(slots))
    div = divide(len(missing), len(masters))
    masters.sort(key=lambda x: len(x.slots))
    i = 0
    for count, node in zip(div, masters):
        for slot in missing[i:count + i]:
            self.update_slot_mapping(slot, node.name)
        i += count
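The cluster-slot helpers (migrate_node, fill_slots, bind_slots_force, and slot_balance below) iterate over divide(total, parts) as if it returned a list of per-node counts. A sketch consistent with that usage, offered as an assumption rather than the original implementation:

# Assumed count-partitioning helper: split the integer `total` into `parts`
# near-equal integer counts whose sum is exactly `total`.
def divide(total, parts):
    base, extra = divmod(total, parts)
    return [base + 1 if i < extra else base for i in range(parts)]

# Example: divide(16384, 3) -> [5462, 5461, 5461]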
def count_proximity_over_cooc(self, list, cache=False):
    """list - check proximity over cooccurrence for the given list of terms"""
    prox = self.count_proximity(list, cache)
    cooc = self.count_cooccurrence(list, cache)
    result = utils.divide(prox, cooc)
    if self.debug:
        print "Calculated proximity count for terms %s - %0.5f (cooc - %d)" % (list, result, cooc)
    return result
def compute_metrics(stats_authors):
    # Keep only the possible candidates:
    stats_authors = {
        s: stats_authors[s]
        for s in stats_authors if stats_authors[s]["Candidate"]
    }
    # Compute (more complex) metrics:
    for authorName, authorStats in stats_authors.items():
        stats_authors[authorName]["Following-Followers Ratio"] = divide(
            authorStats["Following"], authorStats["Followers"])
        stats_authors[authorName]["TotViews"] = sum(authorStats["ViewsSerie"])
        stats_authors[authorName]["Views-Followers Ratio"] = divide(
            authorStats["TotViews"], authorStats["Followers"],
            round_result=False)
        stats_authors[authorName]["AverageLikes"] = divide(
            authorStats["NLikes"], authorStats["NVideos"])
        stats_authors[authorName]["AverageViews"] = divide(
            sum(authorStats["ViewsSerie"]), authorStats["NVideos"])
    save_metrics(stats_authors)
    return stats_authors
def split_tensor_along_last_dim(tensor, num_partitions,
                                contiguous_split_chunks=False):
    """Split a tensor along its last dimension.

    Arguments:
        tensor: input tensor.
        num_partitions: number of partitions to split the tensor into.
        contiguous_split_chunks: if True, make each chunk contiguous in memory.
    """
    last_dim = tensor.dim() - 1
    last_dim_size = divide(tensor.size()[last_dim], num_partitions)
    # Split
    tensor_list = torch.split(tensor, split_size_or_sections=last_dim_size,
                              dim=last_dim)
    return tensor_list
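The tensor-parallel snippets (ParallelSelfAttention, RowParallelLinear, split_tensor_along_last_dim, and the train() benchmark) expect divide() to return an exact integer share of a dimension. A Megatron-LM-style sketch, stated as an assumption about this codebase:

# Assumed exact integer division with a divisibility check: fail loudly if the
# sizes cannot be partitioned evenly across ranks or heads.
def ensure_divisibility(numerator, denominator):
    assert numerator % denominator == 0, \
        '{} is not divisible by {}'.format(numerator, denominator)

def divide(numerator, denominator):
    ensure_divisibility(numerator, denominator)
    return numerator // denominator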
def choose_solo_notes(self):
    rest_beginning = random.choice([True, False, False, False, False])
    rest_middle = random.choice([True, True, False])
    rest_end = random.choice([True, True, True, False, False])
    rests = [rest_beginning, rest_middle, rest_end]
    num_rests = rests.count(True)
    min_num_divs = 3 + num_rests
    num_divs = random.randint(min_num_divs, 9)
    divs = divide(16, num_divs)
    notes = [{'pitch': None, 'duration': div / 4.0} for div in divs]
    if rest_beginning:
        notes[0]['pitch'] = 'rest'
    if rest_end:
        notes[-1]['pitch'] = 'rest'
    if rest_middle:
        if rest_beginning:
            start = 2
        else:
            start = 1
        if rest_end:
            end = -2
        else:
            end = -1
        middle_rest_index = random.choice(range(len(notes))[start:end])
        notes[middle_rest_index]['pitch'] = 'rest'
    for note in notes:
        if note['pitch'] != 'rest':
            note['pitch'] = ps = random.choice(self.soloist_shared_notes)
            self.soloist_shared_notes = [p for p in frange(ps - 2, ps + 3)
                                         if p in self.all_soloists_shared_notes]
            self.soloist_shared_notes = [p for p in frange(ps - 5, ps + 6)
                                         if p in self.all_soloists_shared_notes]
    return notes
async def list_category(self, ctx):
    embeds = []
    for categories in divide(list(self.bot.categoryDB.find()), 10):
        embed = Embed(title='카테고리 목록', color=Color.green())
        for category in categories:
            embed.add_field(name=category['name'],
                            value=f'`{category["description"]}`')
        embeds.append(embed)
    if embeds:
        msg = await ctx.send(embed=embeds[0])
        await Paginator(self.bot, msg, embeds=embeds).start()
    else:
        await ctx.send(embed=Embed(
            title='카테고리 목록',
            description='카테고리가 없습니다',
            color=Color.green()))
def __init__(self, g: Graph, levels_to_build):
    super().__init__()
    self.g = g
    self.division = divide(g)
    self.levels_to_build = levels_to_build
    self.final = []
    self.start = FactorGraph(
        g, sorted([CongruenceClass([node]) for node in g.nodes]), '∆')
    self.add_node(self.start)
    self.levels_count = g.number_of_nodes() - 1
    if self.levels_to_build is not None:
        self.levels_count = min(self.levels_to_build, self.levels_count)
    self.levels = [[] for _ in range(self.levels_count)]
    self.levels_set = [set() for _ in range(self.levels_count)]
    self.nodes_levels = {}
    self._build()
async def help(self, ctx):
    embeds = [
        Embed(title='도움',
              description='`이모지`로 페이지를 넘기세요',
              color=Color.green())
    ]
    for i in range(len(self.bot.cogs)):
        cog = self.bot.cogs[list(self.bot.cogs.keys())[i]]
        embeds[0].add_field(name=f'{i + 1}페이지 | {cog.name}',
                            value=f'`{cog.description}`',
                            inline=True)
        for cmds in divide(cog.get_commands(), 10):
            embed = Embed(title=f'{cog.name} 도움', color=Color.green())
            embed.set_footer(text=f'Page {i + 1}')
            for cmd in cmds:
                if 'commands' not in dir(cmd):
                    embed.add_field(
                        name=f'{self.bot.command_prefix}{cmd.name}'
                        if not cmd.usage
                        else f'{self.bot.command_prefix}{cmd.usage}',
                        value=cmd.help or '설명 없음',
                        inline=True)
                else:
                    for child_cmd in cmd.commands:
                        embed.add_field(
                            name=f'{self.bot.command_prefix}{cmd.name} {child_cmd.name}'
                            if not child_cmd.usage
                            else f'{self.bot.command_prefix}{cmd.name} {child_cmd.usage}',
                            value=child_cmd.help or '설명 없음',
                            inline=True)
            embeds.append(embed)
    msg = await ctx.send(embed=embeds[0])
    await Paginator(self.bot, msg, embeds=embeds).start()
async def list_post(self, ctx, *, query: str = None):
    if not query:
        postlist = list(self.bot.postDB.find())
        title = '글 목록'
        no_description = '글이 없습니다'
    else:
        postlist = []
        _idlist = []
        _postlist = (self.bot.postDB.get({'title': {'$regex': f'.*{query}.*'}})
                     + self.bot.postDB.get({'content': {'$regex': f'.*{query}.*'}}))
        for data in _postlist:
            if data['_id'] in _idlist:
                continue
            _idlist.append(data['_id'])
            postlist.append(data)
        title = f'"{query}" 검색 결과'
        no_description = '검색 결과가 없습니다'
    embeds = []
    for categories in divide(postlist, 10):
        embed = Embed(title=title, color=Color.green())
        for post in categories:
            embed.add_field(
                name=f'{post["title"]} ({post["_id"]})',
                value=f'by `{self.bot.get_user(int(post["authorID"]))}` '
                      f':heart: `{len(post["hearts"])}` '
                      f':speech_balloon: `{len(self.bot.commentDB.get({"postID": post["_id"]}))}`',
                inline=False)
        embeds.append(embed)
    if embeds:
        msg = await ctx.send(embed=embeds[0])
        await Paginator(self.bot, msg, embeds=embeds).start()
    else:
        await ctx.send(embed=Embed(title=title,
                                   description=no_description,
                                   color=Color.green()))
def slot_balance(self, seq):
    amt = self.CLUSTER_HASH_SLOTS
    seq.sort(key=lambda x: x['count'], reverse=True)
    chunks = divide(amt, len(seq))
    pairs = list(zip(seq, chunks))
    i, j = 0, len(pairs) - 1
    while i < j:
        m, count = pairs[i]
        more = m['count'] - count
        if more <= 0:
            i += 1
            continue
        n, count = pairs[j]
        need = count - n['count']
        if need <= 0:
            j -= 1
            continue
        if need < more:
            n['need'].append((m['node'], need))
            n['count'] += need
            m['count'] -= need
            j -= 1
        elif need > more:
            n['need'].append((m['node'], more))
            n['count'] += more
            m['count'] -= more
            i += 1
        else:
            n['need'].append((m['node'], need))
            n['count'] += need
            m['count'] -= more
            j -= 1
            i += 1
    return seq
from numpy import asarray
from numpy import save
import straw
from scipy.sparse import csr_matrix

import utils

highres = utils.matrix_extract(
    18, 10000,
    "https://hicfiles.s3.amazonaws.com/hiseq/gm12878/in-situ/combined.hic")

print('dividing, filtering and downsampling files...')
highres_sub, index = utils.divide(highres)
print("highres shape: ", highres_sub.shape)

lowres = utils.genDownsample(highres, 1 / float(16))
lowres_sub, index = utils.divide(lowres)
print("lowres shape: ", lowres_sub.shape)

save('lowres_ch18.npy', lowres_sub)
save('highres_ch18.npy', highres_sub)
        str(args['resolution']))
    low_chr_mat = low_cool.matrix(
        balance=False).fetch("chr" + str(args['chr_num'])).astype(float)
    low_chr_mat[np.isnan(low_chr_mat)] = 0
    chr_frames, chr_indices = utils.divide2(low_chr_mat, args['chr_num'])
    enhanced_chr_mat = low_cool.matrix(
        balance=False).fetch("chr" + str(args['chr_num'])).astype(float)
    enhanced_chr_mat[np.isnan(enhanced_chr_mat)] = 0
    """
    average_chr_mat = low_cool.matrix(balance=False).fetch("chr" + str(args['chr_num'])).astype(float)
    average_chr_mat[np.isnan(average_chr_mat)] = 0
    """
else:
    chr_frames, chr_indices = utils.divide(args['LowRes_matrix_path'],
                                           args['chr_num'],
                                           args['resolution'],
                                           args['genome_type'])
    low_chr_mat = np.load(args['LowRes_matrix_path'] + '_npy_form_tmp.npy')
    enhanced_chr_mat = np.load(args['LowRes_matrix_path'] + '_npy_form_tmp.npy')
    # average_chr_mat = np.load(args['LowRes_matrix_path'] + '_npy_form_tmp.npy')

# applying model on frames
chr_frames = np.stack(chr_frames, axis=0)
chr_indices = np.stack(chr_indices, axis=0)
chr_frames = np.expand_dims(chr_frames, axis=1)
lowres_set = torch.from_numpy(chr_frames).float()
enhanced_set = Net(Variable(lowres_set))
enhanced_set = enhanced_set.data.cpu().numpy()
enhanced_set = np.reshape(
    enhanced_set,
    (enhanced_set.shape[0], enhanced_set.shape[2], enhanced_set.shape[3]))
use_gpu = 1

conv2d1_filters_numbers = 8
conv2d1_filters_size = 9
conv2d2_filters_numbers = 8
conv2d2_filters_size = 1
conv2d3_filters_numbers = 1
conv2d3_filters_size = 5

down_sample_ratio = 16
epochs = 10
HiC_max_value = 100

# This block is the actual training data used in the training. The training data is
# too large to put on GitHub, so only toy data is used.
input_file = '/home/zhangyan/Desktop/chr21.10kb.matrix'

low_resolution_samples, index = utils.divide(input_file)
low_resolution_samples = np.minimum(HiC_max_value, low_resolution_samples)

batch_size = low_resolution_samples.shape[0]

# Reshape the high-quality Hi-C sample as the target value of the training.
sample_size = low_resolution_samples.shape[-1]
padding = conv2d1_filters_size + conv2d2_filters_size + conv2d3_filters_size - 3
half_padding = padding / 2
output_length = sample_size - padding

print low_resolution_samples.shape

lowres_set = data.TensorDataset(
    torch.from_numpy(low_resolution_samples),
def __init__(self, encoder, decoder, n_samples=1, q=6 / 5, n_lambda=29):
    super(particle_flow, self).__init__()
    self.n_samples = n_samples
    self.intervals = divide(q, n_lambda)
    self.encoder = encoder
    self.decoder = decoder
if args.ci_mode and not args.ci_threshold:
    parser.error("CI mode requires a threshold to be set")

cwd = os.getcwd()
master = load_tool(args.mutation_tool, cwd)

if not args.benchmark:
    print("Checking project compatibility with {0}...".format(args.mutation_tool))
if not master.check():
    sys.exit("Selected mutation tool reports that it doesn't support the current project.")

print("Creating mutants...")
mdir, mutants = master.mutate()

print("Scoring mutants in parallel...")
divided_mutants = divide(mutants, args.scorers)
# functools.partial is used instead of a lambda below, as the latter can't be pickled
toolfun = functools.partial(load_tool, args.mutation_tool)
scorefun = functools.partial(local_scorer.create_and_score, toolfun, cwd, mdir)
with Pool(processes=args.scorers) as pool:
    nested_results = pool.map(scorefun, divided_mutants, 1)
results = ScoringResult(flatten_list(nested_results))

if not args.benchmark:
    print("Loading mutant metadata from the filesystem...")
results.add_metadata(cwd, mdir)

if args.ci_mode:
def features(self):
    # global human_cnt
    # if self.is_robot or (self.is_human and human_cnt > 0):
    # if self.is_human:
    #     human_cnt -= 1

    # self.find_increment_patterns()
    # self.find_inc_price_patterns()

    increments = self.get_all_increments()
    bids_count = len(self.bids)
    true_bids_count = len(increments)

    # Auctions
    auctions_count = len(self.auctions)
    sim_auctions = self.get_sim_auctions()
    won_auctions_count = len(self.win_bids)
    auction_rank = 0.0
    for auc, bids in self.auctions.iteritems():
        auction_rank += shared.auction_rank[auc]
    if self.auctions:
        auction_rank /= len(self.auctions)

    # Price
    human_price_rmse_per_auction = self.get_price_rmse(
        self.last_bids, shared.human_median_price_per_auction, 'auction')
    human_price_rmse_per_product = self.get_price_rmse(
        self.last_bids, shared.human_median_price_per_product, 'merchandise')
    robot_price_rmse_per_auction = self.get_price_rmse(
        self.last_bids, shared.robot_median_price_per_auction, 'auction')
    robot_price_rmse_per_product = self.get_price_rmse(
        self.last_bids, shared.robot_median_price_per_product, 'merchandise')

    # Stats per auction
    avg_countries_per_auction = self.get_per_auction(np.average, self.counties_per_auction)
    median_countries_per_auction = self.get_per_auction(np.median, self.counties_per_auction)
    std_countries_per_auction = self.get_per_auction(np.std, self.counties_per_auction)
    avg_devices_per_auction = self.get_per_auction(np.average, self.devices_per_auction)
    median_devices_per_auction = self.get_per_auction(np.median, self.devices_per_auction)
    std_devices_per_auction = self.get_per_auction(np.std, self.devices_per_auction)
    avg_referrals_per_auction = self.get_per_auction(np.average, self.referrals_per_auction)
    median_referrals_per_auction = self.get_per_auction(np.median, self.referrals_per_auction)
    std_referrals_per_auction = self.get_per_auction(np.std, self.referrals_per_auction)
    avg_ips_per_auction = self.get_per_auction(np.average, self.ips_per_auction)
    median_ips_per_auction = self.get_per_auction(np.median, self.ips_per_auction)
    std_ips_per_auction = self.get_per_auction(np.std, self.ips_per_auction)
    # avg_inc_per_auction = self.get_per_auction(np.average, self.increments_per_auction)
    # median_inc_per_auction = self.get_per_auction(np.median, self.increments_per_auction)
    # std_inc_per_auction = self.get_per_auction(np.std, self.increments_per_auction)

    # IP
    avg_ips_per_increment = utils.divide(sum([inc.ips_count for inc in increments]),
                                         len(increments))
    frequent_ip = '.'.join(self.get_frequent(self.ips).split('.')[:2])
    ip_octets = defaultdict(int)
    for ip, count in self.ips.iteritems():
        octets = ip.split('.')
        ip_octets['.'.join(octets[:1])] += count
        ip_octets['.'.join(octets[:2])] += count
        ip_octets['.'.join(octets[:3])] += count
        ip_octets['.'.join(octets[:4])] += count
    sorted_octets = sorted(ip_octets.items(), key=operator.itemgetter(1), reverse=True)

    # Countries
    cnt_mask = 0
    seen_countries = set(self.countries.keys())
    for country in seen_countries:
        i = shared.countries.index(country)
        cnt_mask |= (1 << (i + 1))
    cnt_mask = str(cnt_mask)
    countries_inc = defaultdict(int)
    for inc in self.bids:
        countries_inc[inc.country] += 1
    country_rank = 0
    for country, count in countries_inc.iteritems():
        country_rank += shared.country_rank[country] * count
    if len(countries_inc):
        country_rank = float(country_rank) / sum(countries_inc.values())
    regions_mask = 0
    seen_regions = set([shared.country_to_region[c] for c in seen_countries])
    for region in seen_regions:
        i = shared.regions.index(region)
        regions_mask |= (1 << (i + 1))
    regions_mask = str(regions_mask)

    # Products
    all_products = ['mobile', 'jewelry', 'home goods', 'sporting goods',
                    'auto parts', 'office equipment', 'computers',
                    'books and music', 'furniture', 'clothing']
    products_mask = 0
    # compute all products the user bid on as a number
    for p in self.products.keys():
        i = all_products.index(p) + 1
        if not i:
            continue
        products_mask |= (1 << i)
    products_mask = str(products_mask)

    # Bids
    bid_on_unpopular = -1.0
    for bid in self.bids:
        if bid.merchandise in ["auto parts", "clothing", "furniture"]:
            bid_on_unpopular = 1.0
            break

    # Generate
    # TODO: add count of times user reached max price in auction (measure greediness)
    labels = ["country", "device", "product", "ip", "ref", "auction", "bids",
              "increments", "won_auctions", "sim_auctions"]
    values = [len(self.countries), len(self.devices), len(self.products),
              len(self.ips), len(self.referrals), auctions_count,
              len(self.bids), true_bids_count, won_auctions_count, sim_auctions]
    # ops = ["+", "-", "*", "/"]
    ops = ["/"]
    generated_features = dict()
    for op in ops:
        for i in xrange(len(labels)):
            al = labels[i]
            av = values[i]
            for j in xrange(i + 1, len(labels)):
                bl = labels[j]
                bv = values[j]
                if op == "+":
                    generated_features[al + op + bl] = av + bv
                elif op == "-":
                    generated_features[al + op + bl] = av - bv
                elif op == "*":
                    generated_features[al + op + bl] = av * bv
                elif op == "/":
                    generated_features[al + op + bl] = utils.divide(av, bv)

    # Time
    time_hist_prob = 0
    for bid in self.bids:
        i = bisect.bisect_left(shared.human_hist_bins, bid.time)
        time_hist_prob += shared.human_hist[i - 1]
    if self.bids:
        time_hist_prob /= float(len(self.bids))

    features = {
        "set_0": self.bidder_id in shared.set0,

        # Address
        # "addr_1": self.get_addr_1(),
        # "addr_2": self.get_addr_2(),
        # "is_addr_1_unique": len(shared.addr_1[self.get_addr_1()]) == 1,
        # "is_addr_2_unique": len(shared.addr_2[self.get_addr_2()]) == 1,
        # "addr_1=a3d2de7675556553a5f08e4c88d2c228": self.get_addr_1() == 'a3d2de7675556553a5f08e4c88d2c228',

        # Devices
        # "devices": len(self.devices),  # TODO: devices per last X bids
        # "avg_devices_per_auction": avg_devices_per_auction,
        # "median_devices_per_auction": median_devices_per_auction,
        # "std_devices_per_auction": std_devices_per_auction,

        # Time
        "time_hist_prob": time_hist_prob,

        # Country
        # "unique_countries_count": len(self.countries),
        # "frequent_country": self.get_frequent(self.countries),
        "avg_countries_per_auction": avg_countries_per_auction,
        "median_countries_per_auction": median_countries_per_auction,
        "std_countries_per_auction": std_countries_per_auction,
        # "seen_countries": cnt_mask,
        # "country_rank": country_rank,
        # "seen_regions": regions_mask,
        # "regions_count": len(seen_regions),
        "country_change": self.get_change_rate('country'),

        # Products
        # "frequent_product": self.get_frequent(self.products),
        "bid_on_unpopular": bid_on_unpopular,
        # "products_mask": products_mask,

        # IP
        # "unique_ips": len(self.ips),
        "avg_ips_per_auction": avg_ips_per_auction,
        "median_ips_per_auction": median_ips_per_auction,
        "std_ips_per_auction": std_ips_per_auction,
        # "avg_unique_ips": utils.divide(len(self.ips), bids_count),
        # "avg_ips": utils.divide(sum(self.ips.values()), bids_count),
        "avg_ips_per_increment": avg_ips_per_increment,
        # "frequent_ip": frequent_ip,
        # "frequent_ip_class": self.get_ip_class(frequent_ip),  # http://www.vlsm-calc.net/ipclasses.php
        # "most_popular_octet": sorted_octets[0][0] if sorted_octets else '',
        # "ip_rank": self.get_ip_rank(self.bids),
        "ip_change": self.get_change_rate('ip_pref'),

        # Referrals
        # "frequent_referral": self.get_frequent(self.referrals),
        # "referrals_count": len(self.referrals),
        "avg_referrals_per_auction": avg_referrals_per_auction,
        "median_referrals_per_auction": median_referrals_per_auction,
        "std_referrals_per_auction": std_referrals_per_auction,
        "referral_change": self.get_change_rate('url'),

        # Auctions
        # "auctions_count": auctions_count,
        # "won_auctions_count": won_auctions_count,
        # "sim_auctions": sim_auctions,
        # "auction_rank": auction_rank,

        # Payment
        # "payment_type": self.get_payment_type(),
        # "payment_acct": self.get_payment_acct(),
        # "is_pmt_type_unique": len(shared.pmt_type[self.get_payment_type()]) == 1,
        # "is_pmt_acct_unique": len(shared.pmt_accnt[self.get_payment_acct()]) == 1,
        # "payment_type=addr_1": self.get_payment_type() == self.get_addr_1(),
        # "payment_acct=addr_2": self.get_payment_acct() == self.get_addr_2(),

        # Bids
        # "true_bids_count": true_bids_count,
        # "avg_inc_per_auction": avg_inc_per_auction,
        # "median_inc_per_auction": median_inc_per_auction,
        # "std_inc_per_auction": std_inc_per_auction,

        # Price
        # rmse is calculated against won_price and measures the user's price threshold/estimate;
        # expect it to be bigger for humans and smaller for robots
        # "human_price_rmse_per_auction": human_price_rmse_per_auction,
        # "human_price_rmse_per_product": human_price_rmse_per_product,
        # "robot_price_rmse_per_auction": robot_price_rmse_per_auction,
        # "robot_price_rmse_per_product": robot_price_rmse_per_product,
    }
    features.update(generated_features)
    # features.update(buckets_dict)
    return features
def train():
    # Initialize torch.distributed
    init_distributed()

    print_rank_0('AutoMP: training ParallelTransformerLayer...')

    batch_size = args.batch_size
    sequence_length = args.sequence_length
    hidden_size = args.hidden_size
    vocab_size = args.vocab_size
    hidden_dropout = args.hidden_dropout
    attention_dropout = args.attention_dropout
    num_layers = args.num_layers
    layernorm_epsilon = args.layernorm_epsilon
    num_attention_heads = args.num_attention_heads

    input_indices = torch.randint(low=0, high=vocab_size,
                                  size=(batch_size, sequence_length))
    input_indices = input_indices.to(torch.cuda.current_device())
    labels = torch.randint(low=0, high=vocab_size,
                           size=(batch_size, sequence_length))
    labels = labels.to(torch.cuda.current_device())
    position_indices = torch.tile(torch.arange(start=0, end=sequence_length),
                                  (batch_size, 1))
    position_indices = position_indices.to(torch.cuda.current_device())

    def init_method_normal(tensor):
        return torch.nn.init.normal_(tensor, mean=0.0, std=1.0)

    def gpt2_attention_mask_func(attention_scores, ltor_mask):
        attention_scores.masked_fill_(ltor_mask, -10000.0)
        return attention_scores

    embedding = Embedding(hidden_size=hidden_size,
                          vocab_size=vocab_size,
                          max_sequence_length=sequence_length,
                          embedding_dropout_prob=hidden_dropout,
                          init_method=init_method_normal)
    embedding_output = embedding.forward(input_indices, position_indices)

    transformer_layer = ParallelTransformerLayer(
        attention_mask_func=gpt2_attention_mask_func,
        layer_number=0,
        hidden_size=hidden_size,
        layernorm_epsilon=layernorm_epsilon,
        num_attention_heads=num_attention_heads,
        attention_dropout=attention_dropout,
        hidden_dropout=hidden_dropout)

    # attention_mask, loss_mask, position_ids = get_ltor_masks_and_position_ids(input_indices, vocab_size - 1)
    attention_mask = (torch.randint(
        low=0, high=2,
        size=(sequence_length,
              divide(num_attention_heads, torch.distributed.get_world_size()),
              batch_size, batch_size)) < 0).cuda()

    optimizer = torch.optim.SGD(transformer_layer.parameters(), lr=0.01)

    profiler = Profiler(os.path.join('benchmark', args.exp_name))

    num_epochs = 5
    tot_time = 0
    nproc = torch.distributed.get_world_size()
    for epoch in range(num_epochs):
        input_ = torch.rand(size=embedding_output.size()).cuda()
        overall_name = f'transformer_layer_np-{nproc}_hs-{hidden_size}_nah-{num_attention_heads}_bsz-{batch_size}'
        profiler.start(overall_name)

        # Forward pass
        fname = f'transformer_layer_forward_np-{nproc}_hs-{hidden_size}_nah-{num_attention_heads}_bsz-{batch_size}'
        profiler.start(fname)
        loss = transformer_layer.forward(input_, attention_mask)
        train_loss = torch.mean(loss)
        # print(train_loss)
        torch.cuda.synchronize()
        profiler.stop(fname)

        # Backward pass
        bname = f'transformer_layer_backward_np-{nproc}_hs-{hidden_size}_nah-{num_attention_heads}_bsz-{batch_size}'
        profiler.start(bname)
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        torch.cuda.synchronize()
        profiler.stop(bname)

        profiler.stop(overall_name)
""" # this works if format of file names in a COO_folder is not like chr[chr_num].txt but we should care about sequence of filenames to be same in # folder of high resolution files and folder of low resolution files chr_files_list = [f for f in os.listdir(COO_folder_path) if (not f.startswith('.')) & (not f.endswith('_npy_form_tmp.npy'))] chr_num_list = [] for f in chr_files_list: m = re.search('chr(\d+|x)', f, re.IGNORECASE) chr_num_list.append(int(m.group(1))) """ frames_data = [] index_data = [] for i in range(args['min_chrN'], args['max_chrN'] + 1): chr_COO_file_name = "chr" + str(i) + ".txt" chr_data_path = os.path.join(COO_folder_path, chr_COO_file_name) temp_frames, temp_index = utils.divide(chr_data_path, i, args['resolution'], args['genome_type'], args['COO_format']) frames_data.extend(temp_frames) index_data.extend(temp_index) print("chr" + str(i) + " is done!") frames_data = np.stack(frames_data, axis=0) index_data = np.stack(index_data, axis=0) if not os.path.exists(args['output_folder_path']): os.makedirs(args['output_folder_path']) np.save( os.path.join(args['output_folder_path'], args['output_file_name'] + ".npy"), frames_data) np.save( os.path.join(args['output_folder_path'], args['output_file_name'] + "-index.npy"), index_data)
delimiter = opt.delimiter

expRes = 10000  # need to make the resolution adjustable
length = chrs_length[chrN - 1] / expRes

# divide the input matrix into sub-matrices
inputMatrix = utils.readFiles(input_file, length + 1, expRes, delimiter)
print("inputMatrix is symmetric?")
print(is_symmetric(inputMatrix))

compareMatrix = utils.readFiles(compare_matrix, length + 1, expRes, delimiter)
print("compareMatrix is symmetric?")
print(is_symmetric(compareMatrix))

low_resolution_samples, index = utils.divide(inputMatrix, chrN)
low_resolution_samples = np.minimum(
    HiC_max_value, low_resolution_samples
)  # clipping at HiC_max_value; values below the cap are left unchanged

batch_size = low_resolution_samples.shape[0]  # 256
# batch_size = 256
print("batch_size:", batch_size)

# Reshape the high-quality Hi-C sample as the target value of the training.
sample_size = low_resolution_samples.shape[-1]
padding = conv2d1_filters_size + conv2d2_filters_size + conv2d3_filters_size - 3
half_padding = padding / 2
output_length = sample_size - padding
def test_process(self):
    numSets = [[1, -1, 2, 4, 3.2, 4.1], [1], [-1, 2]]
    for nums in numSets:
        self.assertAlmostEqual(self.node._processReturn(nums, 1),
                               utils.divide(nums))
        self.assertTimeIndependent(nums)
def count_cooc_over_min_freq(self, list, cache=False):
    cooc, frequencies = self.count_cooc_and_freq(list, cache)
    return utils.divide(cooc, min(frequencies))
def generate_diff_player_shots_closest_defender(
        self, player_daily_shots_closest_defender,
        player_total_shots_closest_defender):
    player_daily_shots_closest_defender = player_daily_shots_closest_defender[
        ['player_id', 'fg3m', 'fg3a', 'def_dist']]
    player_total_shots_closest_defender = player_total_shots_closest_defender[
        ['player_id', 'fg3m', 'fg3a', 'def_dist']]

    # Tight
    player_daily_shots_closest_defender_tight = player_daily_shots_closest_defender[
        (player_daily_shots_closest_defender['def_dist'] == '0-2 Feet - Very Tight')
        | (player_daily_shots_closest_defender['def_dist'] == '2-4 Feet - Tight')].drop(
            'def_dist', axis=1).groupby(['player_id']).sum().reset_index()
    player_daily_shots_closest_defender_tight['fg3_pct'] = \
        player_daily_shots_closest_defender_tight.apply(
            lambda row: divide(row.fg3m, row.fg3a), axis=1)
    player_daily_shots_closest_defender_tight.columns = [
        str(col) + '_daily_tight' if col != 'player_id' else col
        for col in player_daily_shots_closest_defender_tight
    ]

    player_total_shots_closest_defender_tight = player_total_shots_closest_defender[
        (player_total_shots_closest_defender['def_dist'] == '0-2 Feet - Very Tight')
        | (player_total_shots_closest_defender['def_dist'] == '2-4 Feet - Tight')].drop(
            'def_dist', axis=1).groupby(['player_id']).sum().reset_index()
    player_total_shots_closest_defender_tight['fg3_pct'] = \
        player_total_shots_closest_defender_tight.apply(
            lambda row: divide(row.fg3m, row.fg3a), axis=1)
    player_total_shots_closest_defender_tight.columns = [
        str(col) + '_total_tight' if col != 'player_id' else col
        for col in player_total_shots_closest_defender_tight
    ]

    merged_df_tight = player_daily_shots_closest_defender_tight.merge(
        player_total_shots_closest_defender_tight, how='inner', on='player_id')
    merged_df_tight['fg3m_diff_tight'] = merged_df_tight.apply(
        lambda row: row.fg3m_daily_tight - row.fg3m_total_tight, axis=1)
    merged_df_tight['fg3a_diff_tight'] = merged_df_tight.apply(
        lambda row: row.fg3a_daily_tight - row.fg3a_total_tight, axis=1)
    merged_df_tight['fg3_pct_diff_tight'] = merged_df_tight.apply(
        lambda row: row.fg3_pct_daily_tight - row.fg3_pct_total_tight, axis=1)

    # Open
    player_daily_shots_closest_defender_open = player_daily_shots_closest_defender[
        (player_daily_shots_closest_defender['def_dist'] == '4-6 Feet - Open')
        | (player_daily_shots_closest_defender['def_dist'] == '6+ Feet - Wide Open')].drop(
            'def_dist', axis=1).groupby(['player_id']).sum().reset_index()
    player_daily_shots_closest_defender_open['fg3_pct'] = \
        player_daily_shots_closest_defender_open.apply(
            lambda row: divide(row.fg3m, row.fg3a), axis=1)
    player_daily_shots_closest_defender_open.columns = [
        str(col) + '_daily_open' if col != 'player_id' else col
        for col in player_daily_shots_closest_defender_open
    ]

    player_total_shots_closest_defender_open = player_total_shots_closest_defender[
        (player_total_shots_closest_defender['def_dist'] == '4-6 Feet - Open')
        | (player_total_shots_closest_defender['def_dist'] == '6+ Feet - Wide Open')].drop(
            'def_dist', axis=1).groupby(['player_id']).sum().reset_index()
    player_total_shots_closest_defender_open['fg3_pct'] = \
        player_total_shots_closest_defender_open.apply(
            lambda row: divide(row.fg3m, row.fg3a), axis=1)
    player_total_shots_closest_defender_open.columns = [
        str(col) + '_total_open' if col != 'player_id' else col
        for col in player_total_shots_closest_defender_open
    ]

    merged_df_open = player_daily_shots_closest_defender_open.merge(
        player_total_shots_closest_defender_open, how='inner', on='player_id')
    merged_df_open['fg3m_diff_open'] = merged_df_open.apply(
        lambda row: row.fg3m_daily_open - row.fg3m_total_open, axis=1)
    merged_df_open['fg3a_diff_open'] = merged_df_open.apply(
        lambda row: row.fg3a_daily_open - row.fg3a_total_open, axis=1)
    merged_df_open['fg3_pct_diff_open'] = merged_df_open.apply(
        lambda row: row.fg3_pct_daily_open - row.fg3_pct_total_open, axis=1)

    return merged_df_tight.merge(merged_df_open, how='inner', on='player_id')