def for_each_facility(self, data, tracer, previous_cycle_data=None):
    df1_records = filter_consumption_records(data, tracer.extras[DF1])
    df2_records = filter_consumption_records(data, tracer.extras[DF2])
    df1_count = len(df1_records)
    df2_count = len(df2_records)
    df1_values = values_for_records(tracer.extras.get(FIELDS, []), df1_records)
    df2_values = values_for_records(tracer.extras.get(FIELDS, []), df2_records)
    all_df1_fields_are_blank = pydash.every(
        df1_values, lambda x: x is None) and len(df1_values) > 0
    all_df2_fields_are_blank = pydash.every(
        df2_values, lambda x: x is None) and len(df2_values) > 0
    sum_df1 = pydash.chain(df1_values).reject(
        lambda x: x is None).map(float).sum().value()
    sum_df2 = pydash.chain(df2_values).reject(
        lambda x: x is None).map(float).sum().value()
    if df1_count == 0 or df2_count == 0:
        return NOT_REPORTING
    if all_df1_fields_are_blank or all_df2_fields_are_blank:
        result = NO
    # 1.429 ~= 1 / 0.7, so the two sums must agree within roughly +/-30%.
    elif (sum_df2 == 0 and sum_df1 == 0) or (
            sum_df2 != 0 and 0.7 < abs(sum_df1 / sum_df2) < 1.429):
        result = YES
    else:
        result = NO
    return result
def for_each_facility(self, data, combination, other_cycle_data={}):
    ratio = combination.get(RATIO)
    df1_records = filter_consumption_records(data, combination[DF1])
    df2_records = filter_consumption_records(data, combination[DF2])
    other_records = filter_consumption_records(data, combination.get(OTHER, []))
    df1_count = len(df1_records)
    df2_count = len(df2_records) + len(other_records)
    df1_values = values_for_records(combination[FIELDS], df1_records)
    df2_values = values_for_records(combination[FIELDS], df2_records)
    other_values = values_for_records(combination[FIELDS], other_records)
    sum_df1 = pydash.chain(df1_values).reject(
        lambda x: x is None).map(float).sum().value()
    sum_df2 = pydash.chain(df2_values).reject(
        lambda x: x is None).map(float).sum().value()
    other_sum = pydash.chain(other_values).reject(
        lambda x: x is None).map(float).sum().value()
    all_df1_fields_are_blank = pydash.every(
        df1_values, lambda x: x is None) and len(df1_values) > 0
    b1 = pydash.every(df2_values, lambda x: x is None) and len(df2_values) > 0
    b2 = pydash.every(other_values, lambda x: x is None) and len(other_values) > 0
    all_df2_fields_are_blank = b1 and b2
    adjusted_sum_df1 = sum_df1 / ratio
    numerator = adjusted_sum_df1
    denominator = (sum_df2 / ratio) + other_sum
    if df1_count == 0 or df2_count == 0:
        return NOT_REPORTING
    if all_df1_fields_are_blank or all_df2_fields_are_blank:
        result = NO
    elif (sum_df2 == 0 and sum_df1 == 0) or (
            denominator != 0 and 0.7 < abs(numerator / denominator) < 1.429):
        result = YES
    else:
        result = NO
    return result
def start_thread_save_db(self, msg_list):
    taskdao = TaskDAO()

    # Flush the cache to the database from a background thread.
    def start_thread(x):
        batch = Utils.get_batch(x)
        thread_key = taskdao.thread_key.format(batch)
        if not self.aios_redis.get(thread_key):
            threading.Thread(target=taskdao.save_to_db,
                             args=(g.user_id, g.tenant_id, batch),
                             daemon=True).start()
            self.aios_print('Started the scheduled save thread', batch)
            self.aios_redis.set(thread_key, 'Running', taskdao.THREAD_KEY_TIMEOUT)
        elif self.aios_redis.ttl(thread_key) < 10:
            threading.Thread(target=taskdao.save_to_db,
                             args=(g.user_id, g.tenant_id, batch),
                             daemon=True).start()
            self.aios_print('Thread exists but its key is about to expire; '
                            'restarted the scheduled save thread', batch)
            self.aios_redis.set(thread_key, 'Running', taskdao.THREAD_KEY_TIMEOUT)
        else:
            self.aios_print('Thread already exists; nothing to start', batch)

    # Start one thread per unique host.
    _.chain(msg_list). \
        map_(lambda x: Utils.get_host(x.get('dir_path'))). \
        uniq(). \
        for_each(start_thread). \
        value()
def test_chaining_invalid_method():
    raised = False

    try:
        _.chain([]).foobar
    except _.InvalidMethod:
        raised = True

    assert raised
def notify_thread_stop(self, msg_list):
    taskdao = TaskDAO()

    def stop_thread(x):
        batch = Utils.get_batch(x)
        thread_key = taskdao.thread_key.format(batch)
        self.aios_redis.set(thread_key, 'Stop')

    _.chain(msg_list). \
        map_(lambda x: Utils.get_host(x.get('dir_path'))). \
        uniq(). \
        for_each(stop_thread). \
        value()
def for_each_facility(self, data, combination, previous_cycle_data=None):
    ratio = combination[RATIO]
    df1_records = filter_consumption_records(data, combination[DF1])
    df2_records = filter_consumption_records(data, combination[DF2])
    df1_count = len(df1_records)
    df2_count = len(df2_records)
    df1_values = values_for_records(combination[FIELDS], df1_records)
    df2_values = values_for_records(combination[FIELDS], df2_records)
    sum_df1 = pydash.chain(df1_values).reject(lambda x: x is None).sum().value()
    sum_df2 = pydash.chain(df2_values).reject(lambda x: x is None).sum().value()
    all_df1_fields_are_blank = pydash.every(df1_values, lambda x: x is None)
    all_df2_fields_are_blank = pydash.every(df2_values, lambda x: x is None)
    return calculate_score(df1_count, df2_count, sum_df1, sum_df2, ratio,
                           all_df1_fields_are_blank, all_df2_fields_are_blank,
                           facility_not_reporting(data))
def download_majors(username, password):
    # Create target URL
    college_pages = {}
    parameters = copy.copy(URL_PARAMS)
    url = SOURCE_URL + '?' + urllib.parse.urlencode(parameters)
    parameters['changeCollege'] = 'Next'
    parameters['call'] = '4'

    # Get viewstate
    s = cmu_auth.authenticate(url, username, password)
    for college in COLLEGES:
        parameters['college'] = college
        url = SOURCE_URL + '?' + urllib.parse.urlencode(parameters)
        export_page = s.get(url).content
        soup = bs4.BeautifulSoup(export_page)
        college_pages[college] = _.chain(soup.select('option')).map(
            lambda x: _.strip_tags(x)).map(
                lambda x: {
                    'name': x,
                    'department': x.split(' in ')[1],
                    'type': x.split(' in ')[0]
                }).value()
    return {'data': college_pages, 'auth': s}
def create_data_words(corpus_file):
    train_corpus = corpus.unpickle_data(corpus_file)

    # Convert to list
    data = pydash.chain(train_corpus)\
        .flatten().map(lambda x: x["open_search"])\
        .value()
    del train_corpus

    # Corpus preprocessing
    # Email addresses
    data = [re.sub(r'\S*@\S*\s?', '', sent) for sent in data]
    # Newline characters
    data = [re.sub(r'\s+', ' ', sent) for sent in data]
    # Single quotes
    data = [re.sub(r"\'", "", sent) for sent in data]

    # using nltk
    data_words = list(sent_to_words(data))
    return data_words
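# A standalone sanity check (not from the original source) of the three cleanup
# regexes used in create_data_words above, applied to a toy sentence:
import re

sent = "contact me at foo@bar.com   it's  fine"
sent = re.sub(r'\S*@\S*\s?', '', sent)  # strip email addresses
sent = re.sub(r'\s+', ' ', sent)        # collapse runs of whitespace
sent = re.sub(r"\'", "", sent)          # drop single quotes
assert sent == "contact me at its fine"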
def extract_quantity(strings: List[str], safe_units=None) -> QuantityField:
    extracted_strings = []
    for string in strings:
        context = extract_numbers_with_context(string)
        extracted_numbers = (
            pydash.chain(context).map(extract_units_from_number_context).value()
        )
        extracted_numbers = handle_multipliers(extracted_numbers)
        extracted_strings.append(extracted_numbers)

    size = {}
    pieces = {}
    # Default so the function returns empty fields (instead of raising
    # NameError) when no quantity or piece unit is found below.
    result = dict(size=size, pieces=pieces)
    for string in extracted_strings:
        for number in (x for x in string if x):
            unit = number.get("unit")
            unit = alt_unit_map[unit] if unit in alt_unit_map else unit
            unit = extract_unit(unit)
            if unit["type"] not in (unit_types.QUANTITY, unit_types.PIECE):
                continue
            if unit["symbol"] in quantity_units:
                if safe_units and unit["si"]["symbol"] not in safe_units:
                    continue
                size_value = number.get("value")
                size_amount = dict(min=size_value, max=size_value)
                size = dict(unit=unit, amount=size_amount)
                result = dict(size=size, pieces=dict())
            elif unit["symbol"] in piece_units:
                pieces_value = number.get("value")
                pieces_amount = dict(min=pieces_value, max=pieces_value)
                pieces = dict(unit=unit, amount=pieces_amount)
                result = dict(size=size, pieces=pieces)
    return result
def __parse_query(self, query: dict) -> dict:
    """Create the ``str_query`` property for each query so it can be passed
    to ``requests``. Each query must at least define the field to search.

    Args:
        query (dict): Query to convert.

    Returns:
        dict: Query with its ``str_query`` string filled in.
    """
    for opt, _def in self.VALID_OPTIONS.items():
        if opt not in query and not _def["default"]:
            msg = f"The field {opt} has to be defined"
            log.error(msg)
            raise Exception(msg)
        else:
            query[opt] = query.get(opt, _def["default"])
    fls = query["fields"]
    query["str_query"] = f' {query["join_op"]} '.join(
        pydash.chain(fls.keys()).map(
            lambda key: [f'{key}:"{val}"' for val in fls[key]]).reduce(
                lambda x, y: x + y, []).value())
    return query
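# A self-contained sketch (with assumed inputs, not the original class) of how
# the str_query string above is assembled from a fields mapping and a join
# operator:
import pydash

fls = {"author": ["smith", "lee"], "year": ["2020"]}
join_op = "AND"
str_query = f' {join_op} '.join(
    pydash.chain(fls.keys()).map(
        lambda key: [f'{key}:"{val}"' for val in fls[key]]).reduce(
            lambda x, y: x + y, []).value())
assert str_query == 'author:"smith" AND author:"lee" AND year:"2020"'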
async def color_command(self, ctx: Context, color):
    """ Control your Discord color. """
    color_choice_string = ', '.join(k for k in COLOR_ROLES)
    color = pydash.chain(ctx.message.content) \
        .replace(ctx.prefix + 'mycolor', '') \
        .trim() \
        .title_case() \
        .value()
    role_id = COLOR_ROLES.get(color, None)
    server = self.bot.get_server(ZEN_SERVER)

    if role_id is None:
        await self.bot.say('Try again with one of these colors'
                           f': {color_choice_string}')
        return

    new_roles = [r for r in ctx.message.author.roles] + \
        [self.get_role(server, role_id)]
    new_roles = pydash.uniq(new_roles)

    await self.bot.replace_roles(
        ctx.message.author,
        *[r for r in new_roles if r.name not in COLOR_ROLES or r.id == role_id])
def get_previous_series(self):
    """Return the last series that has been closed.

    This is useful for staff to see at a glance what needs to be worked on.
    """
    return (py_.chain(list(self.prefetch_series()))
            .filter(lambda s: not s.accepts_solution_submissions)
            .sort(key=attrgetter("submission_deadline"))
            .reverse()
            .head()
            .value())
def get_manuscript_line(self, manuscript_id: int) -> ManuscriptLine:
    manuscript_line = (
        pydash.chain(self.variants)
        .map_(lambda variant: variant.get_manuscript_line(manuscript_id))
        .reject(pydash.is_none)
        .head()
        .value())
    if manuscript_line is None:
        raise ValueError(f"No line found for manuscript {manuscript_id}.")
    else:
        return manuscript_line
def upload(filename='default'):
    file = request.data
    filename = request.args.get('filename')
    old_task_id = request.args.get('old_task_id')

    # Remove files left over from the previous task.
    def remove_file(f):
        if os.path.exists(os.path.join('upload', f)):
            os.remove(os.path.join('upload', f))

    if old_task_id:
        _.chain(os.listdir('upload')).filter_(
            lambda x: x.startswith(old_task_id)).for_each(
                lambda x: remove_file(x)).value()

    with open(os.path.join('upload', filename), 'wb') as f:
        f.write(file)
    return 'ok'
def outlier_filter(arr):
    q1, q3 = numpy.percentile(arr, [25, 75])
    iqr = q3 - q1
    maxbound = q3 + (iqr * 2)
    minbound = q1 - (iqr * 2)
    filtered = pydash.chain(arr).filter_(lambda x: x < maxbound).filter_(
        lambda x: x > minbound).value()
    return filtered
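# Example usage (not from the original source): values beyond q3 + 2*IQR or
# below q1 - 2*IQR are dropped, so the lone extreme value disappears.
data = [10, 12, 11, 13, 12, 95]
assert outlier_filter(data) == [10, 12, 11, 13, 12]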
def make_spreadsheets(self, guild: str, char_ids: List[int],
                      stat_opts: List[str]) -> List[pd.DataFrame]:
    def relic(unit_data: dict) -> int:
        r_level = int(unit_data["relic_tier"]) - 2
        return r_level * (r_level > 0)

    def def_processor(sheet: pd.DataFrame) -> pd.DataFrame:
        sheet = sheet.fillna(0)
        data_cols = sheet.columns[1:]
        temp = sheet[data_cols].astype("int32")
        sheet[data_cols] = temp
        return sheet

    REPORT_MAPPER = {
        "pg": lambda x: x["power"] or 0,
        "relic": relic,
    }
    SHEET_PROCESSOR = {
        "pg": def_processor,
        "relic": def_processor,
    }

    players_stats = self.swgoh.get_guild_players(guild)
    base_ids = self.chars[self.chars["id"].isin(
        char_ids)]["base_id"].unique().tolist()
    mrlobot_sheets = []
    unit_names = {
        x["base_id"]: x["name"]
        for x in self.alias_char.values()
    }
    unit_names["PLAYER"] = "PLAYER"
    for stat in stat_opts:
        player_stats = []
        for pstats in players_stats:
            name = pstats["data"].get("name")
            unit_stats = pydash.chain(pstats["units"])\
                .map(lambda x: x["data"])\
                .filter(lambda x: x["base_id"] in base_ids)\
                .map(lambda x: (x["base_id"], REPORT_MAPPER.get(stat, "")(x)))\
                .value()
            unit_stats = dict(unit_stats)
            unit_stats["PLAYER"] = name
            player_stats.append(unit_stats)
        temp = pd.DataFrame(player_stats, columns=["PLAYER", *base_ids])
        temp.columns = temp.columns.to_series().map(unit_names)
        mrlobot_sheets.append((stat, SHEET_PROCESSOR[stat](temp)))
    return mrlobot_sheets
def multi_process_handler(self, msg_list):
    """Merge file chunks using multiple processes."""
    try:
        from app import aios_redis
        self.aios_redis = aios_redis
        # Start a thread that periodically flushes the cache to the database.
        # self.start_thread_save_db(msg_list)

        # Split out chunks that need merging from those that do not;
        # single-chunk files only need to be renamed.
        single_chunk_files = _.remove(
            msg_list,
            lambda x: x.get('curr_chunk') == 1 and x.get('total_chunks') == 1)
        multi_chunk_files = msg_list
        succ_list = []
        err_list = []
        sp_file_handler = SubprocessFileHandler()
        if len(single_chunk_files):
            _succ_list, _err_list = sp_file_handler.single_file_handler(
                single_chunk_files)
            succ_list.extend(_succ_list)
            err_list.extend(_err_list)
        if len(multi_chunk_files):
            default_limit = 4
            cpus = min(len(multi_chunk_files), cpu_count(), default_limit)
            with Pool(processes=cpus) as pool:
                result = pool.map(sp_file_handler.multi_file_handler,
                                  multi_chunk_files)
                succ_list.extend(
                    _.chain(result).filter_(lambda x: x[0] is not None).
                    map_(lambda x: x[0]).value())
                err_list.extend(
                    _.chain(result).filter_(lambda x: x[1] is not None).
                    map_(lambda x: x[1]).value())
        return succ_list, err_list
    except Exception as err:
        print('❌multi_process_handler❌', err)
        return [], [{
            'file_key': i['file_key'],
            'curr_chunk': i['curr_chunk']
        } for i in msg_list]
def get(self, request):
    all_values = []
    distinct_values_from_consumption = Consumption.objects.order_by(
    ).values_list("formulation").distinct()
    all_values.extend(distinct_values_from_consumption)
    return Response({
        "values": pydash.chain(all_values).flatten().uniq().sort().value()
    })
def get(self, request):
    all_values = []
    distinct_values_from_paed = PAEDPatientsRecord.objects.order_by(
    ).values_list("formulation").distinct()
    all_values.extend(distinct_values_from_paed)
    return Response({
        "values": pydash.chain(all_values).flatten().uniq().sort().value()
    })
def chain(self):
    """Return pydash chaining instance with items returned by :meth:`all`.

    See Also:
        `pydash's <http://pydash.readthedocs.org/>`_ documentation on
        `chaining <http://pydash.readthedocs.org/en/latest/chaining.html>`_
    """
    return pyd.chain(self.all())
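# A minimal usage sketch (the Bag class here is hypothetical; only the pydash
# calls are real): any container exposing .all() can hand its items over to
# pydash chaining this way.
import pydash as pyd


class Bag:
    def __init__(self, items):
        self._items = list(items)

    def all(self):
        return self._items

    def chain(self):
        return pyd.chain(self.all())


assert Bag([1, 2, 3]).chain().map_(lambda x: x * 2).sum().value() == 12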
def orderings(n_range, n):
    combinations = itertools.combinations(n_range, n)
    permutations = (
        _.chain(combinations)
        .map_(lambda x: itertools.permutations(x))
        .reduce_(lambda x, y: list(x) + list(y))
        .value()
    )
    return permutations
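# Quick behavioral check (not from the original source): orderings(range(3), 2)
# concatenates the permutations of every 2-combination of {0, 1, 2}.
result = orderings(range(3), 2)
assert sorted(result) == [(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]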
def sort_and_resolve_urls(words, word_index, ranks, doc_list):
    # Flatten, pluck, and remove duplicates from the results list.
    url_ids = _.chain(doc_list).pluck('doc_id_list').flatten().pluck(
        'doc_id').uniq().value()

    # Resolve all the doc ids to their respective document information.
    results = [doc for doc in db.doc_index.find({"doc_id": {"$in": url_ids}})]

    # Boost page rank as appropriate.
    ranks = boost_page_rank(ranks, word_index, results, doc_list)

    # Sort and return the results.
    return sorted(results, reverse=True, key=lambda doc: ranks[doc['doc_id']])
def test_chaining_methods():
    chain = _.chain([])

    for method in dir(_):
        # dir() yields attribute names (strings); check the attribute itself,
        # since callable() on a string is always False.
        if not callable(getattr(_, method)):
            continue

        chained = getattr(chain, method)
        assert chained.method is getattr(_, method)
def test_chaining(value, methods):
    expected = deepcopy(value)
    actual = _.chain(deepcopy(value))

    for method, args in methods:
        expected = getattr(_, method)(expected, *args)
        actual = getattr(actual, method)(*args)

    assert actual.value() == expected
def for_each_facility(self, data, tracer, other_cycle_data={}):
    ratio = tracer.extras.get(RATIO)
    df1_records = filter_consumption_records(data, tracer.extras[DF1])
    df2_records = filter_consumption_records(data, tracer.extras[DF2])
    other_records = filter_consumption_records(data, tracer.extras.get(OTHER, []))
    df1_count = len(df1_records)
    df2_count = len(df2_records) + len(other_records)
    df1_values = values_for_records(tracer.extras[FIELDS], df1_records)
    df2_values = values_for_records(tracer.extras[FIELDS], df2_records)
    other_values = values_for_records(tracer.extras[FIELDS], other_records)
    sum_df1 = pydash.chain(df1_values).reject(
        lambda x: x is None).map(float).sum().value()
    sum_df2 = pydash.chain(df2_values).reject(
        lambda x: x is None).map(float).sum().value()
    other_sum = pydash.chain(other_values).reject(
        lambda x: x is None).map(float).sum().value()
    all_df1_fields_are_blank = pydash.every(
        df1_values, lambda x: x is None) and len(df1_values) > 0
    b1 = pydash.every(df2_values, lambda x: x is None) and len(df2_values) > 0
    b2 = pydash.every(other_values, lambda x: x is None) and len(other_values) > 0
    all_df2_fields_are_blank = b1 and b2
    adjusted_sum_df1 = sum_df1 / ratio
    numerator = adjusted_sum_df1
    denominator = (sum_df2 / ratio) + other_sum
    if df1_count == 0 or df2_count == 0:
        return NOT_REPORTING
    if all_df1_fields_are_blank or all_df2_fields_are_blank:
        result = NO
    elif (sum_df2 == 0 and sum_df1 == 0) or (
            denominator != 0 and 0.7 < abs(numerator / denominator) < 1.429):
        result = YES
    else:
        result = NO
    return result
def parse_consumption_records(data_import_records):
    return pydash.chain(data_import_records).filter(
        by_type(CONSUMPTION_REPORT)).map(
        lambda item: item.build_consumption_record()
    ).group_by(
        lambda item: item.regimen_location
    ).values().map(
        sum_records
    ).group_by(
        lambda item: item.location
    ).value()
def test_word_cycle(case, expected):
    actual = (_.chain(case)
              .camel_case()
              .kebab_case()
              .snake_case()
              .start_case()
              .camel_case()
              .value())
    assert actual == expected
def parse_adult_records(data_import_records):
    return pydash.chain(data_import_records).filter(
        by_type(ADULT_PATIENT_REPORT)).map(
        lambda item: item.build_patient_record()
    ).group_by(
        lambda item: item.regimen_location
    ).values().map(
        sum_records
    ).group_by(
        lambda item: item.location
    ).value()
def problem1():
    lines = read_input()
    print(lines)
    ans = pydash.chain(lines) \
        .map(lambda x: x.replace("+", "")) \
        .map(lambda x: int(x)) \
        .reduce(lambda a, b: a + b) \
        .value()
    print(ans)
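# The same chain on a tiny inline input (assumes one signed integer per line,
# as in Advent of Code 2018 day 1; read_input itself is not reproduced here):
import pydash

total = pydash.chain(["+3", "-2", "+5"]) \
    .map(lambda x: x.replace("+", "")) \
    .map(lambda x: int(x)) \
    .reduce(lambda a, b: a + b) \
    .value()
assert total == 6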
def for_each_facility(self, data, combination, previous_cycle_data=None):
    df1_records = filter_consumption_records(data, combination[DF1])
    df2_records = filter_consumption_records(data, combination[DF2])
    df1_count = len(df1_records)
    df2_count = len(df2_records)
    df1_values = values_for_records(combination.get(FIELDS, []), df1_records)
    df2_values = values_for_records(combination.get(FIELDS, []), df2_records)
    all_df1_fields_are_blank = pydash.every(
        df1_values, lambda x: x is None) and len(df1_values) > 0
    all_df2_fields_are_blank = pydash.every(
        df2_values, lambda x: x is None) and len(df2_values) > 0
    sum_df1 = pydash.chain(df1_values).reject(
        lambda x: x is None).map(float).sum().value()
    sum_df2 = pydash.chain(df2_values).reject(
        lambda x: x is None).map(float).sum().value()
    if df1_count == 0 or df2_count == 0:
        return NOT_REPORTING
    if all_df1_fields_are_blank or all_df2_fields_are_blank:
        result = NO
    elif (sum_df2 == 0 and sum_df1 == 0) or (
            sum_df2 != 0 and 0.7 < abs(sum_df1 / sum_df2) < 1.429):
        result = YES
    else:
        result = NO
    return result
def calculate_patient_totals(patient_records):
    patient_totals = []
    total = 0
    for pr in patient_records:
        entry = {COLUMN: pr.formulation}
        values = values_for_models([NEW, EXISTING], [pr])
        # Named record_sum to avoid shadowing the built-in sum().
        record_sum = pydash.chain(values).reject(lambda x: x is None).sum().value()
        entry[VALUE] = record_sum
        total += int(record_sum)
        patient_totals.append(entry)
    patient_totals.append({COLUMN: TOTAL, VALUE: total, IS_HEADER: True})
    return patient_totals
def analysis(infile, outfile):
    openedfile = open("./data/repos/" + infile, "r")
    name = infile[11:-5]
    jsonstring = ""
    readfile = openedfile.readlines()
    for string in readfile:
        jsonstring += string
    data = json.loads(jsonstring)

    repos = len(data)
    sizes = pydash.chain(data).filter_(
        lambda x: x['size'] is not None).pluck('size').value()
    avg_size = int(sum(sizes) / len(sizes))
    languages = pydash.chain(data).filter_(
        lambda x: x['language'] is not None).sort_by(
            'language').pluck('language').value()
    stars = pydash.chain(data).filter_(
        lambda x: x['stargazers_count'] is not None).pluck(
            'stargazers_count').sum().value()
    watchers = pydash.chain(data).filter_(
        lambda x: x['watchers'] is not None).pluck('watchers').sum().value()
    forks = pydash.chain(data).filter_(
        lambda x: x['forks_count'] is not None).pluck('forks_count').sum().value()
    forked_from = pydash.chain(data).pluck('fork').map_(
        lambda x: 1 if x else 0).sum().value()

    def most_common(lst):
        return max(set(lst), key=lst.count)

    fav_lang = most_common(languages) if languages else "None"

    results = [str(name), str(repos), str(forked_from), str(forks),
               str(watchers), str(stars), fav_lang, str(avg_size) + '\n']
    results = ",".join(results)
    print(results)
    resultsfile.write(results)
    openedfile.close()
def main(): user_list = [] with open("./data/github-users.csv") as f: reader = csv.reader(f) for row in reader: user_list.append(row[0]) user_list = pydash.chain(user_list).uniq().sort().value() for user in user_list: repos = get_repositories(user) store_repositories(user, repos)
def calculate_consumption_totals(check_combination, consumption_records):
    totals = []
    total = 0
    for consumption in consumption_records:
        entry = {COLUMN: consumption.formulation}
        values = values_for_models(check_combination.get(FIELDS, []),
                                   [consumption])
        calculated_sum = pydash.chain(values).reject(
            lambda x: x is None).sum().value()
        reduced_sum = calculated_sum / check_combination[RATIO]
        entry[VALUE] = reduced_sum
        total += reduced_sum
        totals.append(entry)
    totals.append({COLUMN: TOTAL, VALUE: total, IS_HEADER: True})
    return totals
def _get_manuscript_text_lines(
    self, manuscript: Manuscript
) -> Sequence[TextLineEntry]:
    def create_entry(line: Line, index: int) -> Optional[TextLineEntry]:
        text_line = line.get_manuscript_text_line(manuscript.id)
        return text_line and TextLineEntry(text_line, index)

    return (
        pydash.chain(self.lines)
        .map_(create_entry)
        .reject(pydash.is_none)
        .concat([TextLineEntry(line, None) for line in manuscript.text_lines])
        .value()
    )
def train_model():
    nlp = spacy.load("en_core_web_sm")
    sentences = flatten([item["patterns"] for item in intents_json])
    sentences = [nlp(sentence) for sentence in sentences]
    sentences = [lemmatize_sentence(sentence) for sentence in sentences]
    bag_of_words = chain(sentences).flatten().uniq().sort().value()
    intents = [item["patterns"] for item in intents_json]
    X = [
        sentence_to_feature_vector(sentence, bag_of_words)
        for sentence in sentences
    ]
    y = []
    for idx, patterns in enumerate(intents):
        for pattern in patterns:
            entry = list(np.zeros(len(intents)))
            entry[idx] = 1
            y.append(entry)

    # Drop empty sentences; delete from the end so earlier indexes stay valid.
    indexes = [i for i, x in enumerate(sentences) if is_empty(x)]
    for index in sorted(indexes, reverse=True):
        del X[index]
        del y[index]

    model = Sequential()
    model.add(Dense(64, input_shape=(len(X[0]),), activation="relu"))
    model.add(Dropout(0.5))
    model.add(Dense(32, activation="relu"))
    model.add(Dropout(0.5))
    model.add(Dense(len(y[0]), activation="softmax"))
    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss="categorical_crossentropy",
                  optimizer=sgd,
                  metrics=["accuracy"])

    # Fit and save the model.
    hist = model.fit(np.array(X), np.array(y),
                     epochs=200, batch_size=5, verbose=1)
    pickle.dump(bag_of_words, open(f"{MODEL_FOLDER}/words.pkl", "wb"))
    pickle.dump(intents_json, open(f"{MODEL_FOLDER}/intents.pkl", "wb"))
    model.save(f"{MODEL_FOLDER}/chatbot.h5")
def main(): user_list = [] with open("./data/gitCU-users.txt") as f: reader = csv.reader(f) next(reader) # Skip the first line header = True; for row in reader: if(header): header = False else: user_list.append(row[0]) user_list = pydash.chain(user_list).uniq().sort().value() for user in user_list: repos = get_repositories(user) store_repositories(user, repos)
def test_chaining_lazy():
    tracker = {'called': False}

    def interceptor(value):
        tracker['called'] = True
        return value.pop()

    chain = _.chain([1, 2, 3, 4, 5]).initial().tap(interceptor)
    assert not tracker['called']

    chain = chain.last()
    assert not tracker['called']

    result = chain.value()
    assert tracker['called']
    assert result == 3
def test_chaining_plant():
    value = [1, 2, 3, 4]
    square_sum1 = _.chain(value).power(2).sum()

    def root_value(wrapper):
        if isinstance(wrapper._value, _.chaining.ChainWrapper):
            return root_value(wrapper._value)
        return wrapper._value

    assert root_value(square_sum1._value) == value

    test_value = [5, 6, 7, 8]
    square_sum2 = square_sum1.plant(test_value)

    assert root_value(square_sum1._value) == value
    assert root_value(square_sum2._value) == test_value
    assert square_sum1.value() == 30
    assert square_sum2.value() == 174
def series_phrase(items, separator=", ", last_separator=" and ", serial=False): """Join items into a grammatical series phrase, e.g., ``"item1, item2, item3 and item4"``. Args: items (list): List of string items to join. separator (str, optional): Item separator. Defaults to ``', '``. last_separator (str, optional): Last item separator. Defaults to ``' and '``. serial (bool, optional): Whether to include `separator` with `last_separator` when number of items is greater than 2. Defaults to ``False``. Returns: str: Joined string. Example: Example: >>> series_phrase(['apples', 'bananas', 'peaches']) 'apples, bananas and peaches' >>> series_phrase(['apples', 'bananas', 'peaches'], serial=True) 'apples, bananas, and peaches' >>> series_phrase(['apples', 'bananas', 'peaches'], '; ', ', or ') 'apples; bananas, or peaches' .. versionadded:: 3.0.0 """ items = pyd.chain(items).map(pyd.to_string).compact().value() item_count = len(items) separator = pyd.to_string(separator) last_separator = pyd.to_string(last_separator) if item_count > 2 and serial: last_separator = separator.rstrip() + last_separator if item_count >= 2: items = items[:-2] + [last_separator.join(items[-2:])] return separator.join(items)
def human_case(text):
    """Converts `text` to human case which has only the first letter
    capitalized and each word separated by a space.

    Args:
        text (str): String to convert.

    Returns:
        str: String converted to human case.

    Example:

        >>> human_case('abc-def_hij lmn')
        'Abc def hij lmn'
        >>> human_case('user_id')
        'User'

    .. versionadded:: 3.0.0
    """
    return (
        pyd.chain(text)
        .snake_case()
        .re_replace("_id$", "")
        .replace("_", " ")
        .capitalize()
        .value()
    )
def url(*paths, **params):
    """Combines a series of URL paths into a single URL. Optionally, pass in
    keyword arguments to append query parameters.

    Args:
        paths (str): URL paths to combine.

    Keyword Args:
        params (str, optional): Query parameters.

    Returns:
        str: URL string.

    Example:

        >>> link = url('a', 'b', ['c', 'd'], '/', q='X', y='Z')
        >>> path, params = link.split('?')
        >>> path == 'a/b/c/d/'
        True
        >>> set(params.split('&')) == set(['q=X', 'y=Z'])
        True

    .. versionadded:: 2.2.0
    """
    paths = pyd.chain(paths).flatten_deep().map(pyd.to_string).value()
    paths_list = []
    params_list = flatten_url_params(params)

    for path in paths:
        scheme, netloc, path, query, fragment = urlsplit(path)
        query = parse_qsl(query)
        params_list += query
        paths_list.append(urlunsplit((scheme, netloc, path, "", fragment)))

    path = delimitedpathjoin("/", *paths_list)
    scheme, netloc, path, query, fragment = urlsplit(path)
    query = urlencode(params_list)

    return urlunsplit((scheme, netloc, path, query, fragment))
def test_thru(value, func, expected):
    assert _.chain(value).initial().thru(func).last().value() == expected
def test_chaining_late_value_override():
    square_sum = _.chain([1, 2, 3, 4]).power(2).sum()
    assert square_sum([5, 6, 7, 8]) == 174
def get_page_rank_scores(doc_list):
    # Flatten, pluck, and remove duplicates from the results list.
    url_ids = _.chain(doc_list).pluck('doc_id_list').flatten().pluck(
        'doc_id').uniq().value()
    ranks = db.page_rank.find({"doc_id": {"$in": url_ids}})
    return {rank['doc_id']: rank['score'] for rank in ranks}
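# Sketch of the pluck/flatten stage above on a toy doc_list (the document
# shape is inferred from the fields accessed; requires a pydash version that
# still provides pluck, as the snippet assumes):
import pydash as _

doc_list = [
    {"doc_id_list": [{"doc_id": 1}, {"doc_id": 2}]},
    {"doc_id_list": [{"doc_id": 2}, {"doc_id": 3}]},
]
url_ids = _.chain(doc_list).pluck('doc_id_list').flatten().pluck(
    'doc_id').uniq().value()
assert url_ids == [1, 2, 3]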
def test_chaining_late_value_reuse():
    square_sum = _.chain().power(2).sum()
    assert square_sum([1, 2, 3, 4]) == 30
    assert square_sum([2]) == 4
def test_dash_instance_chaining():
    value = [1, 2, 3, 4]
    from__ = _._(value).without(2, 3).reject(lambda x: x > 1)
    from_chain = _.chain(value).without(2, 3).reject(lambda x: x > 1)

    assert from__.value() == from_chain.value()
def get_consumption_records(data, formulation_name):
    return pydash.chain(data.get(C_RECORDS, [])).reject(
        lambda x: formulation_name.strip().lower() not in x[FORMULATION].lower()
    ).value()
def test_tap(value, interceptor, expected):
    actual = _.chain(value).initial().tap(interceptor).last().value()
    assert actual == expected
def test_chaining_value_to_string(case, expected):
    assert _.chain(case).to_string() == expected
def get_consumption_totals(fields, records):
    return pydash.chain(values_for_records(fields, records)).reject(
        lambda x: x is None).map(float).sum().value()
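# Inline check (hypothetical values, not from the original source) of the
# reject/map/sum pipeline above: None entries are dropped, the rest coerced
# to float and summed.
import pydash

values = [3, None, "4.5", None]
total = pydash.chain(values).reject(lambda x: x is None).map(float).sum().value()
assert total == 7.5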
def get_patient_records(data, combinations, is_adult=True):
    lower_case_combinations = pydash.collect(combinations, lambda x: x.lower())
    records = data.get(A_RECORDS, []) if is_adult else data.get(P_RECORDS, [])
    return pydash.chain(records).select(
        lambda x: x[FORMULATION].strip().lower() in lower_case_combinations
    ).value()