def dump_submissions_to(solution_dir, start=0):
    for problem in (fetch_problem_list() > pipe
                    | where(X.paid_only == False)
                    | where(X.id >= start)
                    | sort_by(X.id)):
        submissions = fetch_problem_submissions(problem.title_slug)
        if len(submissions) == 0:
            continue
        problem_dir = solution_dir / problem.dir_name()
        problem_dir.mkdir(exist_ok=True)
        print(
            f'{len(submissions)} submissions for problem {problem.id}. {problem.title}'
        )
        for submission in submissions:
            code = fetch_submission_code(submission.url)
            problem_submission = ProblemSubmission(problem=problem, submission=submission)
            problem_file = problem_dir / problem_submission.file_name()
            with problem_file.open('w') as f:
                print(code, file=f)
            time.sleep(0.3)
def _get_search_fields(self):
    return [
        self._get_fields(lambda f: type(f) in self.search_field_types),

        # if there are any ForeignKeys to User, we'd like to be able to
        # search by the user's last_name, username and email
        (self.all_fields > pipe
            | where(isinstance, X, ForeignKey)
            | where(X.related.parent_model | (issubclass, X, User))
            | foreach(X.name)
            | foreach(['{0}__last_name', '{0}__username', '{0}__email']))

    ] > flatten | tuple
def fetch_problem_list():
    url = 'https://leetcode-cn.com/api/problems/all/'
    r = requests.get(url)
    r.raise_for_status()
    data = r.json()

    def item_to_problem(item):
        id = item['stat']['question_id']
        title = item['stat']['question__title']
        title_slug = item['stat']['question__title_slug']
        difficulty = item['difficulty']['level']
        paid_only = item['paid_only']
        problem = Problem(id=id, title=title, title_slug=title_slug,
                          difficulty=Difficulty(difficulty), paid_only=paid_only)
        return problem

    problems = list(data['stat_status_pairs'] > pipe
                    | foreach(item_to_problem)
                    | where(X.id < 10000))
    return problems
def authenticate(data):
    """
    Checks for a valid combination of ``unit_id`` and ``secret_key`` values
    in `data`. Also removes the ``secret_key`` for enhanced security.
    """
    invalid = pipe | 'Authentication failed ({0}).' | InvalidRequest

    valid, error = validate(
        required('unit_id', valid_int),
        required('secret_key', valid_string),
    )(data)
    if not valid:
        raise invalid(error)

    unit = unless(CarUnit.DoesNotExist, CarUnit.objects.get)(
        unit_id=data['unit_id'],
        secret_key=data['secret_key'],
    )
    if not unit:
        raise invalid('wrong "unit_id" and/or "secret_key"')
    if not unit.enabled:
        raise invalid('unit is disabled')

    return data.iteritems() > where(KEY != 'secret_key') | dict
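As a standalone illustration of the final step, here is a minimal sketch with a hypothetical payload, using plain `X[0]` on the items in place of the `KEY` shortcut used above (assuming the usual pipetools exports):

from pipetools import X, where

# hypothetical payload -- only the secret_key should be dropped
data = {'unit_id': 7, 'secret_key': 's3cret', 'timestamp': '2015-01-01'}
cleaned = data.items() > where(X[0] != 'secret_key') | dict

assert 'secret_key' not in cleaned
assert cleaned['unit_id'] == 7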
def entry_from_torque(data):
    """
    Convert an item from Torque-app [1] CSV to a valid Geotrack entry.

    TODO: convert timestamp, just did it manually in the csv for now...

    [1] http://torque-bhp.com/
    """
    cleaned_data = dict((k.strip(), v) for k, v in data.iteritems())
    get = lambda T, k: k > maybe | cleaned_data.get | unless(ValueError, T)
    return (
        ('altitude', get(float, 'Altitude')),
        ('consumption', get(float, 'Fuel flow rate/hour(l/hr)')),
        ('engine_temp', get(float, 'Engine Coolant Temperature(C)')),
        ('engine_rpm', get(float, 'Engine RPM(rpm)')),
        ('fuel_remaining', get(float, 'Fuel Remaining (Calculated from vehicle profile)(%)')),
        ('location', (
            get(float, 'Longitude'),
            get(float, 'Latitude'))),
        ('throttle', get(float, 'Absolute Throttle Position B(%)')),
        ('timestamp', get(str, 'GPS Time')),
        ('velocity', (get(float, 'GPS Speed (Meters/second)') or 0) * 3.6),
    ) > where(VALUE) | dict
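A minimal sketch of the `get` helper in isolation, on a hypothetical already-cleaned row; it assumes `unless` returns None when the named exception is raised, which is how the snippet above relies on it:

from pipetools import maybe, unless

# hypothetical row: one clean value, one unparseable value, one missing column
row = {'Altitude': '312.5', 'Engine RPM(rpm)': '-'}
get = lambda T, k: k > maybe | row.get | unless(ValueError, T)

assert get(float, 'Altitude') == 312.5
assert get(float, 'Engine RPM(rpm)') is None   # '-' does not parse as float
assert get(float, 'Longitude') is None         # column not present at all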
def query(start=None, end=None, in_polygon=None, units=None, model=None):
    model = model or get_storage_model()
    lookup = (
        ('timestamp__gte', start),
        ('timestamp__lte', end),
        ('location__within', in_polygon > maybe | encode_polygon),
        ('unit_id__in', units),
    ) > where(X[1]) | dict
    return model.objects.filter(**lookup)
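The `where(X[1]) | dict` step can be illustrated on its own with hypothetical filter values; only pipetools' `X` and `where` are assumed:

from pipetools import X, where

# hypothetical filter values -- pairs with a falsy value are dropped
lookup = (
    ('timestamp__gte', None),
    ('timestamp__lte', '2015-01-31'),
    ('unit_id__in', [1, 2, 3]),
) > where(X[1]) | dict

assert lookup == {'timestamp__lte': '2015-01-31', 'unit_id__in': [1, 2, 3]}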
def fetch_problem_submissions(title_slug, offset=0, limit=50):
    query = '''query Submissions($offset: Int!, $limit: Int!, $lastKey: String, $questionSlug: String!) {
        submissionList(offset: $offset, limit: $limit, lastKey: $lastKey, questionSlug: $questionSlug) {
            lastKey
            hasNext
            submissions {
                id
                statusDisplay
                lang
                timestamp
                url
            }
        }
    }
    '''
    payload = {
        "operationName": "Submissions",
        "variables": {
            'offset': offset,
            'limit': limit,
            "questionSlug": title_slug,
        },
        "query": query,
    }
    headers = {
        'user-agent': user_agent,
        'cookie': cookie,
        'referer': problem_url,
        'content-type': "application/json",
    }
    data = execute_graphql(api_url, payload=payload, headers=headers)

    def item_to_submission(item):
        id = item['id']
        status = Status(item['statusDisplay'])
        language = Language(item['lang'])
        timestamp = item['timestamp']
        url = urljoin(base_url, item['url'])
        return Submission(id=id, status=status, language=language,
                          timestamp=timestamp, url=url)

    def fill_submission_code(submission):
        submission.code = fetch_submission_code(submission.url)
        return submission

    submissions = list(data['submissionList']['submissions'] > pipe
                       | foreach(item_to_submission)
                       | where(X.status == Status.AC)
                       # | foreach(fill_submission_code)
                       )
    return submissions
def test_where_regex(self):
    data = [
        'foo bar',
        'boo far',
        'foolproof',
    ]
    assert (data > where(r'^foo') | list) == [
        'foo bar',
        'foolproof',
    ]
def test_none_doesnt_match(self):
    data = [
        'foo bar',
        'boo far',
        None,
        'foolproof',
    ]
    assert (data > where(r'^foo') | list) == [
        'foo bar',
        'foolproof',
    ]
def get(self, request, data):
    bounds = data > maybe | X.get('in_polygon')
    return ((data or {}) > as_kwargs(get_car_position_data)
        | where(X['car'] | self.filter)
        | group_by(X['location'] | (grouping_precision, X, bounds))
        | X.iteritems()
        | foreach({
            'location': X[1][0]['location'],
            'cars': X[1] | foreach(X['car']) | self.get_car_data,
        })
        | tuple)
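A standalone sketch of the `group_by` plus dict-template `foreach` combination used above, on hypothetical car records and without the project-specific precision and filtering helpers (assuming pipetools exports `group_by` and `foreach` at the top level):

from pipetools import X, foreach, group_by

# hypothetical car positions; two cars share a location
positions = [
    {'car': 'A', 'location': (50.1, 14.4)},
    {'car': 'B', 'location': (50.1, 14.4)},
    {'car': 'C', 'location': (49.2, 16.6)},
]
grouped = (positions > group_by(X['location'])
           | X.items()
           | foreach({
               'location': X[0],
               'cars': X[1] | foreach(X['car']) | list,
           })
           | list)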
def split_by_events(journey, entry):
    """
    Returns whether the last two events in `journey` were ENGINE_OFF and
    LOCKED -- where we only care about events: ENGINE_OFF, ENGINE_ON,
    UNLOCKED, LOCKED
    """
    from metrocar.car_unit_api.models import Events
    events_of_interest = (
        Events.ENGINE_OFF,
        Events.ENGINE_ON,
        Events.UNLOCKED,
        Events.LOCKED,
    )
    events = journey['events'] > where(X._in_(events_of_interest)) | tuple
    return (len(events) >= 2 and
            (events[-2], events[-1]) == (Events.ENGINE_OFF, Events.LOCKED))
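The same filtering step can be sketched with plain strings standing in for the project's `Events` enum (hypothetical event log, assuming pipetools' `X._in_`):

from pipetools import X, where

events_of_interest = ('ENGINE_OFF', 'ENGINE_ON', 'UNLOCKED', 'LOCKED')
# events we don't care about (here DOOR_OPEN) are ignored by the check
events = (['UNLOCKED', 'ENGINE_ON', 'DOOR_OPEN', 'ENGINE_OFF', 'LOCKED']
          > where(X._in_(events_of_interest)) | tuple)

assert (events[-2], events[-1]) == ('ENGINE_OFF', 'LOCKED')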
def synthesize(X, Y, K):
    global param_t
    P = all_proper_prefixes(X | Y)
    alphabet = set(c for s in X | Y for c in s)
    asp_code = encode(X, Y, K, P, alphabet)
    with open("code.asp", "w") as text_file:
        print(asp_code, file=text_file)
    p1 = Popen(["gringo", "code.asp"], stdout=PIPE)
    if param_t:
        p2 = Popen(["clasp", "-t", "16"], stdin=p1.stdout, stdout=PIPE)
    else:
        p2 = Popen(["clasp"], stdin=p1.stdout, stdout=PIPE)
    p1.stdout.close()
    text = p2.communicate()[0]
    text = text.decode()
    aut = None
    for line in text.splitlines(False) > where(r'^(pref|num)'):
        aut = extract(line, K, alphabet)
        break
    return aut
def process_docstring(app, what, name, obj, options, lines):
    """
    Appends a Model's fields to its docstring together with their help_texts.
    """
    # This causes import errors if left outside the function
    from django.db import models

    # Only look at objects that inherit from Django's base model class
    if unless(TypeError, issubclass)(obj, models.Model):
        # Grab the field list from the meta class
        fields = obj._meta._fields()

        for field in fields > where(type | (X != models.AutoField)):
            # Decode and strip any html out of the field's help text
            help_text = strip_tags(force_unicode(field.help_text))

            # Decode and capitalize the verbose name, for use if there isn't
            # any help text
            verbose_name = force_unicode(field.verbose_name).capitalize()

            if help_text:
                # Add the model field to the end of the docstring as a param
                # using the help text as the description
                lines.append(u':param %s: %s' % (field.name, help_text))
            else:
                # Add the model field to the end of the docstring as a param
                # using the verbose name as the description
                lines.append(u':param %s: %s' % (field.name, verbose_name))

            # Add the field's type to the docstring
            lines.append(u':type %s: %s' % (field.name, type(field).__name__))

    # Return the extended docstring
    return lines
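The `unless(TypeError, issubclass)` guard can be exercised on its own; this sketch assumes `unless` swallows the named exception and returns None, which is what the `if` above relies on:

from pipetools import unless

# issubclass raises TypeError when the first argument is not a class;
# wrapping it with unless makes the check safe for arbitrary autodoc objects
safe_issubclass = unless(TypeError, issubclass)

assert safe_issubclass(bool, int)      # ordinary subclass check still works
assert not safe_issubclass(42, int)    # 42 is not a class -> None, not an error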
commit_readme = """
git add README.rst
git commit -m "(readme update)"
"""

update_gh_pages = """
git checkout gh-pages
git merge master
git rm -rf doc
sphinx-build -b html docs/source/ doc
git add doc
git commit -m "doc update"
git checkout master
"""

runscript = X.split('\n') | where(X) | unless(BuildFailure, foreach_do(sh))

if __name__ == '__main__':
    create_readme()
    runscript(commit_readme)
    runscript(update_gh_pages)
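The first two steps of `runscript` can be tried in isolation, splitting a script into lines and dropping the blank ones (the actual execution via `sh` is left out here):

from pipetools import X, where

script = "git add README.rst\n\ngit commit -m \"(readme update)\"\n"
# split into lines, then keep only the non-empty ones
script_lines = script > X.split('\n') | where(X) | list

assert script_lines == ['git add README.rst', 'git commit -m "(readme update)"']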
def test_input(self):
    result = range(5) > where(X % 2) | list
    assert result == [1, 3]
def parse_text_to_word_sentences(
    input_text, no_paren=True, no_speaker=True, sentence_min_length=3
):
    """A text parser for TED text data.

    We use tokens/symbols/words to represent concepts pointing to reality.
    Concepts have a natural inter-relationship determined by the rules of
    reality. Those inter-relationships can take a parent/child or peer form,
    and can involve status, properties, actions, changes, etc. We construct
    phrases such as (adj. + n.) to make a concept more specific through its
    properties, and sentences to represent those inter-relationships:

        ([entity]-<relationship/interaction>-[entity])
        -> ([concept]-<relationship>-[concept])
        -> (context: [token]-<structure>-[token])

        reality space - concept space - context space

    This partially indicates that our language (words + sentences = tokens +
    structure), with the right effort, can model the inter-relationships
    between concepts and so reflect reality to an extent. Thus tokens that
    co-occur in those structures (sentences), with a certain level of
    truthfulness, can denote the properties of the concepts those tokens
    represent.

    Several dimensions of information can be stacked together in one text
    clip on top of the simple concept relationships:

    0) Conceptual relationship - reflection of reality.
    1) Errors - the truthfulness of the text can be doubtful as a result of
       intentional or unintentional mistakes.
    2) Formatting/perception structure information - capital letters, line
       breaks, punctuation. Paragraphs can be used for a particular meaning
       or structural purpose.
    3) Supplementary information - parentheses used to further specify the
       referent of a phrase/word.
    4) Medium/speaker/emotional information - content representing
       information in an unrelated, often super-content/medium space, e.g.
       text annotations of audience reactions to a speech.

    It can be difficult for a simple model to distinguish information from
    different information spaces initially, so it helps to preprocess the
    text to include data only from a particular information space.

    Embeddings are one way to represent concept relationships in a
    particular information space through the relative spatial relationships
    of tokens as dense vectors. Word2Vec is a model that learns the
    positions of tokens in that space via contextual co-occurrence: the
    co-occurrence relationship is shaped into spatial distances, so words
    with similar distances to a group of co-occurring context words end up
    positioned closely, which the model interprets as similar words.

    We may want to reduce noise in the text, such as annotations of
    reactions, e.g. (Applaud), or speaker names, as they are not natural
    contextual co-occurrences denoting concept relationships in the same
    space.

    Arguments:
        input_text {string} -- the raw string of text data

    Keyword Arguments:
        no_paren {bool} -- option to remove content within parentheses
            (default: {True})
        no_speaker {bool} -- option to remove speaker name text
            (default: {True})

    Returns:
        [list] -- tokenised sentences: the text cut into sentences
            represented as lists of words, [[word, word], [word, word]]
    """
    text_cleaner = (
        pipe
        | lower_string
        | regulate_punctuation
        | (remove_paren if no_paren else do_nothing)
        | (dirty_remove_speaker_name if no_speaker else do_nothing)
        | alphanumeric_period_only
    )
    strip_word_in_sentence = (
        pipe
        | (map, lambda word: word.strip())
        | where(lambda stripped: len(stripped) > 0)
        | list
    )
    word_sentences = [
        strip_word_in_sentence(sentence.split(" "))
        for paragraph in input_text.split("\n")
        for sentence in text_cleaner(paragraph).split(".")
    ]
    del input_text
    return [
        word_sentence
        for word_sentence in word_sentences
        if len(word_sentence) > sentence_min_length
    ]
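The `strip_word_in_sentence` step above can be exercised on its own; the cleaning helpers (`lower_string`, `remove_paren`, ...) are project-specific and omitted here:

from pipetools import pipe, where

# strip every token, then drop the empty strings left by repeated spaces
strip_word_in_sentence = (
    pipe
    | (map, lambda word: word.strip())
    | where(lambda stripped: len(stripped) > 0)
    | list
)

assert strip_word_in_sentence("so here  we go ".split(" ")) == ['so', 'here', 'we', 'go']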
def _get_fields(self, cond):
    return self.all_fields > where(cond) | foreach(X.name) | tuple