예제 #1
0
	def _parse_one_line(self, s):
		"""Split ``s`` into logical lines and run each through ``_check_element``.

		Lines starting with '*' and bare newlines are ignored.  A line starting
		with '+' is treated as a continuation: it is cleaned and merged into the
		pending logical line.  Any other line flushes the pending logical line
		into the result and becomes the new pending line.

		Note that the pending line starts out as the empty string, so the first
		flush stores ``self._check_element("")``.

		Returns an ``OrderedDict`` keyed by consecutive integers starting at 0.
		"""
		parsed = collections.OrderedDict()
		pending = ""
		for raw in StringIO.StringIO(s):
			if raw.startswith('*') or raw == '\n':
				continue
			if raw.startswith('+'):
				# Clean the continuation first, then the pending text, then merge.
				cleaned_raw = utils.clean_string(raw)
				cleaned_pending = utils.clean_string(pending)
				pending = utils.merge_two_string(cleaned_pending, cleaned_raw)
				continue
			parsed[len(parsed)] = self._check_element(pending)
			pending = raw
		# End of input: flush whatever is still pending.
		parsed[len(parsed)] = self._check_element(pending)
		return parsed
 def dialogok(self):
     '''Show an OK dialog using the "header" and "message" entries of self.params.

     Both values default to an empty string and are passed through
     clean_string before being displayed.
     '''
     heading = clean_string(self.params.get("header", ""))
     message = clean_string(self.params.get("message", ""))
     xbmcgui.Dialog().ok(heading=heading, line1=message)
예제 #3
0
def parse_nodejsscan_file(threatplaybook, vul_result, project, target, scan,
                          token):
    """Submit one NodeJsScan finding as a vulnerability plus its evidence.

    Builds a vulnerability payload from ``vul_result``, posts it, and then
    posts an evidence record that refers to the created vulnerability.

    Returns:
        ``{'success': <name>}`` when the vulnerability was created, otherwise
        ``{'error': <message>}`` describing the step that produced no result.
    """
    vul_dict = {
        'name': str(vul_result.get('title')),
        'tool': 'NodeJsScan',
        'description': str(vul_result.get('description')),
        'project': str(project),
        'target': str(target),
        'scan': str(scan),
        'cwe': int(vul_result.get('cwe', 0)),
        'observation': str(vul_result.get('observation', '')),
        'remediation': str(vul_result.get('remediation', ''))
    }

    vulnerability_query = create_vulnerability(vul_dict=vul_dict)
    if not vulnerability_query:
        return {'error': 'Error while creating Vulnerability Query'}

    response = _post_query(threatplaybook=threatplaybook,
                           token=token,
                           query=vulnerability_query)
    if not response:
        return {'error': 'Error while creating Vulnerability'}

    vulnerability_name = validate_vulnerability_response_name(content=response)
    vulnId = validate_vulnerability_response_id(content=response)

    location = 'File: {}, Line no: {}'.format(vul_result.get('path'),
                                              vul_result.get('line'))
    evidence = {
        'name': str(clean_string(location)),
        'url': str(clean_string(vul_result.get('filename'))),
        'vulnId': str(vulnId),
        'log': str(clean_string(vul_result.get('lines'))),
    }

    evidence_query = create_evidence(evidence=evidence)
    if not evidence_query:
        return {'error': 'Error while creating Vulnerability Evidence Query'}

    evidence_response = _post_query(threatplaybook=threatplaybook,
                                    token=token,
                                    query=evidence_query)
    if evidence_response:
        evidence_name = validate_evidence_response(content=evidence_response)
        if evidence_name:
            print('Evidence Created: {}'.format(evidence_name))
        else:
            print('No Vulnerability Evidence')

    # A failed evidence post does not turn the overall result into an error.
    return {'success': vulnerability_name}
예제 #4
0
파일: cli.py 프로젝트: vechnoe/holydate
def search_constructor(string):
    """Build the text printed on the command line for a feast search.

    ``string`` is decoded from UTF-8 and passed to ``search_feasts``.  When
    the returned ``result`` equals 0 a single wrapped "not found" line is
    returned.  Otherwise each result item contributes two wrapped date lines
    (``item[1]`` rendered with ``MONTH_WORD`` and ``item[0]`` rendered with
    ``MONTH_JULIAN_WORD``) followed by the cleaned description ``item[2]``
    formatted with ``DICT_FORMAT`` and wrapped at width 60.  A summary line
    with the pluralised result count closes the output.

    Returns a unicode string.
    """

    search_string = string.decode('utf-8')
    out_dict = search_feasts(search_string)

    if out_dict['result'] == 0:
        return textwrap.fill(
            u'Ваш запрос — «%s» не найден!' % search_string,
            initial_indent=INDENT) + u'\n'

    # Renders a 3-item sequence as "<x[0]> <month name> <x[2]> г. ", looking
    # the month name up in ``month_arr`` by x[1].
    date_formated = lambda x, month_arr: u"".join([
        unicode(x[0]), u' ',
        month_arr[x[1]], u' ',
        unicode(x[2]), u' г. '
    ])
    string_out = u""
    # Opening style tag, and a whole "{red}|{bold} ... {end}" span (non-greedy).
    pattern = re.compile(ur"\{red\}|\{bold\}")
    pattern_substr = re.compile(ur"(\{red\}|\{bold\})(.+?)(\{end\})")
    for item in out_dict['result']:
        item[2] = clean_string(item[2])
        # Pretty highlighting output on terminal.
        # Only the first styled span of the text is examined.  Inside it every
        # '{highlight_end}' gets the span's opening tag appended after it.
        # NOTE(review): presumably so the outer style resumes after a nested
        # highlight ends -- confirm against DICT_FORMAT.
        sub_str_match = re.search(pattern_substr, item[2])
        sub_str = sub_str_match.group() if sub_str_match else u""
        match = re.search(pattern, sub_str)
        # The conditional expression binds looser than '%': the replacement is
        # either '{highlight_end}<tag>' or plain '{highlight_end}'.
        sub_str_replaced = sub_str.replace(
            u'{highlight_end}',
            u'{highlight_end}%s' %
            match.group() if match else u"{highlight_end}")
        item[2] = item[2].replace(sub_str, sub_str_replaced)
        # TODO: tidy up this formatting code in the future.
        # Appends: date line for item[1], date line for item[0], blank line,
        # then the formatted description followed by two blank lines.
        string_out += \
            textwrap.fill(date_formated(
                item[1], MONTH_WORD) + u'по н. ст.', initial_indent=INDENT,) + u'\n' + \
            textwrap.fill(
                date_formated(item[0], MONTH_JULIAN_WORD) + u'от Адама.',
                initial_indent=INDENT,) + u'\n\n' + \
            textwrap.fill(clean_string(
                item[2].format(**DICT_FORMAT)).lstrip(),
                width=60, initial_indent=INDENT,
                subsequent_indent=INDENT) + u'\n\n\n'

    # Pick the plural form matching the number of results.
    results_count = numeral.choose_plural(
        out_dict['count'], (
            u'найден {count} результат ',
            u'найденo {count} результата',
            u'найденo {count} результатов')
    )

    return (u"\n"
            u"%s"
            u"-----------------------------------------------------------"
            u"\n"
            u"  По запросу «%s» %s"
            u"\n") % (string_out, search_string,
                      results_count.format(count=out_dict['count']))
예제 #5
0
def evaluate_message(text):
    '''
    Classify an incoming message by its cleaned text.

    The text is passed through utils.clean_string once.  A cleaned text that
    starts with 'Team' is a 'Team Submission'; otherwise the cleaned text has
    to match one of the known commands exactly.

    Returns the message type as a string, 'Undefined' when nothing matches.
    '''
    cleaned = utils.clean_string(text)

    # The prefix rule takes precedence over the exact-match commands.
    if cleaned.startswith('Team'):
        return 'Team Submission'

    # Command word -> message type (spellings are part of the public contract).
    request_types = {
        'Help': 'Help Request',
        'Standings': 'Standing Request',
        'Winning': 'Winning Request',
        'Loosing': 'Loosing Request',
        'Choices': 'Choices Request',
        'Fixtures': 'Fixture Request',
        'Position': 'Position Request',
        'Rules': 'Rules Request',
    }
    return request_types.get(cleaned, 'Undefined')
 def dialogyesno(self):
     '''Show a YES/NO dialog and run the builtins configured for the answer.

     "header" and "message" from self.params are cleaned and displayed.
     "yesaction" / "noaction" hold "|"-separated builtin commands; the list
     matching the user's choice is executed in order.
     '''
     heading = clean_string(self.params.get("header", ""))
     message = clean_string(self.params.get("message", ""))
     on_yes = self.params.get("yesaction", "").split("|")
     on_no = self.params.get("noaction", "").split("|")
     confirmed = xbmcgui.Dialog().yesno(heading=heading, line1=message)
     chosen = on_yes if confirmed else on_no
     for builtin in chosen:
         xbmc.executebuiltin(builtin.encode("utf-8"))
예제 #7
0
 def as_dict(event_xml):
     """Convert an event XML element into a plain dict.

     Text fields are passed through clean_string; the date, the times and
     the coordinates go through convert_or_raw with the matching converter.
     """
     address_node = event_xml.find('lloc_simple/adreca_simple')
     maps_node = event_xml.find('lloc_simple/adreca_simple/coordenades/googleMaps')

     # Values are computed in the same order as the keys of the result.
     name = clean_string(event_xml.find('nom').text)
     place = clean_string(event_xml.find('lloc_simple/nom').text)
     address_parts = (
         address_node.find('carrer').text,
         address_node.find('numero').text,
         address_node.find('codi_postal').text,
         address_node.find('municipi').text,
     )
     address = clean_string("%s, %s %s (%s)" % address_parts)
     neighborhood = clean_string(address_node.find('barri').text)
     # 'data_proper_acte' holds "<date> <time>" separated by whitespace.
     date = convert_or_raw(
         event_xml.find('data/data_proper_acte').text.split()[0],
         time.strptime, "%d/%m/%Y")
     starts_at = convert_or_raw(
         event_xml.find('data/data_proper_acte').text.split()[1],
         time.strptime, "%H.%M")
     ends_at = convert_or_raw(event_xml.find('data/hora_fi').text,
                              time.strptime, "%H.%M")
     lat = convert_or_raw(maps_node.get('lat'), float)
     lon = convert_or_raw(maps_node.get('lon'), float)

     return {
         "name": name,
         "place": place,
         "address": address,
         "neighborhood": neighborhood,
         "date": date,
         "starts_at": starts_at,
         "ends_at": ends_at,
         "lat": lat,
         "lon": lon,
     }
예제 #8
0
def process_column_by_levels(df_data, column, levels):
    """Replace every value of ``column`` by the matching level, in place.

    Each cell is cleaned and lower-cased; if that text is a substring of a
    cleaned, lower-cased level, the cell is replaced by
    ``utils.clean_points(level)``.  When several levels match, the last one
    wins.  Cells that match no level become ``"otra_<column>"``.

    The running position is used as the row label, so ``df_data`` is expected
    to carry the default 0..n-1 index.

    Args:
        df_data: DataFrame modified in place.
        column: name of the column to normalise.
        levels: iterable of accepted level strings.

    Returns:
        Always 0.
    """
    # Normalise the levels once instead of once per row.
    cleaned_levels = [(level, unicode(utils.clean_string(level)).lower())
                      for level in levels]

    for row, element in enumerate(df_data[column]):
        element = unicode(utils.clean_string(element)).lower()
        matched = False
        for level, level_clean in cleaned_levels:
            if element in level_clean:
                # DataFrame.set_value is deprecated; .at is its replacement.
                df_data.at[row, column] = unicode(utils.clean_points(level))
                matched = True
        if not matched:
            df_data.at[row, column] = unicode("otra_" + str(column))
    return 0
예제 #9
0
    def query(self, query):
        """Run ``query`` against the raw endpoint and parse the HTML answer.

        Args:
            query: the search term; it is appended to ``self._raw_endpoint``
                after conversion with ``str``.

        Returns:
            A ``QuiverResultSet`` built from the parsed panels, the parsed
            fusions and the original ``query``.

        Raises:
            EmptyQueryStringException: if ``query`` has length 0.
            NetworkErrorException: if the HTTP status code is not 200.
        """
        if len(query) == 0:
            raise EmptyQueryStringException()

        response = self._sesh.get(self._raw_endpoint + str(query))
        if response.status_code != 200:
            raise NetworkErrorException(response.status_code)

        soup = BeautifulSoup(response.content, "html.parser")

        # parse the panels: only rows with exactly two cells (link, genes)
        panels_list = []
        panels = soup.find(panel_table_filter)
        if panels is not None:
            for row in panels.find_all("tr"):
                cells = row.find_all("td")
                if len(cells) == 2:
                    link = cells[0].a['href']
                    genes = [clean_string(gene) for gene in cells[1].string.split()]
                    panels_list.append(QuiverFushionPlexPanel(link, genes))

        # parse the fusions (find_all always returns a list, never None)
        fusions_list = []
        for fusion in soup.find_all(fusion_table_filter):
            table = fusion.find('table')
            if table is None:
                continue
            for row in table.find_all('tr'):
                cells = row.find_all('td')
                # Five cells are read below; skip header rows and short rows
                # instead of failing with IndexError.
                if len(cells) < 5:
                    continue

                link = cells[0].a['href']
                original_annotation = clean_string(cells[1].string)
                disease = cells[2].string.strip()
                pubmed_link = cells[3].a['href']
                evidence_count = int(cells[4].string)

                fusions_list.append(QuiverGeneFushion(link, original_annotation, disease,
                                    pubmed_link, evidence_count))

        return QuiverResultSet(panels_list, fusions_list, query)
예제 #10
0
def clean_column(df_data, column, levels, binar):
    """Collapse infrequent values of ``column`` in the original data frame.

    For a binary column every cell becomes ``u"si"`` when its cleaned,
    lower-cased text contains "si" and ``u"no"`` otherwise.  Non-binary
    columns are delegated to the column-specific processor, or to
    ``process_column_by_levels`` for any other column.

    The running position is used as the row label, so ``df_data`` is expected
    to carry the default 0..n-1 index.

    Args:
        df_data: DataFrame modified in place.
        column: name of the column to clean.
        levels: accepted levels, used only by the generic processor.
        binar: truthy when the column is a yes/no variable.
    """
    if binar:
        for row, element in enumerate(df_data[column]):
            # clean element in every instance
            element = unicode(utils.clean_string(element)).lower()
            # DataFrame.set_value is deprecated; .at is its replacement.
            if unicode("si") in element:
                df_data.at[row, column] = u"si"
            else:
                df_data.at[row, column] = u"no"

    else:
        if column == "Actividad_Fisica":
            process_act_fisica(df_data, column)

        elif column == "Nacionalidad":
            process_nacionalidad(df_data, column)

        elif column == "Dieta_Alimenticia":
            process_dieta(df_data, column)

        else:
            process_column_by_levels(df_data, column, levels)
예제 #11
0
 def FindRawSubjects(self, subject1, subject2, update=False):
     """Report whether some line of the data file mentions both subjects.

     Every line of ``self.m_data_file_path`` is cleaned with
     ``utils.clean_string``.  When both subjects occur in the cleaned line,
     ``self._REGEX_SUBJECT`` is matched against the raw line to extract the
     user.  The whole file is always scanned, even after a hit.

     Args:
         subject1, subject2: the two subjects to look for.
         update: when true, each hit is also recorded in
             ``self.m_user2subDic`` and passed to ``__AddToSub2User``.

     Returns:
         True if at least one line yielded a non-empty user, else False.
         False is returned immediately when either subject is None or both
         subjects are equal.
     """
     if subject1 is None or subject2 is None or subject1 == subject2:
         return False

     found = False
     with open(self.m_data_file_path, 'r') as inF:
         for line in inF:
             cleanLine = utils.clean_string(line)
             if subject1 not in cleanLine or subject2 not in cleanLine:
                 continue
             matchObj = re.match(self._REGEX_SUBJECT, line)
             if not matchObj:
                 continue
             user = matchObj.group()
             if user == "":
                 continue
             found = True
             if update:
                 self.m_user2subDic.setdefault(user, []).extend(
                     (subject1, subject2))
                 self.__AddToSub2User(subject1, user)
                 self.__AddToSub2User(subject2, user)
     return found
def get_steam_wishlist_game_names(steamid: str, sort: bool = True,
                                  timeout: float = 30.0):
    """Fetch the cleaned game names on a Steam profile's wishlist.

    Args:
        steamid: profile id inserted into the wishlist data URL.
        sort: sort the names before returning them.
        timeout: seconds to wait for the HTTP request.  Without a timeout the
            request could block forever and the timeout handlers below would
            never run.

    Returns:
        A list of cleaned names.  The list is empty when the request fails,
        the body is not valid JSON, or the payload is not a mapping of
        entries.
    """
    url = f"https://store.steampowered.com/wishlist/profiles/{steamid}/wishlistdata/"

    game_names = []

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        # Expected shape: {'244850': {'name': 'Space Engineers', 'capsule': ...
        wishlist_infos = response.json()
    except (ReadTimeout, ConnectTimeout, HTTPError, Timeout,
            ConnectionError, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print("ERROR: get_steam_wishlist_game_names(...)")
        print(str(e))
    else:
        # NOTE(review): the endpoint may answer with a non-mapping payload
        # (e.g. an empty list or a bare status object); treat that as
        # "no games" instead of crashing.
        if isinstance(wishlist_infos, dict):
            for info in wishlist_infos.values():
                if isinstance(info, dict) and 'name' in info:
                    game_names.append(clean_string(info['name']))

    if sort:
        game_names.sort()

    return game_names
예제 #13
0
    def scrape(self):
        """Yield the mayor and then one ``Person`` per councillor cell.

        The first matching table cell is handed to ``scrape_mayor``.  Every
        later cell that contains a link is turned into a 'Conseiller' with
        name, district, image, phone numbers, links and e-mail taken from the
        council page and from the councillor's own page.

        Raises:
            AssertionError: if the council page has no councillor cells.
        """
        page = self.lxmlize(COUNCIL_PAGE)

        councillors = page.xpath('//div[@class="article-content"]//td[@class="ms-rteTableOddCol-0"]')
        # Check before indexing; otherwise an empty page surfaces as an
        # IndexError and this assertion can never fire.
        assert len(councillors), 'No councillors found'
        yield self.scrape_mayor(councillors[0])
        for councillor in councillors[1:]:
            if not councillor.xpath('.//a'):
                continue

            texts = [text for text in councillor.xpath('.//text()') if clean_string(text)]
            name = texts[0]
            district = texts[1]
            url = councillor.xpath('.//a/@href')[0]
            councillor_page = self.lxmlize(url)

            p = Person(primary_org='legislature', name=name, district=district, role='Conseiller')
            p.add_source(COUNCIL_PAGE)
            p.add_source(url)

            p.image = councillor.xpath('./preceding-sibling::td//img/@src')[-1]

            contacts = councillor_page.xpath('.//td[@class="ms-rteTableOddCol-0"]//text()')
            for contact in contacts:
                # Any text node with four consecutive digits is taken as a phone number.
                if re.findall(r'[0-9]{4}', contact):
                    phone = contact.strip().replace(' ', '-')
                    p.add_contact('voice', phone, 'legislature')
            get_links(p, councillor_page.xpath('.//td[@class="ms-rteTableOddCol-0"]')[0])

            email = self.get_email(councillor_page)
            p.add_contact('email', email)
            yield p
예제 #14
0
 def fetch_translations(self):
     """Download finished translations and store them on the matching units.

     Requests the list of translation requests from ``self.url``.  For every
     listed resource that is ready and whose shortname belongs to a
     ``TranslationRequest`` with status ``STATUS_IN_PROGRESS``, the result is
     fetched, stripped of marker text and split into sentences.  If the
     number of sentences equals the number of units in the store, each unit
     receives its sentence as a fuzzy target and the request is marked
     finished; otherwise the request is marked as an error.

     Raises:
         Exception: with the response reason when the listing request does
             not return status 200.
     """
     response = self.request(self.url + "requests/?token={0}".format(self.token))
     if response[0].status == 200:
         etree = utils.element_tree(response)
         requests = etree.findall("resource")
         # NOTE(review): eval() runs text received from the remote service.
         # Replace it with an explicit comparison once the exact format of
         # the "ready" field is confirmed.
         completed_requests = (r for r in requests if eval(r.findtext("ready")))
         # External ids of the requests we are still waiting for.
         in_progress_requests = set(
             [tr.external_id for tr in TranslationRequest.objects.filter(status=STATUS_IN_PROGRESS)]
         )
         for request in completed_requests:
             shortname = request.findtext("shortname")
             if shortname in in_progress_requests:
                 trans_request = TranslationRequest.objects.get_by_external_id(shortname)
                 response = self.request(self.url + "results/{0}/?token={1}".format(shortname, self.token))
                 etree = utils.element_tree(response)
                 result = etree.findtext("result")
                 # Remove "### " prefixes and "[[YAHOO_SPLITTER]]" marker lines.
                 result = re.sub("### (\[\[YAHOO_SPLITTER\]\]\n)?(### )?", "", result)
                 # Remove "<X[" and "]>" wrappers (X being one capital letter).
                 result = re.sub("(<[A-Z]\[)?(\]>)?", "", result)
                 result_sentences = [sentence.strip() for sentence in utils.clean_string(result.strip()).split("\n")]
                 store = Store.objects.get(translation_project=trans_request.translation_project)
                 units = store.unit_set.all()
                 # Sentences are assigned to units by position, so the counts must agree.
                 if not len(units) == len(result_sentences):
                     trans_request.status = STATUS_ERROR
                     print "ERROR!"
                 else:
                     for i in range(len(units)):
                         units[i].target = result_sentences[i]
                         units[i].state = pootle_store.util.FUZZY
                         units[i].save()
                     trans_request.status = STATUS_FINISHED
                 trans_request.save()
     else:
         raise Exception(response[0].reason)
예제 #15
0
 def get_filename(self, modus):
     """Return the export file name for the given ``modus``.

     'participants', 'csv' and 'pdf' use the cleaned title (spaces replaced
     by underscores), the cleaned lecturer last name and the semester.
     'evaluation' uses the raw title and raw last name.  Any other value
     yields None.
     """
     lecturer = clean_string(self.lecturer.last_name)
     term = str(self.semester)

     patterns = (
         ('participants', 'Teilnehmer_%s_%s_%s.csv'),
         ('csv', 'Ergebnisse_%s_%s_%s.csv'),
         # file ending will be added automatically in latex.py
         ('pdf', 'Ergebnisse_%s_%s_%s'),
     )
     for key, pattern in patterns:
         if modus == key:
             title = clean_string(self.title).replace(' ', '_')
             return pattern % (title, lecturer, term)

     if modus == 'evaluation':
         return 'Evaluation %s (%s) %s' % (self.title,
                                           self.lecturer.last_name, term)
     return None
예제 #16
0
def get_query_params(raw_query):
    """Parse ``"key:value,key:value"`` into a dict of value lists.

    Keys and values are passed through ``clean_string``.  Repeated keys
    accumulate their values in order.  Blank pairs (empty input, doubled or
    trailing commas) are skipped, and only the first ':' separates key from
    value, so values may themselves contain ':'.

    Raises:
        ValueError: if a non-blank pair contains no ':' at all.
    """
    query = {}
    for pair in raw_query.split(","):
        if not pair.strip():
            continue
        key, value = [clean_string(s) for s in pair.split(":", 1)]
        query.setdefault(key, []).append(value)

    return query
예제 #17
0
def get_mushroom_info(mushroom_id):
    """Retrieve the details for a mushroom.

    Downloads ``<mushroom_id>.html`` below the mushroom sub-path and reads
    the scientific name (``h3``), the family and genre (first two ``h4``
    elements) and every ``key: value`` paragraph of the ``ficha`` list from
    the ``contenido_2`` section.

    Returns:
        dict mapping normalised keys to cleaned values.
    """
    # URLs always use '/', whatever the separator of the local platform is.
    import posixpath

    dict_mushroom = {}
    url = posixpath.join(_BASEPATH, _SUBPATH_MUSHROOM,
                         '{}.html'.format(mushroom_id))

    # Get url content
    page = utils.get_html_content(url, _HEADERS)
    soup = BeautifulSoup(page, 'html.parser')

    # Find details section
    div = soup.find('div', id='contenido_2')

    # Find scientific name
    scientific_name = div.find('h3').get_text()
    dict_mushroom['scientific_name'] = utils.clean_string(scientific_name)

    # Find family and genre
    h4s = div.find_all('h4')
    family = h4s[0].get_text().split(':')[1]
    genre = h4s[1].get_text().split(':')[1]

    dict_mushroom['family'] = utils.clean_string(family)
    dict_mushroom['genre'] = utils.clean_string(genre)

    # Find other details
    ul = div.find('ul', class_='ficha')
    ps = ul.find_all('p')

    for idx_p, p in enumerate(ps):
        text = p.get_text()
        # A paragraph without ':' carries no key/value pair; skip it instead
        # of failing on the unpacking below.
        if ':' not in text:
            continue

        key, value = text.split(':', 1)
        key = utils.normalize_key(key)
        value = utils.clean_string(value)

        if key == '':
            # Only the first paragraph may come without a key.
            if idx_p == 0:
                key = "alternative_scientific_names"
            else:
                continue

        dict_mushroom[key] = value

    return dict_mushroom
예제 #18
0
 def as_dict(station_xml):
     """Convert a station XML element into a plain dict.

     The id is converted with int, the street is HTML-unescaped and cleaned,
     and the remaining fields go through convert_or_raw.
     """
     def text_of(tag):
         # Text content of the first child element named ``tag``.
         return station_xml.find(tag).text

     station = {}
     station["id"] = int(text_of('id'))
     station["street"] = clean_string(
         BicingProcessor.HTML_PARSER.unescape(text_of('street')))
     # (result key, XML tag, converter)
     converted_fields = (
         ("lat", 'lat', float),
         ("lon", 'long', float),
         ("slots", 'slots', int),
         ("bikes", 'bikes', int),
     )
     for key, tag, converter in converted_fields:
         station[key] = convert_or_raw(text_of(tag), converter)
     return station
예제 #19
0
def input_team(name, message):
    '''
    Store the team chosen by ``name`` for the current round.

    The team is the cleaned text between the first and the second '=' of
    ``message``.  Returns whatever utils.input_sql returns for the INSERT.

    Raises IndexError when ``message`` contains no '='.
    '''
    def _sql_quote(value):
        # Double embedded single quotes so the value cannot close the string
        # literal it is placed in.
        return '{}'.format(value).replace("'", "''")

    current_round = utils.get_current_round()

    team = utils.clean_string(message.split('=')[1])

    # NOTE(review): name and message are user input.  Quote doubling blocks
    # the basic injection, but a parameterised query is the proper fix if
    # utils.input_sql can accept bound parameters.
    query = "INSERT INTO choices (name, choice, round) VALUES ('{}', '{}', '{}');".format(
        _sql_quote(name), _sql_quote(team), _sql_quote(current_round))

    return utils.input_sql(query)
예제 #20
0
 def serialize(self):
     """Build the serialised form of this query.

     Only settings that differ from their defaults are emitted, next to the
     always-present ``language`` and ``query`` entries.

     Returns:
         ``{self._internal_name: filters}``.

     Raises:
         InvalidQuery: if the (optionally cleaned) query text is empty.
     """
     filters = {'language': self.language}
     if self.default_field:
         filters["default_field"] = self.default_field
         # The dis-max settings are only emitted for a list of default fields.
         if isinstance(self.default_field, list):
             if not self.use_dis_max:
                 filters["use_dis_max"] = self.use_dis_max
             if self.tie_breaker != 0:
                 filters["tie_breaker"] = self.tie_breaker
     if self.search_fields:
         filters['fields'] = self.search_fields
     if self.default_operator != "OR":
         filters["default_operator"] = self.default_operator
     if self.options:
         filters["options"] = self.options
     if self.synonyms:
         filters["synonyms"] = self.synonyms
     if self.stopwords:
         filters["stopwords"] = self.stopwords
     if self.analyzer:
         filters["analyzer"] = self.analyzer
     if not self.allow_leading_wildcard:
         filters["allow_leading_wildcard"] = self.allow_leading_wildcard
     if not self.lowercase_expanded_terms:
         filters["lowercase_expanded_terms"] = self.lowercase_expanded_terms
     if not self.enable_position_increments:
         filters["enable_position_increments"] = self.enable_position_increments
     if self.fuzzy_prefix_length:
         filters["fuzzy_prefix_length"] = self.fuzzy_prefix_length
     if self.fuzzy_min_sim != 0.5:
         filters["fuzzy_min_sim"] = self.fuzzy_min_sim
     if self.phrase_slop:
         filters["phrase_slop"] = self.phrase_slop
     if self.min_concept_size != 2:
         filters["min_concept_size"] = self.min_concept_size
     if self.max_concept_size != 4:
         filters["max_concept_size"] = self.max_concept_size
     if self.term_expansion_steps != 1:
         filters["term_expansion_steps"] = self.term_expansion_steps

     if self.boost != 1.0:
         filters["boost"] = self.boost
     if self.clean_text:
         query = clean_string(self.text)
         if not query:
             raise InvalidQuery("The query is empty")
         filters["query"] = query
     else:
         # The raw text is sent as is; it only has to be non-blank.
         if not self.text.strip():
             raise InvalidQuery("The query is empty")
         filters["query"] = self.text

     return {self._internal_name: filters}
예제 #21
0
def reconcile(book, bank, externalData):
    """
    Reconcile a book statement with a bank statement.

    The book ledger is filtered down to the entries of ``bank``, pivoted,
    and split into a vendor table and a customer table.  Both are matched
    one-to-one against the bank ledger, the matches are mapped back onto the
    book, and the results are highlighted and written to Excel.

    Args:
        book: a Ledger object
        bank: a Ledger object
        externalData: extra data such as previous bank reconciliations
            (not read by this function)
    Returns:
        Nothing
    """
    # Algorithm 1: One-to-one reconciliation
    ut.clean_columns(book.ledger)

    # Keep only the book entries that belong to this bank.
    account_column = ut.clean_string('G/L Account Name')
    bank_filter = '{} == "{}"'.format(account_column, bank.name)
    bookByBank = Ledger(ledger=book.ledger.query(bank_filter))

    pivot = Ledger(ledger=ut.toPivotTable(bookByBank.ledger))
    vendors = Ledger(ledger=pivot.ledger.drop('Customer', axis=1).copy())
    customers = Ledger(ledger=pivot.ledger.drop('Vendor', axis=1).copy())

    for ledger in (vendors, customers, bank):
        appendMatchColumn(ledger)

    oneToOneMatch(vendors, bank,
                  ut.clean_string('Vendor'), ut.clean_string('Debit'))
    oneToOneMatch(customers, bank,
                  ut.clean_string('Customer'), ut.clean_string('Credit'))

    # After getting all pivot table matches, map it back up to the Book Ledger
    reversePivotMapping(book, bookByBank, vendors, customers)

    # Reflect changes in excel
    highlightMatches(book)
    highlightMatches(bank)
    ut.newExcel(vendors, 'VendorByDate.xlsx', 'Reconciliation')
    ut.newExcel(customers, 'CustomerByDate.xlsx', 'Reconciliation')
    highlightMatches(vendors)
    highlightMatches(customers)
예제 #22
0
    def serialize(self):
        """Build the serialised form of this query.

        Only settings that differ from their defaults are emitted, next to
        the always-present ``query`` entry.

        Returns:
            ``{self._internal_name: payload}``.

        Raises:
            InvalidQuery: if the (optionally cleaned) query text is empty.
        """
        payload = {}

        def add_dis_max_settings():
            # Shared by the multi-default-field and the multi-field case.
            if not self.use_dis_max:
                payload["use_dis_max"] = self.use_dis_max
            if self.tie_breaker != 0:
                payload["tie_breaker"] = self.tie_breaker

        if self.default_field:
            payload["default_field"] = self.default_field
            is_text = isinstance(self.default_field, (str, unicode))
            if not is_text and isinstance(self.default_field, list):
                add_dis_max_settings()

        if self.default_operator != "OR":
            payload["default_operator"] = self.default_operator
        if self.analyzer:
            payload["analyzer"] = self.analyzer
        if not self.allow_leading_wildcard:
            payload["allow_leading_wildcard"] = self.allow_leading_wildcard
        if not self.lowercase_expanded_terms:
            payload["lowercase_expanded_terms"] = self.lowercase_expanded_terms
        if not self.enable_position_increments:
            payload["enable_position_increments"] = \
                self.enable_position_increments
        if self.fuzzy_prefix_length:
            payload["fuzzy_prefix_length"] = self.fuzzy_prefix_length
        if self.fuzzy_min_sim != 0.5:
            payload["fuzzy_min_sim"] = self.fuzzy_min_sim
        if self.phrase_slop:
            payload["phrase_slop"] = self.phrase_slop

        if self.search_fields:
            # A single field name is wrapped into a one-element list.
            fields = self.search_fields
            if isinstance(fields, (str, unicode)):
                fields = [fields]
            payload["fields"] = fields
            if len(fields) > 1:
                add_dis_max_settings()

        if self.boost != 1.0:
            payload["boost"] = self.boost
        if self.analyze_wildcard:
            payload["analyze_wildcard"] = self.analyze_wildcard

        if self.clean_text:
            text = clean_string(self.query)
            if not text:
                raise InvalidQuery("The query is empty")
        else:
            text = self.query
            if not text.strip():
                raise InvalidQuery("The query is empty")
        payload["query"] = text
        return {self._internal_name: payload}
예제 #23
0
def rename_project():
    """Rename the project named in the submitted form.

    Redirects to "/" when no user is logged in.  Otherwise the project with
    the posted ``project_id`` gets the cleaned ``project_name`` (if such a
    project exists), and the user is redirected to "/project".
    """
    if not is_logged_in():
        return redirect("/")

    project_id = request.form.get("project_id", type=str, default="ID?")
    raw_name = request.form.get("project_name", type=str, default="ID?")
    new_name = utils.clean_string(raw_name)

    projects = mongo.db.projects
    record = projects.find_one({'_id': project_id})
    if record:
        record['project_name'] = new_name
        projects.save(record)
        flash('Project successfully renamed.')
    # An unknown project id falls through silently to the same redirect.
    return redirect("/project")
예제 #24
0
def string_combinations(string, max_words):
    """
    Takes a string text and creates a list of all possible neighbouring strings.

    The text is split into words (runs of word characters and apostrophes),
    the words are grouped by ``combine`` and every group is joined with a
    single space and cleaned.

    :param string: Input string
    :param max_words: Upper bound handed to ``combine``; 0 disables the search
    :return: list of cleaned strings, or None when ``max_words`` is 0
    """
    logger.info('')
    if max_words == 0:
        return None
    words = re.findall(r"[\w']+", string)
    groups = combine(words, max_words)
    return [utils.clean_string(' '.join(group)) for group in groups]
예제 #25
0
파일: query.py 프로젝트: ieure/pyes
    def serialize(self):
        """Return this query as ``{self._internal_name: settings}``.

        Settings equal to their defaults are left out; ``query`` is always
        present.  Raises ``InvalidQuery`` when the (optionally cleaned) query
        text is empty.
        """
        settings = {}

        def emit_dis_max():
            # Used for a list of default fields and for more than one field.
            if not self.use_dis_max:
                settings["use_dis_max"] = self.use_dis_max
            if self.tie_breaker != 0:
                settings["tie_breaker"] = self.tie_breaker

        if self.default_field:
            settings["default_field"] = self.default_field
            if isinstance(self.default_field, (str, unicode)):
                pass
            elif isinstance(self.default_field, list):
                emit_dis_max()

        if self.default_operator != "OR":
            settings["default_operator"] = self.default_operator
        if self.analyzer:
            settings["analyzer"] = self.analyzer
        if not self.allow_leading_wildcard:
            settings["allow_leading_wildcard"] = self.allow_leading_wildcard
        if not self.lowercase_expanded_terms:
            settings["lowercase_expanded_terms"] = self.lowercase_expanded_terms
        if not self.enable_position_increments:
            settings["enable_position_increments"] = self.enable_position_increments
        if self.fuzzy_prefix_length:
            settings["fuzzy_prefix_length"] = self.fuzzy_prefix_length
        if self.fuzzy_min_sim != 0.5:
            settings["fuzzy_min_sim"] = self.fuzzy_min_sim
        if self.phrase_slop:
            settings["phrase_slop"] = self.phrase_slop

        if self.search_fields:
            if isinstance(self.search_fields, (str, unicode)):
                # A single field name becomes a one-element list.
                field_list = [self.search_fields]
            else:
                field_list = self.search_fields
            settings["fields"] = field_list
            if len(field_list) > 1:
                emit_dis_max()

        if self.boost != 1.0:
            settings["boost"] = self.boost
        if self.analyze_wildcard:
            settings["analyze_wildcard"] = self.analyze_wildcard

        if self.clean_text:
            query_text = clean_string(self.query)
            is_empty = not query_text
        else:
            query_text = self.query
            is_empty = not query_text.strip()
        if is_empty:
            raise InvalidQuery("The query is empty")
        settings["query"] = query_text
        return {self._internal_name: settings}
예제 #26
0
def reversePivotMapping(book, bookByBank, vendorPivotTable,
                        customerPivotTable):
    """
    Maps matched values from the pivot tables back to the un-pivoted Ledger.

    For every pivot row, the book entries of the same posting date are split
    into debits and credits.  If the pivoted vendor amount equals the sum of
    the debits, those book rows are flagged; likewise for the customer amount
    and the credits.

    Args:
        book: the un-pivoted Ledger of the pivot tables
        bookByBank: is necessary to identify book entries belonging to a specific bank
        vendorPivotTable: a pivoted Ledger object for vendors
        customerPivotTable: a pivoted Ledger object for customers
    Returns:
        Nothing - the changes are reflected in book ('Matches' column set to 1)
    """
    appendMatchColumn(book)

    # Column names do not change between rows; resolve them once.
    vendor_col = ut.clean_string('Vendor')
    customer_col = ut.clean_string('Customer')
    date_col = ut.clean_string('Posting Date')
    amount_col = ut.clean_string('Amount')

    # The pivots share the same indices
    for pivotIndex in vendorPivotTable.ledger.index.values:
        vendorAmount = vendorPivotTable.ledger.at[pivotIndex, vendor_col]
        customerAmount = customerPivotTable.ledger.at[pivotIndex, customer_col]
        date = vendorPivotTable.ledger.at[pivotIndex, date_col]
        rows = bookByBank.ledger[bookByBank.ledger[date_col].isin([date])]

        amounts = rows[amount_col]
        debit_amounts = amounts[isDebit(amounts)]
        credit_amounts = amounts[isCredit(amounts)]

        # NOTE(review): exact equality on sums -- verify the amounts are not
        # floats that could differ by rounding.
        if vendorAmount == debit_amounts.sum():
            for i in debit_amounts.index.values:
                book.ledger.at[i, 'Matches'] = 1
        if customerAmount == credit_amounts.sum():
            for i in credit_amounts.index.values:
                book.ledger.at[i, 'Matches'] = 1
    return
예제 #27
0
def filter_keywords(kw_dict, max_words=2, min_words=1):
    """Select the acceptable keywords among the 15 most frequent ones.

    The 15 entries of ``kw_dict`` with the highest values are cleaned with
    ``utils.clean_string``; a keyword is kept when its word count lies in
    ``[min_words, max_words]``, it is longer than two characters and
    ``is_word_checked`` accepts it.

    Returns a dict mapping each kept (cleaned) keyword to its value, or
    ``None`` when nothing was kept.
    """
    ranked = sorted(kw_dict.items(), key=lambda pair: pair[1], reverse=True)
    accepted = {}
    for raw_keyword, count in ranked[:15]:
        keyword = utils.clean_string(raw_keyword)
        word_count = len(keyword.split())
        if word_count > max_words or word_count < min_words:
            continue
        if len(keyword) <= 2:
            continue
        if not is_word_checked(keyword):
            continue
        accepted[keyword] = count

    return accepted if accepted else None
예제 #28
0
def parse_npmaudit_file(threatplaybook, vul_result, project, target, scan,
                        token):
    """
    Report one npm-audit advisory and one evidence per affected path.

    A vulnerability query is built from ``vul_result`` with
    ``create_vulnerability`` and posted through ``_post_query``. When that
    succeeds, an evidence query is built and posted for every path of every
    finding of the advisory.

    Args:
        threatplaybook: passed through to ``_post_query``
        vul_result: dict describing the advisory; the keys read are title,
            overview, cwe, observation, severity, recommendation, findings
            and module_name
        project: stored as a string on the vulnerability
        target: stored as a string on the vulnerability
        scan: stored as a string on the vulnerability
        token: passed through to ``_post_query``
    Returns:
        ``{'success': <cleaned response name>}`` when the vulnerability was
        posted, otherwise ``{'error': <message>}``.
    """
    # npm audit also reports 'high'; it used to be missing here and silently
    # fell back to 0. It is mapped to the top of the scale, like 'critical'.
    severity_dict = {'low': 1, 'moderate': 2, 'high': 3, 'critical': 3}

    # The value looks like 'CWE-79'; a missing or non-numeric value must not
    # abort the import, so it degrades to 0.
    cwe_text = str(vul_result.get('cwe') or '')
    try:
        cwe = int(cwe_text.split('-')[-1])
    except ValueError:
        cwe = 0

    vul_dict = {
        'name': str(clean_string(vul_result.get('title'))),
        'tool': 'Npm Audit',
        'description': str(clean_string(vul_result.get('overview'))),
        'project': str(project),
        'target': str(target),
        'scan': str(scan),
        'cwe': cwe,
        'observation': str(clean_string(vul_result.get('observation'))),
        'severity': int(severity_dict.get(vul_result.get('severity'), 0)),
        'remediation': str(clean_string(vul_result.get('recommendation')))
    }
    create_vulnerability_query = create_vulnerability(vul_dict=vul_dict)
    if not create_vulnerability_query:
        return {'error': 'Error while creating Vulnerability Query'}

    response = _post_query(threatplaybook=threatplaybook,
                           query=create_vulnerability_query,
                           token=token)
    if not response:
        return {'error': 'Error while creating Vulnerability'}

    cleaned_response_name = validate_vulnerability_response_name(
        content=response)
    vulnId = validate_vulnerability_response_id(content=response)

    # An advisory without findings (or a finding without paths) simply
    # produces no evidence.
    for finding in vul_result.get('findings') or []:
        for path in finding.get('paths') or []:
            evidence = {
                'name': str(clean_string('File: {0}'.format(path))),
                'url': str(clean_string(vul_result.get('module_name'))),
                'vulnId': str(vulnId)
            }
            create_evidence_query = create_evidence(evidence=evidence)
            if not create_evidence_query:
                return {
                    'error':
                    'Error while creating Vulnerability Evidence Query'
                }
            evidence_response = _post_query(threatplaybook=threatplaybook,
                                            query=create_evidence_query,
                                            token=token)
            if evidence_response:
                cleaned_evidence_response = validate_evidence_response(
                    content=evidence_response)
                if cleaned_evidence_response:
                    print('Evidence Created: {}'.format(
                        cleaned_evidence_response))
                else:
                    print('No Vulnerability Evidence')
    return {'success': cleaned_response_name}
예제 #29
0
def scrape(stats_file, pokedes_file):
    """
    Scrape the Pokedex table of pokemondb.net and save it.

    The first table row supplies the column titles (the '#' column is
    renamed to 'no'). The following rows supply the data until the first row
    that does not have exactly 10 cells. Every cell except the first of each
    row is converted to ``int`` when possible.

    Args:
        stats_file: destination of the JSON dump (``orient='records'``)
        pokedes_file: destination passed to ``utils.save_df_to_text`` for the
            'img_filename' column
    """
    tr_elements = utils.getData('http://pokemondb.net/pokedex/all', '//tr')

    # One (title, values) pair per header cell.
    col = []
    for t in tr_elements[0]:
        name = t.text_content()
        if name == '#':
            name = 'no'
        col.append((utils.clean_string(name), []))

    # Since the first row is the header, data starts on the second row.
    for j in range(1, len(tr_elements)):
        T = tr_elements[j]

        if len(T) != 10:
            break

        for i, t in enumerate(T.iterchildren()):
            data = t.text_content()

            if i > 0:
                # Non-numeric cells are kept as text.
                try:
                    data = int(data)
                except ValueError:
                    pass

            col[i][1].append(data)

    # Construct Data Frame using Pandas.
    df = pd.DataFrame(dict(col))

    # Apply clean up
    df['name'] = df['name'].apply(utils.str_bracket)
    df['type'] = df['type'].apply(utils.str_break)
    df['img_filename'] = df['name']
    df['img_filename'] = df['img_filename'].apply(utils.generate_img_file_name)

    # Save to json
    df.to_json(stats_file, orient='records')

    # Save image_filename list
    utils.save_df_to_text(df, pokedes_file, 'img_filename')
예제 #30
0
def get_xbox_game_pass_game_names(sort: bool = True):
    """
    Return the cleaned product titles of the Xbox Game Pass catalogue.

    The product ids are fetched first, then the product details of all ids
    in one request. Each title goes through ``clean_string`` and loses the
    first entry of ``extraneous_trailing_strings`` it ends with.

    Args:
        sort: sort the names before returning them.
    Returns:
        The list of names; empty when a request fails with one of the
        handled network errors (the error is printed).
    """
    # Determined from: https://www.xbox.com/en-US/xbox-game-pass/games?=pcgames
    game_ids_url = 'https://catalog.gamepass.com/sigls/v2?id=fdd9e2a7-0fee-49f6-ad69-4354098401ff&language=en-us&market=US'
    game_info_url = 'https://displaycatalog.mp.microsoft.com/v7.0/products?bigIds={}&market=US&languages=en-us&MS-CV=XXX'

    # Bound before the try block: a failed request must lead to an empty
    # result, not to a NameError on the lines after the handler.
    game_names = []

    try:
        game_ids = requests.get(game_ids_url).json()
        s = ','.join(i['id'] for i in game_ids if 'id' in i)

        data = requests.get(game_info_url.format(s)).json()

        for p in data['Products']:
            name = clean_string(p['LocalizedProperties'][0]['ProductTitle'])

            for word in extraneous_trailing_strings:
                if name.endswith(word):
                    # Cut only the trailing occurrence; str.replace would
                    # also remove the same text from the middle of the name.
                    if word:
                        name = name[:-len(word)]
                    break

            game_names.append(name)

    except (ReadTimeout, ConnectTimeout, HTTPError, Timeout,
            ConnectionError) as e:
        print("ERROR: get_xbox_game_pass_game_names(...)")
        print(str(e))

    if sort:
        game_names.sort()

    return game_names
예제 #31
0
def process_nacionalidad(df_data, column):
    """
    Normalise the free-text nationality values of ``column`` in place.

    Each value is cleaned and lower-cased, then overwritten through
    ``df_data.set_value`` with the label of the first rule that has one of
    its fragments contained in the value. Values matching no rule become
    u"no especifica". Rows are addressed by a counter starting at 0.
    """
    # (fragments, label) pairs, tried in order; the first hit wins.
    rules = (
        (("chi", "temuco", "calama", "chlena", "calama", "chle", "chulena",
          "nacional", "hileno"), u"chilena"),
        (("colombiano", "colom"), u"colombiano"),
        (("peru",), u"peruano"),
        (("argen",), u"argentina"),
        (("bolivia", "boli"), u"boliviana"),
        (("alemana",), u"alemana"),
        (("estado",), u"estadounidense"),
        (("ecua",), u"ecuatoriana"),
        (("vnezolano", "vene"), u"venezolano"),
    )
    for row, raw_value in enumerate(df_data[column]):
        text = unicode(utils.clean_string(raw_value)).lower()
        label = u"no especifica"
        for fragments, candidate in rules:
            if any(unicode(fragment) in text for fragment in fragments):
                label = candidate
                break
        df_data.set_value(row, column, label)
예제 #32
0
def process_act_fisica(df_data, column):
    """
    Normalise the physical-activity answers of ``column`` in place.

    Each value is cleaned and lower-cased; when it contains one of the known
    compact answers it is overwritten through ``df_data.set_value`` with the
    spaced-out form, otherwise with u"no". Rows are addressed by a counter
    starting at 0.
    """
    # Compact form searched for -> value written back, tried in order.
    answers = (
        ("1vezporsemana", u"1 vez por semana"),
        ("2vecesporsemana", u"2 veces por semana"),
        ("3vecesporsemana", u"3 veces por semana"),
    )
    for row, raw_value in enumerate(df_data[column]):
        # clean element in every instance
        text = unicode(utils.clean_string(raw_value)).lower()
        replacement = u"no"
        for compact, spaced in answers:
            if unicode(compact) in text:
                replacement = spaced
                break
        df_data.set_value(row, column, replacement)
예제 #33
0
def get_xbox_game_pass_game_names(sort: bool = True):
    """
    Return the cleaned product titles of the Xbox Game Pass catalogue.

    One request fetches the product ids, a second one the details of all of
    them. Each title goes through ``clean_string`` and loses the first entry
    of ``extraneous_trailing_strings`` it ends with.

    Args:
        sort: sort the names before returning them.
    Returns:
        The list of names; empty when a request fails with one of the
        handled network errors (the error is printed).
    """
    # The following urls were determined from: https://www.xbox.com/en-US/xbox-game-pass/games?=pcgames.
    # Use Chrome Dev Tools to watch where data is being pulled from
    game_ids_url = 'https://catalog.gamepass.com/sigls/v2?id=fdd9e2a7-0fee-49f6-ad69-4354098401ff&language=en-us&market=US'
    game_info_url = 'https://displaycatalog.mp.microsoft.com/v7.0/products?bigIds={}&market=US&languages=en-us&MS-CV=XXX'

    # Must exist even when a request raises: the code after the handler
    # sorts and returns it.
    game_names = []

    try:
        game_ids = requests.get(game_ids_url).json()
        s = ','.join(i['id'] for i in game_ids if 'id' in i)

        data = requests.get(game_info_url.format(s)).json()

        for p in data['Products']:
            name = clean_string(p['LocalizedProperties'][0]['ProductTitle'])

            for word in extraneous_trailing_strings:
                if name.endswith(word):
                    # Remove the suffix only, not every occurrence of it.
                    if word:
                        name = name[:-len(word)]
                    break

            game_names.append(name)

    except (ReadTimeout, ConnectTimeout, HTTPError, Timeout, ConnectionError) as e:
        print("ERROR: get_xbox_game_pass_game_names(...)")
        print(str(e))

    if sort:
        game_names.sort()

    return game_names
예제 #34
0
def check_validity(name, message):
    '''
    Tell whether ``name`` may still choose the team given in ``message``.

    The team is the cleaned text between the first and second '=' of
    ``message``. The choice is valid while the ``choices`` table holds fewer
    than two rows for that (name, team) pair.

    Returns True when the choice is valid, False otherwise.
    Raises IndexError when ``message`` contains no '='.
    '''
    team = utils.clean_string(message.split('=')[1])

    def quote(value):
        # Double single quotes so the value cannot terminate the SQL literal.
        return str(value).replace("'", "''")

    # NOTE(review): both values come from user input. Quote doubling is the
    # standard-SQL escape; switch to bound parameters if utils.read_from_sql
    # can accept them - confirm against its signature.
    query = "SELECT COUNT(*) AS count FROM choices WHERE name = '{}' and choice = '{}'".format(quote(name), quote(team))

    df = utils.read_from_sql(query)

    times_chosen = df['count'][0]

    return bool(times_chosen < 2)
예제 #35
0
    def scrape(self):
        """
        Yield the mayor, then one ``Person`` per councillor cell with a link.

        The first matching table cell of ``COUNCIL_PAGE`` is handed to
        ``scrape_mayor``. Every later cell that contains a link yields a
        person whose name and district are its first two non-empty texts;
        contacts and e-mail come from the page the link points to.

        Raises AssertionError when the council page has no matching cell.
        """
        page = self.lxmlize(COUNCIL_PAGE)

        councillors = page.xpath(
            '//div[@class="article-content"]//td[@class="ms-rteTableOddCol-0"]'
        )
        # Checked before the first cell is used; otherwise an empty result
        # surfaces as a bare IndexError and this message is never seen.
        assert len(councillors), 'No councillors found'
        yield self.scrape_mayor(councillors[0])
        for councillor in councillors[1:]:
            if not councillor.xpath('.//a'):
                continue

            texts = [
                text for text in councillor.xpath('.//text()')
                if clean_string(text)
            ]
            name = texts[0]
            district = texts[1]
            url = councillor.xpath('.//a/@href')[0]
            page = self.lxmlize(url)

            p = Person(primary_org='legislature',
                       name=name,
                       district=district,
                       role='Conseiller')
            p.add_source(COUNCIL_PAGE)
            p.add_source(url)

            p.image = councillor.xpath('./preceding-sibling::td//img/@src')[-1]

            contacts = page.xpath(
                './/td[@class="ms-rteTableOddCol-0"]//text()')
            for contact in contacts:
                # Any text holding four consecutive digits is taken as a phone number.
                if re.findall(r'[0-9]{4}', contact):
                    phone = contact.strip().replace(' ', '-')
                    p.add_contact('voice', phone, 'legislature')
            get_links(p, page.xpath('.//td[@class="ms-rteTableOddCol-0"]')[0])

            email = self.get_email(page)
            p.add_contact('email', email)
            yield p
예제 #36
0
    def get_keywords(self, txt, url, language='en'):
        """
        Count the words of a text that are not stopwords.

        Every whitespace-separated token is cleaned and lower-cased; tokens
        that are stopwords of ``language`` or shorter than two characters
        are dropped.
        :param txt: String of text, that we want to get keywords to.
        :param url: Url from which text was fetched (only logged).
        :param language: key into ``self.stopwords``.
        :return: dict mapping each remaining word to its number of occurrences.
        """
        logger.info(url + ':' + txt)
        ignored = set(self.stopwords[language])
        cleaned = (utils.clean_string(token).lower() for token in txt.split())
        kept = [word for word in cleaned
                if word not in ignored and len(word) > 1]
        return dict(Counter(kept).most_common())
예제 #37
0
def process_dieta(df_data, column):
    """
    Normalise the diet answers of ``column`` in place.

    Each value is cleaned and lower-cased, then overwritten through
    ``df_data.set_value`` with the category of the first keyword it
    contains, or with u"otro" when it contains none. Rows are addressed by a
    counter starting at 0.
    """
    # Keyword searched for -> category written back, tried in order.
    categories = (
        ("vegetariano", u"vegetariano"),
        ("vegano", u"vegano"),
        ("omnivoro", u"omnivoro"),
        ("normal", u"omnivoro"),
        ("comodetodo", u"omnivoro"),
    )
    for row, raw_value in enumerate(df_data[column]):
        text = unicode(utils.clean_string(raw_value)).lower()
        category = u"otro"
        for keyword, candidate in categories:
            if unicode(keyword) in text:
                category = candidate
                break
        df_data.set_value(row, column, category)
예제 #38
0
파일: plotter.py 프로젝트: kpj/Bioto
    def show(name, fname=None, timestamp=False):
        """ Display (or close) the current figure, then write it to disk

            Without ``fname`` the target is the cleaned plot name inside
            Plotter.plot_save_directory. '.png' is appended unless the name
            already ends in '.png' or '.svg'. With ``timestamp`` the result of
            utils.get_strtime() is inserted before the extension.
        """
        if fname is None:
            fname = os.path.join(Plotter.plot_save_directory, utils.clean_string(name))

        # make sure there is a supported file suffix
        if not fname.endswith(('.png', '.svg')):
            fname += '.png'
        log('Plotting "%s"' % fname)

        # put the timestamp between base name and extension
        if timestamp:
            base, extension = os.path.splitext(fname)
            base += '_%s' % utils.get_strtime()
            fname = '%s%s' % (base, extension)

        # create the target directory when it is missing
        target_dir = os.path.dirname(fname)
        if target_dir and not os.path.exists(target_dir):
            os.makedirs(target_dir)

        # fix confusing axis offset
        plain_numbers = ptk.ScalarFormatter(useOffset=False)
        axes = plt.gca()
        axes.xaxis.set_major_formatter(plain_numbers)
        axes.yaxis.set_major_formatter(plain_numbers)

        # keep a handle on the figure: it is saved after show()/close()
        figure = plt.gcf()

        if Plotter.show_plots:
            plt.show()
        else:
            plt.close()

        # save plot
        figure.dpi = 250
        figure.savefig(fname, dpi=figure.dpi)
예제 #39
0
    def get_extra_info(self, cee):
        """Download and parse the description page of ``cee``.

        Returns a dict with the keys ``image`` (src of the detail image),
        ``desc`` (stripped direct text of the synopsis block), ``details``
        (cleaned title -> stripped value, or list of stripped values when the
        value holds nested spans) and ``age_rating``.

        Raises whatever ``response.raise_for_status()`` raises on an HTTP error.
        """
        print(f"Downloading extra info for {cee}")

        response = requests.get(MOVISTAR_DESCRIPTION_URL + cee)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")

        image = soup.find("img", class_="img-v-detail")["src"]

        synopsis = soup.find("div", class_="sinopsis_large")
        desc = synopsis.find(text=True, recursive=False).strip()

        # The detail divs alternate between a title and its value.
        detail_divs = soup.find("div", class_="details_container").find_all("div")
        details = {}
        for title_div, value_div in zip(detail_divs[::2], detail_divs[1::2]):
            title = clean_string(title_div.text)
            value_item = value_div.find("span", class_="details_value")

            nested = value_item.find_all("span")
            if nested:
                details[title] = [span.text.strip() for span in nested]
            else:
                details[title] = value_item.text.strip()

        age_rating = soup.find("span", class_="nivel_moral").text.strip()

        return {
            "image": image,
            "desc": desc,
            "details": details,
            "age_rating": age_rating,
        }
예제 #40
0
def create_project():
    """
    Create a project for the logged-in user and redirect to its post-create page.

    The project name is read from the submitted form (default
    "Exciting Project") and cleaned. A manifest object is stored in the
    'swiftlatex' bucket under a random project id, the project document is
    inserted and the id is appended to the user's ``project_list``.

    Returns a redirect to "/" when nobody is logged in, a 500 abort when the
    manifest cannot be stored, otherwise a redirect to the post-create page.
    """
    user_record = is_logged_in()
    if not user_record:
        return redirect("/")
    project_name = request.form.get("project_name", type=str, default="Exciting Project")
    project_name = utils.clean_string(project_name)

    project_url = utils.random_string(config.PROJECT_ID_LEN)
    # One timestamp for the manifest and both time fields, so a new project
    # has identical creation and modification times.
    now = datetime.datetime.now()
    try:
        aws.Bucket('swiftlatex').put_object(Key=project_url + '/manifest', Body='%s' % now)
    except Exception:
        # Not a bare except: KeyboardInterrupt/SystemExit must propagate.
        return abort(500)
    project = {
        '_id': project_url,
        'project_name': project_name,
        'author_id': user_record['_id'],
        'author_name': user_record['display_name'],
        'created_time': now,
        'modified_time': now,
    }
    mongo.db.projects.insert_one(project)
    user_record['project_list'].append(project_url)
    # Collection.save() is deprecated; replace_one with upsert is its
    # documented equivalent for a document that carries an _id.
    mongo.db.users.replace_one({'_id': user_record['_id']}, user_record, upsert=True)

    return redirect('/project/postcreate/%s/' % project_url)
예제 #41
0
def search_feasts(search_string):
    """
    Search saints and feasts in year menology.
    :param search_string: search input string (e.g. saint or feast name).
    :return list, witch contains gregorian & julian dates and search result.
    """
    if len(search_string) < 3:
        return dict(count=0, result=[])

    # TODO: Fix endings in the words: Андрей, Андриан
    endings = re.compile(
        ur"(ый|ой|ая|ое|ые|ому|а|ем|о|ов|у|е|й|" ur"ого|ал|ему|и|ство|ых|ох|ия|ий|ь|я|он|ют|ат|ин|ея)$", re.U
    )

    search_string_clean = re.sub(endings, u"", search_string)
    pattern = re.compile(ur"(\b%s\w+\b)" % search_string_clean, re.I | re.U)

    if len(search_string) <= 5:
        endings = re.compile(ur"(а|ы|у|и|я)$", re.U)

        search_string_clean = re.sub(endings, u"", search_string)
        pattern = re.compile(ur"(\b%s[а-я]{0,2}\b)" % search_string_clean, re.I | re.U)

    date = datetime.date.today()
    year = convert_gregorian_to_julian(date.day, date.month, date.year)[2]
    year_menology = menology
    cal_out = []

    # List of weekday names.
    for month in range(1, 13):
        days_in_month = calendar.monthrange(year, month)[1]
        for day in range(1, days_in_month + 1):
            cal = AncientCalendar(day, month, year, calendar="julian")
            weekdayname = cal.get_daily_feast()
            cal_out.extend([[list((day, month, year)), weekdayname]])

    # Construct year menology with movable feasts.
    for item in cal_out:
        year_menology[item[0][1]][item[0][0]]["weekday"] = item[1]

    d = year_menology
    out = []

    # Search string in menology.
    for _month, value in d.iteritems():
        for _day, value1 in d[_month].iteritems():
            for key2, content in d[_month][_day].iteritems():

                if re.search(pattern, unicode(content)):
                    _date = (_day, _month, year)
                    out.extend(
                        [
                            [
                                list(convert_julian_day_to_julian_am(convert_julian_to_julian_day(*_date))),
                                list(convert_julian_to_gregorian(*_date)),
                                clean_string(content),
                            ]
                        ]
                    )

    # Highliting search_string.
    start_str = u"{highlight_start}"
    end_str = u"{highlight_end}"

    pattern_highlite = re.compile(ur"(\b%s\w*\b)" % search_string_clean, re.I | re.U)

    tr = lambda m: start_str + m.group() + end_str

    result = [[t[0], t[1], pattern_highlite.sub(tr, t[2])] for t in out]
    # Count of serach results.
    count = len([re.findall(pattern, item[2]) for item in out])

    return dict(count=count, result=result)
예제 #42
0
def team_submission_text(name, text):
    '''
    Build the confirmation message for a team submission.

    The team is the cleaned part of ``text`` between its first and second
    '='; ``name`` is the person greeted.
    '''
    team = utils.clean_string(text.split('=')[1])
    template = '''Hi {}, you have submitted {} as your team for this week!

If this is incorrect please redo your submission'''
    return template.format(name, team)
예제 #43
0
    def fetch_translations(self):
        """
        Pull finished translations from the remote service into the store.

        Lists the remote requests through XML-RPC and, for each one flagged
        "ready", looks up the local TranslationRequest by its external id
        ('shortname'). Requests already marked finished are skipped. When the
        number of returned sentences equals the number of store units, each
        unit receives its sentence as a fuzzy target and the request becomes
        finished; otherwise the request is marked as an error.
        """
        proxy = xmlrpclib.ServerProxy(self.url)

        # Fetch the translation requests (and make sure the result is a list)
        requests = utils.cast_to_list(proxy.list_requests(self.token))

        # Retrieve the translation requests that are "ready"
        completedRequests = [entry for entry in requests if entry['ready']]

        # Process the completed requests
        for completedRequest in completedRequests:
            # Get the result
            result = proxy.list_results(self.token, completedRequest['request_id'])

            # Get the external request id and sentences
            external_request_id = completedRequest['shortname']
            result_sentences = utils.clean_string(result['result'].strip()).split('\n')

            # FIXME: Add exception handling when the translation request is not found.
            try:
                # Look up the original translation request
                request = TranslationRequest.objects.get_by_external_id(external_request_id)

                # TODO: Should we delete the translations, or filter them some way in Serverland to prevent re-retrieving translations? This could get expensive...
                # (this line used to read "requst" and raised NameError)
                if request.status == STATUS_FINISHED:
                    # Only update if the translation isn't marked as finished.
                    continue

                # Fetch the Pootle store for the corresponding translation project and fill in the translations.
                store = Store.objects.get(translation_project = request.translation_project)

                # Get all of the units
                units = store.unit_set.all()

                # If the sentence length doesn't match, then we don't have a one-to-one correspondence
                # between sentences. Mark the translation request with an error
                if len(units) != len(result_sentences):
                    request.status = STATUS_ERROR
                    print("An error occurred with request %s" % request)
                else:
                    # Start adding the translations to Pootle (lazy).
                    for unit, sentence in zip(units, result_sentences):
                        unit.target = sentence
                        unit.state = pootle_store.util.FUZZY
                        unit.save()

                    # Set the status of the current TranslationRequest as completed
                    request.status = STATUS_FINISHED

                request.save()

            except ObjectDoesNotExist:
                # Best effort: a result without a matching local object is skipped.
                pass
예제 #44
0
파일: CppClass.py 프로젝트: oracal/cppstub
    def parse_header(self, string):
        """
        Parse C++ header text and record the classes and methods it declares.

        The text is split on access specifiers ("public:", "private:",
        "protected:"); the access level starts as "private". Inside each
        section, nested classes/structs are parsed recursively into
        ``CppClass`` objects and removed from the text, then regular methods
        and constructors/destructors are turned into ``CppMethod`` objects.
        Results are appended to ``self.classes[access]`` and
        ``self.methods[access]``; the raw text is kept in ``self.code``.
        """
        self.code = string
        access = "private"
        sections = re.split(r"((?:public|private|protected)\s*:)+", string)
        access_re = re.compile(r"(?:(public|private|protected)\s*:)+")
        # The template type is a whole identifier (it used to match a single
        # character) and whitespace may separate "template<...>" from the
        # class keyword.
        class_re = re.compile(
            r"(?:template\s*\<\s*(?:class|typename)\s+(?P<template_type>\w+)\s*\>\s*)?(?P<class_or_struct>class|struct)\s+(?P<class_name>\w+)(?:\s*:\s*(?P<inherited_classes>[:\w,\s]+))?\s*\{"
        )
        # regular expression to match class methods in the form <return type> <method_name>(<method_arguments>) [<const>] [<implemented>]
        method_re = re.compile(
            r"(?:(?:template\s*\<\s*(?:class|typename)\s+(?P<template_type>[\w]+)\s*\>)|(?:(?P<virtual>virtual)[ \t\f\v]*)|(?:(?P<static>static)[ \t\f\v]*))?(?:(?P<method_const_return_type>const)[ \t\f\v]*)?(?:[ \t\f\v]*(?P<method_return_type>[<>^:\w&* \t\f\v]+?[&*\s]+))(?P<method_name>\w+[,()\s\w~*+=/*^!<>\[\]|&%-]*)\s*\((?P<method_arguments>(?:[\w=\"'.&\s*:\[\]]+\s*,?\s*)*)\)\s*(?P<const>const)?\s*(?P<implemented>{)?"
        )
        # regular expression to match special class methods such as constructor and destructor
        special_method_re = re.compile(
            r"(?P<method_name>~?"
            + re.escape(self.name)
            + r")\s*\((?P<method_arguments>(?:[\w&\s*:]+[\[\]\w]+\s*,?\s*)*)\)\s*(?P<implemented>{)?"
        )
        for section in sections:
            # An access specifier only switches the level for what follows.
            specifier = access_re.search(section)
            if specifier is not None:
                access = specifier.group(1)
                continue

            # Peel off nested classes one by one until none is left.
            while True:
                class_match = class_re.search(section)
                if class_match is None:
                    break
                struct = class_match.group("class_or_struct") == "struct"
                class_name = class_match.group("class_name")
                inherited_classes = []
                if class_match.group("inherited_classes"):
                    inherited_classes = [i.strip() for i in class_match.group("inherited_classes").split(",")]
                # start at the opening brace of the class body
                start = class_match.end() - 1
                end, output = parse_brackets(section[start:])
                section = section[: class_match.start()] + section[end:]
                cpp_class = CppClass(class_name, inherited_classes, struct, self)
                cpp_class.parse_header(output)
                if class_match.group("template_type") is not None:
                    cpp_class.templated = True
                    cpp_class.template_type = class_match.group("template_type")
                self.classes[access].append(cpp_class)

            for match in method_re.finditer(section):
                method_name = match.group("method_name").strip()
                method_return_type = match.group("method_return_type")
                if method_return_type is not None:
                    method_return_type = method_return_type.strip()
                # deal with the regular expression incorrectly matching a constructor
                if method_return_type == "":
                    continue
                method_arguments = []
                for argument in clean_string(match.group("method_arguments")).split(","):
                    # deal with default arguments: keep what precedes "="
                    # (indexing instead of slicing kept a single character)
                    if "=" in argument:
                        argument = argument.split("=", 1)[0]
                    method_arguments.append(argument.strip())
                cpp_method = CppMethod(method_name, method_arguments, method_return_type, self)
                if match.group("virtual") is not None:
                    cpp_method.virtual = True
                if match.group("implemented") is not None:
                    cpp_method.implemented = True
                if match.group("const") is not None:
                    cpp_method.const = True
                if match.group("template_type") is not None:
                    cpp_method.templated = True
                    cpp_method.template_type = match.group("template_type")
                if match.group("method_const_return_type") is not None:
                    cpp_method.const_return_type = True
                if match.group("static") is not None:
                    cpp_method.static = True
                self.methods[access].append(cpp_method)

            for match in special_method_re.finditer(section):
                method_name = match.group("method_name").strip()
                method_arguments = [i.strip() for i in match.group("method_arguments").split(",")]
                cpp_method = CppMethod(method_name, method_arguments, parent=self)
                if match.group("implemented") is not None:
                    cpp_method.implemented = True
                self.methods[access].append(cpp_method)
예제 #45
0
def sentence_to_words(string):
    """Lower-case and clean a sentence, then lemmatize its space-separated tokens."""
    cleaned = utils.clean_string(string.lower(), stripdot=True)
    return map(stemmer.lemmatize, cleaned.split(' '))