def find_phones(self, text, leader=False):
     """Collect Swiss ("CH") phone numbers found in ``text``.

     With ``leader=False`` the whole of ``text`` is scanned.  With
     ``leader=True`` ``text`` is parsed as HTML and only the markup around
     the entries of ``self.words_for_company_leader`` is scanned: first the
     parent element of the text node matching each word and, only when that
     produced no phone at all, the grandparent element.  Any other value of
     ``leader`` skips scanning altogether.

     Errors never propagate: they are reported with ``print`` and whatever
     was collected so far is returned.

     Returns the result of ``self.unique_phones`` applied to the matched
     strings (or the raw list when an error interrupted processing).
     """
     phones = []

     def scan(fragment):
         # ``raw_string`` is the matched text exactly as written in the input;
         # reading it directly avoids parsing the match's printable form.
         for match in phonenumbers.PhoneNumberMatcher(fragment, "CH"):
             if match.raw_string:
                 phones.append(match.raw_string)

     try:
         if leader is False:
             scan(text)
         if leader is True:
             soup = BeautifulSoup(text, 'lxml')
             markup = str(soup)  # rendered once instead of once per word
             present = [word for word in self.words_for_company_leader
                        if word in markup]

             def scan_around(levels_up):
                 for word in present:
                     try:
                         # NOTE(review): ``word`` is used as a regex here but
                         # as a literal in the membership test above.
                         node = soup.find(text=re.compile(word))
                         for _ in range(levels_up):
                             node = node.parent
                         scan(str(node))
                     except Exception:  # noqa - best effort, try next word
                         continue

             scan_around(1)
             if not phones:
                 # Nothing next to the keyword itself: widen by one level.
                 scan_around(2)
         phones = self.unique_phones(phones)
     except Exception as e:
         print(f'find_phones: {e}')
     return phones
示例#2
0
def lf_contains_phonenumber(x):
    """Label ``x`` SPECIAL_NUMBER when its text looks like a phone number.

    Checks, in order:

    1. ``x.text`` must contain at least one digit, otherwise ABSTAIN.
    2. ``phonenumbers`` finds a US number in the text.
    3. The text matches one of three dash-separated word-character patterns.
    4. The text contains a dash and a US number is found once up to 15
       characters of ``x.text_blob`` before and after ``x.string_index``
       are placed around it.

    Returns SPECIAL_NUMBER on the first successful check, ABSTAIN otherwise.
    """
    lets = str(x.text)
    if not any(ch.isdigit() for ch in lets):
        return ABSTAIN

    def has_us_number(candidate):
        # Only existence matters, so stop at the first match.
        return any(
            True for _ in phonenumbers.PhoneNumberMatcher(candidate, "US"))

    if has_us_number(lets):
        return SPECIAL_NUMBER

    patterns = (
        r"\w{3}-\w{3}-\w{4}",
        # NOTE(review): the parentheses here form a group, they do not match
        # literal "(" / ")"; the next pattern covers the literal form.
        r"(\w{3})\w{3}-\w{4}",
        r"\(\w{3}\)\w{3}-\w{4}",
    )
    if any(re.search(pattern, lets) for pattern in patterns):
        return SPECIAL_NUMBER

    # Count dashes in the text itself; ``str.count("-", lets)`` counted
    # occurrences of ``lets`` inside "-" and was therefore always 0 here.
    if "-" not in lets:
        return ABSTAIN

    # Clamp at 0 so a small ``string_index`` does not turn into a negative
    # slice start (which would index from the end of the blob).
    start = max(x.string_index - 15, 0)
    pre_context = x.text_blob[start:x.string_index]
    post_context = x.text_blob[x.string_index:x.string_index + 15]
    if has_us_number(str(pre_context + lets + post_context)):
        return SPECIAL_NUMBER
    return ABSTAIN
def test(sent):
    """Print the US phone numbers found in ``sent``.

    First prints every match object, then every matched number formatted
    as E.164.
    """
    # The parameter is ``sent``; the body used to read an undefined ``text``.
    for match in phonenumbers.PhoneNumberMatcher(sent, "US"):
        print(match)

    for match in phonenumbers.PhoneNumberMatcher(sent, "US"):
        print(
            phonenumbers.format_number(match.number,
                                       phonenumbers.PhoneNumberFormat.E164))
示例#4
0
def forgot_password():
    """Handle the "forgot password" form.

    GET renders ``forgot_password.html``.  POST reads ``phone_email`` from
    the form, looks the account up by every Malaysian ("MY") phone number
    found in that value and by e-mail address, sends a reset mail to each
    account found, then flashes the same message whether or not anything
    matched and redirects back to this view.
    """
    if request.method != 'POST':
        return render_template('forgot_password.html')

    phone_email = request.form['phone_email']
    for match in phonenumbers.PhoneNumberMatcher(phone_email, 'MY'):
        phone_number = phonenumbers.format_number(match.number, phonenumbers.PhoneNumberFormat.E164)
        _send_password_reset(
            Qr.query.filter_by(phone_number=PhoneNumber(phone_number, 'MY')).first())
    _send_password_reset(Qr.query.filter_by(email=phone_email).first())

    flash("If any account related, we sent link to your email", category="success")
    return redirect(url_for('forgot_password'))


def _send_password_reset(qr_user):
    """Store a fresh reset token on ``qr_user`` and mail the reset link; no-op for a falsy user."""
    if not qr_user:
        return
    qr_user.token = secrets.token_urlsafe(90)
    db.session.commit()
    reset_link = url_for('reset_password', _external=True)+'?token='+qr_user.token
    msg = Message(subject="Resetting your QR Id password", sender='Pintar-AI', recipients=[qr_user.email])
    msg.html = render_template('mail_reset.html', username=qr_user.name, reset_link=reset_link)
    mail.send(msg)
示例#5
0
def qr_login():
    """Authenticate by phone number or e-mail and return the user's QR image.

    Reads ``phone_email`` and ``password`` from the form.  The account is
    looked up first by each Malaysian ("MY") phone number found in
    ``phone_email`` and then by e-mail address; the first account found
    decides the response.  When no account is found, an error is flashed
    and the client is redirected to ``login``.
    """
    phone_email = request.form['phone_email']
    password = request.form['password']

    for match in phonenumbers.PhoneNumberMatcher(phone_email, 'MY'):
        phone_number = phonenumbers.format_number(match.number, phonenumbers.PhoneNumberFormat.E164)
        qr_user = Qr.query.filter_by(phone_number=PhoneNumber(phone_number, 'MY')).first()
        if qr_user:
            return _qr_login_response(qr_user, password)

    qr_user = Qr.query.filter_by(email=phone_email).first()
    if qr_user:
        return _qr_login_response(qr_user, password)

    flash("Phone number or Email hasn't been registered", category="danger")
    return redirect(url_for('login'))


def _qr_login_response(qr_user, password):
    """Check ``password`` and verification state of ``qr_user``; return the QR image or a redirect to ``login``."""
    if not bcrypt.checkpw(password.encode('utf-8'), qr_user.password):
        flash("Wrong password", category="danger")
        return redirect(url_for('login'))
    if not qr_user.is_verified:
        flash("Please verify your account", category="danger")
        return redirect(url_for('login'))
    img = string_to_qr("qr_id/" + qr_user.phone_number.e164)
    return serve_pil_image(img)
示例#6
0
def _parse_phone(msg):
    """Return the first US phone number in ``msg`` that is not ``MY_PHONE``.

    Numbers are compared and returned in E.164 form.  Returns ``''`` when
    no such number is found.
    """
    # Generator: numbers are formatted lazily, one match at a time.
    e164_numbers = (
        phonenumbers.format_number(found.number,
                                   phonenumbers.PhoneNumberFormat.E164)
        for found in phonenumbers.PhoneNumberMatcher(msg, "US")
    )
    return next(
        (candidate for candidate in e164_numbers if candidate != MY_PHONE),
        '',
    )
def parse_by_phonenumbers(text):
    """Extract phone numbers from ``text`` (region "RU").

    Valid numbers are kept as they are.  Matches that are not valid but
    were written with a "-" are treated as numbers lacking a city code:
    those whose E.164 form has length ``NO_CITY_CODE_LEN_NUMBER`` get
    ``+7`` and ``MOSCOW_CODE`` put in front of everything after their
    first two characters.

    Returns a set of strings, each "8" followed by the E.164 number
    without its first two characters.
    """
    e164 = phonenumbers.PhoneNumberFormat.E164
    confirmed = set()
    unconfirmed = set()

    # POSSIBLE leniency is used so that numbers without a city code are
    # reported as well.
    matcher = phonenumbers.PhoneNumberMatcher(
        text, 'RU', phonenumbers.Leniency.POSSIBLE)
    for found in matcher:
        if phonenumbers.is_valid_number(found.number):
            bucket = confirmed
        elif '-' in found.raw_string:
            # Assumption: a number without a city code always has a "-".
            bucket = unconfirmed
        else:
            continue
        bucket.add(phonenumbers.format_number(found.number, e164))

    confirmed.update(
        f'+7{MOSCOW_CODE}{short[2:]}'
        for short in unconfirmed
        if len(short) == NO_CITY_CODE_LEN_NUMBER
    )

    # Convert the international format to the Russian internal format.
    return {f'8{full[2:]}' for full in confirmed}
示例#8
0
def find_phone_numbers(string: str, region_code: Optional[str] = None) -> list:
    """
    Find phone numbers in ``string`` with python-phonenumbers, the Python
    port of Google's libphonenumber.
    https://github.com/daviddrysdale/python-phonenumbers

    Parameters
    ----------
    string : str
        Text to scan for phone numbers.
    region_code : str, optional
        If specified, numbers written in that country's national format
        are found, e.g. 06.00.00.00.00 if "FR" is specified.
        Supported values: see the SUPPORTED_COUNTRY variable.

        Without a region only international-formatted numbers (with a
        +country code, e.g. +33 6 00 00 00 00) can be recognised.
        NOTE(review): the membership check below is applied to the default
        ``None`` as well, so calling without a region raises unless
        SUPPORTED_COUNTRY contains ``None`` - confirm.

    Returns
    -------
    list
        Matched phone numbers, each exactly as written in ``string``.

    Raises
    ------
    ValueError
        If ``region_code`` is not in SUPPORTED_COUNTRY.
    """
    if region_code not in SUPPORTED_COUNTRY:
        raise ValueError(
            'Please enter a valid contry code. See SUPPORTED_COUNTRY list.')
    return [
        match.raw_string
        for match in _phonenumbers.PhoneNumberMatcher(string, region_code)
    ]
示例#9
0
	def get_phones(self, text):
		"""Extract phone numbers from ``text`` and merge them with ``self.info['phones']``.

		Numbers are formatted as E.164.  Returns a list without duplicates,
		in no particular order.
		"""
		phones = []
		for match in phonenumbers.PhoneNumberMatcher(text, ''):
			# ``format_number`` requires a format argument; the call used to
			# omit it and raised TypeError on the first match.
			# NOTE(review): E.164 chosen as the canonical form - confirm it is
			# the form already stored in ``self.info['phones']``.
			phones.append(phonenumbers.format_number(
				match.number, phonenumbers.PhoneNumberFormat.E164))
		return list(set(phones + self.info['phones']))
示例#10
0
 def normalize_devices_in_string(string, with_emails=False, country="US"):
     """Return the phone numbers (and optionally e-mail addresses) in ``string``.

     ``string`` is converted with ``str``.  When ``with_emails`` is true the
     e-mail-like substrings come first in the result.  Phone numbers are
     matched for ``country``, formatted as E.164 and kept only when the
     formatted text is longer than 10 characters.
     """
     haystack = str(string)
     devices = []
     if with_emails:
         devices = re.findall(r'[\w\.-]+@[\w\.-]+', haystack)
     e164_numbers = (
         phonenumbers.format_number(found.number,
                                    phonenumbers.PhoneNumberFormat.E164)
         for found in phonenumbers.PhoneNumberMatcher(haystack, country)
     )
     devices.extend(number for number in e164_numbers if len(number) > 10)
     return devices
示例#11
0
def extract_number_and_standardise(list_of_text, default_country):
    """Extract phone numbers from each string and format them three ways.

    ``list_of_text`` must be a ``list`` (checked with ``assert``); each
    element is scanned using ``default_country`` as the region.

    Returns a tuple ``(national, international, e164)`` of three parallel
    lists holding every found number in the respective format.
    """
    # Precondition
    assert isinstance(list_of_text, list)

    national, international, e164 = [], [], []

    for text in list_of_text:
        for found in phonenumbers.PhoneNumberMatcher(text, default_country):
            styles = phonenumbers.PhoneNumberFormat
            targets = (
                (national, styles.NATIONAL),
                (international, styles.INTERNATIONAL),
                (e164, styles.E164),
            )
            for bucket, style in targets:
                bucket.append(phonenumbers.format_number(found.number, style))

    return national, international, e164
示例#12
0
def anon_ner(text):
    """Replace named entities and phone numbers in ``text`` with placeholders.

    Spans tagged PER, ORG and LOC by ``ner_tagger`` are replaced first;
    phone numbers are then masked for each of a fixed list of country
    codes.  Returns the anonymised string.
    """
    doc = Doc(text)
    doc.segment(segmenter)
    doc.tag_ner(ner_tagger)

    placeholders = {'PER': 'ИМЯ', 'ORG': 'ОРГАНИЗАЦИЯ', 'LOC': 'ЛОКАЦИЯ'}
    pieces = []
    last = 0
    for span in doc.spans:
        label = placeholders.get(span.type)
        if label is None:
            continue  # other span types are left untouched
        pieces.append(text[last:span.start])
        pieces.append(label)
        last = span.stop
    pieces.append(text[last:])
    result = ''.join(pieces)

    countries = [
        'AZ', 'AM', 'BY', 'KZ', 'KG', 'MD', 'RU', 'TJ', 'TM', 'UZ', 'UA'
    ]
    for country in countries:
        # Rebuild the string per country and rescan the rebuilt string.
        # Sharing one buffer and one offset across countries duplicated the
        # placeholder (and chunks of text) whenever several countries
        # matched the same number.
        pieces = []
        last = 0
        for match in phonenumbers.PhoneNumberMatcher(result, country):
            pieces.append(result[last:match.start])
            pieces.append('ТЕЛЕФОН ')
            last = match.end
        pieces.append(result[last:])
        result = ''.join(pieces)
    return result
示例#13
0
def test():
    """Print the results of a few manual phone-number extraction checks."""
    # ``print(...)`` with a single argument runs on Python 2 and Python 3;
    # the statement form is a SyntaxError on Python 3.
    print(find_phone_numbers("PHONE: 1021-34662020/21/22/23/24"))
    print(find_phone_numbers("1021-34662020"))
    print("done..")
    text = "Call me at ++1510-748-8230 if it's before 9:30, or on +703-4800500 after 10am. +971-9-4662020"
    for match in phonenumbers.PhoneNumberMatcher(text, "US"):
        print(match)
示例#14
0
def url_fetch(query=""):
    """Return URLs and phone numbers found in the documents matching ``query``.

    When ``query`` is empty, the query stored under ``last_query`` in the
    session is used if present.  The ``file`` field of every search hit is
    scanned for ``http(s)://`` URLs and for internationally formatted phone
    numbers.

    Returns a JSON string with ``urls`` (URL -> occurrence count) and
    ``pn`` (list of ``{'number': <E.164>, 'location': <description>}``).
    """
    if not query:
        last_query = session.get('last_query')
        if last_query is not None:
            query = last_query['query']

    q = {"fields": ["file"], "query": {"term": {"file": query}}}
    r = es.search(body=q, index=es_index)

    urls = []
    pn = []
    for doc in r['hits']['hits']:
        try:
            content = doc['fields']['file'][0]
        except KeyError:
            # Hit without the requested field: nothing to scan.  The guard
            # used to sit after this lookup had already been performed.
            continue
        urls.extend(re.findall(r'(https?://[^\s]+)', content))
        for match in phonenumbers.PhoneNumberMatcher(content, region=None):
            pn.append({
                'number':
                phonenumbers.format_number(
                    match.number, phonenumbers.PhoneNumberFormat.E164),
                'location':
                geocoder.description_for_number(match.number, "en")
            })
    return json.dumps({'urls': dict(Counter(urls)), 'pn': pn})
示例#15
0
def find_info(value_matrix, sheet):
    """Extract contact details from each row and write them to ``sheet``.

    For every row ``v`` of ``value_matrix``, ``v[0]`` is taken as the
    provider name and ``v[1]`` is searched for a US address, a URL and a US
    phone number.  Values that cannot be found are written as ``""``.
    The rows ``[provider, phone, url, address]`` are written to the range
    ``"C:H"`` with ``sheet.update_values``.
    """
    data_matrix = []

    for v in value_matrix:
        provider_string = v[0]

        # ``except Exception`` instead of a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        try:
            add_string = str(pyap.parse(v[1], country='US')[0])
        except Exception:
            add_string = ""  # no address found

        try:
            web_string = URLExtract().find_urls(v[1])[0]
        except Exception:
            web_string = ""  # no URL found

        phone_string = ""
        for match in phonenumbers.PhoneNumberMatcher(v[1], "US"):
            # When several numbers are present the last one wins.
            phone_string = phonenumbers.format_number(
                match.number, phonenumbers.PhoneNumberFormat.NATIONAL)

        data_matrix.append(
            [provider_string, phone_string, web_string, add_string])

    sheet.update_values("C:H", data_matrix)
示例#16
0
def anonymize(data):
    """Replace every US phone number found in ``data`` with ``'<Phone>'``.

    Best effort: if scanning or replacing fails, ``data`` is returned as it
    stands at that point instead of raising.
    """
    try:
        for match in phonenumbers.PhoneNumberMatcher(data, "US"):
            data = data.replace(match.raw_string, '<Phone>')
    except Exception:
        # Deliberately best-effort.  This replaces a ``return`` inside
        # ``finally``, which also discarded KeyboardInterrupt/SystemExit.
        pass
    return data
示例#17
0
 def get_phone_numbers(self, text) -> List:
     """Return the phone numbers found in ``text`` for this object's region.

     Each number is formatted in INTERNATIONAL style with every "-"
     replaced by a space.
     """
     numbers = []
     for found in pn.PhoneNumberMatcher(text, self.__region):
         international = pn.format_number(
             found.number, pn.PhoneNumberFormat.INTERNATIONAL)
         numbers.append(international.replace('-', ' '))
     return numbers
示例#18
0
    def handleEvent(self, event):
        """Emit a PHONE_NUMBER event for every phone number in ``event.data``.

        Event data whose hash is already in ``self.results`` is skipped.
        Always returns None.
        """
        kind = event.eventType
        origin = event.module
        payload = event.data
        digest = self.sf.hashstring(payload)

        # Process each distinct payload only once.
        if digest in self.results:
            return None
        self.results.append(digest)

        self.sf.debug("Received event, " + kind + ", from " +
                      origin)

        # Make potential phone numbers more friendly to parse
        normalised = payload.replace('.', '-')
        for found in phonenumbers.PhoneNumberMatcher(normalised, region=None):
            e164 = phonenumbers.format_number(
                found.number, phonenumbers.PhoneNumberFormat.E164)
            evt = SpiderFootEvent("PHONE_NUMBER", e164, self.__name__, event)
            evt.moduleDataSource = event.moduleDataSource or "Unknown"
            self.notifyListeners(evt)

        return None
示例#19
0
文件: tests.py 项目: pld/flows
    def evaluate(self, runner, run, context, text):
        """Test whether ``text`` contains a phone number.

        ``text`` is scanned using the country of ``run.org``; if nothing is
        found it is scanned again with a leading "+".  Returns
        ``Test.Result.match`` with the first number in E.164 form, or
        ``Test.Result.NO_MATCH``.
        """
        country = run.org.country

        # First attempt: the text exactly as it was sent.
        matches = phonenumbers.PhoneNumberMatcher(text, country)
        if not matches.has_next():
            # Second attempt: treat it as an international number.
            matches = phonenumbers.PhoneNumberMatcher('+' + text, country)

        if not matches.has_next():
            return Test.Result.NO_MATCH

        e164 = phonenumbers.format_number(
            next(matches).number, phonenumbers.PhoneNumberFormat.E164)
        return Test.Result.match(e164)
示例#20
0
    def handleEvent(self, event):
        """Emit a PHONE_NUMBER event for every phone number in ``event.data``.

        Events from a module whose name contains "sfp_spider" are attributed
        to ``event.sourceEvent``; all others to ``event`` itself.  Event data
        whose hash is already in ``self.results`` is skipped.  Always returns
        None.
        """
        parent = event.sourceEvent if "sfp_spider" in event.module else event
        kind = event.eventType
        origin = event.module
        payload = event.data
        digest = self.sf.hashstring(payload)

        # Process each distinct payload only once.
        if digest in self.results:
            return None
        self.results.append(digest)

        self.sf.debug("Received event, " + kind + ", from " +
                      origin)

        for found in phonenumbers.PhoneNumberMatcher(payload, region=None):
            e164 = phonenumbers.format_number(
                found.number, phonenumbers.PhoneNumberFormat.E164)
            self.notifyListeners(
                SpiderFootEvent("PHONE_NUMBER", e164, self.__name__, parent))

        return None
示例#21
0
    def handleEvent(self, event):
        """Extract phone numbers from content events and look up carriers.

        * TARGET_WEB_CONTENT, DOMAIN_WHOIS and NETBLOCK_WHOIS events: a
          PHONE_NUMBER event is emitted for every number found in the data.
        * PHONE_NUMBER events: the number is parsed and, when a carrier name
          is available, a PROVIDER_TELCO event is emitted.

        Event data whose hash is already in ``self.results`` is skipped.
        Always returns None.
        """
        kind = event.eventType
        origin = event.module
        payload = event.data
        digest = self.sf.hashstring(payload)

        # Process each distinct payload only once.
        if digest in self.results:
            return None
        self.results[digest] = True

        self.sf.debug("Received event, " + kind + ", from " + origin)

        if kind in ['TARGET_WEB_CONTENT', 'DOMAIN_WHOIS', 'NETBLOCK_WHOIS']:
            # Make potential phone numbers more friendly to parse
            normalised = payload.replace('.', '-')

            for found in phonenumbers.PhoneNumberMatcher(normalised, region=None):
                e164 = phonenumbers.format_number(
                    found.number, phonenumbers.PhoneNumberFormat.E164)
                evt = SpiderFootEvent("PHONE_NUMBER", e164, self.__name__, event)
                evt.moduleDataSource = event.moduleDataSource or "Unknown"
                self.notifyListeners(evt)

        if kind != 'PHONE_NUMBER':
            return None

        try:
            number = phonenumbers.parse(payload)
        except BaseException as e:
            self.sf.debug('Error parsing phone number: ' + str(e))
            return None

        try:
            number_carrier = carrier.name_for_number(number, 'en')
        except BaseException as e:
            self.sf.debug('Error retrieving phone number carrier: ' + str(e))
            return None

        if not number_carrier:
            self.sf.debug("No carrier information found for " + payload)
            return None

        # NOTE: a geocoder-based GEOINFO lookup used to follow here as
        # commented-out code; it is not performed.
        self.notifyListeners(
            SpiderFootEvent("PROVIDER_TELCO", number_carrier, self.__name__, event))
        return None
示例#22
0
    def parse_item(self, response):
        """Scrape phone numbers, e-mail addresses and a logo from a page.

        The whole HTML of ``response`` is searched for phone numbers (region
        ``self.region``, E.164), e-mail-like strings and the ``src`` of the
        first ``<img>``.  If the URL has not been seen before and at least
        one of the three was found, a ``ContactInfo`` item is yielded for
        further validation down the item pipeline, unless ``max_results``
        has already been reached.

        :param response: Scrapy Response object of the page to scrape
        :yield: ContactInfo with ``url``, ``emails``, ``numbers``, ``logos``
        :raises CloseSpider: once ``max_results`` (when set) is reached
        """
        html_text = str(response.text)

        contact_info = ContactInfo()
        contact_info['url'] = response.url

        potential_numbers = [
            pn.format_number(match.number, pn.PhoneNumberFormat.E164)
            for match in pn.PhoneNumberMatcher(html_text, self.region)
        ]

        potential_emails = re.findall(
            r'([a-zA-Z0-9+._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+)', html_text)

        # Only the first <img> is considered.  An <img> without ``src`` used
        # to raise an uncaught KeyError; it now simply yields no logo.
        potential_logos = []
        first_img = BeautifulSoup(html_text).find('img')
        if first_img is not None and first_img.has_attr('src'):
            potential_logos = [first_img['src']]

        found_anything = bool(
            potential_numbers or potential_emails or potential_logos)
        if response.url in self.seen_urls or not found_anything:
            return

        contact_info['emails'] = potential_emails if self.scrape_emails else []
        contact_info['numbers'] = potential_numbers if self.scrape_numbers else []
        contact_info['logos'] = potential_logos if self.scrape_logos else []

        # A falsy ``max_results`` means "no limit".
        if not self.max_results or self.total_results < self.max_results:
            self.total_results += 1
            self.seen_urls.add(response.url)
            yield contact_info

        logging.info(
            f"found {self.total_results}/{self.max_results} results")
        # Only compare against the limit when one is set: comparing with
        # None raises TypeError and a limit of 0 would close immediately.
        if self.max_results and self.total_results >= self.max_results:
            raise CloseSpider('Reached max results')
示例#23
0
 def iter_filth(self, text):
     """Yield a ``PhoneFilth`` for every phone number found in ``text``.

     Numbers are matched for ``self.region``; each filth carries the start
     and end offsets of the match and the matched text.
     """
     matcher = phonenumbers.PhoneNumberMatcher(text, self.region)
     for found in matcher:
         span = {
             'beg': found.start,
             'end': found.end,
             'text': found.raw_string,
         }
         yield PhoneFilth(**span)
示例#24
0
def verify_phone_number(number: str, region: str):
    """Return the phone numbers found in ``number`` for ``region``.

    The result is a list of the matched strings as written in the input, or
    ``messages.PHONE_EMPTY.value`` when nothing matched.
    """
    matcher = phonenumbers.PhoneNumberMatcher(number, region)
    numbers = [found.raw_string for found in matcher]
    return numbers if numbers else messages.PHONE_EMPTY.value
示例#25
0
def extract_phones_from_file(file):
    """Return every US phone number found in ``file`` in E.164 form.

    ``file`` is iterated line by line; each line is stripped before it is
    scanned.  Numbers appear in the order they are found, duplicates
    included.
    """
    return [
        pn.format_number(found.number, pn.PhoneNumberFormat.E164)
        for line in file
        for found in pn.PhoneNumberMatcher(line.strip(), "US")
    ]
示例#26
0
 def iter_filth(self, text, document_name: Optional[str] = None):
     """Yield a ``PhoneFilth`` for every phone number found in ``text``.

     Numbers are matched for ``self.region``.  Each filth carries the match
     offsets, the matched text, this detector's ``name`` and the given
     ``document_name``.
     """
     matcher = phonenumbers.PhoneNumberMatcher(text, self.region)
     for found in matcher:
         details = {
             'beg': found.start,
             'end': found.end,
             'text': found.raw_string,
             'detector_name': self.name,
             'document_name': document_name,
         }
         yield PhoneFilth(**details)
示例#27
0
def get_phones(text, country):
    """Return the distinct phone numbers found in ``text`` in E.164 form.

    ``country`` is the region passed to the matcher.  The order of the
    returned list is unspecified.
    """
    e164 = phonenumbers.PhoneNumberFormat.E164
    unique = {
        phonenumbers.format_number(found.number, e164)
        for found in phonenumbers.PhoneNumberMatcher(text, country)
    }
    return list(unique)
示例#28
0
文件: data_extr.py 项目: sc0eur/mama
def find_all(text):
    """Return the phone numbers found in ``text`` (region "RU").

    The result is a dict with two keys: ``"numbers"`` holds the matched
    strings as written in ``text``; ``"dates"`` is always an empty list
    (date extraction is not performed).
    """
    numbers = [
        found.raw_string
        for found in phonenumbers.PhoneNumberMatcher(text, "RU")
    ]
    return {"dates": [], "numbers": numbers}
示例#29
0
def format(text):
    """Rewrite every phone number found in ``text`` (region "RU").

    Each matched number gets its country code set to 1 and is replaced in
    place by its formatted form; the text between numbers is kept as is.
    """
    pieces = []
    cursor = 0
    for found in phonenumbers.PhoneNumberMatcher(text, 'RU'):
        number = found.number
        number.country_code = 1
        # Format code 1 - presumably PhoneNumberFormat.INTERNATIONAL; confirm.
        replacement = phonenumbers.format_number(number, 1)
        pieces.append(text[cursor:found.start])
        pieces.append(replacement)
        cursor = found.end
    pieces.append(text[cursor:])
    return ''.join(pieces)
示例#30
0
 def replace_phone(text, region):
     """Replace phone numbers in ``text`` with ``"{PHONENUMBER}"``.

     ``region`` is an iterable of region codes.  The regions are applied one
     after another, each to the output of the previous one, so numbers
     recognised for any of them are masked.  With no regions ``text`` is
     returned unchanged.
     """
     # Start from the input so an empty ``region`` no longer fails, and feed
     # each pass the previous result so that earlier replacements survive.
     cleaned = text
     for reg in region:
         pieces = []
         offset = 0
         for match in phonenumbers.PhoneNumberMatcher(cleaned, reg):
             pieces.append(cleaned[offset:match.start])
             pieces.append("{PHONENUMBER}")
             offset = match.end
         pieces.append(cleaned[offset:])
         cleaned = "".join(pieces)
     return cleaned