Exemplos de findLoc em Python, exemplos de isEventRegex.findLoc em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: spider.py Projeto: kongmunist/CarnegieCalendar

def isEvent(text):
    """Return True when *text* contains a date, a time, and a location.

    Newlines are replaced with spaces before matching. The three finders are
    tried in the order date, time, location and the check short-circuits, so
    later finders are not called once one of them returns ``None``.

    Args:
        text: the string to inspect.

    Returns:
        bool: True only if ``ier.findDate``, ``ier.findTime`` and
        ``ier.findLoc`` all return something other than ``None``.
    """
    text = text.replace("\n", " ")
    # Identity comparison is the correct test for None; `!= None` can be
    # hijacked by a custom __ne__/__eq__ on the returned object.
    return (ier.findDate(text) is not None
            and ier.findTime(text) is not None
            and ier.findLoc(text) is not None)

Exemplo n.º 2

0

Exibir arquivo

Arquivo: spider.py Projeto: kongmunist/CarnegieCalendar

def testOnWebsite(siteLinks):
    """Scan the given pages for event-like text and write the findings to file.

    For every URL the page is fetched, ``script``/``style``/``aside`` elements
    are dropped, and the remaining text is collapsed to single spaces. Pages
    whose text passes ``isEvent`` contribute one row to five parallel columns
    (date, first time, second time or "", location, title), which are handed
    to ``fileio.writeToFile`` at the end.

    Args:
        siteLinks: an iterable of iterables of URLs (the function loops over
            each inner collection in turn).

    Returns:
        None. Results are passed to ``fileio.writeToFile``.
    """
    print("start testing")

    # Five parallel columns; every accepted page appends exactly one value to
    # each so the rows stay aligned.
    dates, startTimes, endTimes, locations, titles = [], [], [], [], []

    for links in siteLinks:
        for linq in links:
            print(linq)

            try:
                response = get(linq)
            except Exception as exc:
                # Previously the error was swallowed and execution fell
                # through, leaving `response` unbound (first failure) or
                # pointing at the previous page (later failures).
                print("Skipping", linq, "-", exc)
                continue

            soup = BeautifulSoup(response.text, "lxml")
            for tag in soup(["script", "style", "aside"]):
                tag.decompose()

            # Collapse all runs of whitespace to single spaces.
            txt = " ".join(soup.get_text().split())

            if not isEvent(txt):
                continue

            # Call each finder once and reuse the result.
            # NOTE(review): assumes the finders are deterministic for a given
            # text -- confirm against isEventRegex.
            date = ier.findDate(txt)
            times = ier.findTime(txt)
            loc = ier.findLoc(txt)
            # partition() keeps the whole text when there is no "|", whereas
            # txt[:txt.find("|")] silently dropped the last character.
            title = txt.partition("|")[0]

            print(txt)
            print("Time: ", times)
            print("Date: ", date)
            print("Location: ", loc)
            print(title)

            dates.append(date)
            startTimes.append(times[0])
            endTimes.append(times[1] if len(times) > 1 else "")
            locations.append(loc)
            titles.append(
                title if len(title) < 50 else _fallbackTitle(txt, loc))

    # NOTE(review): columns are passed as (dates, start times, end times,
    # locations, titles); confirm this matches fileio.writeToFile's signature.
    fileio.writeToFile(dates, startTimes, endTimes, locations, titles)


def _fallbackTitle(txt, loc):
    """Pick a title when the text before the first "|" is too long.

    Prefers the 100 characters of *txt* starting at *loc*; if *loc* cannot be
    searched for, falls back to the first keyword from ``ier.findKey`` over
    the first 5000 characters, and finally to ``"NA"``. Always returns a
    value so the caller's columns stay aligned.
    """
    try:
        start = txt.find(loc)
    except TypeError:
        # loc is not a string, so it cannot be located in the text.
        start = None
    if start is not None:
        return txt[start:start + 100]

    try:
        # First keyword, or "NA" when findKey yields nothing (previously no
        # value was appended in that case, misaligning the columns).
        return next(iter(ier.findKey(txt[:5000])), "NA")
    except Exception:
        # findKey is opaque here; keep the original best-effort fallback.
        return "NA"

Exemplo n.º 3

0

Exibir arquivo

            text) != None and ier.findLoc(text) != None:
        return True
    else:
        return False


#
# c = "CMU - Department of Materials Science and Engineering - Carnegie Mellon University Carnegie Mellon University ——— Search Search Search this site only Department of Materials Science and Engineering Welcome to MSE The Department of Materials Science and Engineering (MSE) is one of seven academic departments in Carnegie Institute of Technology, the engineering college at Carnegie Mellon. MSE has a long and distinguished tradition in materials education and research, and today our faculty continue to address the more important and challenging issues at the forefront of science and technology. Materials Science and Engineering is an interdisciplinary activity that applies the principles of basic sciences and engineering to understanding the behavior of materials, their development and applications. Both our undergraduate and graduate students are exposed to this interdisciplinary approach. Spring 2019 - Fridays at 11:30am - Doherty Hall 2210 Application Submission Cycles Fall Term of Entry Ph.D.: October 1 – December 15 M.S.: October 1 – January 15 Spring Term of Entry Ph.D.: July 1 – September 26 MSE EMPLOYMENT OPPORTUNITY Materials Characterization Facilities Specialist, MSE-2011151 Tweets by cmu_mse Materials Research Recent Research Highlights Make an impact in MSE today!Visit giving.cmu.edu/mse Carnegie Mellon College of Engineering Directions Carnegie Mellon University5000 Forbes AvenueWean Hall 3325Pittsburgh, PA 15213412-268-2700 Fax 412-268-7596 Legal Info www.cmu.edu © 2019 Carnegie Mellon University News & Events Newsletters Departmental Seminar Series People Faculty Research MSE Alumni Faculty Faculty-Courtesy Faculty-Emeritus Faculty-Adjunct Staff MSE Staff - Point of Contact Graduate Program Graduate Student Advisory Committee GSAC Summer Seminar Series Graduate Student Symposium Master of Science Programs Master of Science In Additive Manufacturing Dual Degree Programs Master of Science In Materials Science Master of Science In Materials Science 
and Engineering Doctor Of Philosophy Undergraduate Program Undergraduate Curriculum Objectives, Outcomes, Mission, Accreditations Core Course Content B.S. in MSE Sample Schedule MSE Additional Major Programs Research Opportunities Integrated Master and Bachelor (IMB) Degrees Careers Facilities SEM Training Course TEM Training Course Research Research Centers Computational Materials Science Inorganic Functionall Materials Manufacturing and Materials Microstructure Materials for Healthcare"


# Sample announcement text used as a manual check of the event detection.
# The string literal is closed early on its last line: the rest of that
# sentence follows as a trailing comment and is therefore NOT part of `c`.
c = """There will be a midterm review session tomorrow (2/10) at 4 pm in GHC 4215. We'll be going over examples for the topics that could be on the midterm. Please come with questions and post below with any topics you want to make sure are covered in the review!
This week's lab will be focused on midterm review (attendance is still required). Since the lab is the day before the midterm, we will be releasing the lab early to give you more time to work with it. We'll update this post when it's released.
Sunday Office Hours start this week! I'm sorry for the delay on this, but they are starting tomorrow. They'll be """#every Sunday 12-6 in BH 235B."""

# Print whether the sample is classified as an event, then the time, date and
# location extracted from it.
print(isEvent(c))
print(ier.findTime(c), ier.findDate(c), ier.findLoc(c))

# url = "https://hcii.cmu.edu/news"
# url = "https://thebridge.cmu.edu/events"

# soup = BeautifulSoup(response.text,"lxml")
#
# for script in soup(["script", "style", "aside"]):
#     script.decompose()

# for thing in soup.find_all("a"):
#     txt = thing.get("href")
#     print(txt)

#     try:
#         r = get(txt)

Exemplo n.º 4

0

Exibir arquivo

Arquivo: gmail.py Projeto: kongmunist/CarnegieCalendar

def getEmailData():
    """Collect event-like emails from the user's Gmail and write them to file.

    Steps:
      1. Load cached credentials from ``token.pickle``; refresh them or run
         the local OAuth flow (``client_id.json``, read-only Gmail scope)
         when they are missing or invalid, then save them back.
      2. List the mailbox's messages and fetch each one in ``full`` format.
      3. Decode every ``text/html`` part to text, wrap it in
         ``SUBJECTSTART+ ... +SUBJECTEND`` markers and keep it if ``isEvent``
         accepts it.
      4. For each kept email with one or two detected times, extract the
         date, begin/end time and location.

    Returns:
        Whatever ``fl.writeToFile(subjects, dates, beginTimes, endTimes,
        locations)`` returns. The five lists are row-aligned.
    """
    import logging

    logger = logging.getLogger(__name__)
    SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']

    creds = None
    # token.pickle stores the user's access and refresh tokens and is created
    # automatically when the authorization flow completes for the first time.
    # NOTE(security): pickle.load can execute arbitrary code from the file;
    # this is only safe while token.pickle is written solely by this program.
    if os.path.exists('token.pickle'):
        with open('token.pickle', 'rb') as token:
            creds = pickle.load(token)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'client_id.json', SCOPES)
            creds = flow.run_local_server()
        # Save the credentials for the next run.
        with open('token.pickle', 'wb') as token:
            pickle.dump(creds, token)

    service = build('gmail', 'v1', credentials=creds)

    listing = service.users().messages().list(userId="me").execute()
    # An empty listing carries no "messages" key; treat it as no messages
    # instead of raising KeyError.
    messageRefs = listing.get("messages", [])

    listDecode = []
    keyWordsAzure = []
    subject = []

    for ref in messageRefs:
        msg_id = ref["id"]
        message = service.users().messages().get(userId="me",
                                                 id=msg_id,
                                                 format='full').execute()
        try:
            payload = message["payload"]
            htmlParts = [
                part for part in payload["parts"]
                if part["mimeType"] == 'text/html'
            ]
            for part in htmlParts:
                raw = base64.urlsafe_b64decode(part["body"]["data"])
                subjectName = findSetWithSubject(payload["headers"])
                # Decode the bytes properly: str(bytes) would produce the
                # "b'...'" repr with escaped newlines rather than the HTML.
                html = raw.decode("utf-8", errors="replace")
                thing = BeautifulSoup(html, "lxml").get_text()
                thing = "SUBJECTSTART+ " + thing + " +SUBJECTEND"

                if isEvent(thing):
                    # Extract keywords before appending anything so the three
                    # lists cannot get out of step if this call fails.
                    keyWordVar = az.keyWordsML(thing)
                    listDecode.append(thing)
                    subject.append(subjectName)
                    keyWordsAzure.append(keyWordVar)
        except Exception:
            # Best effort: messages without HTML parts or with unexpected
            # structure are skipped, but no longer silently, and
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            logger.warning("Skipping message %s", msg_id, exc_info=True)

    # The result was never used by this function; the call is kept in case
    # makeDictionary has side effects -- TODO confirm and remove.
    makeDictionary(listDecode, keyWordsAzure)

    subjects = []
    dates = []
    beginTimes = []
    endTimes = []
    locations = []
    for subjectName, email in zip(subject, listDecode):
        overallTimes = er.findTime(email)
        # Only emails with exactly one or two detected times produce a row.
        if overallTimes is None or len(overallTimes) not in (1, 2):
            continue
        # Append the subject here, together with the other columns, so a
        # skipped email cannot shift subjects against dates/times/locations.
        subjects.append(subjectName)
        dates.append(er.findDate(email))
        beginTimes.append(reg.convertTime(overallTimes[0]))
        # A single detected time means there is no end time.
        endTimes.append(
            reg.convertTime(overallTimes[1]) if len(overallTimes) == 2 else "")
        locations.append(er.findLoc(email))

    return fl.writeToFile(subjects, dates, beginTimes, endTimes, locations)