Пример #1
0
def date_parser(date):
    """Split a ``YEAR-MONTH-DAY`` string into its integer components.

    Args:
        date: Text made of three integers separated by hyphens,
            e.g. ``"2020-01-15"``.

    Returns:
        A dict with integer values under the keys ``'month'``, ``'day'``
        and ``'year'``.

    Raises:
        ValueError: If ``date`` does not contain two hyphen separators or
            one of the parts is not an integer.
    """
    # Cap at two splits so surplus text stays in the day field and is
    # rejected by int() instead of being silently discarded.
    parts = date.split('-', 2)
    if len(parts) != 3:
        raise ValueError("expected a YEAR-MONTH-DAY string, got %r" % (date,))
    year, month, day = (int(part) for part in parts)
    return {'month': month, 'day': day, 'year': year}
Пример #2
0
	def _editDate(self, date):
		format_date = None

		if date.find('about ') >= 0:
			new_date = date[date.find('about ') + 6 :]

			# Number of days to push back for current day
			if new_date[ :new_date.find(' ')] == 'a': 
				num_day = 1
			else: 
				num_day = int(new_date[ :new_date.find(' ')])

			# Finds a possible date of joining (based off Facebook estimation)
			if new_date.find('weeks') >= 0: 
				format_date = datetime.now() - timedelta(days= 7 * num_day)
			else: 
				format_date = datetime.now() - timedelta(days= 30 * num_day)
		elif date.find('Joined on ') >= 0:
			new_date = date[date.find('Joined on ') + 10: ]
			today = datetime.today()
	
			if new_date == 'Monday': 		offset = (today.weekday()) % 7
			elif new_date == 'Tuesday': 	offset = (today.weekday() - 1) % 7
			elif new_date == 'Wednesday':	offset = (today.weekday() - 2) % 7
			elif new_date == 'Thursday':	offset = (today.weekday() - 3) % 7
			elif new_date == 'Friday': 		offset = (today.weekday() - 4) % 7
			elif new_date == 'Saturday': 	offset = (today.weekday() - 5) % 7
			else:							offset = (today.weekday() - 6)

			format_date = datetime.now() - timedelta(days=offset)
		else:
			format_date = datetime.strptime(date[date.find('on ') + 3:], '%B %d, %Y')
			
		return format_date
Пример #3
0
 def __compareTime(self, date):
     hour = datetime.now().time().hour
     minute = datetime.now().time().minute
     hour_param = int(date[:date.find(':')])
     minute_param = int(date[date.find(':') + 1:])
     if hour == hour_param and minute == minute_param:
         return True
     else:
         return False
def get_week_result(url, input_text="USD"):
    """Scrape calendar rows from ``url`` and label each one with a sentiment.

    Only ``<tr class="calendar__row">`` rows that have both an "actual" cell
    and a currency cell are reported. The label depends on the span found
    inside the "actual" cell: ``worse`` gives ``"bullish"``, ``better`` gives
    ``"bearish"``, anything else gives ``"neutral"``.

    Args:
        url: Address of the calendar page to download.
        input_text: Currently unused; kept so existing callers keep working.

    Returns:
        A list of ``[index, currency, sentiment, date_text]`` lists, where
        ``index`` counts reported rows from 1 and ``currency`` is characters
        1..3 of the currency cell's text. ``date_text`` comes from the most
        recent date span seen so far, or ``''`` if none has appeared yet.
    """
    currency_class = 'calendar__cell calendar__currency currency'
    date_class = 'calendar__cell calendar__date date'

    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'html.parser')

    final = []
    # Most recent date span. Rows without their own date reuse it; starting
    # from None avoids an unbound name when the first row carries no date.
    date = None
    for row in soup.find_all('tr', class_='calendar__row'):
        if not row.find('td', class_='calendar__cell calendar__actual actual'):
            continue
        span = row.find('td', class_='actual')

        # NOTE(review): "worse" maps to bullish and "better" to bearish, which
        # looks inverted - kept as-is, confirm against the intended strategy.
        if span.find('span', class_='worse'):
            sentiment = "bullish"
        elif span.find('span', class_='better'):
            sentiment = "bearish"
        else:
            sentiment = "neutral"

        currency_cell = row.find('td', class_=currency_class)
        if not currency_cell:
            continue

        date_cell = row.find('td', class_=date_class)
        date_ = date_cell.find('span', class_='date') if date_cell is not None else None
        if date_ is not None:
            date = date_

        if date is None:
            date_text = ''
        else:
            # The text normally sits in the span's first child tag; fall back
            # to the span's own text when there is no child.
            inner = date.find()
            date_text = inner.text if inner is not None else date.text

        final.append([len(final) + 1, currency_cell.text[1:4], sentiment, date_text])

    return final
Пример #5
0
def getData(file, cols):
    """Load chart rows embedded in a saved HTML page into a DataFrame.

    The sixth ``<script>`` element of the page is scanned for entries of the
    form ``[new Date("YYYY/MM/DD"),v1,v2,...]``. Each entry becomes one row.

    Args:
        file: Path of the HTML file to read.
        cols: Column names for the result. The first column receives the
            date and must be called ``'Date'``; the rest are numeric.

    Returns:
        A pandas DataFrame whose ``'Date'`` column holds ``datetime`` objects
        and whose other columns hold the natural logarithm of each value,
        with ``0.0`` substituted where the page says ``null``.

    Raises:
        IndexError: If the page has fewer than six ``<script>`` elements.
        ValueError: If a date or number cannot be parsed, or a value is not
            positive (``math.log`` rejects it).
    """
    with open(file) as f:
        soup = BeautifulSoup(f, 'html.parser')
    text = str(soup.find_all('script')[5])

    results = []
    cursor = 0
    while True:
        beginning = text.find("[new Date", cursor)
        if beginning == -1:
            break
        ending = text.find("]", beginning)
        if ending == -1:
            # Unterminated entry at the end of the script: nothing usable.
            break
        row = text[beginning + 1:ending].split(",")
        # row[0] looks like 'new Date("2020/01/31")': drop the closing
        # parenthesis and everything up to the opening one, keeping the
        # quoted date.
        date = row[0][:-1]
        row[0] = date[date.find("(") + 1:]
        results.append(row)
        cursor = ending

    df = pd.DataFrame(results, columns=cols)
    df['Date'] = df['Date'].apply(
        lambda x: datetime.strptime(x, '"%Y/%m/%d"'))

    def to_log(raw):
        # Missing points are reported as the literal text null.
        if raw.strip() == "null":
            return float(0)
        return math.log(float(raw))

    for num_col in cols[1:]:
        df[num_col] = df[num_col].apply(to_log)
    return df
Пример #6
0
def extract_google(query_terms, startDate, endDate):
    """Collect news articles for each query term within a date range.

    Every term is searched as ``"India Technology <term>"``. Each hit is
    downloaded and parsed; hits that fail to download are skipped.

    Args:
        query_terms: Iterable of search terms.
        startDate: First day of the range as ``'YYYY-MM-DD'``. An empty
            string means seven days before today.
        endDate: Last day of the range as ``'YYYY-MM-DD'``. An empty string
            means today.

    Returns:
        A list of dicts with the keys ``'source'``, ``'url'``, ``'date'``,
        ``'title'``, ``'content'`` and ``'img'``.

    Raises:
        ValueError: If a non-empty date is not in ``'YYYY-MM-DD'`` form.
    """
    today = datetime.datetime.today().date()
    # Defaults use the same layout that is parsed just below. They used to be
    # written as dd/mm/YYYY, which made the parse fail, and start/end were
    # the wrong way round.
    if len(startDate) == 0:
        startDate = (today - datetime.timedelta(days=7)).strftime('%Y-%m-%d')
    if len(endDate) == 0:
        endDate = today.strftime('%Y-%m-%d')
    # NOTE(review): the range is handed over as dd/mm/yy - confirm this is the
    # layout the installed GoogleNews version expects.
    startDate = datetime.datetime.strptime(startDate,
                                           '%Y-%m-%d').strftime('%d/%m/%y')
    endDate = datetime.datetime.strptime(endDate,
                                         '%Y-%m-%d').strftime('%d/%m/%y')
    final_articles = []
    print(startDate)
    print(endDate)
    print("Crawling Starting")
    # News is pulled from Google News.
    googlenews = GoogleNews()
    googlenews.setTimeRange(startDate, endDate)
    for query in query_terms:
        googlenews.clear()

        # Build the search phrase for this term.
        googlenews.search("India Technology " + query)

        for item in googlenews.result():
            source = item['media']
            url = item['link']
            try:
                article = Article(url)
                article.download()
                article.parse()
            except Exception:
                # Best effort: one unreachable article must not stop the crawl.
                print("Trouble downloading so skipping")
                continue
            content = article.text

            # Two-sentence summary with a leading bracketed tag removed.
            # NOTE(review): the summary is computed but never stored - confirm
            # whether it was meant to replace 'content'.
            temp_content = re.sub(r'^\s*[\(\[].*?[\)\]]\s*', '', content)
            sentences = sent_detector.tokenize(temp_content)
            summary = (" ".join(sentences[:2]).strip())

            date = item['date']
            if (date.find('ago') != -1):
                # Relative stamps such as "3 hours ago" are replaced by a date.
                # NOTE(review): `current` is not defined in this function;
                # presumably a module-level datetime - verify.
                date = current.date()
            title = item['title']
            img = item['img']
            # Record the extracted fields for this article.
            final_articles.append({
                'source': source,
                'url': url,
                'date': date,
                'title': title,
                'content': content,
                'img': img
            })
    return final_articles
Пример #7
0
def split_datetime_range(date):
    """Return the two endpoints of a date range.

    Args:
        date: One of
            * a two-item tuple or list, returned as its two items;
            * a string containing ``'~'``, split at that character;
            * any other value, used for both endpoints.

    Returns:
        A ``(start, end)`` tuple.

    Raises:
        ValueError: If a tuple/list does not have exactly two items or the
            string contains more than one ``'~'``.
    """
    if isinstance(date, (tuple, list)):
        d0, d1 = date
        return d0, d1

    try:
        text_types = (str, unicode)  # noqa: F821 - only exists on Python 2
    except NameError:
        # Python 3 has no separate unicode type; str covers all text.
        text_types = (str,)

    if isinstance(date, text_types) and '~' in date:
        d0, d1 = date.split('~')
        return d0, d1
    return date, date
Пример #8
0
def ru_months(date, simple=None):
	"""Swap the first month marker found in ``date`` for its replacement.

	Each entry of the module-level ``months`` table is indexed as
	``(marker, short_form, full_form)``. For the first entry whose marker
	occurs in ``date``, every occurrence of the marker is replaced: by the
	short form when ``simple`` is truthy and the short form is non-empty,
	otherwise by the full form.

	Returns the rewritten string, or ``''`` when no marker occurs in
	``date``.
	"""
	rewritten = []
	try:
		for entry in months:
			marker = entry[0]
			if date.find(marker) == -1:
				continue
			if simple and entry[1]:
				substitute = entry[1]
			else:
				substitute = entry[2]
			rewritten.append(date.replace(marker, substitute))
		return rewritten[0]
	except IndexError:
		return ''
Пример #9
0
def extract_date(date) -> object:
    """Convert ``"<Month> <day>, <year>"`` text into ``"<m>/<day>/<year>"``.

    Args:
        date: Text such as ``"January 5, 2020"`` with a full English month
            name.

    Returns:
        The date as ``"<month number>/<day>/<year>"`` (month without a
        leading zero, day and year copied as written), or ``""`` when the
        text does not have the expected shape or the month is not
        recognised.
    """
    month_numbers = {
        "January": "1",
        "February": "2",
        "March": "3",
        "April": "4",
        "May": "5",
        "June": "6",
        "July": "7",
        "August": "8",
        "September": "9",
        "October": "10",
        "November": "11",
        "December": "12",
    }

    # Exactly one comma separates "<Month> <day>" from the year.
    pieces = date.split(",")
    if len(pieces) != 2:
        return ""
    mnth_day, year = pieces

    tokens = mnth_day.split()
    if len(tokens) != 2:
        return ""
    month, day = tokens

    number = month_numbers.get(month)
    if number is None:
        return ""
    return number + "/" + day + "/" + year.strip()
def format_goodwill(listing):
    """Tidy a scraped listing record in place and return it.

    * ``listing[0]``: the title is cut out of the raw text. It starts 21
      characters after the first newline and stops at the following
      carriage return.
    * ``listing[4]``: a ``M/D/YYYY <time>`` stamp becomes ``YYYY-MM-DD``,
      with single-digit month and day zero-padded.
    * For ``'Buy It Now'`` listings (``listing[9]``), the value in slot 1
      moves to slot 3 and slot 1 becomes ``'ex'``.
    """
    def pad(text):
        # Only one-character fields get a leading zero.
        return "0" + text if len(text) == 1 else text

    raw_title = listing[0]
    tail = raw_title[raw_title.find("\n"):]
    listing[0] = tail[21:tail.find("\r")]

    stamp = listing[4]
    calendar_part = stamp[:stamp.find(' ')]
    first = calendar_part.find('/')
    second = first + 1 + calendar_part[first + 1:].find("/")
    year = calendar_part[second + 1:]
    month = pad(calendar_part[:first])
    day = pad(calendar_part[first + 1:second])
    listing[4] = "-".join([year, month, day])

    if listing[9] == 'Buy It Now':
        listing[3], listing[1] = listing[1], 'ex'
    return listing
Пример #11
0
def parse_datetime(date):
    """
    Parse a date given as text or as an integer.

    :param date: a string or an int; any falsy value yields ``None``
    :return: whatever ``dateparser.parse`` produces. For strings that do not
        contain ``birthday_sep`` the result is additionally passed through
        ``fix_cn_parse_year_error``. Unsupported types give ``None``.
    """
    if not date:
        return None
    if isinstance(date, str):
        # str.find() returns -1 (truthy) on a miss and 0 (falsy) for a match
        # at the very start, so its result has to be compared, not used as a
        # boolean.
        # NOTE(review): assumes the year fix is meant for strings WITHOUT the
        # separator, as the branch order suggests - confirm.
        if date.find(birthday_sep) != -1:
            return dateparser.parse(date)
        return fix_cn_parse_year_error(dateparser.parse(date))
    if isinstance(date, int):
        return dateparser.parse(str(date))
    return None
Пример #12
0
def CallFromSQL(x, y, date):
    """Read rows from ``DailyTable`` in the SQLite file ``DailyDB``.

    Args:
        x: Symbol to filter on.
        y: Unused; kept for compatibility with existing callers.
        date: Insertion day. It is converted with ``str()``; the filter on
            symbol and day is applied only when that text contains a
            hyphen, otherwise the whole table is returned.

    Returns:
        A pandas DataFrame of the matching rows, with the table's column
        names when at least one row was found. An empty result is an empty
        DataFrame without column names.
    """
    date = str(date)
    query = 'select * from DailyTable'
    params = ()
    if date.find("-") > -1:
        # Bound parameters keep quotes in the inputs from altering the SQL.
        query += " where Symbol = ? and insertionDay = ?;"
        params = (x, date)

    SQLite3conn = sqlite3.connect("DailyDB")
    try:
        cursor = SQLite3conn.cursor()
        try:
            cursor.execute(query, params)
            # The first field of every description entry is the column name.
            titles = pandas.DataFrame(cursor.description)[0]
            results = pandas.DataFrame(cursor.fetchall())
        finally:
            cursor.close()
    finally:
        # Release the database handle even when the query fails.
        SQLite3conn.close()

    if len(results) > 0:
        results.columns = titles
    return results
Пример #13
0
def getNews(url):
    try:
        res = requests.get(url)
        soup = BeautifulSoup(res.text,"lxml")
    
        title = soup.title.string
        #print "Title: "+title
		kinds = soup.find(attrs={"name":"section"})['content']
		#print "Kinds: "+kinds

        date = soup.find("time").text.strip()    
        date = date.replace(u"年","-")
        date = date.replace(u"月","-")
        date = date.replace(u"日","")
		if kinds != u"寵物動物":
			if date.find(':')!=-1:
					 date+=":00"
				else:
					 date+=" 00:00:00"
			date = date.encode('utf-8')
		date = datetime.strptime(date,"%Y-%m-%d %H:%M:%S")
Пример #14
0
    def get(self, request, id):
        """Render the detail page of one hike.

        The context contains the hike's own fields plus landmarks, the image,
        a day-by-day programme, participants, free places and comments.
        Consecutive days that have neither description nor caption are
        merged into the preceding entry as a range.

        Args:
            request: The incoming HTTP request.
            id: Primary key of the ``Hike`` to show.

        Returns:
            The response produced by ``render`` for ``hike.html``.
        """
        hike = Hike.objects.get(id=id)
        context = base_context(request, title=hike.name)
        this_hike = {}

        # All public landmarks are offered on the page.
        this_hike['landmarks'] = list(Landmark.objects.filter(is_public=True))
        if hike.image.name is not None and hike.image.name != "":
            this_hike['image'] = hike.image
        else:
            this_hike['image'] = ''
        days = []
        # Russian month names in the genitive case, as used after a day number.
        months = [
            'января', 'февраля', 'марта', 'апреля', 'мая', 'июня', 'июля',
            'августа', 'сентября', 'октября', 'ноября', 'декабря'
        ]

        for day in Day.objects.filter(hike=hike).order_by('date'):

            # presumably names look like "<word> <number>" - verify against the model
            day_id = int(day.name.split()[1])
            day_label = str(day.date.day) + ' ' + months[day.date.month - 1]

            # A day opens a new entry when it is day 1 or has text of its own.
            # `not days` also opens one when there is nothing to merge into
            # yet, which previously raised IndexError.
            if (not days or day_id == 1 or day.description != ''
                    or day.caption != ''):
                data = {}
                data['description'] = day.description
                data['header'] = day.caption
                data['date'] = day_label
                data['name'] = day.name
                data['id'] = str(day_id)
                # Newest entry goes first so the merge branch can reach it
                # as days[0].
                days.insert(0, data)
            else:
                latest = days[0]

                # Drop a previously appended range end before adding the new one.
                latest['date'] = latest['date'].partition(' - ')[0]
                latest['name'] = latest['name'].partition(' - ')[0]

                latest['date'] += ' - ' + day_label
                latest['name'] += ' - ' + str(day_id)

        days = sorted(days, key=lambda x: int(x['id']))

        for day in days:
            # "1 мая - 3 мая" -> "1 - 3 мая": when both ends of a range fall
            # in the same month, the first mention of the month is removed.
            month = day['date'].split(' ')[-1]
            if day['date'].count(month) > 1:
                date = day['date']
                cut = date.find(month)
                day['date'] = date[:cut - 1] + date[cut + len(month):]

        this_hike['days'] = days
        participants = []
        usernames = []

        for participant in hike.participants.all():
            usernames.append(participant.username)
            props = [full_name(participant), participant.username, '']
            if participant.profile.avatar.name != '':
                props[2] = participant.profile.avatar
            props.append("/account/" + participant.username)
            participants.append(props)

        this_hike['participants'] = participants
        this_hike['usernames'] = usernames

        if hike.limit_of_members - len(participants) > 0:
            this_hike['vacancies'] = hike.limit_of_members - len(participants)
            this_hike['free_plases'] = "Yup"
        else:
            this_hike['vacancies'] = 0
            this_hike['free_plases'] = "Nope"

        # Work on a copy: updating hike.__dict__ directly would overwrite
        # attributes of the model instance itself.
        context['content'] = dict(hike.__dict__)
        context['content'].update(this_hike)
        context['content']['creator'] = hike.creator

        context['rus_date'] = beauty_date_interval(hike.start_date,
                                                   hike.end_date, True, True)

        # Russian plural of "place": 1 место, 2-4 места, otherwise мест.
        # 11-14 are exceptions that always take мест.
        count = int(this_hike['vacancies'])
        last_digit = count % 10
        last_two = count % 100
        if last_digit == 1 and last_two != 11:
            noun = 'место'
        elif 2 <= last_digit <= 4 and not 12 <= last_two <= 14:
            noun = 'места'
        else:
            noun = 'мест'
        context['number_of_free_places'] = str(
            this_hike['vacancies']) + ' ' + noun
        context['author_full_name'] = full_name(hike.creator)

        # Comments

        comments = []

        hike_comments_models = Message.objects.filter(
            hike=hike).order_by('creation_datetime')

        for ct_model in hike_comments_models:
            ct = {}
            ct['author'] = full_name(ct_model.author)
            ct['author_username'] = ct_model.author.username
            ct['comment'] = ct_model.text

            ct['avatar'] = ''
            if ct_model.author.profile.avatar.name != '':
                ct['avatar'] = ct_model.author.profile.avatar

            published_time = ct_model.creation_datetime.strftime(
                '%H:%M, %d ') + months[ct_model.creation_datetime.month - 1]
            ct['time_published'] = published_time
            comments.append(ct)
        context['comments'] = comments

        return render(request, "hike.html", context)
Пример #15
0
import time
#Using datetime.strptime()
# Deadline, given on the command line as "MM/DD/YYYY HH:MM:SS".
datetemp = sys.argv[1]
datetemp1 = datetime.strptime(datetemp, "%m/%d/%Y %H:%M:%S")

# Parenthesised single-argument form prints the same on Python 2 and 3.
print(datetemp1)

lateAuthor = []
submissionDate = []
# Scanner state: 0 = looking for a "Date:" line later than the deadline,
#                1 = looking for the next "Author:" line.
count = 0
# `with` closes the log even when a line fails to parse.
with open("logAfterAssignment1.txt", "r") as myFile:
    for date in myFile:
        if count == 0:
            if date.find("Date:") != -1:
                ds = date.split()
                # Reorder "<weekday> <Mon> <day> <time> <year>" into
                # "<Mon> <day> <year> <time>" for strptime.
                x = (str(ds[2]) + ' ' + str(ds[3]) + ' ' + str(ds[5]) + ' ' + str(ds[4]))
                subDate = datetime.strptime(x, "%b %d %Y %H:%M:%S")
                if subDate > datetemp1:
                    submissionDate.append(date)
                    count = 1
        else:
            # NOTE(review): this pairs a late date with the NEXT "Author:" line
            # in the file; if the log lists Author before Date within a
            # commit, that is the following commit's author - confirm the
            # log layout.
            if date.find("Author:") != -1:
                lateAuthor.append(date)
                count = 0

# Keyed by author line, so only the last late date per author is kept.
mydic = dict(zip(lateAuthor, submissionDate))
Пример #16
0
 def _get_time(self, commit):
     date = commit["date"]
     date = date[:date.find("T")]
     time = datetime.strptime(date, '%Y-%m-%d').date()
     return time
Пример #17
0
def parser():
    """Build an iCalendar feed from the Spaceflight Now launch schedule.

    The page is downloaded and cut into blocks, one per occurrence of
    DATETAG. From each block the date, mission name, launch window,
    location and description are extracted by searching for the
    module-level tag constants (MISSIONTAG, LAUNCHWINDOWTAG, LOCTAG,
    DESCTAG, ...). Blocks in which no month name and day can be found are
    skipped.

    Returns the serialized calendar from ``cal.to_ical()``.

    NOTE(review): relies on ``urllib2`` and ``HTMLParser().unescape``,
    which look like the Python 2 APIs - confirm the target interpreter.
    """

    req = urllib2.Request('https://spaceflightnow.com/launch-schedule/')
    response = urllib2.urlopen(req)
    the_page = response.read()

    # Current UTC time; its year and month are used to guess each event's year.
    d = datetime.utcnow()
    h = HTMLParser()
    cal = Calendar()
    cal.add('version', 2.0)
    cal.add('prodid', '-//madkat//SpaceX feed//EN')

    # Get all DATETAG indexes
    date_group = [m.start() for m in re.finditer(DATETAG, the_page)]

    # For each date index in date_group, extract the other data
    for _idx in range(len(date_group)):

        # A block runs from this DATETAG to the next one (or to the end of
        # the page for the last block). Both bounds are taken before _idx is
        # reused as a scratch variable further down.
        date_idx = date_group[_idx]
        if _idx + 1 == len(date_group):
            block_end = len(the_page)
        else:
            block_end = date_group[_idx + 1]

        date_start_idx = date_idx + len(DATETAG)
        date_end_idx = the_page[date_start_idx:block_end].find(
            SPANENDTAG) + date_start_idx
        date = the_page[date_start_idx:date_end_idx]
        # When the date text contains '/', only the part after the first
        # slash is kept.
        if '/' in date:
            _idx = date.find('/')
            date = date[_idx + 1:]

        found_month = False
        mth_idx = 0

        # Look for a short (SH_MTH) or full (FL_MTH) month name; on a match
        # mth_idx is the zero-based month and `day` is the text that follows
        # the name.
        while not found_month and mth_idx < 12:
            if SH_MTH[mth_idx] in date:
                _idx = date.find(SH_MTH[mth_idx])
                day = date[_idx + len(SH_MTH[mth_idx]) + 1:]
                found_month = True
                break
            if FL_MTH[mth_idx] in date:
                _idx = date.find(FL_MTH[mth_idx])
                day = date[_idx + len(FL_MTH[mth_idx]) + 1:]
                found_month = True
                break
            mth_idx += 1

        # If I find a day, or month, start building datetime object
        # Otherwise, I just skip the event
        # (`day` is only read when found_month is True, so it is always bound here.)
        if found_month and day != '':
            event = Event()
            # Check if day has '/' in it
            year = d.year
            _idx = day.find('/')
            if _idx != -1:
                day = day[_idx + 1:]

            # A month earlier than the current one is taken to mean next year.
            mth = mth_idx + 1
            if mth < d.month:
                year += 1

            # Get event title
            mission_start_idx = the_page[date_end_idx:block_end].find(
                MISSIONTAG) + len(MISSIONTAG) + date_end_idx
            mission_end_idx = the_page[mission_start_idx:block_end].find(
                SPANENDTAG) + mission_start_idx
            mission = the_page[mission_start_idx:mission_end_idx]
            # Each run of non-ASCII characters is replaced by a single '-'.
            mission = re.sub(r'[^\x00-\x7F]+', '-', mission)
            # Escape all sorts of weird characters
            mission = mission.decode("ascii", errors="ignore").encode()
            # Escape HTML characters & add summary
            event.add('summary', h.unescape(mission))

            # Get launch window
            launch_win_start_idx = the_page[mission_end_idx:block_end].find(
                LAUNCHWINDOWTAG) + len(LAUNCHWINDOWTAG) + mission_end_idx
            launch_win_end_idx = the_page[launch_win_start_idx:block_end].find(
                SPANSTARTTAG) + launch_win_start_idx
            launch_win_raw = the_page[launch_win_start_idx:launch_win_end_idx]
            is_gmt_idx = launch_win_raw.find(GMT)
            # If there is no launch window yet, just make it a 24hr event (all day equivalent?)
            if is_gmt_idx == -1:
                launch_win = "0000-2359"
            else:
                # NOTE(review): re.search returns None when LAUNCHREGEX does
                # not match, which would make .group(0) raise - confirm the
                # page always matches.
                launch_win = re.search(LAUNCHREGEX,
                                       launch_win_raw[:is_gmt_idx]).group(0)

            # Parse launch window
            # The slices below read the window as "HHMM-HHMM" (or a single
            # "HHMM" start time when there is no '-').
            if '-' in launch_win:
                # I have a launch window!
                ev_date = datetime(year,
                                   mth,
                                   int(day),
                                   int(launch_win[:2]),
                                   int(launch_win[2:4]),
                                   0,
                                   0,
                                   tzinfo=pytz.utc)
                ev_date_end = datetime(year,
                                       mth,
                                       int(day),
                                       int(launch_win[5:7]),
                                       int(launch_win[7:]),
                                       0,
                                       0,
                                       tzinfo=pytz.utc)
            else:
                ev_date = datetime(year,
                                   mth,
                                   int(day),
                                   int(launch_win[:2]),
                                   int(launch_win[2:4]),
                                   0,
                                   0,
                                   tzinfo=pytz.utc)
                # Without an end time the event is given a one-hour duration.
                ev_date_end = ev_date + timedelta(hours=1)
            event.add('dtstart', ev_date)
            event.add('dtend', ev_date_end)

            # Get event location
            loc_start_idx = the_page[launch_win_end_idx:block_end].find(
                LOCTAG) + len(LOCTAG) + launch_win_end_idx
            loc_end_idx = the_page[loc_start_idx:block_end].find(
                DIVENDTAG) + loc_start_idx
            location = the_page[loc_start_idx:loc_end_idx]
            event.add('location', location)

            # Get event description
            desc_start_idx = the_page[launch_win_end_idx:block_end].find(
                DESCTAG) + launch_win_end_idx + len(DESCTAG)
            desc_end_idx = the_page[desc_start_idx:block_end].find(
                UPDATETAG) + desc_start_idx
            desc = the_page[desc_start_idx:desc_end_idx].decode(
                "ascii", errors="ignore").encode()
            desc_filtered = h.unescape(desc)
            # If it didn't have a launch window, write a comment in description
            if launch_win == "0000-2359":
                desc_filtered = "Launch window currently unavailable. Please check at a later time. " + desc_filtered
            event.add('description', desc_filtered)

            # Add event to calendar
            cal.add_component(event)

    # Return calendar
    return cal.to_ical()
Пример #18
0
def _wiki_item_fields(sp, item, ref_owner):
    """Return ``(txt, anchs, urls)`` for one list item of an events page.

    ``txt`` is the item's text cut at its last full stop, ``anchs`` the
    ``text=>title`` pairs of its internal ``/wiki/`` links, and ``urls`` the
    external source links. Footnote references are looked up on
    ``ref_owner``. Multiple values are joined with ``'||'``.
    """
    txt = item.text
    txt = txt[:txt.rfind('.')]
    anchs = '||'.join(link.text + '=>' + link['title']
                      for link in item("a")
                      if link['href'].startswith('/wiki/'))

    # Preferred source: external links written directly in the item.
    urls = '||'.join(lk['href'] for lk in item("a", "external text"))
    if len(urls) == 0:
        # Next: follow footnote markers to their entries in the reference
        # list and take the external link found there.
        sups = ref_owner('sup', 'reference')
        refs = [sp.find(id=lk.a['href'][1:]) for lk in sups]
        if len(refs) > 0:
            hrefs = [ref('a', 'external free')[0]['href']
                     for ref in refs if len(ref('a', 'external free')) > 0]
            if len(hrefs) == 0:
                hrefs = [ref('a', 'external text')[0]['href']
                         for ref in refs if len(ref('a', 'external text')) > 0]
            urls = '||'.join(hrefs)
    if len(urls) == 0:
        # Last resort: bare numbered external links.
        urls = '||'.join(lk['href'] for lk in item('a', 'external autonumber'))
    return txt, anchs, urls


def wiki_event_extract_2003(month,year,dir):
    """Extract events from a saved Wikipedia current-events page.

    Written for the page layout used from 2003.1 to 2006.4.

    Args:
        month: Month part of the file name.
        year: Year part of the file name; also used to recognise the date
            headlines, which must contain it.
        dir: Path prefix of the saved pages. The file read is
            ``dir + month + '_' + year``.

    Returns:
        A list of ``(date, day, type, title, txt, anchs, urls)`` tuples, one
        per event. ``day`` is the weekday name found in the headline (or
        ``''``), ``type`` is always ``''``, and ``title`` is the topic link
        text of a grouped item when its heading is followed by ``':'``.
    """
    events = []
    # %-formatting gives the same output under Python 2 and 3.
    print('%s %s' % (month, year))
    with open(dir+month+'_'+year) as page:
        sp = BeautifulSoup(page)
    spans = sp.find_all('span','mw-headline')
    span_dates = [s for s in spans if s.text.find(year)>=0]
    weekdays = ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday']
    for span_date in span_dates:
        date = span_date.text
        day = next((d for d in weekdays if d in date), '')
        event_type = ''
        lis = span_date.find_next('ul')('li')
        parent = None
        for li in lis:
            title = ''
            lis2 = li('li')
            if len(lis2)>0:
                # Grouped item: one topic with its events nested below it.
                parent = li
                try:
                    if li.contents[1].startswith(':'):
                        title = li.a.text
                except Exception:
                    # The heading did not have the expected shape; carry on
                    # without a title.
                    print('Type err')
                for li2 in lis2:
                    txt, anchs, urls = _wiki_item_fields(sp, li2, li)
                    events.append((date,day,event_type,title,txt,anchs,urls))
            else:
                # Nested items were already handled with their parent above.
                if li.find_parent().find_parent()==parent:
                    continue
                txt, anchs, urls = _wiki_item_fields(sp, li, li)
                events.append((date,day,event_type,title,txt,anchs,urls))
    return events
 def filter(filtered_listing, unfiltered_listing):
     """Copy the countdown value of a raw listing into the cleaned listing.

     Slot 4 of ``unfiltered_listing`` is rendered with ``str()``. The value
     is taken from 16 characters past the start of ``data-countdown`` up to
     one character before the next ``>``, and stored in slot 4 of
     ``filtered_listing``, which is returned.
     """
     markup = str(unfiltered_listing[4])
     value_start = markup.find('data-countdown') + 16
     remainder = markup[value_start:]
     value_stop = remainder.find('>') - 1
     filtered_listing[4] = remainder[:value_stop]
     return filtered_listing
    Boroughs = []

    for hood in Results:
        hoods = hood.find(class_='result-hood')
        if hoods is not None:
            boro = hoods.get_text()
            correct = re.search(r'\((.*?)\)', boro).group(1)
            Boroughs.append(correct)
        else:
            boro = None
            Boroughs.append(boro)

    PostDates = []

    for date in Results:
        dates = date.find(class_='result-date')['datetime']
        PostDates.append(dates)

    Description = []
    specs = []

    for link in Links:
        listing = requests.get(str(link))

        Listing_soup = BeautifulSoup(listing.content, features="lxml")

        Desc = Listing_soup.find('section', {'id': 'postingbody'})
        if Desc is not None:
            stuff = Desc.get_text()
            Description.append(stuff)
        else:
def format_goodwill_listing(formatted_item, original_item):
    """Copy the countdown value of a raw item into the formatted item.

    Slot 4 of ``original_item`` is rendered with ``str()``. The value is
    taken from 16 characters past the start of ``data-countdown`` up to one
    character before the next ``>``, and stored in slot 4 of
    ``formatted_item``, which is returned.
    """
    markup = str(original_item[4])
    value_start = markup.find('data-countdown') + 16
    remainder = markup[value_start:]
    value_stop = remainder.find('>') - 1
    formatted_item[4] = remainder[:value_stop]
    return formatted_item