def loadHeaderInfo(self):
    """Initializes the object by parsing the data in the given file path."""
    # Open the file, storing the file stream in the variable fs
    with open(self.FilePath) as fs:
        # Read in the date and time of the test
        datetime_str = getLinesWith(fs, "Testing started at")
        # If we were returned something, then we need to parse the date and time
        # TODO: Use datetime objects
        if datetime_str:
            datetime_str = datetime_str[0].split("Testing started at")[1]
            # Remove the day of the week from the remaining text
            datetime_str = datetime_str[4:-1].strip()
            # Determine the month, day, and time from the first part of the text
            monthName = datetime_str[:3]
            month = str(monthAbbrToNum(monthName))
            datetime_str = datetime_str.split(monthName)[1].strip()
            day = str(datetime_str[:2])
            datetime_str = datetime_str[2:].strip()
            time = str(datetime_str[:8])
            datetime_str = datetime_str[8:].strip()
            # The year cannot be assumed to be in the same place or format, so
            # split on " 20"; the next two characters must be the year suffix
            year = "20" + datetime_str.split(" 20")[1][:2]
            self.Date = month + "/" + day + "/" + year
            self.Time = time
        else:
            # getLinesWith can't be used here because we don't know exactly what
            # we're looking for, so fall back to a regular-expression search.
            # Read in a chunk of text
            allText = fs.read(100)
            # Split on newline characters
            topChunk = allText.split("\n")[:5]
            from re import search
            datetime_str = ""
            for line in topChunk:
                # Look for a line starting with any two characters (hopefully
                # digits), then "/", two more characters, another "/", and two
                # more characters; ideally only the date/time line matches
                if search(r"^../../..", line):
                    datetime_str = line
                    break
            self.Date = datetime_str.split(" ")[0].strip()
            self.Time = datetime_str.split(" ")[1].strip()
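# A minimal sketch of the TODO above ("Use datetime objects"), assuming the
# header line looks like "Testing started at Mon Jun 01 12:34:56 EDT 2015";
# the helper name parse_test_start and the sample format are illustrative.
from datetime import datetime as dt

def parse_test_start(header_line):
    text = header_line.split("Testing started at")[1].strip()
    parts = text.split()  # e.g. ['Mon', 'Jun', '01', '12:34:56', 'EDT', '2015']
    # Drop the timezone abbreviation, which strptime cannot portably parse
    cleaned = " ".join(parts[:4] + parts[-1:])
    return dt.strptime(cleaned, "%a %b %d %H:%M:%S %Y")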
def get_diners_per_hour():
    hours_list = []
    hours_to_count = 12
    start_hour = 5
    customer_count = 0
    logs = diners_helper.get_access_logs_today()
    while start_hour <= hours_to_count:
        hour = {'count': None}
        for log in logs:
            datetime_str = str(log.access_to_room)
            date, time = datetime_str.split(" ")
            # Zero-pad the hour so two-digit hours (10-12) also match
            if time.startswith(str(start_hour).zfill(2)):
                customer_count += 1
        hour['count'] = customer_count
        hours_list.append(hour)
        customer_count = 0
        start_hour += 1
    return json.dumps(hours_list)
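# A more direct sketch of the same per-hour counting using collections.Counter;
# the log shape (log.access_to_room stringifying to "YYYY-MM-DD HH:MM:SS") is
# assumed from the snippet above.
from collections import Counter
import json

def get_diners_per_hour_alt(logs):
    counts = Counter(str(log.access_to_room).split(" ")[1][:2] for log in logs)
    return json.dumps([{'count': counts.get(str(h).zfill(2), 0)} for h in range(5, 13)])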
import time
from dateutil import parser

def convert_to_ms(datetime_str):
    # Get the milliseconds from the date/time string (between '.' and '+')
    ms = datetime_str.split('.')[1].split('+')[0]
    timeval = parser.parse(datetime_str)
    epoch_seconds = int(time.mktime(timeval.timetuple()))
    return epoch_seconds * 1000 + int(ms) - (time.timezone * 1000)
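# Usage sketch; the timestamp below is illustrative, and the result depends on
# the local timezone because time.mktime interprets the tuple as local time.
print(convert_to_ms("2021-03-01T12:00:00.123+0000"))  # epoch milliseconds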
def select_and_write_data(tree, maturity_level, year, xpath_selector, **kwargs):
    trs = tree.xpath(xpath_selector)
    if kwargs.get('reverse_order'):
        trs.reverse()
    # Prepare to write and loop over the scraped data for the URL
    with open(file_name(maturity_level, year), 'w') as csvfile:
        # Initialize the csv writer
        csv_writer = csv.writer(csvfile, quoting=csv.QUOTE_MINIMAL)
        for tr in trs:
            # Get the td cells from the tr parent
            date = tr.getchildren()[0].text.strip()
            value = tr.getchildren()[1].text.replace('%', '').replace(',', '.').strip()
            splitted_date = date.split('-')
            if splitted_date[-1] != str(year):
                # The current-year page shows past-year data as well; omit it
                continue
            if kwargs.get('dateformat') == 'mm-dd-yyyy':
                splitted_date = [splitted_date[2], splitted_date[0], splitted_date[1]]
            else:
                splitted_date.reverse()
            iso_8601 = '-'.join(splitted_date)
            # If empty value (during 2013 change)
            # if '-' not in value:
            csv_writer.writerow([iso_8601, value, maturity_level, granularity])
def reformat_datetime(datetime_str):
    """Reformat an ISO-style datetime string to a human-readable form."""
    parts = datetime_str.split('T')
    date = parts[0]
    time = parts[1].split('.')[0]
    return date + " " + time
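# Usage sketch with an illustrative ISO-8601 input:
# reformat_datetime("2020-05-17T13:45:30.123456")  -> "2020-05-17 13:45:30"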
def import_data(self, ids, context={}):
    obj = self.browse(ids)[0]
    if obj.import_type == 'auto':
        obj.import_auto()
    else:
        if not obj.file:
            raise Exception("File not found")
        if obj.file.split(".")[-1] != 'csv':
            raise Exception("Wrong File")
        fpath = get_file_path(obj.file)
        data = open(fpath, "r").read().split("\n")
        att_ids = []
        records = {}
        for row in data:
            lines = row.split(",")
            if not lines:
                continue
            size = len(lines)
            if size < 2:
                continue
            if size > 2:
                raise Exception("Wrong File")
            att_id = lines[0]
            att_date = lines[1]
            if not records.get(att_id):
                records[att_id] = []
            records[att_id].append(att_date)
            continue
            # Unreachable due to the continue above; kept from the original
            # TODO Check format date
            if att_id not in att_ids:
                att_ids.append(att_id)
        # self.set_att(ids, att_ids, context=context)
        emps = {emp['attendance_id']: emp['id']
                for emp in get_model("hr.employee").search_read([], ['attendance_id'])}
        att = get_model("hr.attendance")
        at_ids = att.search([])
        # XXX testing
        att.delete(at_ids)
        for att_id, lines in records.items():
            att_id = int(att_id)
            date_list = []
            for line in lines:
                datetime_str = line
                date = datetime_str.split(" ")[0]
                action = 'sign_in'
                if date in date_list:
                    action = 'sign_out'
                    # FIXME find the last record and overwrite the time
                date_list.append(date)
                att.create({
                    'employee_id': emps[att_id],
                    'action': action,
                    'time': datetime_str,
                })
        print("Done!")
def Date_Time(self, datetime_str):
    '''Returns a list of all voyages on a specific day.'''
    newDate = datetime_str.split("T")
    Voyage_list = self.get_voyage_list()
    retList = []
    for elem in Voyage_list:
        if newDate[0] in elem.flightOut.departure:
            retList.append(elem)
    return retList
import time

def create_log_file():
    path = "C:\\Temp\\"
    datetime_str = time.ctime()
    datetime_str = '-'.join(datetime_str.split())
    datetime_str = datetime_str.replace(":", "")
    target_file = datetime_str[4:]
    with open(path + target_file, 'a') as log_file:
        log_file.write("Begin Log\n")
    return target_file
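# For reference, a sketch of the filename this produces, assuming time.ctime()
# returns e.g. "Mon Jun  1 12:00:00 2020":
# "Mon Jun  1 12:00:00 2020" -> "Mon-Jun-1-120000-2020" -> "Jun-1-120000-2020"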
from datetime import datetime, timedelta

def convert_datetime(datetime_str: str, now: datetime) -> str:
    '''Convert "{n} hour(s) ago" or "{n} minute(s) ago" to an approximate datetime.'''
    extracted = datetime_str.split()
    if extracted[1].find("giờ") == 0:     # "giờ" = hours
        return (now - timedelta(hours=int(extracted[0]))).strftime("%H:%M %#d/%#m/%Y")
    elif extracted[1].find("phút") == 0:  # "phút" = minutes
        return (now - timedelta(minutes=int(extracted[0]))).strftime("%H:%M %#d/%#m/%Y")
    return datetime_str
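# Usage sketch; "3 giờ trước" is Vietnamese for "3 hours ago", and the %#d/%#m
# flags above are Windows-only (Linux uses %-d/%-m):
from datetime import datetime
print(convert_datetime("3 giờ trước", datetime(2021, 1, 1, 12, 0)))  # "09:00 1/1/2021" on Windows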
def convert_legoset(row):
    datetime_str = row['add_time']
    dates = datetime_str.split()
    add_datetime = ' '.join(dates[:-1]) + '.' + dates[-1]
    modi_datetime = add_datetime
    data = {
        'number': row['number'],
        'name': row['name'],
        'add_datetime': add_datetime,
        'modi_datetime': modi_datetime,
    }
    return data
def main(title, category, datetime_str):
    date, _ = (
        datetime_str.split(' ')[0].split('-'),
        datetime_str.split(' ')[1].split(':'),
    )
    filename = '{date}-{title}.markdown'.format(
        date='-'.join(date),
        title='-'.join(title.lower().split(' ')),
    )
    if not os.path.isfile(filename):
        with open(filename, 'w') as f:
            f.write(
                template.format(
                    title=title,
                    category=category,
                    datetime=datetime_str,
                )
            )
    else:
        print('File already exists')
        sys.exit(-1)
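# Usage sketch; template is assumed to be a module-level format string with
# {title}, {category}, and {datetime} fields:
# main('Hello World', 'notes', '2021-01-01 12:00:00')  # writes 2021-01-01-hello-world.markdown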
def start():
    # Focus on high-impact news
    get_data_today = today_data()
    high_impact_news = impact("High", get_data_today)
    for y in high_impact_news:
        if y['forecast'] == '':
            continue
        datetime_parts = y["date"].split("T")
        time = datetime_parts[1].split("-")[0]
        print(time)
def today_data():
    for x in range(len(data)):
        data_dict = data[x]
        datetime_parts = data_dict["date"].split("T")
        date = datetime_parts[0]
        time = datetime_parts[1]
        # dateTimeObj is assumed to come from the surrounding module,
        # e.g. dateTimeObj = datetime.now()
        dateStr = dateTimeObj.strftime("%Y-%m-%d")
        # if str(date) != dateStr:  # date filter disabled in the original
        data_today.append(data_dict)
    return data_today
def get_datetime(message):
    if len(message["date"].split(" ")) > 4:
        datetime_str = message["date"]
        if '(' in datetime_str:
            # Trim a trailing "(...)" timezone annotation
            delLen = -len(datetime_str.split("(")[1]) - 2
            datetime_str = datetime_str[:delLen]
        tempdate = str(
            re.search(
                r'([0-9]+) ([\w]+) ([0-9]+) ([0-9]+):([0-9]+):([0-9]+) ([\W]+[0-9]+)',
                str(datetime_str)).group())
        tempdate = tempdate.split(' ')
        if len(tempdate[0]) < 4 and len(tempdate[2]) < 4:
            # Expand a two-digit year to four digits
            tempdate[2] = "20" + tempdate[2]
        datetime_str = ' '.join(tempdate)
        return datetime_str
def index(request):
    template = loader.get_template('polls/index.html')
    HOST = '172.16.5.2'  # The remote host
    PORT = 3602          # The same port as used by the server
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.connect((HOST, PORT))
    s.send('MSTATUS\r'.encode())
    status = s.recv(1024)
    s.send('RSDATETIME\r'.encode())
    bdatetime = s.recv(1024)
    datetime_str = bdatetime.decode('UTF-8')
    list_datetime = datetime_str.split(',')
    s.send('RDMN\r'.encode())
    model = s.recv(1024)
    s.send('RDSN\r'.encode())
    serial = s.recv(1024)
    s.send('RMMESSAGES\r'.encode())
    faultb = s.recv(1024)
    fault = faultb.decode('UTF-8')
    array_fault = fault.split(',')
    s.close()
    # Disabled in the original:
    # resp = requests.get(url='http://172.16.5.1/temp_y_hum.ssi')
    # data['fecha_y_hora'], data['temperatura'], data['humedad'], data['error']
    context = {
        'device_status': status.decode('UTF-8'),
        'device_date': list_datetime[0],
        'device_time': list_datetime[1],
        'device_model': model.decode('UTF-8'),
        'device_serial': serial.decode('UTF-8'),
        'device_available_mem': array_fault[11],
        'history_list': History.objects.order_by('-datetime')[:8],
        'measuredata_list': MeasureData.objects.all(),
        'operative_task': BackgroundTaskModel.objects.all(),
    }
    return HttpResponse(template.render(context, request))
def process_post_info(self, post):
    username = post.find_element_by_xpath(".//div[@class=\"PostItem-username\"]").text
    datetime_str = post.find_element_by_xpath(".//div[@class=\"PostItem-date\"]").text
    datetime_split = datetime_str.split(' ')
    date = datetime_split[0]
    time = datetime_split[1]
    time_split = time.split(':')
    hour = time_split[0]
    minute = time_split[1]
    second = time_split[2]
    postinfo = {
        "poster": username,
        "post_date": date,
        "post_hour": hour,
        "post_minute": minute,
        "post_second": second,
    }
    self.posts.append(postinfo)
def getFancyDate(self, date):
    month_name = {
        'Jan.': 'enero', 'Feb.': 'febrero', 'Mar.': 'marzo', 'Apr.': 'abril',
        'May': 'mayo', 'June': 'junio', 'July': 'julio', 'Aug.': 'agosto',
        'Sep.': 'septiembre', 'Oct.': 'octubre', 'Nov.': 'noviembre',
        'Dec.': 'diciembre',
    }
    datetime_str = date.pCommon() if date else ''
    if datetime_str:
        date_s = datetime_str.split(' ')
        return (date_s[1].replace(',', '') + ' de ' + month_name[date_s[0]] +
                ', ' + date_s[3] + ' ' + date_s[4] + '.')
    return datetime_str
if time.time() - start_time > 10:
    print("i", i)
    start_time = time.time()
m = re.search(
    r"\(([0-9]+), '([^']+)', ([0-9]+), '([^']+)', '([^']+)', "
    r"datetime\.datetime\(([^\)]+)\), '([^']+)', ([0-9]+)\)", line)
try:
    userid, name, id, eventName, eventType, datetime_str, JSONParams, contextualInfoId = m.groups()
    name = name.strip()
    year, month, day, *rest = [int(item) for item in datetime_str.split(', ')]
    datetime_str = "{} {} {}".format(year, month, day)
except:
    import traceback
    print("cant parse:")
    print(line)
    traceback.print_exc()
    exit(1)
try:
    params = eval(JSONParams)
    lesson = params["lesson"]
import os, sys
import boto
from boto.s3.key import Key
from boto.s3.connection import S3Connection
import tarfile
from boto.exception import S3ResponseError
from datetime import datetime, timedelta

LOCAL_PATH = 'Local OS path'

# ###### Code for the last N days ###########
N = 2
# Use a separate name so the imported datetime class isn't shadowed
cutoff = datetime.now() - timedelta(days=N)
cutoff_str = str(cutoff.date()) + "T" + str(cutoff.time())
cutoff_str = cutoff_str.split(".")[0]
# ###########################################

AWS_KEY = ''
AWS_SECRET = ''
aws_connection = S3Connection(AWS_KEY, AWS_SECRET)
bucket = aws_connection.get_bucket('elbacesslog', validate=False)
for key in bucket.list():
    # Lexicographic comparison works here because both strings are ISO-ordered
    l = key.last_modified.split(".")[0]
    if l > cutoff_str:
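# A minimal alternative sketch that compares parsed datetime objects instead of
# relying on lexicographic string comparison; the last_modified value is illustrative.
from datetime import datetime, timedelta

cutoff = datetime.now() - timedelta(days=2)
last_modified = "2021-01-01T12:00:00.000Z"
parsed = datetime.strptime(last_modified.split(".")[0], "%Y-%m-%dT%H:%M:%S")
if parsed > cutoff:
    print("modified within the window")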
def showTimeText(self):
    datetime_str = QDateTime.currentDateTime().toString()
    temptime = datetime_str.split(' ')
    # temptime parts: [weekday, month, day, time, year]
    datetime_str = (temptime[-1] + "年" + temptime[1] + temptime[2] + "日" +
                    temptime[0] + temptime[-2])
    self.ui.label_time.setText(datetime_str)
def time_set(self, datetime_str):
    currentTime, currentDate = datetime_str.split('|')
    self.time.setText(currentTime)
    self.date.setText(currentDate)
def keep_date_remove_time(datetime_str):
    return datetime_str.split('T', 1)[0]
def extract_days(datetime_str) -> str:
    '''Given a string in the format "TuTh 11:00-12:20p", returns "TuTh".'''
    return datetime_str.split()[0].strip()
def date2time(datetime_str):
    # Extract the time between the "T" separator and the trailing "Z"
    return datetime_str.split("T")[1].split("Z")[0]
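# Usage sketch for the three helpers above, with illustrative inputs:
# keep_date_remove_time("2021-03-01T12:34:56Z")  -> "2021-03-01"
# extract_days("TuTh 11:00-12:20p")              -> "TuTh"
# date2time("2021-03-01T12:34:56Z")              -> "12:34:56"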
def toUTC(x):
    # Note: this assumes Python 2 semantics; under Python 3, encode() returns
    # bytes, and the str splits and strptime below would need plain str input
    dt = x.encode('utf-8')    # get datetime format
    dTime = dt.split(" ")[1]  # the time, not the date
    dt = dt.split(" ")[0]     # only the date part (not the time)
    dt = datetime.strptime(dt, '%Y-%m-%d')  # convert it to a datetime object
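# A Python 3 sketch of the same split-then-parse idea, operating on str rather
# than bytes (the sample timestamp is illustrative):
from datetime import datetime

date_part = "2021-03-01 12:34:56".split(" ")[0]
parsed = datetime.strptime(date_part, '%Y-%m-%d')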
def scrape(self):
    """
    Scrapes letterboxd.com for review pages.

    Works very similarly to the basic scrape function. Takes in ids from imdb
    and checks to see if they exist on letterboxd. If not, it will hit an
    exception and move on. Movies with no reviews also hit an exception. This
    is much slower than imdb due to differing website design.
    """
    id_list = self.get_ids()
    t = time.perf_counter()
    movie_id = []
    rating = []
    reviews = []
    username = []
    likes = []
    date = []
    review_id = []
    iteration_counter = 0
    broken = []
    page_count = 0
    query = """
    SELECT movie_id, primary_title
    FROM movies
    """
    curs, conn = self.connect_to_database()
    curs.execute(query)
    fetched = curs.fetchall()
    movie_ids = set(row[0] for row in fetched)
    movie_titles = {row[0]: row[1] for row in fetched}
    for count, id in enumerate(id for id in id_list if id in movie_ids):
        print("----------------------------------------")
        try:
            t1 = time.perf_counter()
            review_count = 0
            # self.locate(id)
            url_initial = f"https://www.letterboxd.com/imdb/{id}"
            time.sleep(randint(3, 6))
            initial_response = requests.get(url_initial)
            title = ""
            try:
                soup = BeautifulSoup(initial_response.text, 'html.parser')
                title = soup.find(class_="headline-1 js-widont prettify").get_text()
                title = title.replace(" ", "-").lower()
                print("Found: ", title, movie_titles[id])
            except Exception as e:
                print(f"Unable to find a title for this movie at index: {id}")
                print("This is normal and expected behavior")
                raise Exception(e)
            url_reviews = initial_response.url + 'reviews/by/activity/'
            print(url_reviews)
            # Initially this was to be sorted by recency, but with fewer than
            # 12 reviews only sorting by popularity is available
            time.sleep(randint(3, 6))
            response = requests.get(url_reviews)
            if response.status_code != 200:
                time.sleep(randint(3, 6))
                response = requests.get(url_reviews)
                if response.status_code != 200:
                    print(f"call to {url_reviews} failed with status code {response.status_code}!")
                    continue
            soup = BeautifulSoup(response.text, 'html.parser')
            items = soup.find_all(class_='film-detail')
            if len(items) == 0:
                print(f"No reviews for {id} {title}")
                continue
            print(f"ID: {id} at index {self.all_ids.index(id)}")
            while True:
                if iteration_counter >= self.max_iter_count:
                    df = self.letterboxd_dataframe(movie_id, review_id, rating,
                                                   reviews, date, username)
                    self.letterboxd_insert(df)
                    movie_id.clear()
                    rating.clear()
                    reviews.clear()
                    username.clear()
                    likes.clear()
                    date.clear()
                    review_id.clear()
                    df = df.iloc[0:0]
                    iteration_counter = 0
                iteration_counter += 1
                for item in items:
                    body = item.find(class_="body-text -prose collapsible-text")
                    append = body['data-full-text-url']
                    if item.find(class_="reveal js-reveal") or item.find(class_="collapsed-text"):
                        text_url = 'https://www.letterboxd.com' + append
                        time.sleep(randint(3, 4))
                        fulltext = requests.get(text_url)
                        if fulltext.status_code != 200:
                            time.sleep(randint(3, 6))
                            fulltext = requests.get(text_url)
                            if fulltext.status_code != 200:
                                print(f"call to {text_url} failed with status code {fulltext.status_code}!")
                                continue
                        fulltext = re.sub(r'\<[^>]*\>', "", fulltext.text)
                        reviews.append(fulltext)
                    else:
                        reviews.append(body.get_text())
                    review_count += 1
                    movie_id.append(id.replace("tt", ""))
                    append = append.split(":", 1)[1].replace("/", "")
                    review_id.append(append)
                    try:
                        rating1 = str(item.find(class_="attribution"))
                        found = re.search(r'rating -green rated-\d+', rating1).group()
                        rate = int(found.split("-")[-1])
                        rating.append(rate)
                    except Exception:
                        # 11 is the sentinel used when no rating could be extracted
                        rating.append(11)
                    username.append(item.find(class_="name").get_text())
                    if item.find('span', '_nobr').get_text():
                        dates = item.find('span', '_nobr').get_text()
                        date.append(dates)
                    else:
                        datetime_str = str(item.find('time', class_="localtime-dd-mmm-yyyy"))
                        extract = datetime_str.split('"')
                        dates = str(extract[3])
                        date.append(dates[:10])
                if soup.find('a', class_="next"):
                    page_count += 1
                    url_more = url_reviews + 'page/' + str(page_count + 1) + '/'
                    print(url_more)
                    time.sleep(randint(3, 6))
                    response = requests.get(url_more)
                    if response.status_code != 200:
                        time.sleep(randint(3, 6))
                        response = requests.get(url_more)
                        if response.status_code != 200:
                            print(f"call to {url_more} failed with status code {response.status_code}!")
                            continue
                    soup = BeautifulSoup(response.text, 'html.parser')
                    items = soup.find_all(class_='film-detail')
                else:
                    print('end of this movie')
                    page_count = 0
                    break
            # While loop ends here
            t2 = time.perf_counter()
            finish = t2 - t1
            # if count == 0 and os.path.exists(f"Logfile{self.scraper_instance}.txt"):
            #     os.remove(f"Logfile{self.scraper_instance}.txt")
            # print("Logging")
            # self.create_log(title, review_count, None, finish)
        except Exception as e:
            broken.append(id)
            print("Broken!", id)
            err1, err2, tb = sys.exc_info()
            print(err1, err2)
            print(traceback.print_tb(tb))
            continue
    try:
        df = self.letterboxd_dataframe(movie_id, review_id, rating, reviews,
                                       date, username)
        self.letterboxd_insert(df)
    except Exception as e:
        print("error while creating dataframe or inserting into database")
        raise Exception(e)
    t3 = time.perf_counter()
    total = t3 - t
    print(f"Scraped {count + 1} movies in {round(total, 2)} seconds")
    print('All done!\n')
    print("The following IDs were not scraped successfully:")
    self.show(broken)
    if len(broken) == 0:
        print("none")
import pandas as pd
import csv

df = pd.read_csv("15to17data.csv")
columns = df.columns.values.tolist()

with open('0to8.csv', 'w') as outcsv0to8, \
        open('9to17.csv', 'w') as outcsv9to17, \
        open('18to24.csv', 'w') as outcsv18to24:
    writer0to8 = csv.writer(outcsv0to8)
    writer9to17 = csv.writer(outcsv9to17)
    writer18to24 = csv.writer(outcsv18to24)
    for index, row in df.iterrows():
        datetime_str = row['Datetime']
        split = datetime_str.split(" ", 2)
        hour = split[1].split(":", 2)
        hour_int = int(hour[0])
        # Note: range(0, 8) covers hours 0-7 and range(9, 17) covers 9-16,
        # so hours 8 and 17 fall into the 18to24 bucket as written
        if hour_int in range(0, 8):
            writer0to8.writerow(row)
        elif hour_int in range(9, 17):
            writer9to17.writerow(row)
        else:
            writer18to24.writerow(row)
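# A shorter alternative sketch using pandas' datetime accessor instead of string
# splitting (assumes the 'Datetime' column parses with pd.to_datetime):
# hours = pd.to_datetime(df['Datetime']).dt.hour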
with codecs.open(get_file_name_from_dates('logs_player_filtered', dates), 'r', "utf-8-sig") as fin:
    for line in fin:
        i += 1
        if time.time() - start_time > 10:
            print("i", i)
            start_time = time.time()
        m = re.search(
            r"\(([0-9]+), '([^']+)', ([0-9]+), '([^']+)', '([^']+)', "
            r"datetime\.datetime\(([^\)]+)\), '([^']+)', ([0-9]+)\)", line)
        try:
            userid, name, id, eventName, eventType, datetime_str, JSONParams, contextualInfoId = m.groups()
            name = name.strip()
            year, month, day, *rest = [int(item) for item in datetime_str.split(', ')]
            datetime_str = "{} {} {}".format(year, month, day)
        except:
            import traceback
            print("cant parse:")
            print(line)
            traceback.print_exc()
            break
        try:
            params = eval(JSONParams)
            lesson = params["lesson"]
            key = "{},{},{}".format(userid, lesson, datetime_str)