def get_host_metrics(connect, host):
    metrics = {}
    datetime = []
    cpu = []
    memory = []
    process = []

    cursor = connect.cursor()
    sql = "select timestamp,week_content from pm_metrics_points_week " \
          "where host_id='%s' and createtime > now() + '-30 day'" % (
        host['ip'])
    cursor.execute(sql)
    result = cursor.fetchall()

    for row in result:
        datetime.append(int(row[0]))

        content = json.loads(row[1])
        for record in content:
            kpiId = record['kpiId']

            try:
                if kpiId == '9001':
                    cpu.append(float(record['value']))
                elif kpiId == '9002':
                    memory.append(float(record['value']))
                elif kpiId == '9010':
                    process.append(int(record['value']))
            except Exception as e:
                pass

    metrics['datetime'] = datetime
    metrics['cpu'] = cpu
    metrics['memory'] = memory
    metrics['process'] = process

    cursor.close()

    avg_cpu = 0
    max_cpu = 0
    if len(cpu) > 0:
        avg_cpu = round(numpy.mean(cpu), 1)
        max_cpu = round(numpy.max(cpu), 1)

    avg_memory = 0
    max_memory = 0
    if len(memory) > 0:
        avg_memory = round(numpy.mean(memory), 1)
        max_memory = round(numpy.max(memory), 1)

    avg_process = 0
    if len(process) > 0:
        avg_process = int(numpy.mean(process))

    return metrics, avg_cpu, avg_memory, avg_process, max_cpu, max_memory
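The query above interpolates host['ip'] straight into the SQL string. A minimal sketch of the same lookup as a parameterized query, assuming a DB-API 2.0 driver such as psycopg2 and PostgreSQL-style interval syntax (table and column names are copied from the snippet, not verified):

def fetch_week_points(connect, host_ip):
    # Hypothetical helper: let the driver handle quoting instead of %-formatting.
    cursor = connect.cursor()
    sql = ("select timestamp, week_content from pm_metrics_points_week "
           "where host_id = %s and createtime > now() - interval '30 day'")
    cursor.execute(sql, (host_ip,))
    rows = cursor.fetchall()
    cursor.close()
    return rows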
Example #2
def get_datetime():
    ask = ["Year", "Month", "Day", "Hour (24-Hour)", "Minute"]  # list containing strings to fill in the input statement
    datetime = []   # datetime list

    print("Enter the starting date and/or time for your countdown...")
    for word in ask:
        ans = input("%s: " % word)
        datetime.append(ans)

    # DEBUGGING
    debug(datetime)
    return datetime
Example #3
def getBalanceHistory(coin_fee):
    txid, unix, datetime, balance = [], [], [], []
    for i in range(len(coin_fee.txid)):
        txid.append(coin_fee.txid[i])
    txid = np.sort(txid)
    for i in range(len(txid)):
        for j in range(len(coin_fee.txid)):
            if txid[i] == coin_fee.txid[j]:
                unix.append(coin_fee.unix[j])
                datetime.append(coin_fee.date[j] + coin_fee.time[j])
                balance.append(coin_fee.balance[j])
    balance_history = [unix, datetime, balance]
    return balance_history
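The nested loops above re-scan coin_fee.txid for every sorted txid. A hedged sketch of the same pairing using a txid-to-index dict, assuming the same coin_fee fields and unique txids (the original would emit one row per duplicate match):

def getBalanceHistory_indexed(coin_fee):
    # Hypothetical variant: build an index once, then walk the sorted txids.
    index = {tx: j for j, tx in enumerate(coin_fee.txid)}
    unix, datetimes, balance = [], [], []
    for tx in sorted(coin_fee.txid):
        j = index[tx]
        unix.append(coin_fee.unix[j])
        datetimes.append(coin_fee.date[j] + coin_fee.time[j])
        balance.append(coin_fee.balance[j])
    return [unix, datetimes, balance]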
Example #4
def get_event(url):

    name = []
    info = []
    datetime = []
    location = []
    contact = []
    phonenum = []
    category = []
    eventtype = []

    content = get_page(url)

    eventdetails = geteventname(content)
    name.append(eventdetails[0])
    content = eventdetails[1]

    eventdetails = geteventinfo(content)
    info.append(eventdetails[0])
    content = eventdetails[1]

    eventdetails = getdatetime(content)
    datetime.append(eventdetails[0])
    content = eventdetails[1]

    eventdetails = getlocation(content)
    location.append(eventdetails[0])
    content = eventdetails[1]

    eventdetails = getcontact(content)
    contact.append(eventdetails[0])
    content = eventdetails[1]

    eventdetails = getphonenum(content)
    phonenum.append(eventdetails[0])
    content = eventdetails[1]

    eventdetails = getcategory(content)
    category.append(eventdetails[0])
    content = eventdetails[1]

    eventdetails = geteventtype(content)
    eventtype.append(eventdetails[0])
    content = eventdetails[1]

    alldata = [
        name, info, datetime, location, contact, phonenum, category, eventtype
    ]

    writer = csv.writer(f, delimiter=',')
    writer.writerows([alldata])
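Each block above repeats the same extract-append-advance pattern. A compact sketch of the same flow driven by a list of extractor functions, assuming the geteventname/geteventinfo/... helpers and the open file object f from the original snippet:

def get_event_compact(url):
    # Hypothetical restructuring: iterate over the extractors instead of
    # repeating the append/advance boilerplate for each field.
    extractors = [geteventname, geteventinfo, getdatetime, getlocation,
                  getcontact, getphonenum, getcategory, geteventtype]
    content = get_page(url)
    alldata = []
    for extract in extractors:
        eventdetails = extract(content)
        alldata.append([eventdetails[0]])
        content = eventdetails[1]
    writer = csv.writer(f, delimiter=',')  # f: file handle defined elsewhere, as above
    writer.writerows([alldata])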
Example #5
def tor_vr(start,end,code,para):
    sql = "select datetime,close,tor,vr,vol from t_daydata where datetime between '"+start+"' and '"+end+"' and code = '"+code+"'"
    title = code+" ["+start+","+end+" ] "+str(para)
    datetime = []
    close = []
    tor = []
    vr = []
    vol = []
    for row in hq._excutesql(sql):
        datetime.append(row[0])
        close.append(row[1])
        tor.append(row[2])
        vr.append(row[3])
        vol.append(row[4]/100000)
    # Create the figure and axes
    fig, ax = plt.subplots()
    #fig.subplots_adjust(bottom=0.2)
    plt.figure(1,figsize=(150, 130))
    plt.subplot(212)
    # Set the X-axis ticks to datetimes
    ax.xaxis_date()
    plt.title(title)
    plt.xticks()#pd.date_range(start,end))
    plt.yticks()
    #plt.xlabel("BLACK close,YELLOW tor,GREEN vr,BLUE vol")
    plt.ylabel("")
    #plt.plot(datetime,close,color = 'black')
    plt.plot(datetime, tor,color = 'yellow')
    plt.plot(datetime, vr,color = 'red')
    plt.xlabel("YELLOW tor,RED vr")
    #plt.plot(datetime, vol, color='blue')
    plt.grid()

    plt.subplot(221)
    plt.plot(datetime, vol, color='blue')
    plt.xlabel("BLUE vol")
    plt.grid()

    plt.subplot(222)
    plt.plot(datetime, close, color='black')
    plt.xlabel("BLACK close")
    plt.grid()

    fig.set_size_inches(15, 10)

    path = "C:/image/"+c.DATE.replace("-","")+"/"
    if not os.path.exists(path):
        os.mkdir(path)
    plt.savefig(path+title[:6]+".jpg")
    #plt.show()
    plt.close()
Example #6
def his_data(req):
    # Initialization
    response = []  # light intensity
    datetime = []  # date
    # Use the objects model manager's all() to fetch every row, equivalent to SELECT * FROM in SQL
    list = HisData.objects.all()
    for var in list:
        response.append(float(var.Light_intensity))
        datetime.append(str(var.Time))
    return render_to_response("his_data.html", {
        'list': list,
        'lux': json.dumps(response),
        'dtime': json.dumps(datetime)
    })
Example #8
    def arrival_datetime_list(self, data):
        """ Retrieve list wich contains arrival datetimes """

        datetime = []
        try:
            for path, _, node in jxmlease.parse(
                    data,
                    generator=
                    "tir38:TravelItinerary/tir38:ItineraryInfo/tir38:ReservationItems/tir38:Item/tir38:FlightSegment"
            ):
                datetime1 = node.get_xml_attr('ArrivalDateTime')
                datetime.append(str(datetime1))
        except:
            datetime = ['N/A']
        return datetime
Example #9
def capture_tweet_data(tweets):
    # tweets is a list containing a number of BS4 tag objects
    user = []
    datetime = []
    tweet_text = []
    comments = []
    retweets = []
    likes = []
    tweet_url = []

    for index, tweet in enumerate(tweets):
        print("fetching tweet number", index)
        user.append(
            tweet.select_one(
                "div[class='css-1dbjc4n r-18u37iz r-dnmrzs']").text)
        if tweet.select_one("time") is None:
            datetime.append("no datetime found")
        else:
            datetime.append(tweet.select_one("time")['datetime'])
        if tweet.select_one(
                "div[class='css-901oao r-hkyrab r-1qd0xha r-a023e6 r-16dba41 r-ad9z0x r-bcqeeo r-bnwqim r-qvutc0']"
        ) is None:
            tweet_text.append("no text found - could be vid of pic only")
        else:
            tweet_text.append(
                tweet.select_one(
                    "div[class='css-901oao r-hkyrab r-1qd0xha r-a023e6 r-16dba41 r-ad9z0x r-bcqeeo r-bnwqim r-qvutc0']"
                ).text)
        social = tweet.select(
            "div[class='css-1dbjc4n r-1iusvr4 r-18u37iz r-16y2uox r-1h0z5md']")
        comments.append(social[0].text)
        retweets.append(social[1].text)
        likes.append(social[2].text)
        try:
            tweet_url.append(
                tweet.select_one(
                    "a[class='css-4rbku5 css-18t94o4 css-901oao r-1re7ezh r-1loqt21 r-1q142lx r-1qd0xha r-a023e6 r-16dba41 r-ad9z0x r-bcqeeo r-3s2u2q r-qvutc0']"
                )['href'])
        except:
            tweet_url.append("no url found")

    kle = pd.DataFrame(list(
        zip(user, datetime, tweet_text, comments, retweets, likes, tweet_url)),
                       columns=[
                           "sender", "datetime", "text", "comments",
                           "retweets", "likes", "tweet_url"
                       ])
    return kle
Example #10
    def market_index_kdd(self):
        Data = DataFrame()

        url_dict = {'미국 USD':'http://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_USDKRW',
                    '일본 JPY':'http://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_JPYKRW',
                    '유럽연합 EUR':'http://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_EURKRW',
                    '중국 CNY':'http://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_CNYKRW'}
        for key in url_dict.keys():
            
            date = []
            value = []

            for i in range(1,1000):
                url = re.get(url_dict[key] + '&page=%s'%i)
                url = url.content

                html = BeautifulSoup(url,'html.parser')

                tbody = html.find('tbody')
                tr = tbody.find_all('tr')
                
                
                '''Fetch pages until the last one'''
                if len(tbody.text.strip()) > 3:
                    
                    for r in tr:
                        temp_date = r.find('td',{'class':'date'}).text.replace('.','-').strip()
                        temp_value = r.find('td',{'class':'num'}).text.strip()
                
                        date.append(temp_date)
                        value.append(temp_value)
                else:

                    temp = DataFrame(value, index = date, columns = [key])
                    
                    Data = pd.merge(Data, temp, how='outer', left_index=True, right_index=True)
                    
                    print(key + ' data collection complete')
                    time.sleep(10)
                    break
        print('=================== test 2 =================')
        # Data.to_csv('%s/market_index.csv'%(my_folder))
        Data.to_csv('exchange_index.csv', encoding='utf-8-sig')
        print('==================== exchange rates ok ============================')
        print(Data)
        return Data
Example #11
def discretizeData(input_data, output_data):
    trans = preprocessing.LabelEncoder()
    output_data = trans.fit_transform(output_data)

    datetime, day, time = [], [], []
    for inp in input_data:
        datetime.append(inp[0])
        day.append(inp[1])
        time.append(inp[2])
    day = trans.fit_transform(day)
    time = trans.fit_transform(time)

    for i in range(len(input_data)):
        sensor_values = input_data[i][3:]
        input_data[i] = [datetime[i], day[i], time[i]]
        for value in sensor_values:
            input_data[i].append(value)
    return input_data, output_data
def pair_GPSGRACE(GPS_TS, GRACE_TS):
    # This resamples the GRACE data to match GPS that is within the range of GRACE, and forms a common time axis.
    gps_decyear = get_float_times(GPS_TS.dtarray)
    decyear = []
    dt = []
    north_gps = []
    east_gps = []
    vert_gps = []
    N_err = []
    E_err = []
    V_err = []
    u = []
    v = []
    w = []
    for i in range(
            len(GPS_TS.dtarray)
    ):  # this if-statement is happening because GPS is more current than GRACE
        if GPS_TS.dtarray[i] > min(
                GRACE_TS.dtarray) and GPS_TS.dtarray[i] < max(
                    GRACE_TS.dtarray):
            decyear.append(gps_decyear[i])
            dt.append(GPS_TS.dtarray[i])
            north_gps.append(GPS_TS.dN[i])
            east_gps.append(GPS_TS.dE[i])
            vert_gps.append(GPS_TS.dU[i])
            N_err.append(GPS_TS.Sn[i])
            E_err.append(GPS_TS.Se[i])
            V_err.append(GPS_TS.Su[i])
    grace_u = np.interp(decyear, GRACE_TS.decyear, GRACE_TS.u)
    grace_v = np.interp(decyear, GRACE_TS.decyear, GRACE_TS.v)
    grace_w = np.interp(decyear, GRACE_TS.decyear, GRACE_TS.w)
    my_paired_ts = Paired_TS(dtarray=dt,
                             north=north_gps,
                             east=east_gps,
                             vert=vert_gps,
                             N_err=N_err,
                             E_err=E_err,
                             V_err=V_err,
                             u=grace_u,
                             v=grace_v,
                             w=grace_w)
    return my_paired_ts
Example #13
def market_index_crawling():
    Data = DataFrame()
    
    url_dict = {'미국 USD':'http://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_USDKRW',
                '국제 금':'http://finance.naver.com/marketindex/worldDailyQuote.nhn?marketindexCd=CMDT_GC&fdtc=2'}

    for key in url_dict.keys():
    
        date = []
        value = []

        for i in range(1,1000):
            url = re.get(url_dict[key] + '&page=%s'%i)
            url = url.content

            html = BeautifulSoup(url,'html.parser')

            tbody = html.find('tbody')
            tr = tbody.find_all('tr')
            
            
            '''Fetch pages until the last one'''
            if len(tbody.text.strip()) > 3:
                
                for r in tr:
                    temp_date = r.find('td',{'class':'date'}).text.replace('.','-').strip()
                    temp_value = r.find('td',{'class':'num'}).text.strip()
            
                    date.append(temp_date)
                    value.append(temp_value)
            else:

                temp = DataFrame(value, index = date, columns = [key])
                
                Data = pd.merge(Data,temp, how='outer', left_index=True, right_index=True)        
                
                print(key + ' data collection complete')
                time.sleep(10)
                break

    Data.to_csv('%s/market_index.csv'%(folder_adress))
    return Data
Example #14
def useASOS(fname, RT):
    errorFile = open('%s_use_error_log.txt' % fname.split('.')[0], 'w')

    dataFile = open(fname, 'r')
    STA = fname.split('.')[0]

    dt = []
    wind = []
    bp = []
    tdh = []
    p = []
    for x in dataFile:
        dt.append(dateASOS(x))
        wind.append(windASOS(x, errorFile))
        bp.append(baroASOS(x, errorFile))
        tdh.append(tempASOS(x, errorFile))
        p.append(rainASOS(x))

    dataFile.close()
    errorFile.close()
    return dt, p, wind, bp, tdh
Example #15
    def other(self):
        sql = "select consume_time,sum(fee) fee,b.name from fee_record a,member b \
		  	   where a.member_id = b.id group by consume_time,name order by consume_time"
        dateList = db.query(sql).list()
        datetime = []
        categories = {}
        dict1 = {}
        dict2 = {}
        for d in dateList:
            datetime.append(d.consume_time)
        categories["categories"] = datetime
        data1 = [12, 20]
        data2 = [20, 30]
        dict1["name"] = "张三"
        dict1["data"] = data1
        dict2["name"] = "李死"
        dict2["data"] = data2

        data = [dict1, dict2]
        categories["series"] = data
        web.header("Content-Type", "application/json")
        return json.dumps(categories, default=dthandle)
Example #16
def search_next_empty_reserves_from_emptystate(cfg, cookies, headers, form_data):
    """
    Search for open reservations given the date/time, purpose of use, and area
    """
    global http_req_num
    global page_unit
    # Get the year, month, day and start time from the form data
    datetime = []
    datetime.append(str(form_data['layoutChildBody:childForm:year']))
    datetime.append(str(form_data['layoutChildBody:childForm:month']))
    datetime.append(str(form_data['layoutChildBody:childForm:day']))
    datetime.append(str(form_data['layoutChildBody:childForm:stime']))
    datetime.append(str(form_data['layoutChildBody:childForm:offset']))
    # Modify the form data
    # Change the doPager value to 'submit'
    form_data['layoutChildBody:childForm:doPager'] = 'submit'
    # Remove form data that is not needed
    ## Remove the "add to reservation cart" entries
    for _index in range(page_unit - 1):
        index_string_doAddCart = f'layoutChildBody:childForm:rsvEmptyStateItems:{_index}:doAddCart'
        #print(f'delete formdata: {index_string_doAddCart}')
        del form_data[f'{index_string_doAddCart}']
    # Remove the "review reservation cart contents" entry
    del form_data['layoutChildBody:childForm:jumpRsvCartList']
    #print(form_data)
    # URL-encode the form data to include in the POST request
    params = urllib.parse.urlencode(form_data)
    # Search for open reservations using the form data
    response = requests.post(cfg['empty_state_url'], headers=headers, cookies=cookies, data=params)
    http_req_num += 1
    # Save the response as an HTML file for debugging
    _datetime_string = str(datetime[0]) + str(datetime[1]).zfill(2) + str(datetime[2]).zfill(2) + str(datetime[3]).zfill(2) + str(datetime[4]).zfill(2)
    _file_name = f'result_{_datetime_string}.html'
    #print(_file_name)
    _file = reserve_tools.save_html_to_filename(response, _file_name)
    # Return the response
    return response
def LevenshteinDistance(string1, string2):
    n1=len(string1)
    n2=len(string2)
    if string1==string2:
        return 0
    if n1==0 or n2==0:
        return n1+n2
    # The d matrix will hold the Levenshtein distance
    d = []
    for i in range(n1+1):
        row = []
        for j in range(n2+1):
            if i==0 or j==0:
                row.append(i+j)                 #target prefixes
            else:
                if string1[i-1]==string2[j-1]:  #same letter, no operation
                    row.append(d[i-1][j-1])
                else:
                    minimo=min(d[i-1][j] + 1,   #a deletion
                               row[j-1]+1,      #an insertion
                               d[i-1][j-1] + 1) #a substitution
                    row.append(minimo)
        d.append(row)
    return d[n1][n2], d, n1, n2
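A minimal usage sketch of the function above; note that the two early-exit branches return a bare integer while the general path returns a (distance, matrix, n1, n2) tuple:

result = LevenshteinDistance("kitten", "sitting")
distance = result[0] if isinstance(result, tuple) else result
print(distance)  # 3: kitten -> sitten -> sittin -> sitting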
Example #18
    'rmax': rmax_slice,
    'vmax': vmax_slice,
    'roci': roci_slice,
    'roci_miles': roci_slice_miles,
    'lat': lat_slice,
    'lon': lon_slice,
    'pc': pc_slice,
    'b': b_slice
})

hurricane_df_clean = hurricane_df[hurricane_df.rmax != 0]

datetime = []

for length in range(1, len(hurricane_df_clean), 4):
    datetime.append(int(hurricane_df_clean['datetime'].values[length].item()))

conn_string = "dbname='hamlethurricane' user=postgres port='5432' host='127.0.0.1' password='******'"

try:
    conn = psycopg2.connect(conn_string)
except Exception as e:
    print(str(e))
    sys.exit()

impact_cur = conn.cursor()

for key in range(1, (len(datetime) - 1)):

    sql = """create or replace view vw_county_impact_{} as
	select a.ctfips, avg(b."Windspeed")
Example #19
import sqlite3
import json
import numpy as np
import matplotlib.pyplot as plt

conn = sqlite3.connect("sensehat.db")
cursor = conn.cursor()
sql = "select * from temp_humid;"
cursor.execute(sql)
result = cursor.fetchall()
temp=[]
humid=[]
datetime=[]
for row in result:
    temp.append(row[1])
    humid.append(row[2])
    date = row[3]
    dates = int(date[8:10])
    datetime.append(dates)
    
    print(temp)

fig = plt.figure()
plt.plot(temp)
plt.plot(humid)
plt.xlabel('x-Axis')
plt.ylabel('Temperature and Humidity ')
plt.title('Temperature & Humidity Reading')

plt.show()
fig.savefig('Temperature_Humidity.png')
conn.close()
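The day-of-month values collected in the datetime list above are never used as an x-axis. A minimal sketch that plots against them, assuming the same temp, humid and datetime lists (the values may not be evenly spaced or sorted):

fig2 = plt.figure()
plt.plot(datetime, temp, label='temperature')
plt.plot(datetime, humid, label='humidity')
plt.xlabel('Day of month')
plt.ylabel('Temperature and Humidity')
plt.legend()
fig2.savefig('Temperature_Humidity_by_day.png')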
Example #20
    ren_check = important_data[1][
        x] in renewables  #checks to see if the technology is a renewable
    if (ren_check == True):
        renewable_techs.append(important_data[1][x])
        sizesax3.append(data[0][x])

labelax2 = ["Renewables", "Non-Renewables"]  #percentage of renewable usage
sizesax2 = [float(data[2][0]), 100 - float(data[2][0])]

current_time = str(datetime.datetime.now())  #find the current date and time
datetime = []

for x in range(0, 4, 1):  # split problem into 4 parts: day, month, year and time
    if (x == 0):
        datetime.append(current_time[8] + current_time[9] + "/")
    if (x == 1):
        datetime.append(current_time[5] + current_time[6] + "/")
    if (x == 2):
        datetime.append(current_time[0] + current_time[1] + current_time[2] +
                        current_time[3])
    if (x == 3):
        datetime.append(" " + current_time[11] + current_time[12] +
                        current_time[13] + current_time[14] + current_time[15])

datetime = ''.join(
    datetime)  #join all the elements of the array together without spaces ('')

fig = plt.figure()

#Renewable techs
Example #21
def warframe_crawling(item, path, path_0):

    get_item = item
    get_path = path
    get_path_0 = path_0

    site = 'https://api.warframe.market/v1/items/{get_item}/statistics'.format(get_item = get_item)
    res = requests.get(site)

    html = res.text
    soup = bs(html, 'html.parser')

    with open('/home/ec2-user/environment/warframets/data/json/warframe_data_v2.json', 'w') as file:
        data = str(soup)
        json_data = json.loads(data)
        json_data_1 = json.dumps(json_data, indent = 4)
        file.write(json_data_1)

    warframe_data = json_data_1

    json_data = json.loads(warframe_data)
    result_data = pd.DataFrame(json_data['payload']['statistics_closed']['90days'])

    datetime = []
    avg_price = []
    volume = []

    for i in result_data['datetime']:
        datecut = str(i)
        datetime.append(datecut[0:10])

    for i in result_data['moving_avg']:
        avg_price.append(str(i))

    for i in result_data['volume']:
        volume.append(str(i))

    all_data_list = pd.DataFrame({'datetime' : datetime, 'avg_price' : avg_price, 'volume' : volume})

    def make_file(item, path):
        get_item = item
        get_path = path
        if os.path.isfile(get_path):
            all_data_list.to_csv(get_path, mode = 'a', header = False)
            re_result = pd.read_csv(get_path, index_col = 0, error_bad_lines = False)
            all_result = re_result.drop_duplicates('datetime', keep = 'first')
            all_result.to_csv(get_path, mode = 'w')
            value = pandas_value.pandas_value(get_item, 'warframe')
            value.to_csv(get_path, mode = 'w')
            #print('Data update complete.')
        else:
            all_data_list.to_csv(get_path, mode = 'w')
            value = pandas_value.pandas_value(get_item, 'warframe')
            value.to_csv(get_path, mode = 'w')
            #print('Saved new data.')
    
    if os.path.isdir(get_path_0):
        make_file(get_item, get_path)
    else:
        #print('Folder did not exist, so it was created.')
        os.makedirs(get_path_0)
        make_file(get_item, get_path)
    
    print(str(get_item) + ' has been updated.')
Example #22
def main():
    st.set_option('deprecation.showfileUploaderEncoding', False)
    st.title('Análise mensagens Whatsapp')
    st.write(
        'Obs: Nenhuma mensagem será salva ou utilizada, sua privacidade está totalmente segura.'
    )
    st.write(
        'Essa aplicação se trata de um código aberto que pode ser encontrado no Github: '
    )
    st.write('Meu LinkedIn: https://www.linkedin.com/in/andr%C3%A9-elias/')

    st.text(
        "__________________________________________________________________________________________"
    )
    nltk.download('stopwords')
    st.write('Como conseguir o arquivo da conversa:')
    st.image('export.jpeg', width=250)
    st.write(
        '''Dentro da conversa aperte os '...' e depois clique em Exportar Conversa (SEM MÍDIA)'''
    )

    arquivoConversa = st.file_uploader('FAÇA O UPLOAD AQUI')
    if arquivoConversa is not None:
        pat = re.compile(
            r'^(\d\d\/\d\d\/\d\d\d\d.*?)(?=^^\d\d\/\d\d\/\d\d\d\d|\Z)',
            re.S | re.M)
        with arquivoConversa as f:
            data = [
                m.group(1).strip().replace('\n', ' ')
                for m in pat.finditer(f.read())
            ]

        data.pop(0)
        sender = []
        message = []
        datetime = []
        for row in data:

            datetime.append(row.split(' - ')[0])

            try:
                s = re.search('- (.*?):', row).group(1)
                sender.append(s)
            except:
                sender.append('')

            try:
                message.append(row.split(': ', 1)[1])
            except:
                message.append('')

        df = pd.DataFrame(zip(datetime, sender, message),
                          columns=['datetime', 'sender', 'message'])
        df['datetime'] = pd.to_datetime(df.datetime, format='%d/%m/%Y %H:%M')
        df['date'] = df['datetime'].dt.date
        df['time'] = df['datetime'].dt.time
        df['weekDay'] = df['datetime'].dt.dayofweek
        df['timeHour'] = df['datetime'].dt.hour
        df['weekDay'] = df['weekDay'].replace({
            0: 'Segunda',
            1: 'Terça',
            2: 'Quarta',
            3: 'Quinta',
            4: 'Sexta',
            5: 'Sábado',
            6: 'Domingo'
        })
        df['message'] = df['message'].replace(
            {'<Arquivo de mídia oculto>': '-MÍDIA-'})
        names = df['sender'].unique()
        yourName = names[0]
        hisName = names[1]

        numMessage = df.groupby(['sender'])['message'].count().reset_index()

        st.text(
            "__________________________________________________________________________________________"
        )
        st.subheader('Distribuição de mensagens')

        plt.figure(figsize=(15, 4))
        ax = sns.barplot(x="message", y="sender", data=numMessage)
        ax.set(xlabel='Mensagens enviadas', ylabel='Remetente')
        sns.set(style="white", context="talk")
        st.pyplot()

        df['characters'] = df.message.apply(len)
        df['words'] = df.message.apply(lambda x: len(x.split()))

        textMean = df.groupby(['sender'
                               ])['characters',
                                  'words'].mean().round(2).reset_index()

        ax = sns.barplot(x="characters", y="sender", data=textMean)
        ax.set(xlabel='Média de caracteres por msg', ylabel='Remetente')
        st.pyplot()

        ax = sns.barplot(x="words", y="sender", data=textMean)
        ax.set(xlabel='Média de palavras por msg', ylabel='Remetente')
        st.pyplot()

        numMessageDay = df.groupby(['date'])['message'].count().reset_index()

        st.text(
            "__________________________________________________________________________________________"
        )
        st.subheader('Distribuição tempo')
        plt.figure(figsize=(15, 4))
        ax = sns.lineplot(data=numMessageDay,
                          x="date",
                          y="message",
                          linewidth=5)
        ax.set(xlabel='Data', ylabel='Mensagens por dia')
        plt.setp(ax.get_xticklabels(), rotation=45)
        st.pyplot()

        numMessageHour = df.groupby(['timeHour'
                                     ])['message'].count().reset_index()

        plt.figure(figsize=(7, 7))

        ax = sns.barplot(data=numMessageHour, x="timeHour", y="message")
        ax.set(xlabel='Hora do dia', ylabel='Mensagens')
        sns.set(style="white", context="talk")
        st.pyplot()

        numMessageWeek = df.groupby(['weekDay'
                                     ])['message'].count().reset_index()
        numMessageWeek['weekDay'] = pd.Categorical(numMessageWeek['weekDay'],
                                                   categories=[
                                                       'Segunda', 'Terça',
                                                       'Quarta', 'Quinta',
                                                       'Sexta', 'Sábado',
                                                       'Domingo'
                                                   ],
                                                   ordered=True)

        plt.figure(figsize=(15, 4))
        sns.set(style="white", context="talk")
        ax = sns.barplot(data=numMessageWeek, x="message", y="weekDay")
        ax.set(xlabel='Dia da semana', ylabel='Mensagens')
        st.pyplot()

        yourWords = []
        hisWords = []
        for x in range(len(df['sender'])):
            if df['sender'][x] == yourName:
                yourWords.append(df['message'][x])
            elif df['sender'][x] == hisName:
                hisWords.append(df['message'][x])

        st.text(
            "__________________________________________________________________________________________"
        )
        st.subheader('Mensagens em números')
        st.write('Total Mensagens: ', len(yourWords) + len(hisWords))
        st.write('Suas mensagens: ', len(yourWords))
        st.write('Mensagens do outro: ', len(hisWords))

        s = ' '
        totalYourWords = s.join(yourWords)
        totalHisWords = s.join(hisWords)

        pattern = re.compile('k*|-MÍDIA-|Kk*')
        totalYourWords = pattern.sub('', totalYourWords)
        totalHisWords = pattern.sub('', totalHisWords)

        stopWords = stopwords.words('portuguese')
        newStop = [
            'pra', 'tô', 'aí', 'tá', 'então', 'deu', 'aqui', 'né', 'vou',
            'bem', 'coisa', 'tmb', 'vai'
        ]
        for x in newStop:
            stopWords.append(x)

        mapaCores = ListedColormap(['red', 'magenta', 'blue', 'green'])

        mask = np.array(Image.open('mask-cloud.png'))

        st.text(
            "__________________________________________________________________________________________"
        )
        st.subheader('Nuvem de palavras')

        nuvem = WordCloud(width=1000,
                          height=600,
                          background_color='white',
                          colormap=mapaCores,
                          stopwords=stopWords,
                          max_words=60,
                          mask=mask)
        nuvem.generate(totalYourWords)
        plt.figure(figsize=(10, 10))
        plt.imshow(nuvem)
        st.pyplot()

        mapaCores = ListedColormap(['red', 'magenta', 'blue', 'green'])

        nuvem = WordCloud(width=1000,
                          height=600,
                          background_color='white',
                          colormap=mapaCores,
                          stopwords=stopWords,
                          max_words=60,
                          mask=mask)
        nuvem.generate(totalHisWords)
        plt.figure(figsize=(10, 10))
        plt.imshow(nuvem)
        st.pyplot()

        st.text(
            "__________________________________________________________________________________________"
        )
        st.subheader('Emojis')

        yourEmoji = list(''.join(c for c in totalYourWords
                                 if c in emoji.UNICODE_EMOJI))

        countYourEmoji = {i: yourEmoji.count(i) for i in yourEmoji}

        hisEmoji = list(''.join(c for c in totalHisWords
                                if c in emoji.UNICODE_EMOJI))

        countHisEmoji = {i: hisEmoji.count(i) for i in hisEmoji}

        dfYourEmoji = pd.DataFrame(countYourEmoji.items(),
                                   columns=['Emoji', 'Count'])

        dfYourEmoji = dfYourEmoji.sort_values(by=['Count'], ascending=False)

        st.table(dfYourEmoji)

        dfHisEmoji = pd.DataFrame(countHisEmoji.items(),
                                  columns=['Emoji', 'Count'])

        dfHisEmoji = dfHisEmoji.sort_values(by=['Count'], ascending=False)

        st.table(dfHisEmoji)
    def market_index_crawling(self):

        folder_adress = '.'

        Data = DataFrame()

        url_dict = {
            '미국 USD':
            'http://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_USDKRW',
            '일본 JPY':
            'http://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_JPYKRW',
            '유럽연합 EUR':
            'http://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_EURKRW',
            '중국 CNY':
            'http://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_CNYKRW',
            'WTI':
            'http://finance.naver.com/marketindex/worldDailyQuote.nhn?marketindexCd=OIL_CL&fdtc=2',
            '국제 금':
            'http://finance.naver.com/marketindex/worldDailyQuote.nhn?marketindexCd=CMDT_GC&fdtc=2'
        }

        for key in url_dict.keys():

            date = []
            value = []

            for i in range(1, 1000):
                url = re.get(url_dict[key] + '&page=%s' % i)
                url = url.content

                html = BeautifulSoup(url, 'html.parser')

                tbody = html.find('tbody')
                tr = tbody.find_all('tr')
                '''Fetch pages until the last one'''
                if len(tbody.text.strip()) > 3:

                    for r in tr:
                        temp_date = r.find('td', {
                            'class': 'date'
                        }).text.replace('.', '-').strip()
                        temp_value = r.find('td', {
                            'class': 'num'
                        }).text.strip()

                        date.append(temp_date)
                        value.append(temp_value)
                else:

                    temp = DataFrame(value, index=date, columns=[key])

                    Data = pd.merge(Data,
                                    temp,
                                    how='outer',
                                    left_index=True,
                                    right_index=True)

                    print(key + ' data collection complete')
                    time.sleep(10)
                    break

        Data.columns = [
            '미국_USD', '일본_JPY', '유럽연합_EUR', '중국_CNY', 'WTI', '국제_금'
        ]

        # Convert the DataFrame index into the date column.
        #Data.columns = Data.columns.droplevel()
        Data = Data.reset_index()
        Data.rename(columns={'index': '일자'}, inplace=True)

        Data.to_csv('%s/market_world_index.csv' % folder_adress)
        #
        # Save to the DB
        from sqlalchemy import create_engine

        # With echo=True, the actual table-creation queries are echoed
        engine = create_engine('sqlite:///itm_master.db', echo=True)

        # 1. Connect to the SQLite DB
        # Open a connection so the data can be stored in SQLite
        con = sqlite3.connect("./itm_master.db")
        cursor = con.cursor()

        # DB CREATE
        cursor.execute("drop table market_world_index ")
        cursor.execute(
            "create table market_world_index (일자,미국_USD,일본_JPY,유럽연합_EUR,중국_CNY,WTI,국제_금)"
        )
        cursor.execute("delete from  market_world_index ")
        # Clear it and start over
        con.commit()

        # 2. Insert the rows into the DB

        # SQL statements
        for ix, r in Data.iterrows():
            # print (r)
            values = u"('%s','%s','%s','%s','%s','%s','%s')" % (
                r['일자'], r['미국_USD'], r['일본_JPY'], r['유럽연합_EUR'], r['중국_CNY'],
                r['WTI'], r['국제_금'])

            insert_sql = u"insert into market_world_index( 일자,미국_USD,일본_JPY,유럽연합_EUR,중국_CNY,WTI,국제_금 ) values %s ;" % (
                u"".join(values))
            print(insert_sql)
            con.execute(insert_sql)
            con.commit()

        return Data
Example #24
File: LSTM2509.py  Project: HuLyu/codeSet
lt = []
for i in range(len(dl)):
    t2 = []
    l2 = []
    step = 7
    t = [
        dl[i][j:j + step] for j in range(0, len(dl[i]))
        if len(dl[i][j:j + step]) == step
    ]
    l = [list(map(itemgetter(0), i)) for i in t]
    for h in range(len(t) - step):
        dataall.append([m for m in t[h]])
        label.append(max(l[h + step]))
        t2.append([m for m in t[h]])
        l2.append(max(l[h + step]))
    dt.append(t2)
    lt.append(l2)

# In[72]:

dl2 = [i.values.tolist() for i in datanew2]
dataall2 = []
label2 = []
dt2 = []
lt2 = []
for i in range(len(dl2)):
    t2 = []
    l2 = []
    step = 7
    t = [
        dl2[i][j:j + step] for j in range(0, len(dl2[i]))
Example #25
def identify_nearest_puv():
    # for testing
    lat = 14.64887259
    lon = 121.06900107
    destination = (lon, lat)

    destination_segment = locate_segment(coordinates = destination)
    print('Destination Segment: ', destination_segment)

    # read route data
    with open("ikot_route_test.json", "r") as read_file:
        route = json.load(read_file)

    # store information in the following lists
    PUV_id = []
    segment_id = []
    datetime = []
    location = []
    segs_before_dest = []

    # query all segments
    for segment in route:
        # use midpoint as reference
        midpoint_latitude = segment["midpoint"][0]
        midpoint_longitude = segment["midpoint"][1]
        seg_length = segment["length"]
        # fetch data of cars near the segment
        NIMPA_URL = 'https://nimpala.me'
        NIMPA_CREDENTIALS = ('root', 'root')
        # Sean - not using &time=[seconds_elapsed] URL parameter.
        rest_operation = '/latest_area?lat={}&lon={}&time={}&radius={}'.format(str(midpoint_latitude), str(midpoint_longitude), 15, seg_length)
        complete_URL = NIMPA_URL + rest_operation
        # print('--- SENDING REQUEST TO:', complete_URL,' ----')
        response = requests.get(complete_URL, auth=NIMPA_CREDENTIALS)
        # print(response)

        # parse the fetched data
        data = 0
        try:
            data = response.json()
        except Exception as e:
            pass
        data = response.text
        print(data)
        parsed = json.loads(data)

        # iterate through lists of PUV data
        if len(parsed) > 0:
            for PUV_data in parsed:
                PUV_id.append(PUV_data['vehicle_id'])
                segment_id.append(segment['segment_id'])
                datetime.append(PUV_data['datetime']['$date'])
                location.append(PUV_data['geojson']['coordinates'])

                # compute distance wrt destination segment
                segment_distance = destination_segment - segment['segment_id']
                if segment_distance < 0:
                    segment_distance = len(route) + segment_distance

                segs_before_dest.append(segment_distance)


    # convert PUV information dict to pandas DataFrame
    PUV_info_dict = {'PUV_id':PUV_id, 'segment_id':segment_id, 'datetime':datetime, 'location':location, 'segs_before_dest':segs_before_dest}
    PUV_df = pd.DataFrame.from_dict(PUV_info_dict)

    PUVs_to_compute_eta = []
    if len(PUV_df) > 0:
        # group by PUV_id, sort by datetime
        df_by_PUV_id = PUV_df.groupby('PUV_id', as_index=False) \
               .apply(lambda x: x.nlargest(1, columns=['datetime'])) \
               .reset_index(level=1, drop=1)
               # .reset_index()
        # df_by_PUV_id = PUV_df.groupby('PUV_id').apply(pd.DataFrame.sort_values, 'datetime')
        # df_by_PUV_id = PUV_df.groupby('PUV_id').apply(lambda x: x.sort_values(['datetime'])).reset_index(drop=True)

        # print(df_by_PUV_id.loc[0])
        # for name in df_by_PUV_id.index:
        #     print(name)
        #     print('segment: ', df_by_PUV_id['segment_id'].loc[name])
        #     print('location: ', df_by_PUV_id['location'].loc[name])
        #     print('no. of segments before dest: ', df_by_PUV_id['segs_before_dest'].loc[name])
        # print(df_by_PUV_id.head())

        # create new dataframe from aggregated groups
        df_by_PUV_id.index.name = None
        df_by_PUV_id.columns = ['PUV_id', 'segment_id', 'datetime', 'location', 'segs_before_dest']

        top_puvs = df_by_PUV_id.sort_values('segs_before_dest').reset_index(drop=1).head(3)
        PUVs_to_compute_eta = top_puvs['PUV_id'].tolist()
        locs_to_vizualize = top_puvs['location'].tolist()
        print(top_puvs['location'].tolist())

    return PUVs_to_compute_eta
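The groupby/nlargest/reset_index chain above keeps only the most recent record per PUV_id. A hedged equivalent using sort_values plus drop_duplicates, assuming the same PUV_df columns:

# Hypothetical alternative to the groupby(...).apply(nlargest) chain:
# keep the latest record per PUV_id by sorting on datetime first.
latest_per_puv = (PUV_df.sort_values('datetime', ascending=False)
                        .drop_duplicates('PUV_id')
                        .sort_values('segs_before_dest')
                        .reset_index(drop=True))
top_puvs = latest_per_puv.head(3)
PUVs_to_compute_eta = top_puvs['PUV_id'].tolist()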
Example #26
# hourly domain basemaps; this takes a lot of time if done hourly. Switching to daily could be prudent over a long timespan
############################################
# save maps into the pdf file (two maps per page)
for i, sp in enumerate(var_list):
    t_days = int(len(airpact[sp])/24)
    temp = np.empty( ( t_days, 90, 90), '|U18')
    df_daily = {}
    for t in range(0,t_days):
        
        # Do daily average and MD8HA
        if sp == 'PMIJ':
            days = t
            t = t*24
            # 24-hour mean, equivalent to summing the 24 hourly slices and dividing by 24
            temp[days] = airpact[sp][t:t+24, :, :].mean(axis=0)
            df_daily[sp] = temp
            datetime.append(airpact['DateTime'][t,0,0])

        else:
            days = t
            t = t*24
            temp[days] = airpact[sp][t:t+24, :, :].mean(axis=0)
            df_daily[sp] = temp
            datetime.append(airpact['DateTime'][t,0,0])

    df_daily[sp] = df_daily[sp].astype(float)
    for t in range(0, len(df_daily[sp])): 
        plt.style.use("dark_background")
           
        outpng = base_dir +'maps/daily_basemap/airpact_daily_basemap_tiled_' + sp + '_%05d.png' % t
        print(outpng)
        pm_max = 35
Example #27
datetime = []
description = []
for source in soup.find_all(
        'ytd-video-renderer',
        class_='style-scope ytd-expanded-shelf-contents-renderer',
        limit=10):
    title.append(source.find_all('a', {'id': 'video-title'})[0].string)
    channel.append(
        source.find_all(
            'a', class_='yt-simple-endpoint style-scope yt-formatted-string')
        [0].string)
    view.append(
        source.find_all('span',
                        class_='style-scope ytd-video-meta-block')[0].string)
    datetime.append(
        source.find_all('span',
                        class_='style-scope ytd-video-meta-block')[1].string)
    description.append(
        source.find_all('yt-formatted-string',
                        {'id': 'description-text'})[0].string)
driver.close()

youtube_output = []
for i in range(10):
    youtube_output.append(
        str(i + 1) + ". ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡ \n" + str(title[i].strip()) +
        " \n" + str(channel[i]) + " | " + str(view[i]) + " | " +
        str(datetime[i]) + " \n\n" + str(description[i]) + " \n\n")

token = 'your_token'
slack = Slacker(token)
Example #28
def _excread(path, encoding="utf-8"):
    """Dont call this directly, use excread() instead."""
    logger = logging.getLogger('glodap.util.excread')
    skipfooter = 0
    first = True
    signature = ''
    file_type = ''
    column_headers = []
    column_units = []
    line = None
    headerlines = 0
    comments = ""
    sampl_depth_columns = [
        'CTDDEPTH',
        'CTDDEP',
        'CTDPRS',
    ]

    # Loop over the header to collect metadata and remove file type info
    with open(path, encoding=encoding) as excfile:
        while True:
            headerlines += 1
            line = excfile.readline().strip()
            # Get the file type and signature
            if (
                    first
                    and (
                        line.startswith('CTD')
                        or line.startswith('BOTTLE')
                    )
            ):
                first = False
                matches = re.search('((BOTTLE)|(CTD))[, ](.*)$', line)
                signature = matches.group(4)
                file_type = matches.group(1)
                continue
            # ignore empty lines
            elif not line.strip():
                continue
            # Keep comments as metadata
            elif line.startswith('#'):
                comments += line + "\n"
                continue
            else:
                # Register header lines
                if line.startswith('EXPOCODE'):
                    column_headers = [s.strip() for s in line.split(',')]
                elif line.startswith(',,,'):
                    column_units =  [s.strip() for s in line.split(',')]
                else:
                    break

    with FileReadBackwards(path, encoding=encoding) as fin:
        for line in fin:
            skipfooter += 1
            if line.strip() == 'END_DATA':
                break

    data_types = {
        'EXPOCODE': str,
        'SECT_ID': str,
        'DATE': str,
        'TIME': str,
    }
    dataframe = pd.read_csv(
        path,
        names=column_headers,
        dtype=data_types,
        skiprows=headerlines,
        skipfooter=skipfooter,
        engine='python',
        encoding=encoding,
    )

    # Strip leading and trailing whitespaces from string columns
    df_obj = dataframe.select_dtypes(['object'])
    dataframe[df_obj.columns] = df_obj.apply(lambda x: x.str.strip())

    # If 'TIME' not present but 'HOUR' and 'MINUTE' is, then make time :)
    if (not 'TIME' in dataframe.columns
            and 'HOUR' in dataframe.columns
            and 'MINUTE' in dataframe.columns):
        dataframe['TIME'] = [
            f'{d.HOUR:02}{d.MINUTE:02}' for i, d in dataframe.iterrows()
        ]

    # Add a datetime column
    if 'DATE' in dataframe.columns and 'TIME' in dataframe.columns:
        datetime = []
        for ix, d in enumerate(dataframe['DATE']):
            try:
                t = dataframe['TIME'][ix]
                date='{}-{}-{}'.format(d[:4], d[4:6], d[6:])
                time = '{}:{}'.format(t[:2], t[2:])
                datetime.append(pd.to_datetime('{} {}'.format(date, time), utc=True))
            except Exception as e:
                logger.error(
                    'Time format error (date: {}) (time: {}) on line {}'
                            .format(
                            d,
                            t,
                            ix + headerlines
                    )
                )
                raise e
        dataframe['EXC_DATETIME'] = datetime

    # Try multiple sampling depth columns
    for name in sampl_depth_columns:
        if name in dataframe.columns:
            dataframe['EXC_CTDDEPTH'] = dataframe[name]
            break

    # Replace -9999, -999, -99, -9 with np.nan
    dataframe = dataframe.replace([-9999, -999, -99, -9], np.nan)

    # Add some extra metadata to the dataframe
    dataframe.whp_exchange.column_units = column_units
    dataframe.whp_exchange.signature = signature
    dataframe.whp_exchange.file_type = file_type
    dataframe.whp_exchange.comments = comments

    return dataframe
## python file i/o
tlf = open("train_data/trainData_List.txt", "r")
a = tlf.read().split('\n')
a.pop(len(a) - 1)
a.sort(reverse=True)
for idx, val in enumerate(a):
    print(str(idx) + " : " + val)

tf = open("train_data/" + "trainData-2020-1213-120627-916633" + ".txt", "r")
dt = []
dataArr = tf.read().split("\n\n")
dataArr.pop(len(dataArr) - 1)
for data in dataArr:
    data = data.split("\n")
    dt.append(data)

## Numpy Mat Ops
import numpy as np
from numpy.linalg import inv

A = np.array([[1, 1], [2, 3]])
B = np.array([[1, 0], [0, 1]])
print(A.T)

x = [1, 2.2, 3]
y = np.array([[3, 6, 4]]).T
pi = np.array([np.ones(len(x)).T, np.array(x).T]).T
print(y.shape)
print(pi)
print(pi.shape)
print(inv(pi.T @ pi) @ pi.T @ y)
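The last print above solves the least-squares line fit through the normal equations. A hedged sketch of the same fit with numpy's built-in solver, using the pi and y defined above:

coeffs, residuals, rank, sv = np.linalg.lstsq(pi, y, rcond=None)
print(coeffs)  # should match inv(pi.T @ pi) @ pi.T @ y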

Example #30
def _excread(excfile):
    """Dont call this directly, use excread() instead."""

    logger = logging.getLogger('glodap.util.excread')
    rewindto = 0
    first = True
    signature = ''
    file_type = ''
    column_headers = []
    column_units = []
    line = None
    headerlines = 0
    comments = ""

    # Loop over the header to collect metadata and remove file type info
    while True:
        headerlines += 1
        rewindto = excfile.tell()
        line = excfile.readline().strip()
        # Get the file type and signature
        if (
                first
                and (
                    line.startswith('CTD')
                    or line.startswith('BOTTLE')
                )
        ):
            first = False
            matches = re.search('((BOTTLE)|(CTD)),(.*)$', line)
            signature = matches.group(4)
            file_type = matches.group(1)
            continue
        # ignore empty lines
        elif not line:
            continue
        # Keep comments as metadata
        elif line.startswith('#'):
            comments += line + "\n"
            continue
        else:
            # Register header lines
            if line.startswith('EXPOCODE'):
                column_headers = line.split(',')
            elif line.startswith(',,,'):
                column_units = line.split(',')
            else:
                break

    excfile.seek(rewindto)
    data_types = {
        'EXPOCODE': str,
        'SECT_ID': str,
        'DATE': str,
        'TIME': str,
    }
    dataframe = pd.read_csv(
        excfile,
        names=column_headers,
        dtype=data_types,
    )

    # Strip leading and trailing whitespaces from string columns
    df_obj = dataframe.select_dtypes(['object'])
    dataframe[df_obj.columns] = df_obj.apply(lambda x: x.str.strip())

    # drop lines after and including END_DATA
    drop_lines = []
    for val in reversed(range(len(dataframe))):
        drop_lines.append(val)
        if 'END_DATA' in dataframe.iloc[val, 0]:
            for line in drop_lines:
                dataframe = dataframe.drop(line, axis=0)
            break

    # Add a datetime column. If time is not present, time is set to 00:00
    if 'DATE' in dataframe.columns and 'TIME' in dataframe.columns:
        datetime = []
        for ix, d in enumerate(dataframe['DATE']):
            try:
                t = dataframe['TIME'][ix]
                date='{}-{}-{}'.format(d[:4], d[4:6], d[6:])
                time = '{}:{}'.format(t[:2], t[2:])
                datetime.append('{} {}'.format(date, time))
                pd.to_datetime(datetime)
            except Exception as e:
                logger.error(
                    'Time format error (date: {}) (time: {}) on line {}'
                            .format(
                            d,
                            t,
                            ix + headerlines
                    )
                )
                raise e
        dataframe['EXC_DATETIME'] = datetime

    # Replace -9999, -999, -99, -9 with np.nan
    dataframe = dataframe.replace([-9999, -999, -99, -9], np.nan)

    # Add some extra metadata to the dataframe
    dataframe.whp_exchange.column_units = column_units
    dataframe.whp_exchange.signature = signature
    dataframe.whp_exchange.file_type = file_type
    dataframe.whp_exchange.comments = comments

    return dataframe