Example #1
def lb():
    rr_dict = {}
    rr_set = set()
    kl = util.get_key_list()
    for key in kl:
        print(key)
        wl = util.get_list_from_file('D:\文档\项目\数据采集文档\词频分布//r//' + key + 'r.txt')
        i = 0
        r_set = set()
        while i < 8000 and i < len(wl):
            wwl = wl[i].split('\t')
            num = rr_dict.get(wwl[0])
            r_set.add(wwl[0])
            if num is None:
                rr_dict[wwl[0]] = wwl[1]
            else:
                rr_dict[wwl[0]] = int(num) + int(wwl[1])
            i += 1
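        # the first key seeds rr_set; every later key intersects it, keeping only words common to all keys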
        if len(rr_set) == 0:
            rr_set = rr_set | r_set
        else:
            rr_set = rr_set & r_set

    kl_list = []
    srl = []
    for ww in rr_set:
        num = rr_dict.get(ww)
        kl_list.append({'word': ww, 'num': num})
    kl_list.sort(key=lambda obj: obj.get('num'), reverse=True)
    for rrl in kl_list:
        srl.append(rrl['word'] + '\t' + str(rrl['num']))
    util.save_file('D:\文档\项目\数据采集文档\词频分布//' + 'result' + 'r.txt', srl)
Example #2
def move_files(base_dir, instance, domain, target_dir, use_vanilla):
    """ Moves the domain and instance description files plus additional data files to the translation directory """
    definition_dir = target_dir + '/definition'
    data_dir = target_dir + '/data'

    # Copy the domain and instance file to the subfolder "definition" on the destination dir
    util.mkdirp(definition_dir)
    shutil.copy(instance, definition_dir)
    shutil.copy(domain, definition_dir)

    is_external_defined = os.path.isfile(base_dir + '/external.hxx')
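    # a vanilla run must not also provide custom external definitions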

    if is_external_defined and use_vanilla:
        raise RuntimeError("An external definitions file was found at '{}', but the runner script determined "
                           "that no external files were needed. Something is wrong."
                           .format(base_dir + '/external.hxx'))

    if not use_vanilla:
        # The ad-hoc external definitions file - if it does not exist, we use the default.
        if is_external_defined:
            shutil.copy(base_dir + '/external.hxx', target_dir)
            if os.path.isfile(base_dir + '/external.cxx'):  # We also copy a possible cxx implementation file
                shutil.copy(base_dir + '/external.cxx', target_dir)

        else:
            default = tplManager.get('external_default.hxx').substitute()  # No substitutions for the default template
            util.save_file(target_dir + '/external.hxx', default)

    # Copy, if they exist, all data files
    origin_data_dir = base_dir + '/data'
    if os.path.isdir(origin_data_dir):
        for filename in glob.glob(os.path.join(origin_data_dir, '*')):
            if os.path.isfile(filename):
                shutil.copy(filename, data_dir)
Example #3
def get_oracle(url):
  source = util.get_source(url)
  html = lxml.html.document_fromstring(source)
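  # rewrite relative links as absolute so the saved copy renders like the live page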
  html.make_links_absolute(url, resolve_base_href=True)
  util.save_file(lxml.html.tostring(html), 'oracle.html')
  util.screenshot('oracle.html', 'oracle.png')
  return html
Example #4
def evaluate(MLResult, labelpath, resultArray, timeIndex, threshold):
    """
    Evaluate the results.
    Return the F-score of the predictions against the ground truth.
    """
    groundTruthArray = []
    TPArray = []
    num = 0
    for fileName in read_file(MLResult.system_path("mid_summary"),
                              "TestFileNameList.txt"):
        with open(labelpath + fileName, "r") as f:
            fline = f.readlines()
            for line in fline:
                count = line.strip("\n")
                if int(count) == 1 and num in timeIndex:
                    groundTruthArray.append(num)
                num += 1

    TP = 0
    for i in resultArray:
        if i in groundTruthArray:
            TP += 1
            TPArray.append(i)

    FP = len(resultArray) - TP
    FN = len(groundTruthArray) - TP
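    # precision/recall fall back to 1 when their denominator is zero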
    Precision = TP / (float(TP + FP)) if TP + FP != 0 else 1
    Recall = TP / (float(TP + FN)) if TP + FN != 0 else 1
    F = 0 if Recall + Precision == 0 else (2 * Recall *
                                           Precision) / (Recall + Precision)
    save_file(MLResult.system_path("result_summary"), "PRF.txt",
              [threshold, F, Precision, Recall, TP, FP, FN, "\n"], ",", "a")
    return F
Example #5
def download_price(theater_id,movie_id):
    # http://bj.nuomi.com/pcindex/main/timetable?cinemaid=1c2e250a3e9691059ee32187&mid=9762&needMovieInfo=0&tploption=5&_=1448004690864#j-movie-list1
    url = "http://bj.nuomi.com/pcindex/main/timetable?cinemaid=%s&mid=%s&needMovieInfo=0"%(theater_id,movie_id)
    lfile = '%snuomi/price_%s_%s.html' % (download_dir,theater_id,movie_id) 
    respHtml = browser.downad_and_save(url,lfile)  
    soup = BeautifulSoup(respHtml, fromEncoding=htmlCharset)

    li = []
    dom_divs = soup.findAll('div',attrs = {'class': 'list'})
    for day,div in enumerate(dom_divs):        
        trs = div.findAll('tr')        
        rows = []
        for tr in trs: 
            tds = tr.findAll('td') 
            
            if not tds: continue

            p = tds[3].find('span')
            pp = p.contents[0].split(';')[-1]
            order_url = completeInnerUrl("http://bj.nuomi.com/", tds[4].find('a')['href']) 

            li.append(','.join([str(day),theater_id,movie_id,tds[0].contents[0].strip(),tds[1].contents[0],pp.strip(),order_url]))
          

    csv_file = '%snuomi/price_%s_%s.csv' % (download_dir, theater_id, movie_id)
    save_file(csv_file, li)
Example #6
def seed_html_fault(html, elements, prop, value):
    while elements:
        e = random.choice(elements)
        elements.remove(e)
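        # swap in the faulty attribute value, render a screenshot, then restore it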
        if prop in e.attrib:
            original_value = e.attrib[prop]
            e.attrib[prop] = value
            util.save_file(lxml.html.tostring(html, doctype=html.doctype),
                           'test.html')
            util.screenshot('test.html', 'test.png')
            e.attrib[prop] = original_value
        else:
            e.attrib[prop] = value
            util.save_file(lxml.html.tostring(html, doctype=html.doctype),
                           'test.html')
            util.screenshot('test.html', 'test.png')
            del e.attrib[prop]
        if not filecmp.cmp('oracle.png', 'test.png'):
            xpath = get_xpath(e)
            default_value = util.get_default_value('oracle.html',
                                                   xpath,
                                                   prop,
                                                   css=False)
            return (xpath, default_value, value)
    return (None, None, None)
Example #7
def timbra_xml(xml):
    log.info('Sending for stamping...')
    id_timbrado = util.get_epoch()
    ok, data = util.timbra_xml(atributos['emisor']['rfc'], xml, id_timbrado)
    if ok:
        name = '{}/{}.xml'.format(PATH['TIMBRADAS'], data['UUID'])
        util.save_file(name, data['XML'])
        log.info('Invoice stamped successfully: {}'.format(name))
    return
Example #8
    def sim_user(self):
        simi_users = {}
        for i in self.train_data.keys():
            simi_users.setdefault(i, [])

        for user in self.train_data:
            scores = self.calulate_similarity(self.train_data, user)
            # use each user as the key for their similarity scores
            simi_users[user] = scores
        util.save_file(simi_users, save_file)
        self.simi_data = simi_users
Example #9
def test():
    stream_test = StreamParser()
    stream_test.open_mic()
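    # capture microphone input for five seconds, then stop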
    import time
    time.sleep(5)
    stream_test.close_mic()
    time.sleep(5)
    import util
    data = "".join(stream_test.cache_frames)
    util.save_file(data)
    stream_test.play_stream(data)
Example #10
    def start(self):
        logger = None
        try:
            logger = open('log', 'a')
            for attachId in range(self.initId + 1, self.endId):
                attachment = get_attach(self.rootUrl + str(attachId))
                if attachment:
                    save_file(attachment)
                logging(logger, attachId)
        finally:
            if logger:
                logger.close()
Example #11
    def sim_item(self):
        simi_items = {}
        re_train_data = self.switch_key()
        # print(self.train_data)
        for i in re_train_data.keys():
            simi_items[i] = []
        for user in re_train_data:
            scores = self.calulate_similarity(re_train_data, user)
            simi_items[user] = scores
        util.save_file(simi_items, save_file)
        self.simi_data = simi_items
Example #12
def change_property(htmlfile, xpath, prop, value):
    html = lxml.html.parse(htmlfile)
    elements = html.xpath(xpath)
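    # append the property to each matched element's inline style and re-render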
    for e in elements:
        if 'style' in e.attrib:
            e.attrib['style'] += ';{0}:{1};'.format(prop, value)
        else:
            e.attrib['style'] = '{0}:{1};'.format(prop, value)
        util.save_file(lxml.html.tostring(html), 'temp.html')
        util.screenshot('temp.html', 'temp.png')
Example #13
def change_property(htmlfile, xpath, prop, value):
    html = lxml.html.parse(htmlfile)
    elements = html.xpath(xpath)
    for e in elements:
        if "style" in e.attrib:
            e.attrib["style"] += ";{0}:{1};".format(prop, value)
        else:
            e.attrib["style"] = "{0}:{1};".format(prop, value)
        util.save_file(lxml.html.tostring(html), "temp.html")
        util.screenshot("temp.html", "temp.png")
Example #14
def test():
    stream_test = StreamParser()
    stream_test.open_mic()
    import time
    time.sleep(5)
    stream_test.close_mic()
    time.sleep(5)
    import util
    data = "".join(stream_test.cache_frames)      
    util.save_file(data)
    stream_test.play_stream(data)
Example #15
    def download(self, id, path, name):
        """ Download the file whose id == <id> and store it as path/name """

        request = self.service.files().get_media(fileId=id)
        fh = io.BytesIO()
        downloader = MediaIoBaseDownload(fh, request)
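        # pull the file down in chunks, reporting progress after each chunk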

        done = False
        while done is False:
            status, done = downloader.next_chunk()
            print("Downloading: {}% ...".format(int(status.progress() * 100)))

        util.save_file(fh, path, name)
Example #16
    def build(self):
        print '[debug]: Starting [building] function'
        #database = zip(self.get_data()[0], self.get_video(), self.get_data()[1], self.get_desc(),self.get_mobile())
        database = {
            "Episodes": [{
                "title": a,
                "url": b,
                "thumb": c,
                "mobile": d
            } for a, b, c, d in zip(self.get_data()[0], self.get_video(),
                                    self.get_data()[1], self.get_mobile())]
        }
        save_file(database, 'data/Season_23.json')
Example #17
def la():
    kl = ['吐槽', '淡定', '自拍']
    for key in kl:
        rl = []
        srl = []
        wl = util.get_list_from_file('D:\文档\项目\数据采集文档\词频分布//' + key + '.txt')
        for w in wl:
            wwl = w.split('\t')
            if len(wwl[0]) > 1 and int(wwl[1]) > 10:
                rl.append({'word': wwl[0], 'num': int(wwl[1])})
        rl.sort(key=lambda obj: obj.get('num'), reverse=True)
        for rrl in rl:
            srl.append(rrl['word'] + '\t' + str(rrl['num']))
        util.save_file('D:\文档\项目\数据采集文档\词频分布//' + key + 'r.txt', srl)
Example #18
File: gen.py Project: jineli/websee
def process(url, ads_xpath):
  test_dir = urlparse.urlparse(url).hostname
  if os.path.isdir(test_dir):
    shutil.rmtree(test_dir)
  os.mkdir(test_dir)
  if os.path.isfile(os.path.join(test_dir, 'description.txt')):
    os.remove(os.path.join(test_dir, 'description.txt'))
  oracle_html = get_oracle(url)
  for xpath in ads_xpath:
    oracle_html = hide_advertisement(oracle_html, xpath)
  util.save_file(lxml.html.tostring(oracle_html, doctype=oracle_html.doctype), 'oracle.html')
  util.screenshot('oracle.html', 'oracle.png')
  total = 0
  total = process_css(test_dir, oracle_html, total)
  total = process_html(test_dir, oracle_html, total)
Example #19
    def record_file(self, filename, seconds=10, format=pyaudio.paInt16, \
                    channels=1, rate=16000, buffer=None):
        """
        Record audio to the specified file
        """
        if buffer is None:
            buffer = self.chunk
        print("Start recording to file:\n"
              "filename = %s\n"
              "record time = %s\n"
              "format = %s\n"
              "channels = %s\n"
              "rate = %s\n"
              "buffer = %s\n" % (filename, seconds, format, channels, rate,
                                 buffer))
        if filename and os.path.isfile(filename):
            print "File %s already exists" % filename
            return 1
        stream = self.audio.open(format=format,
                                 channels=channels,
                                 rate=rate,
                                 input=True,
                                 frames_per_buffer=buffer)
        frames = []

        for _num in range(int(rate * 1.0 / buffer * seconds) + 3):
            data = stream.read(buffer)
            frames.append(data)
        stream.stop_stream()
        stream.close()
        return save_file(b''.join(frames), filename,
                         self.audio.get_sample_size(format), channels, rate)
Example #20
    def record_file(self, filename, seconds=10, format=pyaudio.paInt16, \
                    channels=1, rate=16000, buffer=None):
        """
        Record audio to the specified file
        """
        if buffer is None:
            buffer = self.chunk
        print("Start recording to file:\n"
              "filename = %s\n"
              "record time = %s\n"
              "format = %s\n"
              "channels = %s\n"
              "rate = %s\n"
              "buffer = %s\n" % (filename, seconds, format, channels, rate,
                                 buffer))
        if filename and os.path.isfile(filename):
            print "File %s already exists" % filename
            return 1
        stream = self.audio.open(format=format,
                                 channels=channels,
                                 rate=rate,
                                 input=True,
                                 frames_per_buffer=buffer)
        frames = []

        for _num in range(int(rate * 1.0 / buffer * seconds) + 3):
            data = stream.read(buffer)
            frames.append(data)
        stream.stop_stream()
        stream.close()
        return save_file(b''.join(frames), filename,
                         self.audio.get_sample_size(format), channels, rate)
Example #21
def app():
    st.markdown("""
        # Create Strategy

        Here you can create your strategy!

        You will need to create a function called `strategy` that takes in a
        dictionary named `bot_positions` (e.g. `{"Bot1": 1, "Bot2": 3}`) as argument
        and should return either *"walk"* or *"sabotage"*.

        See the ***Example page*** for examples.

        You can either:
        """)

    with st.beta_expander("Write Code Directly"):
        bot_name = st.text_input(label="Bot Name")
        strategy_code = st.text_area(
            label="Strategy Code: ",
            value=inspect.cleandoc("""
        import random
            
        def strategy(bot_positions):
            return random.choice(["walk", "sabotage"])
        """),
            height=320,
        )
        if st.button("Submit"):
            if bot_name:
                fp = util.save_code_to_file(code=strategy_code,
                                            filename=bot_name)
                util.validate_file(fp)
                st.success("File uploaded and validated successfully, "
                           "go to `Race Page` to run the Game")
            else:
                st.error("Please provide a name for the Bot")

    with st.beta_expander("Upload a file"):
        file_buffer = st.file_uploader(
            "Upload a strategy file (.py)",
            help="The filename will be used to name the Bot",
        )
        if file_buffer:
            fp = util.save_file(filename=file_buffer.name,
                                filebytes=file_buffer.getvalue())
            util.validate_file(fp)
            st.success("File uploaded and validated successfully, "
                       "go to `Race Page` to run the Game")

    st.markdown("## Current Competitors:")
    competitors = util.build_all_bots()
    if competitors:
        st.markdown("\n".join([f"\t- {c}" for c in competitors]))
    else:
        st.markdown("no competitors saved yet")

    if st.button("Add example bots"):
        util.add_example_bots()
        st.experimental_rerun()
Example #22
def process(url, ads_xpath):
    test_dir = urlparse.urlparse(url).hostname
    if os.path.isdir(test_dir):
        shutil.rmtree(test_dir)
    os.mkdir(test_dir)
    if os.path.isfile(os.path.join(test_dir, 'description.txt')):
        os.remove(os.path.join(test_dir, 'description.txt'))
    oracle_html = get_oracle(url)
    for xpath in ads_xpath:
        oracle_html = hide_advertisement(oracle_html, xpath)
    util.save_file(
        lxml.html.tostring(oracle_html, doctype=oracle_html.doctype),
        'oracle.html')
    util.screenshot('oracle.html', 'oracle.png')
    total = 0
    total = process_css(test_dir, oracle_html, total)
    total = process_html(test_dir, oracle_html, total)
Example #23
def download_movie_playing():
    url = "http://bj.nuomi.com/movie/" 
    lfile = '%smovies_playing.html' % (root_dir) 

    respHtml = browser.downad_and_save(url,lfile)  
    soup = BeautifulSoup(respHtml, fromEncoding=htmlCharset)

    li = []
    # <div class="section-item clearfix no-top-border">
    dom_movies = soup.find('div',attrs = {'class': 'section-item clearfix no-top-border'})
    dom_a = dom_movies.findAll('a')
    for m in dom_a:
        # dom_a = m.find('a')
        uid = m['href'].split('/')[-1]
        uri = completeInnerUrl("http://bj.nuomi.com/", m['href'])
        name = m.contents[0]
        li.append("Movie,%s,%s,%s" % (uid, uri, name))

    csv_file = '%snuomi/movie_result.csv' % download_dir
    save_file(csv_file, li)
Example #24
def download_mtheater_list(page_index=1,proxy=None): 
    url = "http://t.dianping.com/movie/beijing/tab_cinema?pageno=%d" % (page_index)
    lfile = '%smtheater_list_%d.html' % (root_dir,page_index) 

    respHtml = browser.downad_and_save(url,lfile,proxy)  
    soup = BeautifulSoup(respHtml, fromEncoding=htmlCharset)

    li = []
    # <div class="index-cinema-list">
    # <li class="item Fix">
    dom_theaterlist = soup.find('div',attrs = {'class': 'index-cinema-list'}) 
    dom_mtheaters = dom_theaterlist.findAll('li',attrs = {'class': 'item Fix'})
    for mt in dom_mtheaters:
        dom_a = mt.findAll('a')[1]
        uid = dom_a['href'].split('/')[-1]
        uri = completeInnerUrl("http://t.dianping.com/", dom_a['href'])
        name = dom_a.contents[0]
        li.append("MovieTheater,北京,%s,%s,%s" % (uid, uri, name))

    csv_file = '%smtheater_result_%d.csv' % (root_dir, page_index)
    save_file(csv_file, li)
Example #25
File: gen.py Project: jineli/websee
def seed_css_fault(html, elements, prop, value):
  while elements:
    e = random.choice(elements)
    elements.remove(e)
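    # inject the CSS fault, screenshot the page, then restore the original style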
    original_style = None
    if 'style' in e.attrib:
      original_style = e.attrib['style']
      e.attrib['style'] += ';{0}:{1};'.format(prop, value)
    else:
      e.attrib['style'] = '{0}:{1};'.format(prop, value)
    util.save_file(lxml.html.tostring(html, doctype=html.doctype), 'test.html')
    util.screenshot('test.html', 'test.png')
    if original_style is not None:
      e.attrib['style'] = original_style
    else:
      del e.attrib['style']
    if not filecmp.cmp('oracle.png', 'test.png'):
      xpath = get_xpath(e)
      default_value = util.get_default_value('oracle.html', xpath, prop)
      return (xpath, default_value, value)
  return (None, None, None)
Example #26
def seed_error(html, elements, prop, value):
  while elements:
    e = random.choice(elements)
    elements.remove(e)
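    # try elements in random order until one visibly changes the rendered page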
    #print 'Trying ', e
    original_style = None
    if 'style' in e.attrib:
      original_style = e.attrib['style']
      e.attrib['style'] += ';{0}:{1};'.format(prop, value)
    else:
      e.attrib['style'] = '{0}:{1};'.format(prop, value)
    util.save_file(lxml.html.tostring(html), 'test.html')
    util.screenshot('test.html', 'test.png')
    if original_style is not None:
      e.attrib['style'] = original_style
    else:
      del e.attrib['style']
    if not filecmp.cmp('oracle.png', 'test.png'):
      xpath = get_xpath(e)
      default_value = util.get_default_value('oracle.html', xpath, prop)
      return (xpath, default_value, value)
  return (None, None, None)
Example #27
File: gen.py Project: jineli/websee
def seed_html_fault(html, elements, prop, value):
  while elements:
    e = random.choice(elements)
    elements.remove(e)
    if prop in e.attrib:
      original_value = e.attrib[prop]
      e.attrib[prop] = value
      util.save_file(lxml.html.tostring(html, doctype=html.doctype), 'test.html')
      util.screenshot('test.html', 'test.png')
      e.attrib[prop] = original_value
    else:
      e.attrib[prop] = value
      util.save_file(lxml.html.tostring(html, doctype=html.doctype), 'test.html')
      util.screenshot('test.html', 'test.png')
      del e.attrib[prop]
    if not filecmp.cmp('oracle.png', 'test.png'):
      xpath = get_xpath(e)
      default_value = util.get_default_value('oracle.html', xpath, prop, 
        css=False)
      return (xpath, default_value, value)
  return (None, None, None)
Example #28
def move_files(base_dir, instance, domain, target_dir, use_vanilla):
    """ Moves the domain and instance description files plus additional data files to the translation directory """
    definition_dir = target_dir + '/definition'
    data_dir = target_dir + '/data'

    # Copy the domain and instance file to the subfolder "definition" on the destination dir
    util.mkdirp(definition_dir)
    shutil.copy(instance, definition_dir)
    shutil.copy(domain, definition_dir)

    is_external_defined = os.path.isfile(base_dir + '/external.hxx')

    if is_external_defined and use_vanilla:
        raise RuntimeError(
            "An external definitions file was found at '{}', but the runner script determined "
            "that no external files were needed. Something is wrong.".format(
                base_dir + '/external.hxx'))

    if not use_vanilla:
        # The ad-hoc external definitions file - if it does not exist, we use the default.
        if is_external_defined:
            shutil.copy(base_dir + '/external.hxx', target_dir)
            if os.path.isfile(
                    base_dir + '/external.cxx'
            ):  # We also copy a possible cxx implementation file
                shutil.copy(base_dir + '/external.cxx', target_dir)

        else:
            default = tplManager.get('external_default.hxx').substitute(
            )  # No substitutions for the default template
            util.save_file(target_dir + '/external.hxx', default)

    # Copy, if they exist, all data files
    origin_data_dir = base_dir + '/data'
    if os.path.isdir(origin_data_dir):
        for filename in glob.glob(os.path.join(origin_data_dir, '*')):
            if os.path.isfile(filename):
                shutil.copy(filename, data_dir)
Example #29
def train_test_videos(path_train_violence, path_test_violence,
                      path_train_raw_nonviolence, path_train_new_nonviolence,
                      path_test_raw_nonviolence, path_test_new_nonviolence,
                      proportion_norm_videos, min_num_frames):
    """ load train-test split from original dataset """
    train_names = []
    train_labels = []
    test_names = []
    test_labels = []
    train_bbox_files = []
    test_bbox_files = []

    train_names_violence = util.read_file(path_train_violence)
    train_names_new_nonviolence = util.read_file(path_train_new_nonviolence)
    train_names_raw_nonviolence = util.read_file(path_train_raw_nonviolence)
    test_names_violence = util.read_file(path_test_violence)
    test_names_new_nonviolence = util.read_file(path_test_new_nonviolence)
    test_names_raw_nonviolence = util.read_file(path_test_raw_nonviolence)

    ## Remove normal videos of short duration
    train_names_new_nonviolence = remove_short_videos(
        constants.PATH_UCFCRIME2LOCAL_FRAMES_NEW_NONVIOLENCE,
        train_names_new_nonviolence, min_num_frames)
    train_names_raw_nonviolence = remove_short_videos(
        constants.PATH_UCFCRIME2LOCAL_FRAMES_RAW_NONVIOLENCE,
        train_names_raw_nonviolence, min_num_frames)
    test_names_new_nonviolence = remove_short_videos(
        constants.PATH_UCFCRIME2LOCAL_FRAMES_NEW_NONVIOLENCE,
        test_names_new_nonviolence, min_num_frames)
    test_names_raw_nonviolence = remove_short_videos(
        constants.PATH_UCFCRIME2LOCAL_FRAMES_RAW_NONVIOLENCE,
        test_names_raw_nonviolence, min_num_frames)

    new_split = False
    ### Train
    # print('Train names: ', len(train_names_violence))
    for tr_name in train_names_violence:
        train_names.append(
            os.path.join(constants.PATH_UCFCRIME2LOCAL_FRAMES_VIOLENCE,
                         tr_name))
        train_labels.append(1)
        video_name = re.findall(r'[\w\.-]+-', tr_name)[0][:-1]
        train_bbox_files.append(
            os.path.join(constants.PATH_VIOLENCECRIME2LOCAL_BBOX_ANNOTATIONS,
                         video_name + '.txt'))

    ## random normal samples
    negative_samples = []
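    # sample normal (non-violent) videos once and cache the split on disk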
    if not os.path.exists(constants.PATH_FINAL_RANDOM_NONVIOLENCE_TRAIN_SPLIT):
        print('Creating Random Normal Train examples file...')
        num_new_samples = int(len(train_names) * proportion_norm_videos)
        train_names_new_nonviolence = random.choices(
            train_names_new_nonviolence, k=num_new_samples)
        train_names_raw_nonviolence = random.choices(
            train_names_raw_nonviolence, k=len(train_names) - num_new_samples)
        if len(train_names_new_nonviolence) == 0:
            print('Using only raw non violence videos...')
        for negative_name in train_names_new_nonviolence:
            negative_samples.append(
                os.path.join(
                    constants.PATH_UCFCRIME2LOCAL_FRAMES_NEW_NONVIOLENCE,
                    negative_name))
            # train_names.append(os.path.join(constants.PATH_UCFCRIME2LOCAL_FRAMES_NEW_NONVIOLENCE, negative_name))
            # train_bbox_files.append(None)
        for negative_name in train_names_raw_nonviolence:
            negative_samples.append(
                os.path.join(
                    constants.PATH_UCFCRIME2LOCAL_FRAMES_RAW_NONVIOLENCE,
                    negative_name))
        util.save_file(negative_samples,
                       constants.PATH_FINAL_RANDOM_NONVIOLENCE_TRAIN_SPLIT)
        # train_names.append(os.path.join(constants.PATH_UCFCRIME2LOCAL_FRAMES_RAW_NONVIOLENCE, neagtive_name))
        # train_bbox_files.append(None)
        new_split = True
    else:
        negative_samples = util.read_file(
            constants.PATH_FINAL_RANDOM_NONVIOLENCE_TRAIN_SPLIT)

    for sample in negative_samples:
        train_names.append(sample)
        train_bbox_files.append(None)
    negative_labels = [0 for i in range(len(negative_samples))]
    train_labels.extend(negative_labels)
    NumFrames_train = [
        len(glob.glob1(train_names[i], "*.jpg"))
        for i in range(len(train_names))
    ]

    ### Test
    for ts_name in test_names_violence:
        test_names.append(
            os.path.join(constants.PATH_UCFCRIME2LOCAL_FRAMES_VIOLENCE,
                         ts_name))
        test_labels.append(1)
        video_name = re.findall(r'[\w\.-]+-', ts_name)[0][:-1]
        test_bbox_files.append(
            os.path.join(constants.PATH_VIOLENCECRIME2LOCAL_BBOX_ANNOTATIONS,
                         video_name + '.txt'))

    negative_samples = []
    if not os.path.exists(constants.PATH_FINAL_RANDOM_NONVIOLENCE_TEST_SPLIT):
        print('Creating Random Normal Test examples file...')
        num_samples = int(len(test_names) * proportion_norm_videos)
        test_names_new_nonviolence = random.choices(test_names_new_nonviolence,
                                                    k=num_samples)
        test_names_raw_nonviolence = random.choices(test_names_raw_nonviolence,
                                                    k=len(test_names) -
                                                    num_samples)
        for negative_name in test_names_new_nonviolence:
            negative_samples.append(
                os.path.join(
                    constants.PATH_UCFCRIME2LOCAL_FRAMES_NEW_NONVIOLENCE,
                    negative_name))

        for negative_name in test_names_raw_nonviolence:
            negative_samples.append(
                os.path.join(
                    constants.PATH_UCFCRIME2LOCAL_FRAMES_RAW_NONVIOLENCE,
                    negative_name))
        util.save_file(negative_samples,
                       constants.PATH_FINAL_RANDOM_NONVIOLENCE_TEST_SPLIT)
        new_split = True
    else:
        negative_samples = util.read_file(
            constants.PATH_FINAL_RANDOM_NONVIOLENCE_TEST_SPLIT)
    for sample in negative_samples:
        test_names.append(sample)
        test_bbox_files.append(None)
    negative_labels = [0 for i in range(len(negative_samples))]
    test_labels.extend(negative_labels)
    NumFrames_test = [
        len(glob.glob1(test_names[i], "*.jpg")) for i in range(len(test_names))
    ]

    print('Train Split: ', len(train_names), len(train_labels),
          len(NumFrames_train), len(train_bbox_files), ', Test Split: ',
          len(test_names), len(test_labels), len(NumFrames_test),
          len(test_bbox_files))
    data = {
        'train_names': train_names,
        'train_labels': train_labels,
        'NumFrames_train': NumFrames_train,
        'train_bbox_files': train_bbox_files,
        'test_names': test_names,
        'test_labels': test_labels,
        'NumFrames_test': NumFrames_test,
        'test_bbox_files': test_bbox_files
    }
    return data, new_split
Example #30
wg = nx.Graph()
pg = nx.Graph()
ii = 0
time_cost = []
# time_cost1 = []

print(time.strftime('%H:%M:%S', time.localtime(time.time())))

for sentence in s_list:
    # build the word network for the whole sentence
    ll = util.input_filer(sentence)

    start = time.clock()
    wg = add_s2g(wg, ' '.join(ll))
    end = time.clock()
    tc = str(end - start)
    ii += 1
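    # print the latest per-sentence cost every 50 sentences as a progress check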
    if ii == 50:
        ii = 0
        print(tc)
    time_cost.append(tc)
    # time_cost1.append(tc1)
    # only build the network for the clauses that contain the keyword
    # for ss in ll:
    #     if ('淡定' in ss):
    #         pg = add_s2g(pg, ss)

print(time.strftime('%H:%M:%S', time.localtime(time.time())))

util.save_file('D:\semantic analysis\c_date/zw_record1.txt', time_cost)
Example #31
    def down_music(self, url, song_name, singer, suffix):
        response = self.session.get(url)
        util.save_file(response, 'files/' + song_name + '-' + singer + suffix)
Example #32
def mplayer(data):
    import os
    import time
    filename = save_file(data)
    os.system("mplayer %s" % filename)
    time.sleep(1)
Example #33
def test():
    stream_test = StreamParser()
    stream_test.open_mic()
    import time
    time.sleep(5)
    stream_test.close_mic()
    time.sleep(5)
    import util
    data = "".join(stream_test.cache_frames)
    util.save_file(data)
    stream_test.play_stream(data)


def main():
    stream_test = StreamParser()
    stream_test.open_mic()
    import time
    while 1:

        try:
            time.sleep(5)
        except KeyboardInterrupt, e:
            #time.sleep(5)
            stream_test.close_mic()
            break
    import util
    data = "".join(stream_test.cache_frames)
    util.save_file(data)
    stream_test.play_stream(data)


if __name__ == "__main__":
    main()
Example #34
def test():
    stream_test = StreamParser()
    stream_test.open_mic()
    import time
    time.sleep(5)
    stream_test.close_mic()
    time.sleep(5)
    import util
    data = "".join(stream_test.cache_frames)      
    util.save_file(data)
    stream_test.play_stream(data)

def main():
    stream_test = StreamParser()
    stream_test.open_mic()
    import time
    while 1:
        
        try:
            time.sleep(5)
        except KeyboardInterrupt, e:
            #time.sleep(5)
            stream_test.close_mic()
            break
    import util
    data = "".join(stream_test.cache_frames)      
    util.save_file(data)
    stream_test.play_stream(data)

if __name__ == "__main__":
    main()
Example #35
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings,
                                  arg_parser,
                                  prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    model_file = config.result_dir + "/" + os.path.basename(__file__).split(".py")[0] + "_" + \
                 str(config.noExp) + ".model"
    dirName = os.path.basename(__file__).split(".py")[0] + "_" + str(
        config.noExp)
    results = MLResults(os.path.join(config.result_dir, dirName))
    results.save_config(config)  # save experiment settings
    results.make_dirs('train_summary', exist_ok=True)
    results.make_dirs('result_summary', exist_ok=True)
    results.make_dirs('mid_summary', exist_ok=True)

    # os.environ["CUDA_VISIBLE_DEVICES"] = config.GPU_number

    # input placeholders
    input_x = tf.placeholder(dtype=tf.float32,
                             shape=(None, ) + config.x_shape,
                             name='input_x')
    learning_rate = spt.AnnealingVariable('learning_rate',
                                          config.initial_lr,
                                          config.lr_anneal_factor,
                                          min_value=1e-6)
    multi_gpu = MultiGPU(disable_prebuild=True)
    # multi_gpu = MultiGPU()

    # derive the training operation
    gradses = []
    grad_vars = []
    train_losses = []
    BATCH_SIZE = get_batch_size(input_x)

    for dev, pre_build, [dev_input_x
                         ] in multi_gpu.data_parallel(BATCH_SIZE, [input_x]):
        with tf.device(dev), multi_gpu.maybe_name_scope(dev):
            # derive the loss for initializing
            with tf.name_scope('initialization'), \
                    arg_scope([p_net, q_net], is_initializing=True), \
                    spt.utils.scoped_set_config(spt.settings, auto_histogram=False):
                init_q_net = q_net(dev_input_x, n_z=config.train_n_samples)
                init_chain = init_q_net.chain(p_net,
                                              latent_axis=0,
                                              observed={'x': dev_input_x})
                init_loss = tf.reduce_mean(init_chain.vi.training.vimco())

            # derive the loss and lower-bound for training
            with tf.name_scope('training'), \
                    arg_scope([p_net, q_net], is_training=True):
                train_q_net = q_net(dev_input_x, n_z=config.train_n_samples)
                train_chain = train_q_net.chain(p_net,
                                                latent_axis=0,
                                                observed={'x': dev_input_x})
                train_loss = (tf.reduce_mean(train_chain.vi.training.vimco()) +
                              tf.losses.get_regularization_loss())
                train_losses.append(train_loss)

            # derive the logits output for testing
            with tf.name_scope('testing'):
                test_q_net = q_net(dev_input_x, n_z=config.test_n_z)
                test_chain = test_q_net.chain(p_net,
                                              latent_axis=0,
                                              observed={'x': dev_input_x})
                # log_prob of X and each univariate time series of X
                log_prob = tf.reduce_mean(
                    test_chain.model['x'].distribution.log_prob(dev_input_x),
                    0)
                log_prob_per_element = tf.reduce_sum(log_prob)
                log_prob_per_element_univariate_TS = tf.reduce_sum(
                    log_prob, [0, 1, 3])
                log_prob_per_element_univariate_TS_All = tf.reduce_sum(
                    log_prob, [1, 3])

            # derive the optimizer
            with tf.name_scope('optimizing'):
                params = tf.trainable_variables()
                optimizer = tf.train.AdamOptimizer(learning_rate)
                grads = optimizer.compute_gradients(train_loss, params)
                for grad, var in grads:
                    if grad is not None and var is not None:
                        if config.grad_clip_norm:
                            grad = tf.clip_by_norm(grad, config.grad_clip_norm)
                        if config.check_numerics:
                            grad = tf.check_numerics(
                                grad,
                                'gradient for {} has numeric issue'.format(
                                    var.name))
                        grad_vars.append((grad, var))
                gradses.append(grad_vars)

    # merge multi-gpu outputs and operations
    [train_loss] = multi_gpu.average([train_losses], BATCH_SIZE)
    train_op = multi_gpu.apply_grads(grads=multi_gpu.average_grads(gradses),
                                     optimizer=optimizer,
                                     control_inputs=tf.get_collection(
                                         tf.GraphKeys.UPDATE_OPS))

    # sort the contribution of each univariate_TS of input
    SORT_UNIVARIATE_TS_INPUT = tf.placeholder(dtype=tf.float32,
                                              shape=(None, None),
                                              name='SORT_UNIVARIATE_TS_INPUT')
    SORT_UNIVARIATE_TS = tf.nn.top_k(SORT_UNIVARIATE_TS_INPUT,
                                     k=config.metricNumber).indices + 1

    # load the training and testing data
    print("=" * 10 + "Shape of Input data" + "=" * 10)
    x, time_indexs, x_test, time_indexs2 = load_matrix_allData(
        config.dataReadformat, config.datapathForTrain, config.datapathForTest,
        config.timeLength, config.metricNumber, "TrainFileNameList.txt",
        "TestFileNameList.txt", results, config.norm)

    x_test = x_test.reshape([-1, config.timeLength, config.metricNumber, 1])
    print("Test:", x_test.shape)
    if config.batchTest:
        test_flow = DataFlow.arrays(
            [x_test], config.test_batch_size)  # DataFlow is iterator
        del x_test
    x_train, x_val = split_numpy_array(x, portion=config.VALID_PORTION)
    x_train = x_train.reshape([-1, config.timeLength, config.metricNumber, 1])
    x_val = x_val.reshape([-1, config.timeLength, config.metricNumber, 1])
    train_flow = DataFlow.arrays([x_train],
                                 config.batch_size,
                                 shuffle=False,
                                 skip_incomplete=True)
    val_flow = DataFlow.arrays([x_val], config.test_batch_size)
    print("Note:", config.x_dim,
          ", x_dim = size of datapoint = timeLength * metricNumber")
    print("Input data shape:", x.shape, "Train data shape:", x_train.shape,
          "Validation data shape:", x_val.shape)
    del x_train, x_val, x

    # training part
    with spt.utils.create_session().as_default() as session:
        spt.utils.ensure_variables_initialized()
        saver = CheckpointSaver(tf.trainable_variables(), model_file)
        if os.path.exists(model_file):
            # load the parameters of trained model
            saver.restore_latest()
        else:
            # initialize the network
            while True:
                breakFlag = 0
                for [x] in train_flow:
                    INITLOSS = session.run(init_loss, feed_dict={input_x: x})
                    print('Network initialized, first-batch loss is {:.6g}.'.
                          format(INITLOSS))
                    if np.isnan(INITLOSS) or np.isinf(
                            INITLOSS) or INITLOSS > 10**5:
                        pass
                    else:
                        breakFlag = 1
                        break
                if breakFlag:
                    break

            # train the network
            with train_flow.threaded(10) as train_flow:
                with spt.TrainLoop(
                        params,
                        var_groups=['q_net', 'p_net'],
                        max_epoch=config.max_epoch,
                        max_step=config.max_step,
                        summary_dir=(results.system_path('train_summary')
                                     if config.write_summary else None),
                        summary_graph=tf.get_default_graph(),
                        early_stopping=True) as loop:
                    trainer = spt.Trainer(loop,
                                          train_op, [input_x],
                                          train_flow,
                                          metrics={'loss': train_loss},
                                          summaries=tf.summary.merge_all(
                                              spt.GraphKeys.AUTO_HISTOGRAM))
                    # anneal the learning rate
                    trainer.anneal_after(learning_rate,
                                         epochs=config.lr_anneal_epoch_freq,
                                         steps=config.lr_anneal_step_freq)
                    validator = spt.Validator(
                        loop,
                        train_loss,
                        [input_x],
                        val_flow,
                    )
                    trainer.evaluate_after_epochs(validator, freq=10)
                    trainer.log_after_epochs(freq=1)
                    trainer.run()
                saver.save()

            # save the training information
            firWrite = True
            num = 0
            time0 = time.time()
            for [x_train] in train_flow:
                if config.savetrainDS:
                    # log prob of each metric of each instance
                    log_prob_per_element_univariate_TS_list_item_Train = (
                        session.run(log_prob_per_element_univariate_TS_All,
                                    feed_dict={input_x: x_train}))
                    log_prob_per_element_univariate_TS_list_Train = log_prob_per_element_univariate_TS_list_item_Train
                    log_prob_per_element_list_Train = np.sum(np.array(
                        log_prob_per_element_univariate_TS_list_item_Train),
                                                             axis=1).tolist()
                    if firWrite:
                        save_file(
                            results.system_path("train_summary"),
                            "OutlierScores_metric.txt",
                            log_prob_per_element_univariate_TS_list_Train)
                        save_file(results.system_path("train_summary"),
                                  "OutlierScores.txt",
                                  log_prob_per_element_list_Train)
                    else:
                        save_file(
                            results.system_path("train_summary"),
                            "OutlierScores_metric.txt",
                            log_prob_per_element_univariate_TS_list_Train,
                            "\n", "a")
                        save_file(results.system_path("train_summary"),
                                  "OutlierScores.txt",
                                  log_prob_per_element_list_Train, "\n", "a")

                firWrite = False
                num += 1
                if num % 1000 == 0:
                    print(
                        "-----Train %s >>>>>:Sum time of batch instances:%s" %
                        (num, float(time.time() - time0) / float(num)))
            del train_flow, val_flow

        # online test
        time2 = time.time()
        log_prob_per_element_list, log_prob_per_element_univariate_TS_list = [], []
        if config.batchTest:
            num = 0
            for [x_test] in test_flow:
                if config.savetestDS:
                    # log prob of each metric of each instance
                    log_prob_per_element_univariate_TS_list_item = (
                        session.run(log_prob_per_element_univariate_TS_All,
                                    feed_dict={input_x: x_test}))
                    log_prob_per_element_univariate_TS_list += log_prob_per_element_univariate_TS_list_item.tolist(
                    )
                    log_prob_per_element_list += np.sum(
                        np.array(log_prob_per_element_univariate_TS_list_item),
                        axis=1).tolist()

                num += 1
                if num % 200 == 0:
                    print("-----Test %s >>>>>:Sum time of batch instances:%s" %
                          (num, float(time.time() - time2) / float(num)))
        else:
            num = 1
            for batch_x in x_test:
                if config.savetestTS:
                    log_prob_per_element_list_item = (session.run(
                        log_prob_per_element, feed_dict={input_x: [batch_x]}))
                    log_prob_per_element_list.append(
                        log_prob_per_element_list_item)

                if config.savetestDS:
                    log_prob_per_element_univariate_TS_list_item = (
                        session.run(log_prob_per_element_univariate_TS,
                                    feed_dict={input_x: [batch_x]}))
                    log_prob_per_element_univariate_TS_list.append(
                        log_prob_per_element_univariate_TS_list_item)
                    log_prob_per_element_list.append(
                        sum(log_prob_per_element_univariate_TS_list_item))

                if num % 200 == 0:
                    print(
                        "-----Test>>>>>:%d, average time of each instance:%s" %
                        (num, float(time.time() - time2) / float(num)))
                num += 1

        # get the label file name and its line count
        allLabelFileNameLineCntList = get_machineID(results, config.labelpath)

        print("No of OutlierScores for all dataPoint:(%s):" %
              len(log_prob_per_element_list))
        if config.savetestDS:
            save_file(
                results.system_path("result_summary"),
                "OutlierScores_metric.txt",
                cat_List(allLabelFileNameLineCntList,
                         log_prob_per_element_univariate_TS_list))
        save_file(
            results.system_path("result_summary"), "OutlierScores.txt",
            cat_List(allLabelFileNameLineCntList, log_prob_per_element_list))

        if config.evaluation:
            # Preparation for the history two-metric results
            twoMetricScore = read_file(results.system_path("train_summary"),
                                       "OutlierScores_metric.txt")
            ave_twoMetricScore = np.mean(np.array(twoMetricScore),
                                         axis=0).tolist()
            save_file(results.system_path("result_summary"), "PRF.txt",
                      ["Average score of each univariate time series", "\n"],
                      ",")
            save_file(results.system_path("result_summary"), "PRF.txt",
                      ave_twoMetricScore + ["\n"], ",", "a")
            save_file(results.system_path("result_summary"), "PRF.txt", [
                "Threshold", "F", "Precision", "Recall", "TP", "FP", "FN", "\n"
            ], ",", "a")

            # get the sorted item each metric by change score
            twoMetricScoreList = cal_scoreChanges(
                log_prob_per_element_list, ave_twoMetricScore,
                log_prob_per_element_univariate_TS_list)
            MetricResult = session.run(
                SORT_UNIVARIATE_TS,
                feed_dict={SORT_UNIVARIATE_TS_INPUT: twoMetricScoreList})
            save_file(results.system_path("result_summary"),
                      "MetricResult.txt",
                      cat_List(allLabelFileNameLineCntList, MetricResult))

            # POT evaluation
            POT_TH = pot_eval(
                read_file(results.system_path("train_summary"),
                          "OutlierScores.txt", "float"), config.q,
                config.level)
            resultArray, outlierLabelfileNameLineCntList = cal_binaryResult(
                log_prob_per_element_list, POT_TH, time_indexs2,
                config.saveMetricInfo, allLabelFileNameLineCntList)
            evaluate(results, config.labelpath, resultArray, time_indexs2,
                     POT_TH)

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()

    interpretation_hit_ratio(truth_filepath=config.interpret_filepath,
                             prediction_filepath=os.path.join(
                                 config.result_dir, dirName, "result_summary",
                                 "MetricResult.txt"))
Example #36
# used to add a weight to new words
import util

slist = util.get_list_from_file(
    'D://semantic analysis//分词//词库//ok//result_dict.txt')

rlist = []
for ll in slist:
    rlist.append(ll + ' 3')

util.save_file('D://semantic analysis//分词//词库//ok//result_dict1.txt', rlist)
Example #37
def count_word(it, w_dict):
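    # tally how many times each node appears, accumulating into w_dict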
    for node in it:
        num = w_dict.get(node)
        if num is None:
            w_dict[node] = 1
        else:
            w_dict[node] = num + 1
    return w_dict


klist = ['正能量', '完爆', '扯淡', '达人', '接地气', '纠结', '吐槽', '淡定', '自拍']

txt_dir = 'D:\semantic analysis\c_date//'
jieba.set_dictionary("D://fc\dict1.txt")
jieba.initialize()
for key in klist:
    r_dict = {}
    r_list = []
    print(key)
    # get the list of files
    f_list = util.get_file_list(txt_dir + key, '.txt')
    for file in f_list:
        w_list = util.get_list_from_file(txt_dir + key + '//' + file)
        rl = get_word_list(w_list)
        count_word(rl, r_dict)

    # save the results
    for (k, v) in r_dict.items():
        r_list.append(k + '\t' + str(v))
    util.save_file('D://fc//result//' + key + '.txt', r_list)
Example #38
    def get_qrlogin_pic(self):
        heads = {}
        url = self._get_polling_url('ptqrshow')
        response = self.session.get(url)
        util.save_file(response, "login_qr.png")
Example #39
# filter useless characters out of Weibo posts
def input_filer(temp):
    # strip "shared from @user" tags and @mentions
    temp = re.sub("(分享自@\S+)", '', temp)
    temp = re.sub("(@\S+)", '', temp)
    # compile the regex into a Pattern object (matches runs of CJK characters)
    pattern = re.compile(r'[\u4e00-\u9fa5]+')
    # findall returns every match, or an empty list when nothing matches
    match = pattern.findall(temp)
    return ' '.join(match)


rr_list = []

with open('2013-02-03.txt', 'r', encoding='utf-8') as r_file:
    r_list = r_file.readlines()
    for sentence in r_list:
        sentence = sentence.strip().rstrip('\n')
        if sentence is not None and sentence != '':
            rr_list.append(input_filer(sentence))

util.save_file('result.txt', rr_list)

# jieba.load_userdict("D:\semantic analysis\分词\词库\导出结果\dict1.txt")
# jieba.set_dictionary("D:\semantic analysis\分词\词库\导出结果\dict1.txt")
# jieba.initialize()
# seg_list = jieba.cut(test_str, cut_all=False)

# print("Default Mode: " + "/ ".join(seg_list))  # accurate mode
#
# print(" ".join(seg_list))
Example #40
def mplayer(data):
    import os
    import time
    filename = save_file(data)
    os.system("mplayer %s" % filename)
    time.sleep(1)