示例#1
0
def scrape_asx_companies():
    """Run the scrape stages in order, printing a timestamp after each one.

    The stages are: ``get_asx_companies``, ``get_statistics`` for today's
    date, ``append_stats`` for the same date, and ``get_latest_prices``.
    """

    def clock():
        # Current wall-clock time as HH:MM:SS, used in every progress line.
        return datetime.datetime.now().strftime("%H:%M:%S")

    print("\nStart time: " + clock() + "\n")
    get_asx_companies()
    print("\nFinished getting the companies at " + clock() + "\n")

    # The same YYYY-MM-DD date string is handed to both statistics stages.
    run_date = datetime.date.today().strftime("%Y-%m-%d")
    get_statistics(run_date)
    print("\nFinished scraping stats at " + clock() + "\n")

    print("Appending stats to the database...")
    append_stats(run_date)
    print("\n\nFinished appending stats at " + clock() + "\n")

    get_latest_prices()
    print("\n\nFinished scraping prices at " + clock() + "\n")
示例#2
0
文件: document.py 项目: CheggEng/brat
def get_next_unnanotated(collection, start):
    """Report the position just past the first document with all-zero stats.

    The per-document statistics of ``collection`` are scanned from index
    ``start`` (converted with ``int``). At the first document whose
    statistics sum to zero, that index plus one is reported. When no such
    document is found, ``start`` is reported unchanged.

    Returns:
        A dictionary of the form ``{"new_pos": <int>}``.

    Raises:
        CollectionNotAccessibleError: if ``get_statistics`` raises
            ``OSError``.
    """
    begin = int(start)
    real_dir = real_directory(collection)

    assert_allowed_to_read(real_dir)

    # Document base names: directory entries ending in 'txt', minus their
    # last four characters.
    base_names = [
        entry[0:-4] for entry in _listdir(real_dir) if entry.endswith('txt')
    ]

    try:
        _stats_types, doc_stats = get_statistics(real_dir, base_names)
    except OSError:
        # something like missing access permissions?
        raise CollectionNotAccessibleError

    # The range is empty when ``begin`` is at or past the end, so no
    # separate bounds check is needed.
    for idx in range(begin, len(doc_stats)):
        if sum(doc_stats[idx]) == 0:
            return {"new_pos": idx + 1}
    return {"new_pos": begin}
示例#3
0
文件: document.py 项目: CheggEng/brat
def get_directory_information(collection):
    """Build the directory-listing response for ``collection``.

    The response dictionary holds the combined sub-directory/document table
    (``items``) with its column ``header``, the ``parent`` path, the README
    text (``description``) and the values returned by the search,
    disambiguator, normalization, annotation-logging, annotator and
    annotation-log-file lookups for the directory.

    Returns:
        Whatever ``_inject_annotation_type_conf`` returns for the directory
        and the response dictionary.

    Raises:
        CollectionNotAccessibleError: if ``get_statistics`` raises
            ``OSError``.
    """
    real_dir = real_directory(collection)

    assert_allowed_to_read(real_dir)

    # Document base names: directory entries ending in 'txt', minus their
    # last four characters.
    base_names = [
        entry[0:-4] for entry in _listdir(real_dir) if entry.endswith('txt')
    ]

    # One row per document: its name plus the mtime reported for the
    # matching joined-annotation file path.
    rows = [
        [name,
         _getmtime(path_join(DATA_DIR, real_dir,
                             name + "." + JOINED_ANN_FILE_SUFF))]
        for name in base_names
    ]
    header = [("Document", "string"), ("Modified", "time")]

    try:
        stats_types, doc_stats = get_statistics(real_dir, base_names)
    except OSError:
        # something like missing access permissions?
        raise CollectionNotAccessibleError

    # Append each document's statistics columns to its row.
    rows = [row + doc_stats[idx] for idx, row in enumerate(rows)]
    header += stats_types

    # Sub-directories, each wrapped in a single-element list so they can be
    # concatenated with the row prefix below.
    subdirs = [
        [entry] for entry in _listdir(real_dir)
        if isdir(path_join(real_dir, entry))
    ]

    # Root check on normalized paths, ignoring e.g. trailing slashes.
    if normpath(real_dir) == normpath(DATA_DIR):
        parent = None
    else:
        parent = abspath(path_join(real_dir, '..'))[len(DATA_DIR) + 1:]
        # to get consistent processing client-side, add explicitly to list
        subdirs.append([".."])

    # Protocol rows: "c" marks a collection (directory), "d" a document;
    # the second column is unused here (can point to a specific annotation).
    items = [["c", None] + sub for sub in subdirs]
    items.extend(["d", None] + row for row in rows)

    search_config = get_search_config(real_dir)
    disambiguator_config = get_disambiguator_config(real_dir)
    normalization_config = get_normalization_config(real_dir)

    # README (if any) is sent as the description of the collection.
    try:
        with open_textfile(path_join(real_dir, "README")) as readme:
            description = readme.read()
    except IOError:
        description = None

    # Flag telling the client whether to invoke timing actions.
    ann_logging = annotation_logging_active(real_dir)

    # NER services, if any.
    ner_taggers = get_annotator_config(real_dir)

    # Value sent under the 'logging' key.
    ann_logfile = options_get_annlogfile(real_dir)

    response = {
        'items': items,
        'header': header,
        'parent': parent,
        'messages': [],
        'description': description,
        'search_config': search_config,
        'disambiguator_config': disambiguator_config,
        'normalization_config': normalization_config,
        'annotation_logging': ann_logging,
        'ner_taggers': ner_taggers,
        'logging': ann_logfile,
    }
    return _inject_annotation_type_conf(real_dir, json_dic=response)
def get_directory_information(collection):
    """Build the directory-listing response for ``collection``.

    The response dictionary holds the combined sub-directory/document table
    (``items``) with its column ``header``, the ``parent`` path, the README
    text (``description``) and the values returned by the search,
    disambiguator, normalization, annotation-logging and annotator lookups
    for the directory.

    Returns:
        Whatever ``_inject_annotation_type_conf`` returns for the directory
        and the response dictionary.

    Raises:
        CollectionNotAccessibleError: if ``get_statistics`` raises
            ``OSError``.
    """
    real_dir = real_directory(collection)

    assert_allowed_to_read(real_dir)

    # Document base names: directory entries ending in 'txt', minus their
    # last four characters.
    base_names = [entry[0:-4] for entry in _listdir(real_dir)
                  if entry.endswith('txt')]

    # One row per document: its name plus the mtime reported for the
    # matching joined-annotation file path.
    rows = []
    for name in base_names:
        ann_path = path_join(DATA_DIR, real_dir,
                             name + "." + JOINED_ANN_FILE_SUFF)
        rows.append([name, _getmtime(ann_path)])
    header = [("Document", "string"), ("Modified", "time")]

    try:
        stats_types, doc_stats = get_statistics(real_dir, base_names)
    except OSError:
        # something like missing access permissions?
        raise CollectionNotAccessibleError

    # Append each document's statistics columns to its row.
    rows = [row + doc_stats[idx] for idx, row in enumerate(rows)]
    header += stats_types

    # Sub-directories, each wrapped in a single-element list so they can be
    # concatenated with the row prefix below.
    subdirs = [[entry] for entry in _listdir(real_dir)
               if isdir(path_join(real_dir, entry))]

    # Root check on normalized paths, ignoring e.g. trailing slashes.
    if normpath(real_dir) == normpath(DATA_DIR):
        parent = None
    else:
        parent = abspath(path_join(real_dir, '..'))[len(DATA_DIR) + 1:]
        # to get consistent processing client-side, add explicitly to list
        subdirs.append([".."])

    # Protocol rows: "c" marks a collection (directory), "d" a document;
    # the second column is unused here (can point to a specific annotation).
    items = [["c", None] + sub for sub in subdirs]
    items.extend(["d", None] + row for row in rows)

    search_config = get_search_config(real_dir)
    disambiguator_config = get_disambiguator_config(real_dir)
    normalization_config = get_normalization_config(real_dir)

    # README (if any) is sent as the description of the collection.
    try:
        with open_textfile(path_join(real_dir, "README")) as readme:
            description = readme.read()
    except IOError:
        description = None

    # Flag telling the client whether to invoke timing actions.
    ann_logging = annotation_logging_active(real_dir)

    # NER services, if any.
    ner_taggers = get_annotator_config(real_dir)

    response = {
        'items': items,
        'header': header,
        'parent': parent,
        'messages': [],
        'description': description,
        'search_config': search_config,
        'disambiguator_config': disambiguator_config,
        'normalization_config': normalization_config,
        'annotation_logging': ann_logging,
        'ner_taggers': ner_taggers,
    }
    return _inject_annotation_type_conf(real_dir, json_dic=response)
示例#5
0
def get_directory_information(collection):
    """Build the directory-listing response for ``collection``.

    Returns:
        A dictionary with the combined sub-directory/document table
        (``items``), its column ``header``, the ``parent`` path, an empty
        ``messages`` list, the five type collections returned by
        ``get_span_types`` and the README text (``description``, ``None``
        when the README cannot be opened).

    Raises:
        CollectionNotAccessibleError: if ``get_statistics`` raises
            ``OSError``.
    """
    # Function-scope imports as in the original, but executed once per call
    # instead of once per document inside the loop.
    from os.path import getmtime, join, normpath
    from annotation import JOINED_ANN_FILE_SUFF

    real_dir = real_directory(collection)

    assert_allowed_to_read(real_dir)

    # Document base names: directory entries ending in 'txt', minus their
    # last four characters.
    base_names = [fn[0:-4] for fn in _listdir(real_dir) if fn.endswith('txt')]

    doclist_header = [("Document", "string")]

    # Then get the modification times
    doclist = []
    for doc_name in base_names:
        try:
            mtime = getmtime(
                join(DATA_DIR, real_dir,
                     doc_name + "." + JOINED_ANN_FILE_SUFF))
        except (OSError, ValueError):
            # The file did not exist, or the path could not be built or
            # stat'ed. Still best-effort, but KeyboardInterrupt and
            # SystemExit are no longer swallowed as a bare except would.
            mtime = -1
        doclist.append([doc_name, mtime])
    doclist_header.append(("Modified", "time"))

    try:
        stats_types, doc_stats = get_statistics(real_dir, base_names)
    except OSError:
        # something like missing access permissions?
        raise CollectionNotAccessibleError

    # Indexing (rather than zip) keeps a length mismatch loud.
    doclist = [row + doc_stats[idx] for idx, row in enumerate(doclist)]
    doclist_header += stats_types

    # Sub-directories, each wrapped in a single-element list (just in case,
    # and for generality).
    dirlist = [
        [entry] for entry in _listdir(real_dir)
        if isdir(path_join(real_dir, entry))
    ]

    # Check whether at root on normalized paths, so that e.g. a trailing
    # slash on either side does not make the root look like a sub-directory.
    if normpath(real_dir) != normpath(DATA_DIR):
        parent = abspath(path_join(real_dir, '..'))[len(DATA_DIR) + 1:]
        # to get consistent processing client-side, add explicitly to list
        dirlist.append([".."])
    else:
        parent = None

    # Combine directory and document lists, adding a column that
    # differentiates them ("c" for collection, i.e. directory, and "d" for
    # document) and an unused column (can point to a specific annotation)
    # required by the protocol.
    combolist = [["c", None] + entry for entry in dirlist]
    combolist.extend(["d", None] + row for row in doclist)

    (event_types, entity_types, attribute_types, relation_types,
     unconf_types) = get_span_types(real_dir)

    # read in README (if any) to send as a description of the
    # collection
    try:
        with open_textfile(path_join(real_dir, "README")) as txt_file:
            readme_text = txt_file.read()
    except IOError:
        readme_text = None

    json_dic = {
        'items': combolist,
        'header': doclist_header,
        'parent': parent,
        'messages': [],
        'event_types': event_types,
        'entity_types': entity_types,
        'attribute_types': attribute_types,
        'relation_types': relation_types,
        'unconfigured_types': unconf_types,
        'description': readme_text,
    }
    return json_dic
示例#6
0
文件: document.py 项目: WeSIG/Delta
def get_directory_information(collection):
    """Build the per-user directory-listing response for ``collection``.

    The documents and sub-directories listed depend on the session user:
    a missing or ``'guest'`` user sees neither; a user found in
    ``USER_PASSWORD`` sees every ``txt`` document; any other user sees the
    documents returned by the two ``DBlite`` queries. Intermediate values
    are printed to ``sys.stderr``.

    Returns:
        Whatever ``_inject_annotation_type_conf`` returns for the directory
        and the response dictionary.

    Raises:
        CollectionNotAccessibleError: if an ``OSError`` is raised while
            fetching or printing the statistics.
    """
    real_dir = real_directory(collection)
    assert_allowed_to_read(real_dir)

    user = get_session().get('user')
    is_guest = user is None or user == 'guest'

    # Get the document names
    if is_guest:
        base_names = []
    elif user in USER_PASSWORD:
        # Users can be taken from the configuration file.
        base_names = [
            entry[0:-4] for entry in _listdir(real_dir)
            if entry.endswith('txt')
        ]
    else:
        db = DBlite()
        base_names = db.get_AnnNull_files(collection)
        in_progress = db.get_AnnING_files(collection, user)
        print("names_ING", in_progress, file=sys.stderr)
        base_names.extend(in_progress)

    # One row per document: its name plus the mtime reported for the
    # matching joined-annotation file path.
    rows = [
        [name,
         _getmtime(path_join(DATA_DIR, real_dir,
                             name + "." + JOINED_ANN_FILE_SUFF))]
        for name in base_names
    ]
    header = [("文档", "string"), ("修改时间", "time")]

    # Example shapes (from the original author's notes):
    #   stats_types: [('Entities', 'int'), ('Relations', 'int'),
    #                 ('Events', 'int')]
    #   doc_stats:   [[29, 0, 0], [97, 0, 0], ...]
    #   rows after merging:
    #                [['ned.train-doc-184', 1555259780.624325, 29, 0, 0], ...]
    try:
        stats_types, doc_stats = get_statistics(real_dir, base_names)
        print("stats_types:", stats_types, file=sys.stderr)
        print("doc_stats:", doc_stats, file=sys.stderr)
    except OSError:
        # something like missing access permissions?
        raise CollectionNotAccessibleError

    # Append each document's statistics columns to its row.
    rows = [row + doc_stats[idx] for idx, row in enumerate(rows)]
    print("doclist:", rows, file=sys.stderr)
    header += stats_types
    print("doclist_header:", header, file=sys.stderr)

    # Guests get no sub-directories; all other users currently get the same
    # full listing. Each name is wrapped in a single-element list.
    if is_guest:
        subdirs = []
    else:
        subdirs = [
            [entry] for entry in _listdir(real_dir)
            if isdir(path_join(real_dir, entry))
        ]

    # Root check on normalized paths, ignoring e.g. trailing slashes.
    if normpath(real_dir) == normpath(DATA_DIR):
        parent = None
    else:
        parent = abspath(path_join(real_dir, '..'))[len(DATA_DIR) + 1:]
        # to get consistent processing client-side, add explicitly to list
        subdirs.append([".."])

    # Protocol rows: "c" marks a collection (directory), "d" a document;
    # the second column is unused here (can point to a specific annotation).
    items = [["c", None] + sub for sub in subdirs]
    items.extend(["d", None] + row for row in rows)

    search_config = get_search_config(real_dir)
    disambiguator_config = get_disambiguator_config(real_dir)
    normalization_config = get_normalization_config(real_dir)

    # README (if any) is sent as the description of the collection.
    try:
        with open_textfile(path_join(real_dir, "README")) as readme:
            description = readme.read()
    except IOError:
        description = None

    # Flag telling the client whether to invoke timing actions.
    ann_logging = annotation_logging_active(real_dir)

    # NER services, if any.
    ner_taggers = get_annotator_config(real_dir)

    response = {
        'items': items,
        'header': header,
        'parent': parent,
        'messages': [],
        'description': description,
        'search_config': search_config,
        'disambiguator_config': disambiguator_config,
        'normalization_config': normalization_config,
        'annotation_logging': ann_logging,
        'ner_taggers': ner_taggers,
    }
    return _inject_annotation_type_conf(real_dir, json_dic=response)
示例#7
0
def get_directory_information(collection):
    """Build the directory-listing response for ``collection``.

    Returns:
        A dictionary with the combined sub-directory/document table
        (``items``), its column ``header``, the ``parent`` path, an empty
        ``messages`` list, the five type collections returned by
        ``get_span_types`` and the README text (``description``, ``None``
        when the README cannot be opened).

    Raises:
        CollectionNotAccessibleError: if ``get_statistics`` raises
            ``OSError``.
    """
    # Function-scope imports as in the original, but executed once per call
    # instead of once per document inside the loop.
    from os.path import getmtime, join, normpath
    from annotation import JOINED_ANN_FILE_SUFF

    real_dir = real_directory(collection)

    assert_allowed_to_read(real_dir)

    # Document base names: directory entries ending in 'txt', minus their
    # last four characters.
    base_names = [fn[0:-4] for fn in _listdir(real_dir)
                  if fn.endswith('txt')]

    doclist_header = [("Document", "string")]

    # Then get the modification times
    doclist = []
    for doc_name in base_names:
        try:
            mtime = getmtime(join(DATA_DIR, real_dir,
                                  doc_name + "." + JOINED_ANN_FILE_SUFF))
        except (OSError, ValueError):
            # The file did not exist, or the path could not be built or
            # stat'ed. Still best-effort, but KeyboardInterrupt and
            # SystemExit are no longer swallowed as a bare except would.
            mtime = -1
        doclist.append([doc_name, mtime])
    doclist_header.append(("Modified", "time"))

    try:
        stats_types, doc_stats = get_statistics(real_dir, base_names)
    except OSError:
        # something like missing access permissions?
        raise CollectionNotAccessibleError

    # Indexing (rather than zip) keeps a length mismatch loud.
    doclist = [row + doc_stats[idx] for idx, row in enumerate(doclist)]
    doclist_header += stats_types

    # Sub-directories, each wrapped in a single-element list (just in case,
    # and for generality).
    dirlist = [[entry] for entry in _listdir(real_dir)
               if isdir(path_join(real_dir, entry))]

    # Check whether at root on normalized paths, so that e.g. a trailing
    # slash on either side does not make the root look like a sub-directory.
    if normpath(real_dir) != normpath(DATA_DIR):
        parent = abspath(path_join(real_dir, '..'))[len(DATA_DIR) + 1:]
        # to get consistent processing client-side, add explicitly to list
        dirlist.append([".."])
    else:
        parent = None

    # Combine directory and document lists, adding a column that
    # differentiates them ("c" for collection, i.e. directory, and "d" for
    # document) and an unused column (can point to a specific annotation)
    # required by the protocol.
    combolist = [["c", None] + entry for entry in dirlist]
    combolist.extend(["d", None] + row for row in doclist)

    (event_types, entity_types, attribute_types, relation_types,
     unconf_types) = get_span_types(real_dir)

    # read in README (if any) to send as a description of the
    # collection
    try:
        with open_textfile(path_join(real_dir, "README")) as txt_file:
            readme_text = txt_file.read()
    except IOError:
        readme_text = None

    json_dic = {
        'items': combolist,
        'header': doclist_header,
        'parent': parent,
        'messages': [],
        'event_types': event_types,
        'entity_types': entity_types,
        'attribute_types': attribute_types,
        'relation_types': relation_types,
        'unconfigured_types': unconf_types,
        'description': readme_text,
    }
    return json_dic
示例#8
0
def get_stats(user_id, max_no):
    """Return ``stats.get_statistics`` called with both arguments as ints."""
    fetch = stats.get_statistics
    user_id_int = int(user_id)
    max_no_int = int(max_no)
    return fetch(user_id_int, max_no_int)