示例#1
0
def test_implementation_constructor():
    """The string alias ``"wok"`` and the enum member must load identically."""
    wos_path = DATA_DIR / "example_wos.ris"

    def load_with(implementation):
        # Open the file afresh for every call so each parse starts at offset 0.
        with open(wos_path, "r") as handle:
            return rispy.load(handle, implementation=implementation)

    loaded_by_name = load_with("wok")
    loaded_by_enum = load_with(rispy.RisImplementation.WOK)

    assert loaded_by_name == loaded_by_enum
示例#2
0
def test_load_example_full_ris_without_whitespace():
    """Parse a file that has no whitespace after the ER tag.

    Resolves https://github.com/MrTango/rispy/pull/25
    """
    first_abstract = "BACKGROUND: Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus.  RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium."  # noqa: E501
    second_abstract = "BACKGROUND: Lorem dammed ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus.  RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium."  # noqa: E501

    # Expected content of the first record in the fixture.
    first_record = {
        "type_of_reference": "JOUR",
        "id": "12345",
        "primary_title": "Title of reference",
        "first_authors": ["Marx, Karl", "Lindgren, Astrid"],
        "secondary_authors": ["Glattauer, Daniel"],
        "publication_year": "2014//",
        "notes_abstract": first_abstract,
        "keywords": ["Pippi", "Nordwind", "Piraten"],
        "alternate_title3": "Lorem",
        "alternate_title2": "lorem",
        "volume": "9",
        "number": "3",
        "start_page": "e0815",
        "place_published": "United States",
        "publisher": "Fun Factory",
        "issn": "1932-6208",
        "note": "1008150341",
        "file_attachments2": "http://example.com",
        "url": "http://example_url.com",
    }

    # Expected content of the second record in the fixture.
    second_record = {
        "type_of_reference": "JOUR",
        "id": "12345",
        "primary_title": "The title of the reference",
        "first_authors": ["Marxus, Karlus", "Lindgren, Astrid"],
        "secondary_authors": ["Glattauer, Daniel"],
        "publication_year": "2006//",
        "notes_abstract": second_abstract,
        "keywords": ["Pippi Langstrumpf", "Nordwind", "Piraten"],
        "alternate_title3": "Lorem",
        "alternate_title2": "lorem",
        "volume": "6",
        "number": "3",
        "start_page": "e0815341",
        "place_published": "Germany",
        "publisher": "Dark Factory",
        "issn": "1732-4208",
        "note": "1228150341",
        "file_attachments2": "http://example2.com",
        "url": "http://example_url.com",
    }

    ris_path = DATA_DIR / "example_full_without_whitespace.ris"
    with open(ris_path, "r") as handle:
        loaded = rispy.load(handle)

    assert [first_record, second_record] == loaded
示例#3
0
def test_list_tag_enforcement():
    """Load with ``enforce_list_tags=False`` and an empty ``list_tags``.

    The first entry is expected to carry both ``authors`` and ``issn`` as
    lists of values.
    """
    ris_path = DATA_DIR / "example_custom_list_tags.ris"

    # The path itself (not an open handle) is handed to rispy here.
    records = rispy.load(ris_path, enforce_list_tags=False, list_tags=[])
    first_record = records[0]

    assert {
        "type_of_reference": "JOUR",
        "authors": ["Marx, Karl", "Marxus, Karlus"],
        "issn": ["12345", "ABCDEFG", "666666"],
    } == first_record
示例#4
0
def test_strip_bom():
    """Check the first entry parsed from ``example_bom.ris`` opened as UTF-8."""
    bom_path = DATA_DIR / "example_bom.ris"

    # Plain UTF-8 decodes the content of this file properly, but the BOM is
    # left in the text that rispy receives.
    with open(bom_path, "r", encoding="utf-8") as handle:
        records = rispy.load(handle)

    print(records)
    wanted = {"type_of_reference": "JOUR", "doi": "10.1186/s40981-020-0316-0"}
    assert wanted == records[0]
示例#5
0
def test_wos_ris():
    """Parse ``example_wos.ris`` with ``rispy.WokParser`` and check both titles."""
    expected_titles = (
        "Interactions stabilizing the structure of the core light-harvesting complex (LHl) of photosynthetic bacteria and its subunit (B820)",  # noqa: E501
        "Proximal and distal influences on ligand binding kinetics in microperoxidase and heme model compounds",  # noqa: E501
    )

    with open(DATA_DIR / "example_wos.ris", "r") as handle:
        records = rispy.load(handle, implementation=rispy.WokParser)

    # The length check comes first, so the pairwise loop below covers every record.
    assert len(records) == 2

    for record, expected_title in zip(records, expected_titles):
        assert record["document_title"] == expected_title
示例#6
0
def test_file_implementation_write():
    """Round-trip ``example_full.ris`` through a custom parser/writer pair.

    The fixture is loaded with ``CustomParser``, dumped with ``CustomWriter``
    and loaded again; the reloaded entries must equal the first load.

    The intermediate file is written to a temporary directory rather than
    into ``DATA_DIR``, so the test leaves no generated artifact among the
    fixtures and does not need the fixture directory to be writable.
    """
    # Local imports keep this block self-contained.
    import tempfile
    from pathlib import Path

    class CustomParser(rispy.RisParser):
        DEFAULT_IGNORE = ["JF", "ID", "KW"]

    class CustomWriter(rispy.RisWriter):
        DEFAULT_IGNORE = ["JF", "ID", "KW"]

    list_tags = ["SN", "T1", "A1", "UR"]

    fn = DATA_DIR / "example_full.ris"
    with open(fn, "r") as f:
        entries = rispy.load(f, implementation=CustomParser, list_tags=list_tags)

    # The directory and everything in it is removed when the block exits.
    with tempfile.TemporaryDirectory() as tmp_dir:
        fn_write = Path(tmp_dir) / "example_full_write.ris"

        with open(fn_write, "w") as f:
            rispy.dump(entries, f, implementation=CustomWriter, list_tags=list_tags)

        with open(fn_write, "r") as f:
            reload = rispy.load(f, implementation=CustomParser, list_tags=list_tags)

    assert reload == entries
示例#7
0
def test_load_example_basic_ris():
    """Load ``example_basic.ris`` from an open handle and from a ``Path``."""
    ris_path = DATA_DIR / "example_basic.ris"
    wanted = {
        "type_of_reference": "JOUR",
        "title": "A Mathematical Theory of Communication",
        "authors": ["Shannon,Claude E."],
        "year": "1948/07//",
        "alternate_title3": "Bell System Technical Journal",
        "volume": "27",
        "start_page": "379",
        "end_page": "423",
    }

    # Variant 1: an already opened text file object.
    with open(ris_path, "r") as handle:
        from_handle = rispy.load(handle)
    assert wanted == from_handle[0]

    # Variant 2: a pathlib object passed straight to rispy.
    from_path = rispy.load(Path(ris_path))
    assert wanted == from_path[0]
示例#8
0
def test_unkown_skip():
    """Load ``example_multi_unknown_tags.ris`` with ``skip_unknown_tags=True``.

    The first entry must equal the dictionary below, which contains no
    ``unknown_tag`` key.
    """
    with open(DATA_DIR / "example_multi_unknown_tags.ris", "r") as handle:
        records = rispy.load(handle, skip_unknown_tags=True)

    wanted = {
        "type_of_reference": "JOUR",
        "title": "A Mathematical Theory of Communication",
        "authors": ["Shannon,Claude E."],
        "year": "1948/07//",
        "alternate_title3": "Bell System Technical Journal",
        "volume": "27",
        "end_page": "423",
    }
    assert wanted == records[0]
示例#9
0
def read_ris(fp):
    """RIS file reader.

    The file is opened with each candidate encoding in turn until one
    succeeds. List-valued columns are then flattened to comma-separated
    strings before the dataframe is standardized.

    Parameters
    ----------
    fp: str, pathlib.Path
        File path to the RIS file.

    Returns
    -------
    pandas.DataFrame:
        Dataframe with entries, as returned by ``standardize_dataframe``.

    Raises
    ------
    ValueError
        If the file could not be read with any of the candidate encodings.

    """

    # Strict decoders go first. ISO-8859-1 maps every possible byte, so it
    # never raises UnicodeDecodeError; trying it first would shadow the UTF-8
    # variants and turn UTF-8 text into mojibake. 'utf-8-sig' also accepts
    # files without a BOM and strips the BOM when there is one.
    encodings = ['utf-8-sig', 'utf-8', 'ISO-8859-1']

    # The mapping does not depend on the encoding, so build it once.
    mapping = _tag_key_mapping(reverse=False)

    entries = None
    for encoding in encodings:
        try:
            with open(fp, 'r', encoding=encoding) as bibliography_file:
                entries = list(rispy.load(bibliography_file, mapping=mapping))
            break
        except UnicodeDecodeError:
            # Wrong guess for this file; fall through to the next encoding.
            continue
        except IOError as e:
            # Logged and retried with the next encoding, as before.
            logging.warning(e)

    if entries is None:
        raise ValueError("Cannot find proper encoding for data file.")

    df = pd.DataFrame(entries)

    def converter(x):
        # Values that cannot be joined (e.g. the NaN pandas inserts for a
        # missing field) become an empty string.
        try:
            return ", ".join(x)
        except TypeError:
            return ""

    for tag in LIST_TYPE_TAGS:
        key = TAG_KEY_MAPPING[tag]
        if key in df:
            df[key] = df[key].apply(converter)
    return standardize_dataframe(df)
示例#10
0
def test_load_multiline_ris():
    """Check the first entry parsed from ``multiline.ris``.

    The expected entry has a single-string ``notes_abstract`` and a
    three-item ``notes`` list.
    """
    with open(DATA_DIR / "multiline.ris", "r") as handle:
        records = rispy.load(handle)

    wanted = {
        "type_of_reference": "JOUR",
        "title": "A Mathematical Theory of Communication",
        "authors": ["Shannon,Claude E."],
        "year": "1948/07//",
        "alternate_title3": "Bell System Technical Journal",
        "volume": "27",
        "start_page": "379",
        "end_page": "423",
        "notes_abstract": "first line, then second line and at the end the last line",
        "notes": ["first line", "* second line", "* last line"],
    }

    assert wanted == records[0]
示例#11
0
def test_load_single_unknown_tag_ris():
    """Check the first entry parsed from ``example_single_unknown_tag.ris``.

    The expected entry carries an ``unknown_tag`` dictionary mapping ``"JP"``
    to a list of two values.
    """
    with open(DATA_DIR / "example_single_unknown_tag.ris", "r") as handle:
        records = rispy.load(handle)

    wanted = {
        "type_of_reference": "JOUR",
        "title": "A Mathematical Theory of Communication",
        "authors": ["Shannon,Claude E."],
        "year": "1948/07//",
        "alternate_title3": "Bell System Technical Journal",
        "volume": "27",
        "start_page": "379",
        "end_page": "423",
        "unknown_tag": {"JP": ["CRISPR", "Direct Current"]},
    }

    assert wanted == records[0]
示例#12
0
    def create(self, request):
        """Create one ``Article`` per record of an uploaded RIS file.

        The upload is read from ``request.FILES['file']``, parsed with
        ``rispy.load`` and every parsed entry is passed as keyword arguments
        to ``Article.objects.create``.

        The upload is spooled into an anonymous temporary file instead of a
        fixed shared path, so concurrent requests cannot overwrite each
        other's data and nothing is left on disk afterwards.

        Returns a ``Response`` carrying a success message.
        """
        # Local imports keep this block self-contained.
        import io
        import tempfile

        # Read the uploaded file from the request.
        arq = request.FILES['file']

        # Copy the upload into a private temporary file, chunk by chunk.
        with tempfile.TemporaryFile(mode='w+b') as spool:
            for chunk in arq.chunks():
                spool.write(chunk)
            spool.seek(0)

            # Re-read the spooled bytes as text. TextIOWrapper uses the same
            # default encoding and newline handling as open(path, 'r').
            with io.TextIOWrapper(spool) as bibliography_file:
                entries = rispy.load(bibliography_file)
                for entry in entries:
                    # Drop an id that already exists (presumably so a new one
                    # is assigned on create; confirm against the model).
                    # Records without an id are created as they are.
                    if 'id' in entry and Article.objects.filter(id=entry['id']).exists():
                        del entry['id']
                    # Create the object from the fields mapped by rispy.
                    Article.objects.create(**entry)

        # Return the success message.
        return Response({'message': 'Articles criados com sucesso!!'})
示例#13
0
def main():
    """Copy the RIS records whose accession number is listed in an id file.

    Command-line arguments (all required):

    * ``-i/--input_file``: text file with one accession number per line.
    * ``-d/--data_file``: RIS file to search.
    * ``-o/--output_file``: RIS file that receives the matching records.

    Exit status: argparse exits with 2 on invalid arguments and 0 for
    ``-h``. An unreadable id file or an OS error on the data or output file
    prints a message and exits with status 1. Any other error is reported
    and re-raised.
    """

    # Get input arguments
    parser = argparse.ArgumentParser(
        description=
        'Given a list of accession numbers in a file, extract records that match those ids'
    )

    parser.add_argument(
        "-i",
        "--input_file",
        type=str,
        help=
        "The path to the file that contains the accession numbers to search for",
        required=True)
    parser.add_argument(
        "-d",
        "--data_file",
        type=str,
        help="The path to the data file. Should be in .ris format",
        required=True)
    parser.add_argument(
        "-o",
        "--output_file",
        type=str,
        help=
        "The path to the output file for records taht match. Will be in .ris format",
        required=True)

    # argparse already prints usage and exits (status 2 on errors, 0 for -h).
    # Wrapping this call in a bare except would swallow that SystemExit and
    # report success for invalid command lines.
    args = parser.parse_args()

    input_file = args.input_file
    data_file = args.data_file
    output_file = args.output_file

    # Get the accession ids from the input file. A set gives O(1) membership
    # tests, and the with-block closes the file.
    try:
        with open(input_file) as id_file:
            accession_numbers = {line.strip() for line in id_file}
    except (OSError, UnicodeError):
        print(f'Unable to open input file {input_file}')
        sys.exit(1)
    # Blank lines in the id file must not match records with an empty number.
    accession_numbers.discard('')

    # Iterate over the data file and keep every record whose id is one of
    # those specified in the id file.
    selected_records = []
    try:
        with open(data_file, 'r') as bibliography_file:
            entries = rispy.load(bibliography_file)
            for entry in entries:
                # Records without an accession number cannot match; skip them
                # instead of failing with KeyError.
                number = entry.get('accession_number')
                if isinstance(number, str) and number in accession_numbers:
                    selected_records.append(entry)
    except OSError as err:
        print("OS error: {0}".format(err))
        sys.exit(1)
    except Exception:
        print("Unexpected error:", sys.exc_info()[0])
        raise

    # This is pretty inefficient. We are storing the selected records to a list
    # and this list could get huge!
    try:
        # A separate name for the handle keeps the path in output_file intact.
        with open(output_file, 'w') as out_handle:
            rispy.dump(selected_records, out_handle)
    except OSError as err:
        print("OS error: {0}".format(err))
        sys.exit(1)
    except Exception:
        print("Unexpected error:", sys.exc_info()[0])
        raise
示例#14
0
def test_starting_newline():
    """``example_starting_newlines.ris`` must load as exactly one entry."""
    ris_path = DATA_DIR / "example_starting_newlines.ris"
    with open(ris_path, "r") as handle:
        records = rispy.load(handle)

    assert len(records) == 1
示例#15
0
from pprint import pprint
import rispy

# Script: collect the 'primary_title' of every entry in every file of a
# directory of RIS exports, printing progress along the way.
#
# NOTE(review): `os` is used below, but no `import os` is visible in this
# chunk; confirm it is imported elsewhere.

# define file path for local RIS files
filepath = 'C:\\Users\\mobarget\\Google Drive\\RIS_export_PolishNationalLibrary'

# define filepath as directory containing iterable files and read each file

# accumulates the titles of all entries across all files
titles = []
for f in os.listdir(filepath):
    print(f)  # return file names, e.g. Primo_RIS_Export.ris
    f_path = os.path.join(filepath, f)
    with open(f_path, 'r', encoding="utf-8") as bibliography_file:
        # prints the file object itself (its repr), not the file's contents
        print(bibliography_file)

        entries = rispy.load(bibliography_file)

        # entries are called based on standard RIS format
        # for files deviating from this format, you may need to use a tag-key-mapper
        # check rispy documentation for further details

        for entry in entries:
            # NOTE(review): presumably each entry is a dict; an entry without
            # a 'primary_title' key would raise KeyError here.
            title = entry['primary_title']
            print(
                title
            )  # special characters are shown correctly in Jupyter notebook
            titles.append(title)

outpath = 'C:\\Users\\mobarget\\Google Drive\\'
out = open(
    os.path.join(outpath, 'ris_out.csv'), 'w', encoding="utf_8_sig"
outpath = 'C:\\XXXXX'

# Script: load every file of the `filepath` directory as non-strict RIS and
# hand the loaded data to `finddata`.

# define keyword and output lists
keyword = "yourstring"
titles = []
wrong_ids = []

# define filepath as directory containing iterable files and read each file
for f in os.listdir(filepath):
    #print(f) # return file names, e.g. Primo_RIS_Export.ris
    f_path = os.path.join(filepath, f)
    with open(f_path, 'r', encoding="latin-1") as bibliography_file:
        #print(bibliography_file) # returns RIS meta-information for file
        try:
            data = rispy.load(
                bibliography_file,
                strict=False)  # accepts non-standard RIS if "strict=False"
            finddata(data)  # get data via function
        # NOTE(review): this bare `except` catches every exception, not only
        # OSError. The `OSError` line below is an expression statement with
        # no effect. As laid out in this chunk, `finddata` is defined only
        # after this loop, so the call above would raise NameError, which is
        # silently swallowed here. Confirm the intended order.
        except:
            OSError
            continue

# entries are called based on standard RIS format
# for files deviating from this format, you may need to use a tag-key-mapper
# check rispy documentation for further details


def finddata(x):
    try:
        title = x[0][
            'primary_title']  # get first and only item from list and dictionary data by key