def _promptForExistingPaths():
    """Ask on stdin for paths until every path given exists.

    Returns:
        A non-empty list of absolute paths, all of which existed when checked.
    """
    while True:
        nflist = input("What files or directorys do you want to extract a network from: ")
        # split() with no separator drops empty tokens, so repeated or stray
        # spaces do not become '' (which abspath would resolve to the current
        # working directory and silently add to the collection).
        paths = [os.path.abspath(os.path.expanduser(f)) for f in nflist.split()]
        badPaths = [p for p in paths if not os.path.exists(p)]
        if len(badPaths) > 1:
            print(', '.join(badPaths[:-1]) + " and " + badPaths[-1] + " are not existing files or directorys")
        elif len(badPaths) == 1:
            print(badPaths[0] + " is not an existing file or directory")
        elif paths:
            return paths
        # Nothing usable was entered: ask again.


def getFiles(args):
    """Build one RecordCollection from every path in ``args.files``.

    If ``args.files`` is empty the user is prompted for a space separated
    list of files or directories; the prompt repeats until all given paths
    exist, and ``args.files`` is then replaced by those absolute paths.

    Args:
        args: namespace-like object providing ``files`` (sequence of paths),
            ``suffix`` (passed as ``extension`` to RecordCollection) and
            ``name`` (label for the result, may be falsy).

    Returns:
        The union of the collections read from each path. Its ``_repr`` is
        ``args.name`` when that is truthy, otherwise a "WOS files from: ..."
        description built from the base names of ``args.files``.

    Raises:
        TypeError: if a path in ``args.files`` does not exist.
    """
    if len(args.files) == 0:
        # Loop (instead of recursing) so repeated bad answers cannot exhaust
        # the recursion limit.
        args.files = _promptForExistingPaths()

    tmpRC = metaknowledge.RecordCollection(name = '')
    for f in args.files:
        path = os.path.abspath(os.path.expanduser(f))
        if not os.path.exists(path):
            raise TypeError(path + " is not an existing file or directory")
        tmpRC |= metaknowledge.RecordCollection(path, extension = args.suffix)

    if args.name:
        tmpRC._repr = args.name
    else:
        FileNames = [os.path.basename(nm) for nm in args.files]
        if len(FileNames) == 1:
            tmpRC._repr = "WOS files from: {}".format(FileNames[0])
        else:
            tmpRC._repr = "WOS files from: {0} and {1}".format(', '.join(FileNames[:-1]), FileNames[-1])
    return tmpRC
def test_directoryRead(self):
    """Assert that '.' loads as empty and that the tests directory compares >= / <= self.RC."""
    testDir = 'metaknowledge/tests/'

    fromCwd = metaknowledge.RecordCollection('.')
    self.assertEqual(len(fromCwd), 0)

    wholeDir = metaknowledge.RecordCollection(testDir)
    self.assertTrue(wholeDir >= self.RC)

    # Same directory, restricted with extension='.txt'.
    txtOnly = metaknowledge.RecordCollection(testDir, extension='.txt')
    self.assertTrue(txtOnly <= self.RC)
def test_bad(self):
    """Check the ``bad`` flag, the RCTypeError on a wrong extension, and unions involving RCbad."""
    badPath = 'metaknowledge/tests/badFile.isi'

    loadedBad = metaknowledge.RecordCollection(badPath)
    self.assertTrue(loadedBad.bad)

    with self.assertRaises(metaknowledge.mkExceptions.RCTypeError):
        metaknowledge.RecordCollection('metaknowledge/tests/testFile.isi', extension='.txt')

    firstUnion = self.RCbad | self.RC
    secondUnion = self.RCbad | self.RC
    self.assertEqual(firstUnion, secondUnion)

    selfUnion = self.RCbad | self.RCbad
    self.assertEqual(len(selfUnion), 32)

    self.assertFalse(self.RCbad == self.RC)

    # The first key of the errors mapping is expected to be the bad file's path.
    firstErrorKey = next(iter(self.RCbad.errors.keys()))
    self.assertEqual(badPath, firstErrorKey)
def test_caching(self):
    """Load the tests directory twice with ``cached=True`` and compare the results.

    Asserts that the cache file exists after the first load, that the second
    load leaves the access time of testFile.isi unchanged, and that both
    collections are equal after ``dropBadEntries()``.
    """
    cachePath = "metaknowledge/tests/tests.[testFile.isi].mkRecordDirCache"
    dataPath = "metaknowledge/tests/testFile.isi"
    try:
        RC = metaknowledge.RecordCollection("metaknowledge/tests/", cached=True, name='testingCache', extension='testFile.isi')
        self.assertTrue(os.path.isfile(cachePath))
        accessTime = os.stat(dataPath).st_atime
        RC2 = metaknowledge.RecordCollection("metaknowledge/tests/", cached=True, name='testingCache', extension='testFile.isi')
        self.assertEqual(accessTime, os.stat(dataPath).st_atime)
        RC.dropBadEntries()
        RC2.dropBadEntries()
        self.assertEqual(RC, RC2)
    finally:
        # Remove the cache even when an assertion fails, so a stale cache
        # file is not left behind for later runs.
        if os.path.isfile(cachePath):
            os.remove(cachePath)
def test_write(self):
    """Write a collection back out and compare both outputs with the source file.

    ``writeFile`` is called once with an explicit name and once with no
    argument; the latter is expected to produce ``RC.name + '.txt'``.
    """
    fileName = 'OnePaper2.isi'
    sourcePath = 'metaknowledge/tests/' + fileName
    RC = metaknowledge.RecordCollection(sourcePath)
    try:
        RC.writeFile(fileName + '.tmp')
        RC.writeFile()
        self.assertTrue(filecmp.cmp(sourcePath, fileName + '.tmp'))
        self.assertTrue(filecmp.cmp(sourcePath, RC.name + '.txt'))
    finally:
        # Clean up whatever was written, even if a comparison failed.
        for written in (fileName + '.tmp', RC.name + '.txt'):
            if os.path.isfile(written):
                os.remove(written)
def test_addRec(self):
    """Check that pop/add/|= change len(self.RC) as expected and that add(1) raises."""
    startCount = len(self.RC)

    popped = self.RC.pop()
    self.assertEqual(len(self.RC), startCount - 1)

    self.RC.add(popped)
    self.assertEqual(len(self.RC), startCount)

    twoPapers = metaknowledge.RecordCollection("metaknowledge/tests/TwoPaper.isi")
    self.RC |= twoPapers
    self.assertEqual(len(self.RC), startCount + 2)

    # Adding a non-record value must be rejected.
    with self.assertRaises(metaknowledge.CollectionTypeError):
        self.RC.add(1)
def test_newOps(self):
    """Check the binary operators ``|``, ``-``, ``^`` and ``&`` against self.RC."""
    # Drop ten records from RCbad before it is used in the intersection below.
    for _ in range(10):
        self.RCbad.pop()

    # An empty collection flagged as bad.
    RC = metaknowledge.RecordCollection([])
    RC.bad = True

    RC3 = self.RC | RC
    self.assertEqual(self.RC, RC3)
    RC4 = RC3 - self.RC
    self.assertNotEqual(self.RC, RC4)
    RC5 = RC4 ^ self.RC
    self.assertEqual(self.RC, RC5)
    RC6 = RC5 & self.RCbad
    self.assertNotEqual(self.RC, RC6)
def test_equOps(self):
    """Check the in-place operators ``|=``, ``-=``, ``^=`` and ``&=`` against self.RC."""
    # Drop ten records from RCbad before it is used in the intersection below.
    for _ in range(10):
        self.RCbad.pop()

    # Start from an empty collection flagged as bad.
    RC = metaknowledge.RecordCollection([])
    RC.bad = True

    RC |= self.RC
    self.assertEqual(self.RC, RC)
    RC -= self.RC
    self.assertNotEqual(self.RC, RC)
    RC ^= self.RC
    self.assertEqual(self.RC, RC)
    RC &= self.RCbad
    self.assertNotEqual(self.RC, RC)
def test_citeFilter(self):
    """Check the sizes of the collections returned by ``citeFilter`` for several filters."""
    # Build every filtered collection first, then assert on their sizes.
    noMatches = self.RC.citeFilter('', reverse=True)
    allMatches = self.RC.citeFilter('')
    anonymous = self.RC.citeFilter('', 'anonymous')
    from1970 = self.RC.citeFilter(1970, 'year')
    not1970 = self.RC.citeFilter(1970, 'year', reverse=True)
    byMeller = self.RC.citeFilter('meller', 'author')

    self.assertEqual(len(noMatches), 0)
    self.assertEqual(len(allMatches), len(self.RC))
    self.assertEqual(len(anonymous), 1)
    self.assertEqual(len(from1970), 15)
    # A filter and its reverse are expected to partition the collection.
    self.assertEqual(len(from1970) + len(not1970), len(self.RC))
    self.assertEqual(len(byMeller), 1)

    withoutCites = metaknowledge.RecordCollection('metaknowledge/tests/OnePaperNoCites.isi')
    filteredNoCites = withoutCites.citeFilter('')
    self.assertEqual(len(filteredNoCites), 0)
def setUpClass(cls):
    """Store sys.argv, the testFile.isi collection and its co-author network on the class."""
    cls.sysArgs = sys.argv
    mainCollection = metaknowledge.RecordCollection("metaknowledge/tests/testFile.isi")
    cls.RCmain = mainCollection
    cls.Gmain = mainCollection.networkCoAuthor()
def setUp(self):
    """Turn off verbose mode, load the Scopus test file into self.RC and keep one record in self.R."""
    metaknowledge.VERBOSE_MODE = False
    scopusPath = "metaknowledge/tests/scopus_testing.csv.scopus"
    self.RC = metaknowledge.RecordCollection(scopusPath)
    self.R = self.RC.peek()
def test_create(self):
    # Drives metaknowledgeCLI.getWhatToDo through several menu choices.
    # print() is patched out and input() is replaced by MockInput, whose
    # calledVals list holds the answers to hand back; after each call the
    # test checks that every queued answer was consumed.
    # NOTE(review): the nesting below was reconstructed from a collapsed
    # line; confirm the extent of the assertRaises block against the
    # original file.
    fileName = 'tempTestFile'
    # One namespace with a name set, one with name = None.
    named = argparse.Namespace()
    named.name = fileName
    unnamed = argparse.Namespace()
    unnamed.name = None
    with unittest.mock.patch('builtins.print'):
        with unittest.mock.patch('builtins.input', new_callable = MockInput) as m:
            # Choice '1': expected to return a truthy value.
            m.calledVals = ['1']
            self.assertTrue(metaknowledge.bin.metaknowledgeCLI.getWhatToDo(named, self.RC))
            self.assertEqual(len(m.calledVals), 0)
            # Choice '2': expected to leave fileName + '.txt' on disk with a fixed size.
            m.calledVals = ['2']
            self.RC.dropBadEntries()
            self.assertFalse(metaknowledge.bin.metaknowledgeCLI.getWhatToDo(named, self.RC))
            self.assertEqual(os.path.getsize(fileName+ '.txt'), 88160)
            self.assertEqual(len(m.calledVals), 0)
            # Choice '3' with the unnamed namespace: further answers are queued
            # for the follow-up prompts; the same .txt size is expected.
            m.calledVals = ['3', fileName, 'n', fileName, 'g', 'y']
            self.RC.dropBadEntries()
            self.assertTrue(metaknowledge.bin.metaknowledgeCLI.getWhatToDo(unnamed, self.RC))
            self.assertEqual(os.path.getsize(fileName+ '.txt'), 88160)
            os.remove(fileName + '.txt')
            self.assertEqual(len(m.calledVals), 0)
            # Choice '4': expected to write fileName + '.csv'.
            m.calledVals = ['4']
            self.RC.dropBadEntries()
            self.assertFalse(metaknowledge.bin.metaknowledgeCLI.getWhatToDo(named, self.RC))
            self.assertEqual(os.path.getsize(fileName+ '.csv'), 86330)
            os.remove(fileName + '.csv')
            self.assertEqual(len(m.calledVals), 0)
            # Choice '6': expected to write a smaller fileName + '.csv'.
            m.calledVals = ['6', 'y']
            self.RC.dropBadEntries()
            self.assertFalse(metaknowledge.bin.metaknowledgeCLI.getWhatToDo(named, self.RC))
            self.assertEqual(os.path.getsize(fileName+ '.csv'), 20123)
            os.remove(fileName + '.csv')
            self.assertEqual(len(m.calledVals), 0)
            # Choice '7': the queued KeyboardInterrupt is expected to propagate
            # out of getWhatToDo.
            with self.assertRaises(KeyboardInterrupt):
                #Don't want to mess with these too much
                m.calledVals = ['7', '', KeyboardInterrupt]
                self.assertFalse(metaknowledge.bin.metaknowledgeCLI.getWhatToDo(named, self.RC))
            self.assertEqual(len(m.calledVals), 0)
            # Choice '7' again, this time against an empty RecordCollection.
            m.calledVals = ['7', '']
            self.assertFalse(metaknowledge.bin.metaknowledgeCLI.getWhatToDo(named, metaknowledge.RecordCollection()))
            self.assertEqual(len(m.calledVals), 0)
def test_isCollection(self):
    """Check the type of self.RC, the str() of an empty collection, and self-equality."""
    self.assertIsInstance(self.RC, metaknowledge.RecordCollection)

    emptyCollection = metaknowledge.RecordCollection()
    self.assertEqual(str(emptyCollection), "RecordCollection(Empty)")

    sameAsItself = self.RC == self.RC
    self.assertTrue(sameAsItself)
def setUp(self):
    """Turn off verbose mode, load the ProQuest test file into self.RC and keep one record in self.R."""
    metaknowledge.VERBOSE_MODE = False
    proQuestPath = "metaknowledge/tests/ProQuest_TestFile.testtxt"
    self.RC = metaknowledge.RecordCollection(proQuestPath)
    self.R = self.RC.peek()
import os

import metaknowledge as mk

# Write the 'AB' field of every record found under the current directory
# to its own text file in imetrics_tm/.
RC = mk.RecordCollection('.')

# Create the output directory up front; open(..., 'w') does not create it.
os.makedirs("imetrics_tm", exist_ok=True)

for i, R in enumerate(RC, start=1):
    # '\r' keeps the progress counter on a single console line.
    print('Working on Record number: {}'.format(i), end='\r')
    # The file name is built from the first three characters of the record
    # id and everything after its fifth character.
    # Explicit UTF-8 so non-ASCII text does not depend on the locale encoding.
    with open("imetrics_tm/{}-{}.txt".format(R.id[:3], R.id[4:]), 'w', encoding='utf-8') as f:
        f.write(R.get('AB', ''))
print('\nDone')
# of a GNU General Public License as published by the Free Software Foundation. metaknowledged3 # is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even # the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along with metaknowledged3. # If not, see <http://www.gnu.org/licenses/>. # ********************************************************************************************* import metaknowledge as mk import pandas minYear = 1900 # Specify the minimum year you want to consider maxYear = 2016 # Specify the maximum year you want to consider years = range(minYear, maxYear + 1) RC = mk.RecordCollection( "/Users/filepath") # Create the RecordCollection you want to analyze # *************************** # Create the multiRPYS file # *************************** dictionary = { "CPY": [], "abs_deviation": [], "num_cites": [], "rank": [], "RPY": [] } for i in years: try: RCyear = RC.yearSplit(i, i) if len(RCyear) > 0:
def setUp(self):
    """Turn off verbose mode, load the Medline test file into self.RC and keep one record in self.R."""
    metaknowledge.VERBOSE_MODE = False
    medlinePath = "metaknowledge/tests/medline_test.medline"
    self.RC = metaknowledge.RecordCollection(medlinePath)
    self.R = self.RC.peek()
import metaknowledge as mk
import networkx as nx
import matplotlib.pyplot as plt
import metaknowledge.contour.plotting as mkv

# Load the records from the saved export file.
RC = mk.RecordCollection('./tmp/savedrecs.txt')

# Build the co-citation network and print its statistics as a string.
CoCitation = RC.networkCoCitation()
print(mk.graphStats(CoCitation, makeString=True))
# print(CoCitation.nodes(data = True)[0])
# print(CoCitation.edges(data = True)[0])

# Build the network again with nodeType='journal' and dropNonJournals=True,
# print its statistics and draw it with networkx's draw_spring.
coCiteJournals = RC.networkCoCitation(nodeType='journal', dropNonJournals=True)
print(mk.graphStats(coCiteJournals))
nx.draw_spring(coCiteJournals)
def test_contentType(self):
    """Check ``_collectedTypes`` for the whole tests directory and for self.RC."""
    allTypes = {'MedlineRecord', 'WOSRecord', 'ProQuestRecord', 'ScopusRecord'}
    wosOnly = {'WOSRecord'}

    directoryRC = metaknowledge.RecordCollection('metaknowledge/tests/')
    self.assertEqual(directoryRC._collectedTypes, allTypes)
    self.assertEqual(self.RC._collectedTypes, wosOnly)
# NOTE(review): `mk` and `pd` are used below but not imported in this
# excerpt; presumably they are imported earlier in the notebook — confirm.
from gensim.models import ldamodel
from gensim.models import CoherenceModel
import re
import pyLDAvis
import pyLDAvis.gensim
import matplotlib.pyplot as plt
# IPython magic: render matplotlib figures inline in the notebook.
get_ipython().run_line_magic('matplotlib', 'inline')
import seaborn as sns
from pprint import pprint
import warnings
# Silence all warnings for the rest of the session.
warnings.filterwarnings("ignore")

# In[ ]:

# Importing the information science and bibliometrics dataset
RC = mk.RecordCollection("../input/mk/raw_data/imetrics/")
len(RC)

# In[ ]:

RC

# The data is currently stored as a RecordCollection object and must be converted into a dataframe if we want to see its contents. We can do this in two ways: with Pandas or with Metaknowledge's makeDict() function.

# In[ ]:

# Saving the dataset as a csv file
RC.writeCSV("records.csv")
# Reading in the data as a Pandas dataframe
data = pd.read_csv("records.csv")
data.head(3)
def test_fullRead(self):
    """Check the total number of records read from the tests directory."""
    expectedCount = 1032
    wholeDirectory = metaknowledge.RecordCollection("metaknowledge/tests/")
    self.assertEqual(len(wholeDirectory), expectedCount)
def setUp(self):
    """Turn off verbose mode and load testFile.isi into self.RC."""
    metaknowledge.VERBOSE_MODE = False
    isiPath = "metaknowledge/tests/testFile.isi"
    self.RC = metaknowledge.RecordCollection(isiPath)
def setUpClass(cls):
    """Store the testFile.isi collection and its co-citation network on the class."""
    mainCollection = metaknowledge.RecordCollection("metaknowledge/tests/testFile.isi")
    cls.RCmain = mainCollection
    cls.Gmain = mainCollection.networkCoCitation()
def setUpClass(cls):
    """Turn off verbose mode and store the testFile.isi and badFile.isi collections on the class."""
    metaknowledge.VERBOSE_MODE = False
    goodPath = "metaknowledge/tests/testFile.isi"
    badPath = "metaknowledge/tests/badFile.isi"
    cls.RCmain = metaknowledge.RecordCollection(goodPath)
    cls.RCbadmain = metaknowledge.RecordCollection(badPath)
def test_hash(self):
    """Check that RC and RCbad hash differently and how a one-record collection hashes."""
    goodHash = hash(self.RC)
    badHash = hash(self.RCbad)
    self.assertNotEqual(goodHash, badHash)

    # A collection holding a single record is expected to hash to
    # hash(hash(record)).
    record = self.RC.pop()
    singleton = metaknowledge.RecordCollection([record])
    self.assertEqual(hash(singleton), hash(hash(record)))
import metaknowledge as mk
import networkx as Nx
import os

# All relative paths below are resolved against this hard-coded project directory.
os.chdir("/Users/Yanish/Documents/Fall_2015/Integ_475/final/Integ475Final")

# Load every record found under data/.
RC = mk.RecordCollection("data/")

# Build the co-author and co-citation networks from the collection.
coAuth = RC.coAuthNetwork()
Net = RC.coCiteNetwork()

# Export the records as CSV; Dat holds whatever writeCSV returns.
Dat = RC.writeCSV(fname="data/dat.csv")

# Filter the co-citation network with minWeight=3, then save it as GraphML.
Net = mk.drop_edges(Net, minWeight=3)
Nx.write_graphml(Net, "networks/net.graphml")