def main():
    """Run ``temporalPairs`` on the alSum, random and temporalInterval thumbnails.

    The accepted URIs are read from ``gooduris_20160225.txt``.  Composite file
    names returned by ``get_files`` are indexed by method (the text before the
    first underscore) and by site (the text between the first and the last
    underscore).  That index is handed to ``get_and_process_thumbs``; each
    per-method result is wrapped in a ``MethodCompThums`` and the three
    wrappers are passed to ``temporalPairs``.

    All paths are hard-coded.

    Raises:
        ValueError: if a composite file name contains no underscore.
    """
    screenshot_dir = "/home/john/wsdlims_ripped/ECIR2016TurkData/screenshots"
    composite_dir = "/home/john/wsdlims_ripped/ECIR2016TurkData/composites"

    with open("gooduris_20160225.txt", "r") as uri_file:
        good_uris = [line.rstrip("\n") for line in uri_file]

    def wanted(filename):
        # Same short-circuit order as the checks were originally written in.
        return ("allTheSame" not in filename
                and check_if_goodURI(filename, good_uris)
                and "interval" not in filename)

    composite_names = get_files(composite_dir, wanted)

    # method name -> {site -> composite file name}
    by_method = defaultdict(dict)
    for composite in sorted(composite_names):
        cut = composite.index("_")
        method_name = composite[:cut]
        site = composite[cut + 1:composite.rfind("_")]
        by_method[method_name][site] = composite

    files = get_and_process_thumbs(screenshot_dir, by_method, good_uris)
    print(type(files))

    # MethodCompThums receives the screenshot directory with a trailing slash.
    thumb_root = screenshot_dir + "/"
    methods = {}
    for label in ('random', 'temporalInterval', 'alSum'):
        methods[label] = MethodCompThums(label, thumb_root, files[label])

    temporalPairs(methods['alSum'], methods['random'], methods['temporalInterval'])
def colorAnalysis():
    """Save every method's ``get_composite_dom_colors()`` result as JSON and print it.

    The accepted URIs are read from ``gooduris_20160225.txt``; composite file
    names are indexed by method and site, passed to
    ``get_and_process_thumbs`` and wrapped in one ``MethodCompThums`` per
    method.  The per-method results are written to ``colorResults2.json``
    (objects that ``json`` cannot encode are converted with their
    ``to_jdic()`` method) and then printed site by site.

    If serialization fails with ``TypeError`` the error is reported on stdout
    and ``colorResults2.json`` is left untouched.

    Raises:
        ValueError: if a composite file name contains no underscore.
    """
    impath = "/home/john/wsdlims_ripped/ECIR2016TurkData/screenshots"
    compath = "/home/john/wsdlims_ripped/ECIR2016TurkData/composites"

    with open("gooduris_20160225.txt", "r") as read:
        goodUris = [line.rstrip("\n") for line in read]

    compisits = get_files(compath, lambda f: "allTheSame" not in f and check_if_goodURI(f, goodUris) and
                                             "interval" not in f)

    # method name -> {site -> composite file name}
    method_composites = defaultdict(dict)
    for comp in sorted(compisits):
        site = comp[comp.find("_") + 1:comp.rfind("_")]
        method_composites[comp[:comp.index("_")]][site] = comp

    files = get_and_process_thumbs(impath, method_composites, goodUris)
    print(type(files))

    # MethodCompThums receives the screenshot directory with a trailing slash.
    impath += "/"

    methods = {'random': MethodCompThums('random', impath, files["random"]),
               'temporalInterval': MethodCompThums('temporalInterval', impath, files["temporalInterval"]),
               'alSum': MethodCompThums('alSum', impath, files["alSum"])}  # type: dict[str,MethodCompThums]

    out = {}  # type: dict[str,dict[str,CompositeColorResulst]]
    for mname, method in methods.items():
        print(mname, method)
        out[mname] = method.get_composite_dom_colors()

    # Serialize first: opening the file before json.dumps succeeds would
    # truncate any previous results even when serialization then fails.
    try:
        payload = json.dumps(out, indent=1, default=lambda x: x.to_jdic())
    except TypeError as e:
        print("Wow bad thing happened", e)
    else:
        with open("colorResults2.json", "w") as wout:
            wout.write(payload)

    for k, v in out.items():
        print("+++++++++++++++++++++++++++++++++++++++++++++++++")
        print(k)
        for site, ret in v.items():
            print("site: ", site)
            # ret.results maps a date to a sequence of strings joined for display.
            for date, color in ret.results.items():
                print(date, ''.join(color))
# Example #3
# 0
def long():
    """Download the Internet Archive JSON TimeMap of every original URI into ``tms2.json``.

    Each line of ``origuris.txt`` is appended to
    ``http://web.archive.org/web/timemap/json/www.`` and fetched, in sorted
    order, through one shared ``requests`` session that sends a Firefox
    User-Agent header.  Every response that parses as JSON is echoed to stdout
    and stored; responses that do not parse are dumped to stdout (body and
    headers) and skipped.

    The output file is a valid JSON document of the form
    ``{"tms": [[<timemap>], [<timemap>], ...]}`` so it can be read back with
    ``json.load``.  Entries are written as they arrive, and the document is
    closed even if the loop is interrupted by an exception.
    """
    with open("origuris.txt", "r") as read:
        origuris = [line.rstrip("\n") for line in read]

    useragent = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:44.0) Gecko/20100101 Firefox/44.01'

    # Both the session and the file are released even when a request raises.
    with requests.Session() as session, open("tms2.json", "w") as out:
        session.headers.update({'User-Agent': useragent})
        out.write('{"tms": [')
        wrote_entry = False
        try:
            for uri in sorted(origuris):
                request = session.get("http://web.archive.org/web/timemap/json/www.%s" % uri)
                try:
                    # Round-trip through the parser so only well-formed JSON is stored.
                    jsn = json.dumps(json.loads(request.text))
                except ValueError:
                    print(request.text)
                    print(request.headers)
                    print("\n\n")
                    continue
                print(jsn + "\n")
                # The separator goes before every entry except the first, so
                # the array never ends with a trailing comma.
                if wrote_entry:
                    out.write(",\n")
                out.write("[" + jsn + "]")
                wrote_entry = True
        finally:
            out.write("]}\n")
# Example #4
# 0
def generate():
    """Write ``allTm2.csv`` from the per-site CSV results and the TimeMaps in ``tms2.json``.

    Steps, in order:

    * ``tms2.json`` is loaded; every item of its ``tms`` list is wrapped in a
      ``TM`` and stored under ``TM.getURIKey()``.
    * ``temporalPairs.csv`` yields one ``cs`` object per row, keyed by the
      ``site`` column.
    * ``compositeToComposite.csv``, the two ``*_onetoone.csv`` files and
      ``wins.csv`` fill in ``ctcRsim``/``ctcTsim``, ``otoRsim``/``otoTsim``
      and ``won['r']``/``won['ti']`` on those objects.  Only the ``wins.csv``
      pass skips sites that are unknown.
    * For every distinct ``gsite(...)`` value of the composite files and every
      TimeMap key longer than two characters that occurs inside it, the
      TimeMap statistics are printed, stored through ``setTMInfo`` and the
      object's ``getRString()`` and ``getTString()`` output is appended to
      ``allTm2.csv``.

    Raises:
        KeyError: if a site from one of the unguarded CSV files, or a matching
            TimeMap key, has no row in ``temporalPairs.csv``.
    """
    composite_dir = "/home/john/wsdlims_ripped/ECIR2016TurkData/composites"

    with open("tms2.json", "r") as tm_file:
        loaded = json.load(tm_file)

    with open("gooduris_20160225.txt", "r") as uri_file:
        good_uris = [line.rstrip("\n") for line in uri_file]

    rows_by_site = {}  # type: dict[str,cs]
    with open("temporalPairs.csv", "r") as handle:
        for record in csv.DictReader(handle):
            rows_by_site[record['site']] = cs(record)

    with open("compositeToComposite.csv", "r") as handle:
        for record in csv.DictReader(handle):
            random_sim = record['alsumRandomSim']
            temporal_sim = record['alsumTemporalSim']
            entry = rows_by_site[record['site']]
            entry.ctcRsim = random_sim
            entry.ctcTsim = temporal_sim

    # Both one-to-one files carry an 'average' column; only the target
    # attribute on the row object differs.
    one_to_one_sources = (("alSumVSrandom_onetoone.csv", "otoRsim"),
                          ("alSumVStemporalInterval_onetoone.csv", "otoTsim"))
    for csv_name, attribute in one_to_one_sources:
        with open(csv_name, "r") as handle:
            for record in csv.DictReader(handle):
                average = record['average']
                setattr(rows_by_site[record['site']], attribute, average)

    with open("wins.csv", "r") as handle:
        for record in csv.DictReader(handle):
            entry = rows_by_site.get(record['site'], None)
            if entry is None:
                continue
            entry.won['r'] = record['awr']
            entry.won['ti'] = record['awt']

    time_maps = {}  # type: dict[str,TM]
    for raw in loaded['tms']:
        wrapped = TM(raw)
        time_maps[wrapped.getURIKey()] = wrapped

    # Keys of one or two characters are ignored when matching against sites.
    usable_keys = [key for key in time_maps if len(key) > 2]

    def wanted(filename):
        return ("allTheSame" not in filename
                and check_if_goodURI(filename, good_uris)
                and "interval" not in filename)

    composite_names = get_files(composite_dir, wanted)
    print(composite_names)

    sites = sorted({gsite(name) for name in composite_names})

    def tm_stats(time_map):
        # Overall span/count, then the same pair restricted to after 2000 and after 2005.
        return (time_map.timeSpan(), time_map.numMentos,
                time_map.timeSpanAfter(2000), time_map.numMementosAfter(2000),
                time_map.timeSpanAfter(2005), time_map.numMementosAfter(2005))

    # NOTE(review): the meaning of each column is decided by cs.getRString() /
    # cs.getTString(), which are not visible here - confirm against the header.
    header = "site,ah,mh,mdif,nmemento,timespan,nummtwo,twotimespan,numof,timespanof,aWP,moto,mtcr,method\n"
    with open("allTm2.csv", "w+") as out:
        out.write(header)
        for site in sites:
            for key in usable_keys:
                if key not in site:
                    continue
                time_map = time_maps[key]
                print(*tm_stats(time_map))
                entry = rows_by_site[key]
                entry.setTMInfo(*tm_stats(time_map))
                out.write(entry.getRString())
                out.write(entry.getTString())
                print("______________________________")