def main(argv):
    """Annotate the Wikipedia articles and persist the relations collected.

    Loads the known entity ids, runs the spaCy pipeline over every line of
    each ``.txt`` file in ``../wikipediaArticles/``, prints the indexed
    tokens and the parsed line, and passes the tokens to ``addRelations``
    together with the id table and the running list of new relations.
    Afterwards every collected relation is appended to ``newRelations.txt``
    as one JSON document per line.

    Args:
        argv: Command-line arguments; not read by this function.
    """
    global maxId

    # Named Entity ids: <Name, id>
    ids = loadDataSet()
    maxId = getMaxId(ids)
    newRelations = []
    nlp = spacy.load("en_core_web_sm")

    # Processes all wikipedia articles and prompts user to annotate for
    # relations
    for entry in os.scandir('../wikipediaArticles/'):
        if not (entry.path.endswith('.txt') and entry.is_file()):
            continue
        for line in loadFile(entry.path):
            doc = nlp(line)
            for idx, token in enumerate(doc):
                print(idx, token, sep='_', end=' ')
            print('\n')
            print(doc)
            tokens = [token.text for token in doc]
            addRelations(tokens, ids, newRelations)

    # Write new relations to data file. The file is opened once for the
    # whole batch instead of once per relation; the guard keeps the old
    # behaviour of not creating the file when there is nothing to write.
    if newRelations:
        with open('newRelations.txt', 'a') as the_file:
            for relation in newRelations:
                the_file.write(json.dumps(jsons.dump(relation)) + '\n')
def getFontPostscriptName(self, filename):
    """Return the PostScript name of the TrueType font file *filename*.

    Returns "" when ``util.loadFile`` yields None, or (after an error box)
    when the data is not accepted as a TrueType font. A font that does not
    allow embedding only triggers a message box; its name is still
    returned.
    """
    # we load at most 10 MB to avoid a denial-of-service attack by
    # passing around scripts containing references to fonts with
    # filenames like "/dev/zero" etc. no real font that I know of is
    # this big so it shouldn't hurt.
    sizeLimit = 10 * 1024 * 1024
    fontProgram = util.loadFile(filename, cfgFrame, sizeLimit)

    if fontProgram is None:
        return ""

    font = truetype.Font(fontProgram)

    if not font.isOK():
        invalidMsg = ("File '%s'\n"
                      "does not appear to be a valid TrueType font."
                      % filename)
        wx.MessageBox(invalidMsg, "Error", wx.OK, cfgFrame)
        return ""

    if not font.allowsEmbedding():
        embedMsg = ("Font '%s'\n"
                    "does not allow embedding in its license terms.\n"
                    "You may encounter problems using this font"
                    " embedded." % filename)
        wx.MessageBox(embedMsg, "Error", wx.OK, cfgFrame)

    return font.getPostscriptName()
def importAstx(fileName, frame):
    """Load an .astx file and parse it as XML.

    Shows an error box and returns None when the file is empty or is not
    well-formed XML; returns None silently when ``util.loadFile`` yields
    None.

    NOTE(review): the visible body ends right after parsing, so ``root``
    and ``elemMap`` are not consumed here -- this looks like an excerpt of
    a longer importer; confirm before relying on the return value.
    """
    # astx files are xml files. The textlines can be found under
    # AdobeStory/document/stream/section/scene/paragraph which contain
    # one or more textRun/break elements, to be joined. The paragraph
    # attribute "element" gives us the element style.
    data = util.loadFile(fileName, frame, 5000000)

    if data is None:
        return None

    if len(data) == 0:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)
        return None

    # paragraph "element" attribute -> screenplay line type
    elemMap = {
        "Action": screenplay.ACTION,
        "Character": screenplay.CHARACTER,
        "Dialog": screenplay.DIALOGUE,
        "Parenthetical": screenplay.PAREN,
        "SceneHeading": screenplay.SCENE,
        "Shot": screenplay.SHOT,
        "Transition": screenplay.TRANSITION,
    }

    try:
        root = etree.XML(data)
    # "as" form is accepted by Python 2.6+ and Python 3 alike
    except etree.XMLSyntaxError as e:
        wx.MessageBox("Error parsing file: %s" % e, "Error", wx.OK, frame)
        return None
def importAstx(fileName, frame):
    """Read an .astx file and parse its XML content.

    Returns None without a message when ``util.loadFile`` yields None, and
    None after an error box when the file is empty or fails XML parsing.

    NOTE(review): nothing after the parse step is visible in this copy of
    the function (``root`` and ``elemMap`` stay unused) -- presumably the
    importer continues elsewhere; verify.
    """
    # astx files are xml files. The textlines can be found under
    # AdobeStory/document/stream/section/scene/paragraph which contain
    # one or more textRun/break elements, to be joined. The paragraph
    # attribute "element" gives us the element style.
    data = util.loadFile(fileName, frame, 5000000)

    # identity test: None is a singleton
    if data is None:
        return None

    if len(data) == 0:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)
        return None

    # maps the paragraph style name to the screenplay line type
    elemMap = {
        "Action": screenplay.ACTION,
        "Character": screenplay.CHARACTER,
        "Dialog": screenplay.DIALOGUE,
        "Parenthetical": screenplay.PAREN,
        "SceneHeading": screenplay.SCENE,
        "Shot": screenplay.SHOT,
        "Transition": screenplay.TRANSITION,
    }

    try:
        root = etree.XML(data)
    except etree.XMLSyntaxError as e:
        # same syntax as the other importers in this file that already
        # use "except ... as e"
        wx.MessageBox("Error parsing file: %s" % e, "Error", wx.OK, frame)
        return None
def baselinermse(namesearch, directories, isArm=True):
    """Print baseline RMSE statistics and return the mean RMSE.

    For every file returned by ``util.getFilenames`` the predicted and
    measured forces are loaded, entries with a negative activation (and
    negative predicted forces) are zeroed, and three per-file RMSE values
    are computed: prediction vs. measurement, measurement vs. zero, and
    measurement vs. its own mean.

    Args:
        namesearch: Forwarded to ``util.getFilenames``.
        directories: Forwarded to ``util.getFilenames``.
        isArm: Selects the heading text ("Gown" when true, "Shorts"
            otherwise).

    Returns:
        ``np.mean`` of the per-file prediction RMSE values.
    """
    filenames = list(set(util.getFilenames(
        directories, parentDir=util.comparisonDir, namesearch=namesearch)))

    rmse = []
    rmseZero = []
    rmseMean = []
    for filename in filenames:
        data = util.loadFile(filename)
        predForces = np.array(data['predForces'])
        predActivations = np.array(data['predActivations'])
        yForces = np.array(data['yForces'])
        yActivations = np.array(data['yActivations'])

        # zero out forces wherever the matching activation is negative,
        # and clamp negative predicted forces
        predForces[predActivations < 0] = 0
        predForces[predForces < 0] = 0
        yForces[yActivations < 0] = 0

        rmse.append(np.sqrt(np.mean(np.square(yForces - predForces))))
        rmseZero.append(np.sqrt(np.mean(np.square(yForces))))
        rmseMean.append(
            np.sqrt(np.mean(np.square(yForces - np.mean(yForces)))))

    # Single-argument print(...) emits the same text under the Python 2
    # print statement and the Python 3 print function.
    heading = ('Baseline Gown Simulation' if isArm
               else 'Baseline Shorts Simulation')
    print('%s %s %s' % ('-' * 5, heading, '-' * 5))
    print('RMSE across all %d sequences: %s'
          % (len(filenames), np.mean(rmse)))
    print('RMSE for estimation of zero: %s' % (np.mean(rmseZero),))
    print('RMSE for estimation of mean of sequence: %s'
          % (np.mean(rmseMean),))
    print('RMSE standard deviation: %s' % (np.std(rmse),))

    return np.mean(rmse)
def importCeltx(fileName, frame):
    """Load a Celtx file, extract its script member and parse it as HTML.

    Returns None silently when ``util.loadFile`` yields None, and None
    after an error box when the file is empty, is not a readable zip
    archive, has no member whose name starts with "script", the member is
    empty, or parsing raises ``XMLSyntaxError``.

    NOTE(review): the visible body ends after parsing (``root`` and
    ``elemMap`` are not used further) -- looks like an excerpt; confirm.
    """
    # Celtx files are zipfiles, and the script content is within a file
    # called "script-xxx.html", where xxx can be random.

    # the 5 MB limit is arbitrary, we just want to avoid getting a
    # MemoryError exception for /dev/zero etc.
    data = util.loadFile(fileName, frame, 5000000)

    if data is None:
        return None

    if len(data) == 0:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)
        return None

    buf = StringIO.StringIO(data)

    try:
        z = zipfile.ZipFile(buf)
    # Exception rather than a bare except, so KeyboardInterrupt and
    # SystemExit are not swallowed
    except Exception:
        wx.MessageBox("File is not a valid Celtx script file.", "Error",
                      wx.OK, frame)
        return None

    # close the archive on every path, including the "no script" one and
    # a failing read
    try:
        scripts = [s for s in z.namelist() if s.startswith("script")]
        content = z.open(scripts[0]).read() if scripts else None
    finally:
        z.close()

    if not scripts:
        wx.MessageBox("Unable to find script in this Celtx file.", "Error",
                      wx.OK, frame)
        return None

    if not content:
        wx.MessageBox("Script seems to be empty.", "Error", wx.OK, frame)
        return None

    # paragraph class -> screenplay line type
    elemMap = {
        "action": screenplay.ACTION,
        "character": screenplay.CHARACTER,
        "dialog": screenplay.DIALOGUE,
        "parenthetical": screenplay.PAREN,
        "sceneheading": screenplay.SCENE,
        "shot": screenplay.SHOT,
        "transition": screenplay.TRANSITION,
        "act": screenplay.ACTBREAK,
    }

    try:
        parser = etree.HTMLParser()
        root = etree.XML(content, parser)
    except etree.XMLSyntaxError as e:
        wx.MessageBox("Error parsing file: %s" % e, "Error", wx.OK, frame)
        return None
def importCeltx(fileName, frame):
    """Open a Celtx archive and parse the script document stored in it.

    Every failure path returns None. An error box is shown for an empty
    file, an unreadable archive, a missing "script*" member, an empty
    member and an ``XMLSyntaxError``; a None result from ``util.loadFile``
    is passed on without a message.

    NOTE(review): this copy of the function stops after the parse step;
    ``root`` and ``elemMap`` are presumably consumed by code not shown
    here -- verify.
    """
    # Celtx files are zipfiles, and the script content is within a file
    # called "script-xxx.html", where xxx can be random.

    # the 5 MB limit is arbitrary, we just want to avoid getting a
    # MemoryError exception for /dev/zero etc.
    data = util.loadFile(fileName, frame, 5000000)

    if data is None:
        return None

    if len(data) == 0:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)
        return None

    buf = StringIO.StringIO(data)

    try:
        z = zipfile.ZipFile(buf)
    except Exception:
        # narrowed from a bare except: interpreter-exit exceptions now
        # propagate instead of being reported as a bad file
        wx.MessageBox("File is not a valid Celtx script file.", "Error",
                      wx.OK, frame)
        return None

    try:
        scripts = [s for s in z.namelist() if s.startswith("script")]
        content = None
        if scripts:
            content = z.open(scripts[0]).read()
    finally:
        # previously the archive stayed open when no script was found or
        # when reading raised
        z.close()

    if len(scripts) == 0:
        wx.MessageBox("Unable to find script in this Celtx file.", "Error",
                      wx.OK, frame)
        return None

    if not content:
        wx.MessageBox("Script seems to be empty.", "Error", wx.OK, frame)
        return None

    # maps the paragraph class name to the screenplay line type
    elemMap = {
        "action": screenplay.ACTION,
        "character": screenplay.CHARACTER,
        "dialog": screenplay.DIALOGUE,
        "parenthetical": screenplay.PAREN,
        "sceneheading": screenplay.SCENE,
        "shot": screenplay.SHOT,
        "transition": screenplay.TRANSITION,
        "act": screenplay.ACTBREAK,
    }

    try:
        parser = etree.HTMLParser()
        root = etree.XML(content, parser)
    except etree.XMLSyntaxError as e:
        wx.MessageBox("Error parsing file: %s" % e, "Error", wx.OK, frame)
        return None
def npz_load(inp, name):
    """Return entry *name* of the container loaded from *inp*.

    The container comes from ``util.loadFile(inp)``; the lookup of *name*
    is bracketed by two progress messages, the second reporting the
    difference of two ``util.now()`` readings (formatted as milliseconds
    -- assumes ``util.now()`` counts in ms; TODO confirm).
    """
    archive = util.loadFile(inp)

    began = util.now()
    banner = "[Loading %s:%s...]" % (inp, name)
    print(banner)
    # make the banner visible before the lookup starts
    sys.stdout.flush()

    entry = archive[name]

    elapsed = util.now() - began
    print("[Took %d milliseconds]" % elapsed)
    return entry
def process(args):
    """Encode the input file and write the id tables and training files.

    Reads ``args.input``, encodes it with ``encoding.encode`` using
    ``args.token``, optionally extends the training triples with sampled
    zero entries, builds test triples, and saves everything below the
    output directory (``args.output`` or ``./output``). Which files are
    written depends on ``args.format`` ('deepwalk-bipartite' or 'FM').
    """
    records = util.loadFile(args.input)

    output_dir = args.output if args.output else './output'
    util.writeDir(output_dir)

    def save(name, payload):
        # every artefact lives directly below the output directory
        util.saveFile('{0}/{1}'.format(output_dir, name), payload)

    # encoding
    print('---encoding')
    encoded = encoding.encode(records, args.token)
    userID = encoded.get_userID()
    itemID = encoded.get_itemID()
    adjlist = encoded.get_adjlist()
    user_train, item_train, value_train = encoded.output4FM()

    # zero sampling
    if args.format == 'FM' and args.sampling == True:
        print('---Zero samping')
        sampled = sampling.zeroSampling(adjlist)
        zero_user, zero_item, zero_value = sampling.get_zero(sampled)
        user_train.extend(zero_user)
        item_train.extend(zero_item)
        value_train.extend(zero_value)

    # Testing
    # NOTE(review): treated as a function-level stage, like the
    # "encoding" and "save" stages -- confirm it is not meant to run for
    # the 'FM' format only.
    print('---Create Testing Data')
    test_user, test_item, test_value = build.build(len(userID), len(itemID))

    # save
    print('---Save')
    save('userID', userID)
    save('itemID', itemID)

    if args.format == 'deepwalk-bipartite':
        # deepwalk
        save('adjlist', adjlist)
    elif args.format == 'FM':
        # FM
        fm_outputs = [
            ('rel-user', ['0 {0}:1'.format(i) for i in range(len(userID))]),
            ('rel-item', ['0 {0}:1'.format(i) for i in range(len(itemID))]),
            ('rel-user.train', user_train),
            ('rel-item.train', item_train),
            ('ans.train', value_train),
            ('rel-user.test', test_user),
            ('rel-item.test', test_item),
            ('ans.test', test_value),
        ]
        for name, payload in fm_outputs:
            save(name, payload)
def importFadein(fileName, frame):
    """Load a .fadein file and parse the document.xml stored inside it.

    Returns None silently when ``util.loadFile`` yields None, and None
    after an error box when the file is empty, the archive or its
    "document.xml" member cannot be read, the member is empty, or parsing
    raises ``XMLSyntaxError``.

    NOTE(review): the visible body ends after parsing (``root`` and
    ``elemMap`` are not used further) -- looks like an excerpt; confirm.
    """
    # Fadein file is a zipped document.xml file.
    # the .xml is in open screenplay format:
    # http://sourceforge.net/projects/openscrfmt/files/latest/download

    # the 5 MB limit is arbitrary, we just want to avoid getting a
    # MemoryError exception for /dev/zero etc.
    data = util.loadFile(fileName, frame, 5000000)

    if data is None:
        return None

    if len(data) == 0:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)
        return None

    buf = StringIO.StringIO(data)

    try:
        z = zipfile.ZipFile(buf)
        try:
            content = z.open("document.xml").read()
        finally:
            # close the archive even when the member is missing or the
            # read fails
            z.close()
    # Exception rather than a bare except, so KeyboardInterrupt and
    # SystemExit are not swallowed
    except Exception:
        wx.MessageBox("File is not a valid .fadein file.", "Error",
                      wx.OK, frame)
        return None

    if not content:
        wx.MessageBox("Script seems to be empty.", "Error", wx.OK, frame)
        return None

    # paragraph style name -> screenplay line type
    elemMap = {
        "Action": screenplay.ACTION,
        "Character": screenplay.CHARACTER,
        "Dialogue": screenplay.DIALOGUE,
        "Parenthetical": screenplay.PAREN,
        "Scene Heading": screenplay.SCENE,
        "Shot": screenplay.SHOT,
        "Transition": screenplay.TRANSITION,
    }

    try:
        root = etree.XML(content)
    except etree.XMLSyntaxError as e:
        wx.MessageBox("Error parsing file: %s" % e, "Error", wx.OK, frame)
        return None
def importFadein(fileName, frame):
    """Open a .fadein archive and parse its document.xml member.

    Every failure path returns None. An error box is shown for an empty
    file, an archive/member that cannot be read, an empty member and an
    ``XMLSyntaxError``; a None result from ``util.loadFile`` is passed on
    without a message.

    NOTE(review): this copy of the function stops after the parse step;
    ``root`` and ``elemMap`` are presumably consumed by code not shown
    here -- verify.
    """
    # Fadein file is a zipped document.xml file.
    # the .xml is in open screenplay format:
    # http://sourceforge.net/projects/openscrfmt/files/latest/download

    # the 5 MB limit is arbitrary, we just want to avoid getting a
    # MemoryError exception for /dev/zero etc.
    data = util.loadFile(fileName, frame, 5000000)

    # identity test: None is a singleton
    if data is None:
        return None

    if len(data) == 0:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)
        return None

    buf = StringIO.StringIO(data)

    try:
        z = zipfile.ZipFile(buf)
        try:
            f = z.open("document.xml")
            content = f.read()
        finally:
            # previously skipped whenever open() or read() raised
            z.close()
    except Exception:
        # narrowed from a bare except: interpreter-exit exceptions now
        # propagate instead of being reported as a bad file
        wx.MessageBox("File is not a valid .fadein file.", "Error",
                      wx.OK, frame)
        return None

    if not content:
        wx.MessageBox("Script seems to be empty.", "Error", wx.OK, frame)
        return None

    # maps the paragraph style name to the screenplay line type
    elemMap = {
        "Action": screenplay.ACTION,
        "Character": screenplay.CHARACTER,
        "Dialogue": screenplay.DIALOGUE,
        "Parenthetical": screenplay.PAREN,
        "Scene Heading": screenplay.SCENE,
        "Shot": screenplay.SHOT,
        "Transition": screenplay.TRANSITION,
    }

    try:
        root = etree.XML(content)
    except etree.XMLSyntaxError as e:
        wx.MessageBox("Error parsing file: %s" % e, "Error", wx.OK, frame)
        return None
def loadDataSet():
    """Build the entity-name -> entity-id table from the local .txt files.

    Every line of every ``.txt`` file in the current directory is parsed
    as JSON; the ``name``/``id`` pair of its ``h`` and ``t`` members is
    recorded unless the name is already known (first occurrence wins).

    Returns:
        dict mapping entity name to entity id.
    """
    print('Loading dataset...')
    ids = {}

    # debug: change this back to the wiki80 filepath
    for entry in os.scandir('./'):
        is_text_file = entry.path.endswith('.txt') and entry.is_file()
        if not is_text_file:
            continue

        for raw in loadFile(entry.path):
            # Parse json
            example = json.loads(raw)

            # add to entity ids: 'h' member first, then 't'
            for role in ('h', 't'):
                entity = example[role]
                if entity['name'] not in ids:
                    ids[entity['name']] = entity['id']

    return ids
def plotComparison(namesearch, directories, title, plottag='', isArm=True,
                   xRotation=False, yRotation=False, zRotation=False):
    """Group per-file force RMSE by velocity/rotation and report the means.

    Each file returned by ``util.getFilenames`` contributes one RMSE value
    (prediction vs. measurement, after zeroing entries with a negative
    activation and negative predicted forces). The value is filed under a
    key: the last element of ``data['rotateFist']`` / ``data['rotateArm']``
    when any rotation flag is set, otherwise the number parsed from
    characters ``[-9:-5]`` of the file name.

    Args:
        namesearch: Forwarded to ``util.getFilenames``.
        directories: Forwarded to ``util.getFilenames``.
        title: Not used by this function.
        plottag: Not used by this function.
        isArm: Chooses the heading text and, for rotations, which data
            field supplies the key.
        xRotation, yRotation, zRotation: When any is true the keys are
            rotations and are converted with ``np.degrees``.

    Returns:
        ``(xVel, yRMSE)``: the sorted keys and the mean RMSE per key.
        Both are empty when no file matched (previously this case raised
        ValueError while unpacking an empty ``zip``).
    """
    filenames = list(set(util.getFilenames(
        directories, parentDir=util.comparisonDir, namesearch=namesearch)))
    rotating = xRotation or yRotation or zRotation

    rmse = dict()
    for filename in filenames:
        data = util.loadFile(filename)
        if rotating:
            if isArm:
                velocity = data['rotateFist'][-1]
            else:
                velocity = data['rotateArm'][-1]
        else:
            velocity = float(filename[-9:-5])

        predForces = np.array(data['predForces'])
        predActivations = np.array(data['predActivations'])
        yForces = np.array(data['yForces'])
        yActivations = np.array(data['yActivations'])

        predForces[predActivations < 0] = 0
        predForces[predForces < 0] = 0
        yForces[yActivations < 0] = 0

        if velocity not in rmse:
            rmse[velocity] = []
        rmse[velocity].append(
            np.sqrt(np.mean(np.square(yForces - predForces))))

    # Single-argument print(...) emits the same text under the Python 2
    # print statement and the Python 3 print function.
    heading = ('Gown Simulation Variation' if isArm
               else 'Shorts Simulation Variation')
    print('%s %s %s' % ('-' * 5, heading, '-' * 5))

    # Keys are unique, so sorting them gives the same order as sorting the
    # (key, mean) pairs did, and it also works for an empty dict.
    xVel = sorted(rmse)
    yRMSE = [np.mean(rmse[vel]) for vel in xVel]

    if rotating:
        xVel = np.degrees(xVel)
        axis = 'x' if xRotation else 'y' if yRotation else 'z'
        print('RMSE %s rotations: %s' % (axis, xVel))
    else:
        print('RMSE velocities: %s' % (xVel,))
    print('RMSE: %s' % (yRMSE,))

    return xVel, yRMSE
def loadQuotes(parent):
    """Parse resources/quotes.txt and store the result in SplashWindow.quotes.

    Lines starting with "#" and blank lines are skipped. Lines that do not
    start with a space accumulate as the text of the current quote; a line
    that starts with a space closes the quote and is passed (stripped) as
    the first argument of ``Quote`` together with the accumulated lines.
    Any problem -- including a malformed file -- is reported in a message
    box and leaves ``SplashWindow.quotes`` untouched.
    """
    try:
        data = util.loadFile(misc.getFullPath("resources/quotes.txt"),
                             parent)
        if data is None:
            return

        data = data.decode("utf-8")
        lines = data.splitlines()

        quotes = []

        # lines saved for current quote being processed
        tmp = []

        # lineNr is 1-based so it can go straight into the messages
        for lineNr, line in enumerate(lines, 1):
            if line.startswith(u"#") or not line.strip():
                continue

            if line.startswith(u" "):
                if not tmp:
                    raise Exception(
                        "No lines defined for quote at line %d" % lineNr)

                if len(tmp) > 3:
                    raise Exception(
                        "Too many lines defined for quote at line %d"
                        % lineNr)

                quotes.append(Quote(line.strip(), tmp))
                tmp = []
            else:
                tmp.append(line.strip())

        if tmp:
            raise Exception("Last quote does not have source")

        SplashWindow.quotes = quotes

    # "as" form is accepted by Python 2.6+ and Python 3 alike
    except Exception as e:
        wx.MessageBox("Error loading quotes: %s" % str(e), "Error",
                      wx.OK, parent)
def loadQuotes(parent):
    """Load the splash-screen quotes into ``SplashWindow.quotes``.

    The quotes file is read through ``util.loadFile`` and decoded as
    UTF-8. Comment lines ("#") and blank lines are ignored; each run of
    ordinary lines is collected until a line beginning with a space ends
    the quote, at which point ``Quote(<that line stripped>, <collected
    lines>)`` is appended. Nothing is assigned when loading returns None
    or when an error is raised; errors are shown in a message box.
    """
    try:
        data = util.loadFile(misc.getFullPath("resources/quotes.txt"),
                             parent)
        if data is None:
            return

        data = data.decode("utf-8")
        lines = data.splitlines()

        quotes = []

        # lines saved for current quote being processed
        tmp = []

        for i, line in enumerate(lines):
            if line.startswith(u"#") or not line.strip():
                continue

            if not line.startswith(u" "):
                tmp.append(line.strip())
                continue

            # a line beginning with a space terminates the current quote
            if not tmp:
                raise Exception(
                    "No lines defined for quote at line %d" % (i + 1))

            if len(tmp) > 3:
                raise Exception(
                    "Too many lines defined for quote at line %d" % (i + 1))

            quotes.append(Quote(line.strip(), tmp))
            tmp = []

        if tmp:
            raise Exception("Last quote does not have source")

        SplashWindow.quotes = quotes

    except Exception as e:
        # same handler syntax as the importers in this file that already
        # use "except ... as e"
        wx.MessageBox("Error loading quotes: %s" % str(e), "Error",
                      wx.OK, parent)
def multiBatch(directories, seqCount=0):
    '''
    Generator that merges mini-batch files into larger (X, Y) array pairs.

    Mini-batches are read one file at a time and accumulated. With
    seqCount > 0 a pair is yielded as soon as at least seqCount rows have
    been gathered, and accumulation starts over. With seqCount = 0
    everything is yielded at once after the last file. Leftover rows are
    yielded at the end.
    '''
    X, Y = [], []

    # All mini-batch files within the specified directories
    for filename in util.getFilenames(directories, parentDir=util.batchDir):
        batch = util.loadFile(filename)
        X.extend(batch['X'].tolist())
        Y.extend(batch['Y'].tolist())

        # keep accumulating until the requested amount has been reached
        if seqCount <= 0 or len(X) < seqCount:
            continue

        # drop the reference to the loaded file before handing over
        batch = None
        yield np.array(X), np.array(Y)
        X, Y = [], []

    # Whatever is left after the last file
    if X:
        yield np.array(X), np.array(Y)
def importFDX(fileName, frame):
    """Import the paragraphs of an FDX file as screenplay lines.

    Every ``Content//Paragraph`` element except those of type "General"
    becomes one ``screenplay.Line`` whose text is the concatenation of the
    paragraph's ``Text`` children; unknown paragraph types are imported as
    ACTION.

    Returns:
        List of ``screenplay.Line`` objects, or None when loading yields
        None, or (after an error box) when the file is empty, contains no
        importable paragraph, or is not well-formed XML.
    """
    # paragraph "Type" attribute -> screenplay line type
    elemMap = {
        "Action": screenplay.ACTION,
        "Character": screenplay.CHARACTER,
        "Dialogue": screenplay.DIALOGUE,
        "Parenthetical": screenplay.PAREN,
        "Scene Heading": screenplay.SCENE,
        "Shot": screenplay.SHOT,
        "Transition": screenplay.TRANSITION,
    }

    # the 5 MB limit is arbitrary, we just want to avoid getting a
    # MemoryError exception for /dev/zero etc.
    data = util.loadFile(fileName, frame, 5000000)

    if data is None:
        return None

    if len(data) == 0:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)
        return None

    try:
        root = etree.XML(data)
        lines = []

        for para in root.xpath("Content//Paragraph"):
            et = para.get("Type")

            # "General" has embedded Dual Dialogue paragraphs inside it;
            # nothing to do for the General element itself.
            if et == "General":
                continue

            # all unknown linetypes are converted to Action
            lt = elemMap.get(et, screenplay.ACTION)

            s = u""
            for text in para.xpath("Text"):
                # text.text is None for paragraphs with no text, and +=
                # blows up trying to add a string object and None, so
                # guard against that
                if text.text:
                    s += text.text

            # FD uses some fancy unicode apostrophe, replace it with a
            # normal one
            s = s.replace(u"\u2019", "'")

            s = util.toInputStr(util.toLatin1(s))

            lines.append(screenplay.Line(screenplay.LB_LAST, lt, s))

        if not lines:
            wx.MessageBox("The file contains no importable lines", "Error",
                          wx.OK, frame)
            return None

        return lines

    # "as" form is accepted by Python 2.6+ and Python 3 alike
    except etree.XMLSyntaxError as e:
        wx.MessageBox("Error parsing file: %s" % e, "Error", wx.OK, frame)
        return None
def importTextFile(fileName, frame):
    """Import a plain-text screenplay, guessing line types from indentation.

    The lines are grouped by their count of leading spaces. Per-group
    statistics (scene starts, transitions, parentheticals, line count) are
    handed to ``setType`` to assign a line type to each indent, the result
    is presented in ``ImportDlg``, and after confirmation every non-empty
    line is turned into a ``screenplay.Line`` of its indent's type.

    Returns:
        List of ``screenplay.Line`` objects (never empty), or None when
        loading yields None, the file is empty or has only blank lines
        (error box shown), or the dialog is not confirmed.
    """
    # the 1 MB limit is arbitrary, we just want to avoid getting a
    # MemoryError exception for /dev/zero etc.
    data = util.loadFile(fileName, frame, 1000000)

    if data is None:
        return None

    if len(data) == 0:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)
        return None

    data = util.fixNL(data)
    lines = data.split("\n")

    tabWidth = 4

    # key = indent level, value = Indent
    indDict = {}

    # first pass: normalize every line in place and collect statistics
    for i, raw in enumerate(lines):
        s = util.toInputStr(raw.rstrip().expandtabs(tabWidth))

        # don't count empty lines towards indentation statistics
        if s.strip() == "":
            lines[i] = ""
            continue

        cnt = util.countInitial(s, " ")

        ind = indDict.get(cnt)
        if not ind:
            ind = Indent(cnt)
            indDict[cnt] = ind

        tmp = s.upper()

        if util.multiFind(tmp, ["EXT.", "INT."]):
            ind.sceneStart += 1

        if util.multiFind(tmp, ["CUT TO:", "DISSOLVE TO:"]):
            ind.trans += 1

        if re.match(r"^ +\(.*\)$", tmp):
            ind.paren += 1

        ind.lines.append(s.lstrip())
        lines[i] = s

    if len(indDict) == 0:
        wx.MessageBox("File contains only empty lines.", "Error", wx.OK,
                      frame)
        return None

    # scene/action indent
    setType(SCENE_ACTION, indDict, lambda v: v.sceneStart)

    # indent with most lines is dialogue in non-pure-action scripts
    setType(screenplay.DIALOGUE, indDict, lambda v: len(v.lines))

    # remaining indent with lines is character most likely
    setType(screenplay.CHARACTER, indDict, lambda v: len(v.lines))

    # transitions
    setType(screenplay.TRANSITION, indDict, lambda v: v.trans)

    # parentheticals
    setType(screenplay.PAREN, indDict, lambda v: v.paren)

    # some text files have this type of parens:
    #
    #        JOE
    #      (smiling and
    #       hopping along)
    #
    # this handles them.
    parenIndent = findIndent(indDict, lambda v: v.lt == screenplay.PAREN)
    if parenIndent != -1:
        paren2Indent = findIndent(
            indDict,
            lambda v, var: (v.lt == -1) and (v.indent == var),
            parenIndent + 1)

        if paren2Indent != -1:
            indDict[paren2Indent].lt = screenplay.PAREN

    # set line type to ACTION for any indents not recognized
    # (.values() instead of the Python-2-only .itervalues())
    for v in indDict.values():
        if v.lt == -1:
            v.lt = screenplay.ACTION

    # list(...) so the dialog receives a real list on Python 3 as well
    dlg = ImportDlg(frame, list(indDict.values()))

    if dlg.ShowModal() != wx.ID_OK:
        dlg.Destroy()
        return None

    dlg.Destroy()

    ret = []

    # second pass: build the screenplay lines
    for s in lines:
        cnt = util.countInitial(s, " ")
        s = s.lstrip()
        sUp = s.upper()

        if s:
            lt = indDict[cnt].lt

            if lt == IGNORE:
                continue

            if lt == SCENE_ACTION:
                if s.startswith(("EXT.", "INT.")):
                    lt = screenplay.SCENE
                else:
                    lt = screenplay.ACTION

            # a change of line type ends the previous element
            if ret and (ret[-1].lt != lt):
                ret[-1].lb = screenplay.LB_LAST

            if lt == screenplay.CHARACTER:
                if sUp.endswith("(CONT'D)"):
                    # 8 == len("(CONT'D)")
                    s = sUp[:-8].rstrip()

            elif lt == screenplay.PAREN:
                if s == "(continuing)":
                    s = ""

            if s:
                line = screenplay.Line(screenplay.LB_SPACE, lt, s)
                ret.append(line)

        elif ret:
            # a blank line ends the current element
            ret[-1].lb = screenplay.LB_LAST

    if len(ret) == 0:
        ret.append(screenplay.Line(screenplay.LB_LAST, screenplay.ACTION))

    # make sure the last line ends an element
    ret[-1].lb = screenplay.LB_LAST

    return ret
def importFountain(fileName, frame):
    """Import a Fountain-markup screenplay as a list of screenplay lines.

    Asks the user for three import options in a checkbox dialog, strips
    boneyard comments, optionally keeps or drops the leading title block,
    then classifies each line in a first pass and fixes up empty lines and
    linebreaks in a second pass.

    Returns the list of ``screenplay.Line`` objects, or None when
    ``util.loadFile`` yields None, the file is empty (error box shown), or
    the options dialog is not confirmed.
    """
    # regular expressions for fountain markdown.
    # https://github.com/vilcans/screenplain/blob/master/screenplain/richstring.py
    ire = re.compile(
        # one star
        r'\*'
        # anything but a space, then text
        r'([^\s].*?)'
        # finishing with one star
        r'\*'
        # must not be followed by star
        r'(?!\*)')

    bre = re.compile(
        # two stars
        r'\*\*'
        # must not be followed by space
        r'(?=\S)'
        # inside text
        r'(.+?[*_]*)'
        # finishing with two stars
        r'(?<=\S)\*\*')

    ure = re.compile(
        # underline
        r'_'
        # must not be followed by space
        r'(?=\S)'
        # inside text
        r'([^_]+)'
        # finishing with underline
        r'(?<=\S)_')

    # matches /* ... */ blocks, across newlines (DOTALL), non-greedy
    boneyard_re = re.compile('/\\*.*?\\*/', flags=re.DOTALL)

    # random magicstring used to escape literal star '\*'
    literalstar = "Aq7RR"

    # returns s with markdown formatting removed. Escaped stars are parked
    # as the magic string while the patterns run (bold before italic) and
    # restored as plain '*' afterwards.
    def unmarkdown(s):
        s = s.replace("\\*", literalstar)
        for style in (bre, ire, ure):
            s = style.sub(r'\1', s)
        return s.replace(literalstar, "*")

    data = util.loadFile(fileName, frame, 1000000)

    if data == None:
        return None

    if len(data) == 0:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)
        return None

    inf = []
    inf.append(misc.CheckBoxItem("Import titles as action lines."))
    inf.append(misc.CheckBoxItem("Remove unsupported formatting markup."))
    inf.append(misc.CheckBoxItem("Import section/synopsis as notes."))

    dlg = misc.CheckBoxDlg(frame, "Fountain import options", inf,
                           "Import options:", False)

    # NOTE(review): the dialog is destroyed only on this cancel path; on
    # the OK path no Destroy() call is visible -- confirm whether that is
    # intended.
    if dlg.ShowModal() != wx.ID_OK:
        dlg.Destroy()
        return None

    # the options are read back in the order they were appended to inf
    importTitles = inf[0].selected
    removeMarkdown = inf[1].selected
    importSectSyn = inf[2].selected

    # pre-process data - fix newlines, remove boneyard.
    data = util.fixNL(data)
    data = boneyard_re.sub('', data)
    prelines = data.split("\n")

    # lines that util.toLatin1 rejects are decoded as UTF-8 (dropping
    # undecodable bytes) and cleaned instead
    for i in xrange(len(prelines)):
        try:
            util.toLatin1(prelines[i])
        except:
            prelines[i] = util.cleanInput(
                u"" + prelines[i].decode('UTF-8', "ignore"))

    lines = []

    tabWidth = 4
    # classified lines produced by the first pass
    lns = []

    # NOTE(review): not referenced anywhere else in this function.
    sceneStartsList = ("INT", "EXT", "EST", "INT./EXT", "INT/EXT", "I/E",
                       "I./E")

    # NOTE(review): the name suggests a two-space marker, but the literal
    # as it stands here holds a single space -- confirm against the
    # upstream source.
    TWOSPACE = " "
    # set when the line following a dropped section/synopsis line must be
    # dropped as well
    skipone = False

    # First check if title lines are present:
    # c ends up as the number of leading non-empty lines.
    c = 0
    while c < len(prelines):
        if prelines[c] != "":
            c = c + 1
        else:
            break

    # prelines[0:c] are the first bunch of lines, that could be titles.
    # Our check for title is simple:
    #   - the line does not start with 'fade'
    #   - the first line has a single ':'
    if c > 0:
        l = util.toInputStr(
            prelines[0].expandtabs(tabWidth).lstrip().lower())
        if not l.startswith("fade") and l.count(":") == 1:
            # these are title lines. Now do what the user requested.
            if importTitles:
                # add TWOSPACE to all the title lines.
                for i in xrange(c):
                    prelines[i] += TWOSPACE
            else:
                # remove these lines (and the empty line that ends them)
                prelines = prelines[c + 1:]

    for l in prelines:
        if l != TWOSPACE:
            lines.append(util.toInputStr(l.expandtabs(tabWidth)))
        else:
            lines.append(TWOSPACE)

    linesLen = len(lines)

    # first pass helper functions; the "prev" helpers look at the last
    # line already classified (lns), the "next" helper at the raw input.
    def isPrevEmpty():
        if lns and lns[-1].text == "":
            return True
        return False

    def isPrevType(ltype):
        return (lns and lns[-1].lt == ltype)

    # looks ahead: true when a next line exists and it is empty
    def isNextEmpty(i):
        return (i + 1 < len(lines) and lines[i + 1] == "")

    # type of the previous classified line, ACTION when there is none
    def getPrevType():
        if lns:
            return lns[-1].lt
        else:
            return screenplay.ACTION

    def isParen(s):
        return (s.startswith('(') and s.endswith(')'))

    # a forced scene heading starts with a single '.', otherwise the line
    # has to start with one of the INT/EXT/EST style prefixes
    def isScene(s):
        if s.endswith(TWOSPACE):
            return False
        if s.startswith(".") and not s.startswith(".."):
            return True
        tmp = s.upper()
        if (re.match(r'^(INT|EXT|EST)[ .]', tmp) or
            re.match(r'^(INT\.?/EXT\.?)[ .]', tmp) or
            re.match(r'^I/E[ .]', tmp)):
            return True
        return False

    def isTransition(s):
        return ((s.isupper() and s.endswith("TO:")) or
                (s.startswith(">") and not s.endswith("<")))

    def isCentered(s):
        return s.startswith(">") and s.endswith("<")

    # three or more '=' and nothing else
    def isPageBreak(s):
        return s.startswith('===') and s.lstrip('=') == ''

    def isNote(s):
        return s.startswith("[[") and s.endswith("]]")

    def isSection(s):
        return s.startswith("#")

    # exactly one leading '='
    def isSynopsis(s):
        return s.startswith("=") and not s.startswith("==")

    # first pass - identify linetypes
    for i in range(linesLen):
        if skipone:
            skipone = False
            continue

        s = lines[i]
        sl = s.lstrip()
        # mark as ACTION by default.
        line = screenplay.Line(screenplay.LB_FORCED, screenplay.ACTION, s)

        # Start testing lines for element type. The checks run in this
        # order: scene, transition, character, paren, dialogue, note,
        # section/synopsis.
        if s == "" or isCentered(s) or isPageBreak(s):
            # do nothing - import as action.
            pass

        elif s == TWOSPACE:
            line.lt = getPrevType()

        elif isScene(s):
            line.lt = screenplay.SCENE
            if sl.startswith('.'):
                line.text = sl[1:]
            else:
                line.text = sl

        elif isTransition(sl) and isPrevEmpty() and isNextEmpty(i):
            line.lt = screenplay.TRANSITION
            if line.text.startswith('>'):
                line.text = sl[1:].lstrip()

        elif s.isupper() and isPrevEmpty() and not isNextEmpty(i):
            line.lt = screenplay.CHARACTER
            if s.endswith(TWOSPACE):
                line.lt = screenplay.ACTION

        elif isParen(sl) and (isPrevType(screenplay.CHARACTER) or
                              isPrevType(screenplay.DIALOGUE)):
            line.lt = screenplay.PAREN

        elif (isPrevType(screenplay.CHARACTER) or
              isPrevType(screenplay.DIALOGUE) or
              isPrevType(screenplay.PAREN)):
            line.lt = screenplay.DIALOGUE

        elif isNote(sl):
            line.lt = screenplay.NOTE
            line.text = sl.strip('[]')

        elif isSection(s) or isSynopsis(s):
            if not importSectSyn:
                # drop the line, and the empty line after it if any
                if isNextEmpty(i):
                    skipone = True
                continue

            line.lt = screenplay.NOTE
            line.text = sl.lstrip('=#')

        if line.text == TWOSPACE:
            pass

        elif line.lt != screenplay.ACTION:
            line.text = line.text.lstrip()

        else:
            tmp = line.text.rstrip()
            # we don't support center align, so simply add required indent.
            if isCentered(tmp):
                tmp = tmp[1:-1].strip()
                width = frame.panel.ctrl.sp.cfg.getType(
                    screenplay.ACTION).width
                if len(tmp) < width:
                    tmp = ' ' * ((width - len(tmp)) // 2) + tmp
            line.text = tmp

        if removeMarkdown:
            line.text = unmarkdown(line.text)
            # drop a trailing '^' from character lines
            if line.lt == screenplay.CHARACTER and line.text.endswith('^'):
                line.text = line.text[:-1]

        lns.append(line)

    ret = []

    # second pass helper functions.
    def isLastLBForced():
        return ret and ret[-1].lb == screenplay.LB_FORCED

    def makeLastLBLast():
        if ret:
            ret[-1].lb = screenplay.LB_LAST

    def isRetPrevType(t):
        return ret and ret[-1].lt == t

    # second pass - remove unneeded empty lines, and fix the linebreaks.
    for ln in lns:
        if ln.text == '':
            # an empty line after a forced break only ends that element;
            # otherwise it is kept
            if isLastLBForced():
                makeLastLBLast()
            else:
                ret.append(ln)

        elif not isRetPrevType(ln.lt):
            # a change of line type ends the previous element
            makeLastLBLast()
            ret.append(ln)

        else:
            ret.append(ln)

    # the last line always ends an element
    makeLastLBLast()
    return ret
def importFDX(fileName, frame):
    """Import paragraphs and script notes of an FDX file as screenplay lines.

    For every ``Content//Paragraph`` element the text of its script notes
    (``ScriptNote/Paragraph/Text``) is imported as NOTE lines first, then
    -- unless the paragraph type is "General" or missing -- the text of its
    ``Text`` children is imported with the mapped line type (ACTION for
    unknown types). Multi-line text is split into one line per row, with a
    forced break on all but the last.

    Returns:
        List of ``screenplay.Line`` objects, or None when loading yields
        None, or (after an error box) when the file is empty, contains no
        importable lines, or is not well-formed XML.
    """
    # paragraph "Type" attribute -> screenplay line type
    elemMap = {
        "Action": screenplay.ACTION,
        "Character": screenplay.CHARACTER,
        "Dialogue": screenplay.DIALOGUE,
        "Parenthetical": screenplay.PAREN,
        "Scene Heading": screenplay.SCENE,
        "Shot": screenplay.SHOT,
        "Transition": screenplay.TRANSITION,
    }

    # the 5 MB limit is arbitrary, we just want to avoid getting a
    # MemoryError exception for /dev/zero etc.
    data = util.loadFile(fileName, frame, 5000000)

    if data is None:
        return None

    if len(data) == 0:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)
        return None

    try:
        root = etree.XML(data)
        lines = []

        # appends eleText to lines, one screenplay line per text row
        def addElem(eleType, eleText):
            lns = eleText.split("\n")

            # if elem ends in a newline, last line is empty and useless;
            # get rid of it
            if not lns[-1] and (len(lns) > 1):
                lns = lns[:-1]

            for s in lns[:-1]:
                lines.append(screenplay.Line(
                    screenplay.LB_FORCED, eleType, util.cleanInput(s)))

            lines.append(screenplay.Line(
                screenplay.LB_LAST, eleType, util.cleanInput(lns[-1])))

        for para in root.xpath("Content//Paragraph"):
            addedNote = False
            et = para.get("Type")

            # Check for script notes
            s = u""
            for notes in para.xpath("ScriptNote/Paragraph/Text"):
                if notes.text:
                    s += notes.text

                # FD has AdornmentStyle set to "0" on notes with newline.
                if notes.get("AdornmentStyle") == "0":
                    s += "\n"

            if s:
                addElem(screenplay.NOTE, s)
                addedNote = True

            # "General" has embedded Dual Dialogue paragraphs inside it;
            # nothing to do for the General element itself.
            #
            # If no type is defined (like inside scriptnote), skip.
            if (et == "General") or (et is None):
                continue

            s = u""
            for text in para.xpath("Text"):
                # text.text is None for paragraphs with no text, and +=
                # blows up trying to add a string object and None, so
                # guard against that
                if text.text:
                    s += text.text

            # don't remove paragraphs with no text, unless that paragraph
            # contained a scriptnote
            if s or not addedNote:
                lt = elemMap.get(et, screenplay.ACTION)
                addElem(lt, s)

        if not lines:
            wx.MessageBox("The file contains no importable lines", "Error",
                          wx.OK, frame)
            return None

        return lines

    # "as" form is accepted by Python 2.6+ and Python 3 alike
    except etree.XMLSyntaxError as e:
        wx.MessageBox("Error parsing file: %s" % e, "Error", wx.OK, frame)
        return None
def importCeltx(fileName, frame):
    """Import the script stored in a Celtx file as screenplay lines.

    The first archive member whose name starts with "script" is parsed as
    HTML; every ``/html/body/p`` element that yields text becomes one
    element whose line type is looked up from the paragraph's ``class``
    attribute (ACTION for unknown classes).

    Returns:
        List of ``screenplay.Line`` objects, or None when loading yields
        None, or (after an error box) when the file is empty, is not a
        readable zip archive, holds no script member, the member is
        empty, parsing fails, or no lines result.
    """
    # Celtx files are zipfiles, and the script content is within a file
    # called "script-xxx.html", where xxx can be random.

    # the 5 MB limit is arbitrary, we just want to avoid getting a
    # MemoryError exception for /dev/zero etc.
    data = util.loadFile(fileName, frame, 5000000)

    if data is None:
        return None

    if len(data) == 0:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)
        return None

    try:
        # zipfile reads bytes, so the in-memory file has to be a binary
        # stream; io.StringIO is text-only and could never work here.
        # Building the buffer inside the try reports non-bytes data as an
        # invalid file instead of raising.
        # NOTE(review): assumes util.loadFile returns bytes -- confirm.
        z = zipfile.ZipFile(io.BytesIO(data))
    # Exception rather than a bare except, so KeyboardInterrupt and
    # SystemExit are not swallowed
    except Exception:
        wx.MessageBox("File is not a valid Celtx script file.", "Error",
                      wx.OK, frame)
        return None

    # close the archive on every path, including the "no script" one and
    # a failing read
    try:
        scripts = [s for s in z.namelist() if s.startswith("script")]
        content = z.open(scripts[0]).read() if scripts else None
    finally:
        z.close()

    if not scripts:
        wx.MessageBox("Unable to find script in this Celtx file.", "Error",
                      wx.OK, frame)
        return None

    if not content:
        wx.MessageBox("Script seems to be empty.", "Error", wx.OK, frame)
        return None

    # paragraph class -> screenplay line type
    elemMap = {
        "action": screenplay.ACTION,
        "character": screenplay.CHARACTER,
        "dialog": screenplay.DIALOGUE,
        "parenthetical": screenplay.PAREN,
        "sceneheading": screenplay.SCENE,
        "shot": screenplay.SHOT,
        "transition": screenplay.TRANSITION,
        "act": screenplay.ACTBREAK,
    }

    try:
        parser = etree.HTMLParser()
        root = etree.XML(content, parser)
    except etree.XMLSyntaxError as e:
        wx.MessageBox("Error parsing file: %s" % e, "Error", wx.OK, frame)
        return None

    lines = []

    # appends the text rows in lns to lines as one element
    def addElem(eleType, lns):
        # if elem ends in a newline, last line is empty and useless;
        # get rid of it
        if not lns[-1] and (len(lns) > 1):
            lns = lns[:-1]

        for s in lns[:-1]:
            lines.append(screenplay.Line(
                screenplay.LB_FORCED, eleType, util.cleanInput(s)))

        lines.append(screenplay.Line(
            screenplay.LB_LAST, eleType, util.cleanInput(lns[-1])))

    for para in root.xpath("/html/body/p"):
        # one item per text node; embedded newlines become spaces
        items = [str(line.replace("\n", " ")) for line in para.itertext()]

        lt = elemMap.get(para.get("class"), screenplay.ACTION)

        if items:
            addElem(lt, items)

    if not lines:
        wx.MessageBox("The file contains no importable lines", "Error",
                      wx.OK, frame)
        return None

    return lines
def importAstx(fileName, frame):
    """Import the paragraphs of an .astx file as screenplay lines.

    Each ``/AdobeStory/document/stream/section/scene/paragraph`` element
    becomes one element: the text of its ``textRun`` children is joined,
    a ``break`` child starts a new row, and the line type is looked up
    from the paragraph's ``element`` attribute (ACTION for unknown
    values).

    Returns:
        List of ``screenplay.Line`` objects, or None when loading yields
        None, or (after an error box) when the file is empty, is not
        well-formed XML, or produces no lines.
    """
    # astx files are xml files. The textlines can be found under
    # AdobeStory/document/stream/section/scene/paragraph which contain
    # one or more textRun/break elements, to be joined. The paragraph
    # attribute "element" gives us the element style.
    data = util.loadFile(fileName, frame, 5000000)

    # identity test: None is a singleton
    if data is None:
        return None

    if len(data) == 0:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)
        return None

    # paragraph "element" attribute -> screenplay line type
    elemMap = {
        "Action": screenplay.ACTION,
        "Character": screenplay.CHARACTER,
        "Dialog": screenplay.DIALOGUE,
        "Parenthetical": screenplay.PAREN,
        "SceneHeading": screenplay.SCENE,
        "Shot": screenplay.SHOT,
        "Transition": screenplay.TRANSITION,
    }

    try:
        root = etree.XML(data)
    except etree.XMLSyntaxError as e:
        wx.MessageBox("Error parsing file: %s" % e, "Error", wx.OK, frame)
        return None

    lines = []

    # appends the text rows in items to lines as one element
    def addElem(eleType, items):
        # if elem ends in a newline, last line is empty and useless;
        # get rid of it
        if not items[-1] and (len(items) > 1):
            items = items[:-1]

        for s in items[:-1]:
            lines.append(screenplay.Line(
                screenplay.LB_FORCED, eleType, util.cleanInput(s)))

        lines.append(screenplay.Line(
            screenplay.LB_LAST, eleType, util.cleanInput(items[-1])))

    for para in root.xpath(
            "/AdobeStory/document/stream/section/scene/paragraph"):
        lt = elemMap.get(para.get("element"), screenplay.ACTION)
        items = []
        s = ""

        for text in para:
            if text.tag == "textRun" and text.text:
                s += text.text
            elif text.tag == "break":
                # a break ends the current row
                items.append(s.rstrip())
                s = ""

        # the text after the last break (possibly empty) is the final row
        items.append(s.rstrip())

        addElem(lt, items)

    if not lines:
        wx.MessageBox("File has no content.", "Error", wx.OK, frame)
        return None

    return lines
def main(argv):
    """Extract WORK and PART templates from a text file into a JSON file.

    Every text returned by loadFile(argv[0]) is run through spaCy, an
    entity graph is built from it, and the maximal cliques of that graph
    are scanned for edges that fill work / part-of templates. Each filled
    template is appended as one JSON line to a file named after the input
    file with its last four characters replaced by '.json'.

    Args:
        argv: argument list whose first item is the input file name.
    """
    # the original check only looked at sys.argv; also refuse an empty
    # argv so that argv[0] below cannot raise IndexError
    if len(sys.argv) < 2 or not argv:
        print("pass filename")
        sys.exit(2)

    fileName = argv[0]
    print("loading " + fileName)
    texts = loadFile(fileName)

    # debug
    # texts = ['Rami Eid is studying at Stony Brook University in New York.',
    #          'Blounts Creek is a small unincorporated rural community in Beaufort County, North Carolina, United States, near a creek with the same name.']

    # task 1
    #task1(texts[0])

    nlp = spacy.load("en_core_web_sm")

    # [:-4] drops a four character suffix such as ".txt"
    outPath = str(fileName)[:-4] + '.json'

    # loop invariant settings, applied once up front
    sys.setrecursionlimit(2000)
    jsons.suppress_warnings()

    for idx, doc in enumerate(nlp.pipe(texts, disable=["tagger", "parser"])):
        print("Named Entities:", [(ent.text, ent.label_) for ent in doc.ents])

        # Represent entity graph as dictionary: <Entity name, Node>
        nodes = buildEntityGraph(doc, texts[idx])

        # verifying graph
        print("Graph:")
        printGraph(nodes)

        # Find maximal cliques and clique weights
        print("BRON-KERBOSCH")
        cliques = bron_kerbosch(list(nodes.values()))
        print("cliques:", cliques)

        # if the clique contains certain types of relations, then we fill
        # them into the complex relation / template
        workTemplates = []
        partTemplates = []
        for clique in cliques:
            for node in clique:
                for edge in node.weightedEdges:
                    if edge.dst in clique:
                        tryAddWorkTemplate(edge, workTemplates)
                        tryAddPartTemplate(edge, partTemplates)
                        #tryAddBuyTemplate(edge, partTemplates)

        # verifying template filling
        for work in workTemplates:
            print('Work:', work.person, work.org, work.title, work.location,
                  sep=', ')
        for part in partTemplates:
            print(part.part, part.whole, sep=' part of ')

        # writing templates to json output
        tokens = [token.text for token in doc]
        out = []

        for template in workTemplates:
            arguments = {
                '1': template.person or "",
                '2': template.org or "",
                '3': template.title or "",
                '4': template.location or "",
            }
            # each extraction gets its own copy of the token list
            extraction = Extraction('WORK', list(tokens), arguments)
            out.append(Output(fileName, extraction))

        for template in partTemplates:
            arguments = {
                '1': template.part or "",
                '2': template.whole or "",
            }
            extraction = Extraction('PART', list(tokens), arguments)
            out.append(Output(fileName, extraction))

        # Write new relations to data file. The file is opened once per
        # document, and only when there is something to write, so that no
        # empty output file is created.
        if out:
            with open(outPath, 'a') as the_file:
                for output in out:
                    the_file.write(json.dumps(jsons.dump(output)) + '\n')
def importFadein(fileName, frame):
    """Import a Fadein file as a list of screenplay lines.

    Fadein file is a zipped document.xml file.
    the .xml is in open screenplay format:
    http://sourceforge.net/projects/openscrfmt/files/latest/download

    Notes and synopses attached to a paragraph are imported as notes; the
    paragraph text is imported with the type given by its style, falling
    back to action.

    Args:
        fileName: path of the file to import.
        frame: parent window for the error message boxes.

    Returns:
        A list of screenplay.Line objects, or None if nothing could be
        imported.
    """
    # the 5 MB limit is arbitrary, we just want to avoid getting a
    # MemoryError exception for /dev/zero etc.
    data = util.loadFile(fileName, frame, 5000000)

    if data is None:
        return None

    if len(data) == 0:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)
        return None

    # zipfile needs a binary file object, hence BytesIO rather than
    # StringIO; building it inside the try block reports data of an
    # unexpected type as an invalid file instead of crashing.
    try:
        with zipfile.ZipFile(io.BytesIO(data)) as z:
            content = z.read("document.xml")
    except Exception:
        wx.MessageBox("File is not a valid .fadein file.", "Error",
                      wx.OK, frame)
        return None

    if not content:
        wx.MessageBox("Script seems to be empty.", "Error", wx.OK, frame)
        return None

    elemMap = {
        "Action": screenplay.ACTION,
        "Character": screenplay.CHARACTER,
        "Dialogue": screenplay.DIALOGUE,
        "Parenthetical": screenplay.PAREN,
        "Scene Heading": screenplay.SCENE,
        "Shot": screenplay.SHOT,
        "Transition": screenplay.TRANSITION,
    }

    try:
        root = etree.XML(content)
    except etree.XMLSyntaxError as e:
        wx.MessageBox("Error parsing file: %s" % e, "Error", wx.OK, frame)
        return None

    lines = []

    def addElem(eleType, lns):
        # if elem ends in a newline, last line is empty and useless;
        # get rid of it
        if not lns[-1] and (len(lns) > 1):
            lns = lns[:-1]

        for s in lns[:-1]:
            lines.append(screenplay.Line(
                screenplay.LB_FORCED, eleType, util.cleanInput(s)))

        lines.append(screenplay.Line(
            screenplay.LB_LAST, eleType, util.cleanInput(lns[-1])))

    # opening tags that carry attributes are removed with regular
    # expressions, the fixed tags with plain string replacement.
    re_rem = [re.compile(r'<font[^>]*>'),
              re.compile(r'<size[^>]*>'),
              re.compile(r'<bgcolor[^>]*>')]
    rem = ["<b>", "</b>", "<i>", "</i>", "<u>", "</u>",
           "</font>", "</size>", "</bgcolor>"]

    # removes html formatting from s, and returns list of lines.
    # if s is None (or empty), return a list with single empty string.
    def sanitizeStr(s):
        if not s:
            return [""]

        for pattern in re_rem:
            s = pattern.sub("", s)

        for tag in rem:
            s = s.replace(tag, "")

        return s.split("<br>") if s else [""]

    for para in root.xpath("paragraphs/para"):
        # check for notes/synopsis, import as Note.
        for attr in ("note", "synopsis"):
            value = para.get(attr)

            if value:
                addElem(screenplay.NOTE, sanitizeStr(value))

        # look for the <style> and <text> tags. Bail if no <text> found.
        txt = para.xpath("text")

        if not txt:
            continue

        styl = para.xpath("style")

        if styl:
            lt = elemMap.get(styl[0].get("basestylename"),
                             screenplay.ACTION)
        else:
            lt = screenplay.ACTION

        items = sanitizeStr(txt[0].text)

        # make sure parentheticals are wrapped in parentheses. startswith
        # is used instead of indexing the first character because the
        # first line can be an empty string.
        if (lt == screenplay.PAREN) and not items[0].startswith("("):
            items[0] = "(" + items[0]
            items[-1] = items[-1] + ")"

        addElem(lt, items)

    if len(lines) == 0:
        wx.MessageBox("The file contains no importable lines", "Error",
                      wx.OK, frame)
        return None

    return lines
def __init__(self, filestr):
    """Remember filestr and load the file of that name from "data/"."""
    dataPath = "data/" + filestr

    self.filestr = filestr
    self.data = util.loadFile(dataPath)
from datetime import datetime
import util, datapreprocess
import build.pysim as pysim

''' Used to compare how a trained LSTM model handels against various variations. '''

# NOTE(review): `np` is used below but no numpy import is visible in this
# part of the file - confirm it is imported elsewhere.

# Deterministic output: seed numpy's global random generator so that the
# randomly generated values below are the same on every run.
np.random.seed(1000)

# Generate or load 128 randomly positioned locations.
# The positions and splines are cached through util under a name that
# includes the sequence count; they are only generated when no such file
# exists yet.
sequences = 128
if util.fileExists('randomPositionsSplines_Leg_%d_New' % sequences, '', util.comparisonDir):
    allArmPositions, allSplines = util.loadFile('randomPositionsSplines_Leg_%d_New' % sequences, '', util.comparisonDir)
    print 'Loaded arm positions and spline trajectories'
else:
    # one [x, y, z] triple of uniform random values per sequence
    allArmPositions = [[np.random.uniform(-0.05, 0.05), np.random.uniform(-0.2, -0.05), np.random.uniform(-0.05, 0.05)] for i in xrange(sequences)]
    # sides = [np.random.randint(1, 2) for j in xrange(sequences)]
    # 24 seven-element entries per sequence; the sign of the third value
    # alternates with i, and the last four values are the constants
    # 0, 1, 0, 0.
    # NOTE(review): presumably position followed by an orientation - confirm.
    allSplines = [[[np.random.uniform(-0.03, 0.03), -0.1 + np.random.uniform(-0.02, 0.02), (-1)**i * np.random.uniform(0, 0.05), 0, 1, 0, 0] for i in xrange(1, 25)] for j in xrange(sequences)]
    util.saveData('randomPositionsSplines_Leg_%d_New' % sequences, [allArmPositions, allSplines], '', util.comparisonDir)
    print 'Saved arm positions for later reference'

# Use velocities of Decrease: 50%, 10%, 5%, and Increase: 5%, 10%, 50%
# NOTE(review): the list below holds ten absolute values rather than the
# six percentages named above - confirm which one is intended.
velocities = [1.5, 1.75, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.75, 3.0]
# rotation variations in radians: 0 and -/+ 1, 5, 10 and 15 degrees.
# The same set of angles is used for all three axes.
rotateX = [0.0, -np.radians(1), -np.radians(5), -np.radians(10), -np.radians(15), np.radians(1), np.radians(5), np.radians(10), np.radians(15)]
# y axis variation (left and right, negative is right (wrt human))
rotateY = [0.0, -np.radians(1), -np.radians(5), -np.radians(10), -np.radians(15), np.radians(1), np.radians(5), np.radians(10), np.radians(15)]
# z axis variation (up and down, negative is down (wrt human))
rotateZ = [0.0, -np.radians(1), -np.radians(5), -np.radians(10), -np.radians(15), np.radians(1), np.radians(5), np.radians(10), np.radians(15)]
def importTextFile(fileName, frame):
    """Import a plain text screenplay as a list of screenplay lines.

    The element type of each line is guessed from its indentation: the
    lines are grouped by indent level, statistics are collected for every
    level, and the levels are then assigned element types. The user can
    review that assignment in a dialog before the lines are converted.

    Args:
        fileName: path of the file to import.
        frame: parent window for the dialogs.

    Returns:
        A list of screenplay.Line objects, or None if the file could not
        be imported or the user cancelled the dialog.
    """
    # the 1 MB limit is arbitrary, we just want to avoid getting a
    # MemoryError exception for /dev/zero etc.
    data = util.loadFile(fileName, frame, 1000000)

    if data is None:
        return None

    if len(data) == 0:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)
        return None

    data = util.fixNL(data)
    lines = data.split("\n")

    tabWidth = 4

    # key = indent level, value = Indent
    indDict = {}

    for i, rawLine in enumerate(lines):
        s = util.toInputStr(rawLine.rstrip().expandtabs(tabWidth))

        # don't count empty lines towards indentation statistics
        if s.strip() == "":
            lines[i] = ""
            continue

        cnt = util.countInitial(s, " ")

        ind = indDict.get(cnt)
        if not ind:
            ind = Indent(cnt)
            indDict[cnt] = ind

        tmp = s.upper()

        if util.multiFind(tmp, ["EXT.", "INT."]):
            ind.sceneStart += 1

        if util.multiFind(tmp, ["CUT TO:", "DISSOLVE TO:"]):
            ind.trans += 1

        if re.match(r"^ +\(.*\)$", tmp):
            ind.paren += 1

        ind.lines.append(s.lstrip())
        lines[i] = s

    if len(indDict) == 0:
        wx.MessageBox("File contains only empty lines.", "Error", wx.OK,
                      frame)
        return None

    # scene/action indent
    setType(SCENE_ACTION, indDict, lambda v: v.sceneStart)

    # indent with most lines is dialogue in non-pure-action scripts
    setType(screenplay.DIALOGUE, indDict, lambda v: len(v.lines))

    # remaining indent with lines is character most likely
    setType(screenplay.CHARACTER, indDict, lambda v: len(v.lines))

    # transitions
    setType(screenplay.TRANSITION, indDict, lambda v: v.trans)

    # parentheticals
    setType(screenplay.PAREN, indDict, lambda v: v.paren)

    # some text files have this type of parens:
    #
    #        JOE
    #      (smiling and
    #       hopping along)
    #
    # this handles them.
    parenIndent = findIndent(indDict, lambda v: v.lt == screenplay.PAREN)
    if parenIndent != -1:
        paren2Indent = findIndent(
            indDict,
            lambda v, var: (v.lt == -1) and (v.indent == var),
            parenIndent + 1)

        if paren2Indent != -1:
            indDict[paren2Indent].lt = screenplay.PAREN

    # set line type to ACTION for any indents not recognized.
    # values() instead of itervalues() so this also runs on Python 3.
    for v in indDict.values():
        if v.lt == -1:
            v.lt = screenplay.ACTION

    # hand the dialog a real list rather than a dict view, and make sure
    # the dialog is destroyed even if showing it fails
    dlg = ImportDlg(frame, list(indDict.values()))
    try:
        accepted = dlg.ShowModal() == wx.ID_OK
    finally:
        dlg.Destroy()

    if not accepted:
        return None

    ret = []

    for s in lines:
        cnt = util.countInitial(s, " ")
        s = s.lstrip()
        sUp = s.upper()

        if s:
            lt = indDict[cnt].lt

            if lt == IGNORE:
                continue

            if lt == SCENE_ACTION:
                if s.startswith(("EXT.", "INT.")):
                    lt = screenplay.SCENE
                else:
                    lt = screenplay.ACTION

            # a change of type ends the previous element
            if ret and (ret[-1].lt != lt):
                ret[-1].lb = screenplay.LB_LAST

            if lt == screenplay.CHARACTER:
                # strip the (CONT'D) marker; 8 == len("(CONT'D)")
                if sUp.endswith("(CONT'D)"):
                    s = sUp[:-8].rstrip()

            elif lt == screenplay.PAREN:
                if s == "(continuing)":
                    s = ""

            if s:
                ret.append(screenplay.Line(screenplay.LB_SPACE, lt, s))

        elif ret:
            # an empty line ends the previous element
            ret[-1].lb = screenplay.LB_LAST

    if len(ret) == 0:
        ret.append(screenplay.Line(screenplay.LB_LAST, screenplay.ACTION))

    # make sure the last line ends an element
    ret[-1].lb = screenplay.LB_LAST

    return ret
def importFountain(fileName, frame):
    """Import a Fountain formatted text file as a list of screenplay lines.

    The user is first asked for import options (import title lines as
    action, strip formatting markup, import sections/synopses as notes).
    The text is then processed in two passes: the first one decides the
    element type of every line, the second one removes unneeded empty
    lines and fixes the linebreaks.

    Args:
        fileName: path of the file to import.
        frame: parent window for the dialogs; its screenplay configuration
            is read to indent centered text.

    Returns:
        A list of screenplay.Line objects, or None if the file could not
        be imported or the user cancelled the options dialog.
    """
    # regular expressions for fountain markdown.
    # https://github.com/vilcans/screenplain/blob/master/screenplain/richstring.py
    ire = re.compile(
        # one star
        r'\*'
        # anything but a space, then text
        r'([^\s].*?)'
        # finishing with one star
        r'\*'
        # must not be followed by star
        r'(?!\*)'
    )
    bre = re.compile(
        # two stars
        r'\*\*'
        # must not be followed by space
        r'(?=\S)'
        # inside text
        r'(.+?[*_]*)'
        # finishing with two stars
        r'(?<=\S)\*\*'
    )
    ure = re.compile(
        # underline
        r'_'
        # must not be followed by space
        r'(?=\S)'
        # inside text
        r'([^_]+)'
        # finishing with underline
        r'(?<=\S)_'
    )
    boneyard_re = re.compile('/\\*.*?\\*/', flags=re.DOTALL)

    # scene headings: INT / EXT / EST / INT./EXT. / I/E followed by a
    # space or a dot
    scene_re = re.compile(r'^(?:INT|EXT|EST|INT\.?/EXT\.?|I/E)[ .]')

    # random magicstring used to escape literal star '\*'
    literalstar = "Aq7RR"

    # returns s with markdown formatting removed.
    def unmarkdown(s):
        s = s.replace("\\*", literalstar)
        for style in (bre, ire, ure):
            s = style.sub(r'\1', s)
        return s.replace(literalstar, "*")

    data = util.loadFile(fileName, frame, 1000000)

    if data is None:
        return None

    if len(data) == 0:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)
        return None

    inf = [
        misc.CheckBoxItem("Import titles as action lines."),
        misc.CheckBoxItem("Remove unsupported formatting markup."),
        misc.CheckBoxItem("Import section/synopsis as notes."),
    ]

    dlg = misc.CheckBoxDlg(frame, "Fountain import options", inf,
                           "Import options:", False)

    # the dialog is destroyed on every path, not only when cancelled
    try:
        if dlg.ShowModal() != wx.ID_OK:
            return None

        importTitles = inf[0].selected
        removeMarkdown = inf[1].selected
        importSectSyn = inf[2].selected
    finally:
        dlg.Destroy()

    # pre-process data - fix newlines, remove boneyard.
    try:
        data = data.decode("utf-8")
    except UnicodeDecodeError:
        wx.MessageBox("File is not valid UTF-8 text.", "Error", wx.OK,
                      frame)
        return None

    data = util.fixNL(data)
    data = boneyard_re.sub('', data)
    prelines = data.split("\n")

    tabWidth = 4
    lns = []

    # a line consisting of exactly two spaces is kept as an intentionally
    # empty line that continues the previous element
    TWOSPACE = "  "
    skipone = False

    # First check if title lines are present: count the non-empty lines
    # at the start of the file.
    c = 0
    while c < len(prelines) and prelines[c] != "":
        c += 1

    # prelines[0:c] are the first bunch of lines, that could be titles.
    # Our check for title is simple:
    #   - the line does not start with 'fade'
    #   - the first line has a single ':'
    if c > 0:
        first = util.toInputStr(
            prelines[0].expandtabs(tabWidth).lstrip().lower())

        if not first.startswith("fade") and first.count(":") == 1:
            # these are title lines. Now do what the user requested.
            if importTitles:
                # add TWOSPACE to all the title lines.
                for i in range(c):
                    prelines[i] += TWOSPACE
            else:
                # remove these lines, and the empty line ending them
                prelines = prelines[c+1:]

    lines = [
        TWOSPACE if pre == TWOSPACE
        else util.toInputStr(pre.expandtabs(tabWidth))
        for pre in prelines
    ]

    def isPrevEmpty():
        return bool(lns) and lns[-1].text == ""

    def isPrevType(ltype):
        return bool(lns) and lns[-1].lt == ltype

    # looks ahead to check if there is a next line and it is empty
    def isNextEmpty(i):
        return (i+1 < len(lines)) and (lines[i+1] == "")

    def getPrevType():
        if lns:
            return lns[-1].lt
        return screenplay.ACTION

    def isParen(s):
        return s.startswith('(') and s.endswith(')')

    def isScene(s):
        if s.endswith(TWOSPACE):
            return False

        # a single leading dot forces a scene heading
        if s.startswith(".") and not s.startswith(".."):
            return True

        return scene_re.match(s.upper()) is not None

    def isTransition(s):
        return ((s.isupper() and s.endswith("TO:")) or
                (s.startswith(">") and not s.endswith("<")))

    def isCentered(s):
        return s.startswith(">") and s.endswith("<")

    def isPageBreak(s):
        return s.startswith('===') and s.lstrip('=') == ''

    def isNote(s):
        return s.startswith("[[") and s.endswith("]]")

    def isSection(s):
        return s.startswith("#")

    def isSynopsis(s):
        return s.startswith("=") and not s.startswith("==")

    # first pass - identify linetypes
    for i, s in enumerate(lines):
        if skipone:
            skipone = False
            continue

        sl = s.lstrip()

        # mark as ACTION by default.
        line = screenplay.Line(screenplay.LB_FORCED, screenplay.ACTION, s)

        # Start testing lines for element type. Go in order:
        # Scene Character, Paren, Dialog, Transition, Note.

        if s == "" or isCentered(s) or isPageBreak(s):
            # do nothing - import as action.
            pass

        elif s == TWOSPACE:
            line.lt = getPrevType()

        elif isScene(s):
            line.lt = screenplay.SCENE
            if sl.startswith('.'):
                line.text = sl[1:]
            else:
                line.text = sl

        elif isTransition(sl) and isPrevEmpty() and isNextEmpty(i):
            line.lt = screenplay.TRANSITION
            if line.text.startswith('>'):
                line.text = sl[1:].lstrip()

        elif s.isupper() and isPrevEmpty() and not isNextEmpty(i):
            line.lt = screenplay.CHARACTER
            # trailing two spaces turn the line back into action
            if s.endswith(TWOSPACE):
                line.lt = screenplay.ACTION

        elif isParen(sl) and (isPrevType(screenplay.CHARACTER) or
                              isPrevType(screenplay.DIALOGUE)):
            line.lt = screenplay.PAREN

        elif (isPrevType(screenplay.CHARACTER) or
              isPrevType(screenplay.DIALOGUE) or
              isPrevType(screenplay.PAREN)):
            line.lt = screenplay.DIALOGUE

        elif isNote(sl):
            line.lt = screenplay.NOTE
            line.text = sl.strip('[]')

        elif isSection(s) or isSynopsis(s):
            if not importSectSyn:
                # drop the line, and the empty line following it
                if isNextEmpty(i):
                    skipone = True
                continue

            line.lt = screenplay.NOTE
            line.text = sl.lstrip('=#')

        if line.text == TWOSPACE:
            pass

        elif line.lt != screenplay.ACTION:
            line.text = line.text.lstrip()

        else:
            tmp = line.text.rstrip()

            # we don't support center align, so simply add required indent.
            if isCentered(tmp):
                tmp = tmp[1:-1].strip()
                width = frame.panel.ctrl.sp.cfg.getType(
                    screenplay.ACTION).width

                if len(tmp) < width:
                    tmp = ' ' * ((width - len(tmp)) // 2) + tmp

            line.text = tmp

        if removeMarkdown:
            line.text = unmarkdown(line.text)

            # drop a trailing '^' from character names
            if line.lt == screenplay.CHARACTER and line.text.endswith('^'):
                line.text = line.text[:-1]

        lns.append(line)

    ret = []

    # second pass helper functions.
    def isLastLBForced():
        return bool(ret) and ret[-1].lb == screenplay.LB_FORCED

    def makeLastLBLast():
        if ret:
            ret[-1].lb = screenplay.LB_LAST

    def isRetPrevType(t):
        return bool(ret) and ret[-1].lt == t

    # second pass - remove unneeded empty lines, and fix the linebreaks.
    for ln in lns:
        if ln.text == '':
            # an empty line after a forced linebreak only ends the
            # previous element; otherwise it is kept
            if isLastLBForced():
                makeLastLBLast()
            else:
                ret.append(ln)

        else:
            # a change of type ends the previous element
            if not isRetPrevType(ln.lt):
                makeLastLBLast()

            ret.append(ln)

    makeLastLBLast()

    return ret
def importFDX(fileName, frame):
    """Import an FDX xml file as a list of screenplay lines.

    Every Content//Paragraph element becomes one screenplay element whose
    type is taken from the paragraph's "Type" attribute (unknown types are
    imported as action). Script notes embedded in a paragraph are imported
    as notes in front of it.

    Args:
        fileName: path of the file to import.
        frame: parent window for the error message boxes.

    Returns:
        A list of screenplay.Line objects, or None if nothing could be
        imported.
    """
    elemMap = {
        "Action": screenplay.ACTION,
        "Character": screenplay.CHARACTER,
        "Dialogue": screenplay.DIALOGUE,
        "Parenthetical": screenplay.PAREN,
        "Scene Heading": screenplay.SCENE,
        "Shot": screenplay.SHOT,
        "Transition": screenplay.TRANSITION,
    }

    # the 5 MB limit is arbitrary, we just want to avoid getting a
    # MemoryError exception for /dev/zero etc.
    data = util.loadFile(fileName, frame, 5000000)

    if data is None:
        return None

    if len(data) == 0:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)
        return None

    # only parsing raises XMLSyntaxError, so only that call is guarded.
    # "except ... as e" is valid on both Python 2.6+ and Python 3.
    try:
        root = etree.XML(data)
    except etree.XMLSyntaxError as e:
        wx.MessageBox("Error parsing file: %s" % e, "Error", wx.OK, frame)
        return None

    lines = []

    def addElem(eleType, eleText):
        lns = eleText.split("\n")

        # if elem ends in a newline, last line is empty and useless;
        # get rid of it
        if not lns[-1] and (len(lns) > 1):
            lns = lns[:-1]

        for s in lns[:-1]:
            lines.append(screenplay.Line(
                screenplay.LB_FORCED, eleType, util.cleanInput(s)))

        lines.append(screenplay.Line(
            screenplay.LB_LAST, eleType, util.cleanInput(lns[-1])))

    for para in root.xpath("Content//Paragraph"):
        addedNote = False
        et = para.get("Type")

        # Check for script notes
        noteParts = []
        for notes in para.xpath("ScriptNote/Paragraph/Text"):
            if notes.text:
                noteParts.append(notes.text)

            # FD has AdornmentStyle set to "0" on notes with newline.
            if notes.get("AdornmentStyle") == "0":
                noteParts.append("\n")

        s = u"".join(noteParts)

        if s:
            addElem(screenplay.NOTE, s)
            addedNote = True

        # "General" has embedded Dual Dialogue paragraphs inside it;
        # nothing to do for the General element itself.
        #
        # If no type is defined (like inside scriptnote), skip.
        if (et == "General") or (et is None):
            continue

        # text.text is None for paragraphs with no text, so those are
        # left out of the join
        s = u"".join(
            text.text for text in para.xpath("Text") if text.text)

        # don't remove paragraphs with no text, unless that paragraph
        # contained a scriptnote
        if s or not addedNote:
            addElem(elemMap.get(et, screenplay.ACTION), s)

    if len(lines) == 0:
        wx.MessageBox("The file contains no importable lines", "Error",
                      wx.OK, frame)
        return None

    return lines
#!/usr/bin/env python # add words to ../dict_en.dat in the correct place import sys if len(sys.argv) < 2: raise "add_word.py word1 word2..." sys.path.insert(0, "..") import util util.init(False) s = util.loadFile("../dict_en.dat", None) if s == None: raise "error" words = {} lines = s.splitlines() for it in lines: words[util.lower(it)] = None for arg in sys.argv[1:]: words[util.lower(arg)] = None words = words.keys() words.sort() f = open("../dict_en.dat", "wb") for w in words:
# add words to ../dict_en.dat in the correct place
#
# usage: add_word.py word1 word2...

import sys

if len(sys.argv) < 2:
    raise Exception("add_word.py word1 word2...")

# make the modules in ../src importable
sys.path.insert(0, "../src")

import misc
import util

util.init(False)
misc.init(False)

s = util.loadFile("../dict_en.dat", None)

if s is None:
    raise Exception("error")

lines = s.splitlines()

# the dict is used as a set of lowercased words: first the words already
# in the dictionary file, then the ones given on the command line
words = dict.fromkeys(util.lower(it) for it in lines)
words.update(dict.fromkeys(util.lower(arg) for arg in sys.argv[1:]))

words = sorted(words)