import re
from datetime import date
from sgmllib import SGMLParser

import toolbox


class RespTextBlock:
    """Plain-text layout for a responsorial psalm.

    The rendered text is: a title line, the chorus wrapped in
    ``[chorus]``/``[/chorus]`` markers, the verse body, and then the
    wrapped chorus once more.
    """

    def __init__(self):
        self.upperText = "Responsorial Psalm"
        self.openChorus = "[chorus]"
        self.closeChorus = "[/chorus]"
        # Filled in by the caller before makeText() is used.
        self.chorusLine = ""
        self.body = ""

    def makeText(self):
        """Return the assembled block as a single string."""
        return "%s\n\n%s\n%s\n%s\n\n%s\n%s\n%s\n%s" % (
            self.upperText,
            self.openChorus, self.chorusLine, self.closeChorus,
            self.body,
            self.openChorus, self.chorusLine, self.closeChorus)


class RespFinder(SGMLParser):
    """Collect the text fragments between two marker strings.

    Capturing starts with the data chunk that contains ``respText``
    (that chunk is included) and stops at the chunk that contains
    ``secReadText`` (that chunk is excluded). Captured chunks are
    appended to ``self.outText``.
    """

    def __init__(self, respText, secReadText):
        # SGMLParser.__init__ is expected to invoke reset(), which is what
        # creates outText/gotPsalm (see the sgmllib documentation).
        SGMLParser.__init__(self)
        self.respText = respText
        self.secReadText = secReadText

    def reset(self):
        """Reset the parser and clear the captured text."""
        SGMLParser.reset(self)
        self.outText = []
        self.gotPsalm = False

    def handle_data(self, text):
        """Toggle capturing on the marker strings and store captured text."""
        if self.respText in text:
            self.gotPsalm = True
        # Checked second so that a chunk holding the end marker is never
        # captured, even if it also holds the start marker.
        if self.secReadText in text:
            self.gotPsalm = False
        if self.gotPsalm:
            self.outText.append(text)


def writeFile(blankLinePattern="^\r?\n$",
              respText="RESPONSORIAL PSALM",
              secReadText="SECOND READING",
              linesBeforeResp=3,
              urlLoc=r" http://www.catholicireland.net/pages/todaysreadings.php?lang=eng",
              nextSunText=r"&nextsun=yes",
              nextSunFlag=False,
              outFile="resp.txt"):
    """Fetch the readings page and write the responsorial psalm to a file.

    Parameters:
        blankLinePattern -- regex; captured fragments matching it are
            left out of the psalm body.
        respText -- marker string that starts the capture.
        secReadText -- marker string that ends the capture.
        linesBeforeResp -- number of leading captured fragments to discard
            before the chorus line.
        urlLoc -- address handed to toolbox.openAnything.
        nextSunText -- query suffix appended to urlLoc to request next
            Sunday's readings.
        nextSunFlag -- when true, always request next Sunday's readings.
        outFile -- path of the text file to write.

    Returns None. Progress and failure are reported on standard output;
    a failure to write the file is reported rather than raised.
    """
    # Today's page is only used when today is Sunday (weekday() == 6) and
    # next Sunday was not explicitly requested; otherwise ask for next Sunday.
    if nextSunFlag or date.today().weekday() != 6:
        urlLoc += nextSunText

    respBlock = RespTextBlock()
    reMatch = re.compile(blankLinePattern)
    parser = RespFinder(respText, secReadText)

    usock = toolbox.openAnything(urlLoc)
    try:
        parser.feed(usock.read())
    finally:
        # Release the connection even if reading or parsing fails.
        usock.close()
    parser.close()

    respList = parser.outText[linesBeforeResp:]
    ## Debug alternative, keeps every captured fragment:
    ## respList = parser.outText[:]
    # Single-argument parenthesised print behaves the same on Python 2.
    print(respList)

    if not respList:
        print("Something she be wrong")
        return

    # First remaining fragment is the chorus; the rest form the body.
    respBlock.chorusLine = respList.pop(0)
    respBlock.body = "\n".join(
        [item.lstrip("123456789 ").replace("[Response]", "\n")
         for item in respList
         if not reMatch.search(item)])

    text = respBlock.makeText()
    try:
        outHandle = open(outFile, "w")
        try:
            outHandle.write(text)
        finally:
            # Close (and flush) before reporting success.
            outHandle.close()
        print("File %s written" % (outFile))
    except EnvironmentError:
        # Only I/O failures are reported here; programming errors and
        # KeyboardInterrupt are no longer silently swallowed.
        print("File write failed. Oops")


if __name__ == "__main__":
    writeFile()