2007年09月03日 星期一 17:55
HTML2MoinMoin.py 如下:
#!/usr/bin/python2
"""
Usage:
moinconvert URL
Retrives the given URL and convert it to MoinMoin markup. The result is
written to stdout.
"""
import htmlentitydefs, sys
from HTMLParser import HTMLParser
class HTML2MoinMoin(HTMLParser):
    """Translate HTML fed to the parser into MoinMoin wiki markup.

    Markup is emitted incrementally through write(), which sends it to
    self.output (sys.stdout unless reassigned).

    Tags are handled in this order of precedence:

    1. a method named do_<tag>_start / do_<tag>_end, if one exists on the
       instance (so subclasses can add handlers);
    2. a template in start_tags / end_tags;
    3. do_default_start / do_default_end, which only report the tag when
       self.verbose is true.
    """

    # Markup written when a tag opens.  Templates are %-formatted with the
    # tag's attributes, addressable by name ("%(href)s") or by position
    # ("%(0)s" is the value of the first attribute).
    start_tags = {
        "a" : " [%(0)s ",
        "b" : "'''",
        "em" : "''",
        "tt" : "{{{",
        "pre" : "\n{{{",
        "p" : "\n\n",
        "br" : "\n\n",
        "h1" : "\n\n= ",
        "h2" : "\n\n== ",
        "h3" : "\n\n=== ",
        "h4" : "\n\n==== ",
        "h5" : "\n\n===== ",
        "title" : "TITLE: ",
        "table" : "\n",
        "tr" : "",
        "td" : "||"
    }

    # Markup written verbatim when a tag closes.
    end_tags = {
        "a" : ']',
        "b" : "'''",
        "em" : "''",
        "tt" : "}}}",
        "pre" : "}}}\n",
        "p" : "",
        "h1" : " =\n\n",
        "h2" : " ==\n\n",
        "h3" : " ===\n\n",
        "h4" : " ====\n\n",
        "h5" : " =====\n\n",
        "table" : "\n",
        "tr" : "||\n",
        "dt" : ":: "
    }

    class _AttrValues(dict):
        """Attribute table that yields "" for attributes the tag lacks.

        Lets a template such as " [%(0)s " be applied to a tag without
        attributes instead of raising KeyError.
        """

        def __missing__(self, key):
            return ""

    def __init__(self):
        """Set up the parser with output going to sys.stdout."""
        HTMLParser.__init__(self)
        self.output = sys.stdout
        # Stack of item markers, one entry per currently open list.
        self.list_mode = []
        # True while inside <pre>; newlines in text are then kept.
        self.preformatted = False
        self.verbose = 0

    def write(self, text):
        """Send a piece of generated markup to self.output."""
        self.output.write(text)

    def do_ul_start(self, attrs, tag):
        """Open a bulleted list: its items are marked with "*"."""
        self.list_mode.append("*")

    def do_ol_start(self, attrs, tag):
        """Open a numbered list: its items are marked with "1."."""
        self.list_mode.append("1.")

    def do_dl_start(self, attrs, tag):
        """Open a definition list: its terms carry no marker."""
        self.list_mode.append("")

    def do_ul_end(self, tag):
        """Close the innermost open list (no-op when none is open)."""
        self.list_mode = self.list_mode[:-1]

    # Every list kind is closed the same way.
    do_ol_end = do_ul_end
    do_dl_end = do_ul_end

    def _start_list_item(self, fallback_marker):
        """Begin a new line indented one space per open list, then the marker.

        An item found outside any list (malformed HTML) is written as if
        it sat in a single list using fallback_marker, rather than raising
        IndexError on the empty stack.
        """
        if self.list_mode:
            depth = len(self.list_mode)
            marker = self.list_mode[-1]
        else:
            depth = 1
            marker = fallback_marker
        self.write("\n" + " " * depth + marker)

    def do_li_start(self, args, tag):
        """Start a list item with the innermost list's marker."""
        self._start_list_item("*")

    def do_dt_start(self, args, tag):
        """Start a definition term with the innermost list's marker."""
        self._start_list_item("")

    def do_pre_start(self, args, tag):
        """Enter preformatted mode and open the "{{{" block."""
        self.preformatted = True
        self.write(self.start_tags["pre"])

    def do_pre_end(self, tag):
        """Leave preformatted mode and close the "}}}" block."""
        self.preformatted = False
        self.write(self.end_tags["pre"])

    def handle_starttag(self, tag, attrs):
        """Dispatch an opening tag (called by HTMLParser).

        attrs is the list of (name, value) pairs supplied by HTMLParser.
        """
        # getattr() sees methods defined on the class and on subclasses.
        # The instance __dict__ holds only data attributes, so looking
        # there never finds a handler.
        handler = getattr(self, "do_%s_start" % tag, None)
        if handler is not None:
            handler(attrs, tag)
        elif tag in self.start_tags:
            values = self._AttrValues()
            for index, (name, value) in enumerate(attrs):
                values[name] = value
                values[str(index)] = value
            self.write(self.start_tags[tag] % values)
        else:
            self.do_default_start(attrs, tag)

    def handle_endtag(self, tag):
        """Dispatch a closing tag (called by HTMLParser)."""
        handler = getattr(self, "do_%s_end" % tag, None)
        if handler is not None:
            handler(tag)
        elif tag in self.end_tags:
            self.write(self.end_tags[tag])
        else:
            self.do_default_end(tag)

    def handle_data(self, data):
        """Write text content, turning newlines into spaces outside <pre>."""
        if self.preformatted:
            self.write(data)
        else:
            self.write(data.replace("\n", " "))

    def handle_charref(self, name):
        """Write a numeric character reference back out as "&#<name>;".

        HTMLParser passes only the text between "&#" and ";".  Writing
        that alone would turn "&#65;" into the literal digits "65".
        """
        self.write("&#%s;" % name)

    def handle_entityref(self, name):
        """Write a named entity: its replacement if known, else "&<name>;"."""
        if name in htmlentitydefs.entitydefs:
            self.write(htmlentitydefs.entitydefs[name])
        else:
            # Keep the reference intact, including the closing ";".
            self.write("&%s;" % name)

    def do_default_start(self, attrs, tag):
        """Report an unhandled opening tag when self.verbose is true."""
        if self.verbose:
            # Parenthesised single argument: prints the same text whether
            # print is a statement (Python 2) or a function (Python 3).
            print("Encountered the beginning of a %s tag" % tag)
            print("Attribs: %s" % (attrs,))

    def do_default_end(self, tag):
        """Report an unhandled closing tag when self.verbose is true."""
        if self.verbose:
            print("Encountered the end of a %s tag" % tag)
def main():
    """Fetch the URL given as the first argument and print it as MoinMoin markup.

    Exits with a usage message on stderr when no URL argument is supplied.
    """
    import urllib

    if len(sys.argv) < 2:
        # Without this check a missing argument surfaced as a bare
        # IndexError traceback.
        sys.exit("Usage: moinconvert URL")

    response = urllib.urlopen(sys.argv[1])
    try:
        htmldata = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()

    p = HTML2MoinMoin()
    p.feed(htmldata)
    p.close()


if __name__ == "__main__":
    main()
#########
我写了一段代码想继承HTML2MoinMoin
# -*- coding: utf-8 -*-
#!/usr/bin/python
import htmlentitydefs, sys
from HTML2MoinMoin import HTML2MoinMoin
class YtParser(HTML2MoinMoin):
    """HTML2MoinMoin variant that also knows <i>, with tracing stubs.

    The markup tables are derived from the parent's instead of being
    copied wholesale, so the two classes cannot drift apart.

    handle_starttag and handle_data are overridden with stubs that only
    print a marker line.  While they are in place, no start-tag markup or
    text is written, and start_tags is never consulted.
    """

    # Parent templates plus the additions below.
    start_tags = dict(HTML2MoinMoin.start_tags)
    start_tags.update({
        "i" : "''",
        # NOTE(review): handle_starttag receives the bare tag name ("pre")
        # with attributes passed separately, so this key can never match a
        # tag.  Kept as in the original; honouring class="code" would need
        # an attribute check in a start handler.
        'pre class="code"' :"\n{{{\n#!python\n",
    })

    end_tags = dict(HTML2MoinMoin.end_tags)
    end_tags["i"] = "''"

    def __init__(self):
        """Initialise exactly like HTML2MoinMoin."""
        HTML2MoinMoin.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Tracing stub: print a marker line for every opening tag."""
        # Parenthesised single argument: same output whether print is a
        # statement (Python 2) or a function (Python 3).
        print("++++++++++++++++++++++++++++++++++")

    def handle_data(self, data):
        """Tracing stub: print a marker line for every run of text."""
        print("++++++++++++++++++++++++++++++++++")
def test(filename):
    """Parse the HTML file at filename with a YtParser.

    The file is read in full and closed before parsing starts, so the
    handle is released even if reading or parsing raises.
    """
    with open(filename, "r") as fin:
        html = fin.read()

    p = YtParser()
    p.feed(html)
    p.close()


if __name__ == "__main__":
    test("test.html")
可是我发现handle_starttag不能重载,而handle_data可以重载,请教各位这是为什么?python重载有什么特殊的规则吗?
-------------- 下一部分 --------------
一个HTML附件被移除...
URL: http://python.cn/pipermail/python-chinese/attachments/20070903/4dc6a794/attachment-0001.htm
Zeuux © 2025
京ICP备05028076号