2007年07月03日 星期二 12:54
有人在python中使用过libxml2吗,我发现每次解析一个xml文件,都会增加4k左右的虚拟内存,长期使用,内存不堪重负啊。请问有什么好的解决方法没?
注:是在windows下使用
下面是测试代码:
#!/usr/bin/python -u
import libxml2
#------------------------------------------------------------------------------
# Memory debug specific
#------------------------------------------------------------------------------
def _escape(data):
"""Escape data for XML"""
data=data.replace("&","&")
data=data.replace("<","<")
data=data.replace(">",">")
data=data.replace("'","'")
data=data.replace('"',""")
return data
class callback:
    """SAX-style handler object passed to ``libxml2.createPushParser``.

    ``startDocument``, ``endElement`` and ``startElement`` all begin with a
    bare ``return``, so as written every callback is a no-op and the state
    attributes below never change after construction.  The statements after
    each ``return`` are unreachable and are kept only as they were posted.
    NOTE(review): the early returns look like a deliberate way of reducing the
    test to "parse with empty handlers" -- confirm before removing them.
    """

    def __init__(self):
        # Opening tag of the root element (with attributes), as text.
        self._head = ""
        # Matching closing tag of the root element, as text.
        self._tail = ""
        # Serialized markup collected for the current child of the root.
        self._current = ""
        # Element nesting depth; 0 means no element is open.
        self._level = 0
        # libxml2 document built from head + tail in startElement.
        self._doc = None
        # Root element of self._doc (only ever reset to None in this code).
        self._root = None

    def startDocument(self):
        """Called at document start; returns immediately."""
        return
        # NOTE(review): unreachable because of the return above.
        print(".")

    def endElement(self, tag):
        """Called for each closing tag; returns immediately.

        :param tag: name of the element being closed.
        """
        return
        # NOTE(review): everything below is unreachable because of the return
        # above.
        self._current += "</%s>" % (tag,)
        self._level -= 1
        if self._level > 1:
            return
        if self._level == 1:
            # A direct child of the root has just closed: parse it wrapped in
            # the saved root open/close tags and copy it into self._doc.
            xml = self._head + self._current + self._tail
            doc = libxml2.parseDoc(xml)
            try:
                node = doc.getRootElement().children
                try:
                    node1 = node.docCopyNode(self._doc, 1)
                    try:
                        pass
                        #self._root.addChild(node1)
                        #self._handler.stanza(self._doc, node1)
                    except:
                        node1.unlinkNode()
                        node1.freeNode()
                        del node1
                        pass
                finally:
                    # NOTE(review): if the except branch above ever ran, node1
                    # would already be freed and deleted here; likewise node1
                    # is unbound if docCopyNode itself raised.
                    node1.unlinkNode()
                    node1.freeNode()
                    del node1
                    #del node
            finally:
                doc.freeDoc()
        else:
            # The root element itself has closed: release the stream document.
            print('level:%d' % self._level)
            xml = self._head + self._tail
            doc = libxml2.parseDoc(xml)
            try:
                #self._handler.stream_end(self._doc)
                self._doc.freeDoc()
                self._doc = None
                self._root = None
            finally:
                doc.freeDoc()

    def startElement(self, tag, attrs):
        """Called for each opening tag; returns immediately.

        :param tag: name of the element being opened.
        :param attrs: mapping of attribute names to values, or a false value
            when the element has no attributes.
        """
        return
        # NOTE(review): everything below is unreachable because of the return
        # above.
        #print 'startElement_____________'
        s = "<" + tag
        if attrs:
            for a, v in attrs.items():
                s += " %s='%s'" % (a, _escape(v))
        s += ">"
        if self._level == 0:
            # Root element: remember its open/close tags and build the
            # skeleton document from them.
            self._head = s
            self._tail = "</%s>" % (tag,)
            xml = self._head + self._tail
            ## if self._doc:
            ##     self._doc.freeDoc()
            ##     self._doc=None
            self._doc = libxml2.parseDoc(xml)
            #self._handler.stream_start(self._doc)
            #self._root = self._doc.getRootElement()
        elif self._level == 1:
            # A new direct child of the root starts a fresh buffer.
            self._current = s
        else:
            self._current += s
        self._level += 1
        #print self._level

    def characters(self, data):
        """Ignore character data."""
        pass

    def warning(self, msg):
        """Ignore parser warnings."""
        pass

    def error(self, msg):
        """Ignore parser errors."""
        pass

    def fatalError(self, msg):
        """Ignore fatal parser errors."""
        pass
#------------------------------------------------------------------------------
#------------------------------------------------------------------------------
import os
import sys
# Test driver: repeatedly push-parses every .htm/.html file found in the
# directory given on the command line and prints libxml2.memoryUsed() after
# each file, so that growth between iterations can be observed.
programName = os.path.basename(sys.argv[0])

# Exactly one argument is expected: the directory holding the input files.
if len(sys.argv) != 2:
    print("Use: %s " % programName)
    sys.exit(1)

inputPath = sys.argv[1]

# isdir rather than exists: a path naming a regular file would otherwise get
# past this check and fail inside os.listdir below.
if not os.path.isdir(inputPath):
    print("Error: directory does not exist")
    sys.exit(1)

# Enable libxml2 memory debugging; the end of the script reads the value
# returned by the same call as the number of bytes still outstanding.
libxml2.debugMemory(1)

# Keep only the entries whose final extension is exactly .htm or .html.
dirContent = os.listdir(inputPath)
inputFileNames = [
    fichero for fichero in dirContent
    if os.path.splitext(fichero)[1] in (".htm", ".html")
]

if not inputFileNames:
    print("Error: no input files")
    sys.exit(1)

handler = callback()
NUM_ITERS = 20
isrun = True
while isrun:
    for i in range(NUM_ITERS):
        for inputFileName in inputFileNames:
            ctxt = libxml2.createPushParser(handler, "", 0, inputFileName)
            #libxml2.initParser()
            #print inputFileName
            # os.path.join supplies the separator when inputPath has no
            # trailing slash.
            inputFilePath = os.path.join(inputPath, inputFileName)
            f = open(inputFilePath)
            try:
                data = f.read()
            finally:
                # Close the handle even if the read fails.
                f.close()
            #print data
            # Feed the whole file as a single, terminating chunk.
            ctxt.parseChunk(data, len(data), 1)
            #libxml2.pythonCleanupParser()
            #libxml2.cleanupParser()
            ctxt.clearParserCtxt()
            #ctxt = None
            print(libxml2.memoryUsed())
            del ctxt
            ctxt = None
    # After each batch of NUM_ITERS passes, entering "q" stops the loop.
    s = raw_input('Quit?')
    isrun = s != 'q'
s = raw_input('Press any key...')

# Memory debug specific
libxml2.cleanupParser()
if libxml2.debugMemory(1) == 0:
    print("OK")
else:
    print("Memory leak %d bytes" % (libxml2.debugMemory(1)))
    libxml2.dumpMemory()
--
python c# and opensource
blog:http://www.chyni.cn
-------------- 下一部分 --------------
一个HTML附件被移除...
URL: http://python.cn/pipermail/python-chinese/attachments/20070703/2d91522d/attachment.html
2007年07月03日 星期二 14:00
libxml2(http://xmlsoft.org/python.html) C语言版Libxml2(业界标准了)的一个python封装,据说速度非常快. 功能非常强,支持几乎所有的XML处理要求。 包括对Relax NG等的支持。 但是接口不够pythonic,需要考虑内存处理, 在Windows上会出现无故挂掉的情况,不稳定。 ------------------------------ from 潘俊勇的Blog <http://blog.czug.org/panjy> On 7/3/07, 追风逐月 <chinesexu在gmail.com> wrote: > > > 有人在python中使用过libxml2吗,我发现每次解析一个xml文件,都会增加4k左右的虚拟内存,长期使用,内存不堪重负啊。请问有什么好的解决方法没? > 注:是在windows下使用 > > > 下面是测试代码: > #!/usr/bin/python -u > import libxml2 > > #------------------------------------------------------------------------------ > > > > # Memory debug specific > > > > #------------------------------------------------------------------------------ > > def _escape(data): > """Escape data for XML""" > data=data.replace("&","&") > data=data.replace("<","<") > data=data.replace(">",">") > data=data.replace ("'","'") > data=data.replace('"',""") > return data > class callback: > def __init__(self): > self._head = "" > self._tail = "" > self._current = "" > self._level = 0 > self._doc = None > self._root = None > > def startDocument(self): > return > print "."
> > def endElement(self, tag): > return > self._current+="" % (tag,) > self._level -= 1 > if self._level > 1: > return > if self._level==1: > xml=self._head+self._current+self._tail > doc=libxml2.parseDoc(xml) > try: > node = doc.getRootElement ().children > try: > node1 = node.docCopyNode(self._doc, 1) > try: > pass > #self._root.addChild(node1) > #self._handler.stanza(self._doc, node1) > except: > node1.unlinkNode() > node1.freeNode() > del node1 > pass > finally: > node1.unlinkNode() > node1.freeNode() > del node1 > #del node > finally: > doc.freeDoc() > else: > print 'level:%d'%self._level > xml=self._head+self._tail > doc=libxml2.parseDoc (xml) > try: > #self._handler.stream_end(self._doc) > self._doc.freeDoc() > self._doc = None > self._root = None > finally: > doc.freeDoc() > > def startElement(self, tag, attrs): > return > #print 'startElement_____________' > s = "<"+tag > if attrs: > for a,v in attrs.items(): > s+=" %s='%s'" % (a,_escape(v)) > s += ">" > if self._level == 0: > self._head = s > self._tail = "" % (tag,) > xml=self._head+self._tail > ## if self._doc: > ## self._doc.freeDoc() > ## self._doc=None > > self._doc = libxml2.parseDoc(xml) > #self._handler.stream_start(self._doc) > #self._root = self._doc.getRootElement() > elif self._level == 1: > self._current = s > else: > self._current += s > self._level += 1 > #print self._level > > > > > def characters(self, data): > pass > > def warning(self, msg): > pass > > def error(self, msg): > pass > > def fatalError(self, msg): > pass > > > #------------------------------------------------------------------------------ > #------------------------------------------------------------------------------ > > > import os > import sys > > programName = os.path.basename(sys.argv[0]) > > if len(sys.argv) != 2: > print "Use: %s" % programName > sys.exit(1) > > inputPath = sys.argv [1] > > if not os.path.exists (inputPath): > print "Error: directory does not exist" > sys.exit(1) > > libxml2.debugMemory(1) > > 
inputFileNames = [] > dirContent = os.listdir(inputPath) > for fichero in dirContent: > extension1=fichero.rfind(".htm") > extension2=fichero.rfind(".html") > dot = fichero.rfind(".") > extension = max(extension1,extension2) > if extension != -1 and extension == dot: > inputFileNames.append (fichero) > > if len(inputFileNames) == 0: > print "Error: no input files" > sys.exit(1) > > > handler = callback() > NUM_ITERS = 20 > isrun=True > while isrun: > for i in range(NUM_ITERS): > for inputFileName in inputFileNames: > ctxt = libxml2.createPushParser(handler, "", 0, inputFileName) > #libxml2.initParser() > #print inputFileName > inputFilePath = inputPath + inputFileName > f = open(inputFilePath) > data = f.read() > #print data > f.close() > > > ctxt.parseChunk(data, len(data), 1) > #libxml2.pythonCleanupParser() > #libxml2.cleanupParser() > ctxt.clearParserCtxt() > #ctxt = None > print libxml2.memoryUsed() > del ctxt > ctxt = None > > s=raw_input('Quit?') > isrun=s!='q' > s=raw_input('Press any key...') > > > # Memory debug specific > libxml2.cleanupParser() > if libxml2.debugMemory(1) == 0: > print "OK" > else: > print "Memory leak %d bytes" % ( libxml2.debugMemory(1)) > libxml2.dumpMemory() > > -- > python c# and opensource > blog:http://www.chyni.cn > _______________________________________________ > python-chinese > Post: send python-chinese在lists.python.cn > Subscribe: send subscribe to python-chinese-request在lists.python.cn > Unsubscribe: send unsubscribe to python-chinese-request在lists.python.cn > Detail Info: http://python.cn/mailman/listinfo/python-chinese > -- 我走到一个陌生的地方, 告诉别人 我要去流浪 哦,我要去疗伤　 Gtalk: iexper(at)gmail.com 域名过期了 -------------- 下一部分 -------------- 一个HTML附件被移除... URL: http://python.cn/pipermail/python-chinese/attachments/20070703/e5755163/attachment-0001.html
Zeuux © 2025
京ICP备05028076号