from xml import sax


	class MovieHandler(sax.ContentHandler):
		"""SAX content handler that copies the text of each ``<text>`` element to a file.

		While a ``<text>`` element is open its character data is collected; when
		the element closes, the collected text is written to the output file
		``result_txt`` (created in the current working directory) unless it is
		empty or whitespace-only.  Character data of all other elements is
		ignored.  Nothing is inserted between the texts of consecutive elements.

		Element tracking is flat: ``CurrentData`` holds the name of the most
		recently opened element and is cleared whenever any element closes, so
		a ``<text>`` element that contains child elements is not written.

		Attributes:
			CurrentData: Name of the most recently opened element, or ``""``
				after an element has closed.
			format: Text of the most recently completed ``<text>`` element.
			type, year, rating, stars, description: Not used by this handler;
				kept so that code reading them keeps working.
			write_file: Output file object, or ``None`` before ``startDocument``.
		"""

		def __init__(self):
			super().__init__()
			self.CurrentData = ""
			self.type = ""
			self.format = ""
			self.year = ""
			self.rating = ""
			self.stars = ""
			self.description = ""
			# Chunks of the <text> element currently being read.  characters()
			# may be called many times per element, so the pieces are collected
			# in a list and joined once instead of growing a string with "+=".
			self._text_parts = []
			self.write_file = None

		def startDocument(self):
			"""Open the output file when the parser starts the document."""
			# Explicit UTF-8 so the result does not depend on the platform's
			# default locale encoding.
			self.write_file = open("result_txt", "w", encoding="utf-8")
			print('XML开始解析中...')

		def startElement(self, name, attrs):
			"""Remember which element was just opened.

			Args:
				name: Name of the element being opened.
				attrs: Attributes of the element (unused).
			"""
			self.CurrentData = name
			if name == "text":
				# Start every <text> element with an empty buffer so text from
				# an earlier element can never leak into this one.
				self._text_parts = []

		def characters(self, content):
			"""Collect character data that belongs to a ``<text>`` element.

			Args:
				content: One chunk of character data reported by the parser.
			"""
			if self.CurrentData == "text":
				self._text_parts.append(content)

		def endElement(self, name):
			"""Write the collected text when a ``<text>`` element closes.

			Args:
				name: Name of the element being closed (unused; the decision is
					based on ``CurrentData``).
			"""
			if self.CurrentData == "text":
				self.format = "".join(self._text_parts)
				# Drop the buffer so the next <text> element is written on its
				# own rather than together with everything seen before it.
				self._text_parts = []
				if self.format.strip():
					self.write_file.write(self.format)
			self.CurrentData = ""

		def endDocument(self):
			"""Close the output file when the parser finishes the document."""
			if self.write_file is not None:
				self.write_file.close()
			print('XML文档解析结束!')


	if __name__ == '__main__':
		# Script entry point: stream the XML dump named below through the
		# handler, which writes the extracted text to its output file.
		handler = MovieHandler()
		parser = sax.make_parser()
		parser.setContentHandler(handler)
		try:
			parser.parse("jawiki-20190901-pages-articles-multistream.xml")
		finally:
			# The handler closes its output in endDocument(), which the parser
			# does not call when parsing aborts with an error.  Close the file
			# here so buffered output is flushed and the handle is released.
			output = getattr(handler, "write_file", None)
			if output is not None and not output.closed:
				output.close()

# Q.E.D.


# 重剑无锋 大巧不工