This file contains everything you need to parse XIST objects from files, strings, URLs etc.
class Parser(object):
Basic parser interface.
def __init__(self):
selfdef begin(self, application):
selfStart parsing. Events will be passed to application, which must
implement a handler for each event type.
def end(self):
selfFinish parsing.
def feed(self, data, final):
selfFeed data (a byte string) to the parser. If final is true
this will be the last call to feed.
class SGMLOPParser(Parser):
A parser based on sgmlop.
def __init__(self, encoding=None):
selfCreate a new SGMLOPParser object.
def begin(self, application):
selfdef feed(self, data, final):
selfdef end(self):
selfdef handle_comment(self, data):
selfdef handle_data(self, data):
selfdef handle_cdata(self, data):
selfdef handle_proc(self, target, data):
selfdef handle_entityref(self, name):
selfdef handle_enterstarttag(self, name):
selfdef handle_leavestarttag(self, name):
selfdef handle_enterattr(self, name):
selfdef handle_leaveattr(self, name):
selfdef handle_endtag(self, name):
selfclass ExpatParser(Parser):
A parser using Python's built-in expat XML parser.
def __init__(self, encoding=None, transcode=False, xmldecl=False, doctype=False):
selfdef begin(self, application):
selfdef end(self):
selfdef handle_xmldecl(self, version, encoding, standalone):
selfdef handle_begindoctype(self, doctypename, systemid, publicid, has_internal_subset):
selfdef handle_enddoctype(self):
selfdef handle_default(self, data):
selfdef handle_comment(self, data):
selfdef handle_data(self, data):
selfdef handle_startelement(self, name, attrs):
selfdef handle_endelement(self, name):
selfdef handle_proc(self, target, data):
selfdef feed(self, data, final):
selfclass Builder(object):
It is the job of a Builder to create the object tree from the
events generated by the underlying parser.
def __init__(self, parser=None, prefixes=None, tidy=False, loc=True, validate=True, encoding=None, pool=None):
selfCreate a new Builder instance.
Arguments have the following meaning:
parser: an instance of the Parser class (or any object that provides the appropriate interface).
prefixes (mapping): a mapping that maps namespace prefixes to namespace names/modules (or lists of namespace names/modules). This is used to preinitialize the namespace prefix mapping.
tidy (bool): If tidy is true, libxml2's HTML parser will be used for parsing broken HTML.
loc (bool): Should location information be attached to the generated nodes?
validate (bool): Should the parsed XML nodes be validated after parsing?
encoding (string or None): The default encoding to use when the source doesn't provide an encoding. The default None results in the encoding being detected from the XML itself.
pool (ll.xist.xsc.Pool object): This pool will be used for creating all nodes during parsing.
def _parseHTML(self, data, base, sysid, encoding):
selfInternal helper method for parsing HTML via libxml2.
def _begin(self, base=None, encoding=None):
selfdef _end(self, parser):
selfdef parsestring(self, data, base=None, encoding=None):
selfParse the string data (str or unicode) into an
XIST tree. base is the base URL for the parsing process,
encoding can be used to force the parser to use the specified
encoding.
def parseiter(self, iterable, base=None, encoding=None):
Parse the input from the iterable iterable (which must produce the input
in chunks of bytes) into an XIST tree. base is the base URL for the
parsing process, encoding can be used to force the parser to use
the specified encoding.
def parsestream(self, stream, base=None, encoding=None, bufsize=8192):
selfParse XML input from the stream stream. base is the base
URL for the parsing process, encoding can be used to force the
parser to use the specified encoding. bufsize is the buffer size
used for reading the stream in blocks.
def parsefile(self, filename, base=None, encoding=None, bufsize=8192):
selfParse XML input from the file named filename. base is the
base URL for the parsing process (defaulting to filename if not
specified), encoding can be used to force the parser to use the
specified encoding. bufsize is the buffer size used for reading
the file in blocks.
def parseurl(self, name, base=None, encoding=None, bufsize=8192, *args, **kwargs):
selfParse XML input from the URL name (which might be a string
or an ll.url.URL object) into an XIST tree. base is the
base URL for the parsing process (defaulting to the final URL of the
response (i.e. including redirects)). encoding can be used to
force the parser to use the specified encoding. bufsize is the
buffer size used for reading the response in blocks. args and
kwargs will be passed on to the open call.
def parseetree(self, tree, base=None):
selfParse XML input from the object tree which must support the
ElementTree API. base is the base URL for the parsing process
(i.e. this URL will be prepended to all links in the tree).
def handle_xmldecl(self, version, encoding, standalone, line, col):
selfdef handle_doctype(self, content, line, col):
selfdef handle_enterstarttag(self, name, line, col):
selfdef handle_enterattr(self, name, line, col):
selfdef handle_leaveattr(self, name, line, col):
selfdef handle_leavestarttag(self, name, line, col):
selfdef handle_endtag(self, name, line, col):
selfhandle_cdata = def handle_data(self, content, line, col):
selfdef handle_data(self, content, line, col):
selfdef handle_comment(self, content, line, col):
selfdef handle_proc(self, target, data, line, col):
selfdef handle_entityref(self, name, line, col):
selfdef getLocation(self):
selfdef __appendNode(self, node, line, col):
selfdef parsestring(data, base=None, encoding=None, **builderargs):
Parse the string data into an XIST tree. For the arguments
base and encoding see the method parsestring in the
Builder class. You can pass any other argument that the
Builder constructor takes as keyword arguments
via builderargs.
def parseiter(iterable, base=None, encoding=None, **builderargs):
Parse the input from the iterable iterable (which must produce the
input in chunks of bytes) into an XIST tree. For the arguments base
and encoding see the method parseiter in the
Builder class. You can pass any other argument that the
Builder constructor takes as keyword arguments via
builderargs.
def parsestream(stream, base=None, encoding=None, bufsize=8192, **builderargs):
Parse XML from the stream stream into an XIST tree. For the arguments
base, encoding and bufsize see the method
parsestream in the Builder class. You can pass any other
argument that the Builder constructor takes as keyword arguments via
builderargs.
def parsefile(filename, base=None, encoding=None, bufsize=8192, **builderargs):
Parse XML input from the file named filename. For the arguments
base, encoding and bufsize see the method
parsefile in the Builder class. You can pass any other
argument that the Builder constructor takes as keyword arguments
via builderargs.
def parseurl(name, base=None, encoding=None, bufsize=8192, headers=None, data=None, **builderargs):
Parse XML input from the URL name into an XIST tree. For the arguments
base, encoding, bufsize, headers and data
see the method parseurl in the Builder class. You can pass
any other argument that the Builder constructor takes as keyword
arguments via builderargs.
def parseetree(tree, base=None, **builderargs):
Parse XML input from the object tree which must support the
ElementTree API. For the argument base see the method
parseetree in the Builder class. You can pass any other
argument that the Builder constructor takes as keyword arguments
via builderargs.