(* Objects to represent the Raw XML *)

object RawXML is
    components: data:string;
    description: (*
        Base object for raw, unparsed XML. It has a single string component,
        data (presumably the XML text itself -- confirm).
    *);
end RawXML;

object RawLayXML extends RawXML
    description: (* This is the Raw Layout XML read in as a file. *);
end RawLayXML;

object RawLecXML extends RawXML
    description: (* This is the Raw Lecture XML read in as a file. *);
end RawLecXML;

(* Operations to parse Raw XML into a structure *)

operation parseLectureXMLtoStructure
    inputs: r:RawLecXML;
    outputs: l:LectureXMLStructure;
    description: (*
        Parses a well-formed XHTML 1.1 document into an m-way tree
        data structure.
    *);
    precondition:
        (* r is valid XML.
         * NOTE(review): the length operator is applied to the object r
         * itself rather than to r.data -- confirm this is intended. *)
        (#r > 1);
    postcondition:
        (* Every matching pair of tags is turned into an XMLTag object *)
        (forall (i:XMLTag)
            (i in l iff dataTagInRawXML(i, r) ) )
        (* Also preserve the order of the XML tags.
         * TODO: the ordering constraint is not yet expressed formally
         * in this postcondition. *)
        ;
end parseLectureXMLtoStructure;

operation itemsInRawTag
    inputs: r:RawXML;
    outputs: t:XMLItem*;
    description: (*
        Transforms a RawXML tag into a list of XMLItems.
        What this is supposed to do is go through the XML provided and do
        the following:
          - Get and remove any top-level tags (such as html).
          - Partition the remaining data into XMLItem*, in the following
            manner:
                ...      : Item 1
                extra    : Item 2
                ...      : Item 3
                contents : Item 4
          - For each XMLItem that is a tag, recurse on that tag.
        Return the resulting XMLStructure.
    *);
    precondition: (* r is a valid XML tag *);
    postcondition:
        (
            (* Pull all the 0th level tags and make sure t is equal to
             * that list *)
            forall(i:XMLItem)
                ( i in t iff ( i in stripItems(r) ) )
            and
            (* For all the 'contents' strings found by stripItems, make them
             * into an anonymous tag, and for all the raw xml strings in each
             * tag, recursively make them into tags also.
             * NOTE(review): the conjuncts (t = t - t') and (t = t + t') read
             * like imperative remove / re-insert steps; as predicates they
             * constrain t itself -- confirm the intent. *)
            forall(t' in t) (
                (t = t - t')
                and ( t'.t = itemsToTag(t'.s) )
                and forall(sit in t'.t.data)
                    ( sit.t = itemsToTag(sit.s) )
                and (t = t + t')
            )
        );
end itemsInRawTag;

operation itemsToTag
    inputs: i:RawXML;
    outputs: t:XMLTag;
    description: (*
        Takes a List of XMLItems and represents it as an XMLTag.
        NOTE(review): the declared input is a single RawXML, not a list of
        XMLItems -- confirm which of the two is intended.
    *);
end itemsToTag;

object strippedTag is
    components: name:string and attribs:XMLAttribute* and raw:RawXML;
    description: (*
        A tag broken into a name string, a list of XMLAttribute values, and
        a RawXML component named raw (presumably the unparsed contents of the
        tag -- confirm).
    *);
end strippedTag;

operation stripItems inputs: r:RawXML; outputs: i:XMLItem*; description: (* Strip tags takes RawXML like this: