Nicolas Cellier uploaded a new version of XML-Parser to project The Trunk:
http://source.squeak.org/trunk/XML-Parser-nice.20.mcz ==================== Summary ==================== Name: XML-Parser-nice.20 Author: nice Time: 22 December 2009, 8:48:53 am UUID: d5935177-0115-b349-bf6f-bab536e87cd1 Ancestors: XML-Parser-Alexandre_Bergel.19, XML-Parser-ar.7 Merge trunk with latest version from http://www.squeaksource.com/XMLSupport =============== Diff against XML-Parser-ar.7 =============== Item was changed: ----- Method: XMLTokenizer class>>isCharEscape: (in category 'accessing') ----- + isCharEscape: entityValue + ^entityValue size = 1 + and: [CharEscapes includes: entityValue first]! - isCharEscape: aChar - ^CharEscapes includes: aChar! Item was added: + ----- Method: XMLElement>>elementsAndContentsDo: (in category 'enumerating') ----- + elementsAndContentsDo: aBlock + self elementsAndContents do: aBlock! Item was changed: ----- Method: XMLDOMParser>>endElement: (in category 'content') ----- endElement: elementName | currentElement | + currentElement := self pop. - currentElement _ self pop. currentElement name = elementName ifFalse: [self driver errorExpected: 'End tag "', elementName , '" doesn''t match "' , currentElement name , '".']! Item was added: + ----- Method: SAXHandler class>>parseDocumentFrom:useNamespaces: (in category 'instance creation') ----- + parseDocumentFrom: aStream useNamespaces: aBoolean + | parser | + parser := self on: aStream. + parser useNamespaces: aBoolean. + parser startDocument. + parser parseDocument. + ^parser! Item was changed: ----- Method: XMLElement>>attributes (in category 'accessing') ----- attributes + ^attributes ifNil: [attributes := Dictionary new]! - ^attributes ifNil: [attributes _ Dictionary new]! Item was changed: ----- Method: XMLTokenizer>>pushBack: (in category 'streaming') ----- pushBack: aString + "Fixed to push the string before the peek char (if any)." + | pushBackString | + pushBackString := peekChar - pushBackString _ peekChar ifNil: [aString] + ifNotNil: [aString, peekChar asString]. + peekChar := nil. - ifNotNil: [peekChar asString , aString]. - peekChar _ nil. self pushStream: (ReadStream on: pushBackString)! Item was changed: ----- Method: XMLWriter>>canonical: (in category 'accessing') ----- canonical: aBoolean + canonical := aBoolean! - canonical _ aBoolean! Item was changed: ----- Method: XMLPI>>data: (in category 'accessing') ----- data: aString + data := aString! - data _ aString! Item was changed: ----- Method: SAXHandler>>endDocument (in category 'content') ----- endDocument "This call corresponds to the Java SAX call endDocument()." + eod := true! - eod _ true! Item was added: + ----- Method: XMLParserTest>>testExampleAddressBook (in category 'tests') ----- + testExampleAddressBook + | tokenizer | + "self debug: #testExampleAddressBook" + + tokenizer := XMLTokenizer on: self addressBookXML readStream. + + "We enumerate the first characters of the addressbook example. The file being parsed begins with <addressbook" + self assert: tokenizer next = $<. + self assert: tokenizer next = $a. + self assert: tokenizer next = $d. + self assert: tokenizer next = $d. + self assert: tokenizer next = $r. + + self shouldnt: ([tokenizer next notNil] whileTrue: []) raise: Error. ! Item was changed: ----- Method: XMLTokenizer>>handleEntity:in: (in category 'entities') ----- handleEntity: referenceString in: parsingContext | entity entityValue | + entity := self entity: referenceString. + entityValue := entity valueForContext: parsingContext. - entity _ self entity: referenceString. - entityValue _ entity valueForContext: parsingContext. (self class isCharEscape: entityValue) + ifTrue: [entityValue := entity reference]. - ifTrue: [entityValue _ entity reference]. self pushStream: (ReadStream on: entityValue asString)! Item was changed: ----- Method: SAXHandler>>document: (in category 'accessing') ----- document: aDocument + document := aDocument! - document _ aDocument! Item was changed: ----- Method: XMLElement>>addContent: (in category 'initialize') ----- addContent: contentString + self addElement: contentString! - self contents add: contentString! Item was changed: ----- Method: XMLTokenizer>>readNumberBase: (in category 'private') ----- readNumberBase: base "Read a hex number from stream until encountering $; " | value digit | base = 10 ifFalse: [ | numberString | + numberString := self nextUpTo: $;. - numberString _ self nextUpTo: $;. self stream skip: -1. ^Integer readFrom: numberString asUppercase readStream base: base. ]. + value := 0. + digit := DigitTable at: self peek asciiValue. - value _ 0. - digit _ DigitTable at: self peek asciiValue. digit < 0 ifTrue: [self error: 'At least one digit expected here']. self next. + value := digit. + [digit := DigitTable at: self peek asciiValue. - value _ digit. - [digit _ DigitTable at: self peek asciiValue. digit < 0 ifTrue: [^value] ifFalse: [ self next. + value := value * base + digit] - value _ value * base + digit] ] repeat. ^ value! Item was changed: ----- Method: SAXHandler class>>parseDTDFrom: (in category 'instance creation') ----- parseDTDFrom: aStream | driver parser | + driver := SAXDriver on: aStream. - driver _ SAXDriver on: aStream. driver validating: true. driver startParsingMarkup. + parser := self new driver: driver. - parser _ self new driver: driver. parser startDocument. parser parseDocument. ^parser! Item was changed: ----- Method: SAXDriver>>splitName:into: (in category 'namespaces') ----- splitName: aName into: twoArgsBlock "Split the name into namespace and local name (the block arguments). Handle both qualified and unqualified names using the default name space" | i ns ln | + i := aName lastIndexOf: $:. - i _ aName lastIndexOf: $:. i = 0 ifTrue: [ ns := nil. ln := aName] ifFalse: [ ns := aName copyFrom: 1 to: (i - 1). ln := aName copyFrom: i+1 to: aName size]. twoArgsBlock value: ns value: ln! Item was changed: ----- Method: XMLTokenizer>>skipSeparators (in category 'streaming') ----- skipSeparators | nextChar | + [((nextChar := self peek) ~~ nil) + and: [SeparatorTable includes: nextChar]] + whileTrue: [self next]. - [((nextChar _ self peek) == nil) - or: [SeparatorTable at: nextChar asciiValue+1]] - whileFalse: [self next]. (nestedStreams == nil or: [self atEnd not]) ifFalse: [ self checkNestedStream. self skipSeparators]! Item was changed: ----- Method: XMLTokenizer>>skipUpTo: (in category 'streaming') ----- skipUpTo: delimiter | nextChar | self unpeek. + [self atEnd or: [(nextChar := self next) == delimiter]] - [self atEnd or: [(nextChar _ self next) == delimiter]] whileFalse: []. nextChar == delimiter ifFalse: [self parseError: 'XML no delimiting ' , delimiter printString , ' found'] ! Item was changed: ----- Method: XMLTokenizer>>unpeek (in category 'streaming') ----- unpeek + "Fixed to use nested stream since multi-byte streams + do not properly override pushBack: to deal with multi-byte + characters." + + peekChar ifNotNil: [self pushBack: '']! - peekChar - ifNotNil: [ - self stream pushBack: (String with: peekChar). - peekChar _ nil]! Item was added: + ----- Method: XMLWriter>>outdent (in category 'private') ----- + outdent + currentIndent + ifNotNil: [ + stream cr. + currentIndent := currentIndent-1. + self writeIndent. + currentIndent := currentIndent-1.]! Item was changed: ----- Method: SAXHandler>>initialize (in category 'initialize') ----- initialize + eod := false! - eod _ false! Item was added: + ----- Method: XMLParserTest>>testParsing (in category 'tests') ----- + testParsing + | xmlDocument root firstPerson numberOfPersons | + "self debug: #testParsing" + + xmlDocument := XMLDOMParser parseDocumentFrom: self addressBookXML readStream. + self assert: (xmlDocument isKindOf: XMLDocument). + root := xmlDocument root. + self assert: (root class == XMLElement). + + "the tag has to be a symbol!!" + self assert: (root firstTagNamed: 'person') isNil. + self assert: (root firstTagNamed: 'addressbook') isNil. + + self assert: (root firstTagNamed: #addressbook) == root. + + numberOfPersons := 0. + root tagsNamed: #person do: [:p | numberOfPersons := numberOfPersons + 1]. + self assert: numberOfPersons = 4. + + firstPerson := root firstTagNamed: #person. + self assert: (firstPerson attributeAt: #'employee-number') = 'A0000'. + self assert: (firstPerson attributeAt: #'family-name') = 'Gates'. + self assert: (firstPerson attributeAt: #'first-name') = 'Bob'.! Item was changed: ----- Method: XMLTokenizer class>>exampleAddressBook (in category 'examples') ----- exampleAddressBook | tokenizer | "XMLTokenizer exampleAddressBook" + tokenizer := XMLTokenizer on: self addressBookXML readStream. - tokenizer _ XMLTokenizer on: self addressBookXML readStream. [tokenizer next notNil] whileTrue: []! Item was changed: ----- Method: XMLTokenizer>>nextPI (in category 'tokenizing') ----- nextPI | piTarget piData | "Skip ?" self next. + piTarget := self nextLiteral. - piTarget _ self nextLiteral. piTarget asUppercase = 'XML' ifTrue: [^self nextXMLDecl]. self skipSeparators. + piData := self nextUpToAll: '?>'. - piData _ self nextUpToAll: '?>'. self handlePI: piTarget data: piData! Item was changed: ----- Method: XMLTokenizer>>nextEntityDeclaration (in category 'tokenizing dtd') ----- nextEntityDeclaration | entityName entityDef referenceClass reference | self skipSeparators. + referenceClass := self peek == $% - referenceClass _ self peek == $% ifTrue: [ self next. self skipSeparators. DTDParameterEntityDeclaration] ifFalse: [DTDEntityDeclaration]. + entityName := self nextLiteral. - entityName _ self nextLiteral. self skipSeparators. + entityDef := (self peek == $" or: [self peek == $']) - entityDef _ (self peek == $" or: [self peek == $']) ifTrue: [self nextEntityValue] ifFalse: [self nextExternalId]. self skipUpTo: $>. + reference := referenceClass name: entityName value: entityDef. - reference _ referenceClass name: entityName value: entityDef. reference registerIn: self. ^reference! Item was changed: ----- Method: SAXHandler class>>on: (in category 'instance creation') ----- on: aStream | driver parser | + driver := SAXDriver on: aStream. - driver _ SAXDriver on: aStream. driver validating: true. + parser := self new driver: driver. - parser _ self new driver: driver. ^parser! Item was changed: XMLNodeWithElements subclass: #XMLElement + instanceVariableNames: 'name attributes' - instanceVariableNames: 'name contents attributes' classVariableNames: '' poolDictionaries: '' category: 'XML-Parser'! Item was added: + ----- Method: XMLWriter>>indent (in category 'private') ----- + indent + currentIndent + ifNotNil: [currentIndent := currentIndent +1]! Item was added: + ----- Method: String>>applyLanguageInfomation: (in category '*xml-parser') ----- + applyLanguageInfomation: languageEnvironment + + | leadingChar | + leadingChar := languageEnvironment leadingChar. + self withIndexDo: [:each :idx | + each asciiValue > 255 + ifTrue: [self at: idx put: (Character leadingChar: leadingChar code: each asUnicode)]]! Item was changed: ----- Method: XMLStringNode>>string: (in category 'accessing') ----- string: aString + string := aString! - string _ aString! Item was changed: ----- Method: XMLTokenizer>>nextName (in category 'tokenizing') ----- nextName | nextChar | nameBuffer reset. self peek == $. ifTrue: [self malformedError: 'Character expected.']. + [(nextChar := self peek) - [(nextChar _ self peek) ifNil: [self errorExpected: 'Character expected.']. + NameDelimiters includes: nextChar] whileFalse: [ - NameDelimiters at: nextChar asciiValue] whileFalse: [ nameBuffer nextPut: self next]. + ^self fastStreamStringContents: nameBuffer! - ^self fastStreamStringContents: nameBuffer - " ^nameBuffer contents"! Item was changed: ----- Method: XMLNodeWithElements>>addElement: (in category 'accessing') ----- addElement: element + self elementsAndContents add: element! - self elements add: element! Item was changed: ----- Method: DTDEntityDeclaration>>name: (in category 'accessing') ----- name: aString + name := aString asSymbol! - name _ aString asSymbol! Item was added: + ----- Method: XMLTokenizer>>stream:upToAll: (in category 'streaming') ----- + stream: aStream upToAll: aCollection + "Answer a subcollection from the current access position to the occurrence (not inclusive) of aCollection. If aCollection is not in the stream, answer nil." + + | startPos endMatch result | + startPos := aStream position. + (aStream match: aCollection) + ifTrue: [endMatch := aStream position. + aStream position: startPos. + result := aStream next: endMatch - startPos - aCollection size. + aStream position: endMatch. + ^ result] + ifFalse: [ + aStream position: startPos. + ^nil]! Item was changed: ----- Method: XMLTokenizer>>next (in category 'streaming') ----- next "Return the next character from the current input stream. If the current stream is at end pop to next nesting level if there is one. Due to the potential nesting of original document, included documents and replacment texts the streams are held in a stack representing the nested streams. The current stream is the top one." | nextChar | peekChar ifNil: [ nestedStreams ifNotNil: [self checkNestedStream]. + ^nextChar := stream next] - ^nextChar _ stream next] ifNotNil: [ + nextChar := peekChar. + peekChar := nil. - nextChar _ peekChar. - peekChar _ nil. ^nextChar]. ! Item was changed: ----- Method: XMLTokenizer>>entities (in category 'entities') ----- entities + entities ifNil: [entities := self initEntities]. - entities ifNil: [entities _ self initEntities]. ^entities! Item was changed: ----- Method: XMLDOMParser>>endElement:namespace:namespaceURI:qualifiedName: (in category 'content') ----- endElement: localName namespace: namespace namespaceURI: uri qualifiedName: qualifiedName | currentElement | + currentElement := self pop. - currentElement _ self pop. (currentElement namespace isNil or: [currentElement namespace = self defaultNamespace]) ifTrue: [ currentElement localName = localName ifFalse: [self driver errorExpected: 'End tag "', localName , '" doesn''t match "' , currentElement localName , '".']] ifFalse: [ currentElement qualifiedName = qualifiedName ifFalse: [self driver errorExpected: 'End tag "', qualifiedName , '" doesn''t match "' , currentElement qualifiedName , '".']]! Item was changed: ----- Method: XMLWriter>>endTag (in category 'writing xml') ----- endTag self stream nextPutAll: '>'. + self indent. "self canonical ifFalse: [self stream space]"! Item was changed: ----- Method: XMLTokenizer>>initialize (in category 'initialize') ----- initialize + parsingMarkup := false. + validating := false. + attributeBuffer := WriteStream on: (String new: 128). + nameBuffer := WriteStream on: (String new: 128)! - parsingMarkup _ false. - validating _ false. - attributeBuffer _ WriteStream on: (String new: 128). - nameBuffer _ WriteStream on: (String new: 128)! Item was changed: ----- Method: XMLTokenizer>>checkAndExpandReference: (in category 'tokenizing') ----- checkAndExpandReference: parsingContext | referenceString nextChar | + nextChar := self peek. - nextChar _ self peek. self validating ifFalse: [^nil]. nextChar == $& ifTrue: [ self next. self peek == $# ifTrue: [^self pushStream: (ReadStream on: self nextCharReference asString)]. + referenceString := self nextLiteral. - referenceString _ self nextLiteral. self next == $; ifFalse: [self errorExpected: ';']. self handleEntity: referenceString in: parsingContext ] ifFalse: [ ((nextChar == $% and: [self parsingMarkup]) and: [parsingContext == #entityValue]) ifTrue: [ self skipSeparators. + referenceString := self nextLiteral. - referenceString _ self nextLiteral. self handleEntity: referenceString in: parsingContext]]. self atEnd ifTrue: [self errorExpected: 'Character expected.']. ^nextChar! Item was changed: ----- Method: XMLElement>>printXMLOn: (in category 'printing') ----- printXMLOn: writer writer startElement: self name attributeList: self attributes. (writer canonical not and: [self isEmpty and: [self attributes isEmpty not]]) ifTrue: [writer endEmptyTag: self name] ifFalse: [ writer endTag. + self elementsAndContentsDo: [:content | content printXMLOn: writer]. - self contentsDo: [:content | content printXMLOn: writer]. - super printXMLOn: writer. writer endTag: self name]! Item was changed: ----- Method: XMLWriter>>popTag: (in category 'private') ----- popTag: tagName | stackTop | + stackTop := self stack isEmpty - stackTop _ self stack isEmpty ifTrue: ['<empty>'] ifFalse: [self stack last]. ^stackTop = tagName ifTrue: [self stack removeLast] ifFalse: [self error: 'Closing tag "' , tagName , '" does not match "' , stackTop]! Item was added: + ----- Method: XMLTokenizer>>upToAll: (in category 'streaming') ----- + upToAll: delimitingString + "Answer a subcollection from the current access position to the occurrence (if any, but not inclusive) of delimitingString. If delimitingString is not in the stream, answer the entire rest of the stream." + + | result | + + self hasNestedStreams + ifFalse: [ + result := self stream: self stream upToAll: delimitingString. + result + ifNil: [self parseError: 'XML no delimiting ' , delimitingString printString , ' found']. + ^result]. + + result := self stream: self stream upToAll: delimitingString. + result + ifNotNil: [^result]. + result := String streamContents: [:resultStream | + resultStream nextPutAll: self stream upToEnd. + self atEnd + ifTrue: [self parseError: 'XML no delimiting ' , delimitingString printString , ' found']. + self stream position timesRepeat: [ + self atEnd + ifFalse: [ + resultStream nextPut: self next]]]. + self pushBack: result. + ^self upToAll: delimitingString! Item was changed: ----- Method: XMLDOMParser>>pop (in category 'private') ----- pop | oldTop | + oldTop := self stack removeLast. + entity := oldTop. - oldTop _ self stack removeLast. - entity _ oldTop. ^oldTop! Item was changed: ----- Method: XMLTokenizer>>nextExternalId (in category 'tokenizing dtd') ----- nextExternalId | extDefType systemId dir | + extDefType := self nextLiteral. - extDefType _ self nextLiteral. extDefType = 'PUBLIC' ifTrue: [ self skipSeparators. self nextPubidLiteral. self skipSeparators. self peek == $> ifFalse: [ + systemId := self nextSystemLiteral]]. - systemId _ self nextSystemLiteral]]. extDefType = 'SYSTEM' ifTrue: [ self skipSeparators. + systemId := self nextSystemLiteral]. - systemId _ self nextSystemLiteral]. systemId ifNil: [^nil]. "The rest of this method only applies if we're reading aFileStream" (self topStream isKindOf: FileStream) ifFalse: [^'']. + dir := self topStream directory. - dir _ self topStream directory. ^(dir fileExists: systemId) ifTrue: [(dir readOnlyFileNamed: systemId) contentsOfEntireFile] ifFalse: ['']! Item was changed: ----- Method: XMLDocument>>version: (in category 'accessing') ----- version: aString + version := aString! - version _ aString! Item was changed: ----- Method: XMLTokenizer>>stream: (in category 'private') ----- stream: newStream "Continue parsing from the new nested stream." + stream := newStream! - stream _ newStream! Item was changed: ----- Method: XMLTokenizer>>nextWhitespace (in category 'tokenizing') ----- nextWhitespace | nextChar resultStream resultString| + resultStream := (String new: 10) writeStream. + [((nextChar := self peek) ~~ nil) + and: [SeparatorTable includes: nextChar]] + whileTrue: [resultStream nextPut: nextChar. self next]. - resultStream _ (String new: 10) writeStream. - [((nextChar _ self peek) == nil) - or: [SeparatorTable at: nextChar asciiValue+1 ifAbsent:[true]]] - whileFalse: [resultStream nextPut: nextChar. self next]. (nestedStreams == nil or: [self atEnd not]) ifFalse: [self checkNestedStream. self nextWhitespace]. + resultString := resultStream contents. + resultString isEmpty ifFalse: [self handleWhitespace: resultString]. + ^resultString! - resultString _ resultStream contents. - resultString isEmpty ifFalse: [self handleWhitespace: resultString].! Item was added: + ----- Method: XMLElement>>allAttributes (in category 'accessing') ----- + allAttributes + ^ self attributes asOrderedCollection! Item was changed: ----- Method: XMLWriter>>stream: (in category 'accessing') ----- stream: aStream + stream := aStream! - stream _ aStream! Item was changed: ----- Method: DTDEntityDeclaration class>>initialize (in category 'class initialization') ----- initialize "DTDEntityDeclaration initialize" + contextBehavior := Dictionary new. - contextBehavior _ Dictionary new. contextBehavior at: #content put: #include ; at: #attributeValueContent put: #includedInLiteral ; at: #attributeValue put: #forbidden ; at: #entityValue put: #bypass ; at: #dtd put: #forbidden ! Item was added: + ----- Method: XMLElement>>elementsDo: (in category 'enumerating') ----- + elementsDo: aBlock + self elementsAndContentsDo: [:each | each isText ifFalse: [aBlock value: each]]! Item was changed: ----- Method: XMLTokenizer>>nextDocType (in category 'tokenizing dtd') ----- nextDocType | declType | + declType := self nextLiteral. - declType _ self nextLiteral. declType = 'DOCTYPE' ifTrue: [ self startParsingMarkup. ^self nextDocTypeDecl]. self errorExpected: 'markup declaration, not ' , declType printString! Item was added: + ----- Method: XMLParserTest>>addressBookXML (in category 'source') ----- + addressBookXML + ^'<addressbook> + <person employee-number="A0000" family-name="Gates" first-name="Bob"> + <contact-info><!!--Confidential--></contact-info> + <address city="Los Angeles" number="1239" state="CA" street="Pine Rd."/> + <job-info employee-type="Full-Time" is-manager="no" job-description="Manager"/> + <manager employee-number="A0000"/> + </person> + <person employee-number="A7000" family-name="Brown" + first-name="Robert" middle-initial="L."> + <contact-info> + <email address="[hidden email]"/> + <home-phone number="03-3987873"/> + </contact-info> + <address city="New York" number="344" state="NY" street="118 St."/> + <job-info employee-type="Full-Time" is-manager="yes" job-description="Group Leader"/> + <manager employee-number="A0000"/> + </person> + <person employee-number="A7890" family-name="DePaiva" + first-name="Kassie" middle-initial="W."> + <contact-info><!!-- Kassie''s agent phone: 03-987654 --></contact-info> + <address city="Los Angeles" number="1234" state="CA" street="Pine Rd."/> + <job-info employee-type="Full-Time" is-manager="no" job-description="Actor"/> + <manager employee-number="A0000"/> + <misc-info>One of the most talented actresses on Daytime. Kassie + plays the devious and beautiful Blair Cramer on ABC's + "One Life To Live."</misc-info> + </person> + <person employee-number="A7987" family-name="Smith" first-name="Joe"> + <contact-info> + <email address="[hidden email]"/> + <mobile-phone number="888-7657765"/> + <home-phone number="03-8767898"/> + <home-phone number="03-8767871"/> + </contact-info> + <address city="New York" number="12789" state="NY" street="W. 15th Ave."/> + <job-info employee-type="Part-Time" is-manager="no" job-description="Hacker"/> + <manager employee-number="A7000"/> + </person> + </addressbook> + '! Item was changed: XMLNode subclass: #XMLNodeWithElements + instanceVariableNames: 'elementsAndContents uri namespace parent' - instanceVariableNames: 'elements uri namespace' classVariableNames: '' poolDictionaries: '' category: 'XML-Parser'! Item was added: + ----- Method: XMLParserTest>>testParsingCharacters (in category 'tests') ----- + testParsingCharacters + | parser | + "This test is actually not that useful. This is not the proper way of using the parser. This test is here just for specification purpose" + "self debug: #testParsingCharacters" + + parser := XMLParser on: self addressBookXML readStream. + + self assert: parser next = $<. + self assert: parser next = $a. + self assert: parser next = $d. + self assert: parser next = $d. + self assert: parser next = $r.! Item was changed: ----- Method: XMLTokenizer>>nextCharReference (in category 'tokenizing') ----- nextCharReference | base charValue | self next == $# ifFalse: [self errorExpected: 'character reference']. + base := self peek == $x - base _ self peek == $x ifTrue: [ self next. 16] ifFalse: [10]. + charValue := [self readNumberBase: base] on: Error do: [:ex | self errorExpected: 'Number.']. - charValue _ [self readNumberBase: base] on: Error do: [:ex | self errorExpected: 'Number.']. (self next) == $; ifFalse: [self errorExpected: '";"']. ^Unicode value: charValue! Item was changed: ----- Method: XMLElement>>name: (in category 'initialize') ----- name: aString + name := aString asSymbol! - name _ aString asSymbol! Item was changed: ----- Method: XMLDOMParser>>initialize (in category 'initialize') ----- initialize super initialize. + stack := OrderedCollection new. + incremental := false! - stack _ OrderedCollection new. - incremental _ false! Item was changed: ----- Method: DTDExternalEntityDeclaration class>>initialize (in category 'class initialization') ----- initialize "DTDExternalEntityDeclaration initialize" + contextBehavior := Dictionary new. - contextBehavior _ Dictionary new. contextBehavior at: #content put: #include ; at: #attributeValueContent put: #includedInLiteral ; at: #attributeValue put: #forbidden ; at: #entityValue put: #bypass ; at: #dtd put: #forbidden ! Item was changed: ----- Method: XMLElement>>isEmpty (in category 'testing') ----- isEmpty + ^self elements isEmpty! - ^self elements isEmpty - and: [self contents isEmpty]! Item was added: + ----- Method: XMLParserTest>>testExampleAddressBookWithDTD (in category 'tests') ----- + testExampleAddressBookWithDTD + | tokenizer | + "XMLTokenizer exampleAddressBookWithDTD" + + tokenizer := XMLTokenizer on: self addressBookXMLWithDTD readStream. + self shouldnt: ([tokenizer next notNil] whileTrue: []) raise: Error! Item was changed: ----- Method: XMLDOMParser class>>parseDocumentFrom: (in category 'instance creation') ----- parseDocumentFrom: aStream + ^self parseDocumentFrom: aStream useNamespaces: false! - ^(super parseDocumentFrom: aStream) document! Item was changed: ----- Method: XMLDOMParser>>documentAttributes: (in category 'content') ----- documentAttributes: attributeList self document version: (attributeList at: 'version' ifAbsent: [nil]). self document encoding: (attributeList at: 'encoding' ifAbsent: [nil]). self document requiredMarkup: (attributeList at: 'requiredMarkup' ifAbsent: [nil]). ! Item was changed: ----- Method: XMLTokenizer>>nextEndTag (in category 'tokenizing') ----- nextEndTag + | tagName | - | string | "Skip /" self next. + tagName := self nextName. self skipSeparators. + (self nextTrimmedBlanksUpTo: $>) + ifNotEmpty: [self parseError: 'XML invalid end tag ' , tagName]. + self handleEndTag: tagName! - string _ self nextTrimmedBlanksUpTo: $>. - "string _ (self nextUpTo: $>) withBlanksTrimmed." - self handleEndTag: string! Item was added: + ----- Method: XMLElement>>parent (in category 'accessing') ----- + parent + ^ parent! Item was changed: ----- Method: XMLDocument>>encoding: (in category 'accessing') ----- encoding: aString + encoding := aString! - encoding _ aString! Item was changed: ----- Method: XMLDOMParser>>startElement:attributeList: (in category 'content') ----- startElement: elementName attributeList: attributeList | newElement | + newElement := XMLElement named: elementName attributes: attributeList. - newElement _ XMLElement named: elementName attributes: attributeList. self incremental ifFalse: [self stack isEmpty ifFalse: [self top addElement: newElement]]. self push: newElement! Item was changed: ----- Method: XMLTokenizer>>nextUpToAll: (in category 'streaming') ----- nextUpToAll: delimitingString | string | self unpeek. + string := self upToAll: delimitingString. + string + ifNil: [self parseError: 'XML no delimiting ' , delimitingString printString , ' found']. + ^string! - string _ self stream upToAll: delimitingString. - self stream skip: delimitingString size negated. - (self stream next: delimitingString size) = delimitingString - ifFalse: [self parseError: 'XML no delimiting ' , delimitingString printString , ' found']. - ^string - ! Item was changed: ----- Method: XMLNamespaceScope>>validateAttributes: (in category 'validation') ----- validateAttributes: attributeList "check all attribute namespaces are defined and not duplicated by aliasing" | namespace localName | attributeList keysDo: [:attrName | self splitName: attrName into: [:ns :ln | + namespace := ns. + localName := ln]. - namespace _ ns. - localName _ ln]. namespace ifNotNil: [ (self namespaceAliases: namespace) do: [:alias | (attributeList includesKey: alias , ':' , localName) ifTrue: [self parseError: 'Attributes ' , attrName , ' and ' , alias , ':' , localName , ' are aliased to namespace ' , (self namespaceURIOf: namespace) ]]]]! Item was changed: ----- Method: SAXHandler class>>parserOnFileNamed:readIntoMemory: (in category 'instance creation') ----- parserOnFileNamed: fileName readIntoMemory: readIntoMemory | stream | + stream := FileDirectory default readOnlyFileNamed: fileName. - stream _ FileDirectory default readOnlyFileNamed: fileName. readIntoMemory + ifTrue: [stream := stream contentsOfEntireFile readStream]. - ifTrue: [stream _ stream contentsOfEntireFile readStream]. ^self on: stream! Item was changed: ----- Method: XMLElement>>setAttributes: (in category 'initialize') ----- setAttributes: newAttributes + attributes := newAttributes! - attributes _ newAttributes! Item was added: + ----- Method: XMLNodeWithElements>>elementsAndContents (in category 'accessing') ----- + elementsAndContents + elementsAndContents ifNil: [elementsAndContents := OrderedCollection new]. + ^elementsAndContents! Item was changed: ----- Method: XMLTokenizer>>initEntities (in category 'entities') ----- initEntities | ents | + ents := Dictionary new. - ents _ Dictionary new. ents + at: 'amp' put: (DTDEntityDeclaration name: 'amp' value: '&'); + at: 'quot' put: (DTDEntityDeclaration name: 'quot' value: '"'); + at: 'apos' put: (DTDEntityDeclaration name: 'apos' value: ''''); + at: 'gt' put: (DTDEntityDeclaration name: 'gt' value: '>'); + at: 'lt' put: (DTDEntityDeclaration name: 'lt' value: '<'). - at: 'amp' put: (DTDEntityDeclaration name: 'amp' value: $&); - at: 'quot' put: (DTDEntityDeclaration name: 'quot' value: $"); - at: 'apos' put: (DTDEntityDeclaration name: 'apos' value: $'); - at: 'gt' put: (DTDEntityDeclaration name: 'gt' value: $>); - at: 'lt' put: (DTDEntityDeclaration name: 'lt' value: $<). ^ents! Item was changed: ----- Method: XMLDOMParser>>characters: (in category 'content') ----- characters: aString | newElement | + newElement := XMLStringNode string: aString. - newElement _ XMLStringNode string: aString. self top addContent: newElement. ! Item was added: + ----- Method: ByteString>>applyLanguageInfomation: (in category '*xml-parser') ----- + applyLanguageInfomation: languageEnvironment + ! Item was added: + ----- Method: XMLElement>>at: (in category 'accessing') ----- + at: aSymbol + ^ self attributeAt: aSymbol ! Item was changed: ----- Method: XMLDOMParser>>processingInstruction:data: (in category 'content') ----- processingInstruction: piName data: dataString | newElement | + newElement := XMLPI target: piName data: dataString. - newElement _ XMLPI target: piName data: dataString. self top addElement: newElement! Item was changed: ----- Method: XMLElement>>contentString (in category 'accessing') ----- contentString + | contentElements | + contentElements := self elementsAndContents. + ^(contentElements size > 0 + and: [contentElements first isText]) + ifTrue: [contentElements first string] - ^(self contents size == 1 - and: [self contents first isKindOf: XMLStringNode]) - ifTrue: [self contents first string] ifFalse: ['']! Item was added: + ----- Method: XMLTokenizer>>streamEncoding: (in category 'streaming') ----- + streamEncoding: encodingString + + | converterClass | + Smalltalk at: #TextConverter ifPresent: [:tc | + (stream respondsTo: #converter:) ifTrue: [ + converterClass := tc defaultConverterClassForEncoding: encodingString asLowercase. + converterClass ifNotNil: [stream converter: converterClass new]]]! Item was changed: ----- Method: XMLTokenizer>>nextPCData (in category 'tokenizing') ----- nextPCData | resultStream nextChar referenceString entity entityValue nextPeek | + resultStream := (String new: 10) writeStream. - resultStream _ (String new: 10) writeStream. self validating ifFalse: [ [self peek == $<] whileFalse: [resultStream nextPut: self next]. ^self handlePCData: resultStream contents]. [ + nextPeek := nextChar := self peek. - nextPeek _ nextChar _ self peek. nextChar ifNil: [self errorExpected: 'Character expected.']. nextChar == $& ifTrue: [ self next. self peek == $# ifTrue: [ + nextPeek := nil. + nextChar := self nextCharReference] - nextPeek _ nil. - nextChar _ self nextCharReference] ifFalse: [ + referenceString := self nextLiteral. - referenceString _ self nextLiteral. self next == $; ifFalse: [self errorExpected: ';']. + entity := self entity: referenceString. + entityValue := entity valueForContext: #content. - entity _ self entity: referenceString. - entityValue _ entity valueForContext: #content. (self class isCharEscape: entityValue) ifTrue: [ + nextPeek := nil. + nextChar := entityValue first] - nextPeek _ nil. - nextChar _ entityValue] ifFalse: [ + entityValue := entityValue asString. - entityValue _ entityValue asString. entityValue isEmpty + ifTrue: [nextPeek := nextChar := nil] - ifTrue: [nextPeek _ nextChar _ nil] ifFalse: [ self pushStream: (ReadStream on: entityValue asString). + nextPeek := nextChar := self peek]]]] - nextPeek _ nextChar _ self peek]]]] ifFalse: [nextPeek == $< ifFalse: [self next]]. nextPeek == $<] whileFalse: [ nextChar ifNotNil: [resultStream nextPut: nextChar]]. self handlePCData: resultStream contents! Item was changed: ----- Method: XMLTokenizer>>nextAttributeInto:namespaces: (in category 'tokenizing') ----- nextAttributeInto: attributes namespaces: namespaces | attrName attrValue | + attrName := self nextName. - attrName _ self nextName. self skipSeparators. self next == $= ifFalse: [self errorExpected: '=']. self skipSeparators. + attrValue := self nextAttributeValue. - attrValue _ self nextAttributeValue. (self usesNamespaces and: [(attrName findString: 'xmlns') = 1]) ifTrue: [attrName size > 6 ifTrue: [namespaces at: (attrName copyFrom: 7 to: attrName size) put: attrValue] ifFalse: [namespaces at: attrName put: attrValue]] ifFalse: [attributes at: attrName put: attrValue]! Item was changed: ----- Method: XMLDOMParser>>push: (in category 'private') ----- push: anObject self stack add: anObject. + entity := anObject - entity _ anObject ! Item was changed: ----- Method: XMLTokenizer>>nextIncludeSection: (in category 'tokenizing') ----- nextIncludeSection: parseSection | section | "Read the file up to the next include section delimiter and parse it if parseSection is true" + section := self nextUpToAll: ']]>'. - section _ self nextUpToAll: ']]>'. parseSection ifTrue: [ self pushStream: (ReadStream on: section)]! Item was added: + ----- Method: XMLParserTest>>addressBookXMLWithDTD (in category 'source') ----- + addressBookXMLWithDTD + ^'<?xml version="1.0" encoding="UTF-8"?> + <!!DOCTYPE addressbook SYSTEM "addressbook.dtd"> + <?xml-stylesheet type="text/xsl" href="demo.xsl"?> + <addressbook> + <person employee-number="A0000" family-name="Gates" first-name="Bob"> + <contact-info><!!--Confidential--></contact-info> + <address city="Los Angeles" number="1239" state="CA" street="Pine Rd."/> + <job-info employee-type="Full-Time" is-manager="no" job-description="Manager"/> + <manager employee-number="A0000"/> + </person> + <person employee-number="A7000" family-name="Brown" + first-name="Robert" middle-initial="L."> + <contact-info> + <email address="[hidden email]"/> + <home-phone number="03-3987873"/> + </contact-info> + <address city="New York" number="344" state="NY" street="118 St."/> + <job-info employee-type="Full-Time" is-manager="yes" job-description="Group Leader"/> + <manager employee-number="A0000"/> + </person> + <person employee-number="A7890" family-name="DePaiva" + first-name="Kassie" middle-initial="W."> + <contact-info><!!-- Kassie''s agent phone: 03-987654 --></contact-info> + <address city="Los Angeles" number="1234" state="CA" street="Pine Rd."/> + <job-info employee-type="Full-Time" is-manager="no" job-description="Actor"/> + <manager employee-number="A0000"/> + <misc-info>One of the most talented actresses on Daytime. Kassie + plays the devious and beautiful Blair Cramer on ABC's + "One Life To Live."</misc-info> + </person> + <person employee-number="A7987" family-name="Smith" first-name="Joe"> + <contact-info> + <email address="[hidden email]"/> + <mobile-phone number="888-7657765"/> + <home-phone number="03-8767898"/> + <home-phone number="03-8767871"/> + </contact-info> + <address city="New York" number="12789" state="NY" street="W. 15th Ave."/> + <job-info employee-type="Part-Time" is-manager="no" job-description="Hacker"/> + <manager employee-number="A7000"/> + </person> + </addressbook> + '! Item was changed: ----- Method: XMLPI>>target: (in category 'accessing') ----- target: aString + target := aString! - target _ aString! Item was changed: ----- Method: XMLTokenizer>>validating: (in category 'accessing') ----- validating: aBoolean + validating := aBoolean! - validating _ aBoolean! Item was changed: ----- Method: XMLDocument>>printCanonicalOn: (in category 'printing') ----- printCanonicalOn: aStream | writer | + writer := XMLWriter on: aStream. - writer _ XMLWriter on: aStream. writer canonical: true. self printXMLOn: writer! Item was changed: ----- Method: XMLTokenizer>>nextEntity (in category 'tokenizing') ----- nextEntity + "return the next XMLnode, or nil if there are no more. + Fixed to retain leading whitespace when PCDATA is detected." - "return the next XMLnode, or nil if there are no more" + |whitespace| "branch, depending on what the first character is" + whitespace := self nextWhitespace. - self nextWhitespace. self atEnd ifTrue: [self handleEndDocument. ^ nil]. self checkAndExpandReference: (self parsingMarkup ifTrue: [#dtd] ifFalse: [#content]). ^self peek = $< ifTrue: [self nextNode] + ifFalse: [whitespace isEmpty + ifFalse: [self pushBack: whitespace]. + self nextPCData]! - ifFalse: [self nextPCData]! Item was changed: ----- Method: DTDEntityDeclaration>>ndata: (in category 'accessing') ----- ndata: aString + ndata := aString! - ndata _ aString! Item was changed: ----- Method: XMLTokenizer>>nextComment (in category 'tokenizing') ----- nextComment | string | "Skip first -" self next. self next == $- ifFalse: [self errorExpected: 'second comment $-']. + string := self nextUpToAll: '-->'. - string _ self nextUpToAll: '-->'. self handleComment: string! Item was changed: ----- Method: XMLNamespaceScope>>namespaceAliases: (in category 'private') ----- namespaceAliases: namespace "Locate all namespaces that are aliases of the given URI." | aliases uri | + aliases := Set new. + uri := self namespaceURIOf: namespace ifAbsent: [self parseError: 'Attribute refers to undefined namespace ' , namespace asString ]. - aliases _ Set new. - uri _ self namespaceURIOf: namespace ifAbsent: [self parseError: 'Attribute refers to undefined namespace ' , namespace asString ]. currentBindings keysAndValuesDo: [:ns :u | (u = uri and: [ns ~= namespace]) ifTrue: [aliases add: ns]]. ^ aliases! Item was changed: ----- Method: XMLNode>>firstTagNamed:with: (in category 'searching') ----- firstTagNamed: aSymbol with: aBlock "Return the first encountered node with the specified tag that allows the block to evaluate to true. Pass the message on" | answer | self elementsDo: [:node | + (answer := node firstTagNamed: aSymbol with: aBlock) ifNotNil: [^answer]]. - (answer _ node firstTagNamed: aSymbol with: aBlock) ifNotNil: [^answer]]. ^nil! Item was changed: XMLTokenizer subclass: #XMLParser instanceVariableNames: '' classVariableNames: '' poolDictionaries: '' category: 'XML-Parser'! + + !XMLParser commentStamp: 'Alexandre.Bergel 6/1/2009 10:03' prior: 0! + This is a generic parser. + There is two ways to parse XML files, either using SAX, or using DOM. Both ways are supported in the XML-Parser package. SAX is event-based while DOM is tree-based. Ask google for more information! Item was changed: ----- Method: XMLTokenizer>>nextTrimmedBlanksUpTo: (in category 'streaming') ----- nextTrimmedBlanksUpTo: delimiter | resultStream nextChar | + resultStream := WriteStream on: (String new: 10). + nextChar := nil. + [(nextChar := self next) == delimiter] + whileFalse: [ + nextChar == $ ifFalse: [ + resultStream nextPut: nextChar]]. - resultStream _ WriteStream on: (String new: 10). - nextChar _ nil. - [peekChar _ self peek. - peekChar - ifNotNil: [ - [peekChar == $ - and: [nextChar == $ ]] - whileTrue: [peekChar _ self next]]. - (nextChar _ self next) == delimiter] - whileFalse: [resultStream nextPut: nextChar]. nextChar == delimiter ifFalse: [self parseError: 'XML no delimiting ' , delimiter printString , ' found']. ^resultStream contents ! Item was added: + ----- Method: XMLDOMParser class>>parseDocumentFrom:useNamespaces: (in category 'instance creation') ----- + parseDocumentFrom: aStream useNamespaces: aBoolean + ^(super parseDocumentFrom: aStream useNamespaces: aBoolean) document! Item was changed: ----- Method: XMLTokenizer class>>addressBookXMLWithDTD (in category 'examples') ----- addressBookXMLWithDTD ^'<?xml version="1.0" encoding="UTF-8"?> <!!DOCTYPE addressbook SYSTEM "addressbook.dtd"> + <?xml-stylesheet type="text/xsl" href="demo.xsl"?> <addressbook> <person employee-number="A0000" family-name="Gates" first-name="Bob"> <contact-info><!!--Confidential--></contact-info> <address city="Los Angeles" number="1239" state="CA" street="Pine Rd."/> <job-info employee-type="Full-Time" is-manager="no" job-description="Manager"/> <manager employee-number="A0000"/> </person> <person employee-number="A7000" family-name="Brown" first-name="Robert" middle-initial="L."> <contact-info> <email address="[hidden email]"/> <home-phone number="03-3987873"/> </contact-info> <address city="New York" number="344" state="NY" street="118 St."/> <job-info employee-type="Full-Time" is-manager="yes" job-description="Group Leader"/> <manager employee-number="A0000"/> </person> <person employee-number="A7890" family-name="DePaiva" first-name="Kassie" middle-initial="W."> <contact-info><!!-- Kassie''s agent phone: 03-987654 --></contact-info> <address city="Los Angeles" number="1234" state="CA" street="Pine Rd."/> <job-info employee-type="Full-Time" is-manager="no" job-description="Actor"/> <manager employee-number="A0000"/> <misc-info>One of the most talented actresses on Daytime. Kassie plays the devious and beautiful Blair Cramer on ABC's "One Life To Live."</misc-info> </person> <person employee-number="A7987" family-name="Smith" first-name="Joe"> <contact-info> <email address="[hidden email]"/> <mobile-phone number="888-7657765"/> <home-phone number="03-8767898"/> <home-phone number="03-8767871"/> </contact-info> <address city="New York" number="12789" state="NY" street="W. 15th Ave."/> <job-info employee-type="Part-Time" is-manager="no" job-description="Hacker"/> <manager employee-number="A7000"/> </person> </addressbook> '! Item was changed: ----- Method: XMLDOMParser>>startElement:namespaceURI:namespace:attributeList: (in category 'content') ----- startElement: localName namespaceURI: namespaceUri namespace: namespace attributeList: attributeList | newElement | + "newElement := namespace = self defaultNamespace - "newElement _ namespace = self defaultNamespace ifTrue: [XMLElement named: localName namespace: nil uri: nil attributes: attributeList] ifFalse: [XMLElement named: localName namespace: namespace uri: namespaceUri attributes: attributeList]." + newElement := XMLElement named: localName namespace: namespace uri: namespaceUri attributes: attributeList. - newElement _ XMLElement named: localName namespace: namespace uri: namespaceUri attributes: attributeList. self incremental ifFalse: [self stack isEmpty ifFalse: [self top addElement: newElement]]. self push: newElement! Item was changed: ----- Method: XMLTokenizer>>startParsingMarkup (in category 'private') ----- startParsingMarkup + parsingMarkup := true! - parsingMarkup _ true! Item was changed: ----- Method: XMLWriter>>initialize (in category 'initialize') ----- initialize + stack := OrderedCollection new. + canonical := false. - stack _ OrderedCollection new. - canonical _ false. scope := XMLNamespaceScope new! Item was changed: ----- Method: SAXDriver>>saxHandler: (in category 'accessing') ----- saxHandler: aHandler + saxHandler := aHandler! - saxHandler _ aHandler! Item was changed: ----- Method: XMLElement>>contents (in category 'accessing') ----- contents + ^self elementsAndContents select: [:each | each isText]! - contents ifNil: [contents _ OrderedCollection new]. - ^contents! Item was changed: ----- Method: XMLTokenizer>>externalEntities (in category 'entities') ----- externalEntities + externalEntities ifNil: [externalEntities := Dictionary new]. - externalEntities ifNil: [externalEntities _ Dictionary new]. ^externalEntities! Item was changed: ----- Method: XMLDocument>>dtd: (in category 'accessing') ----- dtd: aDTD + dtd := aDTD! - dtd _ aDTD! Item was changed: ----- Method: SAXDriver>>handleStartTag:attributes:namespaces: (in category 'handling tokens') ----- handleStartTag: elementName attributes: attributeList namespaces: namespaces | localName namespace namespaceURI | + (attributeList includesKey: 'xml:lang') + ifTrue: [languageEnvironment := LanguageEnvironment localeID: (LocaleID isoString: (attributeList at: 'xml:lang'))]. self usesNamespaces ifTrue: [ self scope enterScope. "declare any namespaces" namespaces keysAndValuesDo: [:ns :uri | self scope declareNamespace: ns uri: uri]. self splitName: elementName into: [:ns :ln | + namespace := ns. + localName := ln]. - namespace _ ns. - localName _ ln]. "ensure our namespace is defined" namespace ifNil: [namespace := self scope defaultNamespace] ifNotNil: [ namespaceURI := self scope namespaceURIOf: namespace. namespaceURI ifNil: [self parseError: 'Start tag ' , elementName , ' refers to undefined namespace ' , namespace asString]]. self validatesAttributes ifTrue: [self scope validateAttributes: attributeList]. "call the handler" self saxHandler checkEOD; startElement: localName namespaceURI: namespaceURI namespace: namespace attributeList: attributeList] ifFalse: [ "call the handler" self saxHandler checkEOD; startElement: elementName namespaceURI: nil namespace: nil attributeList: attributeList]! Item was changed: ----- Method: XMLTokenizer>>fastStreamStringContents: (in category 'private') ----- fastStreamStringContents: writeStream | newSize | + newSize := writeStream position. - newSize _ writeStream position. ^(String new: newSize) replaceFrom: 1 to: newSize with: writeStream originalContents startingAt: 1! Item was changed: ----- Method: XMLTokenizer>>nextXMLDecl (in category 'tokenizing') ----- nextXMLDecl | attributes nextChar namespaces | self skipSeparators. + attributes := Dictionary new. + namespaces := Dictionary new. + [(nextChar := self peek) == $?] whileFalse: [ - attributes _ Dictionary new. - namespaces _ Dictionary new. - [(nextChar _ self peek) == $?] whileFalse: [ self nextAttributeInto: attributes namespaces: namespaces. self skipSeparators.]. self next. self next == $> ifFalse: [self errorExpected: '> expected.']. + (attributes includesKey: 'encoding') ifTrue: [self streamEncoding: (attributes at: 'encoding')]. + self handleXMLDecl: attributes namespaces: namespaces + ! - self handleXMLDecl: attributes namespaces: namespaces! Item was changed: ----- Method: XMLTokenizer class>>exampleAddressBookWithDTD (in category 'examples') ----- exampleAddressBookWithDTD | tokenizer | "XMLTokenizer exampleAddressBookWithDTD" + tokenizer := XMLTokenizer on: self addressBookXMLWithDTD readStream. - tokenizer _ XMLTokenizer on: self addressBookXMLWithDTD readStream. [tokenizer next notNil] whileTrue: []! Item was added: + ----- Method: XMLElement>>@ (in category 'accessing') ----- + @ aSymbol + "shorthand form" + ^ self at: aSymbol ! Item was changed: ----- Method: XMLTokenizer>>nextAttributeValue (in category 'tokenizing') ----- nextAttributeValue | delimiterChar attributeValueStream nextChar nextPeek referenceString entity entityValue | + delimiterChar := self next. - delimiterChar _ self next. (delimiterChar == $" or: [delimiterChar == $']) ifFalse: [self errorExpected: 'Attribute value delimiter expected.']. + attributeValueStream := attributeBuffer reset. - attributeValueStream _ attributeBuffer reset. [ + nextPeek := nextChar := self next. - nextPeek _ nextChar _ self next. nextChar ifNil: [self errorExpected: 'Character expected.']. nextChar == $& ifTrue: [ self peek == $# ifTrue: [ + nextPeek := nil. + nextChar := self nextCharReference] - nextPeek _ nil. - nextChar _ self nextCharReference] ifFalse: [ + referenceString := self nextLiteral. - referenceString _ self nextLiteral. self next == $; ifFalse: [self errorExpected: ';']. + entity := self entity: referenceString. + entityValue := entity valueForContext: #content. - entity _ self entity: referenceString. - entityValue _ entity valueForContext: #content. (self class isCharEscape: entityValue) ifTrue: [ + nextPeek := nil. + nextChar := entityValue first] - nextPeek _ nil. - nextChar _ entityValue] ifFalse: [ + entityValue := entityValue asString. - entityValue _ entityValue asString. entityValue isEmpty + ifTrue: [nextPeek := nextChar := nil] - ifTrue: [nextPeek _ nextChar _ nil] ifFalse: [ self pushStream: (ReadStream on: entityValue asString). + nextPeek := nextChar := self next]]]]. - nextPeek _ nextChar _ self next]]]]. nextPeek == delimiterChar] whileFalse: [ nextChar ifNotNil: [attributeValueStream nextPut: nextChar]]. ^self fastStreamStringContents: attributeValueStream " ^attributeValueStream contents"! Item was changed: ----- Method: SAXDriver>>handlePCData: (in category 'handling tokens') ----- handlePCData: aString + self languageEnvironment + ifNotNil: [aString applyLanguageInfomation: self languageEnvironment]. self saxHandler checkEOD; characters: aString! Item was changed: ----- Method: SAXHandler>>driver: (in category 'accessing') ----- driver: aDriver + driver := aDriver. - driver _ aDriver. driver saxHandler: self! Item was changed: ----- Method: DTDParameterEntityDeclaration class>>initialize (in category 'class initialization') ----- initialize "DTDParameterEntityDeclaration initialize" + contextBehavior := Dictionary new. - contextBehavior _ Dictionary new. contextBehavior at: #content put: #notRecognized: ; at: #attributeValueContent put: #notRecognized: ; at: #attributeValue put: #notRecognized: ; at: #entityValue put: #include: ; at: #dtd put: #includePE:! Item was changed: ----- Method: XMLDOMParser>>nextEntity (in category 'parsing') ----- nextEntity | currentTop | + currentTop := self top. - currentTop _ self top. [self driver nextEntity isNil or: [self top ~~ currentTop]] whileTrue. ^entity! Item was changed: ----- Method: XMLTokenizer>>nextUpTo: (in category 'streaming') ----- nextUpTo: delimiter | resultStream nextChar | + resultStream := WriteStream on: (String new: 10). + [self atEnd or: [(nextChar := self next) == delimiter]] - resultStream _ WriteStream on: (String new: 10). - [self atEnd or: [(nextChar _ self next) == delimiter]] whileFalse: [resultStream nextPut: nextChar]. nextChar == delimiter ifFalse: [self parseError: 'XML no delimiting ' , delimiter printString , ' found']. ^resultStream contents ! Item was changed: ----- Method: XMLTokenizer>>peek (in category 'streaming') ----- peek "Return the next character from the current input stream. If the current stream poop to next nesting level if there is one. Due to the potential nesting of original document, included documents and replacment texts the streams are held in a stack representing the nested streams. The current stream is the top one." peekChar ifNil: [ nestedStreams ifNotNil: [self checkNestedStream]. + ^peekChar := stream next] - ^peekChar _ stream next] ifNotNil: [^peekChar]! Item was changed: ----- Method: DTDEntityDeclaration>>value: (in category 'accessing') ----- value: aString + value := aString! - value _ aString! Item was changed: ----- Method: XMLDocument>>requiredMarkup: (in category 'accessing') ----- requiredMarkup: aString + requiredMarkup := aString! - requiredMarkup _ aString! Item was changed: Object subclass: #XMLWriter + instanceVariableNames: 'stream stack scope scanner canonical currentIndent indentString' - instanceVariableNames: 'stream stack scope scanner canonical' classVariableNames: 'XMLTranslation XMLTranslationMap' poolDictionaries: '' category: 'XML-Parser'! Item was changed: ----- Method: XMLTokenizer>>parameterEntities (in category 'entities') ----- parameterEntities + parameterEntities ifNil: [parameterEntities := Dictionary new]. - parameterEntities ifNil: [parameterEntities _ Dictionary new]. ^parameterEntities! Item was added: + ----- Method: XMLElement>>parent: (in category 'accessing') ----- + parent: anXMLElement + parent := anXMLElement ! Item was added: + ----- Method: XMLDocument>>root (in category 'accessing') ----- + root + "return my root element" + ^ self topElement ! Item was changed: ----- Method: XMLDOMParser class>>addressBookXMLWithDTD (in category 'examples') ----- addressBookXMLWithDTD "XMLDOMParser addressBookXMLWithDTD" + ^self parseDocumentFrom: XMLTokenizer addressBookXMLWithDTD readStream useNamespaces: true! - ^self parseDocumentFrom: XMLTokenizer addressBookXMLWithDTD readStream! Item was changed: ----- Method: XMLNodeWithElements>>elementUnqualifiedAt:ifAbsent: (in category 'accessing') ----- elementUnqualifiedAt: entityName ifAbsent: aBlock + elementsAndContents - elements ifNil: [^aBlock value]. ^self elements detect: [:each | each localName = entityName] ifNone: [^aBlock value]! Item was changed: XMLTokenizer subclass: #SAXDriver + instanceVariableNames: 'saxHandler scope useNamespaces validateAttributes languageEnvironment' - instanceVariableNames: 'saxHandler scope useNamespaces validateAttributes' classVariableNames: '' poolDictionaries: '' category: 'XML-Parser'! Item was changed: ----- Method: SAXHandler class>>parseDocumentFrom: (in category 'instance creation') ----- parseDocumentFrom: aStream + ^self parseDocumentFrom: aStream useNamespaces: false! - | parser | - parser _ self on: aStream. - parser startDocument. - parser parseDocument. - ^parser! Item was changed: ----- Method: XMLNodeWithElements>>elements (in category 'accessing') ----- elements + ^self elementsAndContents! - elements ifNil: [elements _ OrderedCollection new]. - ^elements! Item was changed: ----- Method: XMLWriter>>startTag: (in category 'writing xml') ----- startTag: tagName + self writeIndent. self startTag: tagName xmlns: nil! Item was changed: ----- Method: XMLTokenizer>>nextCDataOrConditional (in category 'tokenizing') ----- nextCDataOrConditional | nextChar conditionalKeyword | "Skip [" self next. self skipSeparators. + nextChar := self peek. - nextChar _ self peek. nextChar == $% ifTrue: [ self checkAndExpandReference: (self parsingMarkup ifTrue: [#dtd] ifFalse: [#content]). + conditionalKeyword := self nextLiteral. - conditionalKeyword _ self nextLiteral. self skipSeparators. ^self next == $[ ifTrue: [ self skipSeparators. self nextIncludeSection: (self conditionalInclude: conditionalKeyword)] ifFalse: [self errorExpected: '[' ]]. nextChar == $C ifTrue: [ ^self nextLiteral = 'CDATA' ifTrue: [self peek == $[ ifTrue: [self nextCDataContent] ifFalse: [self errorExpected: '[' ]] ifFalse: [self errorExpected: 'CData']]. self errorExpected: 'CData or declaration' ! Item was added: + ----- Method: XMLTokenizer>>match:into: (in category 'streaming') ----- + match: subCollection into: resultStream + "Set the access position of the receiver to be past the next occurrence of the subCollection. Answer whether subCollection is found. No wildcards, and case does matter." + + | pattern startMatch | + pattern _ ReadStream on: subCollection. + startMatch _ nil. + [pattern atEnd] whileFalse: + [self atEnd ifTrue: [^ false]. + (self next) = (pattern next) + ifTrue: [pattern position = 1 ifTrue: [startMatch _ self position]] + ifFalse: [pattern position: 0. + startMatch ifNotNil: [ + self position: startMatch. + startMatch _ nil]]]. + ^ true + + ! Item was added: + ----- Method: SAXDriver>>languageEnvironment (in category 'accessing') ----- + languageEnvironment + ^languageEnvironment! Item was changed: ----- Method: XMLNode>>firstTagNamed: (in category 'searching') ----- firstTagNamed: aSymbol "Return the first encountered node with the specified tag. Pass the message on" | answer | + self elementsDo: [:node | (answer := node firstTagNamed: aSymbol) ifNotNil: [^answer]]. - self elementsDo: [:node | (answer _ node firstTagNamed: aSymbol) ifNotNil: [^answer]]. ^nil! Item was changed: ----- Method: XMLTokenizer>>nextMarkupDeclaration (in category 'tokenizing dtd') ----- nextMarkupDeclaration | declType | + declType := self nextLiteral. - declType _ self nextLiteral. self validating ifFalse: [^self skipMarkupDeclaration]. declType = 'ENTITY' ifTrue: [self nextEntityDeclaration] ifFalse: [self skipMarkupDeclaration]! Item was changed: ----- Method: XMLTokenizer>>nextCDataContent (in category 'tokenizing') ----- nextCDataContent | cdata | "Skip $[ " self next. + cdata := self nextUpToAll: ']]>'. - cdata _ self nextUpToAll: ']]>'. self handleCData: cdata ! Item was added: + ----- Method: XMLWriter>>indentString: (in category 'accessing') ----- + indentString: aString + currentIndent := 0. + indentString := aString! Item was added: + ----- Method: XMLElement>>elements (in category 'accessing') ----- + elements + ^self elementsAndContents select: [:each | each isText not]! Item was changed: ----- Method: XMLElement>>contentsDo: (in category 'enumerating') ----- contentsDo: aBlock + self elementsAndContentsDo: [:each | each isText ifTrue: [aBlock value: each]]! - contents - ifNotNil: [ - self contents do: [:each | aBlock value: each]]! Item was changed: ----- Method: XMLTokenizer>>nextTag (in category 'tokenizing') ----- nextTag | tagName attributes nextChar namespaces | (self peek = $/) ifTrue: [^self nextEndTag]. + tagName := self nextName. - tagName _ self nextName. self skipSeparators. + attributes := Dictionary new: 33. + namespaces := Dictionary new: 5. + [(nextChar := self peek) == $> or: [nextChar == $/]] whileFalse: [ - attributes _ Dictionary new: 33. - namespaces _ Dictionary new: 5. - [(nextChar _ self peek) == $> or: [nextChar == $/]] whileFalse: [ self checkAndExpandReference: #content. self nextAttributeInto: attributes namespaces: namespaces. self skipSeparators.]. self handleStartTag: tagName attributes: attributes namespaces: namespaces. self next == $/ ifTrue: [ self handleEndTag: tagName. self next]. ! Item was changed: ----- Method: SAXHandler class>>parseDocumentFromFileNamed:readIntoMemory: (in category 'instance creation') ----- parseDocumentFromFileNamed: fileName readIntoMemory: readIntoMemory | stream xmlDoc | + stream := FileDirectory default readOnlyFileNamed: fileName. - stream _ FileDirectory default readOnlyFileNamed: fileName. readIntoMemory + ifTrue: [stream := stream contentsOfEntireFile readStream]. + [xmlDoc := self parseDocumentFrom: stream] - ifTrue: [stream _ stream contentsOfEntireFile readStream]. - [xmlDoc _ self parseDocumentFrom: stream] ensure: [stream close]. ^xmlDoc! Item was added: + ClassTestCase subclass: #XMLParserTest + instanceVariableNames: '' + classVariableNames: '' + poolDictionaries: '' + category: 'XML-Parser'! Item was added: + ----- Method: XMLWriter>>writeIndent (in category 'private') ----- + writeIndent + currentIndent ifNotNil: [ + currentIndent timesRepeat: [self stream nextPutAll: indentString]]! Item was changed: ----- Method: XMLTokenizer>>nextNode (in category 'tokenizing') ----- nextNode | nextChar | "Skip < " self next. + nextChar := self peek. - nextChar _ self peek. nextChar == $!! ifTrue: [ "Skip !!" self next. + nextChar := self peek. - nextChar _ self peek. nextChar == $- ifTrue: [^self nextComment]. nextChar == $[ ifTrue: [^self nextCDataOrConditional]. ^self parsingMarkup ifTrue: [self nextMarkupDeclaration] ifFalse: [self nextDocType]]. nextChar == $? ifTrue: [^self nextPI]. ^self nextTag! Item was changed: ----- Method: XMLTokenizer class>>initialize (in category 'class initialization') ----- initialize "XMLTokenizer initialize" + CharEscapes := CharacterSet newFrom: #( $& $" $' $> $< ). - | nameDelimiters | - - CharEscapes _ #( $& $" $' $> $< ) asSet. - - SeparatorTable _ Array new: 256. - SeparatorTable atAllPut: true. - #(9 10 12 13 32) do: [:each | SeparatorTable at: each+1 put: false]. + SeparatorTable := CharacterSet new. + #(9 10 12 13 32) do: [:each | SeparatorTable add: each asCharacter]. - LiteralChars _ Array new: 256. - LiteralChars atAllPut: false. - ':-_.' do: [:each | LiteralChars at: each asciiValue put: true]. - 1 to: 256 do: [:i | ((i-1) asCharacter isDigit or: [(i-1) asCharacter isLetter]) - ifTrue: [LiteralChars at: i put: true]]. + LiteralChars := CharacterSet newFrom: #( $: $- $: $= $.). + 0 to: 255 do: [:i | + | char | + char := i asCharacter. + (char isDigit or: [char isLetter]) + ifTrue: [LiteralChars add: char]]. + + NameDelimiters := CharacterSet new. + #(9 10 12 13 32 61 "$= asInteger 61" 62 "$> asInteger" 47 "$/ asInteger") + do: [:each | NameDelimiters add: each asCharacter]. + + DigitTable := Array new: 256. - nameDelimiters _ #(9 10 12 13 32 61 "$= asInteger 61" 62 "$> asInteger" 47 "$/ asInteger"). - NameDelimiters _ Array new: 256. - NameDelimiters atAllPut: false. - nameDelimiters do: [:each | NameDelimiters at: each put: true]. - - DigitTable _ Array new: 256. DigitTable atAllPut: -1. ($0 to: $9) do: [:each | DigitTable at: each asciiValue put: each digitValue]. + ($a to: $f) do: [:each | DigitTable at: each asciiValue put: each digitValue]. ($A to: $F) do: [:each | DigitTable at: each asciiValue put: each digitValue]. ! Item was changed: ----- Method: XMLNodeWithElements>>namespace:uri: (in category 'name space') ----- namespace: ns uri: u + namespace := ns. + uri := u! - namespace _ ns. - uri _ u! Item was changed: ----- Method: XMLWriter>>endTag: (in category 'writing xml') ----- endTag: tagName + self outdent. self endTag: tagName xmlns: nil! Item was changed: ----- Method: XMLTokenizer>>nextEntityValue (in category 'tokenizing') ----- nextEntityValue | delimiterChar entityValueStream nextChar nextPeek referenceString entity entityValue | + delimiterChar := self next. - delimiterChar _ self next. (delimiterChar == $" or: [delimiterChar == $']) ifFalse: [self errorExpected: 'Entity value delimiter expected.']. + entityValueStream := WriteStream on: (String new). - entityValueStream _ WriteStream on: (String new). [ + nextPeek := nextChar := self peek. - nextPeek _ nextChar _ self peek. nextChar ifNil: [self errorExpected: 'Character expected.']. nextChar == $& ifTrue: [ self next. self peek == $# ifTrue: [ + nextPeek := nil. + nextChar := self nextCharReference] - nextPeek _ nil. - nextChar _ self nextCharReference] ifFalse: [ + referenceString := self nextLiteral. - referenceString _ self nextLiteral. self next == $; ifFalse: [self errorExpected: ';']. + entity := self entity: referenceString. + entityValue := entity valueForContext: #entityValue. - entity _ self entity: referenceString. - entityValue _ entity valueForContext: #entityValue. self pushStream: (ReadStream on: entityValue asString). + nextPeek := nextChar := self next]] - nextPeek _ nextChar _ self next]] ifFalse: [ nextChar == $% ifTrue: [ self skipSeparators. + referenceString := self nextLiteral. + nextChar := self handleEntity: referenceString in: #entityValue. + nextPeek := nextChar := self next] - referenceString _ self nextLiteral. - nextChar _ self handleEntity: referenceString in: #entityValue. - nextPeek _ nextChar _ self next] ifFalse: [self next]]. nextPeek == delimiterChar] whileFalse: [ nextChar ifNotNil: [entityValueStream nextPut: nextChar]]. ^entityValueStream contents! Item was changed: ----- Method: XMLTokenizer>>nestedStreams (in category 'private') ----- nestedStreams + nestedStreams ifNil: [nestedStreams := OrderedCollection new]. - nestedStreams ifNil: [nestedStreams _ OrderedCollection new]. ^nestedStreams! Item was changed: ----- Method: XMLDOMParser>>incremental: (in category 'accessing') ----- incremental: aBoolean + incremental := aBoolean! - incremental _ aBoolean! Item was changed: ----- Method: XMLTokenizer>>endParsingMarkup (in category 'private') ----- endParsingMarkup + parsingMarkup := false! - parsingMarkup _ false! Item was changed: ----- Method: XMLTokenizer>>popNestingLevel (in category 'streaming') ----- popNestingLevel self hasNestedStreams ifTrue: [ self stream close. self stream: self nestedStreams removeLast. self nestedStreams size > 0 + ifFalse: [nestedStreams := nil]]! - ifFalse: [nestedStreams _ nil]]! Item was changed: ----- Method: XMLTokenizer>>nextLiteral (in category 'tokenizing') ----- nextLiteral | resultStream nextChar resultString | + resultStream := (String new: 10) writeStream. + ((nextChar := self peek) isLetter - resultStream _ (String new: 10) writeStream. - ((nextChar _ self peek) isLetter or: [nextChar == $_]) ifFalse: [self errorExpected: 'Name literal.']. + [nextChar := self peek. + (LiteralChars includes: nextChar) - [nextChar _ self peek. - (LiteralChars at: nextChar asciiValue+1) ifTrue: [ nextChar == $& ifTrue: [ + nextChar := self next. - nextChar _ self next. resultStream nextPut: (self peek == $# ifTrue: [self nextCharReference] ifFalse: [^resultStream contents])] ifFalse: [ resultStream nextPut: self next]] + ifFalse: [resultString := resultStream contents. - ifFalse: [resultString _ resultStream contents. resultString isEmpty ifTrue: [self errorExpected: 'Name literal'] ifFalse: [^resultString]]] repeat! Item was added: + ----- Method: XMLNode>>elementsAndContentsDo: (in category 'enumerating') ----- + elementsAndContentsDo: aBlock + self elementsDo: aBlock! Item was changed: ----- Method: XMLWriter>>xmlDeclaration:encoding: (in category 'writing xml') ----- xmlDeclaration: versionString encoding: encodingString self canonical ifFalse: [ self startPI: 'xml'; attribute: 'version' value: versionString; attribute: 'encoding' value: encodingString; endPI. self stream flush]! Item was changed: ----- Method: XMLTokenizer>>nextDocTypeDecl (in category 'tokenizing dtd') ----- nextDocTypeDecl | nextChar | self skipSeparators. self nextLiteral. self skipSeparators. self peek == $[ + ifFalse: [[nextChar := self peek. - ifFalse: [[nextChar _ self peek. nextChar == $> or: [nextChar == $[ ]] whileFalse: [self next]]. self peek == $[ ifTrue: [ self next. [self skipSeparators. self peek == $]] whileFalse: [ self checkAndExpandReference: #dtd. self nextNode]. self next == $] ifFalse: [self errorExpected: ']' ]]. self skipSeparators. self next == $> ifFalse: [self errorExpected: '>' ]. self endParsingMarkup! Item was changed: ----- Method: XMLNodeWithElements>>elementAt:ifAbsent: (in category 'accessing') ----- elementAt: entityName ifAbsent: aBlock + elementsAndContents - elements ifNil: [^aBlock value]. + ^self elements detect: [:each | each isProcessingInstruction not and: [each name = entityName or: [each localName = entityName]]] ifNone: [^aBlock value]! - ^self elements detect: [:each | each name = entityName or: [each localName = entityName]] ifNone: [^aBlock value]! Item was added: + ----- Method: XMLWriter>>indentTab (in category 'accessing') ----- + indentTab + self indentString: (String with: Character tab)! Item was changed: ----- Method: SAXDriver>>handleEndTag: (in category 'handling tokens') ----- handleEndTag: elementName | namespace localName namespaceURI qualifiedName | self usesNamespaces ifTrue: [ self splitName: elementName into: [:ns :ln | + namespace := ns. + localName := ln]. - namespace _ ns. - localName _ ln]. "ensure our namespace is defined" namespace ifNil: [ namespace := self scope defaultNamespace. qualifiedName := namespace , ':' , elementName] ifNotNil: [ namespaceURI := self scope namespaceURIOf: namespace. namespaceURI ifNil: [self parseError: 'Start tag ' , elementName , ' refers to undefined namespace ' , namespace asString]. qualifiedName := elementName]. "call the handler" self saxHandler checkEOD; endElement: localName namespace: namespace namespaceURI: namespaceURI qualifiedName: qualifiedName. self scope leaveScope] ifFalse: [ "call the handler" self saxHandler checkEOD; endElement: elementName namespace: nil namespaceURI: nil qualifiedName: elementName]! Item was changed: ----- Method: XMLNodeWithElements>>elementsDo: (in category 'enumerating') ----- elementsDo: aBlock + self elements do: [:each | aBlock value: each]! - elements - ifNotNil: [ - self elements do: [:each | aBlock value: each]]! Item was removed: - ----- Method: SAXHandler class>>new (in category 'instance creation') ----- - new - ^super new initialize! Item was removed: - ----- Method: XMLTokenizer class>>new (in category 'instance creation') ----- - new - ^super new initialize! |
Free forum by Nabble | Edit this page |