Marcel Taeumel uploaded a new version of Collections to project The Trunk:
http://source.squeak.org/trunk/Collections-mt.631.mcz ==================== Summary ==================== Name: Collections-mt.631 Author: mt Time: 3 May 2015, 10:38:38.118 pm UUID: 0b21fbfc-3cd6-8f45-bf1a-d7e3c5e5c107 Ancestors: Collections-mt.630 HtmlReadWriter: support for comments added, list of ignored tags added =============== Diff against Collections-mt.630 =============== Item was added: + ----- Method: HtmlReadWriter>>ignoredTags (in category 'accessing') ----- + ignoredTags + "Because we cannot process all of them." + + ^ #(body script table tr td ul ol li form select option input)! Item was added: + ----- Method: HtmlReadWriter>>isTagIgnored: (in category 'testing') ----- + isTagIgnored: aTag + + | space t | + space := aTag indexOf: Character space. + t := space > 0 + ifTrue: [aTag copyFrom: 2 to: space - 1] + ifFalse: [aTag copyFrom: 2 to: aTag size - 1]. + ^ self ignoredTags includes: t! Item was changed: ----- Method: HtmlReadWriter>>mapATag: (in category 'mapping') ----- mapATag: aTag | result startIndex stopIndex attribute | result := OrderedCollection new. + + Transcript showln: aTag. "<a href=""http://google.de"">" attribute := 'href'. startIndex := aTag findString: attribute. startIndex > 0 ifTrue: [ + startIndex := aTag findString: '=' startingAt: startIndex+attribute size. + stopIndex := aTag findString: ' ' startingAt: startIndex+1. + stopIndex = 0 ifTrue: [ + stopIndex := aTag findString: '>' startingAt: startIndex+1]. - startIndex := aTag findString: '"' startingAt: startIndex+attribute size. - stopIndex := aTag findString: '"' startingAt: startIndex+1. - result add: (TextURL new url: (aTag copyFrom: startIndex+1 to: stopIndex-1))]. + (aTag at: startIndex + 1) = $" + ifTrue: [startIndex := startIndex + 1]. + (aTag at: stopIndex - 1) = $" + ifTrue: [stopIndex := stopIndex - 1]. + result add: (TextURL new url: (aTag copyFrom: startIndex+1 to: stopIndex-1))]. + ^ result! Item was changed: ----- Method: HtmlReadWriter>>mapTagToAttribute: (in category 'mapping') ----- mapTagToAttribute: aTag aTag = '<b>' ifTrue: [^ {TextEmphasis bold}]. aTag = '<i>' ifTrue: [^ {TextEmphasis italic}]. aTag = '<u>' ifTrue: [^ {TextEmphasis underlined}]. "aTag = '<code>' ifTrue: [^ {TextFontReference toFont: Preferences standardCodeFont}]." (aTag beginsWith: '<font') ifTrue: [^ self mapFontTag: aTag]. (aTag beginsWith: '<a') ifTrue: [^ self mapATag: aTag]. + "h1, h2, h3, ..." + (aTag second = $h and: [aTag third isDigit]) + ifTrue: [^ {TextEmphasis bold}]. + ^ {}! Item was changed: ----- Method: HtmlReadWriter>>nextText (in category 'accessing') ----- nextText count := 0. offset := 0. "To ignore characters in the input string that are used by tags." runStack := Stack new. runArray := RunArray new. string := OrderedCollection new. "{text attributes. start index. end index. number of open tags}" runStack push: {OrderedCollection new. 1. nil. 0}. [stream atEnd] whileFalse: [self processNextTag]. self processRunStackTop. "Add last run." string := String withAll: string. + runArray coalesce. ^ Text string: string runs: runArray! Item was added: + ----- Method: HtmlReadWriter>>processComment: (in category 'reading') ----- + processComment: aComment + ! Item was added: + ----- Method: HtmlReadWriter>>processEmptyTag: (in category 'reading') ----- + processEmptyTag: aTag + + (aTag beginsWith: '<br') ifTrue: [ + string add: Character cr. + count := count + 1. + ^ self]. + + (self ignoredTags includes: (aTag copyFrom: 2 to: aTag size - 3)) + ifTrue: [^ self]. + + "TODO..."! Item was changed: ----- Method: HtmlReadWriter>>processEndTag: (in category 'reading') ----- processEndTag: aTag | index | index := count - offset. + (self ignoredTags includes: (aTag copyFrom: 3 to: aTag size -1)) + ifTrue: [^ self]. + - aTag = '</br>' ifTrue: [ - string add: Character cr. - count := count + 1. - ^ self]. - "De-Accumulate adjacent tags." runStack top at: 4 put: runStack top fourth - 1. runStack top fourth > 0 ifTrue: [^ self "not yet"]. self processRunStackTop. runStack pop. runStack top at: 2 put: index + 1.! Item was changed: ----- Method: HtmlReadWriter>>processNextTag (in category 'reading') ----- processNextTag + | tag htmlEscape lookForNewTag lookForHtmlEscape tagFound valid inComment | - | tag htmlEscape lookForNewTag lookForHtmlEscape tagFound valid | lookForNewTag := true. lookForHtmlEscape := false. tagFound := false. tag := OrderedCollection new. htmlEscape := OrderedCollection new. + inComment := false. [stream atEnd not and: [tagFound not]] whileTrue: [ | character | character := stream next. valid := (#(10 13) includes: character asciiValue) not. count := count + 1. character = $< ifTrue: [lookForNewTag := false]. + character = $& ifTrue: [ + inComment ifFalse: [lookForHtmlEscape := true]]. - character = $& ifTrue: [lookForHtmlEscape := true]. lookForNewTag ifTrue: [ lookForHtmlEscape ifFalse: [valid ifTrue: [string add: character] ifFalse: [offset := offset + 1]] ifTrue: [valid ifTrue: [htmlEscape add: character]. offset := offset + 1]] ifFalse: [valid ifTrue: [tag add: character]. offset := offset + 1]. + inComment := ((lookForNewTag not and: [tag size >= 4]) + and: [tag beginsWith: '<!!--']) + and: [(tag endsWith: '-->') not]. + + ((character = $> and: [inComment not]) and: [lookForNewTag not]) ifTrue: [ - character = $> ifTrue: [ lookForNewTag := true. + (tag beginsWith: '<!!--') + ifTrue: [self processComment: (String withAll: tag)] + ifFalse: [tag second ~= $/ + ifTrue: [ + (tag atLast: 2) == $/ + ifTrue: [self processEmptyTag: (String withAll: tag)] + ifFalse: [self processStartTag: (String withAll: tag)]] + ifFalse: [self processEndTag: (String withAll: tag)]]. - "Full tag like <b> or </b> found." - tag second ~= $/ - ifTrue: [self processStartTag: (String withAll: tag)] - ifFalse: [self processEndTag: (String withAll: tag)]. tagFound := true]. + (((character = $; and: [lookForNewTag]) + and: [htmlEscape notEmpty]) and: [htmlEscape first = $&]) ifTrue: [ + lookForHtmlEscape := false. + self processHtmlEscape: (String withAll: htmlEscape). + htmlEscape := OrderedCollection new]]. - character = $; ifTrue: [ - lookForHtmlEscape := false. - self processHtmlEscape: (String withAll: htmlEscape). - htmlEscape := OrderedCollection new]]. ! Item was changed: ----- Method: HtmlReadWriter>>processStartTag: (in category 'reading') ----- processStartTag: aTag | index | + (self isTagIgnored: aTag) ifTrue: [^ self]. + index := count - offset. + - aTag = '<br>' ifTrue: [ string add: Character cr. count := count + 1. ^ self]. + (aTag beginsWith: '<img') ifTrue: [ + string addAll: '[image]'. + count := count + 7. + ^ self]. "Accumulate adjacent tags." (runStack size > 1 and: [runStack top second = (index + 1) "= adjacent start tags"]) ifTrue: [ runStack top at: 1 put: (runStack top first copy addAll: (self mapTagToAttribute: aTag); yourself). runStack top at: 4 put: (runStack top fourth + 1). "increase number of open tags" ^self]. self processRunStackTop. "Remove start/end info to reuse attributes later." runStack top at: 2 put: nil. runStack top at: 3 put: nil. "Copy attr list and add new attr." runStack push: ({runStack top first copy addAll: (self mapTagToAttribute: aTag); yourself. index + 1. nil. 1}).! |
Free forum by Nabble | Edit this page |