Fix headlines
Материал из Tkabber Wiki
Код в Config.tcl для перевода в простой текст хедлайнов, идущих с сайта 4pda.com:
### Helper code to extract plain text from HTML.
### requires htmlparse package (a part of Tcllib).
package require htmlparse
namespace eval htparse {
variable id 0
}
proc htparse::to_text {html} {
variable id
set token [namespace current]::htparse$id
incr id
upvar #0 $token state
set state(text) ""
set state(linkno) 1
set state(links) [list]
htmlparse::parse -cmd [list [namespace current]::Step $token] \
[htmlparse::mapEscapes $html]
set out $state(text)
set n 1
foreach link $state(links) {
append out \n\[ $n {] } $link
incr n
}
unset state
return $out
}
proc htparse::Step {token tag slash param cdata} {
upvar #0 $token state
if {[IsBreak $tag $slash]} {
append state(text) \n
} elseif {[NeedSpace $tag $slash]} {
append state(text) " "
}
set text [string trim $cdata]
if {$text ne ""} {
append state(text) $text
}
foreach {name attr} {a href img src} {
if {$tag eq $name} {
if {[ExtractAttr $param $attr link]} {
lappend state(links) $link
append state(text) { [} $state(linkno) {]}
incr state(linkno)
}
}
}
}
proc htparse::ExtractAttr {param attr valueVar} {
upvar 1 $valueVar value
foreach pair [split $param] {
foreach {a v} [split $pair =] {
if {$a eq $attr} {
set value $v
return 1
}
}
}
return 0
}
proc htparse::IsBreak {tag slash} {
expr {$slash ne ""
&& ($tag eq "p" || $tag eq "div" || $tag eq "br")}
}
proc htparse::NeedSpace {tag slash} {
expr {$tag eq "img" || ($tag eq "a" && $slash eq "")}
}
### rewrite_message_hook code to locate headlines coming
### from 4pda.com and rewrite their bodies as plain text
proc fix_headline {vconnid vfrom vid vtype vis_subject vsubject \
vbody verr vthread vpriority vx} {
upvar 2 $vtype type $vbody body $vx x
if {$type ne "headline"} return
if {[string match *4pda* [headline_url $x]]} {
set body [htparse::to_text $body]
}
}
proc headline_url {x} {
foreach extra $x {
jlib::wrapper:splitxml $extra tag vars isempty chdata children
if {[jlib::wrapper:getattr $vars xmlns] eq "jabber:x:oob"} {
foreach item $children {
jlib::wrapper:splitxml $item tag vars isempty chdata children
if {$tag eq "url"} {
return $url
}
}
}
}
return ""
}
hook rewrite_message_hook fix_headline