Fix headlines

Материал из Tkabber Wiki

Перейти к: навигация, поиск

Код в Config.tcl для перевода в простой текст хедлайнов, идущих с сайта 4pda.com:

### Helper code to extract plain text from HTML.
### requires htmlparse package (a part of Tcllib).

package require htmlparse

namespace eval htparse {
	variable id 0
}

proc htparse::to_text {html} {
	variable id
	set token [namespace current]::htparse$id
	incr id

	upvar #0 $token state
	set state(text)   ""
	set state(linkno) 1
	set state(links)  [list]

	htmlparse::parse -cmd [list [namespace current]::Step $token] \
		[htmlparse::mapEscapes $html]

	set out $state(text)
	set n 1
	foreach link $state(links) {
		append out \n\[ $n {] } $link
		incr n
	}

	unset state
	return $out
}

proc htparse::Step {token tag slash param cdata} {
	upvar #0 $token state

	if {[IsBreak $tag $slash]} {
		append state(text) \n
	} elseif {[NeedSpace $tag $slash]} {
		append state(text) " "
	}

	set text [string trim $cdata]
	if {$text ne ""} {
		append state(text) $text
	}

	foreach {name attr} {a href img src} {
		if {$tag eq $name} {
			if {[ExtractAttr $param $attr link]} {
				lappend state(links) $link
				append state(text) { [} $state(linkno) {]}
				incr state(linkno)
			}
		}
	}
}

proc htparse::ExtractAttr {param attr valueVar} {
	upvar 1 $valueVar value

	foreach pair [split $param] {
		foreach {a v} [split $pair =] {
			if {$a eq $attr} {
				set value $v
				return 1
			}
		}
	}

	return 0
}

proc htparse::IsBreak {tag slash} {
	expr {$slash ne ""
	   && ($tag eq "p" || $tag eq "div" || $tag eq "br")}
}

proc htparse::NeedSpace {tag slash} {
	expr {$tag eq "img" || ($tag eq "a" && $slash eq "")}
}

### rewrite_message_hook code to locate headlines coming
### from 4pda.com and rewrite their bodies as plain text

proc fix_headline {vconnid vfrom vid vtype vis_subject vsubject \
		vbody verr vthread vpriority vx} {
	upvar 2 $vtype type $vbody body $vx x
	if {$type ne "headline"} return

	if {[string match *4pda* [headline_url $x]]} {
		set body [htparse::to_text $body]
	}
}

proc headline_url {x} {
	foreach extra $x {
		jlib::wrapper:splitxml $extra tag vars isempty chdata children
		if {[jlib::wrapper:getattr $vars xmlns] eq "jabber:x:oob"} {
			foreach item $children {
				jlib::wrapper:splitxml $item tag vars isempty chdata children
				if {$tag eq "url"} {
					return $url
				}
			}
		}
	}

	return ""
}

hook rewrite_message_hook fix_headline
Личные инструменты