Add an xmlx.CharsetFunc parameter to the Feed.Fetch method. It can be used to supply a custom character encoding conversion routine to the XML decoder. Update the test code accordingly.

This commit is contained in:
jim teeuwen 2012-02-29 11:31:01 +01:00
parent 957d827135
commit f51db12ad3
2 changed files with 12 additions and 6 deletions

11
feed.go
View File

@ -84,7 +84,13 @@ func New(cachetimeout int, enforcecachelimit bool, ch ChannelHandler, ih ItemHan
// The value is in seconds.
func (this *Feed) LastUpdate() int64 { return this.lastupdate }
func (this *Feed) Fetch(uri string) (err error) {
// Fetch retrieves the feed's latest content if necessary.
//
// The charset parameter overrides the xml decoder's CharsetReader.
// This allows us to specify a custom character encoding conversion
// routine when dealing with non-utf8 input. Supply 'nil' to use the
// default from Go's xml package.
func (this *Feed) Fetch(uri string, charset xmlx.CharsetFunc) (err error) {
if !this.CanUpdate() {
return
}
@ -94,7 +100,8 @@ func (this *Feed) Fetch(uri string) (err error) {
// Extract type and version of the feed so we can have the appropriate
// function parse it (rss 0.91, rss 0.92, rss 2, atom etc).
doc := xmlx.New()
if err = doc.LoadUri(uri); err != nil {
if err = doc.LoadUri(uri, charset); err != nil {
return
}
this.Type, this.Version = this.GetVersionInfo(doc)

View File

@ -4,9 +4,8 @@ import "testing"
func TestFeed(t *testing.T) {
urilist := []string{
//"http://localhost:8081/craigslist.rss",
//"http://store.steampowered.com/feeds/news.xml", // This feed violates the rss spec.
"http://cyber.law.harvard.edu/rss/examples/sampleRss091.xml",
//"http://cyber.law.harvard.edu/rss/examples/sampleRss091.xml", // Non-utf8 encoding.
"http://store.steampowered.com/feeds/news.xml", // This feed violates the rss spec.
"http://cyber.law.harvard.edu/rss/examples/sampleRss092.xml",
"http://cyber.law.harvard.edu/rss/examples/rss2sample.xml",
"http://blog.case.edu/news/feed.atom",
@ -18,7 +17,7 @@ func TestFeed(t *testing.T) {
for _, uri := range urilist {
feed = New(5, true, chanHandler, itemHandler)
if err = feed.Fetch(uri); err != nil {
if err = feed.Fetch(uri, nil); err != nil {
t.Errorf("%s >>> %s", uri, err)
return
}