Did some significant reworking of the RSS and Atom internals. The Feed object now supports passing in two function handlers which notify your app of any new Channels or Items. These functions are called whenever the feed is updated and new channels or items are found which previously did not exist. See src/feed_test.go for examples. This change also made it prudent to change some of the struct definitions, in particular the fields referencing other structs: they have been changed from value to pointer definitions. So beware that this may break some of your existing code. At the very least, the feeder.New() function now takes two extra parameters.

This commit is contained in:
jim teeuwen 2010-12-18 00:25:16 +01:00
parent eb15b6a3ef
commit a8057b0c92
7 changed files with 223 additions and 107 deletions

7
README
View File

@ -22,6 +22,13 @@
tried to create as many shared fields as possible but some of them simply do
not occur in either the RSS or Atom spec.
The Feed object supports notifications of new channels and items.
This is achieved by passing 2 function handlers to the feeder.New() function.
They will be called whenever a feed is updated from the remote source and
either a new channel or a new item is found that previously did not exist.
This allows you to easily monitor a feed for changes. See src/feed_test.go for
an example of how this works.
================================================================================
LICENSE
================================================================================

View File

@ -6,14 +6,42 @@ import "xmlx"
func (this *Feed) readAtom(doc *xmlx.Document) (err os.Error) {
ns := "http://www.w3.org/2005/Atom"
channels := doc.SelectNodes(ns, "feed")
getChan := func(id string) *Channel {
for _, c := range this.Channels {
if c.Id == id {
return c
}
}
return nil
}
haveItem := func(ch *Channel, id string) bool {
for _, item := range ch.Items {
if item.Id == id {
return true
}
}
return false
}
var ch *Channel
var i *Item
var tn *xmlx.Node
var list []*xmlx.Node
for _, node := range channels {
ch := Channel{}
if ch = getChan(node.GetValue("", "pubDate")); ch == nil {
ch = new(Channel)
this.Channels = append(this.Channels, ch)
}
ch.Title = node.GetValue(ns, "title")
ch.LastBuildDate = node.GetValue(ns, "updated")
ch.Id = node.GetValue(ns, "id")
ch.Rights = node.GetValue(ns, "rights")
list := node.SelectNodes(ns, "link")
list = node.SelectNodes(ns, "link")
ch.Links = make([]Link, len(list))
for i, v := range list {
ch.Links[i].Href = v.GetAttr("", "href")
@ -22,8 +50,7 @@ func (this *Feed) readAtom(doc *xmlx.Document) (err os.Error) {
ch.Links[i].HrefLang = v.GetAttr("", "hreflang")
}
tn := node.SelectNode(ns, "subtitle")
if tn != nil {
if tn = node.SelectNode(ns, "subtitle"); tn != nil {
ch.SubTitle = SubTitle{}
ch.SubTitle.Type = tn.GetAttr("", "type")
ch.SubTitle.Text = tn.Value
@ -43,50 +70,56 @@ func (this *Feed) readAtom(doc *xmlx.Document) (err os.Error) {
ch.Author.Email = tn.GetValue("", "email")
}
itemcount := len(ch.Items)
list = node.SelectNodes(ns, "entry")
ch.Items = make([]Item, len(list))
for _, v := range list {
item := Item{}
item.Title = v.GetValue(ns, "title")
item.Id = v.GetValue(ns, "id")
item.PubDate = v.GetValue(ns, "updated")
item.Description = v.GetValue(ns, "summary")
list = v.SelectNodes(ns, "link")
item.Links = make([]Link, 0)
for _, lv := range list {
for _, item := range list {
if haveItem(ch, item.GetValue("", "id")) {
continue
}
i = new(Item)
i.Title = item.GetValue(ns, "title")
i.Id = item.GetValue(ns, "id")
i.PubDate = item.GetValue(ns, "updated")
i.Description = item.GetValue(ns, "summary")
links := item.SelectNodes(ns, "link")
for _, lv := range links {
if tn.GetAttr(ns, "rel") == "enclosure" {
enc := Enclosure{}
enc := new(Enclosure)
enc.Url = lv.GetAttr("", "href")
enc.Type = lv.GetAttr("", "type")
item.Enclosures = append(item.Enclosures, enc)
i.Enclosures = append(i.Enclosures, enc)
} else {
lnk := Link{}
lnk := new(Link)
lnk.Href = lv.GetAttr("", "href")
lnk.Rel = lv.GetAttr("", "rel")
lnk.Type = lv.GetAttr("", "type")
lnk.HrefLang = lv.GetAttr("", "hreflang")
item.Links = append(item.Links, lnk)
i.Links = append(i.Links, lnk)
}
}
list = v.SelectNodes(ns, "contributor")
item.Contributors = make([]string, len(list))
for ci, cv := range list {
item.Contributors[ci] = cv.GetValue("", "name")
list = item.SelectNodes(ns, "contributor")
for _, cv := range list {
i.Contributors = append(i.Contributors, cv.GetValue("", "name"))
}
if tn = v.SelectNode(ns, "content"); tn != nil {
item.Content = Content{}
item.Content.Type = tn.GetAttr("", "type")
item.Content.Lang = tn.GetValue("xml", "lang")
item.Content.Base = tn.GetValue("xml", "base")
item.Content.Text = tn.Value
}
ch.Items = append(ch.Items, item)
if tn = item.SelectNode(ns, "content"); tn != nil {
i.Content = new(Content)
i.Content.Type = tn.GetAttr("", "type")
i.Content.Lang = tn.GetValue("xml", "lang")
i.Content.Base = tn.GetValue("xml", "base")
i.Content.Text = tn.Value
}
this.Channels = append(this.Channels, ch)
ch.Items = append(ch.Items, i)
}
if itemcount != len(ch.Items) && this.itemhandler != nil {
this.itemhandler(this, ch, ch.Items[itemcount:])
}
}
return
}

View File

@ -11,14 +11,14 @@ type Channel struct {
PubDate string
LastBuildDate string
Docs string
Categories []Category
Categories []*Category
Generator Generator
TTL int
Rating string
SkipHours []int
SkipDays []int
Image Image
Items []Item
Items []*Item
Cloud Cloud
TextInput Input

View File

@ -32,6 +32,9 @@ import "fmt"
import "strconv"
import "strings"
// ChannelHandler is the signature of the notification callback for new
// channels. It receives the feed and the slice of channels that were appended
// to the feed during an update.
type ChannelHandler func(f *Feed, newchannels []*Channel)
// ItemHandler is the signature of the notification callback for new items.
// It receives the feed, the channel the items belong to, and the slice of
// items that were appended to that channel during an update.
type ItemHandler func(f *Feed, ch *Channel, newitems []*Item)
type Feed struct {
// Custom cache timeout in minutes.
CacheTimeout int
@ -47,21 +50,31 @@ type Feed struct {
Version [2]int
// Channels with content.
Channels []Channel
Channels []*Channel
// Url from which this feed was created.
Url string
// A notification function, used to notify the host when a new channel
// has been found.
chanhandler ChannelHandler
// A notification function, used to notify the host when a new item
// has been found for a given channel.
itemhandler ItemHandler
// Last time content was fetched. Used in conjunction with CacheTimeout
// to ensure we don't get content too often.
lastupdate int64
}
func New(cachetimeout int, enforcecachelimit bool) *Feed {
func New(cachetimeout int, enforcecachelimit bool, ch ChannelHandler, ih ItemHandler) *Feed {
v := new(Feed)
v.CacheTimeout = cachetimeout
v.EnforceCacheLimit = enforcecachelimit
v.Type = "none"
v.chanhandler = ch
v.itemhandler = ih
return v
}
@ -71,7 +84,6 @@ func (this *Feed) Fetch(uri string) (err os.Error) {
}
this.Url = uri
this.Channels = nil
// Extract type and version of the feed so we can have the appropriate
// function parse it (rss 0.91, rss 0.92, rss 2, atom etc).
@ -86,14 +98,21 @@ func (this *Feed) Fetch(uri string) (err os.Error) {
return
}
chancount := len(this.Channels)
if err = this.buildFeed(doc); err != nil || len(this.Channels) == 0 {
return
}
// Notify host of new channels
if chancount != len(this.Channels) && this.chanhandler != nil {
this.chanhandler(this, this.Channels[chancount:])
}
// reset cache timeout values according to feed specified values (TTL)
if this.EnforceCacheLimit && this.CacheTimeout < this.Channels[0].TTL {
this.CacheTimeout = this.Channels[0].TTL
}
return
}

View File

@ -15,10 +15,31 @@ func TestFeed(t *testing.T) {
var err os.Error
for _, uri := range urilist {
feed = New(5, true)
feed = New(5, true, chanHandler, itemHandler)
if err = feed.Fetch(uri); err != nil {
t.Errorf("%s >>> %s", uri, err)
}
}
/*
Output of handlers:
6 new item(s) in WriteTheWeb of http://cyber.law.harvard.edu/rss/examples/sampleRss091.xml
1 new channel(s) in http://cyber.law.harvard.edu/rss/examples/sampleRss091.xml
21 new item(s) in Dave Winer: Grateful Dead of http://cyber.law.harvard.edu/rss/examples/sampleRss092.xml
1 new channel(s) in http://cyber.law.harvard.edu/rss/examples/sampleRss092.xml
4 new item(s) in Liftoff News of http://cyber.law.harvard.edu/rss/examples/rss2sample.xml
1 new channel(s) in http://cyber.law.harvard.edu/rss/examples/rss2sample.xml
15 new item(s) in Blog@Case of http://blog.case.edu/news/feed.atom
1 new channel(s) in http://blog.case.edu/news/feed.atom
*/
}
// chanHandler is the channel notification callback handed to New by TestFeed.
// It is intentionally a no-op; uncomment the println to see how many new
// channels each fetched feed reports.
func chanHandler(feed *Feed, newchannels []*Channel) {
//println(len(newchannels), "new channel(s) in", feed.Url)
}
// itemHandler is the item notification callback handed to New by TestFeed.
// It is intentionally a no-op; uncomment the println to see how many new
// items each channel of a fetched feed reports.
func itemHandler(feed *Feed, ch *Channel, newitems []*Item) {
//println(len(newitems), "new item(s) in", ch.Title, "of", feed.Url)
}

View File

@ -3,19 +3,19 @@ package feeder
type Item struct {
// RSS and Shared fields
Title string
Links []Link
Links []*Link
Description string
Author Author
Categories []Category
Categories []*Category
Comments string
Enclosures []Enclosure
Enclosures []*Enclosure
Guid string
PubDate string
Source Source
Source *Source
// Atom specific fields
Id string
Generator Generator
Generator *Generator
Contributors []string
Content Content
Content *Content
}

View File

@ -4,13 +4,60 @@ import "os"
import "xmlx"
func (this *Feed) readRss2(doc *xmlx.Document) (err os.Error) {
days := make(map[string]int)
days["Monday"] = 1
days["Tuesday"] = 2
days["Wednesday"] = 3
days["Thursday"] = 4
days["Friday"] = 5
days["Saturday"] = 6
days["Sunday"] = 7
getChan := func(pubdate string) *Channel {
for _, c := range this.Channels {
if c.PubDate == pubdate {
return c
}
}
return nil
}
haveItem := func(ch *Channel, id, title, desc string) bool {
for _, item := range ch.Items {
switch {
case len(id) > 0:
if item.Id == id {
return true
}
case len(title) > 0:
if item.Title == title {
return true
}
default:
if item.Description == desc {
return true
}
}
}
return false
}
var ch *Channel
var i *Item
var n *xmlx.Node
var list, tl []*xmlx.Node
channels := doc.SelectNodes("", "channel")
for _, node := range channels {
ch := Channel{}
ch.Title = node.GetValue("", "title")
if ch = getChan(node.GetValue("", "pubDate")); ch == nil {
ch = new(Channel)
this.Channels = append(this.Channels, ch)
}
list := node.SelectNodes("", "link")
ch.Title = node.GetValue("", "title")
list = node.SelectNodes("", "link")
ch.Links = make([]Link, len(list))
for i, v := range list {
ch.Links[i].Href = v.Value
}
@ -25,14 +72,14 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err os.Error) {
ch.Docs = node.GetValue("", "docs")
list = node.SelectNodes("", "category")
ch.Categories = make([]Category, len(list))
ch.Categories = make([]*Category, len(list))
for i, v := range list {
ch.Categories[i] = new(Category)
ch.Categories[i].Domain = v.GetAttr("", "domain")
ch.Categories[i].Text = v.Value
}
n := node.SelectNode("", "generator")
if n != nil {
if n = node.SelectNode("", "generator"); n != nil {
ch.Generator = Generator{}
ch.Generator.Text = n.Value
}
@ -49,7 +96,7 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err os.Error) {
list = node.SelectNodes("", "days")
ch.SkipDays = make([]int, len(list))
for i, v := range list {
ch.SkipDays[i] = mapDay(v.Value)
ch.SkipDays[i] = days[v.Value]
}
if n = node.SelectNode("", "image"); n != nil {
@ -78,16 +125,22 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err os.Error) {
ch.TextInput.Link = n.GetValue("", "link")
}
itemcount := len(ch.Items)
list = node.SelectNodes("", "item")
for _, item := range list {
i := Item{}
if haveItem(ch, item.GetValue("", "pubDate"),
item.GetValue("", "title"), item.GetValue("", "description")) {
continue
}
i = new(Item)
i.Title = item.GetValue("", "title")
i.Description = item.GetValue("", "description")
list = node.SelectNodes("", "link")
i.Links = make([]Link, 0)
for _, v := range list {
lnk := Link{}
tl = node.SelectNodes("", "link")
for _, v := range tl {
lnk := new(Link)
lnk.Href = v.Value
i.Links = append(i.Links, lnk)
}
@ -101,24 +154,25 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err os.Error) {
i.Guid = item.GetValue("", "guid")
i.PubDate = item.GetValue("", "pubDate")
list := item.SelectNodes("", "category")
i.Categories = make([]Category, len(list))
for li, lv := range list {
i.Categories[li].Domain = lv.GetAttr("", "domain")
i.Categories[li].Text = lv.Value
tl = item.SelectNodes("", "category")
for _, lv := range tl {
cat := new(Category)
cat.Domain = lv.GetAttr("", "domain")
cat.Text = lv.Value
i.Categories = append(i.Categories, cat)
}
list = item.SelectNodes("", "enclosure")
i.Enclosures = make([]Enclosure, len(list))
for li, lv := range list {
i.Enclosures[li].Url = lv.GetAttr("", "url")
i.Enclosures[li].Length = lv.GetAttri64("", "length")
i.Enclosures[li].Type = lv.GetAttr("", "type")
tl = item.SelectNodes("", "enclosure")
for _, lv := range tl {
enc := new(Enclosure)
enc.Url = lv.GetAttr("", "url")
enc.Length = lv.GetAttri64("", "length")
enc.Type = lv.GetAttr("", "type")
i.Enclosures = append(i.Enclosures, enc)
}
src := item.SelectNode("", "source")
if src != nil {
i.Source = Source{}
if src := item.SelectNode("", "source"); src != nil {
i.Source = new(Source)
i.Source.Url = src.GetAttr("", "url")
i.Source.Text = src.Value
}
@ -126,27 +180,9 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err os.Error) {
ch.Items = append(ch.Items, i)
}
this.Channels = append(this.Channels, ch)
if itemcount != len(ch.Items) && this.itemhandler != nil {
this.itemhandler(this, ch, ch.Items[itemcount:])
}
}
return
}
// mapDay converts an English weekday name into its 1-based index, counting
// from Monday (1) through Sunday (7). The match is exact and case-sensitive.
// Any name that is not recognised yields 1, the same value as "Monday".
func mapDay(day string) int {
	// Ordered so that the slice position plus one is the day number.
	weekdays := [...]string{
		"Monday",
		"Tuesday",
		"Wednesday",
		"Thursday",
		"Friday",
		"Saturday",
		"Sunday",
	}
	for idx, name := range weekdays {
		if name == day {
			return idx + 1
		}
	}
	// Unknown input falls back to 1.
	return 1
}