commit 269db380a3

atom.go (49 changed lines)

@@ -6,32 +6,15 @@ func (this *Feed) readAtom(doc *xmlx.Document) (err error) {
     ns := "http://www.w3.org/2005/Atom"
     channels := doc.SelectNodes(ns, "feed")
 
-    getChan := func(id, title string) *Channel {
-        for _, c := range this.Channels {
-            switch {
-            case len(id) > 0:
-                if c.Id == id {
-                    return c
-                }
-            case len(title) > 0:
-                if c.Title == title {
-                    return c
-                }
-            }
-        }
-        return nil
-    }
-
+    var foundChannels []*Channel
     var ch *Channel
     var i *Item
     var tn *xmlx.Node
     var list []*xmlx.Node
 
     for _, node := range channels {
-        if ch = getChan(node.S(ns, "id"), node.S(ns, "title")); ch == nil {
-            ch = new(Channel)
-            this.Channels = append(this.Channels, ch)
-        }
+        ch = new(Channel)
+        foundChannels = append(foundChannels, ch)
 
         ch.Title = node.S(ns, "title")
         ch.LastBuildDate = node.S(ns, "updated")

@@ -67,14 +50,9 @@ func (this *Feed) readAtom(doc *xmlx.Document) (err error) {
             ch.Author.Email = tn.S("", "email")
         }
 
-        itemcount := len(ch.Items)
         list = node.SelectNodes(ns, "entry")
 
         for _, item := range list {
-            if isItemPresent(ch, item.S(ns, "id"), item.S(ns, "title")) {
-                continue
-            }
-
             i = new(Item)
             i.Title = item.S(ns, "title")
             i.Id = item.S(ns, "id")

@@ -120,26 +98,7 @@ func (this *Feed) readAtom(doc *xmlx.Document) (err error) {
 
             ch.Items = append(ch.Items, i)
         }
-
-        if itemcount != len(ch.Items) && this.itemhandler != nil {
-            this.itemhandler(this, ch, ch.Items[itemcount:])
-        }
     }
+    this.Channels = foundChannels
     return
 }
-
-func isItemPresent(ch *Channel, id, title string) bool {
-    for _, item := range ch.Items {
-        switch {
-        case len(id) > 0:
-            if item.Id == id {
-                return true
-            }
-        case len(title) > 0:
-            if item.Title == title {
-                return true
-            }
-        }
-    }
-    return false
-}

channel.go

@@ -28,3 +28,12 @@ type Channel struct {
     Author   Author
     SubTitle SubTitle
 }
+
+func (c *Channel) Key() string {
+    switch {
+    case len(c.Id) != 0:
+        return c.Id
+    default:
+        return c.Title
+    }
+}

database.go (new file)

@@ -0,0 +1,33 @@
+/*
+ Credits go to github.com/SlyMarbo/rss for inspiring this solution.
+*/
+package feeder
+
+type database struct {
+    request  chan string
+    response chan bool
+    known    map[string]struct{}
+}
+
+func (d *database) Run() {
+    d.known = make(map[string]struct{})
+    var s string
+
+    for {
+        s = <-d.request
+        if _, ok := d.known[s]; ok {
+            d.response <- true
+        } else {
+            d.response <- false
+            d.known[s] = struct{}{}
+        }
+    }
+}
+
+func NewDatabase() *database {
+    database := new(database)
+    database.request = make(chan string)
+    database.response = make(chan bool)
+    go database.Run()
+    return database
+}

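The new database type serializes duplicate detection through a single goroutine: a caller sends a key on request and reads from response whether that key was already known; the first query for a key answers false and records it, and every later query answers true. A minimal sketch of that protocol, written as package-internal code since the type is unexported (the helper name is hypothetical and not part of this commit):

    // exampleDatabaseUsage is illustrative only and not part of this commit.
    func exampleDatabaseUsage() {
        db := NewDatabase()

        db.request <- "feed-key"
        first := <-db.response // false: the key was unknown and is now recorded

        db.request <- "feed-key"
        second := <-db.response // true: the same key is already known

        _, _ = first, second
    }
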
feed.go (35 changed lines)

@@ -58,6 +58,9 @@ type Feed struct {
     // Url from which this feed was created.
     Url string
 
+    // Database containing a list of known Items and Channels for this instance
+    database *database
+
     // A notification function, used to notify the host when a new channel
     // has been found.
     chanhandler ChannelHandler

@@ -76,6 +79,7 @@ func New(cachetimeout int, enforcecachelimit bool, ch ChannelHandler, ih ItemHan
     v.CacheTimeout = cachetimeout
     v.EnforceCacheLimit = enforcecachelimit
     v.Type = "none"
+    v.database = NewDatabase()
     v.chanhandler = ch
     v.itemhandler = ih
     return v

@@ -150,24 +154,43 @@ func (this *Feed) makeFeed(doc *xmlx.Document) (err error) {
         return
     }
 
-    chancount := len(this.Channels)
     if err = this.buildFeed(doc); err != nil || len(this.Channels) == 0 {
         return
     }
 
-    // Notify host of new channels
-    if chancount != len(this.Channels) && this.chanhandler != nil {
-        this.chanhandler(this, this.Channels[chancount:])
-    }
-
     // reset cache timeout values according to feed specified values (TTL)
     if this.EnforceCacheLimit && this.CacheTimeout < this.Channels[0].TTL {
         this.CacheTimeout = this.Channels[0].TTL
     }
 
+    this.notifyListeners()
+
     return
 }
 
+func (this *Feed) notifyListeners() {
+    var newchannels []*Channel
+    for _, channel := range this.Channels {
+        if this.database.request <- channel.Key(); !<-this.database.response {
+            newchannels = append(newchannels, channel)
+        }
+
+        var newitems []*Item
+        for _, item := range channel.Items {
+            if this.database.request <- item.Key(); !<-this.database.response {
+                newitems = append(newitems, item)
+            }
+        }
+        if len(newitems) > 0 && this.itemhandler != nil {
+            this.itemhandler(this, channel, newitems)
+        }
+    }
+
+    if len(newchannels) > 0 && this.chanhandler != nil {
+        this.chanhandler(this, newchannels)
+    }
+}
+
 // This function returns true or false, depending on whether the CacheTimeout
 // value has expired or not. Additionally, it will ensure that we adhere to the
 // RSS spec's SkipDays and SkipHours values (if Feed.EnforceCacheLimit is set to

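With makeFeed delegating to notifyListeners, the handlers passed to New fire only for channels and items whose Key() the feed's database has not seen before, instead of firing whenever the slices grew during a fetch. A rough sketch of how a host observes this, with handler signatures inferred from the calls in this diff and a placeholder payload:

    // exampleHandlers is illustrative only; payload stands in for feed XML bytes.
    func exampleHandlers(payload []byte) {
        feed := New(5, true,
            func(f *Feed, newChannels []*Channel) {
                // receives only channels whose Key() was not known yet
            },
            func(f *Feed, ch *Channel, newItems []*Item) {
                // receives only items whose Key() was not known yet
            })

        // Fetching the same bytes twice should report nothing new on the second call.
        feed.FetchBytes("http://example.com", payload, nil)
        feed.FetchBytes("http://example.com", payload, nil)
    }
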
feed_test.go (40 changed lines)

@@ -7,20 +7,6 @@ import (
 
 var items []*Item
 
-func Test_NewItem(t *testing.T) {
-    content, _ := ioutil.ReadFile("testdata/initial.atom")
-    feed := New(1, true, chanHandler, itemHandler)
-    err := feed.FetchBytes("http://example.com", content, nil)
-    if err != nil { t.Error(err) }
-
-    content, _ = ioutil.ReadFile("testdata/initial_plus_one_new.atom")
-    feed.FetchBytes("http://example.com", content, nil)
-    expected := "Second title"
-    if expected != items[0].Title {
-        t.Errorf("Expected %s, got %s", expected, items[0].Title)
-    }
-}
-
 func TestFeed(t *testing.T) {
     urilist := []string{
         //"http://cyber.law.harvard.edu/rss/examples/sampleRss091.xml", // Non-utf8 encoding.

@@ -43,6 +29,26 @@ func TestFeed(t *testing.T) {
     }
 }
 
+func Test_NewItem(t *testing.T) {
+    content, _ := ioutil.ReadFile("testdata/initial.atom")
+    feed := New(1, true, chanHandler, itemHandler)
+    err := feed.FetchBytes("http://example.com", content, nil)
+    if err != nil {
+        t.Error(err)
+    }
+
+    content, _ = ioutil.ReadFile("testdata/initial_plus_one_new.atom")
+    feed.FetchBytes("http://example.com", content, nil)
+    expected := "Second title"
+    if len(items) != 1 {
+        t.Errorf("Expected %s new item, got %s", 1, len(items))
+    }
+
+    if expected != items[0].Title {
+        t.Errorf("Expected %s, got %s", expected, items[0].Title)
+    }
+}
+
 func Test_AtomAuthor(t *testing.T) {
     content, err := ioutil.ReadFile("testdata/idownload.atom")
     if err != nil {

@@ -51,7 +57,7 @@ func Test_AtomAuthor(t *testing.T) {
     feed := New(1, true, chanHandler, itemHandler)
     err = feed.FetchBytes("http://example.com", content, nil)
 
-    item := items[0]
+    item := feed.Channels[0].Items[0]
     expected := "Cody Lee"
     if item.Author.Name != expected {
         t.Errorf("Expected author to be %s but found %s", expected, item.Author.Name)

@@ -63,7 +69,7 @@ func Test_RssAuthor(t *testing.T) {
     feed := New(1, true, chanHandler, itemHandler)
     feed.FetchBytes("http://example.com", content, nil)
 
-    item := items[0]
+    item := feed.Channels[0].Items[0]
     expected := "Cory Doctorow"
     if item.Author.Name != expected {
         t.Errorf("Expected author to be %s but found %s", expected, item.Author.Name)

@@ -75,7 +81,7 @@ func Test_CData(t *testing.T) {
     feed := New(1, true, chanHandler, itemHandler)
     feed.FetchBytes("http://example.com", content, nil)
 
-    item := items[0]
+    item := feed.Channels[0].Items[0]
     expected := `<p>abc<div>"def"</div>ghi`
     if item.Description != expected {
         t.Errorf("Expected item.Description to be [%s] but item.Description=[%s]", expected, item.Description)

item.go (20 changed lines)

@@ -1,5 +1,10 @@
 package feeder
 
+import (
+    "crypto/md5"
+    "io"
+)
+
 type Item struct {
     // RSS and Shared fields
     Title string

@@ -19,3 +24,18 @@ type Item struct {
     Contributors []string
     Content      *Content
 }
+
+func (i *Item) Key() string {
+    switch {
+    case i.Guid != nil && len(*i.Guid) != 0:
+        return *i.Guid
+    case len(i.Id) != 0:
+        return i.Id
+    case len(i.Title) > 0 && len(i.PubDate) > 0:
+        return i.Title + i.PubDate
+    default:
+        h := md5.New()
+        io.WriteString(h, i.Description)
+        return string(h.Sum(nil))
+    }
+}

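Item.Key() gives each entry a stable identity for the feed's deduplication database: Guid wins when present, then Id, then Title concatenated with PubDate, with an MD5 digest of the Description as the last resort. A short illustration with hypothetical values, again as package-internal code:

    // exampleItemKeys is illustrative only and not part of this commit.
    func exampleItemKeys() {
        guid := "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a"

        withGuid := &Item{Guid: &guid, Title: "ignored while Guid is set"}
        atomStyle := &Item{Id: "tag:example.org,2003:3.2397"}
        rssStyle := &Item{Title: "Second title", PubDate: "Tue, 03 Jun 2003 09:39:21 GMT"}

        _ = withGuid.Key()  // the Guid value
        _ = atomStyle.Key() // the Id value
        _ = rssStyle.Key()  // "Second title" + the PubDate string
    }
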
rss.go (48 changed lines)

@@ -6,32 +6,18 @@ import (
     xmlx "github.com/jteeuwen/go-pkg-xmlx"
 )
 
+var days = map[string]int{
+    "Monday":    1,
+    "Tuesday":   2,
+    "Wednesday": 3,
+    "Thursday":  4,
+    "Friday":    5,
+    "Saturday":  6,
+    "Sunday":    7,
+}
+
 func (this *Feed) readRss2(doc *xmlx.Document) (err error) {
-    days := make(map[string]int)
-    days["Monday"] = 1
-    days["Tuesday"] = 2
-    days["Wednesday"] = 3
-    days["Thursday"] = 4
-    days["Friday"] = 5
-    days["Saturday"] = 6
-    days["Sunday"] = 7
-
-    getChan := func(pubdate, title string) *Channel {
-        for _, c := range this.Channels {
-            switch {
-            case len(pubdate) > 0:
-                if c.PubDate == pubdate {
-                    return c
-                }
-            case len(title) > 0:
-                if c.Title == title {
-                    return c
-                }
-            }
-        }
-        return nil
-    }
-
+    var foundChannels []*Channel
     var ch *Channel
     var i *Item
     var n *xmlx.Node

@@ -49,10 +35,8 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err error) {
 
     channels := root.SelectNodes(ns, "channel")
     for _, node := range channels {
-        if ch = getChan(node.S(ns, "pubDate"), node.S(ns, "title")); ch == nil {
-            ch = new(Channel)
-            this.Channels = append(this.Channels, ch)
-        }
+        ch = new(Channel)
+        foundChannels = append(foundChannels, ch)
 
         ch.Title = node.S(ns, "title")
         list = node.SelectNodes(ns, "link")

@@ -125,7 +109,6 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err error) {
             ch.TextInput.Link = n.S(ns, "link")
         }
 
-        itemcount := len(ch.Items)
         list = node.SelectNodes(ns, "item")
         if len(list) == 0 {
             list = doc.SelectNodes(ns, "item")

@@ -193,10 +176,7 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err error) {
 
             ch.Items = append(ch.Items, i)
         }
-
-        if itemcount != len(ch.Items) && this.itemhandler != nil {
-            this.itemhandler(this, ch, ch.Items[itemcount:])
-        }
     }
+    this.Channels = foundChannels
     return
 }
