commit c2601a5ce3
Author: jim teeuwen <jimteeuwen@gmail.com>
Date:   2009-11-23 13:46:12 +01:00

19 changed files with 682 additions and 0 deletions

	new file:   README
	new file:   src/Makefile
	new file:   src/atom.go
	new file:   src/author.go
	new file:   src/category.go
	new file:   src/channel.go
	new file:   src/cloud.go
	new file:   src/content.go
	new file:   src/enclosure.go
	new file:   src/feed.go
	new file:   src/feed_test.go
	new file:   src/generator.go
	new file:   src/image.go
	new file:   src/input.go
	new file:   src/item.go
	new file:   src/link.go
	new file:   src/rss.go
	new file:   src/source.go
	new file:   src/subtitle.go

README Normal file

@@ -0,0 +1,23 @@
Author: jim teeuwen <jimteeuwen@gmail.com>
Dependencies: go-pkg-xmlx (http://github.com/jteeuwen/go-pkg-xmlx)

This package allows us to fetch RSS and Atom feeds from the internet.
They are parsed into an object tree which is a hybrid of both the RSS and
Atom standards.

Supported feeds are:
- RSS v0.91, 0.92 and 2.0
- Atom 1.0

The package provides cache timeout management. This prevents us from
querying the servers for feed updates too often and risking IP bans. Apart
from setting a cache timeout manually, the package also optionally adheres
to the TTL, SkipDays and SkipHours values specified in the feeds themselves.

Note that the TTL, SkipDays and SkipHours fields are only part of the RSS
spec. For Atom feeds, we use the CacheTimeout field in the Feed struct.

Because the object structure is a hybrid of both the RSS and Atom specs,
not all fields will be filled when requesting either an RSS or Atom feed.
I have tried to create as many shared fields as possible, but some of them
simply occur in only one of the two specs.
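
Example usage (a minimal sketch; the feed URL is illustrative and the
package is assumed to be installed under the import path "feeder"):

	feed := feeder.New(5, true);
	err := feed.Fetch("http://cyber.law.harvard.edu/rss/examples/rss2sample.xml");

	if err == nil {
		for _, ch := range feed.Channels {
			println(ch.Title);
			for _, item := range ch.Items {
				println(" - " + item.Title);
			}
		}
	}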

src/Makefile Normal file

@@ -0,0 +1,10 @@
include $(GOROOT)/src/Make.$(GOARCH)

TARG=feeder
GOFILES=feed.go rss.go atom.go channel.go image.go item.go cloud.go \
	enclosure.go source.go input.go category.go generator.go link.go \
	subtitle.go author.go content.go

include $(GOROOT)/src/Make.pkg

src/atom.go Normal file

@@ -0,0 +1,97 @@
package feeder

import "os"
import "xmlx"

func (this *Feed) readAtom(doc *xmlx.Document) (err os.Error) {
	ns := "http://www.w3.org/2005/Atom";
	channels := doc.SelectNodes(ns, "feed");

	for _, node := range channels {
		ch := Channel{};
		ch.Title = node.GetValue(ns, "title");
		ch.LastBuildDate = node.GetValue(ns, "updated");
		ch.Id = node.GetValue(ns, "id");
		ch.Rights = node.GetValue(ns, "rights");

		list := node.SelectNodes(ns, "link");
		ch.Links = make([]Link, len(list));
		for i, v := range list {
			ch.Links[i].Href = v.GetAttr("", "href");
			ch.Links[i].Rel = v.GetAttr("", "rel");
			ch.Links[i].Type = v.GetAttr("", "type");
			ch.Links[i].HrefLang = v.GetAttr("", "hreflang");
		}

		tn := node.SelectNode(ns, "subtitle");
		if tn != nil {
			ch.SubTitle = SubTitle{};
			ch.SubTitle.Type = tn.GetAttr("", "type");
			ch.SubTitle.Text = tn.Value;
		}

		tn = node.SelectNode(ns, "generator");
		if tn != nil {
			ch.Generator = Generator{};
			ch.Generator.Uri = tn.GetAttr("", "uri");
			ch.Generator.Version = tn.GetAttr("", "version");
			ch.Generator.Text = tn.Value;
		}

		tn = node.SelectNode(ns, "author");
		if tn != nil {
			ch.Author = Author{};
			ch.Author.Name = tn.GetValue("", "name");
			ch.Author.Uri = tn.GetValue("", "uri");
			ch.Author.Email = tn.GetValue("", "email");
		}

		entries := node.SelectNodes(ns, "entry");
		ch.Items = make([]Item, 0);
		for _, v := range entries {
			item := Item{};
			item.Title = v.GetValue(ns, "title");
			item.Id = v.GetValue(ns, "id");
			item.PubDate = v.GetValue(ns, "updated");
			item.Description = v.GetValue(ns, "summary");

			// Atom links with rel="enclosure" are treated as enclosures;
			// everything else becomes a regular link.
			links := v.SelectNodes(ns, "link");
			item.Links = make([]Link, 0);
			for _, lv := range links {
				if lv.GetAttr("", "rel") == "enclosure" {
					enc := Enclosure{};
					enc.Url = lv.GetAttr("", "href");
					enc.Type = lv.GetAttr("", "type");
					item.addEnclosure(enc);
				} else {
					lnk := Link{};
					lnk.Href = lv.GetAttr("", "href");
					lnk.Rel = lv.GetAttr("", "rel");
					lnk.Type = lv.GetAttr("", "type");
					lnk.HrefLang = lv.GetAttr("", "hreflang");
					item.addLink(lnk);
				}
			}

			contributors := v.SelectNodes(ns, "contributor");
			item.Contributors = make([]string, len(contributors));
			for ci, cv := range contributors {
				item.Contributors[ci] = cv.GetValue("", "name");
			}

			tn = v.SelectNode(ns, "content");
			if tn != nil {
				item.Content = Content{};
				item.Content.Type = tn.GetAttr("", "type");
				item.Content.Lang = tn.GetValue("xml", "lang");
				item.Content.Base = tn.GetValue("xml", "base");
				item.Content.Text = tn.Value;
			}

			ch.addItem(item);
		}

		this.addChannel(ch);
	}
	return
}

src/author.go Normal file

@@ -0,0 +1,7 @@
package feeder

type Author struct {
	Name  string;
	Uri   string;
	Email string;
}

src/category.go Normal file

@@ -0,0 +1,6 @@
package feeder

type Category struct {
	Domain string;
	Text   string;
}

src/channel.go Normal file

@@ -0,0 +1,49 @@
package feeder

type Channel struct {
	Title          string;
	Links          []Link;
	Description    string;
	Language       string;
	Copyright      string;
	ManagingEditor string;
	WebMaster      string;
	PubDate        string;
	LastBuildDate  string;
	Docs           string;
	Categories     []Category;
	Generator      Generator;
	TTL            int;
	Rating         string;
	SkipHours      []int;
	SkipDays       []int;
	Image          Image;
	Items          []Item;
	Cloud          Cloud;
	TextInput      Input;

	// Atom fields
	Id       string;
	Rights   string;
	Author   Author;
	SubTitle SubTitle;
}

// addItem grows the Items slice by one and appends item. This manual
// copy-and-grow idiom predates the built-in append.
func (this *Channel) addItem(item Item) {
	slice := make([]Item, len(this.Items)+1);
	for i, v := range this.Items {
		slice[i] = v;
	}
	slice[len(slice)-1] = item;
	this.Items = slice;
}

// addLink grows the Links slice by one and appends l.
func (this *Channel) addLink(l Link) {
	slice := make([]Link, len(this.Links)+1);
	for i, v := range this.Links {
		slice[i] = v;
	}
	slice[len(slice)-1] = l;
	this.Links = slice;
}

src/cloud.go Normal file

@@ -0,0 +1,10 @@
package feeder

type Cloud struct {
	Domain            string;
	Port              int;
	Path              string;
	RegisterProcedure string;
	Protocol          string;
}

src/content.go Normal file

@@ -0,0 +1,8 @@
package feeder

type Content struct {
	Type string;
	Lang string;
	Base string;
	Text string;
}

src/enclosure.go Normal file

@@ -0,0 +1,8 @@
package feeder

type Enclosure struct {
	Url    string;
	Length int64;
	Type   string;
}

src/feed.go Normal file

@@ -0,0 +1,205 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

/*
Author: jim teeuwen <jimteeuwen@gmail.com>
Dependencies: go-pkg-xmlx (http://github.com/jteeuwen/go-pkg-xmlx)

This package allows us to fetch RSS and Atom feeds from the internet.
They are parsed into an object tree which is a hybrid of both the RSS and
Atom standards.

Supported feeds are:
- RSS v0.91, 0.92 and 2.0
- Atom 1.0

The package provides cache timeout management. This prevents us from
querying the servers for feed updates too often and risking IP bans. Apart
from setting a cache timeout manually, the package also optionally adheres
to the TTL, SkipDays and SkipHours values specified in the feeds themselves.

Note that the TTL, SkipDays and SkipHours fields are only part of the RSS
spec. For Atom feeds, we use the CacheTimeout field in the Feed struct.

Because the object structure is a hybrid of both the RSS and Atom specs,
not all fields will be filled when requesting either an RSS or Atom feed.
I have tried to create as many shared fields as possible, but some of them
simply occur in only one of the two specs.
*/
package feeder

import "os"
import "http"
import "io"
import "time"
import "xmlx"
import "fmt"
import "strconv"
import "strings"

type Feed struct {
	// Custom cache timeout in minutes.
	CacheTimeout int;

	// Make sure we adhere to the cache timeout specified in the feed. If
	// our CacheTimeout is higher than that, we will use that instead.
	EnforceCacheLimit bool;

	// Type of feed. Rss, Atom, etc.
	Type string;

	// Version of the feed. Major and Minor.
	Version [2]int;

	// Channels with content.
	Channels []Channel;

	// Url from which this feed was created.
	Url string;

	// Last time content was fetched. Used in conjunction with CacheTimeout
	// to ensure we don't get content too often.
	lastupdate int64;
}

func New(cachetimeout int, enforcecachelimit bool) *Feed {
	return &Feed{
		CacheTimeout: cachetimeout,
		EnforceCacheLimit: enforcecachelimit,
		Type: "none",
		Version: [2]int{0, 0},
		Channels: make([]Channel, 0),
	}
}

// addChannel grows the Channels slice by one and appends ch.
func (this *Feed) addChannel(ch Channel) {
	slice := make([]Channel, len(this.Channels)+1);
	for i, v := range this.Channels {
		slice[i] = v;
	}
	slice[len(slice)-1] = ch;
	this.Channels = slice;
}

func (this *Feed) Fetch(uri string) (err os.Error) {
	// Bail out early, without error, while we are still inside the cache
	// window.
	if !this.canUpdate() { return }

	// Fetch data from remote location.
	r, _, err := http.Get(uri);
	if err != nil { return }

	defer r.Body.Close();

	b, err := io.ReadAll(r.Body);
	if err != nil { return }
	content := string(b);

	this.Url = uri;

	// Extract type and version of the feed so we can have the appropriate
	// function parse it (rss 0.91, rss 0.92, rss 2, atom etc).
	doc := xmlx.New();
	err = doc.LoadString(content);
	if err != nil { return }
	this.Type, this.Version = this.GetVersionInfo(doc);

	ok := this.testVersions();
	if !ok {
		err = os.NewError(fmt.Sprintf("Unsupported feed: %s, version: %+v", this.Type, this.Version));
		return;
	}

	err = this.buildFeed(doc);
	if err != nil || len(this.Channels) == 0 { return }

	// Reset the cache timeout according to the value specified in the
	// feed itself (TTL), if that is larger than our own.
	if this.EnforceCacheLimit && this.CacheTimeout < this.Channels[0].TTL {
		this.CacheTimeout = this.Channels[0].TTL;
	}

	return;
}

func (this *Feed) canUpdate() bool {
	// Make sure we are not within the specified cache-limit.
	// This ensures we don't request data too often.
	utc := time.UTC();
	if utc.Seconds()-this.lastupdate < int64(this.CacheTimeout*60) {
		return false
	}

	// If skipDays or skipHours are set in the RSS feed, use these to see
	// if we can update.
	if len(this.Channels) > 0 && this.Type == "rss" {
		if this.EnforceCacheLimit && len(this.Channels[0].SkipDays) > 0 {
			for _, v := range this.Channels[0].SkipDays {
				if v == utc.Weekday {
					return false
				}
			}
		}

		if this.EnforceCacheLimit && len(this.Channels[0].SkipHours) > 0 {
			for _, v := range this.Channels[0].SkipHours {
				if v == utc.Hour {
					return false
				}
			}
		}
	}

	this.lastupdate = utc.Seconds();
	return true
}

func (this *Feed) buildFeed(doc *xmlx.Document) (err os.Error) {
	switch this.Type {
	case "rss": err = this.readRss2(doc);
	case "atom": err = this.readAtom(doc);
	}
	return
}

func (this *Feed) testVersions() bool {
	switch this.Type {
	case "rss":
		if this.Version[0] > 2 || (this.Version[0] == 2 && this.Version[1] > 0) {
			return false
		}
	case "atom":
		if this.Version[0] > 1 || (this.Version[0] == 1 && this.Version[1] > 0) {
			return false
		}
	default:
		return false
	}
	return true;
}

func (this *Feed) GetVersionInfo(doc *xmlx.Document) (ftype string, fversion [2]int) {
	node := doc.SelectNode("http://www.w3.org/2005/Atom", "feed");
	if node == nil { goto rss }
	ftype = "atom";
	fversion = [2]int{1, 0};
	return;

rss:
	node = doc.SelectNode("", "rss");
	if node == nil { goto end }
	ftype = "rss";
	version := node.GetAttr("", "version");
	p := strings.Index(version, ".");
	major, _ := strconv.Atoi(version[0:p]);
	minor, _ := strconv.Atoi(version[p+1 : len(version)]);
	fversion = [2]int{major, minor};
	return;

end:
	ftype = "unknown";
	fversion = [2]int{0, 0};
	return;
}
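
Note that Fetch returns early, with a nil error, whenever canUpdate
reports that we are still inside the cache window. Callers can therefore
poll on a fixed interval and let the package decide when to actually hit
the network. A minimal polling sketch (assuming the pre-Go1 time.Sleep
API, which takes a nanosecond count):

	for {
		feed.Fetch(uri);	// no-op until CacheTimeout (or the feed TTL) expires
		time.Sleep(60e9);	// wait 60 seconds between polls
	}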

src/feed_test.go Normal file

@@ -0,0 +1,22 @@
package feeder

import "testing"

func TestFeed(t *testing.T) {
	urilist := []string{
		"http://cyber.law.harvard.edu/rss/examples/sampleRss091.xml",
		"http://cyber.law.harvard.edu/rss/examples/sampleRss092.xml",
		"http://cyber.law.harvard.edu/rss/examples/rss2sample.xml",
		"http://blog.case.edu/news/feed.atom",
	};

	for _, uri := range urilist {
		feed := New(5, true);
		err := feed.Fetch(uri);
		if err != nil {
			t.Errorf("%s >>> %s", uri, err);
			continue;
		}
	}
}

src/generator.go Normal file

@@ -0,0 +1,8 @@
package feeder

type Generator struct {
	Uri     string;
	Version string;
	Text    string;
}

src/image.go Normal file

@@ -0,0 +1,10 @@
package feeder

type Image struct {
	Title       string;
	Url         string;
	Link        string;
	Width       int;
	Height      int;
	Description string;
}

src/input.go Normal file

@@ -0,0 +1,8 @@
package feeder

type Input struct {
	Title       string;
	Description string;
	Name        string;
	Link        string;
}

src/item.go Normal file

@@ -0,0 +1,39 @@
package feeder

type Item struct {
	// RSS and shared fields
	Title       string;
	Links       []Link;
	Description string;
	Author      Author;
	Categories  []Category;
	Comments    string;
	Enclosures  []Enclosure;
	Guid        string;
	PubDate     string;
	Source      Source;

	// Atom specific fields
	Id           string;
	Generator    Generator;
	Contributors []string;
	Content      Content;
}

func (this *Item) addEnclosure(e Enclosure) {
	slice := make([]Enclosure, len(this.Enclosures)+1);
	for i, v := range this.Enclosures {
		slice[i] = v;
	}
	slice[len(slice)-1] = e;
	this.Enclosures = slice;
}

func (this *Item) addLink(l Link) {
	slice := make([]Link, len(this.Links)+1);
	for i, v := range this.Links {
		slice[i] = v;
	}
	slice[len(slice)-1] = l;
	this.Links = slice;
}

src/link.go Normal file

@@ -0,0 +1,9 @@
package feeder

type Link struct {
	Href     string;
	Rel      string;
	Type     string;
	HrefLang string;
}

src/rss.go Normal file

@@ -0,0 +1,150 @@
package feeder

import "os"
import "xmlx"

func (this *Feed) readRss2(doc *xmlx.Document) (err os.Error) {
	channels := doc.SelectNodes("", "channel");
	for _, node := range channels {
		ch := Channel{};
		ch.Title = node.GetValue("", "title");

		list := node.SelectNodes("", "link");
		ch.Links = make([]Link, len(list));
		for i, v := range list {
			ch.Links[i].Href = v.Value;
		}

		ch.Description = node.GetValue("", "description");
		ch.Language = node.GetValue("", "language");
		ch.Copyright = node.GetValue("", "copyright");
		ch.ManagingEditor = node.GetValue("", "managingEditor");
		ch.WebMaster = node.GetValue("", "webMaster");
		ch.PubDate = node.GetValue("", "pubDate");
		ch.LastBuildDate = node.GetValue("", "lastBuildDate");
		ch.Docs = node.GetValue("", "docs");

		list = node.SelectNodes("", "category");
		ch.Categories = make([]Category, len(list));
		for i, v := range list {
			ch.Categories[i].Domain = v.GetAttr("", "domain");
			ch.Categories[i].Text = v.Value;
		}

		n := node.SelectNode("", "generator");
		if n != nil {
			ch.Generator = Generator{};
			ch.Generator.Text = n.Value;
		}

		ch.TTL = node.GetValuei("", "ttl");
		ch.Rating = node.GetValue("", "rating");

		list = node.SelectNodes("", "hour");
		ch.SkipHours = make([]int, len(list));
		for i, v := range list {
			ch.SkipHours[i] = int(v.GetValuei("", "hour"));
		}

		list = node.SelectNodes("", "day");
		ch.SkipDays = make([]int, len(list));
		for i, v := range list {
			ch.SkipDays[i] = mapDay(v.Value);
		}

		n = node.SelectNode("", "image");
		if n != nil {
			ch.Image.Title = n.GetValue("", "title");
			ch.Image.Url = n.GetValue("", "url");
			ch.Image.Link = n.GetValue("", "link");
			ch.Image.Width = n.GetValuei("", "width");
			ch.Image.Height = n.GetValuei("", "height");
			ch.Image.Description = n.GetValue("", "description");
		}

		n = node.SelectNode("", "cloud");
		if n != nil {
			ch.Cloud = Cloud{};
			ch.Cloud.Domain = n.GetAttr("", "domain");
			ch.Cloud.Port = n.GetAttri("", "port");
			ch.Cloud.Path = n.GetAttr("", "path");
			ch.Cloud.RegisterProcedure = n.GetAttr("", "registerProcedure");
			ch.Cloud.Protocol = n.GetAttr("", "protocol");
		}

		n = node.SelectNode("", "textInput");
		if n != nil {
			ch.TextInput = Input{};
			ch.TextInput.Title = n.GetValue("", "title");
			ch.TextInput.Description = n.GetValue("", "description");
			ch.TextInput.Name = n.GetValue("", "name");
			ch.TextInput.Link = n.GetValue("", "link");
		}

		itemlist := node.SelectNodes("", "item");
		for _, item := range itemlist {
			i := Item{};
			i.Title = item.GetValue("", "title");
			i.Description = item.GetValue("", "description");

			links := item.SelectNodes("", "link");
			i.Links = make([]Link, 0);
			for _, v := range links {
				lnk := Link{};
				lnk.Href = v.Value;
				i.addLink(lnk);
			}

			n = item.SelectNode("", "author");
			if n != nil {
				i.Author = Author{};
				i.Author.Name = n.Value;
			}

			i.Comments = item.GetValue("", "comments");
			i.Guid = item.GetValue("", "guid");
			i.PubDate = item.GetValue("", "pubDate");

			list = item.SelectNodes("", "category");
			i.Categories = make([]Category, len(list));
			for li, lv := range list {
				i.Categories[li].Domain = lv.GetAttr("", "domain");
				i.Categories[li].Text = lv.Value;
			}

			list = item.SelectNodes("", "enclosure");
			i.Enclosures = make([]Enclosure, len(list));
			for li, lv := range list {
				i.Enclosures[li].Url = lv.GetAttr("", "url");
				i.Enclosures[li].Length = lv.GetAttri64("", "length");
				i.Enclosures[li].Type = lv.GetAttr("", "type");
			}

			src := item.SelectNode("", "source");
			if src != nil {
				i.Source = Source{};
				i.Source.Url = src.GetAttr("", "url");
				i.Source.Text = src.Value;
			}

			ch.addItem(i);
		}

		this.addChannel(ch);
	}
	return
}

// mapDay maps an RSS skipDays day name to a weekday number
// (Monday = 1 .. Sunday = 7). Unknown names default to Monday.
func mapDay(day string) int {
	switch day {
	case "Monday": return 1;
	case "Tuesday": return 2;
	case "Wednesday": return 3;
	case "Thursday": return 4;
	case "Friday": return 5;
	case "Saturday": return 6;
	case "Sunday": return 7;
	}
	return 1;
}

src/source.go Normal file

@@ -0,0 +1,6 @@
package feeder

type Source struct {
	Url  string;
	Text string;
}

src/subtitle.go Normal file

@@ -0,0 +1,7 @@
package feeder

type SubTitle struct {
	Type string;
	Text string;
}