diff --git a/atom.go b/atom.go
index cab3935..37f568c 100644
--- a/atom.go
+++ b/atom.go
@@ -56,7 +56,7 @@ func (this *Feed) readAtom(doc *xmlx.Document) (err error) {
 			i = new(Item)
 			i.Title = item.S(ns, "title")
 			i.Id = item.S(ns, "id")
-			i.PubDate = item.S(ns, "updated")
+			i.PubDate, _ = parseTime(item.S(ns, "updated"))
 			i.Description = item.S(ns, "summary")
 
 			links := item.SelectNodes(ns, "link")
diff --git a/item.go b/item.go
index 74045e4..5c002f7 100644
--- a/item.go
+++ b/item.go
@@ -3,6 +3,7 @@ package feeder
 import (
 	"crypto/md5"
 	"io"
+	"time"
 )
 
 type Item struct {
@@ -15,7 +16,7 @@ type Item struct {
 	Comments    string
 	Enclosures  []*Enclosure
 	Guid        *string
-	PubDate     string
+	PubDate     time.Time
 	Source      *Source
 
 	// Atom specific fields
@@ -33,8 +34,8 @@ func (i *Item) Key() string {
 		return *i.Guid
 	case len(i.Id) != 0:
 		return i.Id
-	case len(i.Title) > 0 && len(i.PubDate) > 0:
-		return i.Title + i.PubDate
+	case len(i.Title) > 0 && !i.PubDate.IsZero():
+		return i.Title + i.PubDate.String()
 	default:
 		h := md5.New()
 		io.WriteString(h, i.Description)
diff --git a/rss.go b/rss.go
index 1c347ca..e5e749f 100644
--- a/rss.go
+++ b/rss.go
@@ -162,7 +162,7 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err error) {
 				i.Guid = &guid
 			}
 
-			i.PubDate = item.S(ns, "pubDate")
+			i.PubDate, _ = parseTime(item.S(ns, "pubDate"))
 
 			tl = item.SelectNodes(ns, "category")
 			for _, lv := range tl {
diff --git a/timeparser.go b/timeparser.go
new file mode 100644
--- /dev/null
+++ b/timeparser.go
@@ -0,0 +1,40 @@
+package feeder
+
+import (
+	"fmt"
+	"strings"
+	"time"
+)
+
+// parseTime converts a feed timestamp into a time.Time. Leading and trailing
+// whitespace is ignored and each known layout is tried in order; the first
+// layout that parses wins. When none match, the zero time and a non-nil
+// error are returned.
+func parseTime(formatted string) (time.Time, error) {
+	layouts := [...]string{
+		"Mon, _2 Jan 2006 15:04:05 MST",
+		"Mon, _2 Jan 2006 15:04:05 -0700",
+		time.ANSIC,
+		time.UnixDate,
+		time.RubyDate,
+		time.RFC822,
+		time.RFC822Z,
+		time.RFC850,
+		time.RFC1123,
+		time.RFC1123Z,
+		time.RFC3339,
+		time.RFC3339Nano,
+		"Mon, 2, Jan 2006 15:4",
+		"02 Jan 2006 15:04:05 MST",
+	}
+
+	formatted = strings.TrimSpace(formatted)
+	for _, layout := range layouts {
+		// Judge success by the error, not by the value: the zero time is
+		// itself a parsable timestamp.
+		if t, err := time.Parse(layout, formatted); err == nil {
+			return t, nil
+		}
+	}
+	return time.Time{}, fmt.Errorf("cannot parse time %q: no known layout matches", formatted)
+}
diff --git a/timeparser_test.go b/timeparser_test.go
new file mode 100644
--- /dev/null
+++ b/timeparser_test.go
@@ -0,0 +1,121 @@
+package feeder
+
+import (
+	"testing"
+	"time"
+)
+
+// Test_InvalidDate checks that unparsable input yields the zero time and an error.
+func Test_InvalidDate(t *testing.T) {
+	date, err := parseTime("invalid")
+	if !date.IsZero() {
+		t.Errorf("Invalid date should parse to zero")
+	}
+	if err == nil {
+		t.Errorf("error should not be nil")
+	}
+}
+
+// Test_ZeroDate checks that a timestamp equal to the zero time parses without error.
+func Test_ZeroDate(t *testing.T) {
+	date, err := parseTime("0001-01-01T00:00:00Z")
+	if !date.IsZero() {
+		t.Errorf("expected the zero time but was %v", date)
+	}
+	if err != nil {
+		t.Errorf("err should be nil")
+	}
+}
+
+// Test_ParseLayout0 parses an ISO-style timestamp with a numeric zone offset.
+func Test_ParseLayout0(t *testing.T) {
+	date, err := parseTime("2014-03-07T05:38:00-05:00")
+	expected := time.Date(2014, time.March, 7, 5, 38, 0, 0, time.FixedZone("-0500", -18000))
+	assertEqualTime(t, expected, date)
+	if err != nil {
+		t.Errorf("err should be nil")
+	}
+}
+
+// Test_ParseLayout1 parses a weekday-prefixed date with the GMT zone abbreviation.
+func Test_ParseLayout1(t *testing.T) {
+	date, err := parseTime("Fri, 07 Mar 2014 17:42:51 GMT")
+	expected := time.Date(2014, time.March, 7, 17, 42, 51, 0, time.UTC)
+	assertEqualTime(t, expected, date)
+	if err != nil {
+		t.Errorf("err should be nil")
+	}
+}
+
+// Test_ParseLayout2 parses an ISO-style timestamp with the "Z" zone designator.
+func Test_ParseLayout2(t *testing.T) {
+	date, err := parseTime("2014-02-05T23:33:34Z")
+	expected := time.Date(2014, time.February, 5, 23, 33, 34, 0, time.UTC)
+	assertEqualTime(t, expected, date)
+	if err != nil {
+		t.Errorf("err should be nil")
+	}
+}
+
+// Test_ParseLayout3 parses a weekday-prefixed date with a numeric zone offset.
+func Test_ParseLayout3(t *testing.T) {
+	date, err := parseTime("Mon, 03 Mar 2014 02:12:25 +0000")
+	expected := time.Date(2014, time.March, 3, 2, 12, 25, 0, time.UTC)
+	assertEqualTime(t, expected, date)
+	if err != nil {
+		t.Errorf("err should be nil")
+	}
+}
+
+// Test_ParseLayout4 parses the comma-after-day form with two-digit day and minute.
+func Test_ParseLayout4(t *testing.T) {
+	date, err := parseTime("Fri, 21, Mar 2014 10:41")
+	expected := time.Date(2014, time.March, 21, 10, 41, 0, 0, time.UTC)
+	assertEqualTime(t, expected, date)
+	if err != nil {
+		t.Errorf("err should be nil")
+	}
+}
+
+// Test_ParseLayout4_1 parses the comma-after-day form with a single-digit minute.
+func Test_ParseLayout4_1(t *testing.T) {
+	date, err := parseTime("Fri, 17, Jan 2014 11:1")
+	expected := time.Date(2014, time.January, 17, 11, 1, 0, 0, time.UTC)
+	assertEqualTime(t, expected, date)
+	if err != nil {
+		t.Errorf("err should be nil")
+	}
+}
+
+// Test_ParseLayout4_2 parses the comma-after-day form with a single-digit day.
+func Test_ParseLayout4_2(t *testing.T) {
+	date, err := parseTime("Thu, 9, Jan 2014 10:19")
+	expected := time.Date(2014, time.January, 9, 10, 19, 0, 0, time.UTC)
+	assertEqualTime(t, expected, date)
+	if err != nil {
+		t.Errorf("err should be nil")
+	}
+}
+
+// Test_ParseLayout5 parses a date without weekday that uses a zone abbreviation.
+func Test_ParseLayout5(t *testing.T) {
+	// time.Parse only knows the offset of a zone abbreviation when time.Local
+	// defines it; pin Local so the result does not depend on the host zone.
+	saved := time.Local
+	time.Local = time.FixedZone("EST", -18000)
+	defer func() { time.Local = saved }()
+
+	date, err := parseTime("22 Jul 2013 14:55:01 EST")
+	expected := time.Date(2013, time.July, 22, 14, 55, 1, 0, time.FixedZone("EST", -18000))
+	assertEqualTime(t, expected, date)
+	if err != nil {
+		t.Errorf("err should be nil")
+	}
+}
+
+// assertEqualTime reports a test error when the two times are not the same instant.
+func assertEqualTime(t *testing.T, expected, actual time.Time) {
+	if !expected.Equal(actual) {
+		t.Errorf("expected %v but was %v", expected, actual)
+	}
+}