Time parsing.
Rather than just using a string for PubDate, we attempt to parse it. This includes a couple of crazy non-standard time formats that I've seen in the wild. Breaking change: Item.PubDate is no longer a string, it is time.Time.
This commit is contained in:
		
							parent
							
								
									2b6dc03ede
								
							
						
					
					
						commit
						2c67b94a04
					
				
					 5 changed files with 135 additions and 5 deletions
				
			
		
							
								
								
									
										2
									
								
								atom.go
									
										
									
									
									
								
							
							
						
						
									
										2
									
								
								atom.go
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -56,7 +56,7 @@ func (this *Feed) readAtom(doc *xmlx.Document) (err error) {
 | 
			
		|||
			i = new(Item)
 | 
			
		||||
			i.Title = item.S(ns, "title")
 | 
			
		||||
			i.Id = item.S(ns, "id")
 | 
			
		||||
			i.PubDate = item.S(ns, "updated")
 | 
			
		||||
			i.PubDate, _ = parseTime(item.S(ns, "updated"))
 | 
			
		||||
			i.Description = item.S(ns, "summary")
 | 
			
		||||
 | 
			
		||||
			links := item.SelectNodes(ns, "link")
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										7
									
								
								item.go
									
										
									
									
									
								
							
							
						
						
									
										7
									
								
								item.go
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -3,6 +3,7 @@ package feeder
 | 
			
		|||
import (
 | 
			
		||||
	"crypto/md5"
 | 
			
		||||
	"io"
 | 
			
		||||
    "time"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
type Item struct {
 | 
			
		||||
| 
						 | 
				
			
			@ -15,7 +16,7 @@ type Item struct {
 | 
			
		|||
	Comments    string
 | 
			
		||||
	Enclosures  []*Enclosure
 | 
			
		||||
	Guid        *string
 | 
			
		||||
	PubDate     string
 | 
			
		||||
	PubDate     time.Time
 | 
			
		||||
	Source      *Source
 | 
			
		||||
 | 
			
		||||
	// Atom specific fields
 | 
			
		||||
| 
						 | 
				
			
			@ -33,8 +34,8 @@ func (i *Item) Key() string {
 | 
			
		|||
		return *i.Guid
 | 
			
		||||
	case len(i.Id) != 0:
 | 
			
		||||
		return i.Id
 | 
			
		||||
	case len(i.Title) > 0 && len(i.PubDate) > 0:
 | 
			
		||||
		return i.Title + i.PubDate
 | 
			
		||||
	case len(i.Title) > 0 && !i.PubDate.IsZero():
 | 
			
		||||
		return i.Title + i.PubDate.String()
 | 
			
		||||
	default:
 | 
			
		||||
		h := md5.New()
 | 
			
		||||
		io.WriteString(h, i.Description)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										2
									
								
								rss.go
									
										
									
									
									
								
							
							
						
						
									
										2
									
								
								rss.go
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -162,7 +162,7 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err error) {
 | 
			
		|||
				i.Guid = &guid
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			i.PubDate = item.S(ns, "pubDate")
 | 
			
		||||
			i.PubDate, _ = parseTime(item.S(ns, "pubDate"))
 | 
			
		||||
 | 
			
		||||
			tl = item.SelectNodes(ns, "category")
 | 
			
		||||
			for _, lv := range tl {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										35
									
								
								timeparser.go
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								timeparser.go
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,35 @@
 | 
			
		|||
package feeder
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
    "strings"
 | 
			
		||||
    "time"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// parseTime interprets formatted as a timestamp by trying a fixed list of
// layouts in order and returning the result of the first layout that parses
// without error.
//
// Leading and trailing whitespace is trimmed before parsing. When no layout
// matches, the zero time.Time is returned together with the error produced by
// the last layout attempted.
func parseTime(formatted string) (time.Time, error) {
	// Order matters: the first layout that parses cleanly wins.
	layouts := [...]string{
		"Mon, _2 Jan 2006 15:04:05 MST",
		"Mon, _2 Jan 2006 15:04:05 -0700",
		time.ANSIC,
		time.UnixDate,
		time.RubyDate,
		time.RFC822,
		time.RFC822Z,
		time.RFC850,
		time.RFC1123,
		time.RFC1123Z,
		time.RFC3339,
		time.RFC3339Nano,
		// Non-standard: comma after the day number, unpadded minutes, no
		// seconds and no zone (e.g. "Fri, 17, Jan 2014 11:1").
		"Mon, 2, Jan 2006 15:4",
		// Non-standard: no leading weekday (e.g. "22 Jul 2013 14:55:01 EST").
		"02 Jan 2006 15:04:05 MST",
	}

	formatted = strings.TrimSpace(formatted)

	var err error
	for _, layout := range layouts {
		var t time.Time
		// Decide success by the error rather than by t.IsZero(): an input
		// that legitimately denotes the zero instant must not be treated as
		// a parse failure.
		if t, err = time.Parse(layout, formatted); err == nil {
			return t, nil
		}
	}
	return time.Time{}, err
}
 | 
			
		||||
							
								
								
									
										94
									
								
								timeparser_test.go
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										94
									
								
								timeparser_test.go
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,94 @@
 | 
			
		|||
package feeder
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
    "time"
 | 
			
		||||
    "testing"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
func Test_InvalidDate(t *testing.T) {
 | 
			
		||||
    date, err := parseTime("invalid")
 | 
			
		||||
    if !date.IsZero() {
 | 
			
		||||
        t.Errorf("Invalid date should parse to zero")
 | 
			
		||||
    }
 | 
			
		||||
    if err == nil {
 | 
			
		||||
        t.Errorf("error should not be nil")
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func Test_ParseLayout0(t *testing.T) {
 | 
			
		||||
    date, err := parseTime("2014-03-07T05:38:00-05:00")
 | 
			
		||||
    expected := time.Date(2014, time.March, 7, 5, 38, 0, 0, time.FixedZone("-0500", -18000))
 | 
			
		||||
    assertEqualTime(t, expected, date)
 | 
			
		||||
    if err != nil {
 | 
			
		||||
        t.Errorf("err should be nil")
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func Test_ParseLayout1(t *testing.T) {
 | 
			
		||||
    date, err := parseTime("Fri, 07 Mar 2014 17:42:51 GMT")
 | 
			
		||||
    expected := time.Date(2014, time.March, 7, 17, 42, 51, 0, time.UTC)
 | 
			
		||||
    assertEqualTime(t, expected, date)
 | 
			
		||||
    if err != nil {
 | 
			
		||||
        t.Errorf("err should be nil")
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func Test_ParseLayout2(t *testing.T) {
 | 
			
		||||
    date, err := parseTime("2014-02-05T23:33:34Z")
 | 
			
		||||
    expected := time.Date(2014, time.February, 5, 23, 33, 34, 0, time.UTC)
 | 
			
		||||
    assertEqualTime(t, expected, date)
 | 
			
		||||
    if err != nil {
 | 
			
		||||
        t.Errorf("err should be nil")
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func Test_ParseLayout3(t *testing.T) {
 | 
			
		||||
    date, err := parseTime("Mon, 03 Mar 2014 02:12:25 +0000")
 | 
			
		||||
    expected := time.Date(2014, time.March, 3, 2, 12, 25, 0, time.UTC)
 | 
			
		||||
    assertEqualTime(t, expected, date)
 | 
			
		||||
    if err != nil {
 | 
			
		||||
        t.Errorf("err should be nil")
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func Test_ParseLayout4(t *testing.T) {
 | 
			
		||||
    date, err := parseTime("Fri, 21, Mar 2014 10:41")
 | 
			
		||||
    expected := time.Date(2014, time.March, 21, 10, 41, 0, 0, time.UTC)
 | 
			
		||||
    assertEqualTime(t, expected, date)
 | 
			
		||||
    if err != nil {
 | 
			
		||||
        t.Errorf("err should be nil")
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func Test_ParseLayout4_1(t *testing.T) {
 | 
			
		||||
    date, err := parseTime("Fri, 17, Jan 2014 11:1")
 | 
			
		||||
    expected := time.Date(2014, time.January, 17, 11, 1, 0, 0, time.UTC)
 | 
			
		||||
    assertEqualTime(t, expected, date)
 | 
			
		||||
    if err != nil {
 | 
			
		||||
        t.Errorf("err should be nil")
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func Test_ParseLayout4_2(t *testing.T) {
 | 
			
		||||
    date, err := parseTime("Thu, 9, Jan 2014 10:19")
 | 
			
		||||
    expected := time.Date(2014, time.January, 9, 10, 19, 0, 0, time.UTC)
 | 
			
		||||
    assertEqualTime(t, expected, date)
 | 
			
		||||
    if err != nil {
 | 
			
		||||
        t.Errorf("err should be nil")
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func Test_ParseLayout5(t *testing.T) {
 | 
			
		||||
    date, err := parseTime("22 Jul 2013 14:55:01 EST")
 | 
			
		||||
    expected := time.Date(2013, time.July, 22, 14, 55, 1, 0, time.FixedZone("EST", -18000))
 | 
			
		||||
    assertEqualTime(t, expected, date)
 | 
			
		||||
    if err != nil {
 | 
			
		||||
        t.Errorf("err should be nil")
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// assertEqualTime reports a test error when expected and actual do not
// denote the same instant. The comparison uses time.Time.Equal, so two values
// in different locations that represent the same instant are considered equal.
func assertEqualTime(t *testing.T, expected, actual time.Time) {
	if expected.Equal(actual) {
		return
	}
	t.Errorf("expected %v but was %v", expected, actual)
}
 | 
			
		||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue