Fix bug in document.correctEncoding() which failed to properly parse the encoding attribute of the XML declaration (<?xml ... ?>).
This commit is contained in:
parent
9b53d04d9d
commit
c271c20e08
9
README
9
README
|
@@ -20,10 +20,11 @@
|
|||
|
||||
Getting the package up and running is simple enough:
|
||||
|
||||
$ cd /path/to/code/dir
|
||||
$ git clone http://github.com/jteeuwen/go-pkg-xmlx.git
|
||||
$ cd go-pkg-xmlx
|
||||
$ make
|
||||
$ goinstall github.com/jteeuwen/go-pkg-xmlx
|
||||
|
||||
Using it:
|
||||
|
||||
import xmlx "github.com/jteeuwen/go-pkg-xmlx"
|
||||
|
||||
================================================================================
|
||||
API
|
||||
|
|
22
document.go
22
document.go
|
@@ -240,24 +240,34 @@ loop:
|
|||
if err == os.EOF {
|
||||
break loop
|
||||
}
|
||||
|
||||
return "", err
|
||||
}
|
||||
|
||||
switch tt := tok.(type) {
|
||||
case xml.ProcInst:
|
||||
if tt.Target == "xml" { // xml doctype
|
||||
enc = strings.ToLower(string(tt.Inst))
|
||||
if i := strings.Index(enc, `encoding="`); i > -1 {
|
||||
enc = enc[i+len(`encoding="`):]
|
||||
i = strings.Index(enc, `"`)
|
||||
enc = enc[:i]
|
||||
break loop
|
||||
var pair []string
|
||||
var entry string
|
||||
|
||||
list := strings.Split(string(tt.Inst), " ", -1)
|
||||
for _, entry = range list {
|
||||
if pair = strings.Split(entry, "=", -1); len(pair) < 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
switch pair[0] {
|
||||
case "encoding":
|
||||
enc = pair[1][1:len(pair[1])-1]
|
||||
break loop
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if enc == "utf-8" {
|
||||
// Data already in utf-8 format. Nothing to do here.
|
||||
return data, nil
|
||||
}
|
||||
|
||||
|
|
|
@@ -27,7 +27,7 @@ func TestLoadLocal(t *testing.T) {
|
|||
func TestLoadRemote(t *testing.T) {
|
||||
doc := New()
|
||||
|
||||
if err := doc.LoadUri("http://rss.cnn.com/rss/cnn_latest.rss"); err != nil {
|
||||
if err := doc.LoadUri("http://blog.golang.org/feeds/posts/default"); err != nil {
|
||||
t.Error(err.String())
|
||||
return
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue