Fix bug in document.correctEncoding() which failed to properly parse the encoding attribute of the XML declaration.

This commit is contained in:
jim teeuwen 2011-01-27 22:10:38 +01:00
parent 9b53d04d9d
commit c271c20e08
3 changed files with 22 additions and 11 deletions

9
README
View File

@ -20,10 +20,11 @@
Getting the package up and running is simple enough:
$ cd /path/to/code/dir
$ git clone http://github.com/jteeuwen/go-pkg-xmlx.git
$ cd go-pkg-xmlx
$ make
$ goinstall github.com/jteeuwen/go-pkg-xmlx
Using it:
import xmlx "github.com/jteeuwen/go-pkg-xmlx"
================================================================================
API

View File

@ -240,24 +240,34 @@ loop:
if err == os.EOF {
break loop
}
return "", err
}
switch tt := tok.(type) {
case xml.ProcInst:
if tt.Target == "xml" { // xml doctype
enc = strings.ToLower(string(tt.Inst))
if i := strings.Index(enc, `encoding="`); i > -1 {
enc = enc[i+len(`encoding="`):]
i = strings.Index(enc, `"`)
enc = enc[:i]
break loop
var pair []string
var entry string
list := strings.Split(string(tt.Inst), " ", -1)
for _, entry = range list {
if pair = strings.Split(entry, "=", -1); len(pair) < 2 {
continue
}
switch pair[0] {
case "encoding":
enc = pair[1][1:len(pair[1])-1]
break loop
}
}
}
}
}
if enc == "utf-8" {
// Data already in utf-8 format. Nothing to do here.
return data, nil
}

View File

@ -27,7 +27,7 @@ func TestLoadLocal(t *testing.T) {
func TestLoadRemote(t *testing.T) {
doc := New()
if err := doc.LoadUri("http://rss.cnn.com/rss/cnn_latest.rss"); err != nil {
if err := doc.LoadUri("http://blog.golang.org/feeds/posts/default"); err != nil {
t.Error(err.String())
return
}