Fix bug in document.correctEncoding() which failed to properly parse XML doctypes (i.e. the encoding attribute of the `<?xml ... ?>` declaration).

This commit is contained in:
jim teeuwen 2011-01-27 22:10:38 +01:00
parent 9b53d04d9d
commit c271c20e08
3 changed files with 22 additions and 11 deletions

9
README
View File

@ -20,10 +20,11 @@
Getting the package up and running is simple enough: Getting the package up and running is simple enough:
$ cd /path/to/code/dir $ goinstall github.com/jteeuwen/go-pkg-xmlx
$ git clone http://github.com/jteeuwen/go-pkg-xmlx.git
$ cd go-pkg-xmlx Using it:
$ make
import xmlx "github.com/jteeuwen/go-pkg-xmlx"
================================================================================ ================================================================================
API API

View File

@ -240,24 +240,34 @@ loop:
if err == os.EOF { if err == os.EOF {
break loop break loop
} }
return "", err return "", err
} }
switch tt := tok.(type) { switch tt := tok.(type) {
case xml.ProcInst: case xml.ProcInst:
if tt.Target == "xml" { // xml doctype if tt.Target == "xml" { // xml doctype
enc = strings.ToLower(string(tt.Inst)) var pair []string
if i := strings.Index(enc, `encoding="`); i > -1 { var entry string
enc = enc[i+len(`encoding="`):]
i = strings.Index(enc, `"`) list := strings.Split(string(tt.Inst), " ", -1)
enc = enc[:i] for _, entry = range list {
break loop if pair = strings.Split(entry, "=", -1); len(pair) < 2 {
continue
}
switch pair[0] {
case "encoding":
enc = pair[1][1:len(pair[1])-1]
break loop
}
} }
} }
} }
} }
if enc == "utf-8" { if enc == "utf-8" {
// Data already in utf-8 format. Nothing to do here.
return data, nil return data, nil
} }

View File

@ -27,7 +27,7 @@ func TestLoadLocal(t *testing.T) {
func TestLoadRemote(t *testing.T) { func TestLoadRemote(t *testing.T) {
doc := New() doc := New()
if err := doc.LoadUri("http://rss.cnn.com/rss/cnn_latest.rss"); err != nil { if err := doc.LoadUri("http://blog.golang.org/feeds/posts/default"); err != nil {
t.Error(err.String()) t.Error(err.String())
return return
} }