Fix issue 1: Added dependency on go-iconv. This lib is needed to ensure we pass valid UTF-8 encoded data to the XML tokenizer.
This commit is contained in:
parent
bacbff0e71
commit
02d19ed0bd
2
LICENSE
2
LICENSE
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
Copyright (c) 2010, Jim Teeuwen.
|
Copyright (c) 2010-2011, Jim Teeuwen.
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without modification,
|
Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
|
6
README
6
README
|
@ -8,6 +8,12 @@
|
||||||
Nodes now simply become collections and don't require you to read them in the
|
Nodes now simply become collections and don't require you to read them in the
|
||||||
order in which the xml.Parser finds them.
|
order in which the xml.Parser finds them.
|
||||||
|
|
||||||
|
================================================================================
|
||||||
|
DEPENDENCIES
|
||||||
|
================================================================================
|
||||||
|
|
||||||
|
go-iconv: https://github.com/sloonz/go-iconv
|
||||||
|
|
||||||
================================================================================
|
================================================================================
|
||||||
USAGE
|
USAGE
|
||||||
================================================================================
|
================================================================================
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
include $(GOROOT)/src/Make.inc
|
include $(GOROOT)/src/Make.inc
|
||||||
|
|
||||||
TARG = xmlx
|
TARG = xmlx
|
||||||
GOFILES = document.go node.go io.go entitymap.go\
|
GOFILES = document.go node.go entitymap.go
|
||||||
|
|
||||||
include $(GOROOT)/src/Make.pkg
|
include $(GOROOT)/src/Make.pkg
|
||||||
|
|
|
@ -1,41 +1,40 @@
|
||||||
|
// Copyright (c) 2010, Jim Teeuwen. All rights reserved.
|
||||||
|
// This code is subject to a 1-clause BSD license.
|
||||||
|
// The contents of which can be found in the LICENSE file.
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Copyright (c) 2010, Jim Teeuwen.
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
This code is subject to a 1-clause BSD license.
|
|
||||||
The contents of which can be found in the LICENSE file.
|
|
||||||
|
|
||||||
|
|
||||||
This package wraps the standard XML library and uses it to build a node tree of
|
This package wraps the standard XML library and uses it to build a node tree of
|
||||||
any document you load. This allows you to look up nodes forwards and backwards,
|
any document you load. This allows you to look up nodes forwards and backwards,
|
||||||
as well as perform search queries (no xpath support yet).
|
as well as perform simple search queries.
|
||||||
|
|
||||||
Nodes now simply become collections and don't require you to read them in the
|
Nodes now simply become collections and don't require you to read them in the
|
||||||
order in which the xml.Parser finds them.
|
order in which the xml.Parser finds them.
|
||||||
|
|
||||||
The Document currently implements 2 simple search functions which allow you to
|
The Document currently implements 2 search functions which allow you to
|
||||||
look for specific nodes.
|
look for specific nodes.
|
||||||
|
|
||||||
Document.SelectNode(namespace, name string) *Node;
|
*xmlx.Document.SelectNode(namespace, name string) *Node;
|
||||||
Document.SelectNodes(namespace, name string) []*Node;
|
*xmlx.Document.SelectNodes(namespace, name string) []*Node;
|
||||||
|
|
||||||
SelectNode() returns the first, single node it finds matching the given name
|
SelectNode() returns the first, single node it finds matching the given name
|
||||||
and namespace. SelectNodes() returns a slice containing all the matching nodes.
|
and namespace. SelectNodes() returns a slice containing all the matching nodes.
|
||||||
|
|
||||||
Note that these search functions can be invoked on individual nodes as well.
|
Note that these search functions can be invoked on individual nodes as well.
|
||||||
This allows you to search only a subset of the entire document.
|
This allows you to search only a subset of the entire document.
|
||||||
|
|
||||||
*/
|
*/
|
||||||
package xmlx
|
package xmlx
|
||||||
|
|
||||||
import "os"
|
import (
|
||||||
import "io"
|
"os"
|
||||||
import "io/ioutil"
|
"io"
|
||||||
import "path"
|
"io/ioutil"
|
||||||
import "strings"
|
"path"
|
||||||
import "xml"
|
"strings"
|
||||||
import "fmt"
|
"xml"
|
||||||
import "http"
|
"fmt"
|
||||||
|
"http"
|
||||||
|
"iconv"
|
||||||
|
)
|
||||||
|
|
||||||
type Document struct {
|
type Document struct {
|
||||||
Version string
|
Version string
|
||||||
|
@ -82,6 +81,12 @@ func (this *Document) SelectNodes(namespace, name string) []*Node {
|
||||||
// *** Satisfy ILoader interface
|
// *** Satisfy ILoader interface
|
||||||
// *****************************************************************************
|
// *****************************************************************************
|
||||||
func (this *Document) LoadString(s string) (err os.Error) {
|
func (this *Document) LoadString(s string) (err os.Error) {
|
||||||
|
// Ensure we are passing UTF-8 encoded content to the XML tokenizer.
|
||||||
|
if s, err = this.correctEncoding(s); err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// tokenize data
|
||||||
xp := xml.NewParser(strings.NewReader(s))
|
xp := xml.NewParser(strings.NewReader(s))
|
||||||
xp.Entity = this.Entity
|
xp.Entity = this.Entity
|
||||||
|
|
||||||
|
@ -218,3 +223,48 @@ func (this *Document) SaveStream(w io.Writer) (err os.Error) {
|
||||||
_, err = w.Write([]byte(s))
|
_, err = w.Write([]byte(s))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Use libiconv to ensure we get UTF-8 encoded data. The Go Xml tokenizer will
|
||||||
|
// throw a tantrum if we give it anything else.
|
||||||
|
func (this *Document) correctEncoding(data string) (ret string, err os.Error) {
|
||||||
|
var cd *iconv.Iconv
|
||||||
|
var tok xml.Token
|
||||||
|
|
||||||
|
enc := "utf-8"
|
||||||
|
xp := xml.NewParser(strings.NewReader(data))
|
||||||
|
xp.Entity = this.Entity
|
||||||
|
|
||||||
|
loop:
|
||||||
|
for {
|
||||||
|
if tok, err = xp.Token(); err != nil {
|
||||||
|
if err == os.EOF {
|
||||||
|
break loop
|
||||||
|
}
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
switch tt := tok.(type) {
|
||||||
|
case xml.ProcInst:
|
||||||
|
if tt.Target == "xml" { // xml doctype
|
||||||
|
enc = strings.ToLower(string(tt.Inst))
|
||||||
|
if i := strings.Index(enc, `encoding="`); i > -1 {
|
||||||
|
enc = enc[i+len(`encoding="`):]
|
||||||
|
i = strings.Index(enc, `"`)
|
||||||
|
enc = enc[:i]
|
||||||
|
break loop
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if enc == "utf-8" {
|
||||||
|
return data, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if cd, err = iconv.Open("utf-8", enc); err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
defer cd.Close()
|
||||||
|
return cd.Conv(data)
|
||||||
|
}
|
||||||
|
|
|
@ -1,11 +1,6 @@
|
||||||
/*
|
// Copyright (c) 2010, Jim Teeuwen. All rights reserved.
|
||||||
Copyright (c) 2010, Jim Teeuwen.
|
// This code is subject to a 1-clause BSD license.
|
||||||
All rights reserved.
|
// The contents of which can be found in the LICENSE file.
|
||||||
|
|
||||||
This code is subject to a 1-clause BSD license.
|
|
||||||
The contents of which can be found in the LICENSE file.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package xmlx
|
package xmlx
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -20,11 +15,13 @@ package xmlx
|
||||||
"&acirc;" (â) is not the same as "&Acirc;" (Â).
|
"&acirc;" (â) is not the same as "&Acirc;" (Â).
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import "os"
|
import (
|
||||||
import "fmt"
|
"os"
|
||||||
import "utf8"
|
"fmt"
|
||||||
import "regexp"
|
"utf8"
|
||||||
import "strconv"
|
"regexp"
|
||||||
|
"strconv"
|
||||||
|
)
|
||||||
|
|
||||||
var reg_entnumeric = regexp.MustCompile("^&#[0-9]+;$")
|
var reg_entnumeric = regexp.MustCompile("^&#[0-9]+;$")
|
||||||
var reg_entnamed = regexp.MustCompile("^&[a-zA-Z]+;$")
|
var reg_entnamed = regexp.MustCompile("^&[a-zA-Z]+;$")
|
||||||
|
|
30
xmlx/io.go
30
xmlx/io.go
|
@ -1,30 +0,0 @@
|
||||||
/*
|
|
||||||
Copyright (c) 2010, Jim Teeuwen.
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
This code is subject to a 1-clause BSD license.
|
|
||||||
The contents of which can be found in the LICENSE file.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package xmlx
|
|
||||||
|
|
||||||
import "os"
|
|
||||||
import "io"
|
|
||||||
|
|
||||||
type ILoader interface {
|
|
||||||
LoadUrl(string) os.Error
|
|
||||||
LoadFile(string) os.Error
|
|
||||||
LoadString(string) os.Error
|
|
||||||
LoadStream(*io.Reader) os.Error
|
|
||||||
}
|
|
||||||
|
|
||||||
type ISaver interface {
|
|
||||||
SaveFile(string) os.Error
|
|
||||||
SaveString(string) (string, os.Error)
|
|
||||||
SaveStream(*io.Writer) os.Error
|
|
||||||
}
|
|
||||||
|
|
||||||
type ILoaderSaver interface {
|
|
||||||
ILoader
|
|
||||||
ISaver
|
|
||||||
}
|
|
14
xmlx/node.go
14
xmlx/node.go
|
@ -8,12 +8,14 @@ The contents of which can be found in the LICENSE file.
|
||||||
|
|
||||||
package xmlx
|
package xmlx
|
||||||
|
|
||||||
import "os"
|
import (
|
||||||
import "strings"
|
"os"
|
||||||
import "xml"
|
"strings"
|
||||||
import "bytes"
|
"xml"
|
||||||
import "fmt"
|
"bytes"
|
||||||
import "strconv"
|
"fmt"
|
||||||
|
"strconv"
|
||||||
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
NT_ROOT = iota
|
NT_ROOT = iota
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
<!DOCTYPE xml>
|
<!DOCTYPE xml>
|
||||||
<?xml version="1.0" encoding="ISO-8859-1"?>
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
<rss version="0.91">
|
<rss version="0.91">
|
||||||
<channel>
|
<channel>
|
||||||
<title>WriteTheWeb</title>
|
<title>WriteTheWeb</title>
|
||||||
|
|
|
@ -14,7 +14,7 @@ func TestLoadLocal(t *testing.T) {
|
||||||
doc := New()
|
doc := New()
|
||||||
|
|
||||||
if err := doc.LoadFile("test.xml"); err != nil {
|
if err := doc.LoadFile("test.xml"); err != nil {
|
||||||
t.Errorf("%s", err)
|
t.Error(err.String())
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -24,11 +24,11 @@ func TestLoadLocal(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func _TestLoadRemote(t *testing.T) {
|
func TestLoadRemote(t *testing.T) {
|
||||||
doc := New()
|
doc := New()
|
||||||
|
|
||||||
if err := doc.LoadUri("http://www.w3schools.com/xml/plant_catalog.xml"); err != nil {
|
if err := doc.LoadUri("http://rss.cnn.com/rss/cnn_latest.rss"); err != nil {
|
||||||
t.Errorf("%s", err)
|
t.Error(err.String())
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue