Refactored some code to be faster and a little more elegant.

This commit is contained in:
jim teeuwen 2010-08-22 05:07:38 +02:00
parent 79794996d0
commit 6d8b8289d8
10 changed files with 438 additions and 420 deletions

12
makefile Normal file
View File

@ -0,0 +1,12 @@
# Convenience targets that forward to the makefile in the xmlx directory.
# None of them produce a file named after the target, so they are declared
# phony; otherwise an existing file or directory called e.g. "test" would
# make the target look up to date and its recipe would be skipped.
.PHONY: all test clean format

# Run the "install" target of the xmlx package.
# $(MAKE) is used instead of a literal "make" so that flags such as -j and -n
# are passed on to the recursive invocation.
all:
	$(MAKE) -C xmlx install

# Run the "test" target of the xmlx package.
test:
	$(MAKE) -C xmlx test

# Run the "clean" target of the xmlx package.
clean:
	$(MAKE) -C xmlx clean

# Reformat every Go source file below this directory in place.
format:
	gofmt -w .

View File

@ -1,311 +0,0 @@
package xmlx
import "fmt"
import "utf8"
import "regexp"
import "strconv"
var reg_entity = regexp.MustCompile("^&#[0-9]+;$")
// HtmlToUTF8 converts a single numerical html entity to a regular Go utf-token.
// ex: "&#9827;" -> "♣"
//
// It returns "" when entity is not of the exact form "&#<decimal digits>;",
// when the number cannot be parsed, or when nothing was encoded.
func HtmlToUTF8(entity string) string {
	// Make sure we have a valid entity of the form "&#<decimal digits>;".
	if !reg_entity.MatchString(entity) {
		return ""
	}

	// Convert the digits between the leading "&#" and the trailing ";".
	num, err := strconv.Atoi(entity[2 : len(entity)-1])
	if err != nil {
		return ""
	}

	// An encoded rune can take up to utf8.UTFMax bytes, so the buffer must be
	// that large; a smaller one overflows for code points above U+FFFF.
	buf := make([]byte, utf8.UTFMax)
	size := utf8.EncodeRune(num, buf)
	if size == 0 {
		return ""
	}

	// Only the first size bytes hold the encoded rune. Converting the whole
	// buffer would append stray NUL bytes to shorter encodings.
	return string(buf[0:size])
}
// UTF8ToHtml converts the first utf-token found in token to its numerical
// html entity.
// ex: "♣" -> "&#9827;"
//
// It returns "" when the decoder reports that zero bytes were consumed.
func UTF8ToHtml(token string) string {
	if r, n := utf8.DecodeRuneInString(token); n != 0 {
		return fmt.Sprintf("&#%d;", r)
	}
	return ""
}
/*
loadNonStandardEntities fills the map that em points to with the html entity
names listed on
http://www.w3.org/TR/html4/sgml/entities.html
each mapped to its Go utf8 equivalent. Keys are the bare entity names, without
the leading "&" and trailing ";". Entries already present under the same name
are overwritten. em must point to an initialised map, because the function
assigns into it directly.

Portions © International Organization for Standardization 1986
Permission to copy in any form is granted for use with
conforming SGML systems and applications as defined in
ISO 8879, provided this notice is included in all copies.

The filled map can be assigned to xml.Parser.Entity, where it is used to map
non-standard xml entities to a proper value. If the parser encounters any
unknown entities, it will throw a syntax error and abort the parsing. Hence
the ability to supply this map.
*/
func loadNonStandardEntities(em *map[string]string) {
(*em)["pi"] = "\u03c0"
(*em)["nabla"] = "\u2207"
(*em)["isin"] = "\u2208"
(*em)["loz"] = "\u25ca"
(*em)["prop"] = "\u221d"
(*em)["para"] = "\u00b6"
(*em)["Aring"] = "\u00c5"
(*em)["euro"] = "\u20ac"
(*em)["sup3"] = "\u00b3"
(*em)["sup2"] = "\u00b2"
(*em)["sup1"] = "\u00b9"
(*em)["prod"] = "\u220f"
(*em)["gamma"] = "\u03b3"
(*em)["perp"] = "\u22a5"
(*em)["lfloor"] = "\u230a"
(*em)["fnof"] = "\u0192"
(*em)["frasl"] = "\u2044"
(*em)["rlm"] = "\u200f"
(*em)["omega"] = "\u03c9"
(*em)["part"] = "\u2202"
(*em)["euml"] = "\u00eb"
(*em)["Kappa"] = "\u039a"
(*em)["nbsp"] = "\u00a0"
(*em)["Eacute"] = "\u00c9"
(*em)["brvbar"] = "\u00a6"
(*em)["otimes"] = "\u2297"
(*em)["ndash"] = "\u2013"
(*em)["thinsp"] = "\u2009"
(*em)["nu"] = "\u03bd"
(*em)["Upsilon"] = "\u03a5"
(*em)["upsih"] = "\u03d2"
(*em)["raquo"] = "\u00bb"
(*em)["yacute"] = "\u00fd"
(*em)["delta"] = "\u03b4"
(*em)["eth"] = "\u00f0"
(*em)["supe"] = "\u2287"
(*em)["ne"] = "\u2260"
(*em)["ni"] = "\u220b"
(*em)["eta"] = "\u03b7"
(*em)["uArr"] = "\u21d1"
(*em)["image"] = "\u2111"
(*em)["asymp"] = "\u2248"
(*em)["oacute"] = "\u00f3"
(*em)["rarr"] = "\u2192"
(*em)["emsp"] = "\u2003"
(*em)["acirc"] = "\u00e2"
(*em)["shy"] = "\u00ad"
(*em)["yuml"] = "\u00ff"
(*em)["acute"] = "\u00b4"
(*em)["int"] = "\u222b"
(*em)["ccedil"] = "\u00e7"
(*em)["Acirc"] = "\u00c2"
(*em)["Ograve"] = "\u00d2"
(*em)["times"] = "\u00d7"
(*em)["weierp"] = "\u2118"
(*em)["Tau"] = "\u03a4"
(*em)["omicron"] = "\u03bf"
(*em)["lt"] = "\u003c"
(*em)["Mu"] = "\u039c"
(*em)["Ucirc"] = "\u00db"
(*em)["sub"] = "\u2282"
(*em)["le"] = "\u2264"
(*em)["sum"] = "\u2211"
(*em)["sup"] = "\u2283"
(*em)["lrm"] = "\u200e"
(*em)["frac34"] = "\u00be"
(*em)["Iota"] = "\u0399"
(*em)["Ugrave"] = "\u00d9"
(*em)["THORN"] = "\u00de"
(*em)["rsaquo"] = "\u203a"
(*em)["not"] = "\u00ac"
(*em)["sigma"] = "\u03c3"
(*em)["iuml"] = "\u00ef"
(*em)["epsilon"] = "\u03b5"
(*em)["spades"] = "\u2660"
(*em)["theta"] = "\u03b8"
(*em)["divide"] = "\u00f7"
(*em)["Atilde"] = "\u00c3"
(*em)["uacute"] = "\u00fa"
(*em)["Rho"] = "\u03a1"
(*em)["trade"] = "\u2122"
(*em)["chi"] = "\u03c7"
(*em)["agrave"] = "\u00e0"
(*em)["or"] = "\u2228"
(*em)["circ"] = "\u02c6"
(*em)["middot"] = "\u00b7"
(*em)["plusmn"] = "\u00b1"
(*em)["aring"] = "\u00e5"
(*em)["lsquo"] = "\u2018"
(*em)["Yacute"] = "\u00dd"
(*em)["oline"] = "\u203e"
(*em)["copy"] = "\u00a9"
(*em)["icirc"] = "\u00ee"
(*em)["lowast"] = "\u2217"
(*em)["Oacute"] = "\u00d3"
(*em)["aacute"] = "\u00e1"
(*em)["oplus"] = "\u2295"
(*em)["crarr"] = "\u21b5"
(*em)["thetasym"] = "\u03d1"
(*em)["Beta"] = "\u0392"
(*em)["laquo"] = "\u00ab"
(*em)["rang"] = "\u232a"
(*em)["tilde"] = "\u02dc"
(*em)["Uuml"] = "\u00dc"
(*em)["zwj"] = "\u200d"
(*em)["mu"] = "\u03bc"
(*em)["Ccedil"] = "\u00c7"
(*em)["infin"] = "\u221e"
(*em)["ouml"] = "\u00f6"
(*em)["rfloor"] = "\u230b"
(*em)["pound"] = "\u00a3"
(*em)["szlig"] = "\u00df"
(*em)["thorn"] = "\u00fe"
(*em)["forall"] = "\u2200"
(*em)["piv"] = "\u03d6"
(*em)["rdquo"] = "\u201d"
(*em)["frac12"] = "\u00bd"
(*em)["frac14"] = "\u00bc"
(*em)["Ocirc"] = "\u00d4"
(*em)["Ecirc"] = "\u00ca"
(*em)["kappa"] = "\u03ba"
(*em)["Euml"] = "\u00cb"
(*em)["minus"] = "\u2212"
(*em)["cong"] = "\u2245"
(*em)["hellip"] = "\u2026"
(*em)["equiv"] = "\u2261"
(*em)["cent"] = "\u00a2"
(*em)["Uacute"] = "\u00da"
(*em)["darr"] = "\u2193"
(*em)["Eta"] = "\u0397"
(*em)["sbquo"] = "\u201a"
(*em)["rArr"] = "\u21d2"
(*em)["igrave"] = "\u00ec"
(*em)["uml"] = "\u00a8"
(*em)["lambda"] = "\u03bb"
(*em)["oelig"] = "\u0153"
(*em)["harr"] = "\u2194"
(*em)["ang"] = "\u2220"
(*em)["clubs"] = "\u2663"
(*em)["and"] = "\u2227"
(*em)["permil"] = "\u2030"
(*em)["larr"] = "\u2190"
(*em)["Yuml"] = "\u0178"
(*em)["cup"] = "\u222a"
(*em)["Xi"] = "\u039e"
(*em)["Alpha"] = "\u0391"
(*em)["phi"] = "\u03c6"
(*em)["ucirc"] = "\u00fb"
(*em)["oslash"] = "\u00f8"
(*em)["rsquo"] = "\u2019"
(*em)["AElig"] = "\u00c6"
(*em)["mdash"] = "\u2014"
(*em)["psi"] = "\u03c8"
(*em)["eacute"] = "\u00e9"
(*em)["otilde"] = "\u00f5"
(*em)["yen"] = "\u00a5"
(*em)["gt"] = "\u003e"
(*em)["Iuml"] = "\u00cf"
(*em)["Prime"] = "\u2033"
(*em)["Chi"] = "\u03a7"
(*em)["ge"] = "\u2265"
(*em)["reg"] = "\u00ae"
(*em)["hearts"] = "\u2665"
(*em)["auml"] = "\u00e4"
(*em)["Agrave"] = "\u00c0"
(*em)["sect"] = "\u00a7"
(*em)["sube"] = "\u2286"
(*em)["sigmaf"] = "\u03c2"
(*em)["Gamma"] = "\u0393"
(*em)["amp"] = "\u0026"
(*em)["ensp"] = "\u2002"
(*em)["ETH"] = "\u00d0"
(*em)["Igrave"] = "\u00cc"
(*em)["Omega"] = "\u03a9"
(*em)["Lambda"] = "\u039b"
(*em)["Omicron"] = "\u039f"
(*em)["there4"] = "\u2234"
(*em)["ntilde"] = "\u00f1"
(*em)["xi"] = "\u03be"
(*em)["dagger"] = "\u2020"
(*em)["egrave"] = "\u00e8"
(*em)["Delta"] = "\u0394"
(*em)["OElig"] = "\u0152"
(*em)["diams"] = "\u2666"
(*em)["ldquo"] = "\u201c"
(*em)["radic"] = "\u221a"
(*em)["Oslash"] = "\u00d8"
(*em)["Ouml"] = "\u00d6"
(*em)["lceil"] = "\u2308"
(*em)["uarr"] = "\u2191"
(*em)["atilde"] = "\u00e3"
(*em)["iquest"] = "\u00bf"
(*em)["lsaquo"] = "\u2039"
(*em)["Epsilon"] = "\u0395"
(*em)["iacute"] = "\u00ed"
(*em)["cap"] = "\u2229"
(*em)["deg"] = "\u00b0"
(*em)["Otilde"] = "\u00d5"
(*em)["zeta"] = "\u03b6"
(*em)["ocirc"] = "\u00f4"
(*em)["scaron"] = "\u0161"
(*em)["ecirc"] = "\u00ea"
(*em)["ordm"] = "\u00ba"
(*em)["tau"] = "\u03c4"
(*em)["Auml"] = "\u00c4"
(*em)["dArr"] = "\u21d3"
(*em)["ordf"] = "\u00aa"
(*em)["alefsym"] = "\u2135"
(*em)["notin"] = "\u2209"
(*em)["Pi"] = "\u03a0"
(*em)["sdot"] = "\u22c5"
(*em)["upsilon"] = "\u03c5"
(*em)["iota"] = "\u03b9"
(*em)["hArr"] = "\u21d4"
(*em)["Sigma"] = "\u03a3"
(*em)["lang"] = "\u2329"
(*em)["curren"] = "\u00a4"
(*em)["Theta"] = "\u0398"
(*em)["lArr"] = "\u21d0"
(*em)["Phi"] = "\u03a6"
(*em)["Nu"] = "\u039d"
(*em)["rho"] = "\u03c1"
(*em)["alpha"] = "\u03b1"
(*em)["iexcl"] = "\u00a1"
(*em)["micro"] = "\u00b5"
(*em)["cedil"] = "\u00b8"
(*em)["Ntilde"] = "\u00d1"
(*em)["Psi"] = "\u03a8"
(*em)["Dagger"] = "\u2021"
(*em)["Egrave"] = "\u00c8"
(*em)["Icirc"] = "\u00ce"
(*em)["nsub"] = "\u2284"
(*em)["bdquo"] = "\u201e"
(*em)["empty"] = "\u2205"
(*em)["aelig"] = "\u00e6"
(*em)["ograve"] = "\u00f2"
(*em)["macr"] = "\u00af"
(*em)["Zeta"] = "\u0396"
(*em)["beta"] = "\u03b2"
(*em)["sim"] = "\u223c"
(*em)["uuml"] = "\u00fc"
(*em)["Aacute"] = "\u00c1"
(*em)["Iacute"] = "\u00cd"
(*em)["exist"] = "\u2203"
(*em)["prime"] = "\u2032"
(*em)["rceil"] = "\u2309"
(*em)["real"] = "\u211c"
(*em)["zwnj"] = "\u200c"
(*em)["bull"] = "\u2022"
(*em)["quot"] = "\u0022"
(*em)["Scaron"] = "\u0160"
(*em)["ugrave"] = "\u00f9"
}

View File

@ -1,8 +1,6 @@
include $(GOROOT)/src/Make.$(GOARCH)
TARG=xmlx
GOFILES=document.go node.go io.go entitymap.go\
include $(GOROOT)/src/Make.pkg

View File

@ -29,6 +29,7 @@ package xmlx
import "os"
import "io"
import "bytes"
import "io/ioutil"
import "path"
import "strings"
@ -62,7 +63,7 @@ func New() *Document {
// set only those entities needed manually using the document.Entity map, but
// if need be, this method can be called to fill the map with the entire set
// defined on http://www.w3.org/TR/html4/sgml/entities.html
func (this *Document) LoadExtendedEntityMap() { loadNonStandardEntities(&this.Entity) }
func (this *Document) LoadExtendedEntityMap() { loadNonStandardEntities(this.Entity) }
func (this *Document) String() string {
s, _ := this.SaveString()
@ -88,6 +89,11 @@ func (this *Document) LoadString(s string) (err os.Error) {
ct := this.Root
var tok xml.Token
var t *Node
var i int
var doctype string
var v xml.Attr
for {
if tok, err = xp.Token(); err != nil {
if err == os.EOF {
@ -104,20 +110,21 @@ func (this *Document) LoadString(s string) (err os.Error) {
case xml.SyntaxError:
return os.NewError(tt.String())
case xml.CharData:
ct.Value = strings.TrimSpace(string(tt))
ct.Value = strings.TrimSpace(string([]byte(tt)))
case xml.Comment:
t := NewNode(NT_COMMENT)
t.Value = strings.TrimSpace(string(tt))
t.Value = strings.TrimSpace(string([]byte(tt)))
ct.AddChild(t)
case xml.Directive:
t := NewNode(NT_DIRECTIVE)
t.Value = strings.TrimSpace(string(tt))
t = NewNode(NT_DIRECTIVE)
t.Value = strings.TrimSpace(string([]byte(tt)))
ct.AddChild(t)
case xml.StartElement:
t := NewNode(NT_ELEMENT)
t = NewNode(NT_ELEMENT)
t.Name = tt.Name
t.Attributes = make([]Attr, len(tt.Attr))
for i, v := range tt.Attr {
t.Attributes = make([]*Attr, len(tt.Attr))
for i, v = range tt.Attr {
t.Attributes[i] = new(Attr)
t.Attributes[i].Name = v.Name
t.Attributes[i].Value = v.Value
}
@ -125,15 +132,14 @@ func (this *Document) LoadString(s string) (err os.Error) {
ct = t
case xml.ProcInst:
if tt.Target == "xml" { // xml doctype
doctype := strings.TrimSpace(string(tt.Inst))
pos := strings.Index(doctype, `standalone="`)
if pos > -1 {
this.StandAlone = doctype[pos+len(`standalone="`) : len(doctype)]
pos = strings.Index(this.StandAlone, `"`)
this.StandAlone = this.StandAlone[0:pos]
doctype = strings.TrimSpace(string(tt.Inst))
if i = strings.Index(doctype, `standalone="`); i > -1 {
this.StandAlone = doctype[i+len(`standalone="`) : len(doctype)]
i = strings.Index(this.StandAlone, `"`)
this.StandAlone = this.StandAlone[0:i]
}
} else {
t := NewNode(NT_PROCINST)
t = NewNode(NT_PROCINST)
t.Target = strings.TrimSpace(tt.Target)
t.Value = strings.TrimSpace(string(tt.Inst))
ct.AddChild(t)
@ -176,17 +182,20 @@ func (this *Document) LoadUri(uri string) (err os.Error) {
}
func (this *Document) LoadStream(r *io.Reader) (err os.Error) {
content := ""
buff := make([]byte, 256)
var data []byte
t := bytes.NewBuffer(data)
s := make([]byte, 1024)
for {
_, err := r.Read(buff)
_, err := r.Read(s)
if err != nil {
break
}
content += string(buff)
t.Write(s)
}
err = this.LoadString(content)
err = this.LoadString(t.String())
return
}
@ -194,19 +203,12 @@ func (this *Document) LoadStream(r *io.Reader) (err os.Error) {
// *** Satisfy ISaver interface
// *****************************************************************************
func (this *Document) SaveFile(path string) (err os.Error) {
file, err := os.Open(path, os.O_WRONLY|os.O_CREAT, 0600)
if err != nil {
return
}
defer file.Close()
content, err := this.SaveString()
if err != nil {
var data string
if data, err = this.SaveString(); err != nil {
return
}
file.Write([]byte(content))
return
return ioutil.WriteFile(path, []byte(data), 0600)
}
func (this *Document) SaveString() (s string, err os.Error) {

311
xmlx/entitymap.go Normal file
View File

@ -0,0 +1,311 @@
package xmlx
import "fmt"
import "utf8"
import "regexp"
import "strconv"
var reg_entity = regexp.MustCompile("^&#[0-9]+;$")
// HtmlToUTF8 converts a single numerical html entity to a regular Go utf-token.
// ex: "&#9827;" -> "♣"
//
// It returns "" when entity is not of the exact form "&#<decimal digits>;",
// when the number cannot be parsed, or when nothing was encoded.
func HtmlToUTF8(entity string) string {
	// Make sure we have a valid entity of the form "&#<decimal digits>;".
	if !reg_entity.MatchString(entity) {
		return ""
	}

	// Convert the digits between the leading "&#" and the trailing ";".
	num, err := strconv.Atoi(entity[2 : len(entity)-1])
	if err != nil {
		return ""
	}

	// EncodeRune writes into the slice it is handed, so the slice must be
	// allocated first; a nil slice has no room for even a single byte.
	// utf8.UTFMax is the largest number of bytes one encoded rune can take.
	buf := make([]byte, utf8.UTFMax)
	size := utf8.EncodeRune(num, buf)
	if size == 0 {
		return ""
	}

	// Only the first size bytes hold the encoded rune. Converting the whole
	// buffer would append stray NUL bytes to shorter encodings.
	return string(buf[0:size])
}
// UTF8ToHtml converts the first utf-token found in token to its numerical
// html entity.
// ex: "♣" -> "&#9827;"
//
// It returns "" when the decoder reports that zero bytes were consumed.
func UTF8ToHtml(token string) string {
	if r, n := utf8.DecodeRuneInString(token); n != 0 {
		return fmt.Sprintf("&#%d;", r)
	}
	return ""
}
/*
loadNonStandardEntities fills the supplied map with the html entity names
listed on
http://www.w3.org/TR/html4/sgml/entities.html
each mapped to its Go utf8 equivalent. Keys are the bare entity names, without
the leading "&" and trailing ";". Entries already present under the same name
are overwritten. em must be an initialised map, because the function assigns
into it directly.

Portions © International Organization for Standardization 1986
Permission to copy in any form is granted for use with
conforming SGML systems and applications as defined in
ISO 8879, provided this notice is included in all copies.

The filled map can be assigned to xml.Parser.Entity, where it is used to map
non-standard xml entities to a proper value. If the parser encounters any
unknown entities, it will throw a syntax error and abort the parsing. Hence
the ability to supply this map.
*/
func loadNonStandardEntities(em map[string]string) {
em["pi"] = "\u03c0"
em["nabla"] = "\u2207"
em["isin"] = "\u2208"
em["loz"] = "\u25ca"
em["prop"] = "\u221d"
em["para"] = "\u00b6"
em["Aring"] = "\u00c5"
em["euro"] = "\u20ac"
em["sup3"] = "\u00b3"
em["sup2"] = "\u00b2"
em["sup1"] = "\u00b9"
em["prod"] = "\u220f"
em["gamma"] = "\u03b3"
em["perp"] = "\u22a5"
em["lfloor"] = "\u230a"
em["fnof"] = "\u0192"
em["frasl"] = "\u2044"
em["rlm"] = "\u200f"
em["omega"] = "\u03c9"
em["part"] = "\u2202"
em["euml"] = "\u00eb"
em["Kappa"] = "\u039a"
em["nbsp"] = "\u00a0"
em["Eacute"] = "\u00c9"
em["brvbar"] = "\u00a6"
em["otimes"] = "\u2297"
em["ndash"] = "\u2013"
em["thinsp"] = "\u2009"
em["nu"] = "\u03bd"
em["Upsilon"] = "\u03a5"
em["upsih"] = "\u03d2"
em["raquo"] = "\u00bb"
em["yacute"] = "\u00fd"
em["delta"] = "\u03b4"
em["eth"] = "\u00f0"
em["supe"] = "\u2287"
em["ne"] = "\u2260"
em["ni"] = "\u220b"
em["eta"] = "\u03b7"
em["uArr"] = "\u21d1"
em["image"] = "\u2111"
em["asymp"] = "\u2248"
em["oacute"] = "\u00f3"
em["rarr"] = "\u2192"
em["emsp"] = "\u2003"
em["acirc"] = "\u00e2"
em["shy"] = "\u00ad"
em["yuml"] = "\u00ff"
em["acute"] = "\u00b4"
em["int"] = "\u222b"
em["ccedil"] = "\u00e7"
em["Acirc"] = "\u00c2"
em["Ograve"] = "\u00d2"
em["times"] = "\u00d7"
em["weierp"] = "\u2118"
em["Tau"] = "\u03a4"
em["omicron"] = "\u03bf"
em["lt"] = "\u003c"
em["Mu"] = "\u039c"
em["Ucirc"] = "\u00db"
em["sub"] = "\u2282"
em["le"] = "\u2264"
em["sum"] = "\u2211"
em["sup"] = "\u2283"
em["lrm"] = "\u200e"
em["frac34"] = "\u00be"
em["Iota"] = "\u0399"
em["Ugrave"] = "\u00d9"
em["THORN"] = "\u00de"
em["rsaquo"] = "\u203a"
em["not"] = "\u00ac"
em["sigma"] = "\u03c3"
em["iuml"] = "\u00ef"
em["epsilon"] = "\u03b5"
em["spades"] = "\u2660"
em["theta"] = "\u03b8"
em["divide"] = "\u00f7"
em["Atilde"] = "\u00c3"
em["uacute"] = "\u00fa"
em["Rho"] = "\u03a1"
em["trade"] = "\u2122"
em["chi"] = "\u03c7"
em["agrave"] = "\u00e0"
em["or"] = "\u2228"
em["circ"] = "\u02c6"
em["middot"] = "\u00b7"
em["plusmn"] = "\u00b1"
em["aring"] = "\u00e5"
em["lsquo"] = "\u2018"
em["Yacute"] = "\u00dd"
em["oline"] = "\u203e"
em["copy"] = "\u00a9"
em["icirc"] = "\u00ee"
em["lowast"] = "\u2217"
em["Oacute"] = "\u00d3"
em["aacute"] = "\u00e1"
em["oplus"] = "\u2295"
em["crarr"] = "\u21b5"
em["thetasym"] = "\u03d1"
em["Beta"] = "\u0392"
em["laquo"] = "\u00ab"
em["rang"] = "\u232a"
em["tilde"] = "\u02dc"
em["Uuml"] = "\u00dc"
em["zwj"] = "\u200d"
em["mu"] = "\u03bc"
em["Ccedil"] = "\u00c7"
em["infin"] = "\u221e"
em["ouml"] = "\u00f6"
em["rfloor"] = "\u230b"
em["pound"] = "\u00a3"
em["szlig"] = "\u00df"
em["thorn"] = "\u00fe"
em["forall"] = "\u2200"
em["piv"] = "\u03d6"
em["rdquo"] = "\u201d"
em["frac12"] = "\u00bd"
em["frac14"] = "\u00bc"
em["Ocirc"] = "\u00d4"
em["Ecirc"] = "\u00ca"
em["kappa"] = "\u03ba"
em["Euml"] = "\u00cb"
em["minus"] = "\u2212"
em["cong"] = "\u2245"
em["hellip"] = "\u2026"
em["equiv"] = "\u2261"
em["cent"] = "\u00a2"
em["Uacute"] = "\u00da"
em["darr"] = "\u2193"
em["Eta"] = "\u0397"
em["sbquo"] = "\u201a"
em["rArr"] = "\u21d2"
em["igrave"] = "\u00ec"
em["uml"] = "\u00a8"
em["lambda"] = "\u03bb"
em["oelig"] = "\u0153"
em["harr"] = "\u2194"
em["ang"] = "\u2220"
em["clubs"] = "\u2663"
em["and"] = "\u2227"
em["permil"] = "\u2030"
em["larr"] = "\u2190"
em["Yuml"] = "\u0178"
em["cup"] = "\u222a"
em["Xi"] = "\u039e"
em["Alpha"] = "\u0391"
em["phi"] = "\u03c6"
em["ucirc"] = "\u00fb"
em["oslash"] = "\u00f8"
em["rsquo"] = "\u2019"
em["AElig"] = "\u00c6"
em["mdash"] = "\u2014"
em["psi"] = "\u03c8"
em["eacute"] = "\u00e9"
em["otilde"] = "\u00f5"
em["yen"] = "\u00a5"
em["gt"] = "\u003e"
em["Iuml"] = "\u00cf"
em["Prime"] = "\u2033"
em["Chi"] = "\u03a7"
em["ge"] = "\u2265"
em["reg"] = "\u00ae"
em["hearts"] = "\u2665"
em["auml"] = "\u00e4"
em["Agrave"] = "\u00c0"
em["sect"] = "\u00a7"
em["sube"] = "\u2286"
em["sigmaf"] = "\u03c2"
em["Gamma"] = "\u0393"
em["amp"] = "\u0026"
em["ensp"] = "\u2002"
em["ETH"] = "\u00d0"
em["Igrave"] = "\u00cc"
em["Omega"] = "\u03a9"
em["Lambda"] = "\u039b"
em["Omicron"] = "\u039f"
em["there4"] = "\u2234"
em["ntilde"] = "\u00f1"
em["xi"] = "\u03be"
em["dagger"] = "\u2020"
em["egrave"] = "\u00e8"
em["Delta"] = "\u0394"
em["OElig"] = "\u0152"
em["diams"] = "\u2666"
em["ldquo"] = "\u201c"
em["radic"] = "\u221a"
em["Oslash"] = "\u00d8"
em["Ouml"] = "\u00d6"
em["lceil"] = "\u2308"
em["uarr"] = "\u2191"
em["atilde"] = "\u00e3"
em["iquest"] = "\u00bf"
em["lsaquo"] = "\u2039"
em["Epsilon"] = "\u0395"
em["iacute"] = "\u00ed"
em["cap"] = "\u2229"
em["deg"] = "\u00b0"
em["Otilde"] = "\u00d5"
em["zeta"] = "\u03b6"
em["ocirc"] = "\u00f4"
em["scaron"] = "\u0161"
em["ecirc"] = "\u00ea"
em["ordm"] = "\u00ba"
em["tau"] = "\u03c4"
em["Auml"] = "\u00c4"
em["dArr"] = "\u21d3"
em["ordf"] = "\u00aa"
em["alefsym"] = "\u2135"
em["notin"] = "\u2209"
em["Pi"] = "\u03a0"
em["sdot"] = "\u22c5"
em["upsilon"] = "\u03c5"
em["iota"] = "\u03b9"
em["hArr"] = "\u21d4"
em["Sigma"] = "\u03a3"
em["lang"] = "\u2329"
em["curren"] = "\u00a4"
em["Theta"] = "\u0398"
em["lArr"] = "\u21d0"
em["Phi"] = "\u03a6"
em["Nu"] = "\u039d"
em["rho"] = "\u03c1"
em["alpha"] = "\u03b1"
em["iexcl"] = "\u00a1"
em["micro"] = "\u00b5"
em["cedil"] = "\u00b8"
em["Ntilde"] = "\u00d1"
em["Psi"] = "\u03a8"
em["Dagger"] = "\u2021"
em["Egrave"] = "\u00c8"
em["Icirc"] = "\u00ce"
em["nsub"] = "\u2284"
em["bdquo"] = "\u201e"
em["empty"] = "\u2205"
em["aelig"] = "\u00e6"
em["ograve"] = "\u00f2"
em["macr"] = "\u00af"
em["Zeta"] = "\u0396"
em["beta"] = "\u03b2"
em["sim"] = "\u223c"
em["uuml"] = "\u00fc"
em["Aacute"] = "\u00c1"
em["Iacute"] = "\u00cd"
em["exist"] = "\u2203"
em["prime"] = "\u2032"
em["rceil"] = "\u2309"
em["real"] = "\u211c"
em["zwnj"] = "\u200c"
em["bull"] = "\u2022"
em["quot"] = "\u0022"
em["Scaron"] = "\u0160"
em["ugrave"] = "\u00f9"
}

View File

@ -3,15 +3,16 @@ package xmlx
import "os"
import "strings"
import "xml"
import "bytes"
import "fmt"
import "strconv"
const (
NT_ROOT = 0x00
NT_DIRECTIVE = 0x01
NT_PROCINST = 0x02
NT_COMMENT = 0x03
NT_ELEMENT = 0x04
NT_ROOT = iota
NT_DIRECTIVE
NT_PROCINST
NT_COMMENT
NT_ELEMENT
)
type Attr struct {
@ -23,13 +24,19 @@ type Node struct {
Type byte
Name xml.Name
Children []*Node
Attributes []Attr
Attributes []*Attr
Parent *Node
Value string
Target string // procinst field
}
func NewNode(tid byte) *Node { return &Node{Type: tid} }
func NewNode(tid byte) *Node {
n := new(Node)
n.Type = tid
n.Children = make([]*Node, 0, 10)
n.Attributes = make([]*Attr, 0, 10)
return n
}
// This wraps the standard xml.Unmarshal function and supplies this particular
// node as the content to be unmarshalled.
@ -49,10 +56,7 @@ func (this *Node) GetValue(namespace, name string) string {
// Get node value as int
func (this *Node) GetValuei(namespace, name string) int {
node := rec_SelectNode(this, namespace, name)
if node == nil {
return 0
}
if node.Value == "" {
if node == nil || node.Value == "" {
return 0
}
n, _ := strconv.Atoi(node.Value)
@ -62,10 +66,7 @@ func (this *Node) GetValuei(namespace, name string) int {
// Get node value as int64
func (this *Node) GetValuei64(namespace, name string) int64 {
node := rec_SelectNode(this, namespace, name)
if node == nil {
return 0
}
if node.Value == "" {
if node == nil || node.Value == "" {
return 0
}
n, _ := strconv.Atoi64(node.Value)
@ -75,10 +76,7 @@ func (this *Node) GetValuei64(namespace, name string) int64 {
// Get node value as uint
func (this *Node) GetValueui(namespace, name string) uint {
node := rec_SelectNode(this, namespace, name)
if node == nil {
return 0
}
if node.Value == "" {
if node == nil || node.Value == "" {
return 0
}
n, _ := strconv.Atoui(node.Value)
@ -88,10 +86,7 @@ func (this *Node) GetValueui(namespace, name string) uint {
// Get node value as uint64
func (this *Node) GetValueui64(namespace, name string) uint64 {
node := rec_SelectNode(this, namespace, name)
if node == nil {
return 0
}
if node.Value == "" {
if node == nil || node.Value == "" {
return 0
}
n, _ := strconv.Atoui64(node.Value)
@ -101,10 +96,7 @@ func (this *Node) GetValueui64(namespace, name string) uint64 {
// Get node value as float
func (this *Node) GetValuef(namespace, name string) float {
node := rec_SelectNode(this, namespace, name)
if node == nil {
return 0
}
if node.Value == "" {
if node == nil || node.Value == "" {
return 0
}
n, _ := strconv.Atof(node.Value)
@ -114,10 +106,7 @@ func (this *Node) GetValuef(namespace, name string) float {
// Get node value as float32
func (this *Node) GetValuef32(namespace, name string) float32 {
node := rec_SelectNode(this, namespace, name)
if node == nil {
return 0
}
if node.Value == "" {
if node == nil || node.Value == "" {
return 0
}
n, _ := strconv.Atof32(node.Value)
@ -127,10 +116,7 @@ func (this *Node) GetValuef32(namespace, name string) float32 {
// Get node value as float64
func (this *Node) GetValuef64(namespace, name string) float64 {
node := rec_SelectNode(this, namespace, name)
if node == nil {
return 0
}
if node.Value == "" {
if node == nil || node.Value == "" {
return 0
}
n, _ := strconv.Atof64(node.Value)
@ -237,9 +223,9 @@ func rec_SelectNode(cn *Node, namespace, name string) *Node {
return cn
}
var tn *Node
for _, v := range cn.Children {
tn := rec_SelectNode(v, namespace, name)
if tn != nil {
if tn = rec_SelectNode(v, namespace, name); tn != nil {
return tn
}
}
@ -248,17 +234,21 @@ func rec_SelectNode(cn *Node, namespace, name string) *Node {
// Select multiple nodes by name
func (this *Node) SelectNodes(namespace, name string) []*Node {
list := make([]*Node, 0)
list := make([]*Node, 0, 16)
rec_SelectNodes(this, namespace, name, &list)
return list
}
func rec_SelectNodes(cn *Node, namespace, name string, list *[]*Node) {
if cn.Name.Space == namespace && cn.Name.Local == name {
c := make([]*Node, len(*list)+1)
copy(c, *list)
c[len(c)-1] = cn
*list = c
l := len(*list)
if l >= cap(*list) {
c := make([]*Node, l, l+16)
copy(c, *list)
*list = c
}
*list = (*list)[0 : l+1]
(*list)[l] = cn
return
}
@ -288,60 +278,73 @@ func (this *Node) String() (s string) {
}
func (this *Node) printRoot() (s string) {
var data []byte
buf := bytes.NewBuffer(data)
for _, v := range this.Children {
s += v.String()
buf.WriteString(v.String())
}
return
return buf.String()
}
func (this *Node) printProcInst() (s string) {
s = "<?" + this.Target + " " + this.Value + "?>"
return
func (this *Node) printProcInst() string {
return "<?" + this.Target + " " + this.Value + "?>"
}
func (this *Node) printComment() (s string) {
s = "<!-- " + this.Value + " -->"
return
func (this *Node) printComment() string {
return "<!-- " + this.Value + " -->"
}
func (this *Node) printDirective() (s string) {
s = "<!" + this.Value + "!>"
return
func (this *Node) printDirective() string {
return "<!" + this.Value + "!>"
}
func (this *Node) printElement() (s string) {
func (this *Node) printElement() string {
var data []byte
buf := bytes.NewBuffer(data)
if len(this.Name.Space) > 0 {
s = "<" + this.Name.Space + ":" + this.Name.Local
buf.WriteRune('<')
buf.WriteString(this.Name.Space)
buf.WriteRune(':')
buf.WriteString(this.Name.Local)
} else {
s = "<" + this.Name.Local
buf.WriteRune('<')
buf.WriteString(this.Name.Local)
}
for _, v := range this.Attributes {
if len(v.Name.Space) > 0 {
s += fmt.Sprintf(` %s:%s="%s"`, v.Name.Space, v.Name.Local, v.Value)
buf.WriteString(fmt.Sprintf(` %s:%s="%s"`, v.Name.Space, v.Name.Local, v.Value))
} else {
s += fmt.Sprintf(` %s="%s"`, v.Name.Local, v.Value)
buf.WriteString(fmt.Sprintf(` %s="%s"`, v.Name.Local, v.Value))
}
}
if len(this.Children) == 0 && len(this.Value) == 0 {
s += " />"
return
buf.WriteString(" />")
return buf.String()
}
s += ">"
buf.WriteRune('>')
for _, v := range this.Children {
s += v.String()
buf.WriteString(v.String())
}
s += this.Value
buf.WriteString(this.Value)
if len(this.Name.Space) > 0 {
s += "</" + this.Name.Space + ":" + this.Name.Local + ">"
buf.WriteString("</")
buf.WriteString(this.Name.Space)
buf.WriteRune(':')
buf.WriteString(this.Name.Local)
buf.WriteRune('>')
} else {
s += "</" + this.Name.Local + ">"
buf.WriteString("</")
buf.WriteString(this.Name.Local)
buf.WriteRune('>')
}
return
return buf.String()
}
// Add a child node
@ -351,10 +354,15 @@ func (this *Node) AddChild(t *Node) {
}
t.Parent = this
c := make([]*Node, len(this.Children)+1)
copy(c, this.Children)
c[len(c)-1] = t
this.Children = c
l := len(this.Children)
if l >= cap(this.Children) {
c := make([]*Node, l, l+10)
copy(c, this.Children)
this.Children = c
}
this.Children = this.Children[0 : l+1]
this.Children[l] = t
}
// Remove a child node
@ -371,10 +379,8 @@ func (this *Node) RemoveChild(t *Node) {
return
}
c := make([]*Node, len(this.Children)-1)
copy(c, this.Children[0:p])
copy(c[p:], this.Children[p+1:])
this.Children = c
copy(this.Children[p:], this.Children[p+1:])
this.Children = this.Children[0 : len(this.Children)-1]
t.Parent = nil
}

1
xmlx/test1.xml Normal file
View File

@ -0,0 +1 @@
<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="0.91"><channel><title>WriteTheWeb</title><link>http://writetheweb.com</link><description>News for web users that write back</description><language>en-us</language><copyright>Copyright 2000, WriteTheWeb team.</copyright><managingEditor>editor@writetheweb.com</managingEditor><webMaster>webmaster@writetheweb.com</webMaster><image><title>WriteTheWeb</title><url>http://writetheweb.com/images/mynetscape88.gif</url><link>http://writetheweb.com</link><width>88</width><height>31</height><description>News for web users that write back</description></image><item><title>Giving the world a pluggable Gnutella</title><link>http://writetheweb.com/read.php?item=24</link><description>WorldOS is a framework on which to build programs that work like Freenet or Gnutella -allowing distributed applications using peer-to-peer routing.</description></item><item><title>Syndication discussions hot up</title><link>http://writetheweb.com/read.php?item=23</link><description>After a period of dormancy, the Syndication mailing list has become active again, with contributions from leaders in traditional media and Web syndication.</description></item><item><title>Personal web server integrates file sharing and messaging</title><link>http://writetheweb.com/read.php?item=22</link><description>The Magi Project is an innovative project to create a combined personal web server and messaging system that enables the sharing and synchronization of information across desktop, laptop and palmtop devices.</description></item><item><title>Syndication and Metadata</title><link>http://writetheweb.com/read.php?item=21</link><description>RSS is probably the best known metadata format around. RDF is probably one of the least understood. 
In this essay, published on my O'Reilly Network weblog, I argue that the next generation of RSS should be based on RDF.</description></item><item><title>UK bloggers get organised</title><link>http://writetheweb.com/read.php?item=20</link><description>Looks like the weblogs scene is gathering pace beyond the shores of the US. There's now a UK-specific page on weblogs.com, and a mailing list at egroups.</description></item><item><title>Yournamehere.com more important than anything</title><link>http://writetheweb.com/read.php?item=19</link><description>Whatever you're publishing on the web, your site name is the most valuable asset you have, according to Carl Steadman.</description></item></channel></rss>

View File

@ -16,7 +16,7 @@ func TestLoadLocal(t *testing.T) {
}
}
func TestLoadRemote(t *testing.T) {
func _TestLoadRemote(t *testing.T) {
doc := New()
if err := doc.LoadUri("http://www.w3schools.com/xml/plant_catalog.xml"); err != nil {
@ -89,8 +89,7 @@ func TestUnmarshal(t *testing.T) {
}
img := Image{}
err = node.Unmarshal(&img)
if err != nil {
if err = node.Unmarshal(&img); err != nil {
t.Errorf("Unmarshal(): %s", err)
return
}