From 6d8b8289d8ba80578e33c04d132845dba6f527a9 Mon Sep 17 00:00:00 2001 From: jim teeuwen Date: Sun, 22 Aug 2010 05:07:38 +0200 Subject: [PATCH] Refactored some code to be faster and a little more elegant. --- makefile | 12 ++ src/entitymap.go | 311 ------------------------------------- {src => xmlx}/Makefile | 2 - {src => xmlx}/document.go | 62 ++++---- xmlx/entitymap.go | 311 +++++++++++++++++++++++++++++++++++++ {src => xmlx}/io.go | 0 {src => xmlx}/node.go | 154 +++++++++--------- {src => xmlx}/test.xml | 0 xmlx/test1.xml | 1 + {src => xmlx}/xmlx_test.go | 5 +- 10 files changed, 438 insertions(+), 420 deletions(-) create mode 100644 makefile delete mode 100644 src/entitymap.go rename {src => xmlx}/Makefile (98%) rename {src => xmlx}/document.go (83%) create mode 100644 xmlx/entitymap.go rename {src => xmlx}/io.go (100%) rename {src => xmlx}/node.go (73%) rename {src => xmlx}/test.xml (100%) create mode 100644 xmlx/test1.xml rename {src => xmlx}/xmlx_test.go (95%) diff --git a/makefile b/makefile new file mode 100644 index 0000000..f1578b2 --- /dev/null +++ b/makefile @@ -0,0 +1,12 @@ + +all: + make -C xmlx install + +test: + make -C xmlx test + +clean: + make -C xmlx clean + +format: + gofmt -w . diff --git a/src/entitymap.go b/src/entitymap.go deleted file mode 100644 index 14aef1f..0000000 --- a/src/entitymap.go +++ /dev/null @@ -1,311 +0,0 @@ -package xmlx - -import "fmt" -import "utf8" -import "regexp" -import "strconv" - -var reg_entity = regexp.MustCompile("^&#[0-9]+;$") - -// Converts a single numerical html entity to a regular Go utf-token. 
-// ex: "&#9827;" -> "♣" -func HtmlToUTF8(entity string) string { - // Make sure we have a valid entity: &#123; - ok := reg_entity.MatchString(entity) - if !ok { - return "" - } - - // Convert entity to number - num, err := strconv.Atoi(entity[2 : len(entity)-1]) - if err != nil { - return "" - } - - var arr [3]byte - size := utf8.EncodeRune(num, &arr) - if size == 0 { - return "" - } - - return string(&arr) -} - -// Converts a single Go utf-token to it's an Html entity. -// ex: "♣" -> "&#9827;" -func UTF8ToHtml(token string) string { - rune, size := utf8.DecodeRuneInString(token) - if size == 0 { - return "" - } - return fmt.Sprintf("&#%d;", rune) -} - -/* - http://www.w3.org/TR/html4/sgml/entities.html - - Portions © International Organization for Standardization 1986 - Permission to copy in any form is granted for use with - conforming SGML systems and applications as defined in - ISO 8879, provided this notice is included in all copies. - - Fills the supplied map with html entities mapped to their Go utf8 - equivalents. This map can be assigned to xml.Parser.Entity - It will be used to map non-standard xml entities to a proper value. - If the parser encounters any unknown entities, it will throw a syntax - error and abort the parsing. Hence the ability to supply this map. 
-*/ -func loadNonStandardEntities(em *map[string]string) { - (*em)["pi"] = "\u03c0" - (*em)["nabla"] = "\u2207" - (*em)["isin"] = "\u2208" - (*em)["loz"] = "\u25ca" - (*em)["prop"] = "\u221d" - (*em)["para"] = "\u00b6" - (*em)["Aring"] = "\u00c5" - (*em)["euro"] = "\u20ac" - (*em)["sup3"] = "\u00b3" - (*em)["sup2"] = "\u00b2" - (*em)["sup1"] = "\u00b9" - (*em)["prod"] = "\u220f" - (*em)["gamma"] = "\u03b3" - (*em)["perp"] = "\u22a5" - (*em)["lfloor"] = "\u230a" - (*em)["fnof"] = "\u0192" - (*em)["frasl"] = "\u2044" - (*em)["rlm"] = "\u200f" - (*em)["omega"] = "\u03c9" - (*em)["part"] = "\u2202" - (*em)["euml"] = "\u00eb" - (*em)["Kappa"] = "\u039a" - (*em)["nbsp"] = "\u00a0" - (*em)["Eacute"] = "\u00c9" - (*em)["brvbar"] = "\u00a6" - (*em)["otimes"] = "\u2297" - (*em)["ndash"] = "\u2013" - (*em)["thinsp"] = "\u2009" - (*em)["nu"] = "\u03bd" - (*em)["Upsilon"] = "\u03a5" - (*em)["upsih"] = "\u03d2" - (*em)["raquo"] = "\u00bb" - (*em)["yacute"] = "\u00fd" - (*em)["delta"] = "\u03b4" - (*em)["eth"] = "\u00f0" - (*em)["supe"] = "\u2287" - (*em)["ne"] = "\u2260" - (*em)["ni"] = "\u220b" - (*em)["eta"] = "\u03b7" - (*em)["uArr"] = "\u21d1" - (*em)["image"] = "\u2111" - (*em)["asymp"] = "\u2248" - (*em)["oacute"] = "\u00f3" - (*em)["rarr"] = "\u2192" - (*em)["emsp"] = "\u2003" - (*em)["acirc"] = "\u00e2" - (*em)["shy"] = "\u00ad" - (*em)["yuml"] = "\u00ff" - (*em)["acute"] = "\u00b4" - (*em)["int"] = "\u222b" - (*em)["ccedil"] = "\u00e7" - (*em)["Acirc"] = "\u00c2" - (*em)["Ograve"] = "\u00d2" - (*em)["times"] = "\u00d7" - (*em)["weierp"] = "\u2118" - (*em)["Tau"] = "\u03a4" - (*em)["omicron"] = "\u03bf" - (*em)["lt"] = "\u003c" - (*em)["Mu"] = "\u039c" - (*em)["Ucirc"] = "\u00db" - (*em)["sub"] = "\u2282" - (*em)["le"] = "\u2264" - (*em)["sum"] = "\u2211" - (*em)["sup"] = "\u2283" - (*em)["lrm"] = "\u200e" - (*em)["frac34"] = "\u00be" - (*em)["Iota"] = "\u0399" - (*em)["Ugrave"] = "\u00d9" - (*em)["THORN"] = "\u00de" - (*em)["rsaquo"] = "\u203a" - (*em)["not"] = "\u00ac" 
- (*em)["sigma"] = "\u03c3" - (*em)["iuml"] = "\u00ef" - (*em)["epsilon"] = "\u03b5" - (*em)["spades"] = "\u2660" - (*em)["theta"] = "\u03b8" - (*em)["divide"] = "\u00f7" - (*em)["Atilde"] = "\u00c3" - (*em)["uacute"] = "\u00fa" - (*em)["Rho"] = "\u03a1" - (*em)["trade"] = "\u2122" - (*em)["chi"] = "\u03c7" - (*em)["agrave"] = "\u00e0" - (*em)["or"] = "\u2228" - (*em)["circ"] = "\u02c6" - (*em)["middot"] = "\u00b7" - (*em)["plusmn"] = "\u00b1" - (*em)["aring"] = "\u00e5" - (*em)["lsquo"] = "\u2018" - (*em)["Yacute"] = "\u00dd" - (*em)["oline"] = "\u203e" - (*em)["copy"] = "\u00a9" - (*em)["icirc"] = "\u00ee" - (*em)["lowast"] = "\u2217" - (*em)["Oacute"] = "\u00d3" - (*em)["aacute"] = "\u00e1" - (*em)["oplus"] = "\u2295" - (*em)["crarr"] = "\u21b5" - (*em)["thetasym"] = "\u03d1" - (*em)["Beta"] = "\u0392" - (*em)["laquo"] = "\u00ab" - (*em)["rang"] = "\u232a" - (*em)["tilde"] = "\u02dc" - (*em)["Uuml"] = "\u00dc" - (*em)["zwj"] = "\u200d" - (*em)["mu"] = "\u03bc" - (*em)["Ccedil"] = "\u00c7" - (*em)["infin"] = "\u221e" - (*em)["ouml"] = "\u00f6" - (*em)["rfloor"] = "\u230b" - (*em)["pound"] = "\u00a3" - (*em)["szlig"] = "\u00df" - (*em)["thorn"] = "\u00fe" - (*em)["forall"] = "\u2200" - (*em)["piv"] = "\u03d6" - (*em)["rdquo"] = "\u201d" - (*em)["frac12"] = "\u00bd" - (*em)["frac14"] = "\u00bc" - (*em)["Ocirc"] = "\u00d4" - (*em)["Ecirc"] = "\u00ca" - (*em)["kappa"] = "\u03ba" - (*em)["Euml"] = "\u00cb" - (*em)["minus"] = "\u2212" - (*em)["cong"] = "\u2245" - (*em)["hellip"] = "\u2026" - (*em)["equiv"] = "\u2261" - (*em)["cent"] = "\u00a2" - (*em)["Uacute"] = "\u00da" - (*em)["darr"] = "\u2193" - (*em)["Eta"] = "\u0397" - (*em)["sbquo"] = "\u201a" - (*em)["rArr"] = "\u21d2" - (*em)["igrave"] = "\u00ec" - (*em)["uml"] = "\u00a8" - (*em)["lambda"] = "\u03bb" - (*em)["oelig"] = "\u0153" - (*em)["harr"] = "\u2194" - (*em)["ang"] = "\u2220" - (*em)["clubs"] = "\u2663" - (*em)["and"] = "\u2227" - (*em)["permil"] = "\u2030" - (*em)["larr"] = "\u2190" - (*em)["Yuml"] = 
"\u0178" - (*em)["cup"] = "\u222a" - (*em)["Xi"] = "\u039e" - (*em)["Alpha"] = "\u0391" - (*em)["phi"] = "\u03c6" - (*em)["ucirc"] = "\u00fb" - (*em)["oslash"] = "\u00f8" - (*em)["rsquo"] = "\u2019" - (*em)["AElig"] = "\u00c6" - (*em)["mdash"] = "\u2014" - (*em)["psi"] = "\u03c8" - (*em)["eacute"] = "\u00e9" - (*em)["otilde"] = "\u00f5" - (*em)["yen"] = "\u00a5" - (*em)["gt"] = "\u003e" - (*em)["Iuml"] = "\u00cf" - (*em)["Prime"] = "\u2033" - (*em)["Chi"] = "\u03a7" - (*em)["ge"] = "\u2265" - (*em)["reg"] = "\u00ae" - (*em)["hearts"] = "\u2665" - (*em)["auml"] = "\u00e4" - (*em)["Agrave"] = "\u00c0" - (*em)["sect"] = "\u00a7" - (*em)["sube"] = "\u2286" - (*em)["sigmaf"] = "\u03c2" - (*em)["Gamma"] = "\u0393" - (*em)["amp"] = "\u0026" - (*em)["ensp"] = "\u2002" - (*em)["ETH"] = "\u00d0" - (*em)["Igrave"] = "\u00cc" - (*em)["Omega"] = "\u03a9" - (*em)["Lambda"] = "\u039b" - (*em)["Omicron"] = "\u039f" - (*em)["there4"] = "\u2234" - (*em)["ntilde"] = "\u00f1" - (*em)["xi"] = "\u03be" - (*em)["dagger"] = "\u2020" - (*em)["egrave"] = "\u00e8" - (*em)["Delta"] = "\u0394" - (*em)["OElig"] = "\u0152" - (*em)["diams"] = "\u2666" - (*em)["ldquo"] = "\u201c" - (*em)["radic"] = "\u221a" - (*em)["Oslash"] = "\u00d8" - (*em)["Ouml"] = "\u00d6" - (*em)["lceil"] = "\u2308" - (*em)["uarr"] = "\u2191" - (*em)["atilde"] = "\u00e3" - (*em)["iquest"] = "\u00bf" - (*em)["lsaquo"] = "\u2039" - (*em)["Epsilon"] = "\u0395" - (*em)["iacute"] = "\u00ed" - (*em)["cap"] = "\u2229" - (*em)["deg"] = "\u00b0" - (*em)["Otilde"] = "\u00d5" - (*em)["zeta"] = "\u03b6" - (*em)["ocirc"] = "\u00f4" - (*em)["scaron"] = "\u0161" - (*em)["ecirc"] = "\u00ea" - (*em)["ordm"] = "\u00ba" - (*em)["tau"] = "\u03c4" - (*em)["Auml"] = "\u00c4" - (*em)["dArr"] = "\u21d3" - (*em)["ordf"] = "\u00aa" - (*em)["alefsym"] = "\u2135" - (*em)["notin"] = "\u2209" - (*em)["Pi"] = "\u03a0" - (*em)["sdot"] = "\u22c5" - (*em)["upsilon"] = "\u03c5" - (*em)["iota"] = "\u03b9" - (*em)["hArr"] = "\u21d4" - (*em)["Sigma"] = "\u03a3" 
- (*em)["lang"] = "\u2329" - (*em)["curren"] = "\u00a4" - (*em)["Theta"] = "\u0398" - (*em)["lArr"] = "\u21d0" - (*em)["Phi"] = "\u03a6" - (*em)["Nu"] = "\u039d" - (*em)["rho"] = "\u03c1" - (*em)["alpha"] = "\u03b1" - (*em)["iexcl"] = "\u00a1" - (*em)["micro"] = "\u00b5" - (*em)["cedil"] = "\u00b8" - (*em)["Ntilde"] = "\u00d1" - (*em)["Psi"] = "\u03a8" - (*em)["Dagger"] = "\u2021" - (*em)["Egrave"] = "\u00c8" - (*em)["Icirc"] = "\u00ce" - (*em)["nsub"] = "\u2284" - (*em)["bdquo"] = "\u201e" - (*em)["empty"] = "\u2205" - (*em)["aelig"] = "\u00e6" - (*em)["ograve"] = "\u00f2" - (*em)["macr"] = "\u00af" - (*em)["Zeta"] = "\u0396" - (*em)["beta"] = "\u03b2" - (*em)["sim"] = "\u223c" - (*em)["uuml"] = "\u00fc" - (*em)["Aacute"] = "\u00c1" - (*em)["Iacute"] = "\u00cd" - (*em)["exist"] = "\u2203" - (*em)["prime"] = "\u2032" - (*em)["rceil"] = "\u2309" - (*em)["real"] = "\u211c" - (*em)["zwnj"] = "\u200c" - (*em)["bull"] = "\u2022" - (*em)["quot"] = "\u0022" - (*em)["Scaron"] = "\u0160" - (*em)["ugrave"] = "\u00f9" -} diff --git a/src/Makefile b/xmlx/Makefile similarity index 98% rename from src/Makefile rename to xmlx/Makefile index 6d841a3..a9284c6 100644 --- a/src/Makefile +++ b/xmlx/Makefile @@ -1,8 +1,6 @@ - include $(GOROOT)/src/Make.$(GOARCH) TARG=xmlx GOFILES=document.go node.go io.go entitymap.go\ - include $(GOROOT)/src/Make.pkg diff --git a/src/document.go b/xmlx/document.go similarity index 83% rename from src/document.go rename to xmlx/document.go index cf89829..c52d988 100644 --- a/src/document.go +++ b/xmlx/document.go @@ -29,6 +29,7 @@ package xmlx import "os" import "io" +import "bytes" import "io/ioutil" import "path" import "strings" @@ -62,7 +63,7 @@ func New() *Document { // set only those entities needed manually using the document.Entity map, but // if need be, this method can be called to fill the map with the entire set // defined on http://www.w3.org/TR/html4/sgml/entities.html -func (this *Document) LoadExtendedEntityMap() { 
loadNonStandardEntities(&this.Entity) } +func (this *Document) LoadExtendedEntityMap() { loadNonStandardEntities(this.Entity) } func (this *Document) String() string { s, _ := this.SaveString() @@ -88,6 +89,11 @@ func (this *Document) LoadString(s string) (err os.Error) { ct := this.Root var tok xml.Token + var t *Node + var i int + var doctype string + var v xml.Attr + for { if tok, err = xp.Token(); err != nil { if err == os.EOF { @@ -104,20 +110,21 @@ func (this *Document) LoadString(s string) (err os.Error) { case xml.SyntaxError: return os.NewError(tt.String()) case xml.CharData: - ct.Value = strings.TrimSpace(string(tt)) + ct.Value = strings.TrimSpace(string([]byte(tt))) case xml.Comment: t := NewNode(NT_COMMENT) - t.Value = strings.TrimSpace(string(tt)) + t.Value = strings.TrimSpace(string([]byte(tt))) ct.AddChild(t) case xml.Directive: - t := NewNode(NT_DIRECTIVE) - t.Value = strings.TrimSpace(string(tt)) + t = NewNode(NT_DIRECTIVE) + t.Value = strings.TrimSpace(string([]byte(tt))) ct.AddChild(t) case xml.StartElement: - t := NewNode(NT_ELEMENT) + t = NewNode(NT_ELEMENT) t.Name = tt.Name - t.Attributes = make([]Attr, len(tt.Attr)) - for i, v := range tt.Attr { + t.Attributes = make([]*Attr, len(tt.Attr)) + for i, v = range tt.Attr { + t.Attributes[i] = new(Attr) t.Attributes[i].Name = v.Name t.Attributes[i].Value = v.Value } @@ -125,15 +132,14 @@ func (this *Document) LoadString(s string) (err os.Error) { ct = t case xml.ProcInst: if tt.Target == "xml" { // xml doctype - doctype := strings.TrimSpace(string(tt.Inst)) - pos := strings.Index(doctype, `standalone="`) - if pos > -1 { - this.StandAlone = doctype[pos+len(`standalone="`) : len(doctype)] - pos = strings.Index(this.StandAlone, `"`) - this.StandAlone = this.StandAlone[0:pos] + doctype = strings.TrimSpace(string(tt.Inst)) + if i = strings.Index(doctype, `standalone="`); i > -1 { + this.StandAlone = doctype[i+len(`standalone="`) : len(doctype)] + i = strings.Index(this.StandAlone, `"`) + this.StandAlone 
= this.StandAlone[0:i] } } else { - t := NewNode(NT_PROCINST) + t = NewNode(NT_PROCINST) t.Target = strings.TrimSpace(tt.Target) t.Value = strings.TrimSpace(string(tt.Inst)) ct.AddChild(t) @@ -176,17 +182,20 @@ func (this *Document) LoadUri(uri string) (err os.Error) { } func (this *Document) LoadStream(r *io.Reader) (err os.Error) { - content := "" - buff := make([]byte, 256) + var data []byte + + t := bytes.NewBuffer(data) + s := make([]byte, 1024) + for { - _, err := r.Read(buff) + _, err := r.Read(s) if err != nil { break } - content += string(buff) + t.Write(s) } - err = this.LoadString(content) + err = this.LoadString(t.String()) return } @@ -194,19 +203,12 @@ func (this *Document) LoadStream(r *io.Reader) (err os.Error) { // *** Satisfy ISaver interface // ***************************************************************************** func (this *Document) SaveFile(path string) (err os.Error) { - file, err := os.Open(path, os.O_WRONLY|os.O_CREAT, 0600) - if err != nil { - return - } - defer file.Close() - - content, err := this.SaveString() - if err != nil { + var data string + if data, err = this.SaveString(); err != nil { return } - file.Write([]byte(content)) - return + return ioutil.WriteFile(path, []byte(data), 0600) } func (this *Document) SaveString() (s string, err os.Error) { diff --git a/xmlx/entitymap.go b/xmlx/entitymap.go new file mode 100644 index 0000000..292f436 --- /dev/null +++ b/xmlx/entitymap.go @@ -0,0 +1,311 @@ +package xmlx + +import "fmt" +import "utf8" +import "regexp" +import "strconv" + +var reg_entity = regexp.MustCompile("^&#[0-9]+;$") + +// Converts a single numerical html entity to a regular Go utf-token. 
+// ex: "&#9827;" -> "♣" +func HtmlToUTF8(entity string) string { + // Make sure we have a valid entity: &#123; + ok := reg_entity.MatchString(entity) + if !ok { + return "" + } + + // Convert entity to number + num, err := strconv.Atoi(entity[2 : len(entity)-1]) + if err != nil { + return "" + } + + var arr []byte + size := utf8.EncodeRune(num, arr) + if size == 0 { + return "" + } + + return string(arr) +} + +// Converts a single Go utf-token to its Html entity. +// ex: "♣" -> "&#9827;" +func UTF8ToHtml(token string) string { + rune, size := utf8.DecodeRuneInString(token) + if size == 0 { + return "" + } + return fmt.Sprintf("&#%d;", rune) +} + +/* + http://www.w3.org/TR/html4/sgml/entities.html + + Portions © International Organization for Standardization 1986 + Permission to copy in any form is granted for use with + conforming SGML systems and applications as defined in + ISO 8879, provided this notice is included in all copies. + + Fills the supplied map with html entities mapped to their Go utf8 + equivalents. This map can be assigned to xml.Parser.Entity + It will be used to map non-standard xml entities to a proper value. + If the parser encounters any unknown entities, it will throw a syntax + error and abort the parsing. Hence the ability to supply this map. 
+*/ +func loadNonStandardEntities(em map[string]string) { + em["pi"] = "\u03c0" + em["nabla"] = "\u2207" + em["isin"] = "\u2208" + em["loz"] = "\u25ca" + em["prop"] = "\u221d" + em["para"] = "\u00b6" + em["Aring"] = "\u00c5" + em["euro"] = "\u20ac" + em["sup3"] = "\u00b3" + em["sup2"] = "\u00b2" + em["sup1"] = "\u00b9" + em["prod"] = "\u220f" + em["gamma"] = "\u03b3" + em["perp"] = "\u22a5" + em["lfloor"] = "\u230a" + em["fnof"] = "\u0192" + em["frasl"] = "\u2044" + em["rlm"] = "\u200f" + em["omega"] = "\u03c9" + em["part"] = "\u2202" + em["euml"] = "\u00eb" + em["Kappa"] = "\u039a" + em["nbsp"] = "\u00a0" + em["Eacute"] = "\u00c9" + em["brvbar"] = "\u00a6" + em["otimes"] = "\u2297" + em["ndash"] = "\u2013" + em["thinsp"] = "\u2009" + em["nu"] = "\u03bd" + em["Upsilon"] = "\u03a5" + em["upsih"] = "\u03d2" + em["raquo"] = "\u00bb" + em["yacute"] = "\u00fd" + em["delta"] = "\u03b4" + em["eth"] = "\u00f0" + em["supe"] = "\u2287" + em["ne"] = "\u2260" + em["ni"] = "\u220b" + em["eta"] = "\u03b7" + em["uArr"] = "\u21d1" + em["image"] = "\u2111" + em["asymp"] = "\u2248" + em["oacute"] = "\u00f3" + em["rarr"] = "\u2192" + em["emsp"] = "\u2003" + em["acirc"] = "\u00e2" + em["shy"] = "\u00ad" + em["yuml"] = "\u00ff" + em["acute"] = "\u00b4" + em["int"] = "\u222b" + em["ccedil"] = "\u00e7" + em["Acirc"] = "\u00c2" + em["Ograve"] = "\u00d2" + em["times"] = "\u00d7" + em["weierp"] = "\u2118" + em["Tau"] = "\u03a4" + em["omicron"] = "\u03bf" + em["lt"] = "\u003c" + em["Mu"] = "\u039c" + em["Ucirc"] = "\u00db" + em["sub"] = "\u2282" + em["le"] = "\u2264" + em["sum"] = "\u2211" + em["sup"] = "\u2283" + em["lrm"] = "\u200e" + em["frac34"] = "\u00be" + em["Iota"] = "\u0399" + em["Ugrave"] = "\u00d9" + em["THORN"] = "\u00de" + em["rsaquo"] = "\u203a" + em["not"] = "\u00ac" + em["sigma"] = "\u03c3" + em["iuml"] = "\u00ef" + em["epsilon"] = "\u03b5" + em["spades"] = "\u2660" + em["theta"] = "\u03b8" + em["divide"] = "\u00f7" + em["Atilde"] = "\u00c3" + em["uacute"] = "\u00fa" + 
em["Rho"] = "\u03a1" + em["trade"] = "\u2122" + em["chi"] = "\u03c7" + em["agrave"] = "\u00e0" + em["or"] = "\u2228" + em["circ"] = "\u02c6" + em["middot"] = "\u00b7" + em["plusmn"] = "\u00b1" + em["aring"] = "\u00e5" + em["lsquo"] = "\u2018" + em["Yacute"] = "\u00dd" + em["oline"] = "\u203e" + em["copy"] = "\u00a9" + em["icirc"] = "\u00ee" + em["lowast"] = "\u2217" + em["Oacute"] = "\u00d3" + em["aacute"] = "\u00e1" + em["oplus"] = "\u2295" + em["crarr"] = "\u21b5" + em["thetasym"] = "\u03d1" + em["Beta"] = "\u0392" + em["laquo"] = "\u00ab" + em["rang"] = "\u232a" + em["tilde"] = "\u02dc" + em["Uuml"] = "\u00dc" + em["zwj"] = "\u200d" + em["mu"] = "\u03bc" + em["Ccedil"] = "\u00c7" + em["infin"] = "\u221e" + em["ouml"] = "\u00f6" + em["rfloor"] = "\u230b" + em["pound"] = "\u00a3" + em["szlig"] = "\u00df" + em["thorn"] = "\u00fe" + em["forall"] = "\u2200" + em["piv"] = "\u03d6" + em["rdquo"] = "\u201d" + em["frac12"] = "\u00bd" + em["frac14"] = "\u00bc" + em["Ocirc"] = "\u00d4" + em["Ecirc"] = "\u00ca" + em["kappa"] = "\u03ba" + em["Euml"] = "\u00cb" + em["minus"] = "\u2212" + em["cong"] = "\u2245" + em["hellip"] = "\u2026" + em["equiv"] = "\u2261" + em["cent"] = "\u00a2" + em["Uacute"] = "\u00da" + em["darr"] = "\u2193" + em["Eta"] = "\u0397" + em["sbquo"] = "\u201a" + em["rArr"] = "\u21d2" + em["igrave"] = "\u00ec" + em["uml"] = "\u00a8" + em["lambda"] = "\u03bb" + em["oelig"] = "\u0153" + em["harr"] = "\u2194" + em["ang"] = "\u2220" + em["clubs"] = "\u2663" + em["and"] = "\u2227" + em["permil"] = "\u2030" + em["larr"] = "\u2190" + em["Yuml"] = "\u0178" + em["cup"] = "\u222a" + em["Xi"] = "\u039e" + em["Alpha"] = "\u0391" + em["phi"] = "\u03c6" + em["ucirc"] = "\u00fb" + em["oslash"] = "\u00f8" + em["rsquo"] = "\u2019" + em["AElig"] = "\u00c6" + em["mdash"] = "\u2014" + em["psi"] = "\u03c8" + em["eacute"] = "\u00e9" + em["otilde"] = "\u00f5" + em["yen"] = "\u00a5" + em["gt"] = "\u003e" + em["Iuml"] = "\u00cf" + em["Prime"] = "\u2033" + em["Chi"] = "\u03a7" + 
em["ge"] = "\u2265" + em["reg"] = "\u00ae" + em["hearts"] = "\u2665" + em["auml"] = "\u00e4" + em["Agrave"] = "\u00c0" + em["sect"] = "\u00a7" + em["sube"] = "\u2286" + em["sigmaf"] = "\u03c2" + em["Gamma"] = "\u0393" + em["amp"] = "\u0026" + em["ensp"] = "\u2002" + em["ETH"] = "\u00d0" + em["Igrave"] = "\u00cc" + em["Omega"] = "\u03a9" + em["Lambda"] = "\u039b" + em["Omicron"] = "\u039f" + em["there4"] = "\u2234" + em["ntilde"] = "\u00f1" + em["xi"] = "\u03be" + em["dagger"] = "\u2020" + em["egrave"] = "\u00e8" + em["Delta"] = "\u0394" + em["OElig"] = "\u0152" + em["diams"] = "\u2666" + em["ldquo"] = "\u201c" + em["radic"] = "\u221a" + em["Oslash"] = "\u00d8" + em["Ouml"] = "\u00d6" + em["lceil"] = "\u2308" + em["uarr"] = "\u2191" + em["atilde"] = "\u00e3" + em["iquest"] = "\u00bf" + em["lsaquo"] = "\u2039" + em["Epsilon"] = "\u0395" + em["iacute"] = "\u00ed" + em["cap"] = "\u2229" + em["deg"] = "\u00b0" + em["Otilde"] = "\u00d5" + em["zeta"] = "\u03b6" + em["ocirc"] = "\u00f4" + em["scaron"] = "\u0161" + em["ecirc"] = "\u00ea" + em["ordm"] = "\u00ba" + em["tau"] = "\u03c4" + em["Auml"] = "\u00c4" + em["dArr"] = "\u21d3" + em["ordf"] = "\u00aa" + em["alefsym"] = "\u2135" + em["notin"] = "\u2209" + em["Pi"] = "\u03a0" + em["sdot"] = "\u22c5" + em["upsilon"] = "\u03c5" + em["iota"] = "\u03b9" + em["hArr"] = "\u21d4" + em["Sigma"] = "\u03a3" + em["lang"] = "\u2329" + em["curren"] = "\u00a4" + em["Theta"] = "\u0398" + em["lArr"] = "\u21d0" + em["Phi"] = "\u03a6" + em["Nu"] = "\u039d" + em["rho"] = "\u03c1" + em["alpha"] = "\u03b1" + em["iexcl"] = "\u00a1" + em["micro"] = "\u00b5" + em["cedil"] = "\u00b8" + em["Ntilde"] = "\u00d1" + em["Psi"] = "\u03a8" + em["Dagger"] = "\u2021" + em["Egrave"] = "\u00c8" + em["Icirc"] = "\u00ce" + em["nsub"] = "\u2284" + em["bdquo"] = "\u201e" + em["empty"] = "\u2205" + em["aelig"] = "\u00e6" + em["ograve"] = "\u00f2" + em["macr"] = "\u00af" + em["Zeta"] = "\u0396" + em["beta"] = "\u03b2" + em["sim"] = "\u223c" + em["uuml"] = "\u00fc" 
+ em["Aacute"] = "\u00c1" + em["Iacute"] = "\u00cd" + em["exist"] = "\u2203" + em["prime"] = "\u2032" + em["rceil"] = "\u2309" + em["real"] = "\u211c" + em["zwnj"] = "\u200c" + em["bull"] = "\u2022" + em["quot"] = "\u0022" + em["Scaron"] = "\u0160" + em["ugrave"] = "\u00f9" +} diff --git a/src/io.go b/xmlx/io.go similarity index 100% rename from src/io.go rename to xmlx/io.go diff --git a/src/node.go b/xmlx/node.go similarity index 73% rename from src/node.go rename to xmlx/node.go index 7021ba7..895a3d1 100644 --- a/src/node.go +++ b/xmlx/node.go @@ -3,15 +3,16 @@ package xmlx import "os" import "strings" import "xml" +import "bytes" import "fmt" import "strconv" const ( - NT_ROOT = 0x00 - NT_DIRECTIVE = 0x01 - NT_PROCINST = 0x02 - NT_COMMENT = 0x03 - NT_ELEMENT = 0x04 + NT_ROOT = iota + NT_DIRECTIVE + NT_PROCINST + NT_COMMENT + NT_ELEMENT ) type Attr struct { @@ -23,13 +24,19 @@ type Node struct { Type byte Name xml.Name Children []*Node - Attributes []Attr + Attributes []*Attr Parent *Node Value string Target string // procinst field } -func NewNode(tid byte) *Node { return &Node{Type: tid} } +func NewNode(tid byte) *Node { + n := new(Node) + n.Type = tid + n.Children = make([]*Node, 0, 10) + n.Attributes = make([]*Attr, 0, 10) + return n +} // This wraps the standard xml.Unmarshal function and supplies this particular // node as the content to be unmarshalled. 
@@ -49,10 +56,7 @@ func (this *Node) GetValue(namespace, name string) string { // Get node value as int func (this *Node) GetValuei(namespace, name string) int { node := rec_SelectNode(this, namespace, name) - if node == nil { - return 0 - } - if node.Value == "" { + if node == nil || node.Value == "" { return 0 } n, _ := strconv.Atoi(node.Value) @@ -62,10 +66,7 @@ func (this *Node) GetValuei(namespace, name string) int { // Get node value as int64 func (this *Node) GetValuei64(namespace, name string) int64 { node := rec_SelectNode(this, namespace, name) - if node == nil { - return 0 - } - if node.Value == "" { + if node == nil || node.Value == "" { return 0 } n, _ := strconv.Atoi64(node.Value) @@ -75,10 +76,7 @@ func (this *Node) GetValuei64(namespace, name string) int64 { // Get node value as uint func (this *Node) GetValueui(namespace, name string) uint { node := rec_SelectNode(this, namespace, name) - if node == nil { - return 0 - } - if node.Value == "" { + if node == nil || node.Value == "" { return 0 } n, _ := strconv.Atoui(node.Value) @@ -88,10 +86,7 @@ func (this *Node) GetValueui(namespace, name string) uint { // Get node value as uint64 func (this *Node) GetValueui64(namespace, name string) uint64 { node := rec_SelectNode(this, namespace, name) - if node == nil { - return 0 - } - if node.Value == "" { + if node == nil || node.Value == "" { return 0 } n, _ := strconv.Atoui64(node.Value) @@ -101,10 +96,7 @@ func (this *Node) GetValueui64(namespace, name string) uint64 { // Get node value as float func (this *Node) GetValuef(namespace, name string) float { node := rec_SelectNode(this, namespace, name) - if node == nil { - return 0 - } - if node.Value == "" { + if node == nil || node.Value == "" { return 0 } n, _ := strconv.Atof(node.Value) @@ -114,10 +106,7 @@ func (this *Node) GetValuef(namespace, name string) float { // Get node value as float32 func (this *Node) GetValuef32(namespace, name string) float32 { node := rec_SelectNode(this, namespace, name) - 
if node == nil { - return 0 - } - if node.Value == "" { + if node == nil || node.Value == "" { return 0 } n, _ := strconv.Atof32(node.Value) @@ -127,10 +116,7 @@ func (this *Node) GetValuef32(namespace, name string) float32 { // Get node value as float64 func (this *Node) GetValuef64(namespace, name string) float64 { node := rec_SelectNode(this, namespace, name) - if node == nil { - return 0 - } - if node.Value == "" { + if node == nil || node.Value == "" { return 0 } n, _ := strconv.Atof64(node.Value) @@ -237,9 +223,9 @@ func rec_SelectNode(cn *Node, namespace, name string) *Node { return cn } + var tn *Node for _, v := range cn.Children { - tn := rec_SelectNode(v, namespace, name) - if tn != nil { + if tn = rec_SelectNode(v, namespace, name); tn != nil { return tn } } @@ -248,17 +234,21 @@ func rec_SelectNode(cn *Node, namespace, name string) *Node { // Select multiple nodes by name func (this *Node) SelectNodes(namespace, name string) []*Node { - list := make([]*Node, 0) + list := make([]*Node, 0, 16) rec_SelectNodes(this, namespace, name, &list) return list } func rec_SelectNodes(cn *Node, namespace, name string, list *[]*Node) { if cn.Name.Space == namespace && cn.Name.Local == name { - c := make([]*Node, len(*list)+1) - copy(c, *list) - c[len(c)-1] = cn - *list = c + l := len(*list) + if l >= cap(*list) { + c := make([]*Node, l, l+16) + copy(c, *list) + *list = c + } + *list = (*list)[0 : l+1] + (*list)[l] = cn return } @@ -288,60 +278,73 @@ func (this *Node) String() (s string) { } func (this *Node) printRoot() (s string) { + var data []byte + buf := bytes.NewBuffer(data) for _, v := range this.Children { - s += v.String() + buf.WriteString(v.String()) } - return + return buf.String() } -func (this *Node) printProcInst() (s string) { - s = "" - return +func (this *Node) printProcInst() string { + return "" } -func (this *Node) printComment() (s string) { - s = "" - return +func (this *Node) printComment() string { + return "" } -func (this *Node) 
printDirective() (s string) { - s = "" - return +func (this *Node) printDirective() string { + return "" } -func (this *Node) printElement() (s string) { +func (this *Node) printElement() string { + var data []byte + buf := bytes.NewBuffer(data) + if len(this.Name.Space) > 0 { - s = "<" + this.Name.Space + ":" + this.Name.Local + buf.WriteRune('<') + buf.WriteString(this.Name.Space) + buf.WriteRune(':') + buf.WriteString(this.Name.Local) } else { - s = "<" + this.Name.Local + buf.WriteRune('<') + buf.WriteString(this.Name.Local) } for _, v := range this.Attributes { if len(v.Name.Space) > 0 { - s += fmt.Sprintf(` %s:%s="%s"`, v.Name.Space, v.Name.Local, v.Value) + buf.WriteString(fmt.Sprintf(` %s:%s="%s"`, v.Name.Space, v.Name.Local, v.Value)) } else { - s += fmt.Sprintf(` %s="%s"`, v.Name.Local, v.Value) + buf.WriteString(fmt.Sprintf(` %s="%s"`, v.Name.Local, v.Value)) } } if len(this.Children) == 0 && len(this.Value) == 0 { - s += " />" - return + buf.WriteString(" />") + return buf.String() } - s += ">" + buf.WriteRune('>') for _, v := range this.Children { - s += v.String() + buf.WriteString(v.String()) } - s += this.Value + buf.WriteString(this.Value) if len(this.Name.Space) > 0 { - s += "" + buf.WriteString("') } else { - s += "" + buf.WriteString("') } - return + + return buf.String() } // Add a child node @@ -351,10 +354,15 @@ func (this *Node) AddChild(t *Node) { } t.Parent = this - c := make([]*Node, len(this.Children)+1) - copy(c, this.Children) - c[len(c)-1] = t - this.Children = c + l := len(this.Children) + if l >= cap(this.Children) { + c := make([]*Node, l, l+10) + copy(c, this.Children) + this.Children = c + } + + this.Children = this.Children[0 : l+1] + this.Children[l] = t } // Remove a child node @@ -371,10 +379,8 @@ func (this *Node) RemoveChild(t *Node) { return } - c := make([]*Node, len(this.Children)-1) - copy(c, this.Children[0:p]) - copy(c[p:], this.Children[p+1:]) - this.Children = c + copy(this.Children[p:], this.Children[p+1:]) + 
this.Children = this.Children[0 : len(this.Children)-1] t.Parent = nil } diff --git a/src/test.xml b/xmlx/test.xml similarity index 100% rename from src/test.xml rename to xmlx/test.xml diff --git a/xmlx/test1.xml b/xmlx/test1.xml new file mode 100644 index 0000000..db44efa --- /dev/null +++ b/xmlx/test1.xml @@ -0,0 +1 @@ +WriteTheWebhttp://writetheweb.comNews for web users that write backen-usCopyright 2000, WriteTheWeb team.editor@writetheweb.comwebmaster@writetheweb.comWriteTheWebhttp://writetheweb.com/images/mynetscape88.gifhttp://writetheweb.com8831News for web users that write backGiving the world a pluggable Gnutellahttp://writetheweb.com/read.php?item=24WorldOS is a framework on which to build programs that work like Freenet or Gnutella -allowing distributed applications using peer-to-peer routing.Syndication discussions hot uphttp://writetheweb.com/read.php?item=23After a period of dormancy, the Syndication mailing list has become active again, with contributions from leaders in traditional media and Web syndication.Personal web server integrates file sharing and messaginghttp://writetheweb.com/read.php?item=22The Magi Project is an innovative project to create a combined personal web server and messaging system that enables the sharing and synchronization of information across desktop, laptop and palmtop devices.Syndication and Metadatahttp://writetheweb.com/read.php?item=21RSS is probably the best known metadata format around. RDF is probably one of the least understood. In this essay, published on my O'Reilly Network weblog, I argue that the next generation of RSS should be based on RDF.UK bloggers get organisedhttp://writetheweb.com/read.php?item=20Looks like the weblogs scene is gathering pace beyond the shores of the US. 
There's now a UK-specific page on weblogs.com, and a mailing list at egroups.Yournamehere.com more important than anythinghttp://writetheweb.com/read.php?item=19Whatever you're publishing on the web, your site name is the most valuable asset you have, according to Carl Steadman. \ No newline at end of file diff --git a/src/xmlx_test.go b/xmlx/xmlx_test.go similarity index 95% rename from src/xmlx_test.go rename to xmlx/xmlx_test.go index 56475cf..4314355 100644 --- a/src/xmlx_test.go +++ b/xmlx/xmlx_test.go @@ -16,7 +16,7 @@ func TestLoadLocal(t *testing.T) { } } -func TestLoadRemote(t *testing.T) { +func _TestLoadRemote(t *testing.T) { doc := New() if err := doc.LoadUri("http://www.w3schools.com/xml/plant_catalog.xml"); err != nil { @@ -89,8 +89,7 @@ func TestUnmarshal(t *testing.T) { } img := Image{} - err = node.Unmarshal(&img) - if err != nil { + if err = node.Unmarshal(&img); err != nil { t.Errorf("Unmarshal(): %s", err) return }