From ea7fc452097da4ef2db6bb6fbf6c8a9c5e23cbeb Mon Sep 17 00:00:00 2001 From: jim teeuwen Date: Wed, 11 May 2011 17:44:09 +0200 Subject: [PATCH] Got rid of deprecated Node API functions. Removed dependency on go-iconv in favor of go-charset (See README). go-charset is a native Go package and thus requires no CGO functionality, meaning go-pkg-xmlx and go-pkg-rss can now be used in Google AppEngine. Some speed and memory efficiency improvements added. Should now do a lot fewer []byte -> string conversions. --- README | 5 +- document.go | 186 +++++++++++++++++----------------------------------- node.go | 178 ++++++++++++++++++------------------------------- test.xml | 1 - test1.xml | 2 +- 5 files changed, 128 insertions(+), 244 deletions(-) diff --git a/README b/README index abda3ae..02c22db 100644 --- a/README +++ b/README @@ -12,13 +12,14 @@ DEPENDENCIES ================================================================================ - goinstall github.com/sloonz/go-iconv/src + goinstall go-charset.googlecode.com/hg/charset ================================================================================ USAGE ================================================================================ - Getting the package up and running is simple enough: + Getting the package up and running is simple enough. This should also + automatically take care of any dependencies for you: $ goinstall github.com/jteeuwen/go-pkg-xmlx diff --git a/document.go b/document.go index 1a95ec8..8f0cc4d 100644 --- a/document.go +++ b/document.go @@ -28,24 +28,26 @@ import ( "os" "io" "io/ioutil" - "path" "strings" + "bytes" "xml" "fmt" "http" - iconv "github.com/sloonz/go-iconv/src" + "go-charset.googlecode.com/hg/charset" ) +// represents a single XML document. type Document struct { - Version string - Encoding string - StandAlone string - SaveDocType bool - Root *Node - Entity map[string]string - Verbose bool + Version string // XML version + Encoding string // Encoding found in document. 
If absent, assumes UTF-8. + StandAlone string // Value of XML doctype's 'standalone' attribute. + SaveDocType bool // Whether or not to include the XML doctype in saves. + Root *Node // The document's root node. + Entity map[string]string // Mapping of custom entity conversions. + Verbose bool // [deprecated] Not actually used anymore. } +// Create a new, empty XML document instance. func New() *Document { return &Document{ Version: "1.0", @@ -53,7 +55,6 @@ func New() *Document { StandAlone: "yes", SaveDocType: true, Entity: make(map[string]string), - Verbose: false, } } @@ -64,50 +65,38 @@ func New() *Document { // defined on http://www.w3.org/TR/html4/sgml/entities.html func (this *Document) LoadExtendedEntityMap() { loadNonStandardEntities(this.Entity) } -func (this *Document) String() string { - s, _ := this.SaveString() - return s -} - +// Select a single node with the given namespace and name. Returns nil if no +// matching node was found. func (this *Document) SelectNode(namespace, name string) *Node { return this.Root.SelectNode(namespace, name) } +// Select all nodes with the given namespace and name. Returns an empty slice +// if no matches were found. func (this *Document) SelectNodes(namespace, name string) []*Node { return this.Root.SelectNodes(namespace, name) } -// ***************************************************************************** -// *** Satisfy ILoader interface -// ***************************************************************************** -func (this *Document) LoadString(s string) (err os.Error) { - // Ensure we are passing UTF-8 encoding content to the XML tokenizer. - if s, err = this.correctEncoding(s); err != nil { - return - } - - // tokenize data - xp := xml.NewParser(strings.NewReader(s)) +// Load the contents of this document from the supplied reader. 
+func (this *Document) LoadStream(r io.Reader) (err os.Error) { + xp := xml.NewParser(r) xp.Entity = this.Entity + xp.CharsetReader = func(enc string, input io.Reader) (io.Reader, os.Error) { + return charset.NewReader(enc, input) + } this.Root = NewNode(NT_ROOT) ct := this.Root var tok xml.Token var t *Node - var i int var doctype string - var v xml.Attr for { if tok, err = xp.Token(); err != nil { if err == os.EOF { return nil } - - if this.Verbose { - fmt.Fprintf(os.Stderr, "Xml Error: %s\n", err) - } return err } @@ -128,7 +117,7 @@ func (this *Document) LoadString(s string) (err os.Error) { t = NewNode(NT_ELEMENT) t.Name = tt.Name t.Attributes = make([]*Attr, len(tt.Attr)) - for i, v = range tt.Attr { + for i, v := range tt.Attr { t.Attributes[i] = new(Attr) t.Attributes[i].Name = v.Name t.Attributes[i].Value = v.Value @@ -138,7 +127,7 @@ func (this *Document) LoadString(s string) (err os.Error) { case xml.ProcInst: if tt.Target == "xml" { // xml doctype doctype = strings.TrimSpace(string(tt.Inst)) - if i = strings.Index(doctype, `standalone="`); i > -1 { + if i := strings.Index(doctype, `standalone="`); i > -1 { this.StandAlone = doctype[i+len(`standalone="`) : len(doctype)] i = strings.Index(this.StandAlone, `"`) this.StandAlone = this.StandAlone[0:i] @@ -159,16 +148,28 @@ func (this *Document) LoadString(s string) (err os.Error) { return } -func (this *Document) LoadFile(filename string) (err os.Error) { - var data []byte +// Load the contents of this document from the supplied byte slice. +func (this *Document) LoadBytes(d []byte) (err os.Error) { + return this.LoadStream(bytes.NewBuffer(d)) +} - if data, err = ioutil.ReadFile(path.Clean(filename)); err != nil { +// Load the contents of this document from the supplied string. +func (this *Document) LoadString(s string) (err os.Error) { + return this.LoadStream(strings.NewReader(s)) +} + +// Load the contents of this document from the supplied file. 
+func (this *Document) LoadFile(filename string) (err os.Error) { + var fd *os.File + if fd, err = os.Open(filename); err != nil { return } - return this.LoadString(string(data)) + defer fd.Close() + return this.LoadStream(fd) } +// Load the contents of this document from the supplied uri. func (this *Document) LoadUri(uri string) (err os.Error) { var r *http.Response if r, _, err = http.Get(uri); err != nil { @@ -176,105 +177,36 @@ func (this *Document) LoadUri(uri string) (err os.Error) { } defer r.Body.Close() - - var b []byte - if b, err = ioutil.ReadAll(r.Body); err != nil { - return - } - - return this.LoadString(string(b)) + return this.LoadStream(r.Body) } -func (this *Document) LoadStream(r io.Reader) (err os.Error) { - var b []byte - if b, err = ioutil.ReadAll(r); err != nil { - return - } - return this.LoadString(string(b)) +// Save the contents of this document to the supplied file. +func (this *Document) SaveFile(path string) os.Error { + return ioutil.WriteFile(path, this.SaveBytes(), 0600) } -// ***************************************************************************** -// *** Satisfy ISaver interface -// ***************************************************************************** -func (this *Document) SaveFile(path string) (err os.Error) { - var data string - if data, err = this.SaveString(); err != nil { - return - } +// Save the contents of this document as a byte slice. +func (this *Document) SaveBytes() []byte { + var b bytes.Buffer - return ioutil.WriteFile(path, []byte(data), 0600) -} - -func (this *Document) SaveString() (s string, err os.Error) { if this.SaveDocType { - s = fmt.Sprintf(``, - this.Version, this.Encoding, this.StandAlone) + b.WriteString(fmt.Sprintf(``, + this.Version, this.Encoding, this.StandAlone)) } - s += this.Root.String() - return + b.Write(this.Root.Bytes()) + return b.Bytes() } +// Save the contents of this document as a string. 
+func (this *Document) SaveString() string { return string(this.SaveBytes()) } + +// Alias for Document.SaveString(). This one is invoked by anything looking for +// the standard String() method (eg: fmt.Printf("%s\n", mydoc). +func (this *Document) String() string { return string(this.SaveBytes()) } + +// Save the contents of this document to the supplied writer. func (this *Document) SaveStream(w io.Writer) (err os.Error) { - var s string - if s, err = this.SaveString(); err != nil { - return - } - _, err = w.Write([]byte(s)) + _, err = w.Write(this.SaveBytes()) return } - -// Use libiconv to ensure we get UTF-8 encoded data. The Go Xml tokenizer will -// throw a tantrum if we give it anything else. -func (this *Document) correctEncoding(data string) (ret string, err os.Error) { - var cd *iconv.Iconv - var tok xml.Token - - enc := "utf-8" - xp := xml.NewParser(strings.NewReader(data)) - xp.Entity = this.Entity - -loop: - for { - if tok, err = xp.Token(); err != nil { - if err == os.EOF { - break loop - } - - return "", err - } - - switch tt := tok.(type) { - case xml.ProcInst: - if tt.Target == "xml" { // xml doctype - var pair []string - var entry string - - list := strings.Split(string(tt.Inst), " ", -1) - for _, entry = range list { - if pair = strings.Split(entry, "=", -1); len(pair) < 2 { - continue - } - - switch pair[0] { - case "encoding": - enc = pair[1][1 : len(pair[1])-1] - break loop - } - } - } - } - } - - if strings.ToLower(enc) == "utf-8" { - // Data already in utf-8 format. Nothing to do here. - return data, nil - } - - if cd, err = iconv.Open("utf-8", enc); err != nil { - return - } - - defer cd.Close() - return cd.Conv(data) -} diff --git a/node.go b/node.go index 07c49d7..2b6efe1 100644 --- a/node.go +++ b/node.go @@ -6,7 +6,6 @@ package xmlx import ( "os" - "strings" "xml" "bytes" "fmt" @@ -22,18 +21,18 @@ const ( ) type Attr struct { - Name xml.Name - Value string + Name xml.Name // Attribute namespace and name. 
+ Value string // Attribute value. } type Node struct { - Type byte - Name xml.Name - Children []*Node - Attributes []*Attr - Parent *Node - Value string - Target string // procinst field + Type byte // Node type. + Name xml.Name // Node namespace and name. + Children []*Node // Child nodes. + Attributes []*Attr // Node attributes. + Parent *Node // Parent node. + Value string // Node value. + Target string // procinst field. } func NewNode(tid byte) *Node { @@ -47,7 +46,7 @@ func NewNode(tid byte) *Node { // This wraps the standard xml.Unmarshal function and supplies this particular // node as the content to be unmarshalled. func (this *Node) Unmarshal(obj interface{}) os.Error { - return xml.Unmarshal(strings.NewReader(this.String()), obj) + return xml.Unmarshal(bytes.NewBuffer(this.Bytes()), obj) } // Get node value as string @@ -58,10 +57,6 @@ func (this *Node) S(namespace, name string) string { return "" } -// Deprecated - use Node.S() -func (this *Node) GetValue(namespace, name string) string { return this.S(namespace, name) } - - // Get node value as int func (this *Node) I(namespace, name string) int { if node := rec_SelectNode(this, namespace, name); node != nil && node.Value != "" { @@ -71,10 +66,6 @@ func (this *Node) I(namespace, name string) int { return 0 } -// Deprecated - use Node.I() -func (this *Node) GetValuei(namespace, name string) int { return this.I(namespace, name) } - - // Get node value as int64 func (this *Node) I64(namespace, name string) int64 { if node := rec_SelectNode(this, namespace, name); node != nil && node.Value != "" { @@ -84,10 +75,6 @@ func (this *Node) I64(namespace, name string) int64 { return 0 } -// Deprecated - use Node.I64() -func (this *Node) GetValuei64(namespace, name string) int64 { return this.I64(namespace, name) } - - // Get node value as uint func (this *Node) U(namespace, name string) uint { if node := rec_SelectNode(this, namespace, name); node != nil && node.Value != "" { @@ -97,10 +84,6 @@ func (this *Node) 
U(namespace, name string) uint { return 0 } -// Deprecated - use Node.U() -func (this *Node) GetValueui(namespace, name string) uint { return this.U(namespace, name) } - - // Get node value as uint64 func (this *Node) U64(namespace, name string) uint64 { if node := rec_SelectNode(this, namespace, name); node != nil && node.Value != "" { @@ -110,9 +93,6 @@ func (this *Node) U64(namespace, name string) uint64 { return 0 } -// Deprecated - use Node.U64() -func (this *Node) GetValueui64(namespace, name string) uint64 { return this.U64(namespace, name) } - // Get node value as float32 func (this *Node) F32(namespace, name string) float32 { if node := rec_SelectNode(this, namespace, name); node != nil && node.Value != "" { @@ -122,10 +102,6 @@ func (this *Node) F32(namespace, name string) float32 { return 0 } -// Deprecated - use Node.F32() -func (this *Node) GetValuef32(namespace, name string) float32 { return this.F32(namespace, name) } - - // Get node value as float64 func (this *Node) F64(namespace, name string) float64 { if node := rec_SelectNode(this, namespace, name); node != nil && node.Value != "" { @@ -135,10 +111,6 @@ func (this *Node) F64(namespace, name string) float64 { return 0 } -// Deprecated - use Node.F64() -func (this *Node) GetValuef64(namespace, name string) float64 { return this.F64(namespace, name) } - - // Get node value as bool func (this *Node) B(namespace, name string) bool { if node := rec_SelectNode(this, namespace, name); node != nil && node.Value != "" { @@ -158,10 +130,6 @@ func (this *Node) As(namespace, name string) string { return "" } -// Deprecated - use Node.As() -func (this *Node) GetAttr(namespace, name string) string { return this.As(namespace, name) } - - // Get attribute value as int func (this *Node) Ai(namespace, name string) int { if s := this.As(namespace, name); s != "" { @@ -171,10 +139,6 @@ func (this *Node) Ai(namespace, name string) int { return 0 } -// Deprecated - use Node.Ai() -func (this *Node) GetAttri(namespace, 
name string) int { return this.Ai(namespace, name) } - - // Get attribute value as uint func (this *Node) Au(namespace, name string) uint { if s := this.As(namespace, name); s != "" { @@ -184,10 +148,6 @@ func (this *Node) Au(namespace, name string) uint { return 0 } -// Deprecated - use Node.Au() -func (this *Node) GetAttrui(namespace, name string) uint { return this.Au(namespace, name) } - - // Get attribute value as uint64 func (this *Node) Au64(namespace, name string) uint64 { if s := this.As(namespace, name); s != "" { @@ -197,10 +157,6 @@ func (this *Node) Au64(namespace, name string) uint64 { return 0 } -// Deprecated - use Node.Au64() -func (this *Node) GetAttrui64(namespace, name string) uint64 { return this.Au64(namespace, name) } - - // Get attribute value as int64 func (this *Node) Ai64(namespace, name string) int64 { if s := this.As(namespace, name); s != "" { @@ -210,9 +166,6 @@ func (this *Node) Ai64(namespace, name string) int64 { return 0 } -// Deprecated - use Node.Ai64() -func (this *Node) GetAttri64(namespace, name string) int64 { return this.Ai64(namespace, name) } - // Get attribute value as float32 func (this *Node) Af32(namespace, name string) float32 { if s := this.As(namespace, name); s != "" { @@ -222,9 +175,6 @@ func (this *Node) Af32(namespace, name string) float32 { return 0 } -// Deprecated - use Node.Af32() -func (this *Node) GetAttrf32(namespace, name string) float32 { return this.Af32(namespace, name) } - // Get attribute value as float64 func (this *Node) Af64(namespace, name string) float64 { if s := this.As(namespace, name); s != "" { @@ -234,10 +184,6 @@ func (this *Node) Af64(namespace, name string) float64 { return 0 } -// Deprecated - use Node.Af64() -func (this *Node) GetAttrf64(namespace, name string) float64 { return this.Af64(namespace, name) } - - // Get attribute value as bool func (this *Node) Ab(namespace, name string) bool { if s := this.As(namespace, name); s != "" { @@ -298,94 +244,100 @@ func rec_SelectNodes(cn 
*Node, namespace, name string, list *[]*Node) { } } +// Convert node to appropriate []byte representation based on it's @Type. +// Note that NT_ROOT is a special-case empty node used as the root for a +// Document. This one has no representation by itself. It merely forwards the +// String() call to it's child nodes. +func (this *Node) Bytes() (b []byte) { + switch this.Type { + case NT_PROCINST: + b = this.printProcInst() + case NT_COMMENT: + b = this.printComment() + case NT_DIRECTIVE: + b = this.printDirective() + case NT_ELEMENT: + b = this.printElement() + case NT_ROOT: + b = this.printRoot() + } + return +} + // Convert node to appropriate string representation based on it's @Type. // Note that NT_ROOT is a special-case empty node used as the root for a // Document. This one has no representation by itself. It merely forwards the // String() call to it's child nodes. func (this *Node) String() (s string) { - switch this.Type { - case NT_PROCINST: - s = this.printProcInst() - case NT_COMMENT: - s = this.printComment() - case NT_DIRECTIVE: - s = this.printDirective() - case NT_ELEMENT: - s = this.printElement() - case NT_ROOT: - s = this.printRoot() - } - return + return string(this.Bytes()) } -func (this *Node) printRoot() (s string) { - var data []byte - buf := bytes.NewBuffer(data) +func (this *Node) printRoot() []byte { + var b bytes.Buffer for _, v := range this.Children { - buf.WriteString(v.String()) + b.WriteString(v.String()) } - return buf.String() + return b.Bytes() } -func (this *Node) printProcInst() string { - return "" +func (this *Node) printProcInst() []byte { + return []byte("") } -func (this *Node) printComment() string { - return "" +func (this *Node) printComment() []byte { + return []byte("") } -func (this *Node) printDirective() string { - return "" +func (this *Node) printDirective() []byte { + return []byte("") } -func (this *Node) printElement() string { - var data []byte - buf := bytes.NewBuffer(data) +func (this *Node) printElement() 
[]byte { + var b bytes.Buffer if len(this.Name.Space) > 0 { - buf.WriteRune('<') - buf.WriteString(this.Name.Space) - buf.WriteRune(':') - buf.WriteString(this.Name.Local) + b.WriteRune('<') + b.WriteString(this.Name.Space) + b.WriteRune(':') + b.WriteString(this.Name.Local) } else { - buf.WriteRune('<') - buf.WriteString(this.Name.Local) + b.WriteRune('<') + b.WriteString(this.Name.Local) } for _, v := range this.Attributes { if len(v.Name.Space) > 0 { - buf.WriteString(fmt.Sprintf(` %s:%s="%s"`, v.Name.Space, v.Name.Local, v.Value)) + b.WriteString(fmt.Sprintf(` %s:%s="%s"`, v.Name.Space, v.Name.Local, v.Value)) } else { - buf.WriteString(fmt.Sprintf(` %s="%s"`, v.Name.Local, v.Value)) + b.WriteString(fmt.Sprintf(` %s="%s"`, v.Name.Local, v.Value)) } } if len(this.Children) == 0 && len(this.Value) == 0 { - buf.WriteString(" />") - return buf.String() + b.WriteString(" />") + return b.Bytes() } - buf.WriteRune('>') + b.WriteRune('>') for _, v := range this.Children { - buf.WriteString(v.String()) + b.WriteString(v.String()) } - buf.WriteString(this.Value) + b.WriteString(this.Value) if len(this.Name.Space) > 0 { - buf.WriteString("') + b.WriteString("') } else { - buf.WriteString("') + b.WriteString("') } - return buf.String() + return b.Bytes() } // Add a child node diff --git a/test.xml b/test.xml index ddd248d..e6e8c9f 100644 --- a/test.xml +++ b/test.xml @@ -1,4 +1,3 @@ - diff --git a/test1.xml b/test1.xml index 8a70e87..db44efa 100644 --- a/test1.xml +++ b/test1.xml @@ -1 +1 @@ -WriteTheWebhttp://writetheweb.comNews for web users that write backen-usCopyright 2000, WriteTheWeb team.editor@writetheweb.comwebmaster@writetheweb.comWriteTheWebhttp://writetheweb.com/images/mynetscape88.gifhttp://writetheweb.com8831News for web users that write backGiving the world a pluggable Gnutellahttp://writetheweb.com/read.php?item=24WorldOS is a framework on which to build programs that work like Freenet or Gnutella -allowing distributed applications using peer-to-peer 
routing.Syndication discussions hot uphttp://writetheweb.com/read.php?item=23After a period of dormancy, the Syndication mailing list has become active again, with contributions from leaders in traditional media and Web syndication.Personal web server integrates file sharing and messaginghttp://writetheweb.com/read.php?item=22The Magi Project is an innovative project to create a combined personal web server and messaging system that enables the sharing and synchronization of information across desktop, laptop and palmtop devices.Syndication and Metadatahttp://writetheweb.com/read.php?item=21RSS is probably the best known metadata format around. RDF is probably one of the least understood. In this essay, published on my O'Reilly Network weblog, I argue that the next generation of RSS should be based on RDF.UK bloggers get organisedhttp://writetheweb.com/read.php?item=20Looks like the weblogs scene is gathering pace beyond the shores of the US. There's now a UK-specific page on weblogs.com, and a mailing list at egroups.Yournamehere.com more important than anythinghttp://writetheweb.com/read.php?item=19Whatever you're publishing on the web, your site name is the most valuable asset you have, according to Carl Steadman. 
\ No newline at end of file +WriteTheWebhttp://writetheweb.comNews for web users that write backen-usCopyright 2000, WriteTheWeb team.editor@writetheweb.comwebmaster@writetheweb.comWriteTheWebhttp://writetheweb.com/images/mynetscape88.gifhttp://writetheweb.com8831News for web users that write backGiving the world a pluggable Gnutellahttp://writetheweb.com/read.php?item=24WorldOS is a framework on which to build programs that work like Freenet or Gnutella -allowing distributed applications using peer-to-peer routing.Syndication discussions hot uphttp://writetheweb.com/read.php?item=23After a period of dormancy, the Syndication mailing list has become active again, with contributions from leaders in traditional media and Web syndication.Personal web server integrates file sharing and messaginghttp://writetheweb.com/read.php?item=22The Magi Project is an innovative project to create a combined personal web server and messaging system that enables the sharing and synchronization of information across desktop, laptop and palmtop devices.Syndication and Metadatahttp://writetheweb.com/read.php?item=21RSS is probably the best known metadata format around. RDF is probably one of the least understood. In this essay, published on my O'Reilly Network weblog, I argue that the next generation of RSS should be based on RDF.UK bloggers get organisedhttp://writetheweb.com/read.php?item=20Looks like the weblogs scene is gathering pace beyond the shores of the US. There's now a UK-specific page on weblogs.com, and a mailing list at egroups.Yournamehere.com more important than anythinghttp://writetheweb.com/read.php?item=19Whatever you're publishing on the web, your site name is the most valuable asset you have, according to Carl Steadman. \ No newline at end of file