fixed some slice copy routines

This commit is contained in:
jim teeuwen 2010-05-06 05:36:48 +02:00
parent 379dfddd5b
commit 6a776b0cda
5 changed files with 602 additions and 558 deletions

View File

@ -14,13 +14,13 @@
The Document currently implements 2 simple search functions which allow you to
look for specific nodes.
Document.SelectNode(namespace, name string) *Node;
Document.SelectNodes(namespace, name string) []*Node;
SelectNode() returns the first, single node it finds matching the given name
and namespace. SelectNodes() returns a slice containing all the matching nodes.
Note that these search functions can be invoked on individual nodes as well.
This allows you to search only a subset of the entire document.
@ -35,23 +35,23 @@ import "fmt"
import "http"
type Document struct {
Version string;
Encoding string;
StandAlone string;
SaveDocType bool;
Root *Node;
Entity map[string]string;
Verbose bool;
Version string
Encoding string
StandAlone string
SaveDocType bool
Root *Node
Entity map[string]string
Verbose bool
}
func New() *Document {
return &Document{
Version: "1.0",
Encoding: "utf-8",
StandAlone: "yes",
Version: "1.0",
Encoding: "utf-8",
StandAlone: "yes",
SaveDocType: true,
Entity: make(map[string]string),
Verbose: false
Entity: make(map[string]string),
Verbose: false,
}
}
@ -60,89 +60,87 @@ func New() *Document {
// set only those entities needed manually using the document.Entity map, but
// if need be, this method can be called to fill the map with the entire set
// defined on http://www.w3.org/TR/html4/sgml/entities.html
func (this *Document) LoadExtendedEntityMap() {
loadNonStandardEntities(&this.Entity);
}
func (this *Document) LoadExtendedEntityMap() { loadNonStandardEntities(&this.Entity) }
func (this *Document) String() string {
s, _ := this.SaveString();
return s;
s, _ := this.SaveString()
return s
}
func (this *Document) SelectNode(namespace, name string) *Node {
return this.Root.SelectNode(namespace, name);
return this.Root.SelectNode(namespace, name)
}
func (this *Document) SelectNodes(namespace, name string) []*Node {
return this.Root.SelectNodes(namespace, name);
return this.Root.SelectNodes(namespace, name)
}
// *****************************************************************************
// *** Satisfy ILoader interface
// *****************************************************************************
func (this *Document) LoadString(s string) (err os.Error) {
xp := xml.NewParser(strings.NewReader(s));
xp.Entity = this.Entity;
xp := xml.NewParser(strings.NewReader(s))
xp.Entity = this.Entity
this.Root = NewNode(NT_ROOT);
ct := this.Root;
this.Root = NewNode(NT_ROOT)
ct := this.Root
for {
tok, err := xp.Token();
tok, err := xp.Token()
if err != nil {
if err != os.EOF && this.Verbose {
fmt.Fprintf(os.Stderr, "Xml Error: %s\n", err);
fmt.Fprintf(os.Stderr, "Xml Error: %s\n", err)
}
return
}
t1, ok := tok.(xml.SyntaxError);
t1, ok := tok.(xml.SyntaxError)
if ok {
err = os.NewError(t1.String());
err = os.NewError(t1.String())
return
}
t2, ok := tok.(xml.CharData);
t2, ok := tok.(xml.CharData)
if ok && ct != nil {
ct.Value = strings.TrimSpace(string(t2));
ct.Value = strings.TrimSpace(string(t2))
continue
}
t3, ok := tok.(xml.Comment);
t3, ok := tok.(xml.Comment)
if ok && ct != nil {
t := NewNode(NT_COMMENT);
t.Value = strings.TrimSpace(string(t3));
ct.AddChild(t);
t := NewNode(NT_COMMENT)
t.Value = strings.TrimSpace(string(t3))
ct.AddChild(t)
continue
}
t4, ok := tok.(xml.Directive);
t4, ok := tok.(xml.Directive)
if ok && ct != nil {
t := NewNode(NT_DIRECTIVE);
t.Value = strings.TrimSpace(string(t4));
ct.AddChild(t);
t := NewNode(NT_DIRECTIVE)
t.Value = strings.TrimSpace(string(t4))
ct.AddChild(t)
continue
}
t5, ok := tok.(xml.StartElement);
t5, ok := tok.(xml.StartElement)
if ok && ct != nil {
t := NewNode(NT_ELEMENT);
t.Name = t5.Name;
t.Attributes = make([]Attr, len(t5.Attr));
t := NewNode(NT_ELEMENT)
t.Name = t5.Name
t.Attributes = make([]Attr, len(t5.Attr))
for i, v := range t5.Attr {
t.Attributes[i].Name = v.Name;
t.Attributes[i].Value = v.Value;
t.Attributes[i].Name = v.Name
t.Attributes[i].Value = v.Value
}
ct.AddChild(t);
ct = t;
ct.AddChild(t)
ct = t
continue
}
t6, ok := tok.(xml.ProcInst);
t6, ok := tok.(xml.ProcInst)
if ok {
if t6.Target == "xml" { // xml doctype
doctype := strings.TrimSpace(string(t6.Inst));
if t6.Target == "xml" { // xml doctype
doctype := strings.TrimSpace(string(t6.Inst))
/* // Not needed. There is only xml version 1.0
pos := strings.Index(doctype, `version="`);
if pos > -1 {
@ -153,7 +151,7 @@ func (this *Document) LoadString(s string) (err os.Error) {
*/
/* // Not needed. Any string we handle in Go is UTF8
// encoded. This means we will save UTF8 data as well.
// encoded. This means we will save UTF8 data as well.
pos = strings.Index(doctype, `encoding="`);
if pos > -1 {
this.Encoding = doctype[pos+len(`encoding="`) : len(doctype)];
@ -162,100 +160,106 @@ func (this *Document) LoadString(s string) (err os.Error) {
}
*/
pos := strings.Index(doctype, `standalone="`);
pos := strings.Index(doctype, `standalone="`)
if pos > -1 {
this.StandAlone = doctype[pos+len(`standalone="`) : len(doctype)];
pos = strings.Index(this.StandAlone, `"`);
this.StandAlone = this.StandAlone[0:pos];
this.StandAlone = doctype[pos+len(`standalone="`) : len(doctype)]
pos = strings.Index(this.StandAlone, `"`)
this.StandAlone = this.StandAlone[0:pos]
}
} else if ct != nil {
t := NewNode(NT_PROCINST);
t.Target = strings.TrimSpace(t6.Target);
t.Value = strings.TrimSpace(string(t6.Inst));
ct.AddChild(t);
t := NewNode(NT_PROCINST)
t.Target = strings.TrimSpace(t6.Target)
t.Value = strings.TrimSpace(string(t6.Inst))
ct.AddChild(t)
}
continue
}
_, ok = tok.(xml.EndElement);
_, ok = tok.(xml.EndElement)
if ok {
ct = ct.Parent;
ct = ct.Parent
continue
}
}
return;
return
}
func (this *Document) LoadFile(path string) (err os.Error) {
file, err := os.Open(path, os.O_RDONLY, 0600);
file, err := os.Open(path, os.O_RDONLY, 0600)
if err != nil {
return
}
defer file.Close();
defer file.Close()
content := "";
buff := make([]byte, 256);
content := ""
buff := make([]byte, 256)
for {
_, err := file.Read(buff);
_, err := file.Read(buff)
if err != nil {
break
}
content += string(buff);
content += string(buff)
}
err = this.LoadString(content);
return;
err = this.LoadString(content)
return
}
func (this *Document) LoadUri(uri string) (err os.Error) {
r, _, err := http.Get(uri);
r, _, err := http.Get(uri)
if err != nil {
return
}
defer r.Body.Close();
defer r.Body.Close()
b, err := io.ReadAll(r.Body);
if err != nil {
return
data := ""
b := make([]byte, 256)
for {
n, err := io.ReadFull(r.Body, b)
if n == 0 || err == os.EOF {
break
}
data += string(b)
}
err = this.LoadString(string(b));
return;
err = this.LoadString(data)
return
}
func (this *Document) LoadStream(r *io.Reader) (err os.Error) {
content := "";
buff := make([]byte, 256);
content := ""
buff := make([]byte, 256)
for {
_, err := r.Read(buff);
_, err := r.Read(buff)
if err != nil {
break
}
content += string(buff);
content += string(buff)
}
err = this.LoadString(content);
return;
err = this.LoadString(content)
return
}
// *****************************************************************************
// *** Satisfy ISaver interface
// *****************************************************************************
func (this *Document) SaveFile(path string) (err os.Error) {
file, err := os.Open(path, os.O_WRONLY | os.O_CREAT, 0600);
file, err := os.Open(path, os.O_WRONLY|os.O_CREAT, 0600)
if err != nil {
return
}
defer file.Close();
defer file.Close()
content, err := this.SaveString();
content, err := this.SaveString()
if err != nil {
return
}
file.Write(strings.Bytes(content));
file.Write([]byte(content))
return
}
@ -265,16 +269,15 @@ func (this *Document) SaveString() (s string, err os.Error) {
this.Version, this.Encoding, this.StandAlone)
}
s += this.Root.String();
return;
s += this.Root.String()
return
}
func (this *Document) SaveStream(w *io.Writer) (err os.Error) {
s, err := this.SaveString();
s, err := this.SaveString()
if err != nil {
return
}
w.Write(strings.Bytes(s));
return;
w.Write([]byte(s))
return
}

View File

@ -5,32 +5,40 @@ import "utf8"
import "regexp"
import "strconv"
var reg_entity = regexp.MustCompile("^&#[0-9]+;$");
var reg_entity = regexp.MustCompile("^&#[0-9]+;$")
// Converts a single numerical html entity to a regular Go utf-token.
// ex: "&#9827;" -> "♣"
func HtmlToUTF8(entity string) string {
// Make sure we have a valid entity: &#123;
ok := reg_entity.MatchString(entity);
if !ok { return "" }
ok := reg_entity.MatchString(entity)
if !ok {
return ""
}
// Convert entity to number
num, err := strconv.Atoi(entity[2:len(entity)-1]);
if err != nil { return "" }
num, err := strconv.Atoi(entity[2 : len(entity)-1])
if err != nil {
return ""
}
var arr [3]byte;
size := utf8.EncodeRune(num, &arr);
if size == 0 { return "" }
var arr [3]byte
size := utf8.EncodeRune(num, &arr)
if size == 0 {
return ""
}
return string(&arr);
return string(&arr)
}
// Converts a single Go utf-token to its Html entity.
// ex: "♣" -> "&#9827;"
func UTF8ToHtml(token string) string {
rune, size := utf8.DecodeRuneInString(token);
if size == 0 { return "" }
return fmt.Sprintf("&#%d;", rune);
rune, size := utf8.DecodeRuneInString(token)
if size == 0 {
return ""
}
return fmt.Sprintf("&#%d;", rune)
}
/*
@ -46,261 +54,258 @@ func UTF8ToHtml(token string) string {
It will be used to map non-standard xml entities to a proper value.
If the parser encounters any unknown entities, it will throw a syntax
error and abort the parsing. Hence the ability to supply this map.
*/
*/
func loadNonStandardEntities(em *map[string]string) {
(*em)["pi"] = "\u03c0";
(*em)["nabla"] = "\u2207";
(*em)["isin"] = "\u2208";
(*em)["loz"] = "\u25ca";
(*em)["prop"] = "\u221d";
(*em)["para"] = "\u00b6";
(*em)["Aring"] = "\u00c5";
(*em)["euro"] = "\u20ac";
(*em)["sup3"] = "\u00b3";
(*em)["sup2"] = "\u00b2";
(*em)["sup1"] = "\u00b9";
(*em)["prod"] = "\u220f";
(*em)["gamma"] = "\u03b3";
(*em)["perp"] = "\u22a5";
(*em)["lfloor"] = "\u230a";
(*em)["fnof"] = "\u0192";
(*em)["frasl"] = "\u2044";
(*em)["rlm"] = "\u200f";
(*em)["omega"] = "\u03c9";
(*em)["part"] = "\u2202";
(*em)["euml"] = "\u00eb";
(*em)["Kappa"] = "\u039a";
(*em)["nbsp"] = "\u00a0";
(*em)["Eacute"] = "\u00c9";
(*em)["brvbar"] = "\u00a6";
(*em)["otimes"] = "\u2297";
(*em)["ndash"] = "\u2013";
(*em)["thinsp"] = "\u2009";
(*em)["nu"] = "\u03bd";
(*em)["Upsilon"] = "\u03a5";
(*em)["upsih"] = "\u03d2";
(*em)["raquo"] = "\u00bb";
(*em)["yacute"] = "\u00fd";
(*em)["delta"] = "\u03b4";
(*em)["eth"] = "\u00f0";
(*em)["supe"] = "\u2287";
(*em)["ne"] = "\u2260";
(*em)["ni"] = "\u220b";
(*em)["eta"] = "\u03b7";
(*em)["uArr"] = "\u21d1";
(*em)["image"] = "\u2111";
(*em)["asymp"] = "\u2248";
(*em)["oacute"] = "\u00f3";
(*em)["rarr"] = "\u2192";
(*em)["emsp"] = "\u2003";
(*em)["acirc"] = "\u00e2";
(*em)["shy"] = "\u00ad";
(*em)["yuml"] = "\u00ff";
(*em)["acute"] = "\u00b4";
(*em)["int"] = "\u222b";
(*em)["ccedil"] = "\u00e7";
(*em)["Acirc"] = "\u00c2";
(*em)["Ograve"] = "\u00d2";
(*em)["times"] = "\u00d7";
(*em)["weierp"] = "\u2118";
(*em)["Tau"] = "\u03a4";
(*em)["omicron"] = "\u03bf";
(*em)["lt"] = "\u003c";
(*em)["Mu"] = "\u039c";
(*em)["Ucirc"] = "\u00db";
(*em)["sub"] = "\u2282";
(*em)["le"] = "\u2264";
(*em)["sum"] = "\u2211";
(*em)["sup"] = "\u2283";
(*em)["lrm"] = "\u200e";
(*em)["frac34"] = "\u00be";
(*em)["Iota"] = "\u0399";
(*em)["Ugrave"] = "\u00d9";
(*em)["THORN"] = "\u00de";
(*em)["rsaquo"] = "\u203a";
(*em)["not"] = "\u00ac";
(*em)["sigma"] = "\u03c3";
(*em)["iuml"] = "\u00ef";
(*em)["epsilon"] = "\u03b5";
(*em)["spades"] = "\u2660";
(*em)["theta"] = "\u03b8";
(*em)["divide"] = "\u00f7";
(*em)["Atilde"] = "\u00c3";
(*em)["uacute"] = "\u00fa";
(*em)["Rho"] = "\u03a1";
(*em)["trade"] = "\u2122";
(*em)["chi"] = "\u03c7";
(*em)["agrave"] = "\u00e0";
(*em)["or"] = "\u2228";
(*em)["circ"] = "\u02c6";
(*em)["middot"] = "\u00b7";
(*em)["plusmn"] = "\u00b1";
(*em)["aring"] = "\u00e5";
(*em)["lsquo"] = "\u2018";
(*em)["Yacute"] = "\u00dd";
(*em)["oline"] = "\u203e";
(*em)["copy"] = "\u00a9";
(*em)["icirc"] = "\u00ee";
(*em)["lowast"] = "\u2217";
(*em)["Oacute"] = "\u00d3";
(*em)["aacute"] = "\u00e1";
(*em)["oplus"] = "\u2295";
(*em)["crarr"] = "\u21b5";
(*em)["thetasym"] = "\u03d1";
(*em)["Beta"] = "\u0392";
(*em)["laquo"] = "\u00ab";
(*em)["rang"] = "\u232a";
(*em)["tilde"] = "\u02dc";
(*em)["Uuml"] = "\u00dc";
(*em)["zwj"] = "\u200d";
(*em)["mu"] = "\u03bc";
(*em)["Ccedil"] = "\u00c7";
(*em)["infin"] = "\u221e";
(*em)["ouml"] = "\u00f6";
(*em)["rfloor"] = "\u230b";
(*em)["pound"] = "\u00a3";
(*em)["szlig"] = "\u00df";
(*em)["thorn"] = "\u00fe";
(*em)["forall"] = "\u2200";
(*em)["piv"] = "\u03d6";
(*em)["rdquo"] = "\u201d";
(*em)["frac12"] = "\u00bd";
(*em)["frac14"] = "\u00bc";
(*em)["Ocirc"] = "\u00d4";
(*em)["Ecirc"] = "\u00ca";
(*em)["kappa"] = "\u03ba";
(*em)["Euml"] = "\u00cb";
(*em)["minus"] = "\u2212";
(*em)["cong"] = "\u2245";
(*em)["hellip"] = "\u2026";
(*em)["equiv"] = "\u2261";
(*em)["cent"] = "\u00a2";
(*em)["Uacute"] = "\u00da";
(*em)["darr"] = "\u2193";
(*em)["Eta"] = "\u0397";
(*em)["sbquo"] = "\u201a";
(*em)["rArr"] = "\u21d2";
(*em)["igrave"] = "\u00ec";
(*em)["uml"] = "\u00a8";
(*em)["lambda"] = "\u03bb";
(*em)["oelig"] = "\u0153";
(*em)["harr"] = "\u2194";
(*em)["ang"] = "\u2220";
(*em)["clubs"] = "\u2663";
(*em)["and"] = "\u2227";
(*em)["permil"] = "\u2030";
(*em)["larr"] = "\u2190";
(*em)["Yuml"] = "\u0178";
(*em)["cup"] = "\u222a";
(*em)["Xi"] = "\u039e";
(*em)["Alpha"] = "\u0391";
(*em)["phi"] = "\u03c6";
(*em)["ucirc"] = "\u00fb";
(*em)["oslash"] = "\u00f8";
(*em)["rsquo"] = "\u2019";
(*em)["AElig"] = "\u00c6";
(*em)["mdash"] = "\u2014";
(*em)["psi"] = "\u03c8";
(*em)["eacute"] = "\u00e9";
(*em)["otilde"] = "\u00f5";
(*em)["yen"] = "\u00a5";
(*em)["gt"] = "\u003e";
(*em)["Iuml"] = "\u00cf";
(*em)["Prime"] = "\u2033";
(*em)["Chi"] = "\u03a7";
(*em)["ge"] = "\u2265";
(*em)["reg"] = "\u00ae";
(*em)["hearts"] = "\u2665";
(*em)["auml"] = "\u00e4";
(*em)["Agrave"] = "\u00c0";
(*em)["sect"] = "\u00a7";
(*em)["sube"] = "\u2286";
(*em)["sigmaf"] = "\u03c2";
(*em)["Gamma"] = "\u0393";
(*em)["amp"] = "\u0026";
(*em)["ensp"] = "\u2002";
(*em)["ETH"] = "\u00d0";
(*em)["Igrave"] = "\u00cc";
(*em)["Omega"] = "\u03a9";
(*em)["Lambda"] = "\u039b";
(*em)["Omicron"] = "\u039f";
(*em)["there4"] = "\u2234";
(*em)["ntilde"] = "\u00f1";
(*em)["xi"] = "\u03be";
(*em)["dagger"] = "\u2020";
(*em)["egrave"] = "\u00e8";
(*em)["Delta"] = "\u0394";
(*em)["OElig"] = "\u0152";
(*em)["diams"] = "\u2666";
(*em)["ldquo"] = "\u201c";
(*em)["radic"] = "\u221a";
(*em)["Oslash"] = "\u00d8";
(*em)["Ouml"] = "\u00d6";
(*em)["lceil"] = "\u2308";
(*em)["uarr"] = "\u2191";
(*em)["atilde"] = "\u00e3";
(*em)["iquest"] = "\u00bf";
(*em)["lsaquo"] = "\u2039";
(*em)["Epsilon"] = "\u0395";
(*em)["iacute"] = "\u00ed";
(*em)["cap"] = "\u2229";
(*em)["deg"] = "\u00b0";
(*em)["Otilde"] = "\u00d5";
(*em)["zeta"] = "\u03b6";
(*em)["ocirc"] = "\u00f4";
(*em)["scaron"] = "\u0161";
(*em)["ecirc"] = "\u00ea";
(*em)["ordm"] = "\u00ba";
(*em)["tau"] = "\u03c4";
(*em)["Auml"] = "\u00c4";
(*em)["dArr"] = "\u21d3";
(*em)["ordf"] = "\u00aa";
(*em)["alefsym"] = "\u2135";
(*em)["notin"] = "\u2209";
(*em)["Pi"] = "\u03a0";
(*em)["sdot"] = "\u22c5";
(*em)["upsilon"] = "\u03c5";
(*em)["iota"] = "\u03b9";
(*em)["hArr"] = "\u21d4";
(*em)["Sigma"] = "\u03a3";
(*em)["lang"] = "\u2329";
(*em)["curren"] = "\u00a4";
(*em)["Theta"] = "\u0398";
(*em)["lArr"] = "\u21d0";
(*em)["Phi"] = "\u03a6";
(*em)["Nu"] = "\u039d";
(*em)["rho"] = "\u03c1";
(*em)["alpha"] = "\u03b1";
(*em)["iexcl"] = "\u00a1";
(*em)["micro"] = "\u00b5";
(*em)["cedil"] = "\u00b8";
(*em)["Ntilde"] = "\u00d1";
(*em)["Psi"] = "\u03a8";
(*em)["Dagger"] = "\u2021";
(*em)["Egrave"] = "\u00c8";
(*em)["Icirc"] = "\u00ce";
(*em)["nsub"] = "\u2284";
(*em)["bdquo"] = "\u201e";
(*em)["empty"] = "\u2205";
(*em)["aelig"] = "\u00e6";
(*em)["ograve"] = "\u00f2";
(*em)["macr"] = "\u00af";
(*em)["Zeta"] = "\u0396";
(*em)["beta"] = "\u03b2";
(*em)["sim"] = "\u223c";
(*em)["uuml"] = "\u00fc";
(*em)["Aacute"] = "\u00c1";
(*em)["Iacute"] = "\u00cd";
(*em)["exist"] = "\u2203";
(*em)["prime"] = "\u2032";
(*em)["rceil"] = "\u2309";
(*em)["real"] = "\u211c";
(*em)["zwnj"] = "\u200c";
(*em)["bull"] = "\u2022";
(*em)["quot"] = "\u0022";
(*em)["Scaron"] = "\u0160";
(*em)["ugrave"] = "\u00f9";
(*em)["pi"] = "\u03c0"
(*em)["nabla"] = "\u2207"
(*em)["isin"] = "\u2208"
(*em)["loz"] = "\u25ca"
(*em)["prop"] = "\u221d"
(*em)["para"] = "\u00b6"
(*em)["Aring"] = "\u00c5"
(*em)["euro"] = "\u20ac"
(*em)["sup3"] = "\u00b3"
(*em)["sup2"] = "\u00b2"
(*em)["sup1"] = "\u00b9"
(*em)["prod"] = "\u220f"
(*em)["gamma"] = "\u03b3"
(*em)["perp"] = "\u22a5"
(*em)["lfloor"] = "\u230a"
(*em)["fnof"] = "\u0192"
(*em)["frasl"] = "\u2044"
(*em)["rlm"] = "\u200f"
(*em)["omega"] = "\u03c9"
(*em)["part"] = "\u2202"
(*em)["euml"] = "\u00eb"
(*em)["Kappa"] = "\u039a"
(*em)["nbsp"] = "\u00a0"
(*em)["Eacute"] = "\u00c9"
(*em)["brvbar"] = "\u00a6"
(*em)["otimes"] = "\u2297"
(*em)["ndash"] = "\u2013"
(*em)["thinsp"] = "\u2009"
(*em)["nu"] = "\u03bd"
(*em)["Upsilon"] = "\u03a5"
(*em)["upsih"] = "\u03d2"
(*em)["raquo"] = "\u00bb"
(*em)["yacute"] = "\u00fd"
(*em)["delta"] = "\u03b4"
(*em)["eth"] = "\u00f0"
(*em)["supe"] = "\u2287"
(*em)["ne"] = "\u2260"
(*em)["ni"] = "\u220b"
(*em)["eta"] = "\u03b7"
(*em)["uArr"] = "\u21d1"
(*em)["image"] = "\u2111"
(*em)["asymp"] = "\u2248"
(*em)["oacute"] = "\u00f3"
(*em)["rarr"] = "\u2192"
(*em)["emsp"] = "\u2003"
(*em)["acirc"] = "\u00e2"
(*em)["shy"] = "\u00ad"
(*em)["yuml"] = "\u00ff"
(*em)["acute"] = "\u00b4"
(*em)["int"] = "\u222b"
(*em)["ccedil"] = "\u00e7"
(*em)["Acirc"] = "\u00c2"
(*em)["Ograve"] = "\u00d2"
(*em)["times"] = "\u00d7"
(*em)["weierp"] = "\u2118"
(*em)["Tau"] = "\u03a4"
(*em)["omicron"] = "\u03bf"
(*em)["lt"] = "\u003c"
(*em)["Mu"] = "\u039c"
(*em)["Ucirc"] = "\u00db"
(*em)["sub"] = "\u2282"
(*em)["le"] = "\u2264"
(*em)["sum"] = "\u2211"
(*em)["sup"] = "\u2283"
(*em)["lrm"] = "\u200e"
(*em)["frac34"] = "\u00be"
(*em)["Iota"] = "\u0399"
(*em)["Ugrave"] = "\u00d9"
(*em)["THORN"] = "\u00de"
(*em)["rsaquo"] = "\u203a"
(*em)["not"] = "\u00ac"
(*em)["sigma"] = "\u03c3"
(*em)["iuml"] = "\u00ef"
(*em)["epsilon"] = "\u03b5"
(*em)["spades"] = "\u2660"
(*em)["theta"] = "\u03b8"
(*em)["divide"] = "\u00f7"
(*em)["Atilde"] = "\u00c3"
(*em)["uacute"] = "\u00fa"
(*em)["Rho"] = "\u03a1"
(*em)["trade"] = "\u2122"
(*em)["chi"] = "\u03c7"
(*em)["agrave"] = "\u00e0"
(*em)["or"] = "\u2228"
(*em)["circ"] = "\u02c6"
(*em)["middot"] = "\u00b7"
(*em)["plusmn"] = "\u00b1"
(*em)["aring"] = "\u00e5"
(*em)["lsquo"] = "\u2018"
(*em)["Yacute"] = "\u00dd"
(*em)["oline"] = "\u203e"
(*em)["copy"] = "\u00a9"
(*em)["icirc"] = "\u00ee"
(*em)["lowast"] = "\u2217"
(*em)["Oacute"] = "\u00d3"
(*em)["aacute"] = "\u00e1"
(*em)["oplus"] = "\u2295"
(*em)["crarr"] = "\u21b5"
(*em)["thetasym"] = "\u03d1"
(*em)["Beta"] = "\u0392"
(*em)["laquo"] = "\u00ab"
(*em)["rang"] = "\u232a"
(*em)["tilde"] = "\u02dc"
(*em)["Uuml"] = "\u00dc"
(*em)["zwj"] = "\u200d"
(*em)["mu"] = "\u03bc"
(*em)["Ccedil"] = "\u00c7"
(*em)["infin"] = "\u221e"
(*em)["ouml"] = "\u00f6"
(*em)["rfloor"] = "\u230b"
(*em)["pound"] = "\u00a3"
(*em)["szlig"] = "\u00df"
(*em)["thorn"] = "\u00fe"
(*em)["forall"] = "\u2200"
(*em)["piv"] = "\u03d6"
(*em)["rdquo"] = "\u201d"
(*em)["frac12"] = "\u00bd"
(*em)["frac14"] = "\u00bc"
(*em)["Ocirc"] = "\u00d4"
(*em)["Ecirc"] = "\u00ca"
(*em)["kappa"] = "\u03ba"
(*em)["Euml"] = "\u00cb"
(*em)["minus"] = "\u2212"
(*em)["cong"] = "\u2245"
(*em)["hellip"] = "\u2026"
(*em)["equiv"] = "\u2261"
(*em)["cent"] = "\u00a2"
(*em)["Uacute"] = "\u00da"
(*em)["darr"] = "\u2193"
(*em)["Eta"] = "\u0397"
(*em)["sbquo"] = "\u201a"
(*em)["rArr"] = "\u21d2"
(*em)["igrave"] = "\u00ec"
(*em)["uml"] = "\u00a8"
(*em)["lambda"] = "\u03bb"
(*em)["oelig"] = "\u0153"
(*em)["harr"] = "\u2194"
(*em)["ang"] = "\u2220"
(*em)["clubs"] = "\u2663"
(*em)["and"] = "\u2227"
(*em)["permil"] = "\u2030"
(*em)["larr"] = "\u2190"
(*em)["Yuml"] = "\u0178"
(*em)["cup"] = "\u222a"
(*em)["Xi"] = "\u039e"
(*em)["Alpha"] = "\u0391"
(*em)["phi"] = "\u03c6"
(*em)["ucirc"] = "\u00fb"
(*em)["oslash"] = "\u00f8"
(*em)["rsquo"] = "\u2019"
(*em)["AElig"] = "\u00c6"
(*em)["mdash"] = "\u2014"
(*em)["psi"] = "\u03c8"
(*em)["eacute"] = "\u00e9"
(*em)["otilde"] = "\u00f5"
(*em)["yen"] = "\u00a5"
(*em)["gt"] = "\u003e"
(*em)["Iuml"] = "\u00cf"
(*em)["Prime"] = "\u2033"
(*em)["Chi"] = "\u03a7"
(*em)["ge"] = "\u2265"
(*em)["reg"] = "\u00ae"
(*em)["hearts"] = "\u2665"
(*em)["auml"] = "\u00e4"
(*em)["Agrave"] = "\u00c0"
(*em)["sect"] = "\u00a7"
(*em)["sube"] = "\u2286"
(*em)["sigmaf"] = "\u03c2"
(*em)["Gamma"] = "\u0393"
(*em)["amp"] = "\u0026"
(*em)["ensp"] = "\u2002"
(*em)["ETH"] = "\u00d0"
(*em)["Igrave"] = "\u00cc"
(*em)["Omega"] = "\u03a9"
(*em)["Lambda"] = "\u039b"
(*em)["Omicron"] = "\u039f"
(*em)["there4"] = "\u2234"
(*em)["ntilde"] = "\u00f1"
(*em)["xi"] = "\u03be"
(*em)["dagger"] = "\u2020"
(*em)["egrave"] = "\u00e8"
(*em)["Delta"] = "\u0394"
(*em)["OElig"] = "\u0152"
(*em)["diams"] = "\u2666"
(*em)["ldquo"] = "\u201c"
(*em)["radic"] = "\u221a"
(*em)["Oslash"] = "\u00d8"
(*em)["Ouml"] = "\u00d6"
(*em)["lceil"] = "\u2308"
(*em)["uarr"] = "\u2191"
(*em)["atilde"] = "\u00e3"
(*em)["iquest"] = "\u00bf"
(*em)["lsaquo"] = "\u2039"
(*em)["Epsilon"] = "\u0395"
(*em)["iacute"] = "\u00ed"
(*em)["cap"] = "\u2229"
(*em)["deg"] = "\u00b0"
(*em)["Otilde"] = "\u00d5"
(*em)["zeta"] = "\u03b6"
(*em)["ocirc"] = "\u00f4"
(*em)["scaron"] = "\u0161"
(*em)["ecirc"] = "\u00ea"
(*em)["ordm"] = "\u00ba"
(*em)["tau"] = "\u03c4"
(*em)["Auml"] = "\u00c4"
(*em)["dArr"] = "\u21d3"
(*em)["ordf"] = "\u00aa"
(*em)["alefsym"] = "\u2135"
(*em)["notin"] = "\u2209"
(*em)["Pi"] = "\u03a0"
(*em)["sdot"] = "\u22c5"
(*em)["upsilon"] = "\u03c5"
(*em)["iota"] = "\u03b9"
(*em)["hArr"] = "\u21d4"
(*em)["Sigma"] = "\u03a3"
(*em)["lang"] = "\u2329"
(*em)["curren"] = "\u00a4"
(*em)["Theta"] = "\u0398"
(*em)["lArr"] = "\u21d0"
(*em)["Phi"] = "\u03a6"
(*em)["Nu"] = "\u039d"
(*em)["rho"] = "\u03c1"
(*em)["alpha"] = "\u03b1"
(*em)["iexcl"] = "\u00a1"
(*em)["micro"] = "\u00b5"
(*em)["cedil"] = "\u00b8"
(*em)["Ntilde"] = "\u00d1"
(*em)["Psi"] = "\u03a8"
(*em)["Dagger"] = "\u2021"
(*em)["Egrave"] = "\u00c8"
(*em)["Icirc"] = "\u00ce"
(*em)["nsub"] = "\u2284"
(*em)["bdquo"] = "\u201e"
(*em)["empty"] = "\u2205"
(*em)["aelig"] = "\u00e6"
(*em)["ograve"] = "\u00f2"
(*em)["macr"] = "\u00af"
(*em)["Zeta"] = "\u0396"
(*em)["beta"] = "\u03b2"
(*em)["sim"] = "\u223c"
(*em)["uuml"] = "\u00fc"
(*em)["Aacute"] = "\u00c1"
(*em)["Iacute"] = "\u00cd"
(*em)["exist"] = "\u2203"
(*em)["prime"] = "\u2032"
(*em)["rceil"] = "\u2309"
(*em)["real"] = "\u211c"
(*em)["zwnj"] = "\u200c"
(*em)["bull"] = "\u2022"
(*em)["quot"] = "\u0022"
(*em)["Scaron"] = "\u0160"
(*em)["ugrave"] = "\u00f9"
}

View File

@ -4,19 +4,19 @@ import "os"
import "io"
type ILoader interface {
LoadUrl(string) os.Error;
LoadFile(string) os.Error;
LoadString(string) os.Error;
LoadStream(*io.Reader) os.Error;
LoadUrl(string) os.Error
LoadFile(string) os.Error
LoadString(string) os.Error
LoadStream(*io.Reader) os.Error
}
type ISaver interface {
SaveFile(string) os.Error;
SaveString(string) (string, os.Error);
SaveStream(*io.Writer) os.Error;
SaveFile(string) os.Error
SaveString(string) (string, os.Error)
SaveStream(*io.Writer) os.Error
}
type ILoaderSaver interface {
ILoader;
ISaver;
ILoader
ISaver
}

View File

@ -7,175 +7,219 @@ import "fmt"
import "strconv"
const (
NT_ROOT = 0x00;
NT_DIRECTIVE = 0x01;
NT_PROCINST = 0x02;
NT_COMMENT = 0x03;
NT_ELEMENT = 0x04;
NT_ROOT = 0x00
NT_DIRECTIVE = 0x01
NT_PROCINST = 0x02
NT_COMMENT = 0x03
NT_ELEMENT = 0x04
)
type Attr struct {
Name xml.Name;
Value string;
Name xml.Name
Value string
}
type Node struct {
Type byte;
Name xml.Name;
Children []*Node;
Attributes []Attr;
Parent *Node;
Value string;
Target string; // procinst field
Type byte
Name xml.Name
Children []*Node
Attributes []Attr
Parent *Node
Value string
Target string // procinst field
}
func NewNode(tid byte) *Node { return &Node{Type: tid} }
func NewNode(tid byte) *Node { return &Node{Type: tid} }
// This wraps the standard xml.Unmarshal function and supplies this particular
// This wraps the standard xml.Unmarshal function and supplies this particular
// node as the content to be unmarshalled.
func (this *Node) Unmarshal(obj interface{}) os.Error {
return xml.Unmarshal(strings.NewReader(this.String()), obj);
return xml.Unmarshal(strings.NewReader(this.String()), obj)
}
// Get node value as string
func (this *Node) GetValue(namespace, name string) string {
node := rec_SelectNode(this, namespace, name);
if node == nil { return "" }
return node.Value;
node := rec_SelectNode(this, namespace, name)
if node == nil {
return ""
}
return node.Value
}
// Get node value as int
func (this *Node) GetValuei(namespace, name string) int {
node := rec_SelectNode(this, namespace, name);
if node == nil { return 0 }
if node.Value == "" { return 0 }
n, _ := strconv.Atoi(node.Value);
return n;
node := rec_SelectNode(this, namespace, name)
if node == nil {
return 0
}
if node.Value == "" {
return 0
}
n, _ := strconv.Atoi(node.Value)
return n
}
// Get node value as int64
func (this *Node) GetValuei64(namespace, name string) int64 {
node := rec_SelectNode(this, namespace, name);
if node == nil { return 0 }
if node.Value == "" { return 0 }
n, _ := strconv.Atoi64(node.Value);
return n;
node := rec_SelectNode(this, namespace, name)
if node == nil {
return 0
}
if node.Value == "" {
return 0
}
n, _ := strconv.Atoi64(node.Value)
return n
}
// Get node value as uint
func (this *Node) GetValueui(namespace, name string) uint {
node := rec_SelectNode(this, namespace, name);
if node == nil { return 0 }
if node.Value == "" { return 0 }
n, _ := strconv.Atoui(node.Value);
return n;
node := rec_SelectNode(this, namespace, name)
if node == nil {
return 0
}
if node.Value == "" {
return 0
}
n, _ := strconv.Atoui(node.Value)
return n
}
// Get node value as uint64
func (this *Node) GetValueui64(namespace, name string) uint64 {
node := rec_SelectNode(this, namespace, name);
if node == nil { return 0 }
if node.Value == "" { return 0 }
n, _ := strconv.Atoui64(node.Value);
return n;
node := rec_SelectNode(this, namespace, name)
if node == nil {
return 0
}
if node.Value == "" {
return 0
}
n, _ := strconv.Atoui64(node.Value)
return n
}
// Get node value as float
func (this *Node) GetValuef(namespace, name string) float {
node := rec_SelectNode(this, namespace, name);
if node == nil { return 0 }
if node.Value == "" { return 0 }
n, _ := strconv.Atof(node.Value);
return n;
node := rec_SelectNode(this, namespace, name)
if node == nil {
return 0
}
if node.Value == "" {
return 0
}
n, _ := strconv.Atof(node.Value)
return n
}
// Get node value as float32
func (this *Node) GetValuef32(namespace, name string) float32 {
node := rec_SelectNode(this, namespace, name);
if node == nil { return 0 }
if node.Value == "" { return 0 }
n, _ := strconv.Atof32(node.Value);
return n;
node := rec_SelectNode(this, namespace, name)
if node == nil {
return 0
}
if node.Value == "" {
return 0
}
n, _ := strconv.Atof32(node.Value)
return n
}
// Get node value as float64
func (this *Node) GetValuef64(namespace, name string) float64 {
node := rec_SelectNode(this, namespace, name);
if node == nil { return 0 }
if node.Value == "" { return 0 }
n, _ := strconv.Atof64(node.Value);
return n;
node := rec_SelectNode(this, namespace, name)
if node == nil {
return 0
}
if node.Value == "" {
return 0
}
n, _ := strconv.Atof64(node.Value)
return n
}
// Get attribute value as string
func (this *Node) GetAttr(namespace, name string) string {
for _,v := range this.Attributes {
for _, v := range this.Attributes {
if namespace == v.Name.Space && name == v.Name.Local {
return v.Value;
return v.Value
}
}
return "";
return ""
}
// Get attribute value as int
func (this *Node) GetAttri(namespace, name string) int {
s := this.GetAttr(namespace, name);
if s == "" { return 0 }
n, _ := strconv.Atoi(s);
return n;
s := this.GetAttr(namespace, name)
if s == "" {
return 0
}
n, _ := strconv.Atoi(s)
return n
}
// Get attribute value as uint
func (this *Node) GetAttrui(namespace, name string) uint {
s := this.GetAttr(namespace, name);
if s == "" { return 0 }
n, _ := strconv.Atoui(s);
return n;
s := this.GetAttr(namespace, name)
if s == "" {
return 0
}
n, _ := strconv.Atoui(s)
return n
}
// Get attribute value as uint64
func (this *Node) GetAttrui64(namespace, name string) uint64 {
s := this.GetAttr(namespace, name);
if s == "" { return 0 }
n, _ := strconv.Atoui64(s);
return n;
s := this.GetAttr(namespace, name)
if s == "" {
return 0
}
n, _ := strconv.Atoui64(s)
return n
}
// Get attribute value as int64
func (this *Node) GetAttri64(namespace, name string) int64 {
s := this.GetAttr(namespace, name);
if s == "" { return 0 }
n, _ := strconv.Atoi64(s);
return n;
s := this.GetAttr(namespace, name)
if s == "" {
return 0
}
n, _ := strconv.Atoi64(s)
return n
}
// Get attribute value as float
func (this *Node) GetAttrf(namespace, name string) float {
s := this.GetAttr(namespace, name);
if s == "" { return 0 }
n, _ := strconv.Atof(s);
return n;
s := this.GetAttr(namespace, name)
if s == "" {
return 0
}
n, _ := strconv.Atof(s)
return n
}
// Get attribute value as float32
func (this *Node) GetAttrf32(namespace, name string) float32 {
s := this.GetAttr(namespace, name);
if s == "" { return 0 }
n, _ := strconv.Atof32(s);
return n;
s := this.GetAttr(namespace, name)
if s == "" {
return 0
}
n, _ := strconv.Atof32(s)
return n
}
// Get attribute value as float64
func (this *Node) GetAttrf64(namespace, name string) float64 {
s := this.GetAttr(namespace, name);
if s == "" { return 0 }
n, _ := strconv.Atof64(s);
return n;
s := this.GetAttr(namespace, name)
if s == "" {
return 0
}
n, _ := strconv.Atof64(s)
return n
}
// Returns true if this node has the specified attribute. False otherwise.
func (this *Node) HasAttr(namespace, name string) bool {
for _,v := range this.Attributes {
for _, v := range this.Attributes {
if namespace == v.Name.Space && name == v.Name.Local {
return true
}
@ -185,46 +229,46 @@ func (this *Node) HasAttr(namespace, name string) bool {
// Select single node by name
func (this *Node) SelectNode(namespace, name string) *Node {
return rec_SelectNode(this, namespace, name);
return rec_SelectNode(this, namespace, name)
}
func rec_SelectNode(cn *Node, namespace, name string) *Node {
if cn.Name.Space == namespace && cn.Name.Local == name {
return cn;
return cn
}
for _, v := range cn.Children {
tn := rec_SelectNode(v, namespace, name);
if tn != nil { return tn }
tn := rec_SelectNode(v, namespace, name)
if tn != nil {
return tn
}
}
return nil;
return nil
}
// Select multiple nodes by name
func (this *Node) SelectNodes(namespace, name string) []*Node {
list := make([]*Node, 0);
rec_SelectNodes(this, namespace, name, &list);
return list;
list := make([]*Node, 0)
rec_SelectNodes(this, namespace, name, &list)
return list
}
func rec_SelectNodes(cn *Node, namespace, name string, list *[]*Node) {
if cn.Name.Space == namespace && cn.Name.Local == name {
slice := make([]*Node, len(*list) + 1);
for i,v := range *list {
slice[i] = v;
}
slice[len(slice) - 1] = cn;
*list = slice;
c := make([]*Node, len(*list)+1)
copy(c, *list)
c[len(c)-1] = cn
*list = c
return
}
for _, v := range cn.Children {
rec_SelectNodes(v, namespace, name, list);
rec_SelectNodes(v, namespace, name, list)
}
}
// Convert node to appropriate string representation based on its @Type.
// Note that NT_ROOT is a special-case empty node used as the root for a
// Note that NT_ROOT is a special-case empty node used as the root for a
// Document. This one has no representation by itself. It merely forwards the
// String() call to its child nodes.
func (this *Node) String() (s string) {
@ -240,29 +284,29 @@ func (this *Node) String() (s string) {
case NT_ROOT:
s = this.printRoot()
}
return;
return
}
func (this *Node) printRoot() (s string) {
for _, v := range this.Children {
s += v.String()
}
return;
return
}
// printProcInst formats the node as "<?" + Target + " " + Value + "?>".
// Target and Value are inserted verbatim, without escaping.
func (this *Node) printProcInst() (s string) {
s = "<?" + this.Target + " " + this.Value + "?>";
return;
s = "<?" + this.Target + " " + this.Value + "?>"
return
}
// printComment formats the node as "<!-- " + Value + " -->", placing a single
// space on each side of Value. Value is inserted verbatim.
func (this *Node) printComment() (s string) {
s = "<!-- " + this.Value + " -->";
return;
s = "<!-- " + this.Value + " -->"
return
}
// printDirective formats the node as "<!" + Value + "!>". Value is inserted
// verbatim.
// NOTE(review): the output ends in "!>", whereas XML directives such as
// <!DOCTYPE ...> close with a plain ">" — confirm the extra "!" is intended.
func (this *Node) printDirective() (s string) {
s = "<!" + this.Value + "!>";
return;
s = "<!" + this.Value + "!>"
return
}
func (this *Node) printElement() (s string) {
@ -281,23 +325,23 @@ func (this *Node) printElement() (s string) {
}
if len(this.Children) == 0 && len(this.Value) == 0 {
s += " />";
return;
s += " />"
return
}
s += ">";
s += ">"
for _, v := range this.Children {
s += v.String()
}
s += this.Value;
s += this.Value
if len(this.Name.Space) > 0 {
s += "</" + this.Name.Space + ":" + this.Name.Local + ">"
} else {
s += "</" + this.Name.Local + ">"
}
return;
return
}
// Add a child node
@ -305,38 +349,32 @@ func (this *Node) AddChild(t *Node) {
if t.Parent != nil {
t.Parent.RemoveChild(t)
}
t.Parent = this;
t.Parent = this
slice := make([]*Node, len(this.Children)+1);
for i, v := range this.Children {
slice[i] = v
}
slice[len(slice)-1] = t;
this.Children = slice;
c := make([]*Node, len(this.Children)+1)
copy(c, this.Children)
c[len(c)-1] = t
this.Children = c
}
// RemoveChild detaches t from this node. It finds t in this.Children by
// pointer equality; when t is not present the call returns without changing
// anything. Otherwise this.Children is replaced by a newly allocated slice
// that omits t (the order of the remaining children is kept) and t.Parent is
// set to nil.
func (this *Node) RemoveChild(t *Node) {
// -1 marks "not found".
pos := -1;
p := -1
for i, v := range this.Children {
if v == t {
pos = i;
break;
p = i
break
}
}
if pos == -1 {
if p == -1 {
return
}
slice := make([]*Node, len(this.Children)-1);
idx := 0;
for i, v := range this.Children {
if i != pos {
slice[idx] = v;
idx++;
}
}
// Copy the elements before and after position p around the removed entry.
c := make([]*Node, len(this.Children)-1)
copy(c, this.Children[0:p])
copy(c[p:], this.Children[p+1:])
this.Children = c
t.Parent = nil;
t.Parent = nil
}

View File

@ -3,108 +3,106 @@ package xmlx
import "testing"
// TestLoadLocal loads "test.xml" with LoadFile and fails when loading returns
// an error or when the resulting root node has no children.
func TestLoadLocal(t *testing.T) {
doc := New();
err := doc.LoadFile("test.xml");
doc := New()
err := doc.LoadFile("test.xml")
if err != nil {
t.Errorf("%s", err);
return;
t.Errorf("%s", err)
return
}
if len(doc.Root.Children) == 0 {
t.Errorf("Root node has no children.");
return;
t.Errorf("Root node has no children.")
return
}
}
// TestLoadRemote loads a document from a fixed http:// URL with LoadUri and
// fails when loading returns an error or when the resulting root node has no
// children.
// NOTE(review): presumably LoadUri fetches over the network, which would make
// this test depend on the remote host being reachable — confirm.
func TestLoadRemote(t *testing.T) {
doc := New();
err := doc.LoadUri("http://tldp.org/authors/template/Sample-HOWTO.xml");
doc := New()
err := doc.LoadUri("http://tldp.org/authors/template/Sample-HOWTO.xml")
if err != nil {
t.Errorf("%s", err);
return;
t.Errorf("%s", err)
return
}
if len(doc.Root.Children) == 0 {
t.Errorf("Root node has no children.");
return;
t.Errorf("Root node has no children.")
return
}
}
// TestSave loads "test.xml" and writes the document back out with
// SaveFile("test1.xml"). It fails when either call returns an error; the
// contents of the saved file are not inspected.
func TestSave(t *testing.T) {
doc := New();
err := doc.LoadFile("test.xml");
doc := New()
err := doc.LoadFile("test.xml")
if err != nil {
t.Errorf("LoadFile(): %s", err);
return;
t.Errorf("LoadFile(): %s", err)
return
}
err = doc.SaveFile("test1.xml");
err = doc.SaveFile("test1.xml")
if err != nil {
t.Errorf("SaveFile(): %s", err);
return;
t.Errorf("SaveFile(): %s", err)
return
}
}
// TestNodeSearch loads "test.xml" and checks both search functions for the
// name "item" with an empty namespace: SelectNode must return a non-nil node
// and SelectNodes must return at least one node.
func TestNodeSearch(t *testing.T) {
doc := New();
err := doc.LoadFile("test.xml");
doc := New()
err := doc.LoadFile("test.xml")
if err != nil {
t.Errorf("LoadFile(): %s", err);
return;
t.Errorf("LoadFile(): %s", err)
return
}
node := doc.SelectNode("", "item");
node := doc.SelectNode("", "item")
if node == nil {
t.Errorf("SelectNode(): No node found.");
return;
t.Errorf("SelectNode(): No node found.")
return
}
nodes := doc.SelectNodes("", "item");
nodes := doc.SelectNodes("", "item")
if len(nodes) == 0 {
t.Errorf("SelectNodes(): no nodes found.");
return;
t.Errorf("SelectNodes(): no nodes found.")
return
}
}
// Image is the target type that TestUnmarshal fills from the "image" node of
// the test document. All fields are plain strings.
type Image struct {
Title string;
Url string;
Link string;
Width string;
Height string;
Description string;
Title string
Url string
Link string
Width string
Height string
Description string
}
// TestUnmarshal loads "test.xml", selects the node named "image" (empty
// namespace), unmarshals it into an Image value and checks that the Title
// field equals "WriteTheWeb". Any failing step reports an error and ends the
// test early.
func TestUnmarshal(t *testing.T) {
doc := New();
err := doc.LoadFile("test.xml");
doc := New()
err := doc.LoadFile("test.xml")
if err != nil {
t.Errorf("LoadFile(): %s", err);
return;
t.Errorf("LoadFile(): %s", err)
return
}
node := doc.SelectNode("", "image");
node := doc.SelectNode("", "image")
if node == nil {
t.Errorf("SelectNode(): No node found.");
return;
t.Errorf("SelectNode(): No node found.")
return
}
img := Image{};
err = node.Unmarshal(&img);
img := Image{}
err = node.Unmarshal(&img)
if err != nil {
t.Errorf("Unmarshal(): %s", err);
return;
t.Errorf("Unmarshal(): %s", err)
return
}
// Only Title is verified; the other Image fields are not checked.
if img.Title != "WriteTheWeb" {
t.Errorf("Image.Title has incorrect value. Got '%s', expected 'WriteTheWeb'.", img.Title);
return;
t.Errorf("Image.Title has incorrect value. Got '%s', expected 'WriteTheWeb'.", img.Title)
return
}
}