Refactored some code to be faster and a little more elegant.
This commit is contained in:
		
							parent
							
								
									79794996d0
								
							
						
					
					
						commit
						6d8b8289d8
					
				
					 10 changed files with 438 additions and 420 deletions
				
			
		
							
								
								
									
										12
									
								
								makefile
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										12
									
								
								makefile
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,12 @@
 | 
			
		|||
 | 
			
		||||
all:
 | 
			
		||||
	make -C xmlx install
 | 
			
		||||
 | 
			
		||||
test:
 | 
			
		||||
	make -C xmlx test
 | 
			
		||||
 | 
			
		||||
clean:
 | 
			
		||||
	make -C xmlx clean
 | 
			
		||||
 | 
			
		||||
format:
 | 
			
		||||
	gofmt -w .
 | 
			
		||||
							
								
								
									
										311
									
								
								src/entitymap.go
									
										
									
									
									
								
							
							
						
						
									
										311
									
								
								src/entitymap.go
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -1,311 +0,0 @@
 | 
			
		|||
package xmlx
 | 
			
		||||
 | 
			
		||||
import "fmt"
 | 
			
		||||
import "utf8"
 | 
			
		||||
import "regexp"
 | 
			
		||||
import "strconv"
 | 
			
		||||
 | 
			
		||||
var reg_entity = regexp.MustCompile("^&#[0-9]+;$")
 | 
			
		||||
 | 
			
		||||
// Converts a single numerical html entity to a regular Go utf-token.
 | 
			
		||||
//    ex: "♣" -> "♣"
 | 
			
		||||
func HtmlToUTF8(entity string) string {
 | 
			
		||||
	// Make sure we have a valid entity: {
 | 
			
		||||
	ok := reg_entity.MatchString(entity)
 | 
			
		||||
	if !ok {
 | 
			
		||||
		return ""
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Convert entity to number
 | 
			
		||||
	num, err := strconv.Atoi(entity[2 : len(entity)-1])
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return ""
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	var arr [3]byte
 | 
			
		||||
	size := utf8.EncodeRune(num, &arr)
 | 
			
		||||
	if size == 0 {
 | 
			
		||||
		return ""
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return string(&arr)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Converts a single Go utf-token to it's an Html entity.
 | 
			
		||||
//   ex: "♣" -> "♣"
 | 
			
		||||
func UTF8ToHtml(token string) string {
 | 
			
		||||
	rune, size := utf8.DecodeRuneInString(token)
 | 
			
		||||
	if size == 0 {
 | 
			
		||||
		return ""
 | 
			
		||||
	}
 | 
			
		||||
	return fmt.Sprintf("&#%d;", rune)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
	http://www.w3.org/TR/html4/sgml/entities.html
 | 
			
		||||
 | 
			
		||||
	Portions © International Organization for Standardization 1986
 | 
			
		||||
	Permission to copy in any form is granted for use with
 | 
			
		||||
	conforming SGML systems and applications as defined in
 | 
			
		||||
	ISO 8879, provided this notice is included in all copies.
 | 
			
		||||
 | 
			
		||||
	Fills the supplied map with html entities mapped to their Go utf8
 | 
			
		||||
	equivalents. This map can be assigned to xml.Parser.Entity
 | 
			
		||||
	It will be used to map non-standard xml entities to a proper value.
 | 
			
		||||
	If the parser encounters any unknown entities, it will throw a syntax
 | 
			
		||||
	error and abort the parsing. Hence the ability to supply this map.
 | 
			
		||||
*/
 | 
			
		||||
func loadNonStandardEntities(em *map[string]string) {
 | 
			
		||||
	(*em)["pi"] = "\u03c0"
 | 
			
		||||
	(*em)["nabla"] = "\u2207"
 | 
			
		||||
	(*em)["isin"] = "\u2208"
 | 
			
		||||
	(*em)["loz"] = "\u25ca"
 | 
			
		||||
	(*em)["prop"] = "\u221d"
 | 
			
		||||
	(*em)["para"] = "\u00b6"
 | 
			
		||||
	(*em)["Aring"] = "\u00c5"
 | 
			
		||||
	(*em)["euro"] = "\u20ac"
 | 
			
		||||
	(*em)["sup3"] = "\u00b3"
 | 
			
		||||
	(*em)["sup2"] = "\u00b2"
 | 
			
		||||
	(*em)["sup1"] = "\u00b9"
 | 
			
		||||
	(*em)["prod"] = "\u220f"
 | 
			
		||||
	(*em)["gamma"] = "\u03b3"
 | 
			
		||||
	(*em)["perp"] = "\u22a5"
 | 
			
		||||
	(*em)["lfloor"] = "\u230a"
 | 
			
		||||
	(*em)["fnof"] = "\u0192"
 | 
			
		||||
	(*em)["frasl"] = "\u2044"
 | 
			
		||||
	(*em)["rlm"] = "\u200f"
 | 
			
		||||
	(*em)["omega"] = "\u03c9"
 | 
			
		||||
	(*em)["part"] = "\u2202"
 | 
			
		||||
	(*em)["euml"] = "\u00eb"
 | 
			
		||||
	(*em)["Kappa"] = "\u039a"
 | 
			
		||||
	(*em)["nbsp"] = "\u00a0"
 | 
			
		||||
	(*em)["Eacute"] = "\u00c9"
 | 
			
		||||
	(*em)["brvbar"] = "\u00a6"
 | 
			
		||||
	(*em)["otimes"] = "\u2297"
 | 
			
		||||
	(*em)["ndash"] = "\u2013"
 | 
			
		||||
	(*em)["thinsp"] = "\u2009"
 | 
			
		||||
	(*em)["nu"] = "\u03bd"
 | 
			
		||||
	(*em)["Upsilon"] = "\u03a5"
 | 
			
		||||
	(*em)["upsih"] = "\u03d2"
 | 
			
		||||
	(*em)["raquo"] = "\u00bb"
 | 
			
		||||
	(*em)["yacute"] = "\u00fd"
 | 
			
		||||
	(*em)["delta"] = "\u03b4"
 | 
			
		||||
	(*em)["eth"] = "\u00f0"
 | 
			
		||||
	(*em)["supe"] = "\u2287"
 | 
			
		||||
	(*em)["ne"] = "\u2260"
 | 
			
		||||
	(*em)["ni"] = "\u220b"
 | 
			
		||||
	(*em)["eta"] = "\u03b7"
 | 
			
		||||
	(*em)["uArr"] = "\u21d1"
 | 
			
		||||
	(*em)["image"] = "\u2111"
 | 
			
		||||
	(*em)["asymp"] = "\u2248"
 | 
			
		||||
	(*em)["oacute"] = "\u00f3"
 | 
			
		||||
	(*em)["rarr"] = "\u2192"
 | 
			
		||||
	(*em)["emsp"] = "\u2003"
 | 
			
		||||
	(*em)["acirc"] = "\u00e2"
 | 
			
		||||
	(*em)["shy"] = "\u00ad"
 | 
			
		||||
	(*em)["yuml"] = "\u00ff"
 | 
			
		||||
	(*em)["acute"] = "\u00b4"
 | 
			
		||||
	(*em)["int"] = "\u222b"
 | 
			
		||||
	(*em)["ccedil"] = "\u00e7"
 | 
			
		||||
	(*em)["Acirc"] = "\u00c2"
 | 
			
		||||
	(*em)["Ograve"] = "\u00d2"
 | 
			
		||||
	(*em)["times"] = "\u00d7"
 | 
			
		||||
	(*em)["weierp"] = "\u2118"
 | 
			
		||||
	(*em)["Tau"] = "\u03a4"
 | 
			
		||||
	(*em)["omicron"] = "\u03bf"
 | 
			
		||||
	(*em)["lt"] = "\u003c"
 | 
			
		||||
	(*em)["Mu"] = "\u039c"
 | 
			
		||||
	(*em)["Ucirc"] = "\u00db"
 | 
			
		||||
	(*em)["sub"] = "\u2282"
 | 
			
		||||
	(*em)["le"] = "\u2264"
 | 
			
		||||
	(*em)["sum"] = "\u2211"
 | 
			
		||||
	(*em)["sup"] = "\u2283"
 | 
			
		||||
	(*em)["lrm"] = "\u200e"
 | 
			
		||||
	(*em)["frac34"] = "\u00be"
 | 
			
		||||
	(*em)["Iota"] = "\u0399"
 | 
			
		||||
	(*em)["Ugrave"] = "\u00d9"
 | 
			
		||||
	(*em)["THORN"] = "\u00de"
 | 
			
		||||
	(*em)["rsaquo"] = "\u203a"
 | 
			
		||||
	(*em)["not"] = "\u00ac"
 | 
			
		||||
	(*em)["sigma"] = "\u03c3"
 | 
			
		||||
	(*em)["iuml"] = "\u00ef"
 | 
			
		||||
	(*em)["epsilon"] = "\u03b5"
 | 
			
		||||
	(*em)["spades"] = "\u2660"
 | 
			
		||||
	(*em)["theta"] = "\u03b8"
 | 
			
		||||
	(*em)["divide"] = "\u00f7"
 | 
			
		||||
	(*em)["Atilde"] = "\u00c3"
 | 
			
		||||
	(*em)["uacute"] = "\u00fa"
 | 
			
		||||
	(*em)["Rho"] = "\u03a1"
 | 
			
		||||
	(*em)["trade"] = "\u2122"
 | 
			
		||||
	(*em)["chi"] = "\u03c7"
 | 
			
		||||
	(*em)["agrave"] = "\u00e0"
 | 
			
		||||
	(*em)["or"] = "\u2228"
 | 
			
		||||
	(*em)["circ"] = "\u02c6"
 | 
			
		||||
	(*em)["middot"] = "\u00b7"
 | 
			
		||||
	(*em)["plusmn"] = "\u00b1"
 | 
			
		||||
	(*em)["aring"] = "\u00e5"
 | 
			
		||||
	(*em)["lsquo"] = "\u2018"
 | 
			
		||||
	(*em)["Yacute"] = "\u00dd"
 | 
			
		||||
	(*em)["oline"] = "\u203e"
 | 
			
		||||
	(*em)["copy"] = "\u00a9"
 | 
			
		||||
	(*em)["icirc"] = "\u00ee"
 | 
			
		||||
	(*em)["lowast"] = "\u2217"
 | 
			
		||||
	(*em)["Oacute"] = "\u00d3"
 | 
			
		||||
	(*em)["aacute"] = "\u00e1"
 | 
			
		||||
	(*em)["oplus"] = "\u2295"
 | 
			
		||||
	(*em)["crarr"] = "\u21b5"
 | 
			
		||||
	(*em)["thetasym"] = "\u03d1"
 | 
			
		||||
	(*em)["Beta"] = "\u0392"
 | 
			
		||||
	(*em)["laquo"] = "\u00ab"
 | 
			
		||||
	(*em)["rang"] = "\u232a"
 | 
			
		||||
	(*em)["tilde"] = "\u02dc"
 | 
			
		||||
	(*em)["Uuml"] = "\u00dc"
 | 
			
		||||
	(*em)["zwj"] = "\u200d"
 | 
			
		||||
	(*em)["mu"] = "\u03bc"
 | 
			
		||||
	(*em)["Ccedil"] = "\u00c7"
 | 
			
		||||
	(*em)["infin"] = "\u221e"
 | 
			
		||||
	(*em)["ouml"] = "\u00f6"
 | 
			
		||||
	(*em)["rfloor"] = "\u230b"
 | 
			
		||||
	(*em)["pound"] = "\u00a3"
 | 
			
		||||
	(*em)["szlig"] = "\u00df"
 | 
			
		||||
	(*em)["thorn"] = "\u00fe"
 | 
			
		||||
	(*em)["forall"] = "\u2200"
 | 
			
		||||
	(*em)["piv"] = "\u03d6"
 | 
			
		||||
	(*em)["rdquo"] = "\u201d"
 | 
			
		||||
	(*em)["frac12"] = "\u00bd"
 | 
			
		||||
	(*em)["frac14"] = "\u00bc"
 | 
			
		||||
	(*em)["Ocirc"] = "\u00d4"
 | 
			
		||||
	(*em)["Ecirc"] = "\u00ca"
 | 
			
		||||
	(*em)["kappa"] = "\u03ba"
 | 
			
		||||
	(*em)["Euml"] = "\u00cb"
 | 
			
		||||
	(*em)["minus"] = "\u2212"
 | 
			
		||||
	(*em)["cong"] = "\u2245"
 | 
			
		||||
	(*em)["hellip"] = "\u2026"
 | 
			
		||||
	(*em)["equiv"] = "\u2261"
 | 
			
		||||
	(*em)["cent"] = "\u00a2"
 | 
			
		||||
	(*em)["Uacute"] = "\u00da"
 | 
			
		||||
	(*em)["darr"] = "\u2193"
 | 
			
		||||
	(*em)["Eta"] = "\u0397"
 | 
			
		||||
	(*em)["sbquo"] = "\u201a"
 | 
			
		||||
	(*em)["rArr"] = "\u21d2"
 | 
			
		||||
	(*em)["igrave"] = "\u00ec"
 | 
			
		||||
	(*em)["uml"] = "\u00a8"
 | 
			
		||||
	(*em)["lambda"] = "\u03bb"
 | 
			
		||||
	(*em)["oelig"] = "\u0153"
 | 
			
		||||
	(*em)["harr"] = "\u2194"
 | 
			
		||||
	(*em)["ang"] = "\u2220"
 | 
			
		||||
	(*em)["clubs"] = "\u2663"
 | 
			
		||||
	(*em)["and"] = "\u2227"
 | 
			
		||||
	(*em)["permil"] = "\u2030"
 | 
			
		||||
	(*em)["larr"] = "\u2190"
 | 
			
		||||
	(*em)["Yuml"] = "\u0178"
 | 
			
		||||
	(*em)["cup"] = "\u222a"
 | 
			
		||||
	(*em)["Xi"] = "\u039e"
 | 
			
		||||
	(*em)["Alpha"] = "\u0391"
 | 
			
		||||
	(*em)["phi"] = "\u03c6"
 | 
			
		||||
	(*em)["ucirc"] = "\u00fb"
 | 
			
		||||
	(*em)["oslash"] = "\u00f8"
 | 
			
		||||
	(*em)["rsquo"] = "\u2019"
 | 
			
		||||
	(*em)["AElig"] = "\u00c6"
 | 
			
		||||
	(*em)["mdash"] = "\u2014"
 | 
			
		||||
	(*em)["psi"] = "\u03c8"
 | 
			
		||||
	(*em)["eacute"] = "\u00e9"
 | 
			
		||||
	(*em)["otilde"] = "\u00f5"
 | 
			
		||||
	(*em)["yen"] = "\u00a5"
 | 
			
		||||
	(*em)["gt"] = "\u003e"
 | 
			
		||||
	(*em)["Iuml"] = "\u00cf"
 | 
			
		||||
	(*em)["Prime"] = "\u2033"
 | 
			
		||||
	(*em)["Chi"] = "\u03a7"
 | 
			
		||||
	(*em)["ge"] = "\u2265"
 | 
			
		||||
	(*em)["reg"] = "\u00ae"
 | 
			
		||||
	(*em)["hearts"] = "\u2665"
 | 
			
		||||
	(*em)["auml"] = "\u00e4"
 | 
			
		||||
	(*em)["Agrave"] = "\u00c0"
 | 
			
		||||
	(*em)["sect"] = "\u00a7"
 | 
			
		||||
	(*em)["sube"] = "\u2286"
 | 
			
		||||
	(*em)["sigmaf"] = "\u03c2"
 | 
			
		||||
	(*em)["Gamma"] = "\u0393"
 | 
			
		||||
	(*em)["amp"] = "\u0026"
 | 
			
		||||
	(*em)["ensp"] = "\u2002"
 | 
			
		||||
	(*em)["ETH"] = "\u00d0"
 | 
			
		||||
	(*em)["Igrave"] = "\u00cc"
 | 
			
		||||
	(*em)["Omega"] = "\u03a9"
 | 
			
		||||
	(*em)["Lambda"] = "\u039b"
 | 
			
		||||
	(*em)["Omicron"] = "\u039f"
 | 
			
		||||
	(*em)["there4"] = "\u2234"
 | 
			
		||||
	(*em)["ntilde"] = "\u00f1"
 | 
			
		||||
	(*em)["xi"] = "\u03be"
 | 
			
		||||
	(*em)["dagger"] = "\u2020"
 | 
			
		||||
	(*em)["egrave"] = "\u00e8"
 | 
			
		||||
	(*em)["Delta"] = "\u0394"
 | 
			
		||||
	(*em)["OElig"] = "\u0152"
 | 
			
		||||
	(*em)["diams"] = "\u2666"
 | 
			
		||||
	(*em)["ldquo"] = "\u201c"
 | 
			
		||||
	(*em)["radic"] = "\u221a"
 | 
			
		||||
	(*em)["Oslash"] = "\u00d8"
 | 
			
		||||
	(*em)["Ouml"] = "\u00d6"
 | 
			
		||||
	(*em)["lceil"] = "\u2308"
 | 
			
		||||
	(*em)["uarr"] = "\u2191"
 | 
			
		||||
	(*em)["atilde"] = "\u00e3"
 | 
			
		||||
	(*em)["iquest"] = "\u00bf"
 | 
			
		||||
	(*em)["lsaquo"] = "\u2039"
 | 
			
		||||
	(*em)["Epsilon"] = "\u0395"
 | 
			
		||||
	(*em)["iacute"] = "\u00ed"
 | 
			
		||||
	(*em)["cap"] = "\u2229"
 | 
			
		||||
	(*em)["deg"] = "\u00b0"
 | 
			
		||||
	(*em)["Otilde"] = "\u00d5"
 | 
			
		||||
	(*em)["zeta"] = "\u03b6"
 | 
			
		||||
	(*em)["ocirc"] = "\u00f4"
 | 
			
		||||
	(*em)["scaron"] = "\u0161"
 | 
			
		||||
	(*em)["ecirc"] = "\u00ea"
 | 
			
		||||
	(*em)["ordm"] = "\u00ba"
 | 
			
		||||
	(*em)["tau"] = "\u03c4"
 | 
			
		||||
	(*em)["Auml"] = "\u00c4"
 | 
			
		||||
	(*em)["dArr"] = "\u21d3"
 | 
			
		||||
	(*em)["ordf"] = "\u00aa"
 | 
			
		||||
	(*em)["alefsym"] = "\u2135"
 | 
			
		||||
	(*em)["notin"] = "\u2209"
 | 
			
		||||
	(*em)["Pi"] = "\u03a0"
 | 
			
		||||
	(*em)["sdot"] = "\u22c5"
 | 
			
		||||
	(*em)["upsilon"] = "\u03c5"
 | 
			
		||||
	(*em)["iota"] = "\u03b9"
 | 
			
		||||
	(*em)["hArr"] = "\u21d4"
 | 
			
		||||
	(*em)["Sigma"] = "\u03a3"
 | 
			
		||||
	(*em)["lang"] = "\u2329"
 | 
			
		||||
	(*em)["curren"] = "\u00a4"
 | 
			
		||||
	(*em)["Theta"] = "\u0398"
 | 
			
		||||
	(*em)["lArr"] = "\u21d0"
 | 
			
		||||
	(*em)["Phi"] = "\u03a6"
 | 
			
		||||
	(*em)["Nu"] = "\u039d"
 | 
			
		||||
	(*em)["rho"] = "\u03c1"
 | 
			
		||||
	(*em)["alpha"] = "\u03b1"
 | 
			
		||||
	(*em)["iexcl"] = "\u00a1"
 | 
			
		||||
	(*em)["micro"] = "\u00b5"
 | 
			
		||||
	(*em)["cedil"] = "\u00b8"
 | 
			
		||||
	(*em)["Ntilde"] = "\u00d1"
 | 
			
		||||
	(*em)["Psi"] = "\u03a8"
 | 
			
		||||
	(*em)["Dagger"] = "\u2021"
 | 
			
		||||
	(*em)["Egrave"] = "\u00c8"
 | 
			
		||||
	(*em)["Icirc"] = "\u00ce"
 | 
			
		||||
	(*em)["nsub"] = "\u2284"
 | 
			
		||||
	(*em)["bdquo"] = "\u201e"
 | 
			
		||||
	(*em)["empty"] = "\u2205"
 | 
			
		||||
	(*em)["aelig"] = "\u00e6"
 | 
			
		||||
	(*em)["ograve"] = "\u00f2"
 | 
			
		||||
	(*em)["macr"] = "\u00af"
 | 
			
		||||
	(*em)["Zeta"] = "\u0396"
 | 
			
		||||
	(*em)["beta"] = "\u03b2"
 | 
			
		||||
	(*em)["sim"] = "\u223c"
 | 
			
		||||
	(*em)["uuml"] = "\u00fc"
 | 
			
		||||
	(*em)["Aacute"] = "\u00c1"
 | 
			
		||||
	(*em)["Iacute"] = "\u00cd"
 | 
			
		||||
	(*em)["exist"] = "\u2203"
 | 
			
		||||
	(*em)["prime"] = "\u2032"
 | 
			
		||||
	(*em)["rceil"] = "\u2309"
 | 
			
		||||
	(*em)["real"] = "\u211c"
 | 
			
		||||
	(*em)["zwnj"] = "\u200c"
 | 
			
		||||
	(*em)["bull"] = "\u2022"
 | 
			
		||||
	(*em)["quot"] = "\u0022"
 | 
			
		||||
	(*em)["Scaron"] = "\u0160"
 | 
			
		||||
	(*em)["ugrave"] = "\u00f9"
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1,8 +1,6 @@
 | 
			
		|||
 | 
			
		||||
include $(GOROOT)/src/Make.$(GOARCH)
 | 
			
		||||
 | 
			
		||||
TARG=xmlx
 | 
			
		||||
GOFILES=document.go node.go io.go entitymap.go\
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
include $(GOROOT)/src/Make.pkg
 | 
			
		||||
| 
						 | 
				
			
			@ -29,6 +29,7 @@ package xmlx
 | 
			
		|||
 | 
			
		||||
import "os"
 | 
			
		||||
import "io"
 | 
			
		||||
import "bytes"
 | 
			
		||||
import "io/ioutil"
 | 
			
		||||
import "path"
 | 
			
		||||
import "strings"
 | 
			
		||||
| 
						 | 
				
			
			@ -62,7 +63,7 @@ func New() *Document {
 | 
			
		|||
// set only those entities needed manually using the document.Entity map, but
 | 
			
		||||
// if need be, this method can be called to fill the map with the entire set
 | 
			
		||||
// defined on http://www.w3.org/TR/html4/sgml/entities.html
 | 
			
		||||
func (this *Document) LoadExtendedEntityMap() { loadNonStandardEntities(&this.Entity) }
 | 
			
		||||
func (this *Document) LoadExtendedEntityMap() { loadNonStandardEntities(this.Entity) }
 | 
			
		||||
 | 
			
		||||
func (this *Document) String() string {
 | 
			
		||||
	s, _ := this.SaveString()
 | 
			
		||||
| 
						 | 
				
			
			@ -88,6 +89,11 @@ func (this *Document) LoadString(s string) (err os.Error) {
 | 
			
		|||
	ct := this.Root
 | 
			
		||||
 | 
			
		||||
	var tok xml.Token
 | 
			
		||||
	var t *Node
 | 
			
		||||
	var i int
 | 
			
		||||
	var doctype string
 | 
			
		||||
	var v xml.Attr
 | 
			
		||||
 | 
			
		||||
	for {
 | 
			
		||||
		if tok, err = xp.Token(); err != nil {
 | 
			
		||||
			if err == os.EOF {
 | 
			
		||||
| 
						 | 
				
			
			@ -104,20 +110,21 @@ func (this *Document) LoadString(s string) (err os.Error) {
 | 
			
		|||
		case xml.SyntaxError:
 | 
			
		||||
			return os.NewError(tt.String())
 | 
			
		||||
		case xml.CharData:
 | 
			
		||||
			ct.Value = strings.TrimSpace(string(tt))
 | 
			
		||||
			ct.Value = strings.TrimSpace(string([]byte(tt)))
 | 
			
		||||
		case xml.Comment:
 | 
			
		||||
			t := NewNode(NT_COMMENT)
 | 
			
		||||
			t.Value = strings.TrimSpace(string(tt))
 | 
			
		||||
			t.Value = strings.TrimSpace(string([]byte(tt)))
 | 
			
		||||
			ct.AddChild(t)
 | 
			
		||||
		case xml.Directive:
 | 
			
		||||
			t := NewNode(NT_DIRECTIVE)
 | 
			
		||||
			t.Value = strings.TrimSpace(string(tt))
 | 
			
		||||
			t = NewNode(NT_DIRECTIVE)
 | 
			
		||||
			t.Value = strings.TrimSpace(string([]byte(tt)))
 | 
			
		||||
			ct.AddChild(t)
 | 
			
		||||
		case xml.StartElement:
 | 
			
		||||
			t := NewNode(NT_ELEMENT)
 | 
			
		||||
			t = NewNode(NT_ELEMENT)
 | 
			
		||||
			t.Name = tt.Name
 | 
			
		||||
			t.Attributes = make([]Attr, len(tt.Attr))
 | 
			
		||||
			for i, v := range tt.Attr {
 | 
			
		||||
			t.Attributes = make([]*Attr, len(tt.Attr))
 | 
			
		||||
			for i, v = range tt.Attr {
 | 
			
		||||
				t.Attributes[i] = new(Attr)
 | 
			
		||||
				t.Attributes[i].Name = v.Name
 | 
			
		||||
				t.Attributes[i].Value = v.Value
 | 
			
		||||
			}
 | 
			
		||||
| 
						 | 
				
			
			@ -125,15 +132,14 @@ func (this *Document) LoadString(s string) (err os.Error) {
 | 
			
		|||
			ct = t
 | 
			
		||||
		case xml.ProcInst:
 | 
			
		||||
			if tt.Target == "xml" { // xml doctype
 | 
			
		||||
				doctype := strings.TrimSpace(string(tt.Inst))
 | 
			
		||||
				pos := strings.Index(doctype, `standalone="`)
 | 
			
		||||
				if pos > -1 {
 | 
			
		||||
					this.StandAlone = doctype[pos+len(`standalone="`) : len(doctype)]
 | 
			
		||||
					pos = strings.Index(this.StandAlone, `"`)
 | 
			
		||||
					this.StandAlone = this.StandAlone[0:pos]
 | 
			
		||||
				doctype = strings.TrimSpace(string(tt.Inst))
 | 
			
		||||
				if i = strings.Index(doctype, `standalone="`); i > -1 {
 | 
			
		||||
					this.StandAlone = doctype[i+len(`standalone="`) : len(doctype)]
 | 
			
		||||
					i = strings.Index(this.StandAlone, `"`)
 | 
			
		||||
					this.StandAlone = this.StandAlone[0:i]
 | 
			
		||||
				}
 | 
			
		||||
			} else {
 | 
			
		||||
				t := NewNode(NT_PROCINST)
 | 
			
		||||
				t = NewNode(NT_PROCINST)
 | 
			
		||||
				t.Target = strings.TrimSpace(tt.Target)
 | 
			
		||||
				t.Value = strings.TrimSpace(string(tt.Inst))
 | 
			
		||||
				ct.AddChild(t)
 | 
			
		||||
| 
						 | 
				
			
			@ -176,17 +182,20 @@ func (this *Document) LoadUri(uri string) (err os.Error) {
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
func (this *Document) LoadStream(r *io.Reader) (err os.Error) {
 | 
			
		||||
	content := ""
 | 
			
		||||
	buff := make([]byte, 256)
 | 
			
		||||
	var data []byte
 | 
			
		||||
 | 
			
		||||
	t := bytes.NewBuffer(data)
 | 
			
		||||
	s := make([]byte, 1024)
 | 
			
		||||
 | 
			
		||||
	for {
 | 
			
		||||
		_, err := r.Read(buff)
 | 
			
		||||
		_, err := r.Read(s)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			break
 | 
			
		||||
		}
 | 
			
		||||
		content += string(buff)
 | 
			
		||||
		t.Write(s)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	err = this.LoadString(content)
 | 
			
		||||
	err = this.LoadString(t.String())
 | 
			
		||||
	return
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -194,19 +203,12 @@ func (this *Document) LoadStream(r *io.Reader) (err os.Error) {
 | 
			
		|||
// *** Satisfy ISaver interface
 | 
			
		||||
// *****************************************************************************
 | 
			
		||||
func (this *Document) SaveFile(path string) (err os.Error) {
 | 
			
		||||
	file, err := os.Open(path, os.O_WRONLY|os.O_CREAT, 0600)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
	defer file.Close()
 | 
			
		||||
 | 
			
		||||
	content, err := this.SaveString()
 | 
			
		||||
	if err != nil {
 | 
			
		||||
	var data string
 | 
			
		||||
	if data, err = this.SaveString(); err != nil {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	file.Write([]byte(content))
 | 
			
		||||
	return
 | 
			
		||||
	return ioutil.WriteFile(path, []byte(data), 0600)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (this *Document) SaveString() (s string, err os.Error) {
 | 
			
		||||
							
								
								
									
										311
									
								
								xmlx/entitymap.go
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										311
									
								
								xmlx/entitymap.go
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,311 @@
 | 
			
		|||
package xmlx
 | 
			
		||||
 | 
			
		||||
import "fmt"
 | 
			
		||||
import "utf8"
 | 
			
		||||
import "regexp"
 | 
			
		||||
import "strconv"
 | 
			
		||||
 | 
			
		||||
var reg_entity = regexp.MustCompile("^&#[0-9]+;$")
 | 
			
		||||
 | 
			
		||||
// Converts a single numerical html entity to a regular Go utf-token.
 | 
			
		||||
//    ex: "♣" -> "♣"
 | 
			
		||||
func HtmlToUTF8(entity string) string {
 | 
			
		||||
	// Make sure we have a valid entity: {
 | 
			
		||||
	ok := reg_entity.MatchString(entity)
 | 
			
		||||
	if !ok {
 | 
			
		||||
		return ""
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Convert entity to number
 | 
			
		||||
	num, err := strconv.Atoi(entity[2 : len(entity)-1])
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return ""
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	var arr []byte
 | 
			
		||||
	size := utf8.EncodeRune(num, arr)
 | 
			
		||||
	if size == 0 {
 | 
			
		||||
		return ""
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return string(arr)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Converts a single Go utf-token to it's an Html entity.
 | 
			
		||||
//   ex: "♣" -> "♣"
 | 
			
		||||
func UTF8ToHtml(token string) string {
 | 
			
		||||
	rune, size := utf8.DecodeRuneInString(token)
 | 
			
		||||
	if size == 0 {
 | 
			
		||||
		return ""
 | 
			
		||||
	}
 | 
			
		||||
	return fmt.Sprintf("&#%d;", rune)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
	http://www.w3.org/TR/html4/sgml/entities.html
 | 
			
		||||
 | 
			
		||||
	Portions © International Organization for Standardization 1986
 | 
			
		||||
	Permission to copy in any form is granted for use with
 | 
			
		||||
	conforming SGML systems and applications as defined in
 | 
			
		||||
	ISO 8879, provided this notice is included in all copies.
 | 
			
		||||
 | 
			
		||||
	Fills the supplied map with html entities mapped to their Go utf8
 | 
			
		||||
	equivalents. This map can be assigned to xml.Parser.Entity
 | 
			
		||||
	It will be used to map non-standard xml entities to a proper value.
 | 
			
		||||
	If the parser encounters any unknown entities, it will throw a syntax
 | 
			
		||||
	error and abort the parsing. Hence the ability to supply this map.
 | 
			
		||||
*/
 | 
			
		||||
func loadNonStandardEntities(em map[string]string) {
 | 
			
		||||
	em["pi"] = "\u03c0"
 | 
			
		||||
	em["nabla"] = "\u2207"
 | 
			
		||||
	em["isin"] = "\u2208"
 | 
			
		||||
	em["loz"] = "\u25ca"
 | 
			
		||||
	em["prop"] = "\u221d"
 | 
			
		||||
	em["para"] = "\u00b6"
 | 
			
		||||
	em["Aring"] = "\u00c5"
 | 
			
		||||
	em["euro"] = "\u20ac"
 | 
			
		||||
	em["sup3"] = "\u00b3"
 | 
			
		||||
	em["sup2"] = "\u00b2"
 | 
			
		||||
	em["sup1"] = "\u00b9"
 | 
			
		||||
	em["prod"] = "\u220f"
 | 
			
		||||
	em["gamma"] = "\u03b3"
 | 
			
		||||
	em["perp"] = "\u22a5"
 | 
			
		||||
	em["lfloor"] = "\u230a"
 | 
			
		||||
	em["fnof"] = "\u0192"
 | 
			
		||||
	em["frasl"] = "\u2044"
 | 
			
		||||
	em["rlm"] = "\u200f"
 | 
			
		||||
	em["omega"] = "\u03c9"
 | 
			
		||||
	em["part"] = "\u2202"
 | 
			
		||||
	em["euml"] = "\u00eb"
 | 
			
		||||
	em["Kappa"] = "\u039a"
 | 
			
		||||
	em["nbsp"] = "\u00a0"
 | 
			
		||||
	em["Eacute"] = "\u00c9"
 | 
			
		||||
	em["brvbar"] = "\u00a6"
 | 
			
		||||
	em["otimes"] = "\u2297"
 | 
			
		||||
	em["ndash"] = "\u2013"
 | 
			
		||||
	em["thinsp"] = "\u2009"
 | 
			
		||||
	em["nu"] = "\u03bd"
 | 
			
		||||
	em["Upsilon"] = "\u03a5"
 | 
			
		||||
	em["upsih"] = "\u03d2"
 | 
			
		||||
	em["raquo"] = "\u00bb"
 | 
			
		||||
	em["yacute"] = "\u00fd"
 | 
			
		||||
	em["delta"] = "\u03b4"
 | 
			
		||||
	em["eth"] = "\u00f0"
 | 
			
		||||
	em["supe"] = "\u2287"
 | 
			
		||||
	em["ne"] = "\u2260"
 | 
			
		||||
	em["ni"] = "\u220b"
 | 
			
		||||
	em["eta"] = "\u03b7"
 | 
			
		||||
	em["uArr"] = "\u21d1"
 | 
			
		||||
	em["image"] = "\u2111"
 | 
			
		||||
	em["asymp"] = "\u2248"
 | 
			
		||||
	em["oacute"] = "\u00f3"
 | 
			
		||||
	em["rarr"] = "\u2192"
 | 
			
		||||
	em["emsp"] = "\u2003"
 | 
			
		||||
	em["acirc"] = "\u00e2"
 | 
			
		||||
	em["shy"] = "\u00ad"
 | 
			
		||||
	em["yuml"] = "\u00ff"
 | 
			
		||||
	em["acute"] = "\u00b4"
 | 
			
		||||
	em["int"] = "\u222b"
 | 
			
		||||
	em["ccedil"] = "\u00e7"
 | 
			
		||||
	em["Acirc"] = "\u00c2"
 | 
			
		||||
	em["Ograve"] = "\u00d2"
 | 
			
		||||
	em["times"] = "\u00d7"
 | 
			
		||||
	em["weierp"] = "\u2118"
 | 
			
		||||
	em["Tau"] = "\u03a4"
 | 
			
		||||
	em["omicron"] = "\u03bf"
 | 
			
		||||
	em["lt"] = "\u003c"
 | 
			
		||||
	em["Mu"] = "\u039c"
 | 
			
		||||
	em["Ucirc"] = "\u00db"
 | 
			
		||||
	em["sub"] = "\u2282"
 | 
			
		||||
	em["le"] = "\u2264"
 | 
			
		||||
	em["sum"] = "\u2211"
 | 
			
		||||
	em["sup"] = "\u2283"
 | 
			
		||||
	em["lrm"] = "\u200e"
 | 
			
		||||
	em["frac34"] = "\u00be"
 | 
			
		||||
	em["Iota"] = "\u0399"
 | 
			
		||||
	em["Ugrave"] = "\u00d9"
 | 
			
		||||
	em["THORN"] = "\u00de"
 | 
			
		||||
	em["rsaquo"] = "\u203a"
 | 
			
		||||
	em["not"] = "\u00ac"
 | 
			
		||||
	em["sigma"] = "\u03c3"
 | 
			
		||||
	em["iuml"] = "\u00ef"
 | 
			
		||||
	em["epsilon"] = "\u03b5"
 | 
			
		||||
	em["spades"] = "\u2660"
 | 
			
		||||
	em["theta"] = "\u03b8"
 | 
			
		||||
	em["divide"] = "\u00f7"
 | 
			
		||||
	em["Atilde"] = "\u00c3"
 | 
			
		||||
	em["uacute"] = "\u00fa"
 | 
			
		||||
	em["Rho"] = "\u03a1"
 | 
			
		||||
	em["trade"] = "\u2122"
 | 
			
		||||
	em["chi"] = "\u03c7"
 | 
			
		||||
	em["agrave"] = "\u00e0"
 | 
			
		||||
	em["or"] = "\u2228"
 | 
			
		||||
	em["circ"] = "\u02c6"
 | 
			
		||||
	em["middot"] = "\u00b7"
 | 
			
		||||
	em["plusmn"] = "\u00b1"
 | 
			
		||||
	em["aring"] = "\u00e5"
 | 
			
		||||
	em["lsquo"] = "\u2018"
 | 
			
		||||
	em["Yacute"] = "\u00dd"
 | 
			
		||||
	em["oline"] = "\u203e"
 | 
			
		||||
	em["copy"] = "\u00a9"
 | 
			
		||||
	em["icirc"] = "\u00ee"
 | 
			
		||||
	em["lowast"] = "\u2217"
 | 
			
		||||
	em["Oacute"] = "\u00d3"
 | 
			
		||||
	em["aacute"] = "\u00e1"
 | 
			
		||||
	em["oplus"] = "\u2295"
 | 
			
		||||
	em["crarr"] = "\u21b5"
 | 
			
		||||
	em["thetasym"] = "\u03d1"
 | 
			
		||||
	em["Beta"] = "\u0392"
 | 
			
		||||
	em["laquo"] = "\u00ab"
 | 
			
		||||
	em["rang"] = "\u232a"
 | 
			
		||||
	em["tilde"] = "\u02dc"
 | 
			
		||||
	em["Uuml"] = "\u00dc"
 | 
			
		||||
	em["zwj"] = "\u200d"
 | 
			
		||||
	em["mu"] = "\u03bc"
 | 
			
		||||
	em["Ccedil"] = "\u00c7"
 | 
			
		||||
	em["infin"] = "\u221e"
 | 
			
		||||
	em["ouml"] = "\u00f6"
 | 
			
		||||
	em["rfloor"] = "\u230b"
 | 
			
		||||
	em["pound"] = "\u00a3"
 | 
			
		||||
	em["szlig"] = "\u00df"
 | 
			
		||||
	em["thorn"] = "\u00fe"
 | 
			
		||||
	em["forall"] = "\u2200"
 | 
			
		||||
	em["piv"] = "\u03d6"
 | 
			
		||||
	em["rdquo"] = "\u201d"
 | 
			
		||||
	em["frac12"] = "\u00bd"
 | 
			
		||||
	em["frac14"] = "\u00bc"
 | 
			
		||||
	em["Ocirc"] = "\u00d4"
 | 
			
		||||
	em["Ecirc"] = "\u00ca"
 | 
			
		||||
	em["kappa"] = "\u03ba"
 | 
			
		||||
	em["Euml"] = "\u00cb"
 | 
			
		||||
	em["minus"] = "\u2212"
 | 
			
		||||
	em["cong"] = "\u2245"
 | 
			
		||||
	em["hellip"] = "\u2026"
 | 
			
		||||
	em["equiv"] = "\u2261"
 | 
			
		||||
	em["cent"] = "\u00a2"
 | 
			
		||||
	em["Uacute"] = "\u00da"
 | 
			
		||||
	em["darr"] = "\u2193"
 | 
			
		||||
	em["Eta"] = "\u0397"
 | 
			
		||||
	em["sbquo"] = "\u201a"
 | 
			
		||||
	em["rArr"] = "\u21d2"
 | 
			
		||||
	em["igrave"] = "\u00ec"
 | 
			
		||||
	em["uml"] = "\u00a8"
 | 
			
		||||
	em["lambda"] = "\u03bb"
 | 
			
		||||
	em["oelig"] = "\u0153"
 | 
			
		||||
	em["harr"] = "\u2194"
 | 
			
		||||
	em["ang"] = "\u2220"
 | 
			
		||||
	em["clubs"] = "\u2663"
 | 
			
		||||
	em["and"] = "\u2227"
 | 
			
		||||
	em["permil"] = "\u2030"
 | 
			
		||||
	em["larr"] = "\u2190"
 | 
			
		||||
	em["Yuml"] = "\u0178"
 | 
			
		||||
	em["cup"] = "\u222a"
 | 
			
		||||
	em["Xi"] = "\u039e"
 | 
			
		||||
	em["Alpha"] = "\u0391"
 | 
			
		||||
	em["phi"] = "\u03c6"
 | 
			
		||||
	em["ucirc"] = "\u00fb"
 | 
			
		||||
	em["oslash"] = "\u00f8"
 | 
			
		||||
	em["rsquo"] = "\u2019"
 | 
			
		||||
	em["AElig"] = "\u00c6"
 | 
			
		||||
	em["mdash"] = "\u2014"
 | 
			
		||||
	em["psi"] = "\u03c8"
 | 
			
		||||
	em["eacute"] = "\u00e9"
 | 
			
		||||
	em["otilde"] = "\u00f5"
 | 
			
		||||
	em["yen"] = "\u00a5"
 | 
			
		||||
	em["gt"] = "\u003e"
 | 
			
		||||
	em["Iuml"] = "\u00cf"
 | 
			
		||||
	em["Prime"] = "\u2033"
 | 
			
		||||
	em["Chi"] = "\u03a7"
 | 
			
		||||
	em["ge"] = "\u2265"
 | 
			
		||||
	em["reg"] = "\u00ae"
 | 
			
		||||
	em["hearts"] = "\u2665"
 | 
			
		||||
	em["auml"] = "\u00e4"
 | 
			
		||||
	em["Agrave"] = "\u00c0"
 | 
			
		||||
	em["sect"] = "\u00a7"
 | 
			
		||||
	em["sube"] = "\u2286"
 | 
			
		||||
	em["sigmaf"] = "\u03c2"
 | 
			
		||||
	em["Gamma"] = "\u0393"
 | 
			
		||||
	em["amp"] = "\u0026"
 | 
			
		||||
	em["ensp"] = "\u2002"
 | 
			
		||||
	em["ETH"] = "\u00d0"
 | 
			
		||||
	em["Igrave"] = "\u00cc"
 | 
			
		||||
	em["Omega"] = "\u03a9"
 | 
			
		||||
	em["Lambda"] = "\u039b"
 | 
			
		||||
	em["Omicron"] = "\u039f"
 | 
			
		||||
	em["there4"] = "\u2234"
 | 
			
		||||
	em["ntilde"] = "\u00f1"
 | 
			
		||||
	em["xi"] = "\u03be"
 | 
			
		||||
	em["dagger"] = "\u2020"
 | 
			
		||||
	em["egrave"] = "\u00e8"
 | 
			
		||||
	em["Delta"] = "\u0394"
 | 
			
		||||
	em["OElig"] = "\u0152"
 | 
			
		||||
	em["diams"] = "\u2666"
 | 
			
		||||
	em["ldquo"] = "\u201c"
 | 
			
		||||
	em["radic"] = "\u221a"
 | 
			
		||||
	em["Oslash"] = "\u00d8"
 | 
			
		||||
	em["Ouml"] = "\u00d6"
 | 
			
		||||
	em["lceil"] = "\u2308"
 | 
			
		||||
	em["uarr"] = "\u2191"
 | 
			
		||||
	em["atilde"] = "\u00e3"
 | 
			
		||||
	em["iquest"] = "\u00bf"
 | 
			
		||||
	em["lsaquo"] = "\u2039"
 | 
			
		||||
	em["Epsilon"] = "\u0395"
 | 
			
		||||
	em["iacute"] = "\u00ed"
 | 
			
		||||
	em["cap"] = "\u2229"
 | 
			
		||||
	em["deg"] = "\u00b0"
 | 
			
		||||
	em["Otilde"] = "\u00d5"
 | 
			
		||||
	em["zeta"] = "\u03b6"
 | 
			
		||||
	em["ocirc"] = "\u00f4"
 | 
			
		||||
	em["scaron"] = "\u0161"
 | 
			
		||||
	em["ecirc"] = "\u00ea"
 | 
			
		||||
	em["ordm"] = "\u00ba"
 | 
			
		||||
	em["tau"] = "\u03c4"
 | 
			
		||||
	em["Auml"] = "\u00c4"
 | 
			
		||||
	em["dArr"] = "\u21d3"
 | 
			
		||||
	em["ordf"] = "\u00aa"
 | 
			
		||||
	em["alefsym"] = "\u2135"
 | 
			
		||||
	em["notin"] = "\u2209"
 | 
			
		||||
	em["Pi"] = "\u03a0"
 | 
			
		||||
	em["sdot"] = "\u22c5"
 | 
			
		||||
	em["upsilon"] = "\u03c5"
 | 
			
		||||
	em["iota"] = "\u03b9"
 | 
			
		||||
	em["hArr"] = "\u21d4"
 | 
			
		||||
	em["Sigma"] = "\u03a3"
 | 
			
		||||
	em["lang"] = "\u2329"
 | 
			
		||||
	em["curren"] = "\u00a4"
 | 
			
		||||
	em["Theta"] = "\u0398"
 | 
			
		||||
	em["lArr"] = "\u21d0"
 | 
			
		||||
	em["Phi"] = "\u03a6"
 | 
			
		||||
	em["Nu"] = "\u039d"
 | 
			
		||||
	em["rho"] = "\u03c1"
 | 
			
		||||
	em["alpha"] = "\u03b1"
 | 
			
		||||
	em["iexcl"] = "\u00a1"
 | 
			
		||||
	em["micro"] = "\u00b5"
 | 
			
		||||
	em["cedil"] = "\u00b8"
 | 
			
		||||
	em["Ntilde"] = "\u00d1"
 | 
			
		||||
	em["Psi"] = "\u03a8"
 | 
			
		||||
	em["Dagger"] = "\u2021"
 | 
			
		||||
	em["Egrave"] = "\u00c8"
 | 
			
		||||
	em["Icirc"] = "\u00ce"
 | 
			
		||||
	em["nsub"] = "\u2284"
 | 
			
		||||
	em["bdquo"] = "\u201e"
 | 
			
		||||
	em["empty"] = "\u2205"
 | 
			
		||||
	em["aelig"] = "\u00e6"
 | 
			
		||||
	em["ograve"] = "\u00f2"
 | 
			
		||||
	em["macr"] = "\u00af"
 | 
			
		||||
	em["Zeta"] = "\u0396"
 | 
			
		||||
	em["beta"] = "\u03b2"
 | 
			
		||||
	em["sim"] = "\u223c"
 | 
			
		||||
	em["uuml"] = "\u00fc"
 | 
			
		||||
	em["Aacute"] = "\u00c1"
 | 
			
		||||
	em["Iacute"] = "\u00cd"
 | 
			
		||||
	em["exist"] = "\u2203"
 | 
			
		||||
	em["prime"] = "\u2032"
 | 
			
		||||
	em["rceil"] = "\u2309"
 | 
			
		||||
	em["real"] = "\u211c"
 | 
			
		||||
	em["zwnj"] = "\u200c"
 | 
			
		||||
	em["bull"] = "\u2022"
 | 
			
		||||
	em["quot"] = "\u0022"
 | 
			
		||||
	em["Scaron"] = "\u0160"
 | 
			
		||||
	em["ugrave"] = "\u00f9"
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -3,15 +3,16 @@ package xmlx
 | 
			
		|||
import "os"
 | 
			
		||||
import "strings"
 | 
			
		||||
import "xml"
 | 
			
		||||
import "bytes"
 | 
			
		||||
import "fmt"
 | 
			
		||||
import "strconv"
 | 
			
		||||
 | 
			
		||||
const (
 | 
			
		||||
	NT_ROOT      = 0x00
 | 
			
		||||
	NT_DIRECTIVE = 0x01
 | 
			
		||||
	NT_PROCINST  = 0x02
 | 
			
		||||
	NT_COMMENT   = 0x03
 | 
			
		||||
	NT_ELEMENT   = 0x04
 | 
			
		||||
	NT_ROOT = iota
 | 
			
		||||
	NT_DIRECTIVE
 | 
			
		||||
	NT_PROCINST
 | 
			
		||||
	NT_COMMENT
 | 
			
		||||
	NT_ELEMENT
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
type Attr struct {
 | 
			
		||||
| 
						 | 
				
			
			@ -23,13 +24,19 @@ type Node struct {
 | 
			
		|||
	Type       byte
 | 
			
		||||
	Name       xml.Name
 | 
			
		||||
	Children   []*Node
 | 
			
		||||
	Attributes []Attr
 | 
			
		||||
	Attributes []*Attr
 | 
			
		||||
	Parent     *Node
 | 
			
		||||
	Value      string
 | 
			
		||||
	Target     string // procinst field
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func NewNode(tid byte) *Node { return &Node{Type: tid} }
 | 
			
		||||
func NewNode(tid byte) *Node {
 | 
			
		||||
	n := new(Node)
 | 
			
		||||
	n.Type = tid
 | 
			
		||||
	n.Children = make([]*Node, 0, 10)
 | 
			
		||||
	n.Attributes = make([]*Attr, 0, 10)
 | 
			
		||||
	return n
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// This wraps the standard xml.Unmarshal function and supplies this particular
 | 
			
		||||
// node as the content to be unmarshalled.
 | 
			
		||||
| 
						 | 
				
			
			@ -49,10 +56,7 @@ func (this *Node) GetValue(namespace, name string) string {
 | 
			
		|||
// Get node value as int
 | 
			
		||||
func (this *Node) GetValuei(namespace, name string) int {
 | 
			
		||||
	node := rec_SelectNode(this, namespace, name)
 | 
			
		||||
	if node == nil {
 | 
			
		||||
		return 0
 | 
			
		||||
	}
 | 
			
		||||
	if node.Value == "" {
 | 
			
		||||
	if node == nil || node.Value == "" {
 | 
			
		||||
		return 0
 | 
			
		||||
	}
 | 
			
		||||
	n, _ := strconv.Atoi(node.Value)
 | 
			
		||||
| 
						 | 
				
			
			@ -62,10 +66,7 @@ func (this *Node) GetValuei(namespace, name string) int {
 | 
			
		|||
// Get node value as int64
 | 
			
		||||
func (this *Node) GetValuei64(namespace, name string) int64 {
 | 
			
		||||
	node := rec_SelectNode(this, namespace, name)
 | 
			
		||||
	if node == nil {
 | 
			
		||||
		return 0
 | 
			
		||||
	}
 | 
			
		||||
	if node.Value == "" {
 | 
			
		||||
	if node == nil || node.Value == "" {
 | 
			
		||||
		return 0
 | 
			
		||||
	}
 | 
			
		||||
	n, _ := strconv.Atoi64(node.Value)
 | 
			
		||||
| 
						 | 
				
			
			@ -75,10 +76,7 @@ func (this *Node) GetValuei64(namespace, name string) int64 {
 | 
			
		|||
// Get node value as uint
 | 
			
		||||
func (this *Node) GetValueui(namespace, name string) uint {
 | 
			
		||||
	node := rec_SelectNode(this, namespace, name)
 | 
			
		||||
	if node == nil {
 | 
			
		||||
		return 0
 | 
			
		||||
	}
 | 
			
		||||
	if node.Value == "" {
 | 
			
		||||
	if node == nil || node.Value == "" {
 | 
			
		||||
		return 0
 | 
			
		||||
	}
 | 
			
		||||
	n, _ := strconv.Atoui(node.Value)
 | 
			
		||||
| 
						 | 
				
			
			@ -88,10 +86,7 @@ func (this *Node) GetValueui(namespace, name string) uint {
 | 
			
		|||
// Get node value as uint64
 | 
			
		||||
func (this *Node) GetValueui64(namespace, name string) uint64 {
 | 
			
		||||
	node := rec_SelectNode(this, namespace, name)
 | 
			
		||||
	if node == nil {
 | 
			
		||||
		return 0
 | 
			
		||||
	}
 | 
			
		||||
	if node.Value == "" {
 | 
			
		||||
	if node == nil || node.Value == "" {
 | 
			
		||||
		return 0
 | 
			
		||||
	}
 | 
			
		||||
	n, _ := strconv.Atoui64(node.Value)
 | 
			
		||||
| 
						 | 
				
			
			@ -101,10 +96,7 @@ func (this *Node) GetValueui64(namespace, name string) uint64 {
 | 
			
		|||
// Get node value as float
 | 
			
		||||
func (this *Node) GetValuef(namespace, name string) float {
 | 
			
		||||
	node := rec_SelectNode(this, namespace, name)
 | 
			
		||||
	if node == nil {
 | 
			
		||||
		return 0
 | 
			
		||||
	}
 | 
			
		||||
	if node.Value == "" {
 | 
			
		||||
	if node == nil || node.Value == "" {
 | 
			
		||||
		return 0
 | 
			
		||||
	}
 | 
			
		||||
	n, _ := strconv.Atof(node.Value)
 | 
			
		||||
| 
						 | 
				
			
			@ -114,10 +106,7 @@ func (this *Node) GetValuef(namespace, name string) float {
 | 
			
		|||
// Get node value as float32
 | 
			
		||||
func (this *Node) GetValuef32(namespace, name string) float32 {
 | 
			
		||||
	node := rec_SelectNode(this, namespace, name)
 | 
			
		||||
	if node == nil {
 | 
			
		||||
		return 0
 | 
			
		||||
	}
 | 
			
		||||
	if node.Value == "" {
 | 
			
		||||
	if node == nil || node.Value == "" {
 | 
			
		||||
		return 0
 | 
			
		||||
	}
 | 
			
		||||
	n, _ := strconv.Atof32(node.Value)
 | 
			
		||||
| 
						 | 
				
			
			@ -127,10 +116,7 @@ func (this *Node) GetValuef32(namespace, name string) float32 {
 | 
			
		|||
// Get node value as float64
 | 
			
		||||
func (this *Node) GetValuef64(namespace, name string) float64 {
 | 
			
		||||
	node := rec_SelectNode(this, namespace, name)
 | 
			
		||||
	if node == nil {
 | 
			
		||||
		return 0
 | 
			
		||||
	}
 | 
			
		||||
	if node.Value == "" {
 | 
			
		||||
	if node == nil || node.Value == "" {
 | 
			
		||||
		return 0
 | 
			
		||||
	}
 | 
			
		||||
	n, _ := strconv.Atof64(node.Value)
 | 
			
		||||
| 
						 | 
				
			
			@ -237,9 +223,9 @@ func rec_SelectNode(cn *Node, namespace, name string) *Node {
 | 
			
		|||
		return cn
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	var tn *Node
 | 
			
		||||
	for _, v := range cn.Children {
 | 
			
		||||
		tn := rec_SelectNode(v, namespace, name)
 | 
			
		||||
		if tn != nil {
 | 
			
		||||
		if tn = rec_SelectNode(v, namespace, name); tn != nil {
 | 
			
		||||
			return tn
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			@ -248,17 +234,21 @@ func rec_SelectNode(cn *Node, namespace, name string) *Node {
 | 
			
		|||
 | 
			
		||||
// Select multiple nodes by name
 | 
			
		||||
func (this *Node) SelectNodes(namespace, name string) []*Node {
 | 
			
		||||
	list := make([]*Node, 0)
 | 
			
		||||
	list := make([]*Node, 0, 16)
 | 
			
		||||
	rec_SelectNodes(this, namespace, name, &list)
 | 
			
		||||
	return list
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func rec_SelectNodes(cn *Node, namespace, name string, list *[]*Node) {
 | 
			
		||||
	if cn.Name.Space == namespace && cn.Name.Local == name {
 | 
			
		||||
		c := make([]*Node, len(*list)+1)
 | 
			
		||||
		copy(c, *list)
 | 
			
		||||
		c[len(c)-1] = cn
 | 
			
		||||
		*list = c
 | 
			
		||||
		l := len(*list)
 | 
			
		||||
		if l >= cap(*list) {
 | 
			
		||||
			c := make([]*Node, l, l+16)
 | 
			
		||||
			copy(c, *list)
 | 
			
		||||
			*list = c
 | 
			
		||||
		}
 | 
			
		||||
		*list = (*list)[0 : l+1]
 | 
			
		||||
		(*list)[l] = cn
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -288,60 +278,73 @@ func (this *Node) String() (s string) {
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
func (this *Node) printRoot() (s string) {
 | 
			
		||||
	var data []byte
 | 
			
		||||
	buf := bytes.NewBuffer(data)
 | 
			
		||||
	for _, v := range this.Children {
 | 
			
		||||
		s += v.String()
 | 
			
		||||
		buf.WriteString(v.String())
 | 
			
		||||
	}
 | 
			
		||||
	return
 | 
			
		||||
	return buf.String()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (this *Node) printProcInst() (s string) {
 | 
			
		||||
	s = "<?" + this.Target + " " + this.Value + "?>"
 | 
			
		||||
	return
 | 
			
		||||
func (this *Node) printProcInst() string {
 | 
			
		||||
	return "<?" + this.Target + " " + this.Value + "?>"
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (this *Node) printComment() (s string) {
 | 
			
		||||
	s = "<!-- " + this.Value + " -->"
 | 
			
		||||
	return
 | 
			
		||||
func (this *Node) printComment() string {
 | 
			
		||||
	return "<!-- " + this.Value + " -->"
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (this *Node) printDirective() (s string) {
 | 
			
		||||
	s = "<!" + this.Value + "!>"
 | 
			
		||||
	return
 | 
			
		||||
func (this *Node) printDirective() string {
 | 
			
		||||
	return "<!" + this.Value + "!>"
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (this *Node) printElement() (s string) {
 | 
			
		||||
func (this *Node) printElement() string {
 | 
			
		||||
	var data []byte
 | 
			
		||||
	buf := bytes.NewBuffer(data)
 | 
			
		||||
 | 
			
		||||
	if len(this.Name.Space) > 0 {
 | 
			
		||||
		s = "<" + this.Name.Space + ":" + this.Name.Local
 | 
			
		||||
		buf.WriteRune('<')
 | 
			
		||||
		buf.WriteString(this.Name.Space)
 | 
			
		||||
		buf.WriteRune(':')
 | 
			
		||||
		buf.WriteString(this.Name.Local)
 | 
			
		||||
	} else {
 | 
			
		||||
		s = "<" + this.Name.Local
 | 
			
		||||
		buf.WriteRune('<')
 | 
			
		||||
		buf.WriteString(this.Name.Local)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	for _, v := range this.Attributes {
 | 
			
		||||
		if len(v.Name.Space) > 0 {
 | 
			
		||||
			s += fmt.Sprintf(` %s:%s="%s"`, v.Name.Space, v.Name.Local, v.Value)
 | 
			
		||||
			buf.WriteString(fmt.Sprintf(` %s:%s="%s"`, v.Name.Space, v.Name.Local, v.Value))
 | 
			
		||||
		} else {
 | 
			
		||||
			s += fmt.Sprintf(` %s="%s"`, v.Name.Local, v.Value)
 | 
			
		||||
			buf.WriteString(fmt.Sprintf(` %s="%s"`, v.Name.Local, v.Value))
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if len(this.Children) == 0 && len(this.Value) == 0 {
 | 
			
		||||
		s += " />"
 | 
			
		||||
		return
 | 
			
		||||
		buf.WriteString(" />")
 | 
			
		||||
		return buf.String()
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	s += ">"
 | 
			
		||||
	buf.WriteRune('>')
 | 
			
		||||
 | 
			
		||||
	for _, v := range this.Children {
 | 
			
		||||
		s += v.String()
 | 
			
		||||
		buf.WriteString(v.String())
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	s += this.Value
 | 
			
		||||
	buf.WriteString(this.Value)
 | 
			
		||||
	if len(this.Name.Space) > 0 {
 | 
			
		||||
		s += "</" + this.Name.Space + ":" + this.Name.Local + ">"
 | 
			
		||||
		buf.WriteString("</")
 | 
			
		||||
		buf.WriteString(this.Name.Space)
 | 
			
		||||
		buf.WriteRune(':')
 | 
			
		||||
		buf.WriteString(this.Name.Local)
 | 
			
		||||
		buf.WriteRune('>')
 | 
			
		||||
	} else {
 | 
			
		||||
		s += "</" + this.Name.Local + ">"
 | 
			
		||||
		buf.WriteString("</")
 | 
			
		||||
		buf.WriteString(this.Name.Local)
 | 
			
		||||
		buf.WriteRune('>')
 | 
			
		||||
	}
 | 
			
		||||
	return
 | 
			
		||||
 | 
			
		||||
	return buf.String()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Add a child node
 | 
			
		||||
| 
						 | 
				
			
			@ -351,10 +354,15 @@ func (this *Node) AddChild(t *Node) {
 | 
			
		|||
	}
 | 
			
		||||
	t.Parent = this
 | 
			
		||||
 | 
			
		||||
	c := make([]*Node, len(this.Children)+1)
 | 
			
		||||
	copy(c, this.Children)
 | 
			
		||||
	c[len(c)-1] = t
 | 
			
		||||
	this.Children = c
 | 
			
		||||
	l := len(this.Children)
 | 
			
		||||
	if l >= cap(this.Children) {
 | 
			
		||||
		c := make([]*Node, l, l+10)
 | 
			
		||||
		copy(c, this.Children)
 | 
			
		||||
		this.Children = c
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	this.Children = this.Children[0 : l+1]
 | 
			
		||||
	this.Children[l] = t
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Remove a child node
 | 
			
		||||
| 
						 | 
				
			
			@ -371,10 +379,8 @@ func (this *Node) RemoveChild(t *Node) {
 | 
			
		|||
		return
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	c := make([]*Node, len(this.Children)-1)
 | 
			
		||||
	copy(c, this.Children[0:p])
 | 
			
		||||
	copy(c[p:], this.Children[p+1:])
 | 
			
		||||
	this.Children = c
 | 
			
		||||
	copy(this.Children[p:], this.Children[p+1:])
 | 
			
		||||
	this.Children = this.Children[0 : len(this.Children)-1]
 | 
			
		||||
 | 
			
		||||
	t.Parent = nil
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										1
									
								
								xmlx/test1.xml
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								xmlx/test1.xml
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1 @@
 | 
			
		|||
<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="0.91"><channel><title>WriteTheWeb</title><link>http://writetheweb.com</link><description>News for web users that write back</description><language>en-us</language><copyright>Copyright 2000, WriteTheWeb team.</copyright><managingEditor>editor@writetheweb.com</managingEditor><webMaster>webmaster@writetheweb.com</webMaster><image><title>WriteTheWeb</title><url>http://writetheweb.com/images/mynetscape88.gif</url><link>http://writetheweb.com</link><width>88</width><height>31</height><description>News for web users that write back</description></image><item><title>Giving the world a pluggable Gnutella</title><link>http://writetheweb.com/read.php?item=24</link><description>WorldOS is a framework on which to build programs that work like Freenet or Gnutella -allowing distributed applications using peer-to-peer routing.</description></item><item><title>Syndication discussions hot up</title><link>http://writetheweb.com/read.php?item=23</link><description>After a period of dormancy, the Syndication mailing list has become active again, with contributions from leaders in traditional media and Web syndication.</description></item><item><title>Personal web server integrates file sharing and messaging</title><link>http://writetheweb.com/read.php?item=22</link><description>The Magi Project is an innovative project to create a combined personal web server and messaging system that enables the sharing and synchronization of information across desktop, laptop and palmtop devices.</description></item><item><title>Syndication and Metadata</title><link>http://writetheweb.com/read.php?item=21</link><description>RSS is probably the best known metadata format around. RDF is probably one of the least understood. In this essay, published on my O'Reilly Network weblog, I argue that the next generation of RSS should be based on RDF.</description></item><item><title>UK bloggers get organised</title><link>http://writetheweb.com/read.php?item=20</link><description>Looks like the weblogs scene is gathering pace beyond the shores of the US. There's now a UK-specific page on weblogs.com, and a mailing list at egroups.</description></item><item><title>Yournamehere.com more important than anything</title><link>http://writetheweb.com/read.php?item=19</link><description>Whatever you're publishing on the web, your site name is the most valuable asset you have, according to Carl Steadman.</description></item></channel></rss>
 | 
			
		||||
| 
						 | 
				
			
			@ -16,7 +16,7 @@ func TestLoadLocal(t *testing.T) {
 | 
			
		|||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func TestLoadRemote(t *testing.T) {
 | 
			
		||||
func _TestLoadRemote(t *testing.T) {
 | 
			
		||||
	doc := New()
 | 
			
		||||
 | 
			
		||||
	if err := doc.LoadUri("http://www.w3schools.com/xml/plant_catalog.xml"); err != nil {
 | 
			
		||||
| 
						 | 
				
			
			@ -89,8 +89,7 @@ func TestUnmarshal(t *testing.T) {
 | 
			
		|||
	}
 | 
			
		||||
 | 
			
		||||
	img := Image{}
 | 
			
		||||
	err = node.Unmarshal(&img)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
	if err = node.Unmarshal(&img); err != nil {
 | 
			
		||||
		t.Errorf("Unmarshal(): %s", err)
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue