Fix issue 1: Added dependency on go-iconv. This lib is needed to ensure we pass valid UTF-8 encoded data to the XML tokenizer.
This commit is contained in:
		
							parent
							
								
									bacbff0e71
								
							
						
					
					
						commit
						02d19ed0bd
					
				
					 9 changed files with 101 additions and 76 deletions
				
			
		
							
								
								
									
										2
									
								
								LICENSE
									
										
									
									
									
								
							
							
						
						
									
										2
									
								
								LICENSE
									
										
									
									
									
								
							|  | @ -1,5 +1,5 @@ | |||
| 
 | ||||
| Copyright (c) 2010, Jim Teeuwen. | ||||
| Copyright (c) 2010-2011, Jim Teeuwen. | ||||
| All rights reserved. | ||||
| 
 | ||||
| Redistribution and use in source and binary forms, with or without modification, | ||||
|  |  | |||
							
								
								
									
										6
									
								
								README
									
										
									
									
									
								
							
							
						
						
									
										6
									
								
								README
									
										
									
									
									
								
							|  | @ -8,6 +8,12 @@ | |||
|  Nodes now simply become collections and don't require you to read them in the | ||||
|  order in which the xml.Parser finds them. | ||||
| 
 | ||||
| ================================================================================ | ||||
|  DEPENDENCIES | ||||
| ================================================================================ | ||||
| 
 | ||||
|  go-iconv: https://github.com/sloonz/go-iconv | ||||
| 
 | ||||
| ================================================================================ | ||||
|  USAGE | ||||
| ================================================================================ | ||||
|  |  | |||
|  | @ -1,6 +1,6 @@ | |||
| include $(GOROOT)/src/Make.inc | ||||
| 
 | ||||
| TARG = xmlx | ||||
| GOFILES = document.go node.go io.go entitymap.go\
 | ||||
| GOFILES = document.go node.go entitymap.go | ||||
| 
 | ||||
| include $(GOROOT)/src/Make.pkg | ||||
|  |  | |||
|  | @ -1,41 +1,40 @@ | |||
| // Copyright (c) 2010, Jim Teeuwen. All rights reserved. | ||||
| // This code is subject to a 1-clause BSD license. | ||||
| // The contents of which can be found in the LICENSE file. | ||||
| 
 | ||||
| /* | ||||
| Copyright (c) 2010, Jim Teeuwen. | ||||
| All rights reserved. | ||||
| 
 | ||||
| This code is subject to a 1-clause BSD license. | ||||
| The contents of which can be found in the LICENSE file. | ||||
| 
 | ||||
| 
 | ||||
|  This package wraps the standard XML library and uses it to build a node tree of | ||||
|  any document you load. This allows you to look up nodes forwards and backwards, | ||||
|  as well as perform search queries (no xpath support yet). | ||||
|  as well as perform simple search queries. | ||||
| 
 | ||||
|  Nodes now simply become collections and don't require you to read them in the | ||||
|  order in which the xml.Parser finds them. | ||||
| 
 | ||||
|  The Document currently implements 2 simple search functions which allow you to | ||||
|  The Document currently implements 2 search functions which allow you to | ||||
|  look for specific nodes. | ||||
| 
 | ||||
|    Document.SelectNode(namespace, name string) *Node; | ||||
|    Document.SelectNodes(namespace, name string) []*Node; | ||||
|    *xmlx.Document.SelectNode(namespace, name string) *Node; | ||||
|    *xmlx.Document.SelectNodes(namespace, name string) []*Node; | ||||
| 
 | ||||
|  SelectNode() returns the first, single node it finds matching the given name | ||||
|  and namespace. SelectNodes() returns a slice containing all the matching nodes. | ||||
| 
 | ||||
|  Note that these search functions can be invoked on individual nodes as well. | ||||
|  This allows you to search only a subset of the entire document. | ||||
| 
 | ||||
| */ | ||||
| package xmlx | ||||
| 
 | ||||
| import "os" | ||||
| import "io" | ||||
| import "io/ioutil" | ||||
| import "path" | ||||
| import "strings" | ||||
| import "xml" | ||||
| import "fmt" | ||||
| import "http" | ||||
| import ( | ||||
| 	"os" | ||||
| 	"io" | ||||
| 	"io/ioutil" | ||||
| 	"path" | ||||
| 	"strings" | ||||
| 	"xml" | ||||
| 	"fmt" | ||||
| 	"http" | ||||
| 	"iconv" | ||||
| ) | ||||
| 
 | ||||
| type Document struct { | ||||
| 	Version     string | ||||
|  | @ -82,6 +81,12 @@ func (this *Document) SelectNodes(namespace, name string) []*Node { | |||
| // *** Satisfy ILoader interface | ||||
| // ***************************************************************************** | ||||
| func (this *Document) LoadString(s string) (err os.Error) { | ||||
| 	// Ensure we are passing UTF-8 encoding content to the XML tokenizer. | ||||
| 	if s, err = this.correctEncoding(s); err != nil { | ||||
| 		return | ||||
| 	} | ||||
| 
 | ||||
| 	// tokenize data | ||||
| 	xp := xml.NewParser(strings.NewReader(s)) | ||||
| 	xp.Entity = this.Entity | ||||
| 
 | ||||
|  | @ -218,3 +223,48 @@ func (this *Document) SaveStream(w io.Writer) (err os.Error) { | |||
| 	_, err = w.Write([]byte(s)) | ||||
| 	return | ||||
| } | ||||
| 
 | ||||
| // Use libiconv to ensure we get UTF-8 encoded data. The Go Xml tokenizer will | ||||
| // throw a tantrum if we give it anything else. | ||||
| func (this *Document) correctEncoding(data string) (ret string, err os.Error) { | ||||
| 	var cd *iconv.Iconv | ||||
| 	var tok xml.Token | ||||
| 
 | ||||
| 	enc := "utf-8" | ||||
| 	xp := xml.NewParser(strings.NewReader(data)) | ||||
| 	xp.Entity = this.Entity | ||||
| 
 | ||||
| loop: | ||||
| 	for { | ||||
| 		if tok, err = xp.Token(); err != nil { | ||||
| 			if err == os.EOF { | ||||
| 				break loop | ||||
| 			} | ||||
| 			return "", err | ||||
| 		} | ||||
| 
 | ||||
| 		switch tt := tok.(type) { | ||||
| 		case xml.ProcInst: | ||||
| 			if tt.Target == "xml" { // xml doctype | ||||
| 				enc = strings.ToLower(string(tt.Inst)) | ||||
| 				if i := strings.Index(enc, `encoding="`); i > -1 { | ||||
| 					enc = enc[i+len(`encoding="`):] | ||||
| 					i = strings.Index(enc, `"`) | ||||
| 					enc = enc[:i] | ||||
| 					break loop | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if enc == "utf-8" { | ||||
| 		return data, nil | ||||
| 	} | ||||
| 
 | ||||
| 	if cd, err = iconv.Open("utf-8", enc); err != nil { | ||||
| 		return | ||||
| 	} | ||||
| 
 | ||||
| 	defer cd.Close() | ||||
| 	return cd.Conv(data) | ||||
| } | ||||
|  |  | |||
|  | @ -1,11 +1,6 @@ | |||
| /* | ||||
| Copyright (c) 2010, Jim Teeuwen. | ||||
| All rights reserved. | ||||
| 
 | ||||
| This code is subject to a 1-clause BSD license. | ||||
| The contents of which can be found in the LICENSE file. | ||||
| */ | ||||
| 
 | ||||
| // Copyright (c) 2010, Jim Teeuwen. All rights reserved. | ||||
| // This code is subject to a 1-clause BSD license. | ||||
| // The contents of which can be found in the LICENSE file. | ||||
| package xmlx | ||||
| 
 | ||||
| /* | ||||
|  | @ -20,11 +15,13 @@ package xmlx | |||
| 	"&acirc;" (â) is not the same as "&Acirc;" (Â). | ||||
| */ | ||||
| 
 | ||||
| import "os" | ||||
| import "fmt" | ||||
| import "utf8" | ||||
| import "regexp" | ||||
| import "strconv" | ||||
| import ( | ||||
| 	"os" | ||||
| 	"fmt" | ||||
| 	"utf8" | ||||
| 	"regexp" | ||||
| 	"strconv" | ||||
| ) | ||||
| 
 | ||||
| var reg_entnumeric = regexp.MustCompile("^&#[0-9]+;$") | ||||
| var reg_entnamed = regexp.MustCompile("^&[a-zA-Z]+;$") | ||||
|  |  | |||
							
								
								
									
										30
									
								
								xmlx/io.go
									
										
									
									
									
								
							
							
						
						
									
										30
									
								
								xmlx/io.go
									
										
									
									
									
								
							|  | @ -1,30 +0,0 @@ | |||
| /* | ||||
| Copyright (c) 2010, Jim Teeuwen. | ||||
| All rights reserved. | ||||
| 
 | ||||
| This code is subject to a 1-clause BSD license. | ||||
| The contents of which can be found in the LICENSE file. | ||||
| */ | ||||
| 
 | ||||
| package xmlx | ||||
| 
 | ||||
| import "os" | ||||
| import "io" | ||||
| 
 | ||||
| type ILoader interface { | ||||
| 	LoadUrl(string) os.Error | ||||
| 	LoadFile(string) os.Error | ||||
| 	LoadString(string) os.Error | ||||
| 	LoadStream(*io.Reader) os.Error | ||||
| } | ||||
| 
 | ||||
| type ISaver interface { | ||||
| 	SaveFile(string) os.Error | ||||
| 	SaveString(string) (string, os.Error) | ||||
| 	SaveStream(*io.Writer) os.Error | ||||
| } | ||||
| 
 | ||||
| type ILoaderSaver interface { | ||||
| 	ILoader | ||||
| 	ISaver | ||||
| } | ||||
							
								
								
									
										14
									
								
								xmlx/node.go
									
										
									
									
									
								
							
							
						
						
									
										14
									
								
								xmlx/node.go
									
										
									
									
									
								
							|  | @ -8,12 +8,14 @@ The contents of which can be found in the LICENSE file. | |||
| 
 | ||||
| package xmlx | ||||
| 
 | ||||
| import "os" | ||||
| import "strings" | ||||
| import "xml" | ||||
| import "bytes" | ||||
| import "fmt" | ||||
| import "strconv" | ||||
| import ( | ||||
| 	"os" | ||||
| 	"strings" | ||||
| 	"xml" | ||||
| 	"bytes" | ||||
| 	"fmt" | ||||
| 	"strconv" | ||||
| ) | ||||
| 
 | ||||
| const ( | ||||
| 	NT_ROOT = iota | ||||
|  |  | |||
|  | @ -1,5 +1,5 @@ | |||
| <!DOCTYPE xml> | ||||
| <?xml version="1.0" encoding="ISO-8859-1"?> | ||||
| <?xml version="1.0" encoding="utf-8"?> | ||||
| <rss version="0.91"> | ||||
| 	<channel> | ||||
| 		<title>WriteTheWeb</title>  | ||||
|  |  | |||
|  | @ -14,7 +14,7 @@ func TestLoadLocal(t *testing.T) { | |||
| 	doc := New() | ||||
| 
 | ||||
| 	if err := doc.LoadFile("test.xml"); err != nil { | ||||
| 		t.Errorf("%s", err) | ||||
| 		t.Error(err.String()) | ||||
| 		return | ||||
| 	} | ||||
| 
 | ||||
|  | @ -24,11 +24,11 @@ func TestLoadLocal(t *testing.T) { | |||
| 	} | ||||
| } | ||||
| 
 | ||||
| func _TestLoadRemote(t *testing.T) { | ||||
| func TestLoadRemote(t *testing.T) { | ||||
| 	doc := New() | ||||
| 
 | ||||
| 	if err := doc.LoadUri("http://www.w3schools.com/xml/plant_catalog.xml"); err != nil { | ||||
| 		t.Errorf("%s", err) | ||||
| 	if err := doc.LoadUri("http://rss.cnn.com/rss/cnn_latest.rss"); err != nil { | ||||
| 		t.Error(err.String()) | ||||
| 		return | ||||
| 	} | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue