Fix issue 1: Added dependency to go-iconv. This lib is needed to ensure we pass valid UTF-8 encoded data to the XML tokenizer.
This commit is contained in:
		
							parent
							
								
									bacbff0e71
								
							
						
					
					
						commit
						02d19ed0bd
					
				
					 9 changed files with 101 additions and 76 deletions
				
			
		
							
								
								
									
										2
									
								
								LICENSE
									
										
									
									
									
								
							
							
						
						
									
										2
									
								
								LICENSE
									
										
									
									
									
								
							|  | @ -1,5 +1,5 @@ | ||||||
| 
 | 
 | ||||||
| Copyright (c) 2010, Jim Teeuwen. | Copyright (c) 2010-2011, Jim Teeuwen. | ||||||
| All rights reserved. | All rights reserved. | ||||||
| 
 | 
 | ||||||
| Redistribution and use in source and binary forms, with or without modification, | Redistribution and use in source and binary forms, with or without modification, | ||||||
|  |  | ||||||
							
								
								
									
										6
									
								
								README
									
										
									
									
									
								
							
							
						
						
									
										6
									
								
								README
									
										
									
									
									
								
							|  | @ -8,6 +8,12 @@ | ||||||
|  Nodes now simply become collections and don't require you to read them in the |  Nodes now simply become collections and don't require you to read them in the | ||||||
|  order in which the xml.Parser finds them. |  order in which the xml.Parser finds them. | ||||||
| 
 | 
 | ||||||
|  | ================================================================================ | ||||||
|  |  DEPENDENCIES | ||||||
|  | ================================================================================ | ||||||
|  | 
 | ||||||
|  |  go-iconv: https://github.com/sloonz/go-iconv | ||||||
|  | 
 | ||||||
| ================================================================================ | ================================================================================ | ||||||
|  USAGE |  USAGE | ||||||
| ================================================================================ | ================================================================================ | ||||||
|  |  | ||||||
|  | @ -1,6 +1,6 @@ | ||||||
| include $(GOROOT)/src/Make.inc | include $(GOROOT)/src/Make.inc | ||||||
| 
 | 
 | ||||||
| TARG = xmlx | TARG = xmlx | ||||||
| GOFILES = document.go node.go io.go entitymap.go\
 | GOFILES = document.go node.go entitymap.go | ||||||
| 
 | 
 | ||||||
| include $(GOROOT)/src/Make.pkg | include $(GOROOT)/src/Make.pkg | ||||||
|  |  | ||||||
|  | @ -1,41 +1,40 @@ | ||||||
|  | // Copyright (c) 2010, Jim Teeuwen. All rights reserved. | ||||||
|  | // This code is subject to a 1-clause BSD license. | ||||||
|  | // The contents of which can be found in the LICENSE file. | ||||||
|  | 
 | ||||||
| /* | /* | ||||||
| Copyright (c) 2010, Jim Teeuwen. |  | ||||||
| All rights reserved. |  | ||||||
| 
 |  | ||||||
| This code is subject to a 1-clause BSD license. |  | ||||||
| The contents of which can be found in the LICENSE file. |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
|  This package wraps the standard XML library and uses it to build a node tree of |  This package wraps the standard XML library and uses it to build a node tree of | ||||||
|  any document you load. This allows you to look up nodes forwards and backwards, |  any document you load. This allows you to look up nodes forwards and backwards, | ||||||
|  as well as perform search queries (no xpath support yet). |  as well as perform simple search queries. | ||||||
| 
 | 
 | ||||||
|  Nodes now simply become collections and don't require you to read them in the |  Nodes now simply become collections and don't require you to read them in the | ||||||
|  order in which the xml.Parser finds them. |  order in which the xml.Parser finds them. | ||||||
| 
 | 
 | ||||||
|  The Document currently implements 2 simple search functions which allow you to |  The Document currently implements 2 search functions which allow you to | ||||||
|  look for specific nodes. |  look for specific nodes. | ||||||
| 
 | 
 | ||||||
|    Document.SelectNode(namespace, name string) *Node; |    *xmlx.Document.SelectNode(namespace, name string) *Node; | ||||||
|    Document.SelectNodes(namespace, name string) []*Node; |    *xmlx.Document.SelectNodes(namespace, name string) []*Node; | ||||||
| 
 | 
 | ||||||
|  SelectNode() returns the first, single node it finds matching the given name |  SelectNode() returns the first, single node it finds matching the given name | ||||||
|  and namespace. SelectNodes() returns a slice containing all the matching nodes. |  and namespace. SelectNodes() returns a slice containing all the matching nodes. | ||||||
| 
 | 
 | ||||||
|  Note that these search functions can be invoked on individual nodes as well. |  Note that these search functions can be invoked on individual nodes as well. | ||||||
|  This allows you to search only a subset of the entire document. |  This allows you to search only a subset of the entire document. | ||||||
| 
 |  | ||||||
| */ | */ | ||||||
| package xmlx | package xmlx | ||||||
| 
 | 
 | ||||||
| import "os" | import ( | ||||||
| import "io" | 	"os" | ||||||
| import "io/ioutil" | 	"io" | ||||||
| import "path" | 	"io/ioutil" | ||||||
| import "strings" | 	"path" | ||||||
| import "xml" | 	"strings" | ||||||
| import "fmt" | 	"xml" | ||||||
| import "http" | 	"fmt" | ||||||
|  | 	"http" | ||||||
|  | 	"iconv" | ||||||
|  | ) | ||||||
| 
 | 
 | ||||||
| type Document struct { | type Document struct { | ||||||
| 	Version     string | 	Version     string | ||||||
|  | @ -82,6 +81,12 @@ func (this *Document) SelectNodes(namespace, name string) []*Node { | ||||||
| // *** Satisfy ILoader interface | // *** Satisfy ILoader interface | ||||||
| // ***************************************************************************** | // ***************************************************************************** | ||||||
| func (this *Document) LoadString(s string) (err os.Error) { | func (this *Document) LoadString(s string) (err os.Error) { | ||||||
|  | 	// Ensure we are passing UTF-8 encoded content to the XML tokenizer. | ||||||
|  | 	if s, err = this.correctEncoding(s); err != nil { | ||||||
|  | 		return | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	// tokenize data | ||||||
| 	xp := xml.NewParser(strings.NewReader(s)) | 	xp := xml.NewParser(strings.NewReader(s)) | ||||||
| 	xp.Entity = this.Entity | 	xp.Entity = this.Entity | ||||||
| 
 | 
 | ||||||
|  | @ -218,3 +223,48 @@ func (this *Document) SaveStream(w io.Writer) (err os.Error) { | ||||||
| 	_, err = w.Write([]byte(s)) | 	_, err = w.Write([]byte(s)) | ||||||
| 	return | 	return | ||||||
| } | } | ||||||
|  | 
 | ||||||
|  | // Use libiconv to ensure we get UTF-8 encoded data. The Go Xml tokenizer will | ||||||
|  | // throw a tantrum if we give it anything else. | ||||||
|  | func (this *Document) correctEncoding(data string) (ret string, err os.Error) { | ||||||
|  | 	var cd *iconv.Iconv | ||||||
|  | 	var tok xml.Token | ||||||
|  | 
 | ||||||
|  | 	enc := "utf-8" | ||||||
|  | 	xp := xml.NewParser(strings.NewReader(data)) | ||||||
|  | 	xp.Entity = this.Entity | ||||||
|  | 
 | ||||||
|  | loop: | ||||||
|  | 	for { | ||||||
|  | 		if tok, err = xp.Token(); err != nil { | ||||||
|  | 			if err == os.EOF { | ||||||
|  | 				break loop | ||||||
|  | 			} | ||||||
|  | 			return "", err | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		switch tt := tok.(type) { | ||||||
|  | 		case xml.ProcInst: | ||||||
|  | 			if tt.Target == "xml" { // xml doctype | ||||||
|  | 				enc = strings.ToLower(string(tt.Inst)) | ||||||
|  | 				if i := strings.Index(enc, `encoding="`); i > -1 { | ||||||
|  | 					enc = enc[i+len(`encoding="`):] | ||||||
|  | 					i = strings.Index(enc, `"`) | ||||||
|  | 					enc = enc[:i] | ||||||
|  | 					break loop | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if enc == "utf-8" { | ||||||
|  | 		return data, nil | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if cd, err = iconv.Open("utf-8", enc); err != nil { | ||||||
|  | 		return | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	defer cd.Close() | ||||||
|  | 	return cd.Conv(data) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | @ -1,11 +1,6 @@ | ||||||
| /* | // Copyright (c) 2010, Jim Teeuwen. All rights reserved. | ||||||
| Copyright (c) 2010, Jim Teeuwen. | // This code is subject to a 1-clause BSD license. | ||||||
| All rights reserved. | // The contents of which can be found in the LICENSE file. | ||||||
| 
 |  | ||||||
| This code is subject to a 1-clause BSD license. |  | ||||||
| The contents of which can be found in the LICENSE file. |  | ||||||
| */ |  | ||||||
| 
 |  | ||||||
| package xmlx | package xmlx | ||||||
| 
 | 
 | ||||||
| /* | /* | ||||||
|  | @ -20,11 +15,13 @@ package xmlx | ||||||
| 	"&acirc;" (â) is not the same as "&Acirc;" (Â). | 	"&acirc;" (â) is not the same as "&Acirc;" (Â). | ||||||
| */ | */ | ||||||
| 
 | 
 | ||||||
| import "os" | import ( | ||||||
| import "fmt" | 	"os" | ||||||
| import "utf8" | 	"fmt" | ||||||
| import "regexp" | 	"utf8" | ||||||
| import "strconv" | 	"regexp" | ||||||
|  | 	"strconv" | ||||||
|  | ) | ||||||
| 
 | 
 | ||||||
| var reg_entnumeric = regexp.MustCompile("^&#[0-9]+;$") | var reg_entnumeric = regexp.MustCompile("^&#[0-9]+;$") | ||||||
| var reg_entnamed = regexp.MustCompile("^&[a-zA-Z]+;$") | var reg_entnamed = regexp.MustCompile("^&[a-zA-Z]+;$") | ||||||
|  |  | ||||||
							
								
								
									
										30
									
								
								xmlx/io.go
									
										
									
									
									
								
							
							
						
						
									
										30
									
								
								xmlx/io.go
									
										
									
									
									
								
							|  | @ -1,30 +0,0 @@ | ||||||
| /* |  | ||||||
| Copyright (c) 2010, Jim Teeuwen. |  | ||||||
| All rights reserved. |  | ||||||
| 
 |  | ||||||
| This code is subject to a 1-clause BSD license. |  | ||||||
| The contents of which can be found in the LICENSE file. |  | ||||||
| */ |  | ||||||
| 
 |  | ||||||
| package xmlx |  | ||||||
| 
 |  | ||||||
| import "os" |  | ||||||
| import "io" |  | ||||||
| 
 |  | ||||||
| type ILoader interface { |  | ||||||
| 	LoadUrl(string) os.Error |  | ||||||
| 	LoadFile(string) os.Error |  | ||||||
| 	LoadString(string) os.Error |  | ||||||
| 	LoadStream(*io.Reader) os.Error |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| type ISaver interface { |  | ||||||
| 	SaveFile(string) os.Error |  | ||||||
| 	SaveString(string) (string, os.Error) |  | ||||||
| 	SaveStream(*io.Writer) os.Error |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| type ILoaderSaver interface { |  | ||||||
| 	ILoader |  | ||||||
| 	ISaver |  | ||||||
| } |  | ||||||
							
								
								
									
										14
									
								
								xmlx/node.go
									
										
									
									
									
								
							
							
						
						
									
										14
									
								
								xmlx/node.go
									
										
									
									
									
								
							|  | @ -8,12 +8,14 @@ The contents of which can be found in the LICENSE file. | ||||||
| 
 | 
 | ||||||
| package xmlx | package xmlx | ||||||
| 
 | 
 | ||||||
| import "os" | import ( | ||||||
| import "strings" | 	"os" | ||||||
| import "xml" | 	"strings" | ||||||
| import "bytes" | 	"xml" | ||||||
| import "fmt" | 	"bytes" | ||||||
| import "strconv" | 	"fmt" | ||||||
|  | 	"strconv" | ||||||
|  | ) | ||||||
| 
 | 
 | ||||||
| const ( | const ( | ||||||
| 	NT_ROOT = iota | 	NT_ROOT = iota | ||||||
|  |  | ||||||
|  | @ -1,5 +1,5 @@ | ||||||
| <!DOCTYPE xml> | <!DOCTYPE xml> | ||||||
| <?xml version="1.0" encoding="ISO-8859-1"?> | <?xml version="1.0" encoding="utf-8"?> | ||||||
| <rss version="0.91"> | <rss version="0.91"> | ||||||
| 	<channel> | 	<channel> | ||||||
| 		<title>WriteTheWeb</title>  | 		<title>WriteTheWeb</title>  | ||||||
|  |  | ||||||
|  | @ -14,7 +14,7 @@ func TestLoadLocal(t *testing.T) { | ||||||
| 	doc := New() | 	doc := New() | ||||||
| 
 | 
 | ||||||
| 	if err := doc.LoadFile("test.xml"); err != nil { | 	if err := doc.LoadFile("test.xml"); err != nil { | ||||||
| 		t.Errorf("%s", err) | 		t.Error(err.String()) | ||||||
| 		return | 		return | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | @ -24,11 +24,11 @@ func TestLoadLocal(t *testing.T) { | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func _TestLoadRemote(t *testing.T) { | func TestLoadRemote(t *testing.T) { | ||||||
| 	doc := New() | 	doc := New() | ||||||
| 
 | 
 | ||||||
| 	if err := doc.LoadUri("http://www.w3schools.com/xml/plant_catalog.xml"); err != nil { | 	if err := doc.LoadUri("http://rss.cnn.com/rss/cnn_latest.rss"); err != nil { | ||||||
| 		t.Errorf("%s", err) | 		t.Error(err.String()) | ||||||
| 		return | 		return | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue