Fix issue 1: Added dependency on go-iconv. This lib is needed to ensure we pass valid UTF-8 encoded data to the XML tokenizer.
This commit is contained in:
		
							parent
							
								
									bacbff0e71
								
							
						
					
					
						commit
						02d19ed0bd
					
				
					 9 changed files with 101 additions and 76 deletions
				
			
		| 
						 | 
				
			
			@ -1,6 +1,6 @@
 | 
			
		|||
include $(GOROOT)/src/Make.inc
 | 
			
		||||
 | 
			
		||||
TARG = xmlx
 | 
			
		||||
GOFILES = document.go node.go io.go entitymap.go\
 | 
			
		||||
GOFILES = document.go node.go entitymap.go
 | 
			
		||||
 | 
			
		||||
include $(GOROOT)/src/Make.pkg
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,41 +1,40 @@
 | 
			
		|||
// Copyright (c) 2010, Jim Teeuwen. All rights reserved.
 | 
			
		||||
// This code is subject to a 1-clause BSD license.
 | 
			
		||||
// The contents of which can be found in the LICENSE file.
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
Copyright (c) 2010, Jim Teeuwen.
 | 
			
		||||
All rights reserved.
 | 
			
		||||
 | 
			
		||||
This code is subject to a 1-clause BSD license.
 | 
			
		||||
The contents of which can be found in the LICENSE file.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 This package wraps the standard XML library and uses it to build a node tree of
 | 
			
		||||
 any document you load. This allows you to look up nodes forwards and backwards,
 | 
			
		||||
 as well as perform search queries (no xpath support yet).
 | 
			
		||||
 as well as perform simple search queries.
 | 
			
		||||
 | 
			
		||||
 Nodes now simply become collections and don't require you to read them in the
 | 
			
		||||
 order in which the xml.Parser finds them.
 | 
			
		||||
 | 
			
		||||
 The Document currently implements 2 simple search functions which allow you to
 | 
			
		||||
 The Document currently implements 2 search functions which allow you to
 | 
			
		||||
 look for specific nodes.
 | 
			
		||||
 | 
			
		||||
   Document.SelectNode(namespace, name string) *Node;
 | 
			
		||||
   Document.SelectNodes(namespace, name string) []*Node;
 | 
			
		||||
   *xmlx.Document.SelectNode(namespace, name string) *Node;
 | 
			
		||||
   *xmlx.Document.SelectNodes(namespace, name string) []*Node;
 | 
			
		||||
 | 
			
		||||
 SelectNode() returns the first, single node it finds matching the given name
 | 
			
		||||
 and namespace. SelectNodes() returns a slice containing all the matching nodes.
 | 
			
		||||
 | 
			
		||||
 Note that these search functions can be invoked on individual nodes as well.
 | 
			
		||||
 This allows you to search only a subset of the entire document.
 | 
			
		||||
 | 
			
		||||
*/
 | 
			
		||||
package xmlx
 | 
			
		||||
 | 
			
		||||
import "os"
 | 
			
		||||
import "io"
 | 
			
		||||
import "io/ioutil"
 | 
			
		||||
import "path"
 | 
			
		||||
import "strings"
 | 
			
		||||
import "xml"
 | 
			
		||||
import "fmt"
 | 
			
		||||
import "http"
 | 
			
		||||
import (
 | 
			
		||||
	"os"
 | 
			
		||||
	"io"
 | 
			
		||||
	"io/ioutil"
 | 
			
		||||
	"path"
 | 
			
		||||
	"strings"
 | 
			
		||||
	"xml"
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"http"
 | 
			
		||||
	"iconv"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
type Document struct {
 | 
			
		||||
	Version     string
 | 
			
		||||
| 
						 | 
				
			
			@ -82,6 +81,12 @@ func (this *Document) SelectNodes(namespace, name string) []*Node {
 | 
			
		|||
// *** Satisfy ILoader interface
 | 
			
		||||
// *****************************************************************************
 | 
			
		||||
func (this *Document) LoadString(s string) (err os.Error) {
 | 
			
		||||
	// Ensure we are passing UTF-8 encoded content to the XML tokenizer.
 | 
			
		||||
	if s, err = this.correctEncoding(s); err != nil {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// tokenize data
 | 
			
		||||
	xp := xml.NewParser(strings.NewReader(s))
 | 
			
		||||
	xp.Entity = this.Entity
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -218,3 +223,48 @@ func (this *Document) SaveStream(w io.Writer) (err os.Error) {
 | 
			
		|||
	_, err = w.Write([]byte(s))
 | 
			
		||||
	return
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Use libiconv to ensure we get UTF-8 encoded data. The Go Xml tokenizer will
 | 
			
		||||
// throw a tantrum if we give it anything else.
 | 
			
		||||
func (this *Document) correctEncoding(data string) (ret string, err os.Error) {
 | 
			
		||||
	var cd *iconv.Iconv
 | 
			
		||||
	var tok xml.Token
 | 
			
		||||
 | 
			
		||||
	enc := "utf-8"
 | 
			
		||||
	xp := xml.NewParser(strings.NewReader(data))
 | 
			
		||||
	xp.Entity = this.Entity
 | 
			
		||||
 | 
			
		||||
loop:
 | 
			
		||||
	for {
 | 
			
		||||
		if tok, err = xp.Token(); err != nil {
 | 
			
		||||
			if err == os.EOF {
 | 
			
		||||
				break loop
 | 
			
		||||
			}
 | 
			
		||||
			return "", err
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		switch tt := tok.(type) {
 | 
			
		||||
		case xml.ProcInst:
 | 
			
		||||
			if tt.Target == "xml" { // xml doctype
 | 
			
		||||
				enc = strings.ToLower(string(tt.Inst))
 | 
			
		||||
				if i := strings.Index(enc, `encoding="`); i > -1 {
 | 
			
		||||
					enc = enc[i+len(`encoding="`):]
 | 
			
		||||
					i = strings.Index(enc, `"`)
 | 
			
		||||
					enc = enc[:i]
 | 
			
		||||
					break loop
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if enc == "utf-8" {
 | 
			
		||||
		return data, nil
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if cd, err = iconv.Open("utf-8", enc); err != nil {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	defer cd.Close()
 | 
			
		||||
	return cd.Conv(data)
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,11 +1,6 @@
 | 
			
		|||
/*
 | 
			
		||||
Copyright (c) 2010, Jim Teeuwen.
 | 
			
		||||
All rights reserved.
 | 
			
		||||
 | 
			
		||||
This code is subject to a 1-clause BSD license.
 | 
			
		||||
The contents of which can be found in the LICENSE file.
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
// Copyright (c) 2010, Jim Teeuwen. All rights reserved.
 | 
			
		||||
// This code is subject to a 1-clause BSD license.
 | 
			
		||||
// The contents of which can be found in the LICENSE file.
 | 
			
		||||
package xmlx
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
| 
						 | 
				
			
			@ -20,11 +15,13 @@ package xmlx
 | 
			
		|||
	"&acirc;" (â) is not the same as "&Acirc;" (Â).
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
import "os"
 | 
			
		||||
import "fmt"
 | 
			
		||||
import "utf8"
 | 
			
		||||
import "regexp"
 | 
			
		||||
import "strconv"
 | 
			
		||||
import (
 | 
			
		||||
	"os"
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"utf8"
 | 
			
		||||
	"regexp"
 | 
			
		||||
	"strconv"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
var reg_entnumeric = regexp.MustCompile("^&#[0-9]+;$")
 | 
			
		||||
var reg_entnamed = regexp.MustCompile("^&[a-zA-Z]+;$")
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										30
									
								
								xmlx/io.go
									
										
									
									
									
								
							
							
						
						
									
										30
									
								
								xmlx/io.go
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -1,30 +0,0 @@
 | 
			
		|||
/*
 | 
			
		||||
Copyright (c) 2010, Jim Teeuwen.
 | 
			
		||||
All rights reserved.
 | 
			
		||||
 | 
			
		||||
This code is subject to a 1-clause BSD license.
 | 
			
		||||
The contents of which can be found in the LICENSE file.
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
package xmlx
 | 
			
		||||
 | 
			
		||||
import "os"
 | 
			
		||||
import "io"
 | 
			
		||||
 | 
			
		||||
type ILoader interface {
 | 
			
		||||
	LoadUrl(string) os.Error
 | 
			
		||||
	LoadFile(string) os.Error
 | 
			
		||||
	LoadString(string) os.Error
 | 
			
		||||
	LoadStream(*io.Reader) os.Error
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type ISaver interface {
 | 
			
		||||
	SaveFile(string) os.Error
 | 
			
		||||
	SaveString(string) (string, os.Error)
 | 
			
		||||
	SaveStream(*io.Writer) os.Error
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type ILoaderSaver interface {
 | 
			
		||||
	ILoader
 | 
			
		||||
	ISaver
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										14
									
								
								xmlx/node.go
									
										
									
									
									
								
							
							
						
						
									
										14
									
								
								xmlx/node.go
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -8,12 +8,14 @@ The contents of which can be found in the LICENSE file.
 | 
			
		|||
 | 
			
		||||
package xmlx
 | 
			
		||||
 | 
			
		||||
import "os"
 | 
			
		||||
import "strings"
 | 
			
		||||
import "xml"
 | 
			
		||||
import "bytes"
 | 
			
		||||
import "fmt"
 | 
			
		||||
import "strconv"
 | 
			
		||||
import (
 | 
			
		||||
	"os"
 | 
			
		||||
	"strings"
 | 
			
		||||
	"xml"
 | 
			
		||||
	"bytes"
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"strconv"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
const (
 | 
			
		||||
	NT_ROOT = iota
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,5 +1,5 @@
 | 
			
		|||
<!DOCTYPE xml>
 | 
			
		||||
<?xml version="1.0" encoding="ISO-8859-1"?>
 | 
			
		||||
<?xml version="1.0" encoding="utf-8"?>
 | 
			
		||||
<rss version="0.91">
 | 
			
		||||
	<channel>
 | 
			
		||||
		<title>WriteTheWeb</title> 
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -14,7 +14,7 @@ func TestLoadLocal(t *testing.T) {
 | 
			
		|||
	doc := New()
 | 
			
		||||
 | 
			
		||||
	if err := doc.LoadFile("test.xml"); err != nil {
 | 
			
		||||
		t.Errorf("%s", err)
 | 
			
		||||
		t.Error(err.String())
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -24,11 +24,11 @@ func TestLoadLocal(t *testing.T) {
 | 
			
		|||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func _TestLoadRemote(t *testing.T) {
 | 
			
		||||
func TestLoadRemote(t *testing.T) {
 | 
			
		||||
	doc := New()
 | 
			
		||||
 | 
			
		||||
	if err := doc.LoadUri("http://www.w3schools.com/xml/plant_catalog.xml"); err != nil {
 | 
			
		||||
		t.Errorf("%s", err)
 | 
			
		||||
	if err := doc.LoadUri("http://rss.cnn.com/rss/cnn_latest.rss"); err != nil {
 | 
			
		||||
		t.Error(err.String())
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue