Fix bug in document.correctEncoding(), which failed to properly parse the encoding attribute of the XML declaration.
This commit is contained in:
		
							parent
							
								
									9b53d04d9d
								
							
						
					
					
						commit
						c271c20e08
					
				
					 3 changed files with 22 additions and 11 deletions
				
			
		
							
								
								
									
										9
									
								
								README
									
										
									
									
									
								
							
							
						
						
									
										9
									
								
								README
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -20,10 +20,11 @@
 | 
			
		|||
 | 
			
		||||
 Getting the package up and running is simple enough:
 | 
			
		||||
 
 | 
			
		||||
 $ cd /path/to/code/dir
 | 
			
		||||
 $ git clone http://github.com/jteeuwen/go-pkg-xmlx.git
 | 
			
		||||
 $ cd go-pkg-xmlx
 | 
			
		||||
 $ make
 | 
			
		||||
    $ goinstall github.com/jteeuwen/go-pkg-xmlx
 | 
			
		||||
 | 
			
		||||
 Using it:
 | 
			
		||||
 | 
			
		||||
    import xmlx "github.com/jteeuwen/go-pkg-xmlx"
 | 
			
		||||
 | 
			
		||||
================================================================================
 | 
			
		||||
 API
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										22
									
								
								document.go
									
										
									
									
									
								
							
							
						
						
									
										22
									
								
								document.go
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -240,24 +240,34 @@ loop:
 | 
			
		|||
			if err == os.EOF {
 | 
			
		||||
				break loop
 | 
			
		||||
			}
 | 
			
		||||
			
 | 
			
		||||
			return "", err
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		switch tt := tok.(type) {
 | 
			
		||||
		case xml.ProcInst:
 | 
			
		||||
			if tt.Target == "xml" { // xml doctype
 | 
			
		||||
				enc = strings.ToLower(string(tt.Inst))
 | 
			
		||||
				if i := strings.Index(enc, `encoding="`); i > -1 {
 | 
			
		||||
					enc = enc[i+len(`encoding="`):]
 | 
			
		||||
					i = strings.Index(enc, `"`)
 | 
			
		||||
					enc = enc[:i]
 | 
			
		||||
					break loop
 | 
			
		||||
				var pair []string
 | 
			
		||||
				var entry string
 | 
			
		||||
 | 
			
		||||
				list := strings.Split(string(tt.Inst), " ", -1)
 | 
			
		||||
				for _, entry = range list {
 | 
			
		||||
					if pair = strings.Split(entry, "=", -1); len(pair) < 2 {
 | 
			
		||||
						continue
 | 
			
		||||
					}
 | 
			
		||||
 | 
			
		||||
					switch pair[0] {
 | 
			
		||||
					case "encoding":
 | 
			
		||||
						enc = pair[1][1:len(pair[1])-1]
 | 
			
		||||
						break loop
 | 
			
		||||
					}
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if enc == "utf-8" {
 | 
			
		||||
		// Data already in utf-8 format. Nothing to do here.
 | 
			
		||||
		return data, nil
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -27,7 +27,7 @@ func TestLoadLocal(t *testing.T) {
 | 
			
		|||
func TestLoadRemote(t *testing.T) {
 | 
			
		||||
	doc := New()
 | 
			
		||||
 | 
			
		||||
	if err := doc.LoadUri("http://rss.cnn.com/rss/cnn_latest.rss"); err != nil {
 | 
			
		||||
	if err := doc.LoadUri("http://blog.golang.org/feeds/posts/default"); err != nil {
 | 
			
		||||
		t.Error(err.String())
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue