diff --git a/document.go b/document.go index 4e2fbd8..a718fc8 100644 --- a/document.go +++ b/document.go @@ -117,7 +117,9 @@ func (this *Document) LoadStream(r io.Reader, charset CharsetFunc) (err error) { case xml.SyntaxError: return errors.New(tt.Error()) case xml.CharData: - ct.Value = ct.Value + strings.TrimSpace(string([]byte(tt))) + t := NewNode(NT_TEXT) + t.Value = string([]byte(tt)) + ct.AddChild(t) case xml.Comment: t := NewNode(NT_COMMENT) t.Value = strings.TrimSpace(string([]byte(tt))) diff --git a/node.go b/node.go index 18fb9ee..5327b39 100644 --- a/node.go +++ b/node.go @@ -9,7 +9,6 @@ import ( "encoding/xml" "fmt" "strconv" - "strings" ) const ( @@ -17,6 +16,7 @@ const ( NT_DIRECTIVE NT_PROCINST NT_COMMENT + NT_TEXT NT_ELEMENT ) @@ -51,7 +51,7 @@ func NewNode(tid byte) *Node { // This wraps the standard xml.Unmarshal function and supplies this particular // node as the content to be unmarshalled. func (this *Node) Unmarshal(obj interface{}) error { - return xml.NewDecoder(bytes.NewBuffer(this.bytes(0))).Decode(obj) + return xml.NewDecoder(bytes.NewBuffer(this.bytes())).Decode(obj) } // Get node value as string @@ -433,20 +433,22 @@ func (this *Node) SetAttr(name, value string) { // Note that NT_ROOT is a special-case empty node used as the root for a // Document. This one has no representation by itself. It merely forwards the // String() call to it's child nodes. -func (this *Node) Bytes() []byte { return this.bytes(0) } +func (this *Node) Bytes() []byte { return this.bytes() } -func (this *Node) bytes(indent int) (b []byte) { +func (this *Node) bytes() (b []byte) { switch this.Type { case NT_PROCINST: - b = this.printProcInst(indent) + b = this.printProcInst() case NT_COMMENT: - b = this.printComment(indent) + b = this.printComment() case NT_DIRECTIVE: - b = this.printDirective(indent) + b = this.printDirective() case NT_ELEMENT: - b = this.printElement(indent) + b = this.printElement() + case NT_TEXT: + b = this.printText() case NT_ROOT: - b = this.printRoot(indent) + b = this.printRoot() } return } @@ -456,38 +458,42 @@ func (this *Node) bytes(indent int) (b []byte) { // Document. This one has no representation by itself. It merely forwards the // String() call to it's child nodes. func (this *Node) String() (s string) { - return string(this.bytes(0)) + return string(this.bytes()) } -func (this *Node) printRoot(indent int) []byte { +func (this *Node) printRoot() []byte { var b bytes.Buffer for _, v := range this.Children { - b.Write(v.bytes(indent)) + b.Write(v.bytes()) } return b.Bytes() } -func (this *Node) printProcInst(indent int) []byte { +func (this *Node) printProcInst() []byte { return []byte("") } -func (this *Node) printComment(indent int) []byte { +func (this *Node) printComment() []byte { return []byte("") } -func (this *Node) printDirective(indent int) []byte { +func (this *Node) printDirective() []byte { return []byte("") } -func (this *Node) printElement(indent int) []byte { +func (this *Node) printText() []byte { + val := []byte(this.Value) + if len(this.Parent.Children) > 1 { + return val + } + var b bytes.Buffer + xml.EscapeText(&b, val) + return b.Bytes() +} + +func (this *Node) printElement() []byte { var b bytes.Buffer - lineSuffix, linePrefix := "", strings.Repeat(IndentPrefix, indent) - if len(IndentPrefix) > 0 { - lineSuffix = "\n" - } - - b.WriteString(linePrefix) if len(this.Name.Space) > 0 { b.WriteRune('<') b.WriteString(this.Name.Space) @@ -509,23 +515,16 @@ func (this *Node) printElement(indent int) []byte { if len(this.Children) == 0 && len(this.Value) == 0 { b.WriteString(" />") - b.WriteString(lineSuffix) return b.Bytes() } b.WriteRune('>') - if len(this.Value) == 0 { - b.WriteString(lineSuffix) - } for _, v := range this.Children { - b.Write(v.bytes(indent + 1)) + b.Write(v.bytes()) } - b.WriteString(this.Value) - if len(this.Value) == 0 { - b.WriteString(linePrefix) - } + xml.EscapeText(&b, []byte(this.Value)) if len(this.Name.Space) > 0 { b.WriteString("') } - b.WriteString(lineSuffix) return b.Bytes() } diff --git a/test4.xml b/test4.xml new file mode 100644 index 0000000..cc32270 --- /dev/null +++ b/test4.xml @@ -0,0 +1 @@ +  <https://example.com/file/fm/SU0vRk0xLzIwMTMwOTEwLzA1MDA0MS5ybXdhdGVzdEByZXV0ZXJzLmNvbTEzNzg4NDU1OTk4OTA/Screen%20Shot%202013-09-10%20at%2021.33.54.png> File Attachment:-Screen Shot 2013-09-10 at 21.33.54.png  diff --git a/xmlx_test.go b/xmlx_test.go index 44a2ac8..5dd8b82 100644 --- a/xmlx_test.go +++ b/xmlx_test.go @@ -137,7 +137,7 @@ func TestUnmarshal(t *testing.T) { } } -func TestString(t *testing.T) { +func TestStringNamespaces(t *testing.T) { doc := New() err := doc.LoadFile("test3.xml", nil) @@ -147,11 +147,11 @@ func TestString(t *testing.T) { } expected := ` - - - - - + + + + + ` @@ -159,3 +159,20 @@ func TestString(t *testing.T) { t.Fatalf("expected: %s\ngot: %s\n", expected, got) } } + +func TestStringEscaping(t *testing.T) { + doc := New() + err := doc.LoadFile("test4.xml", nil) + + if err != nil { + t.Errorf("LoadFile(): %s", err) + return + } + + expected := `  <https://example.com/file/fm/SU0vRk0xLzIwMTMwOTEwLzA1MDA0MS5ybXdhdGVzdEByZXV0ZXJzLmNvbTEzNzg4NDU1OTk4OTA/Screen%20Shot%202013-09-10%20at%2021.33.54.png> File Attachment:-Screen Shot 2013-09-10 at 21.33.54.png  +` + + if got := doc.Root.String(); got != expected { + t.Fatalf("expected: %s\ngot: %s\n", expected, got) + } +}