Merge pull request #14 from ThomsonReutersEikon/master

Implement text nodes / content escaping
This commit is contained in:
jimt 2013-09-11 05:28:50 -07:00
commit d5a758279d
4 changed files with 57 additions and 39 deletions

View File

@ -117,7 +117,9 @@ func (this *Document) LoadStream(r io.Reader, charset CharsetFunc) (err error) {
case xml.SyntaxError: case xml.SyntaxError:
return errors.New(tt.Error()) return errors.New(tt.Error())
case xml.CharData: case xml.CharData:
ct.Value = ct.Value + strings.TrimSpace(string([]byte(tt))) t := NewNode(NT_TEXT)
t.Value = string([]byte(tt))
ct.AddChild(t)
case xml.Comment: case xml.Comment:
t := NewNode(NT_COMMENT) t := NewNode(NT_COMMENT)
t.Value = strings.TrimSpace(string([]byte(tt))) t.Value = strings.TrimSpace(string([]byte(tt)))

60
node.go
View File

@ -9,7 +9,6 @@ import (
"encoding/xml" "encoding/xml"
"fmt" "fmt"
"strconv" "strconv"
"strings"
) )
const ( const (
@ -17,6 +16,7 @@ const (
NT_DIRECTIVE NT_DIRECTIVE
NT_PROCINST NT_PROCINST
NT_COMMENT NT_COMMENT
NT_TEXT
NT_ELEMENT NT_ELEMENT
) )
@ -51,7 +51,7 @@ func NewNode(tid byte) *Node {
// This wraps the standard xml.Unmarshal function and supplies this particular // This wraps the standard xml.Unmarshal function and supplies this particular
// node as the content to be unmarshalled. // node as the content to be unmarshalled.
func (this *Node) Unmarshal(obj interface{}) error { func (this *Node) Unmarshal(obj interface{}) error {
return xml.NewDecoder(bytes.NewBuffer(this.bytes(0))).Decode(obj) return xml.NewDecoder(bytes.NewBuffer(this.bytes())).Decode(obj)
} }
// Get node value as string // Get node value as string
@ -433,20 +433,22 @@ func (this *Node) SetAttr(name, value string) {
// Note that NT_ROOT is a special-case empty node used as the root for a // Note that NT_ROOT is a special-case empty node used as the root for a
// Document. This one has no representation by itself. It merely forwards the // Document. This one has no representation by itself. It merely forwards the
// String() call to its child nodes. // String() call to its child nodes.
func (this *Node) Bytes() []byte { return this.bytes(0) } func (this *Node) Bytes() []byte { return this.bytes() }
func (this *Node) bytes(indent int) (b []byte) { func (this *Node) bytes() (b []byte) {
switch this.Type { switch this.Type {
case NT_PROCINST: case NT_PROCINST:
b = this.printProcInst(indent) b = this.printProcInst()
case NT_COMMENT: case NT_COMMENT:
b = this.printComment(indent) b = this.printComment()
case NT_DIRECTIVE: case NT_DIRECTIVE:
b = this.printDirective(indent) b = this.printDirective()
case NT_ELEMENT: case NT_ELEMENT:
b = this.printElement(indent) b = this.printElement()
case NT_TEXT:
b = this.printText()
case NT_ROOT: case NT_ROOT:
b = this.printRoot(indent) b = this.printRoot()
} }
return return
} }
@ -456,38 +458,42 @@ func (this *Node) bytes(indent int) (b []byte) {
// Document. This one has no representation by itself. It merely forwards the // Document. This one has no representation by itself. It merely forwards the
// String() call to its child nodes. // String() call to its child nodes.
func (this *Node) String() (s string) { func (this *Node) String() (s string) {
return string(this.bytes(0)) return string(this.bytes())
} }
func (this *Node) printRoot(indent int) []byte { func (this *Node) printRoot() []byte {
var b bytes.Buffer var b bytes.Buffer
for _, v := range this.Children { for _, v := range this.Children {
b.Write(v.bytes(indent)) b.Write(v.bytes())
} }
return b.Bytes() return b.Bytes()
} }
func (this *Node) printProcInst(indent int) []byte { func (this *Node) printProcInst() []byte {
return []byte("<?" + this.Target + " " + this.Value + "?>") return []byte("<?" + this.Target + " " + this.Value + "?>")
} }
func (this *Node) printComment(indent int) []byte { func (this *Node) printComment() []byte {
return []byte("<!-- " + this.Value + " -->") return []byte("<!-- " + this.Value + " -->")
} }
func (this *Node) printDirective(indent int) []byte { func (this *Node) printDirective() []byte {
return []byte("<!" + this.Value + "!>") return []byte("<!" + this.Value + "!>")
} }
func (this *Node) printElement(indent int) []byte { func (this *Node) printText() []byte {
val := []byte(this.Value)
if len(this.Parent.Children) > 1 {
return val
}
var b bytes.Buffer var b bytes.Buffer
xml.EscapeText(&b, val)
lineSuffix, linePrefix := "", strings.Repeat(IndentPrefix, indent) return b.Bytes()
if len(IndentPrefix) > 0 {
lineSuffix = "\n"
} }
b.WriteString(linePrefix) func (this *Node) printElement() []byte {
var b bytes.Buffer
if len(this.Name.Space) > 0 { if len(this.Name.Space) > 0 {
b.WriteRune('<') b.WriteRune('<')
b.WriteString(this.Name.Space) b.WriteString(this.Name.Space)
@ -509,23 +515,16 @@ func (this *Node) printElement(indent int) []byte {
if len(this.Children) == 0 && len(this.Value) == 0 { if len(this.Children) == 0 && len(this.Value) == 0 {
b.WriteString(" />") b.WriteString(" />")
b.WriteString(lineSuffix)
return b.Bytes() return b.Bytes()
} }
b.WriteRune('>') b.WriteRune('>')
if len(this.Value) == 0 {
b.WriteString(lineSuffix)
}
for _, v := range this.Children { for _, v := range this.Children {
b.Write(v.bytes(indent + 1)) b.Write(v.bytes())
} }
b.WriteString(this.Value) xml.EscapeText(&b, []byte(this.Value))
if len(this.Value) == 0 {
b.WriteString(linePrefix)
}
if len(this.Name.Space) > 0 { if len(this.Name.Space) > 0 {
b.WriteString("</") b.WriteString("</")
b.WriteString(this.Name.Space) b.WriteString(this.Name.Space)
@ -537,7 +536,6 @@ func (this *Node) printElement(indent int) []byte {
b.WriteString(this.Name.Local) b.WriteString(this.Name.Local)
b.WriteRune('>') b.WriteRune('>')
} }
b.WriteString(lineSuffix)
return b.Bytes() return b.Bytes()
} }

1
test4.xml Normal file
View File

@ -0,0 +1 @@
<body>  &lt;https://example.com/file/fm/SU0vRk0xLzIwMTMwOTEwLzA1MDA0MS5ybXdhdGVzdEByZXV0ZXJzLmNvbTEzNzg4NDU1OTk4OTA/Screen%20Shot%202013-09-10%20at%2021.33.54.png&gt; File Attachment:-Screen Shot 2013-09-10 at 21.33.54.png  </body>

View File

@ -137,7 +137,7 @@ func TestUnmarshal(t *testing.T) {
} }
} }
func TestString(t *testing.T) { func TestStringNamespaces(t *testing.T) {
doc := New() doc := New()
err := doc.LoadFile("test3.xml", nil) err := doc.LoadFile("test3.xml", nil)
@ -149,7 +149,7 @@ func TestString(t *testing.T) {
expected := `<root xmlns:foo="http:/example.org/foo"> expected := `<root xmlns:foo="http:/example.org/foo">
<child foo:bar="1"> <child foo:bar="1">
<grandchild xmlns:foo=""> <grandchild xmlns:foo="">
<great-grandchild bar="2" /> <great-grandchild bar="2">&#xA; </great-grandchild>
</grandchild> </grandchild>
</child> </child>
</root> </root>
@ -159,3 +159,20 @@ func TestString(t *testing.T) {
t.Fatalf("expected: %s\ngot: %s\n", expected, got) t.Fatalf("expected: %s\ngot: %s\n", expected, got)
} }
} }
// TestStringEscaping loads test4.xml and compares the serialized document
// root against the expected markup. The expected text keeps the &lt; / &gt;
// entities in escaped form, keeps the whitespace surrounding the element's
// text, and ends with a newline after the closing tag, so the comparison is
// an exact string match.
func TestStringEscaping(t *testing.T) {
	// The closing backtick sits in column 0 on purpose: anything before it
	// would become part of the expected string.
	const want = `<body>  &lt;https://example.com/file/fm/SU0vRk0xLzIwMTMwOTEwLzA1MDA0MS5ybXdhdGVzdEByZXV0ZXJzLmNvbTEzNzg4NDU1OTk4OTA/Screen%20Shot%202013-09-10%20at%2021.33.54.png&gt; File Attachment:-Screen Shot 2013-09-10 at 21.33.54.png  </body>
`

	doc := New()
	if loadErr := doc.LoadFile("test4.xml", nil); loadErr != nil {
		t.Errorf("LoadFile(): %s", loadErr)
		return
	}

	got := doc.Root.String()
	if got == want {
		return
	}
	t.Fatalf("expected: %s\ngot: %s\n", want, got)
}