Skip to content

Commit

Permalink
Improve m.Xml() performance by orders of magnitude
Browse files Browse the repository at this point in the history
  • Loading branch information
clbanning committed May 1, 2020
1 parent a05be54 commit e8ece66
Show file tree
Hide file tree
Showing 3 changed files with 174 additions and 61 deletions.
52 changes: 31 additions & 21 deletions anyxml.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package mxj

import (
"bytes"
"encoding/xml"
"reflect"
)
Expand Down Expand Up @@ -77,40 +78,43 @@ func AnyXml(v interface{}, tags ...string) ([]byte, error) {
}

var err error
s := new(string)
s := new(bytes.Buffer)
p := new(pretty)

var ss string
var b []byte
switch v.(type) {
case []interface{}:
ss = "<" + rt + ">"
if _, err = s.WriteString("<" + rt + ">"); err != nil {
return nil, err
}
for _, vv := range v.([]interface{}) {
switch vv.(type) {
case map[string]interface{}:
m := vv.(map[string]interface{})
if len(m) == 1 {
for tag, val := range m {
err = mapToXmlIndent(false, s, tag, val, p)
err = marshalMapToXmlIndent(false, s, tag, val, p)
}
} else {
err = mapToXmlIndent(false, s, et, vv, p)
err = marshalMapToXmlIndent(false, s, et, vv, p)
}
default:
err = mapToXmlIndent(false, s, et, vv, p)
err = marshalMapToXmlIndent(false, s, et, vv, p)
}
if err != nil {
break
}
}
ss += *s + "</" + rt + ">"
b = []byte(ss)
if _, err = s.WriteString("</" + rt + ">"); err != nil {
return nil, err
}
b = s.Bytes()
case map[string]interface{}:
m := Map(v.(map[string]interface{}))
b, err = m.Xml(rt)
default:
err = mapToXmlIndent(false, s, rt, v, p)
b = []byte(*s)
err = marshalMapToXmlIndent(false, s, rt, v, p)
b = s.Bytes()
}

return b, err
Expand Down Expand Up @@ -143,46 +147,52 @@ func AnyXmlIndent(v interface{}, prefix, indent string, tags ...string) ([]byte,
}

var err error
s := new(string)
s := new(bytes.Buffer)
p := new(pretty)
p.indent = indent
p.padding = prefix

var ss string
var b []byte
switch v.(type) {
case []interface{}:
ss = "<" + rt + ">\n"
if _, err = s.WriteString("<" + rt + ">\n"); err != nil {
return nil, err
}
p.Indent()
for _, vv := range v.([]interface{}) {
switch vv.(type) {
case map[string]interface{}:
m := vv.(map[string]interface{})
if len(m) == 1 {
for tag, val := range m {
err = mapToXmlIndent(true, s, tag, val, p)
err = marshalMapToXmlIndent(true, s, tag, val, p)
}
} else {
p.start = 1 // we 1 tag in
err = mapToXmlIndent(true, s, et, vv, p)
*s += "\n"
err = marshalMapToXmlIndent(true, s, et, vv, p)
// *s += "\n"
if _, err = s.WriteString("\n"); err != nil {
return nil, err
}
}
default:
p.start = 0 // in case trailing p.start = 1
err = mapToXmlIndent(true, s, et, vv, p)
err = marshalMapToXmlIndent(true, s, et, vv, p)
}
if err != nil {
break
}
}
ss += *s + "</" + rt + ">"
b = []byte(ss)
if _, err = s.WriteString(`</` + rt + `>`); err != nil {
return nil, err
}
b = s.Bytes()
case map[string]interface{}:
m := Map(v.(map[string]interface{}))
b, err = m.XmlIndent(prefix, indent, rt)
default:
err = mapToXmlIndent(true, s, rt, v, p)
b = []byte(*s)
err = marshalMapToXmlIndent(true, s, rt, v, p)
b = s.Bytes()
}

return b, err
Expand Down
10 changes: 10 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,15 @@ mxj supplants the legacy x2j and j2x packages. If you want the old syntax, use m

https://github.com/clbanning/checkxml provides functions for validating XML data.

<h4>Refactor Encoder - 2020.05.01</h4>
Issue #70 highlighted that encoding large maps does not scale well, since the original logic used string append operations. Using bytes.Buffer results in linear scaling for very large XML docs. (Metrics based on MacBook Pro i7 w/ 16 GB.)

Nodes m.Xml() time
54809 12.53708ms
109780 32.403183ms
164678 59.826412ms
482598 109.358007ms

<h4>Refactor Decoder - 2015.11.15</h4>
For over a year I've wanted to refactor the XML-to-map[string]interface{} decoder to make it more performant. I recently took the time to do that, since we were using github.com/clbanning/mxj in a production system that could be deployed on a Raspberry Pi. Now the decoder is comparable to the stdlib JSON-to-map[string]interface{} decoder in terms of its additional processing overhead relative to decoding to a structure value. As shown by:

Expand All @@ -21,6 +30,7 @@ For over a year I've wanted to refactor the XML-to-map[string]interface{} decode

<h4>Notices</h4>

2020.05.01: v2.2 - optimize map to XML encoding for large XML docs.
2019.07.04: v2.0 - remove unnecessary methods - mv.XmlWriterRaw, mv.XmlIndentWriterRaw - for Map and MapSeq.
2019.07.04: Add MapSeq type and move associated functions and methods from Map to MapSeq.
2019.01.21: DecodeSimpleValuesAsMap - decode to map[<tag>:map["#text":<value>]] rather than map[<tag>:<value>]
Expand Down
Loading

0 comments on commit e8ece66

Please sign in to comment.