modified: src/Makefile

modified:   src/document.go
	new file:   src/entitymap.go
This commit is contained in:
jim teeuwen 2009-11-23 18:28:44 +01:00
parent e0fdc48cf7
commit 4857865daf
3 changed files with 280 additions and 1 deletions

View File

@ -2,7 +2,7 @@
include $(GOROOT)/src/Make.$(GOARCH)
TARG=xmlx
GOFILES=document.go node.go io.go\
GOFILES=document.go node.go io.go entitymap.go\
include $(GOROOT)/src/Make.pkg

View File

@ -68,6 +68,15 @@ func New() *Document {
}
}
// LoadExtendedEntityMap fills the document's Entity map with the complete
// table of non-standard XML escape sequences listed at
// http://www.w3.org/TR/html4/sgml/entities.html, so that the parser can map
// them to their characters.
//
// The table is rather large. When only a handful of entities is needed it is
// advised to set them by hand through the document.Entity map instead, and to
// call this method only if the entire set is really required.
func (this *Document) LoadExtendedEntityMap() {
	entities := &this.Entity;
	entitymap_load(entities);
}
func (this *Document) String() string {
s, _ := this.SaveString();
return s;

270
src/entitymap.go Normal file
View File

@ -0,0 +1,270 @@
package xmlx
/*
Portions © International Organization for Standardization 1986
Permission to copy in any form is granted for use with
conforming SGML systems and applications as defined in
ISO 8879, provided this notice is included in all copies.
*/
// entitymap_load stores the 252 character entities defined by HTML 4
// (http://www.w3.org/TR/html4/sgml/entities.html) in the map that em points
// to. Keys are entity names without the surrounding '&' and ';'; values are
// the characters they stand for. Entries already present under the same name
// are overwritten, all other entries are left untouched.
//
// If em points to a nil map, a new map is allocated and stored through em
// before it is filled. A nil em is a no-op, since there is nowhere to store
// the result.
//
// Invisible, whitespace and easily normalised characters are written as \u
// escapes so that their exact code points survive editors and remain
// reviewable; the code points are the ones given in the HTML 4 entity tables.
func entitymap_load(em *map[string]string) {
	if em == nil {
		return;
	}
	if *em == nil {
		// Assigning to a nil map panics, so allocate one for the caller.
		*em = make(map[string]string);
	}
	m := *em;

	// Generic entities
	m["nbsp"] = "\u00A0"; // no-break space
	m["iexcl"] = "¡";
	m["cent"] = "¢";
	m["pound"] = "£";
	m["curren"] = "¤";
	m["yen"] = "¥";
	m["brvbar"] = "¦";
	m["sect"] = "§";
	m["uml"] = "¨";
	m["copy"] = "©";
	m["ordf"] = "ª";
	m["laquo"] = "«";
	m["not"] = "¬";
	m["shy"] = "\u00AD"; // soft hyphen
	m["reg"] = "®";
	m["macr"] = "¯";
	m["deg"] = "°";
	m["plusmn"] = "±";
	m["sup2"] = "²";
	m["sup3"] = "³";
	m["acute"] = "´";
	m["micro"] = "µ";
	m["para"] = "¶";
	m["middot"] = "·";
	m["cedil"] = "¸";
	m["sup1"] = "¹";
	m["ordm"] = "º";
	m["raquo"] = "»";
	m["frac14"] = "¼";
	m["frac12"] = "½";
	m["frac34"] = "¾";
	m["iquest"] = "¿";
	m["Agrave"] = "À";
	m["Aacute"] = "Á";
	m["Acirc"] = "Â";
	m["Atilde"] = "Ã";
	m["Auml"] = "Ä";
	m["Aring"] = "Å";
	m["AElig"] = "Æ";
	m["Ccedil"] = "Ç";
	m["Egrave"] = "È";
	m["Eacute"] = "É";
	m["Ecirc"] = "Ê";
	m["Euml"] = "Ë";
	m["Igrave"] = "Ì";
	m["Iacute"] = "Í";
	m["Icirc"] = "Î";
	m["Iuml"] = "Ï";
	m["ETH"] = "Ð";
	m["Ntilde"] = "Ñ";
	m["Ograve"] = "Ò";
	m["Oacute"] = "Ó";
	m["Ocirc"] = "Ô";
	m["Otilde"] = "Õ";
	m["Ouml"] = "Ö";
	m["times"] = "×";
	m["Oslash"] = "Ø";
	m["Ugrave"] = "Ù";
	m["Uacute"] = "Ú";
	m["Ucirc"] = "Û";
	m["Uuml"] = "Ü";
	m["Yacute"] = "Ý";
	m["THORN"] = "Þ";
	m["szlig"] = "ß";
	m["agrave"] = "à";
	m["aacute"] = "á";
	m["acirc"] = "â";
	m["atilde"] = "ã";
	m["auml"] = "ä";
	m["aring"] = "å";
	m["aelig"] = "æ";
	m["ccedil"] = "ç";
	m["egrave"] = "è";
	m["eacute"] = "é";
	m["ecirc"] = "ê";
	m["euml"] = "ë";
	m["igrave"] = "ì";
	m["iacute"] = "í";
	m["icirc"] = "î";
	m["iuml"] = "ï";
	m["eth"] = "ð";
	m["ntilde"] = "ñ";
	m["ograve"] = "ò";
	m["oacute"] = "ó";
	m["ocirc"] = "ô";
	m["otilde"] = "õ";
	m["ouml"] = "ö";
	m["divide"] = "÷";
	m["oslash"] = "ø";
	m["ugrave"] = "ù";
	m["uacute"] = "ú";
	m["ucirc"] = "û";
	m["uuml"] = "ü";
	m["yacute"] = "ý";
	m["thorn"] = "þ";
	m["yuml"] = "ÿ";

	// Mathematical, Greek and Symbolic characters for HTML
	m["fnof"] = "ƒ";
	m["Alpha"] = "Α";
	m["Beta"] = "Β";
	m["Gamma"] = "Γ";
	m["Delta"] = "Δ";
	m["Epsilon"] = "Ε";
	m["Zeta"] = "Ζ";
	m["Eta"] = "Η";
	m["Theta"] = "Θ";
	m["Iota"] = "Ι";
	m["Kappa"] = "Κ";
	m["Lambda"] = "Λ";
	m["Mu"] = "Μ";
	m["Nu"] = "Ν";
	m["Xi"] = "Ξ";
	m["Omicron"] = "Ο";
	m["Pi"] = "Π";
	m["Rho"] = "Ρ";
	m["Sigma"] = "Σ";
	m["Tau"] = "Τ";
	m["Upsilon"] = "Υ";
	m["Phi"] = "Φ";
	m["Chi"] = "Χ";
	m["Psi"] = "Ψ";
	m["Omega"] = "Ω";
	m["alpha"] = "α";
	m["beta"] = "β";
	m["gamma"] = "γ";
	m["delta"] = "δ";
	m["epsilon"] = "ε";
	m["zeta"] = "ζ";
	m["eta"] = "η";
	m["theta"] = "θ";
	m["iota"] = "ι";
	m["kappa"] = "κ";
	m["lambda"] = "λ";
	m["mu"] = "μ";
	m["nu"] = "ν";
	m["xi"] = "ξ";
	m["omicron"] = "ο";
	m["pi"] = "π";
	m["rho"] = "ρ";
	m["sigmaf"] = "ς";
	m["sigma"] = "σ";
	m["tau"] = "τ";
	m["upsilon"] = "υ";
	m["phi"] = "φ";
	m["chi"] = "χ";
	m["psi"] = "ψ";
	m["omega"] = "ω";
	m["thetasym"] = "ϑ";
	m["upsih"] = "ϒ";
	m["piv"] = "ϖ";
	m["bull"] = "•";
	m["hellip"] = "…";
	m["prime"] = "′";
	m["Prime"] = "″";
	m["oline"] = "‾";
	m["frasl"] = "⁄";
	m["weierp"] = "℘";
	m["image"] = "ℑ";
	m["real"] = "ℜ";
	m["trade"] = "™";
	m["alefsym"] = "ℵ";
	m["larr"] = "←";
	m["uarr"] = "↑";
	m["rarr"] = "→";
	m["darr"] = "↓";
	m["harr"] = "↔";
	m["crarr"] = "↵";
	m["lArr"] = "⇐";
	m["uArr"] = "⇑";
	m["rArr"] = "⇒";
	m["dArr"] = "⇓";
	m["hArr"] = "⇔";
	m["forall"] = "∀";
	m["part"] = "∂";
	m["exist"] = "∃";
	m["empty"] = "∅";
	m["nabla"] = "∇";
	m["isin"] = "∈";
	m["notin"] = "∉";
	m["ni"] = "∋";
	m["prod"] = "∏";
	m["sum"] = "∑";
	m["minus"] = "−";
	m["lowast"] = "∗";
	m["radic"] = "√";
	m["prop"] = "∝";
	m["infin"] = "∞";
	m["ang"] = "∠";
	m["and"] = "∧";
	m["or"] = "∨";
	m["cap"] = "∩";
	m["cup"] = "∪";
	m["int"] = "∫";
	m["there4"] = "∴";
	m["sim"] = "∼";
	m["cong"] = "≅";
	m["asymp"] = "≈";
	m["ne"] = "≠";
	m["equiv"] = "≡";
	m["le"] = "≤";
	m["ge"] = "≥";
	m["sub"] = "⊂";
	m["sup"] = "⊃";
	m["nsub"] = "⊄";
	m["sube"] = "⊆";
	m["supe"] = "⊇";
	m["oplus"] = "⊕";
	m["otimes"] = "⊗";
	m["perp"] = "⊥";
	m["sdot"] = "⋅";
	m["lceil"] = "⌈";
	m["rceil"] = "⌉";
	m["lfloor"] = "⌊";
	m["rfloor"] = "⌋";
	// U+2329/U+232A as specified by HTML 4; escaped because Unicode
	// normalisation silently turns them into the CJK brackets U+3008/U+3009.
	m["lang"] = "\u2329";
	m["rang"] = "\u232A";
	m["loz"] = "◊";
	m["spades"] = "♠";
	m["clubs"] = "♣";
	m["hearts"] = "♥";
	m["diams"] = "♦";

	// Special characters for HTML
	m["quot"] = "\"";
	m["amp"] = "&";
	m["lt"] = "<";
	m["gt"] = ">";
	m["OElig"] = "Œ";
	m["oelig"] = "œ";
	m["Scaron"] = "Š";
	m["scaron"] = "š";
	m["Yuml"] = "Ÿ";
	m["circ"] = "ˆ";
	m["tilde"] = "˜";
	m["ensp"] = "\u2002";   // en space
	m["emsp"] = "\u2003";   // em space
	m["thinsp"] = "\u2009"; // thin space
	m["zwnj"] = "\u200C";   // zero width non-joiner
	m["zwj"] = "\u200D";    // zero width joiner
	m["lrm"] = "\u200E";    // left-to-right mark
	m["rlm"] = "\u200F";    // right-to-left mark
	m["ndash"] = "–";
	m["mdash"] = "—";
	m["lsquo"] = "‘";
	m["rsquo"] = "’";
	m["sbquo"] = "‚";
	m["ldquo"] = "“";
	m["rdquo"] = "”";
	m["bdquo"] = "„";
	m["dagger"] = "†";
	m["Dagger"] = "‡";
	m["permil"] = "‰";
	m["lsaquo"] = "‹";
	m["rsaquo"] = "›";
	m["euro"] = "€";
}