// normalize.js
  1. (function(){
  // Normalizes an HTML fragment according to the tag/attribute options.
  //
  // First call: `html` is a markup string. It is parsed inside a temporary <div>,
  // every descendant element is initialised and queued, the state machine is run
  // and the resulting markup of that <div> is returned.
  // Recursive call: `html` is a tag record {$: jQuery element, opt: options}; only
  // its children are initialised and nothing is returned.
  //
  // customOpt (optional, first call only) is deep-merged over the base options.
  //
  // NOTE(review): the markup is parsed with $(), which builds live DOM nodes; for
  // untrusted input, handlers such as <img onerror> may fire before any filtering
  // happens - consider parsing into an inert document instead.
  $.normalize = function(html, customOpt){
    var root$, opt;
    // Merge into a fresh object: extending baseOpt itself would make one call's
    // custom options leak into every later call that passes custom options.
    opt = html.opt || (customOpt
      ? prepareOptTags($.extend(true, {}, baseOpt, customOpt))
      : defaultOpt);
    if(!html.opt){
      // first call
      unstableList.length = 0; // drop state from previous run (in case there has been error)
      root$ = $('<div>'+html+'</div>');
    }
    // initial recursion
    (html.$ || root$).contents().each(function(){
      if(this.nodeType === this.TEXT_NODE) {
        // collapse leading/trailing runs of spaces and newlines to a single space
        this.textContent = this.textContent.replace(/^[ \n]+|[ \n]+$/g, ' ');
        return;
      }
      var a = {$: $(this), opt: opt};
      initTag(a);
      $.normalize(a);
    });
    if(root$){
      stateMachine();
      return root$.html();
    }
  };
  // Base configuration shared by every run.
  //  hierarchy - tag whitelist; a tag's index becomes its `order`. parentOrderWrap
  //              moves the tag with the lower order to the outside, and blockTag
  //              treats every tag listed before 'a' as block level.
  //              NOTE: 'h1' is listed twice; indexOf resolves to the first entry.
  //  tags      - per-tag settings:
  //                attrs   - attributes allowed in addition to the global `attrs`
  //                exclude - tags that are unwrapped when found directly inside
  //                empty   - tag may stay in the document without content
  //                space   - not read by the code visible in this file
  //  convert   - source tag -> replacement tag
  //  attrs     - attributes allowed on every tag
  //  mutate    - mutators run, in this order, by the state machine
  var baseOpt = {
    hierarchy: [
      'div', 'pre', 'ol', 'ul', 'li',
      'h1', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'a', // block
      'b', 'code', 'i', 'span', 's', 'sub', 'sup', 'u', // inline
      'br', 'img' // empty
    ],
    tags: {
      a: {attrs: {href: 1}, exclude: {a: 1}},
      b: {exclude: {b: 1, p: 1}},
      br: {empty: 1},
      i: {exclude: {i: 1, p: 1}},
      img: {attrs: {src: 1}, empty: 1},
      span: {exclude: {p: 1, ul: 1, ol: 1, li: 1, br: 1}},
      s: {space: 1},
      u: {exclude: {u: 1, p: 1}, space: 1}
    },
    convert: {
      em: 'i',
      strong: 'b',
      strike: 's'
    },
    attrs: {
      id: 1,
      'class': 1,
      style: 1
    },
    // true when the record's tag is ordered before 'a' in the hierarchy
    blockTag: function(a){
      var tagOpts = a.opt.tags;
      return tagOpts[a.tag].order < tagOpts.a.order;
    },
    mutate: [exclude, moveSpaceUp, next, parentOrderWrap]
  };
  // defaultOpt:   prepared deep copy of baseOpt, used when no custom options are given.
  // unstableList: tag records still waiting for a pass through the state machine.
  var defaultOpt = prepareOptTags($.extend(true, {}, baseOpt)),
      unstableList = [];
  // Queues a tag record for another pass through the state machine.
  // Records that are already queued (a.unstable is set) are not queued twice.
  // Throws an Error when `a` is missing or carries no tag; the offending value is
  // attached to the error as `node` to help debugging.
  function addUnstable(a) {
    if(!a || !a.tag) {
      var err = new Error("addUnstable: record has no tag");
      err.node = a;
      throw err;
    }
    if(a.unstable) return;
    unstableList.push(a);
    a.unstable = true;
  }
  // First-time handling of a freshly discovered node (container, convert, attributes).
  // Sets a.tag; if the node turns out to be removable as empty nothing else happens
  // and undefined is returned. Otherwise the attributes are filtered, the tag is
  // converted if needed, the kept attributes are written back, the DOM node gets a
  // back reference to its record and the record is queued for the state machine.
  function initTag(a) {
    a.tag = tag(a.$);
    if(!empty(a)) {
      parseAndRemoveAttrs(a);
      convert(a);
      setAttrs(a);
      // link from dom element back to a
      a.$[0].a = a;
      // state machine init
      a.unstable = true;
      unstableList.push(a);
      return a;
    }
  }
  // Drains unstableList (last queued first). Each record is marked stable and then
  // handed to the mutators in opt.mutate, in order; a mutator returning false stops
  // the remaining mutators for that record. Mutators may queue more records through
  // the addUnstable callback, which keeps the loop going.
  function stateMachine() {
    var current;
    var runMutator = function(index, mutator){
      return mutator && mutator(current, addUnstable);
    };
    // PERF: running index is probably faster than shift (mutates array)
    for(current = unstableList.pop(); current; current = unstableList.pop()) {
      current.unstable = false;
      $(current.opt.mutate).each(runMutator);
    }
  }
  // Completes an options object in place and returns it.
  // - every key of opt.tags must be listed in opt.hierarchy, otherwise an Error is thrown
  // - every tag of opt.hierarchy gets an entry in opt.tags with:
  //     attrs - its own allowed attributes merged with the global opt.attrs
  //     name  - the tag name (debug help)
  //     order - index of the first occurrence of the tag in opt.hierarchy
  function prepareOptTags(opt) {
    var tags = opt.tags, name;
    for(name in tags) {
      if(opt.hierarchy.indexOf(name) === -1)
        throw new Error('tag "'+name+'" is missing hierarchy definition');
    }
    opt.hierarchy.forEach(function(name){
      var tagOpt = tags[name] || (tags[name] = {});
      // fresh object per tag, so tags never share (and cannot corrupt) opt.attrs
      tagOpt.attrs = $.extend({}, tagOpt.attrs, opt.attrs);
      tagOpt.name = name; // not used, debug help (REMOVE later?)
      tagOpt.order = opt.hierarchy.indexOf(name);
    });
    return opt;
  }
  // GENERAL UTILS
  // Safe nested lookup. `args` is the path: an array of keys, or a single key
  // given as a string. Returns the value at the end of the path, or undefined as
  // soon as a step on the way is null/undefined (including the root `o`).
  // An empty path returns `o` itself.
  function get(o, args){
    var path = typeof args === 'string' ? [args] : args,
        i, l = path.length;
    for(i = 0; i < l; i++) {
      if(o == null)
        return undefined;
      o = o[path[i]];
    }
    return o;
  }
  // True when `prop` is an own (not inherited) property of `obj`. Goes through
  // Object.prototype so it also works for objects without a hasOwnProperty method.
  function has(obj, prop){
    var ownCheck = Object.prototype.hasOwnProperty;
    return ownCheck.call(obj, prop);
  }
  // ELEMENT UTILS
  // Lower-cased node name of the first node matched by `e` (anything $() accepts);
  // an empty string when nothing matches.
  function tag(e){
    var node = $(e)[0];
    var nodeName = node ? node.nodeName : undefined;
    return (nodeName || '').toLowerCase();
  }
  // jQuery-wrapped direct sibling node of `e` (text nodes included):
  // the next sibling when `d` is truthy, the previous one otherwise.
  // The result is an empty jQuery object when there is no such sibling.
  function joint(e, d){
    var siblingProp = d ? 'nextSibling' : 'previousSibling';
    var node = $(e)[0] || {};
    return $(node[siblingProp]);
  }
  // An attribute value is reduced to letters and colons before it is searched for
  // "javascript:", so variants such as url("javascript: are caught as well.
  // Open question from the original author: stylesheets can apparently have XSS?
  var xssattr = /[^a-z:]/ig, xssjs = /javascript:/ig;
  // Creates a {name: value} object from the attributes of the first element in `e`.
  // - attributes whose value looks like a javascript: URL are removed from the
  //   element and left out of the result
  // - filterCb(value, name, element$), when given, supplies the value to store;
  //   returning undefined or false leaves the attribute out of the result
  function attrsAsObj(e, filterCb){
    var collected = {};
    var el$ = $(e);
    if(!el$ || !el$.length)
      return collected;
    $(el$[0].attributes || []).each(function(index, attrNode){
      var name = attrNode.nodeName || attrNode.name;
      var value = el$.attr(name);
      if(value.replace(xssattr, '').match(xssjs)) {
        el$.removeAttr(name);
        return;
      }
      if(filterCb)
        value = filterCb(value, name, el$);
      if(value !== undefined && value !== false)
        collected[name] = value;
    });
    return collected;
  }
  // True when both records carry the same attribute names with the same values.
  // The comparison ignores the order in which the attributes were collected, so
  // <b id="x" class="y"> and <b class="y" id="x"> count as equal.
  // Records without an attribute object only equal each other.
  function sameAttrs(a, b) {
    var left = a.attr, right = b.attr, names, i, name;
    if(left == null || right == null)
      return left === right;
    names = Object.keys(left);
    if(names.length !== Object.keys(right).length)
      return false;
    for(i = 0; i < names.length; i++) {
      name = names[i];
      if(!Object.prototype.hasOwnProperty.call(right, name) || left[name] !== right[name])
        return false;
    }
    return true;
  }
  // INITIAL MUTATORS
  // Strips every attribute from the element and remembers the allowed ones.
  // The whitelist is taken from the options of the tag the element will have
  // after conversion. Results are stored on the record:
  //   a.attrs - names of the kept attributes
  //   a.attr  - {name: value} of the kept attributes; undefined (and the element
  //             left untouched) when the tag has no options entry
  function parseAndRemoveAttrs(a) {
    a.attrs = [];
    var targetTag = a.opt.convert[a.tag] || a.tag,
        tOpt = a.opt.tags[targetTag];
    a.attr = tOpt && attrsAsObj(a.$, function(value, name){
      var key = name.toLowerCase();
      a.$.removeAttr(name);
      // own-property check: names such as "constructor" or "__proto__" must not
      // pass the whitelist through properties inherited from Object.prototype
      if(has(tOpt.attrs, key) && tOpt.attrs[key]){
        a.attrs.push(name);
        return value;
      }
    });
  }
  // Writes the remembered attributes back onto the element.
  // With a.attrs present the names are sorted first (the array is sorted in
  // place) and each value is looked up in a.attr under the lower-cased name;
  // without it every key/value pair of a.attr is written as is.
  function setAttrs(a){
    if(a.attrs) {
      $(a.attrs.sort()).each(function(index, name){
        a.$.attr(name, a.attr[name.toLowerCase()]);
      });
    } else {
      $.each(a.attr, function(key, value){
        a.$.attr(key, value);
      });
    }
  }
  // Swaps the element for its replacement tag when opt.convert has an entry for
  // the current tag: a new element of the target tag takes over the children,
  // replaces the old element in the DOM, and a.$ / a.tag are updated to match.
  // NOTE(review): the lookup is not restricted to own properties, so a tag named
  // like an Object.prototype member (e.g. "constructor") yields a non-string here
  // and toLowerCase throws.
  function convert(a){
    var target = a.opt.convert[a.tag];
    if(!target)
      return;
    var old$ = a.$;
    a.tag = target.toLowerCase();
    var fresh$ = $('<' + a.tag + '>');
    fresh$.append(old$.contents());
    a.$ = fresh$;
    old$.replaceWith(fresh$);
  }
  // LOOPING (STATE MACHINE) MUTATORS
  // Unwraps the element (keeps its children, drops the tag itself) when its tag
  // has no options entry or when the parent's `exclude` list names it.
  // If exactly one child remains and it has a record, that child is queued again.
  // Returns false after unwrapping so the state machine skips the remaining
  // mutators for this record; returns undefined otherwise.
  function exclude(a, addUnstable){
    var ownOpt = get(a.opt, ['tags', a.tag]);
    var parentOpt = get(a.opt, ['tags', tag(a.$.parent())]);
    var forbidden = !ownOpt || (parentOpt && get(parentOpt, ['exclude', a.tag]));
    if(!forbidden)
      return;
    var kids = a.$.contents();
    a.$.replaceWith(kids);
    if(kids.length === 1 && kids[0].a)
      addUnstable(kids[0].a);
    return false;
  }
  // Tries to move a leading and a trailing space out of the element (both sides
  // are always attempted). When at least one space moved:
  //  - an element left without text goes through empty()
  //  - otherwise, if the first child node has a record, parentOrderWrap is run on it
  function moveSpaceUp(a, addUnstable){
    var node = a.$[0];
    var movedFront = moveSpace(node, true);
    var movedBack = moveSpace(node, false);
    if(!(movedFront + movedBack))
      return;
    // either front, back or both spaces moved
    if(node.textContent === '') {
      empty(a);
      return;
    }
    var first = a.$.contents()[0];
    if(first && first.a)
      parentOrderWrap(first.a, addUnstable);
  }
  // Moves one space from the edge of element `n` to just outside of it.
  //   bef truthy - leading space of the first child, moved in front of `n`
  //   bef falsy  - trailing space of the last child, moved behind `n`
  // Nothing is moved when the edge child is not a text node with such a space,
  // or when `n` is a block tag. Returns 1 when the space was taken out of `n`,
  // 0 otherwise.
  function moveSpace(n, bef) {
    // all helpers are declared locally; they used to leak as implicit globals
    var childRe = bef? /^ / : / $/,
        parentRe = bef? / $/ : /^ /,
        c = bef? 'firstChild' : 'lastChild',
        s = bef? 'previousSibling' : 'nextSibling',
        sAdd = bef? 'after' : 'before',
        pAdd = bef? 'prepend' : 'append',
        n2;
    if(!n || !n[c] || n[c].nodeType !== n.TEXT_NODE || !n[c].wholeText.match(childRe)) {
      return 0;
    }
    if((n2 = n[s]) && !n.a.opt.blockTag(n.a)) {
      // there is a neighbour on that side
      if(n2.nodeType === 3 && !n2.textContent.match(parentRe)) {
        // text neighbour without a space at the touching edge: give it one
        n2.textContent = (bef?'':' ') + n2.textContent + (bef?' ':'');
      } else if(n2.nodeType === 1) {
        // element neighbour: put the space between it and `n`
        $(n2)[sAdd](' ');
      }
    } else if((n2 = n.parentNode) && !n.a.opt.blockTag(n.a)) {
      // no neighbour: the space goes to the matching edge of the parent
      $(n2)[pAdd](' ');
    } else {
      return 0;
    }
    n[c].textContent = n[c].wholeText.replace(childRe, '');
    if(!n[c].wholeText.length)
      $(n[c]).remove();
    return 1;
  }
  // Merges the element into its next sibling when both have the same tag and the
  // same attributes: the children are moved to the front of the sibling, the
  // emptied element goes through empty(), and the sibling plus its first child
  // element are queued again.
  // With opt.spaceMerge set, a sibling text node consisting of a single space is
  // skipped: it is moved into the following element and the merge is retried
  // against that element.
  // `t` (optional) is the jQuery-wrapped sibling to merge into; it defaults to the
  // next sibling node. Block tags are never merged.
  // NOTE(review): empty() keeps an element that still has attributes, so after a
  // merge of two elements with attributes the emptied one stays in the document -
  // confirm this is intended.
  function next(a, addUnstable, t){
    var t2;
    t = t || joint(a.$, true);
    if(!t.length || a.opt.blockTag(a))
      return;
    if(a.opt.spaceMerge && t.length===1 && t[0].nodeType === 3 && t[0].wholeText===' '){
      t2 = joint(t, true);
      // a node without a record (e.g. plain text) cannot be merged into
      if(!t2.length || !t2[0].a || a.opt.blockTag(t2[0].a))
        return;
      t.remove();
      t2.prepend(' ');
      return next(a, addUnstable, t2);
    }
    if(!t[0].a || a.tag !== t[0].a.tag || !sameAttrs(a, t[0].a))
      return;
    t.prepend(a.$.contents());
    empty(a);
    addUnstable(t[0].a);
    t = t.children(":first");
    if(t.length && t[0].a)
      addUnstable(t[0].a);
  }
  // Removes the node from the document when it is dispensable: its tag is not
  // flagged `empty` in the options, it has no child nodes and no attributes.
  // Nodes without an attribute collection (comments and other non-element nodes)
  // count as having no attributes, so they are removed instead of raising an error.
  // Returns true when the node was removed, undefined otherwise.
  function empty(a){
    var t = a.opt.tags[a.tag],
        attributes = a.$[0].attributes;
    if((!t || !t.empty) && !a.$.contents().length && !(attributes && attributes.length)){
      a.$.remove();
      return true; // NOTE true/false - different API than in exclude
    }
  }
  // Swaps the element with its parent when it is the parent's only child node and
  // the parent's tag has a higher `order` than the element's tag: the element is
  // moved out behind the parent, hands its children to the parent and then takes
  // the parent in as its own child. Both records are queued again afterwards.
  function parentOrderWrap(a, addUnstable){
    var parent$ = a.$.parent();
    var siblings = parent$.contents();
    if(siblings.length !== 1 || siblings[0] !== a.$[0])
      return;
    var tagOpts = a.opt.tags;
    var parentOpt = tagOpts[tag(parent$)];
    if(!parentOpt || !(parentOpt.order > tagOpts[a.tag].order))
      return;
    parent$.after(a.$);
    parent$.append(a.$.contents());
    a.$.append(parent$);
    addUnstable(parent$[0].a);
    addUnstable(a);
  }
  261. })();