// normalize.js
  1. (function(){
  // Normalizes an HTML string against the tag/attribute options and returns
  // the resulting markup.
  //
  // html      - HTML string on the public call. The function also recurses
  //             into itself with a tag record `{$: jQuery, opt: options}`.
  // customOpt - optional overrides, deep-merged over a copy of `baseOpt`.
  //
  // Returns the normalized HTML string on the public call and undefined on
  // the internal recursive calls.
  $.normalize = function(html, customOpt){
    html = html || '';
    var isFirstCall = !html.opt,
        opt, root$;
    if (isFirstCall) {
      // Merge into a fresh object: extending `baseOpt` itself would leak one
      // caller's overrides into every later customised call.
      opt = customOpt ? prepareOptTags($.extend(true, {}, baseOpt, customOpt))
                      : defaultOpt;
      unstableList.length = 0; // drop state from previous run (in case there has been error)
      // NOTE(review): $() builds these nodes in the current document, so
      // markup such as <img onerror=...> may act before it is normalized —
      // confirm callers pass trusted input or parse in an inert document.
      root$ = $('<div>' + html + '</div>');
    } else {
      opt = html.opt;
    }
    // initial recursion
    (html.$ || root$).contents().each(function(){
      if (this.nodeType === this.TEXT_NODE) {
        // collapse leading/trailing runs of spaces and newlines to one space
        this.textContent = this.textContent.replace(/^[ \n]+|[ \n]+$/g, ' ');
        return;
      }
      if (this.nodeType !== this.ELEMENT_NODE) {
        // comments and other non-element nodes carry no tag options and have
        // no `attributes` collection; drop them instead of crashing in initTag
        $(this).remove();
        return;
      }
      var a = {$: $(this), opt: opt};
      initTag(a);
      $.normalize(a);
    });
    if (root$) {
      stateMachine();
      return root$.html();
    }
  };
  // Stock normalization options. prepareOptTags() later adds `attrs`, `name`
  // and `order` to every entry of `tags`.
  var baseOpt = {
    // Nesting order: a tag listed earlier is expected to wrap a tag listed
    // later (see parentOrderWrap). Each tag is listed exactly once.
    hierarchy: [
      'div', 'pre', 'ol', 'ul', 'li',
      'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'a', // block
      'b', 'code', 'i', 'span', 's', 'sub', 'sup', 'u', // inline
      'br', 'img' // empty
    ],
    // Per-tag settings:
    //   attrs   - extra attribute names allowed on the tag
    //   exclude - child tag names that get unwrapped inside this tag
    //   empty   - the tag is kept even when it has no content
    tags: {
      'a': {attrs: {'href': 1}, exclude: {'a': 1}},
      'b': {exclude: {'b': 1, 'p': 1}},
      'br': {empty: 1},
      'i': {exclude: {'i': 1, 'p': 1}},
      'img': {attrs: {'src': 1}, empty: 1},
      'span': {exclude: {'p': 1, 'ul': 1, 'ol': 1, 'li': 1, 'br': 1}},
      's': {space: 1},
      'u': {exclude: {'u': 1, 'p': 1}, space: 1}
    },
    // Source tag name -> replacement tag name.
    convert: {
      'em': 'i', 'strong': 'b', 'strike': 's'
    },
    // Attribute names allowed on every tag.
    attrs: {
      'id': 1,
      'class': 1,
      'style': 1
    },
    // True when the record's tag sorts strictly before 'a' in the hierarchy.
    // A tag without an options entry is reported as non-block instead of
    // throwing.
    blockTag: function(a){
      var tags = a.opt.tags,
          own = tags[a.tag];
      return !!own && own.order < tags.a.order;
    },
    // Mutators run, in this order, on every unstable tag record.
    mutate: [exclude, moveSpaceUp, next, parentOrderWrap]
  };
  // unstableList: tag records still waiting for a mutator pass; filled by
  //               initTag/addUnstable and drained by stateMachine.
  // defaultOpt:   prepared deep copy of baseOpt, used when $.normalize is
  //               called without customOpt.
  var unstableList = [],
      defaultOpt = prepareOptTags($.extend(true, {}, baseOpt));
  // Queues a tag record for another mutator pass unless it is already queued.
  //
  // a - tag record; must carry a non-empty `tag`.
  //
  // Throws an Error naming the offending node when `a` is missing or has no
  // tag.
  function addUnstable(a) {
    if (!a || !a.tag) {
      // Describe the node by name only: the record links back to DOM nodes,
      // so serialising it wholesale is not safe.
      var node = a && a.$ && a.$[0];
      throw new Error("not tag in " + (node && node.nodeName ? node.nodeName : String(a)));
    }
    if (a.unstable) {
      return;
    }
    unstableList.push(a);
    a.unstable = true;
  }
  // First-visit handling of a tag record: resolves the tag name, drops the
  // element if it is empty, filters and re-applies attributes, converts the
  // tag, and queues the record for the state machine.
  //
  // Returns the record, or undefined when the element was removed as empty.
  function initTag(a) {
    a.tag = tag(a.$);
    var wasRemoved = empty(a);
    if (wasRemoved) {
      return;
    }

    // attributes are read and stripped before conversion and written back
    // afterwards, so they end up on the converted element
    parseAndRemoveAttrs(a);
    convert(a);
    setAttrs(a);

    // link from the DOM element back to its record
    var node = a.$[0];
    node.a = a;

    // state machine init
    unstableList.push(a);
    a.unstable = true;
    return a;
  }
  // Drains unstableList: each record is popped (last queued first), marked
  // stable and passed through its `opt.mutate` functions in order. A mutator
  // returning false stops the remaining mutators for that record; mutators
  // may queue more records through addUnstable.
  function stateMachine() {
    var current;
    var runMutator = function(index, mutator){
      return mutator && mutator(current, addUnstable);
    };
    while ((current = unstableList.pop())) {
      current.unstable = false;
      $(current.opt.mutate).each(runMutator);
    }
  }
  // Completes `opt.tags` in place so every tag in `opt.hierarchy` has an
  // entry carrying:
  //   attrs - its own allowed attributes merged with the global `opt.attrs`
  //   name  - the tag name (debug help)
  //   order - index of the tag's first occurrence in `opt.hierarchy`
  //
  // Throws when `opt.tags` configures a tag that is absent from the hierarchy.
  // Returns the same `opt` object.
  function prepareOptTags(opt) {
    var tags = opt.tags,
        hierarchy = opt.hierarchy,
        name;
    for (name in tags) {
      if (hierarchy.indexOf(name) === -1)
        throw Error('tag "'+name+'" is missing hierachy definition');
    }
    hierarchy.forEach(function(name){
      var tagOpt = tags[name] || (tags[name] = {});
      // Always merge into a fresh object so no two tags (and not the global
      // `opt.attrs`) end up sharing one attribute map.
      tagOpt.attrs = $.extend({}, tagOpt.attrs, opt.attrs);
      tagOpt.name = name; // not used, debug help
      tagOpt.order = hierarchy.indexOf(name);
    });
    return opt;
  }
  109. // GENERAL UTILS
  // Safe nested lookup.
  //
  // o    - root object (may be null/undefined)
  // args - a single property name, or an array of property names to follow
  //
  // Returns the value at the end of the path. Returns undefined when the
  // path is empty or an intermediate value is null/undefined.
  function get(o, args){
    // a plain string is one whole key, not a list of characters
    var path = typeof args === 'string' ? [args] : args,
        count = path.length,
        current = o,
        i;
    if (count === 0) {
      return undefined;
    }
    for (i = 0; i < count; i++) {
      if (current == null) {
        return undefined;
      }
      current = current[path[i]];
    }
    return current;
  }
  // True when `prop` is an own (non-inherited) property of `obj`.
  function has(obj, prop){
    var ownCheck = Object.prototype.hasOwnProperty;
    return ownCheck.call(obj, prop);
  }
  120. // ELEMENT UTILS
  // Lower-cased node name of the first node matched by `e`; '' when nothing
  // matches or the node has no name.
  function tag(e){
    var node = $(e)[0] || {};
    var rawName = node.nodeName || '';
    return rawName.toLowerCase();
  }
  // jQuery-wrapped neighbour of the first node matched by `e`: the next
  // sibling when `d` is truthy, the previous sibling otherwise. The result is
  // an empty set when there is no such node.
  function joint(e, d){
    var side = d ? 'nextSibling' : 'previousSibling';
    var node = $(e)[0] || {};
    return $(node[side]);
  }
  // xssattr strips everything except ASCII letters and ':' so that an
  // obfuscated "javascript:" scheme can still be matched by xssjs.
  var xssattr = /[^a-z:]/ig, xssjs = /javascript:/ig;
  // url("javascript: // and all permutations
  // stylesheets can apparently have XSS?

  // Builds a name -> value object from the attributes of the first element
  // matched by `e`.
  //
  // An attribute whose letters-and-colons-only form contains "javascript:"
  // is removed from the element and skipped.
  //
  // filterCb - optional function(value, name, element$). Its return value
  //            replaces the attribute value; returning undefined or false
  //            leaves the attribute out of the result.
  //
  // Returns the collected object; empty when `e` matches nothing.
  function attrsAsObj(e, filterCb){
    var collected = {};
    var el$ = $(e);
    if (!el$ || !el$.length) {
      return collected;
    }
    $(el$[0].attributes || []).each(function(index, attrNode){
      var attrName = attrNode.nodeName || attrNode.name;
      var attrValue = el$.attr(attrName);
      if (attrValue.replace(xssattr, '').match(xssjs)) {
        el$.removeAttr(attrName);
        return;
      }
      if (filterCb) {
        attrValue = filterCb(attrValue, attrName, el$);
      }
      if (attrValue !== undefined && attrValue !== false) {
        collected[attrName] = attrValue;
      }
    });
    return collected;
  }
  // True when two tag records carry the same attribute names with the same
  // values, regardless of the order in which the attributes were collected.
  // Two records that both lack an `attr` object compare equal; a record
  // without `attr` never equals one with it.
  function sameAttrs(a, b) {
    var left = a.attr,
        right = b.attr;
    if (left == null || right == null) {
      return left === right;
    }
    var leftKeys = Object.keys(left).sort(),
        rightKeys = Object.keys(right).sort(),
        i;
    if (leftKeys.length !== rightKeys.length) {
      return false;
    }
    for (i = 0; i < leftKeys.length; i++) {
      if (leftKeys[i] !== rightKeys[i] || left[leftKeys[i]] !== right[rightKeys[i]]) {
        return false;
      }
    }
    return true;
  }
  148. // INITIAL MUTATORS
  // Strips every attribute from the record's element and remembers the
  // allowed ones:
  //   a.attrs - names of the kept attributes, in encounter order
  //   a.attr  - name -> value map of the kept attributes; left undefined (and
  //             the element untouched) when the tag, after conversion, has no
  //             options entry
  //
  // All lookups are own-property checks, so names inherited from
  // Object.prototype (e.g. "constructor") are never treated as configured.
  function parseAndRemoveAttrs(a) {
    a.attrs = [];
    var convertMap = a.opt.convert,
        mapped = has(convertMap, a.tag) && convertMap[a.tag],
        tagName = mapped || a.tag,
        tOpt = has(a.opt.tags, tagName) ? a.opt.tags[tagName] : undefined;
    a.attr = tOpt && attrsAsObj(a.$, function(value, name){
      a.$.removeAttr(name);
      var key = name.toLowerCase();
      if (has(tOpt.attrs, key) && tOpt.attrs[key]) {
        a.attrs.push(name);
        return value;
      }
    });
  }
  // Writes the kept attributes back onto the record's element.
  //
  // When `a.attrs` (the list of kept names) exists it is sorted in place and
  // the attributes are written in that order; otherwise every entry of
  // `a.attr` is written as-is.
  function setAttrs(a){
    if (a.attrs) {
      a.attrs.sort().forEach(function(name){
        // values are stored under the attribute's original name; the
        // lower-cased lookup is only a fallback
        var value = a.attr[name];
        if (value === undefined) {
          value = a.attr[name.toLowerCase()];
        }
        a.$.attr(name, value);
      });
      return;
    }
    $.each(a.attr, function(name, value){
      a.$.attr(name, value);
    });
  }
  // Replaces the record's element with the tag configured in `opt.convert`
  // (e.g. em -> i), moving the element's contents over and updating `a.tag`
  // and `a.$`. Records whose tag has no conversion are left untouched.
  function convert(a){
    var map = a.opt.convert;
    // own-property lookup: a tag named like an Object.prototype member
    // ("constructor") must not resolve to an inherited function
    var target = map && has(map, a.tag) ? map[a.tag] : undefined;
    if (typeof target !== 'string' || !target) {
      return;
    }
    var old$ = a.$;
    a.tag = target.toLowerCase();
    a.$ = $('<' + a.tag + '>').append(old$.contents());
    old$.replaceWith(a.$);
  }
  176. // LOOPING (STATE MACHINE) MUTATORS
  // Unwraps the record's element (replaces it with its own contents) when its
  // tag has no options entry or when the parent's `exclude` map forbids it.
  // Every child that carries a tag record is queued again, because it now
  // sits under a different parent.
  //
  // Returns false after unwrapping, which stops the remaining mutators for
  // this record; returns undefined when the element is kept.
  function exclude(a, addUnstable){
    var tags = a.opt.tags || {},
        // own-property lookups: "constructor" etc. must not count as configured
        own = has(tags, a.tag) ? tags[a.tag] : undefined,
        parentName = tag(a.$.parent()),
        parentOpt = has(tags, parentName) ? tags[parentName] : undefined,
        banned = !!(parentOpt && parentOpt.exclude
                    && has(parentOpt.exclude, a.tag) && parentOpt.exclude[a.tag]);
    if (own && !banned) {
      return;
    }
    var kids = a.$.contents(),
        i, link;
    a.$.replaceWith(kids);
    for (i = 0; i < kids.length; i++) {
      link = kids[i].a;
      if (link) {
        addUnstable(link);
      }
    }
    return false;
  }
  // Moves a leading and/or trailing space out of the record's element (see
  // moveSpace). When anything moved: an element left without text goes
  // through empty(); otherwise, if the first remaining child carries a tag
  // record, that child is passed to parentOrderWrap.
  function moveSpaceUp(a, addUnstable){
    var node = a.$[0];
    var movedFront = moveSpace(node, true);
    var movedBack = moveSpace(node, false);
    if (!(movedFront + movedBack)) {
      return;
    }
    // either front, back or both spaces moved
    if (node.textContent === '') {
      empty(a);
      return;
    }
    var firstChild = a.$.contents()[0];
    if (firstChild && firstChild.a) {
      parentOrderWrap(firstChild.a, addUnstable);
    }
  }
  // Moves one edge space out of a non-block element `n` to the outside of it.
  //
  // n   - DOM element that carries a tag record in `n.a`
  // bef - true: handle the leading space of the first child;
  //       false: handle the trailing space of the last child
  //
  // The space goes to the neighbouring sibling (added to a text sibling that
  // lacks one, or inserted between `n` and an element sibling) or, without a
  // sibling, to the matching edge of the parent. Returns 1 when the space was
  // taken off the child and 0 when nothing changed.
  function moveSpace(n, bef) {
    var childRe = bef ? /^ / : / $/,
        parentRe = bef ? / $/ : /^ /,
        c = bef ? 'firstChild' : 'lastChild',
        s = bef ? 'previousSibling' : 'nextSibling',
        sAdd = bef ? 'after' : 'before',
        pAdd = bef ? 'prepend' : 'append',
        edge = n && n[c],
        target, viaParent;
    // Only the edge node's own text is inspected and rewritten: writing the
    // combined text of adjacent text nodes back into one node would
    // duplicate the neighbours' text.
    if (!edge || edge.nodeType !== n.TEXT_NODE || !edge.nodeValue.match(childRe)) {
      return 0;
    }
    target = n[s];
    viaParent = !target;
    if (viaParent) {
      target = n.parentNode;
    }
    if (!target || n.a.opt.blockTag(n.a)) {
      return 0;
    }
    if (viaParent) {
      $(target)[pAdd](' ');
    } else if (target.nodeType === 3 && !target.textContent.match(parentRe)) {
      target.textContent = (bef ? '' : ' ') + target.textContent + (bef ? ' ' : '');
    } else if (target.nodeType === 1) {
      $(target)[sAdd](' ');
    }
    edge.nodeValue = edge.nodeValue.replace(childRe, '');
    if (!edge.nodeValue.length) {
      $(edge).remove();
    }
    return 1;
  }
  // Merges the record's element into its next sibling when both are the same
  // non-block tag with the same attributes: the contents are prepended to the
  // sibling, the emptied element is removed, and the sibling (plus its new
  // first child element, if it carries a record) is queued again.
  //
  // With `opt.spaceMerge`, a next sibling that is a single-space text node is
  // folded into the node after it first, provided that node is a non-block
  // element with a tag record.
  //
  // a           - tag record to merge forward
  // addUnstable - callback queuing a record for another mutator pass
  // t           - internal: jQuery-wrapped sibling to merge into (defaults to
  //               the next sibling)
  function next(a, addUnstable, t){
    var sibling = t || joint(a.$, true);
    if (!sibling.length || a.opt.blockTag(a)) {
      return;
    }
    if (a.opt.spaceMerge && sibling.length === 1
        && sibling[0].nodeType === 3 && sibling[0].wholeText === ' ') {
      var after = joint(sibling, true);
      // the node behind the space must be a tracked, non-block element
      if (!after.length || !after[0].a || a.opt.blockTag(after[0].a)) {
        return;
      }
      sibling.remove();
      after.prepend(' ');
      return next(a, addUnstable, after);
    }
    var other = sibling[0].a;
    if (!other || a.tag !== other.tag || !sameAttrs(a, other)) {
      return;
    }
    sibling.prepend(a.$.contents());
    // The element is now an empty shell whose attributes duplicate the
    // sibling's; remove it outright (empty() would keep it because it still
    // has attributes).
    a.$.remove();
    addUnstable(other);
    var firstChild = sibling.children(":first");
    if (firstChild.length && firstChild[0].a) {
      addUnstable(firstChild[0].a);
    }
  }
  // Removes the record's node from the DOM when it has no content and no
  // attributes, unless its tag is configured with `empty` (e.g. br, img).
  // Nodes without an `attributes` collection, such as comments, count as
  // having no attributes.
  //
  // Returns true when the node was removed, undefined otherwise.
  function empty(a){
    var t = a.opt.tags[a.tag],
        node = a.$[0],
        attrCount = node.attributes ? node.attributes.length : 0;
    if ((!t || !t.empty) && !a.$.contents().length && !attrCount) {
      a.$.remove();
      return true; // NOTE true/undefined - different API than in exclude
    }
  }
  // Swaps an element with its parent when the element is the parent's only
  // child and the parent sorts after it in the hierarchy, e.g.
  // <b><a>x</a></b> becomes <a><b>x</b></a>. Both records are queued again.
  //
  // Nothing happens when the parent carries no tag record (for instance the
  // synthetic root wrapper) or when the record's own tag has no options entry.
  function parentOrderWrap(a, addUnstable){
    var parent = a.$.parent(),
        parentLink = parent[0] && parent[0].a,
        tags = a.opt.tags,
        own = tags[a.tag];
    if (!parentLink || !own) {
      return;
    }
    var children = parent.contents(),
        ptag = tags[tag(parent)];
    if (children.length === 1 && children[0] === a.$[0]
        && ptag && ptag.order > own.order) {
      parent.after(a.$);               // lift the element out, next to the parent
      parent.append(a.$.contents());   // the parent takes over the element's content
      a.$.append(parent);              // the element now wraps the parent
      addUnstable(parentLink);
      addUnstable(a);
    }
  }
  262. })();