123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291 |
- (function(){
// Normalizes an HTML fragment against the tag/attribute rules in the options.
//
// Public call:    $.normalize(htmlString, customOpt) -> normalized HTML string.
// Recursive call: $.normalize(a), where `a` is the internal {$, opt} record of
//                 an element; it walks that element's children and returns
//                 undefined.
//
// html      - HTML string (falsy values are treated as ''), or an internal record.
// customOpt - optional overrides, deep-merged over the built-in defaults.
//
// NOTE: the work queue (unstableList) is shared module state, so a call must
// finish before another top-level call starts.
$.normalize = function(html, customOpt){
  html = html || '';
  var isRecursiveCall = Boolean(html.opt),
      opt, root$;

  if(isRecursiveCall){
    opt = html.opt;
  } else {
    // Merge into a fresh object: extending baseOpt itself would permanently
    // leak one caller's custom options into every later custom call.
    opt = customOpt ? prepareOptTags($.extend(true, {}, baseOpt, customOpt))
                    : defaultOpt;
    unstableList.length = 0; // drop state left behind by a previous run that threw
    root$ = $('<div>'+html+'</div>');
  }

  (html.$ || root$).contents().each(function(){
    if(this.nodeType === this.TEXT_NODE) {
      // collapse leading/trailing runs of spaces and newlines to one space
      this.textContent = this.textContent.replace(/^[ \n]+|[ \n]+$/g, ' ');
      return;
    }
    if(this.nodeType !== this.ELEMENT_NODE) {
      // Comments and other non-element nodes have no `attributes` collection
      // and used to crash the emptiness check; they are not part of the tag
      // whitelist, so drop them.
      $(this).remove();
      return;
    }
    var a = {$: $(this), opt: opt};
    initTag(a);
    $.normalize(a); // recurse into the element's children
  });

  if(root$){
    // only the top-level call runs the mutators and produces output
    stateMachine();
    return root$.html();
  }
};
// Built-in normalization rules. Custom options passed to $.normalize are
// deep-merged over a copy of this object.
var baseOpt = {
  // Whitelisted tags. prepareOptTags() stores each tag's position in this
  // list as its `order`; blockTag() and parentOrderWrap() compare those values.
  // (A duplicated 'h1' entry was removed; only relative order is ever compared.)
  hierarchy: [
    'div', 'pre', 'ol', 'ul', 'li',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'a', // block
    'b', 'code', 'i', 'span', 's', 'sub', 'sup', 'u', // inline
    'br', 'img' // empty
  ],
  // Per-tag rules:
  //   attrs   - extra attributes allowed on this tag (on top of the global `attrs`)
  //   exclude - tags that are unwrapped when they are a direct child of this tag
  //   empty   - the tag is kept even when it has no content
  //   space   - NOTE(review): not read by any code in this file - confirm usage
  tags: {
    'a': {attrs:{'href':1}, exclude:{'a':1}},
    'b': {exclude:{'b':1,'p':1}},
    'br': {empty: 1},
    'i': {exclude:{'i':1,'p':1}},
    'img': {attrs:{'src':1}, empty: 1},
    'span': {exclude:{'p':1,'ul':1,'ol':1,'li':1,'br':1}},
    's': {space:1},
    'u': {exclude:{'u':1,'p':1},space:1}
  },
  // source tag -> replacement tag
  convert: {
    'em': 'i', 'strong': 'b', 'strike': 's'
  },
  // attributes allowed on every whitelisted tag
  attrs: {
    'id':1,
    'class':1,
    'style':1
  },
  // A tag counts as block-level when it is listed before 'a' in `hierarchy`.
  blockTag: function(a){
    return a.opt.tags[a.tag].order < a.opt.tags.a.order;
  },
  // Mutators run by the state machine, in this order, for every queued element.
  // A mutator returning false stops the remaining mutators for that element.
  mutate: [exclude, moveSpaceUp, next, parentOrderWrap]
};
// Prepared options used when the caller passes no custom options.
var defaultOpt = prepareOptTags($.extend(true, {}, baseOpt));
// Work queue of element records that still need a mutator pass.
var unstableList = [];
// Queues the element record `a` for another pass of the state machine.
// Records that are already queued (a.unstable === true) are not added twice.
// Throws an Error when `a` is missing or carries no tag name; the offending
// value is attached as `error.item` because Error() only accepts a message.
function addUnstable(a) {
  if(!a || !a.tag) {
    var err = new Error('not tag in ' + Object.prototype.toString.call(a));
    err.item = a;
    throw err;
  }
  if(a.unstable) return;
  a.unstable = true;
  unstableList.push(a);
}
// First-pass setup of an element record `a` ({$, opt}):
// resolves the tag name, drops the element if it is empty, filters its
// attributes, applies tag conversion, writes the kept attributes back and
// queues the record for the state machine.
// Returns `a`, or undefined when the element was removed as empty.
function initTag(a) {
  a.tag = tag(a.$);
  if(!empty(a)) {
    // order matters: attributes are collected before convert() swaps the
    // element, then written onto the (possibly new) element
    [parseAndRemoveAttrs, convert, setAttrs].forEach(function(step){
      step(a);
    });
    a.$[0].a = a; // back-link from the DOM element to its record
    // state machine init
    a.unstable = true;
    unstableList.push(a);
    return a;
  }
}
// Drains the work queue: pops records off unstableList (last queued first)
// and runs every mutator from the record's options on it. Mutators receive
// (a, addUnstable) and may queue further records, so the loop runs until the
// queue is empty. A mutator that returns exactly false stops the remaining
// mutators for that record.
function stateMachine() {
  var a, mutators, fn, i, result;
  while((a = unstableList.pop())) {
    a.unstable = false;
    mutators = a.opt.mutate || [];
    for(i = 0; i < mutators.length; i++) {
      fn = mutators[i];
      result = fn && fn(a, addUnstable);
      if(result === false) break;
    }
  }
}
// Completes an options object in place and returns it:
//  - verifies that every key of opt.tags is listed in opt.hierarchy,
//  - creates a tag entry for every hierarchy item that has none,
//  - gives each tag its own `attrs` map (tag-specific attrs plus the global
//    opt.attrs; global entries win on the same key),
//  - stores the tag's `name` and its `order` (index of its first occurrence
//    in opt.hierarchy).
// Throws an Error when a configured tag is missing from the hierarchy.
function prepareOptTags(opt) {
  var tags = opt.tags,
      order = Object.create(null), // tag name -> first index in hierarchy
      name;

  opt.hierarchy.forEach(function(tagName, index){
    if(order[tagName] === undefined)
      order[tagName] = index;
  });

  for(name in tags) {
    if(order[name] === undefined)
      throw new Error('tag "'+name+'" is missing hierachy definition');
  }

  opt.hierarchy.forEach(function(tagName){
    var tagOpt = tags[tagName] || (tags[tagName] = {});
    // fresh object per tag, so tags never alias opt.attrs or each other
    tagOpt.attrs = $.extend({}, tagOpt.attrs, opt.attrs);
    tagOpt.name = tagName; // debugging aid only
    tagOpt.order = order[tagName];
  });
  return opt;
}
- // GENERAL UTILS
// Safe nested property read.
// o    - root object; a null/undefined root yields undefined.
// args - a single property name, or an array of property names forming a path.
// Returns the value at the end of the path, or undefined when the path is
// empty or an intermediate value is null/undefined.
function get(o, args){
  if(typeof args === 'string')
    args = [args]; // a lone key is a one-step path (not its first character)
  var i, l = args.length;
  if(l === 0)
    return undefined;
  for(i = 0; i < l; i++){
    if(o == null)
      return undefined;
    o = o[args[i]];
  }
  return o;
}
// True when `prop` is an own (non-inherited) property of `obj`.
// Goes through Object.prototype so objects that shadow hasOwnProperty still work.
function has(obj,prop){
  var ownCheck = Object.prototype.hasOwnProperty;
  return ownCheck.call(obj, prop);
}
- // ELEMENT UTILS
// Lower-cased node name of the first node matched by `e`
// (anything jQuery accepts); '' when nothing matches or the node has no name.
function tag(e){
  var node = $(e)[0];
  var nodeName = node && node.nodeName;
  return nodeName ? nodeName.toLowerCase() : '';
}
// jQuery-wrapped direct neighbour of the first node matched by `e`:
// the next sibling when `d` is truthy, otherwise the previous sibling.
// Text nodes count as neighbours; the result is empty when there is none.
function joint(e, d){
  var node = $(e)[0] || {};
  var neighbour = d ? node.nextSibling : node.previousSibling;
  return $(neighbour);
}
// XSS screening: xssattr strips everything except ASCII letters and ':' from
// an attribute value; xssjs then looks for "javascript:" in what is left.
var xssattr = /[^a-z:]/ig;
var xssjs = /javascript:/ig;
// Open notes carried over from the original author:
//  - url("javascript: ... and all of its permutations
//  - stylesheets can apparently carry XSS as well?

// Builds a {name: value} object from the attributes of the first element
// matched by `e`.
// Attributes whose screened value contains "javascript:" are removed from the
// element and left out of the result.
// filterCb(value, name, e$) - optional; its return value replaces the
// attribute value, and returning undefined or false leaves the attribute out.
function attrsAsObj(e, filterCb){
  var collected = {};
  e = $(e);
  if(!e.length)
    return collected;
  $(e[0].attributes || []).each(function(index, attrNode){
    var name = attrNode.nodeName || attrNode.name,
        value = e.attr(name),
        screened = value.replace(xssattr, '');
    if(screened.match(xssjs)){
      e.removeAttr(name);
      return;
    }
    if(filterCb)
      value = filterCb(value, name, e);
    if(value === undefined || value === false)
      return;
    collected[name] = value;
  });
  return collected;
}
- // TODO: PERF testing - for loop to compare through?
// True when the records `a` and `b` carry the same attribute map (`attr`).
// The comparison ignores the order in which attributes appeared in the
// source, so <b id=x class=y> and <b class=y id=x> count as equal.
// Two records without an attribute map are equal; a missing map never equals
// an existing one.
function sameAttrs(a, b) {
  function canonical(attr){
    if(!attr || typeof attr !== 'object')
      return JSON.stringify(attr);
    // an array replacer fixes the serialization order of the keys
    return JSON.stringify(attr, Object.keys(attr).sort());
  }
  return canonical(a.attr) === canonical(b.attr);
}
- // INITIAL MUTATORS
// Strips every attribute from the element of record `a` and remembers the
// whitelisted ones so setAttrs() can write them back:
//   a.attrs - names of the kept attributes, in source order
//   a.attr  - {name: value} of the kept attributes, or undefined when the
//             (converted) tag has no entry in opt.tags
// The whitelist of the tag the element will be converted to is used.
// Lookups are own-property checks, so names inherited from Object.prototype
// (e.g. an attribute called "constructor") never pass the whitelist.
function parseAndRemoveAttrs(a) {
  var conversions = a.opt.convert,
      tags = a.opt.tags,
      finalTag = (has(conversions, a.tag) && conversions[a.tag]) || a.tag,
      tOpt = has(tags, finalTag) ? tags[finalTag] : undefined;
  a.attrs = [];
  a.attr = tOpt && attrsAsObj(a.$, function(value, name){
    var key = name.toLowerCase();
    a.$.removeAttr(name);
    if(has(tOpt.attrs, key) && tOpt.attrs[key]){
      a.attrs.push(name);
      return value;
    }
    // returning undefined makes attrsAsObj leave the attribute out
  });
}
// Writes the kept attributes of record `a` back onto its element.
// With a name list (a.attrs) the names are sorted in place and written in
// that order, reading each value from a.attr under the lower-cased name.
// Without a name list every entry of a.attr is written as it is.
function setAttrs(a){
  if(a.attrs){
    $(a.attrs.sort()).each(function(index, attrName){
      a.$.attr(attrName, a.attr[attrName.toLowerCase()]);
    });
    return;
  }
  $.each(a.attr, function(attrName, attrValue){
    a.$.attr(attrName, attrValue);
  });
}
// Replaces the element of record `a` with the tag configured for it in
// opt.convert (e.g. em -> i), moving the children into the new element and
// pointing a.$ / a.tag at the replacement. Attributes are not copied; the
// caller collects them beforehand and writes them back afterwards.
// Only own entries of opt.convert count, so a tag named like an
// Object.prototype member (e.g. <constructor>) is left alone instead of
// crashing on a non-string mapping.
function convert(a){
  var mapping = a.opt.convert,
      target = has(mapping, a.tag) && mapping[a.tag],
      original$, replacement$;
  if(!target)
    return;
  original$ = a.$;
  a.tag = target.toLowerCase();
  replacement$ = $('<' + a.tag + '>').append(original$.contents());
  a.$ = replacement$;
  original$.replaceWith(replacement$);
}
- // LOOPING (STATE MACHINE) MUTATORS
// Mutator: unwraps the element of record `a` (replaces it with its own
// children) when its tag is not whitelisted in opt.tags, or when the parent
// tag lists it under `exclude`.
// After unwrapping, a single remaining child with a record is re-queued via
// addUnstable. Returns false when the element was unwrapped, which stops the
// remaining mutators for this record; returns undefined otherwise.
// Lookups are own-property checks so tag names that collide with
// Object.prototype members (e.g. "constructor") are not mistaken for
// configured tags.
function exclude(a, addUnstable){
  var tags = a.opt.tags || {},
      parentTag = tag(a.$.parent()),
      tagOpt = has(tags, a.tag) ? tags[a.tag] : undefined,
      parentOpt = has(tags, parentTag) ? tags[parentTag] : undefined,
      banned = parentOpt && parentOpt.exclude,
      isBanned = Boolean(banned && has(banned, a.tag) && banned[a.tag]),
      children;
  if(tagOpt && !isBanned)
    return;
  children = a.$.contents();
  a.$.replaceWith(children);
  if(children.length === 1 && children[0].a)
    addUnstable(children[0].a);
  return false;
}
// Mutator: moves a leading and/or trailing space out of the element of
// record `a` (see moveSpace). When a space was moved:
//  - an element left without any text goes through empty(),
//  - otherwise, if the first child node has a record, that child is checked
//    by parentOrderWrap().
function moveSpaceUp(a, addUnstable){
  var node = a.$[0],
      movedFront = moveSpace(node, true),
      movedBack = moveSpace(node, false),
      firstChild;
  if(!(movedFront + movedBack))
    return; // nothing moved
  if(node.textContent === ''){
    empty(a);
    return;
  }
  firstChild = a.$.contents()[0];
  if(firstChild && firstChild.a)
    parentOrderWrap(firstChild.a, addUnstable);
}
// Moves one leading (bef truthy) or trailing (bef falsy) space out of the
// inline element `n` to just outside of it.
// Where the space goes:
//  - neighbouring text node: gains the space unless it already has one on
//    the touching side,
//  - neighbouring element: a space is inserted between it and `n`,
//  - no neighbour: the space is prepended/appended to the parent.
// The first/last text child of `n` loses the space and is removed when it
// becomes empty.
// Returns 1 when the space was taken out of `n`, 0 when nothing was done
// (no such space, block-level element, or a node with neither sibling nor
// parent).
// Only the text child's own value is read and written: wholeText also covers
// adjacent text nodes, and writing it back into one node duplicated their text.
function moveSpace(n, bef) {
  var TEXT_NODE = 3, ELEMENT_NODE = 1,
      childRe = bef ? /^ / : / $/,
      parentRe = bef ? / $/ : /^ /,
      siblingAdd = bef ? 'after' : 'before',
      parentAdd = bef ? 'prepend' : 'append',
      child = n && n[bef ? 'firstChild' : 'lastChild'],
      sibling, parent;

  if(!child || child.nodeType !== TEXT_NODE || !childRe.test(child.nodeValue))
    return 0;

  sibling = n[bef ? 'previousSibling' : 'nextSibling'];
  parent = n.parentNode;
  if(!sibling && !parent)
    return 0;
  if(n.a.opt.blockTag(n.a))
    return 0; // spaces stay inside block-level elements

  if(sibling){
    if(sibling.nodeType === TEXT_NODE){
      if(!parentRe.test(sibling.nodeValue))
        sibling.nodeValue = bef ? sibling.nodeValue + ' ' : ' ' + sibling.nodeValue;
    } else if(sibling.nodeType === ELEMENT_NODE){
      $(sibling)[siblingAdd](' ');
    }
  } else {
    $(parent)[parentAdd](' ');
  }

  child.nodeValue = child.nodeValue.replace(childRe, '');
  if(!child.nodeValue.length)
    $(child).remove();
  return 1;
}
// Mutator: merges the inline element of record `a` into its next sibling
// when both have the same tag and the same attributes. The children of `a`
// are prepended to the sibling, `a` goes through empty(), and the sibling
// plus its first element child are re-queued.
// With opt.spaceMerge set, a lone " " text node between the two elements is
// moved into the following element first, as long as that element has a
// record and is not block-level.
// t - internal: the sibling to merge into (used by the spaceMerge recursion).
// NOTE(review): empty() keeps elements that still have attributes or are
// flagged `empty` (br, img), so such an element stays in place after its
// children were moved - confirm this is intended.
function next(a, addUnstable, t){
  var sibling = t || joint(a.$, true),
      afterSpace, firstChild;
  if(!sibling.length || a.opt.blockTag(a))
    return;
  if(a.opt.spaceMerge && sibling.length === 1 && sibling[0].nodeType === 3 && sibling[0].wholeText === ' '){
    afterSpace = joint(sibling, true);
    // a neighbour without a record (e.g. a text node) cannot be merged into
    if(!afterSpace.length || !afterSpace[0].a || a.opt.blockTag(afterSpace[0].a))
      return;
    sibling.remove();
    afterSpace.prepend(' ');
    return next(a, addUnstable, afterSpace);
  }
  if(!sibling[0].a || a.tag !== sibling[0].a.tag || !sameAttrs(a, sibling[0].a))
    return;
  sibling.prepend(a.$.contents());
  empty(a);
  addUnstable(sibling[0].a);
  firstChild = sibling.children(":first");
  if(firstChild.length && firstChild[0].a)
    addUnstable(firstChild[0].a);
}
// Removes the node of record `a` from the document when it has no child
// nodes and no attributes, unless its tag is flagged `empty` in opt.tags
// (tags such as br or img that are meant to be childless).
// Nodes without an `attributes` collection (e.g. comments) count as having
// no attributes instead of throwing.
// Returns true when the node was removed, undefined otherwise.
// NOTE: true/undefined here - a different convention than exclude(), which
// returns false after changing the DOM.
function empty(a){
  var tagOpt = a.opt.tags[a.tag],
      node = a.$[0],
      keepWhenEmpty = tagOpt && tagOpt.empty,
      attrCount = node.attributes ? node.attributes.length : 0;
  if(keepWhenEmpty || a.$.contents().length || attrCount)
    return;
  a.$.remove();
  return true;
}
// Mutator: swaps the nesting of the element of record `a` and its parent
// when `a` is the parent's only child node and the parent's tag has a higher
// `order` than the tag of `a`.
// The swap places `a` right after the parent, moves the children of `a` into
// the parent and then moves the parent into `a`; both records are re-queued.
function parentOrderWrap(a, addUnstable){
  var parent = a.$.parent(),
      siblings = parent.contents(),
      tags = a.opt.tags,
      isOnlyChild = siblings.length === 1 && siblings[0] === a.$[0],
      parentOpt;
  if(!isOnlyChild)
    return;
  parentOpt = tags[tag(parent)];
  if(!parentOpt || !(parentOpt.order > tags[a.tag].order))
    return;
  parent.after(a.$);
  parent.append(a.$.contents());
  a.$.append(parent);
  addUnstable(parent[0].a);
  addUnstable(a);
}
- })();
|