@@ -1,138 +1,284 @@
- function normalize(opt){
- var el = $(this);
- opt = opt || $.extend(true, normalize.opt, opt||{});
- el.children().each(function(){
- var a = {$: $(this), opt: opt};
- a.tag = normalize.tag(a.$);
- $(a.opt.mutate).each(function(i,fn){
- fn && fn(a);
- });
- })
- return el;
- };
- var n = normalize, u;
- n.get = function(o, p){
- p = p.split('.');
- var i = 0, l = p.length, u;
- while((o = o[p[i++]]) != null && i < l){};
- return i < l ? u : o;
- }
- n.has = function(o,p){
- return Object.prototype.hasOwnProperty.call(o, p);
- }
- n.tag = function(e){
- return (($(e)[0]||{}).nodeName||'').toLowerCase();
- }
- n.attrs = function(e, cb){
- var attr = {};
- (e = $(e)) && e.length && $(e[0].attributes||[]).each(function(v,n){
- n = n.nodeName||n.name;
- v = e.attr(n);
- v = cb? cb(v,n,e) : v;
- if(v !== u && v !== false){ attr[n] = v }
- });
- return attr;
- }
- n.joint = function(e, d){
- d = (d? 'next' : 'previous') + 'Sibling'
- return $(($(e)[0]||{})[d]);
- }
- var h = {
- attr: function(a$, av, al){
- var l = function(i,v){
- var t = v;
- i = al? v : i;
- v = al? av[v.toLowerCase()] : t;
- a$.attr(i, v);
- }
- al? $(al.sort()).each(l) : $.each(av,l);
- }
- }
- n.opt = { // some reasonable defaults, limited to content alone.
- tags: {
- 'a': {attrs:{'src':1}, exclude:{'a':1}},
- 'b': {exclude:{'b':1}},
- //'blockquote':1,
- 'br': {empty: 1},
- 'div': 1,
- //'code':1,
- 'i': {exclude:{'i':1}},
- 'img': {attrs:{'src':1}, empty: 1},
- 'li':1, 'ol':1,
- 'p': {exclude:{'p':1,'div':1}},
- //'pre':1,
- 's': {exclude:{'s':1}},
- 'sub':1, 'sup':1,
- 'span': {exclude:{'p':1,'ul':1,'ol':1,'li':1,'br':1}},
- 'u': {exclude:{'u':1,'p':1}},
- 'ul':1
- }
- // a, audio, b, br, div, i, img, li, ol, p, s, span, sub, sup, u, ul, video
- // button, canvas, embed, form, iframe, input, style, svg, table,
- // Text: bold, italics, underline, align, bullet, list,
- ,convert: {
- 'em': 'i', 'strong': 'b'
- }
- ,attrs: {
- 'id':1
- ,'class':1
- ,'style':1
- }
- ,mutate: [
- function(a){ // attr
- a.attrs = [];
- a.attr = $.extend(a.opt.attrs, n.get(a.opt,'tags.'+ a.tag +'attrs'));
- a.attr = n.attrs(a.$, function(v,i){
- a.$.removeAttr(i);
- if(a.attr[i.toLowerCase()]){
- a.attrs.push(i)
- return v;
- }
- });
- // if this tag is gonna get converted, wait to add attr back till after the convert
- if(a.attrs && !n.get(a.opt, 'convert.' + a.tag)){
- h.attr(a.$, a.attr, a.attrs);
- }
- }
- ,function(a, tmp){ // convert
- if(!(tmp = n.get(a.opt,'convert.' + a.tag))){ return }
- a.attr = a.attr || n.attrs(a.$);
- a.$.replaceWith(a.$ = $('<'+ (a.tag = tmp.toLowerCase()) +'>').append(a.$.contents()));
- h.attr(a.$, a.attr, a.attrs);
- }
- ,function(a, tmp){ // lookahead
- if((tmp = n.joint(a.$,1)) && (tmp = tmp.contents()).length === 1 && a.tag === n.tag(tmp = tmp.first())){
- a.$.append(tmp.parent()); // no need to unwrap the child, since the recursion will do it for us
- }
- }
- ,function(a){ // recurse
- // this needs to precede the exclusion and empty.
- normalize(a);
- }
- ,function(a, tmp){ // exclude
- if(!n.get(a.opt,'tags.' + a.tag)
- || ((tmp = n.get(a.opt,'tags.'+ a.tag +'.exclude'))
- && a.$.parents($.map(tmp,function(i,v){return v})+' ').length)
- ){
- a.$.replaceWith(a.$.contents());
- }
- }
- ,function(a, tmp){ // prior
- if((tmp = n.joint(a.$)).length && a.tag === n.tag(tmp)){
- tmp.append(a.$.contents());
- }
- }
- ,function(a){ // empty
- // should always go last, since the element will be removed!
- if(a.opt.empty || !n.has(a.opt,'empty')){
- if(!n.get(a.opt,'tags.'+ a.tag +'.empty')
- && !a.$.contents().length){
- a.$.remove();
- }
- }
- }
- ]
- }
- $.fn.normalize = normalize;
+ /**
+  * Normalize an HTML string against the configured tag/attribute rules.
+  *
+  * Public use: $.normalize(htmlString, customOpt) returns the normalized HTML string.
+  * Internal use: the function recurses with a tag state object ({$: ..., opt: ...})
+  * as first argument; in that case it only walks the children and returns undefined.
+  *
+  * @param {string|Object} html      HTML string, or (recursion) a tag state object.
+  * @param {Object} [customOpt]      overrides deep-merged over a copy of baseOpt.
+  * @returns {string|undefined}      normalized HTML on the outermost call only.
+  */
+ $.normalize = function(html, customOpt){
+   var root$, opt;
+   // Merge into a fresh object: extending baseOpt itself would leak one caller's
+   // overrides into every later call that passes customOpt.
+   opt = html.opt || (customOpt ? prepareOptTags($.extend(true, {}, baseOpt, customOpt))
+                                : defaultOpt);
+   if(!html.opt){
+     // first call
+     unstableList.length = 0; // drop state from previous run (in case there has been error)
+     // NOTE(review): the string is parsed by jQuery as live HTML before any rule is
+     // applied; confirm callers never pass untrusted markup, or parse it inertly first.
+     root$ = $('<div>'+html+'</div>');
+   }
+   // initial recursion
+   (html.$ || root$).contents().each(function(){
+     if(this.nodeType === this.TEXT_NODE) {
+       // collapse leading/trailing runs of spaces and newlines to a single space
+       this.textContent = this.textContent.replace(/^[ \n]+|[ \n]+$/g, ' ');
+       return;
+     }
+     var a = {$: $(this), opt: opt};
+     initTag(a);
+     $.normalize(a);
+   });
+   if(root$){
+     // only the outermost call runs the mutation queue and serializes the result
+     stateMachine();
+     return root$.html();
+   }
+ };
+ // Base configuration. $.normalize deep-merges caller overrides over it and
+ // prepareOptTags() completes the per-tag entries (attrs, name, order).
+ var baseOpt = {
+   // Allowed tags in nesting priority order. prepareOptTags() stores each tag's index
+   // as tags[name].order; parentOrderWrap() and blockTag() compare those indexes.
+   // Each tag must appear once ('h1' used to be listed twice).
+   hierarchy: ['div', 'pre', 'ol', 'ul', 'li',
+     'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'a', // block
+     'b', 'code', 'i', 'span', 's', 'sub', 'sup', 'u', // inline
+     'br'] // empty
+   // Per-tag rules: attrs = extra allowed attributes, exclude = child tags that get
+   // unwrapped when found directly inside this tag, empty = keep even without content.
+   ,tags: {
+     'a': {attrs:{'href':1}, exclude:{'a':1}},
+     'b': {exclude:{'b':1,'p':1}},
+     'br': {empty: 1},
+     'i': {exclude:{'i':1,'p':1}},
+     'span': {exclude:{'p':1,'ul':1,'ol':1,'li':1,'br':1}},
+     's': {space:1},
+     'u': {exclude:{'u':1,'p':1},space:1},
+   }
+   // Source tag -> replacement tag, applied by convert().
+   ,convert: {
+     'em': 'i', 'strong': 'b', 'strike': 's',
+   }
+   // Attributes allowed on every tag (merged into each tag's attrs by prepareOptTags).
+   ,attrs: {
+     'id':1
+     ,'class':1
+     ,'style':1
+   }
+   // True when the tag of state object `a` sorts before 'a' in the hierarchy.
+   ,blockTag: function(a){
+     return a.opt.tags[a.tag].order < a.opt.tags.a.order;
+   }
+   // Mutation steps run by stateMachine() for each queued tag, in this order.
+   ,mutate: [exclude, moveSpaceUp, next, parentOrderWrap]
+ }
+ var defaultOpt = prepareOptTags($.extend(true, {}, baseOpt)); // prepared deep copy of baseOpt; used by $.normalize when no customOpt is passed
+ var unstableList = []; // tag state objects waiting for a mutation pass: filled by initTag/addUnstable, drained by stateMachine
+ /**
+  * Queue a tag state object for another pass through the mutation steps.
+  * Objects already flagged as unstable are not queued twice.
+  *
+  * @param {Object} a  tag state object (must carry a non-empty .tag)
+  * @throws {Error} when `a` is missing or has no tag; the offending value is
+  *                 attached to the error as `.subject` for debugging.
+  */
+ function addUnstable(a) { // NOT ES5
+   if(!a || !a.tag) {
+     // Error() only takes the message from its first argument, so the object has
+     // to be attached explicitly instead of being passed as a second argument.
+     var err = new Error("not tag in " + a);
+     err.subject = a;
+     throw err;
+   }
+   if(a.unstable) return;
+   unstableList.push(a);
+   a.unstable = true;
+ }
+ /**
+  * First-time setup of a tag state object: resolve its tag name, drop it if it is
+  * an empty shell, filter its attributes, apply tag conversion, and queue it.
+  *
+  * @param {Object} a  state object with .$ (jQuery wrapped node) and .opt
+  * @returns {Object|undefined} `a` when the element was kept, undefined when
+  *                             empty() removed it.
+  */
+ function initTag(a) {
+   a.tag = tag(a.$);
+   if(!empty(a)) {
+     // Attributes are read and stripped before convert() swaps the element,
+     // then written back onto whatever element a.$ points at afterwards.
+     parseAndRemoveAttrs(a);
+     convert(a);
+     setAttrs(a);
+     // back-reference so DOM traversal can find the state object again
+     a.$[0].a = a;
+     // seed the state machine queue
+     a.unstable = true;
+     unstableList.push(a);
+     return a;
+   }
+ }
+ /**
+  * Drain unstableList: pop state objects one at a time and run every step of
+  * opt.mutate on each. A step returning exactly false stops the remaining steps
+  * for that object (jQuery .each() semantics); steps may queue further objects
+  * through the addUnstable callback they receive.
+  */
+ function stateMachine() {
+   var current;
+   var runStep = function(idx, step){
+     return step && step(current, addUnstable);
+   };
+   for(current = unstableList.pop(); current; current = unstableList.pop()) {
+     current.unstable = false;
+     $(current.opt.mutate).each(runStep);
+   }
+ }
+ /**
+  * Complete an options object in place so every tag in opt.hierarchy has an entry
+  * in opt.tags carrying its allowed attributes, its name and its hierarchy index.
+  *
+  * @param {Object} opt  options with .tags, .hierarchy and .attrs
+  * @returns {Object} the same (mutated) opt
+  * @throws {Error} when opt.tags names a tag that is not part of opt.hierarchy
+  */
+ function prepareOptTags(opt) {
+   var tags = opt.tags, key;
+   // every configured tag needs a position in the hierarchy
+   for(key in tags) {
+     if(opt.hierarchy.indexOf(key) < 0) {
+       throw Error('tag "'+key+'" is missing hierachy definition');
+     }
+   }
+   opt.hierarchy.forEach(function(name){
+     var entry = tags[name];
+     if(!entry){
+       entry = tags[name] = {attrs: opt.attrs};
+     }
+     // globally allowed attributes are added to the tag's own list
+     entry.attrs = $.extend(entry.attrs||{}, opt.attrs);
+     entry.name = name; // not used, debug help (REMOVE later?)
+     // position of the first occurrence in the hierarchy
+     entry.order = opt.hierarchy.indexOf(name);
+     if(entry.order === -1) {
+       throw Error("Order of '"+name+"' not defined in hierarchy");
+     }
+   });
+   return opt;
+ }
+ /**
+  * Safe nested property lookup.
+  *
+  * @param {Object} o             root object (null/undefined yields undefined)
+  * @param {string|string[]} args a single property name, or an array of names
+  *                               forming the path to follow
+  * @returns {*} the value at the end of the path, or undefined when the root or an
+  *              intermediate step is null/undefined
+  */
+ function get(o, args){
+   var u;
+   if(o == null)
+     return u;
+   // A string is one key; indexing it with [0] would look up its first character.
+   if(typeof args === 'string')
+     return o[args];
+   var i = 0, l = args.length;
+   // walk the path, stopping early once a step yields null/undefined
+   while((o = o[args[i++]]) != null && i < l){}
+   return i < l ? u : o;
+ }
+ // True when `prop` is an own (not inherited) property of `obj`.
+ function has(obj,prop){
+   var ownCheck = Object.prototype.hasOwnProperty;
+   return ownCheck.call(obj, prop);
+ }
+ // Lower-cased node name of the first node matched by `e`, or '' when there is none.
+ function tag(e){
+   var node = $(e)[0];
+   if(!node || !node.nodeName) {
+     return '';
+   }
+   return node.nodeName.toLowerCase();
+ }
+ // jQuery-wrapped adjacent node (text nodes included) of the first node matched by
+ // `e`: the next sibling when `d` is truthy, the previous sibling otherwise.
+ function joint(e, d){
+   var node = $(e)[0] || {};
+   return $(d ? node.nextSibling : node.previousSibling);
+ }
+ // Build a {name: value} object from the attributes of the first element matched by
+ // `e`. When filterCb is given it is called as filterCb(value, name, e$) and its
+ // return value is stored instead; undefined or false drops the attribute.
+ function attrsAsObj(e, filterCb){
+   var attrObj = {};
+   e = $(e);
+   if(!e || !e.length) {
+     return attrObj;
+   }
+   $(e[0].attributes||[]).each(function(idx, attrNode){
+     var name = attrNode.nodeName||attrNode.name;
+     var value = e.attr(name);
+     if(filterCb) {
+       value = filterCb(value,name,e);
+     }
+     if(value !== undefined && value !== false) {
+       attrObj[name] = value;
+     }
+   });
+   return attrObj;
+ }
+ /**
+  * Compare the kept attributes (.attr maps) of two tag state objects.
+  * The comparison ignores key order, so elements whose attributes were written in
+  * a different order in the source markup still count as equal. A missing .attr is
+  * treated as an empty map.
+  *
+  * @param {Object} a  tag state object
+  * @param {Object} b  tag state object
+  * @returns {boolean} true when both carry the same attribute names and values
+  */
+ function sameAttrs(a, b) {
+   var left = a.attr || {}, right = b.attr || {},
+       leftKeys = Object.keys(left).sort(),
+       rightKeys = Object.keys(right).sort(),
+       i;
+   if(leftKeys.length !== rightKeys.length)
+     return false;
+   for(i = 0; i < leftKeys.length; i++) {
+     if(leftKeys[i] !== rightKeys[i] || left[leftKeys[i]] !== right[rightKeys[i]])
+       return false;
+   }
+   return true;
+ }
+ /**
+  * Strip every attribute from the element and remember the allowed ones.
+  * Afterwards a.attrs lists the kept attribute names and a.attr maps name -> value.
+  * The allow-list is taken from the tag the element will be converted to, if any.
+  * When that tag has no entry in opt.tags nothing is removed and a.attr is left
+  * falsy.
+  *
+  * @param {Object} a  tag state object (.$, .opt, .tag)
+  */
+ function parseAndRemoveAttrs(a) {
+   a.attrs = [];
+   var finalTag = a.opt.convert[a.tag] || a.tag;
+   var tagOpt = a.opt.tags[finalTag];
+   if(!tagOpt) {
+     a.attr = tagOpt;
+     return;
+   }
+   a.attr = attrsAsObj(a.$, function(value,name){
+     a.$.removeAttr(name);
+     if(tagOpt.attrs[name.toLowerCase()]){
+       a.attrs.push(name);
+       return value;
+     }
+   });
+ }
+ /**
+  * Write the remembered attributes back onto a.$.
+  * With a.attrs present, names are sorted (in place) and values come from a.attr;
+  * without it, every entry of a.attr is written as is.
+  *
+  * @param {Object} a  tag state object (.$, .attr, optional .attrs)
+  */
+ function setAttrs(a){
+   if(a.attrs) {
+     $(a.attrs.sort()).each(function(idx, name){
+       a.$.attr(name, a.attr[name.toLowerCase()]);
+     });
+   } else {
+     $.each(a.attr, function(name, value){
+       a.$.attr(name, value);
+     });
+   }
+ }
+ /**
+  * Replace the element by its configured substitute tag (opt.convert), moving the
+  * child nodes across. Updates a.tag and a.$ to describe the new element.
+  * Does nothing when the tag has no conversion rule.
+  *
+  * @param {Object} a  tag state object (.$, .opt, .tag)
+  */
+ function convert(a){
+   // Own-property lookup only: a tag named like an Object.prototype member
+   // (e.g. <constructor>) must not pick up the inherited value as its target.
+   var t = has(a.opt.convert, a.tag) ? a.opt.convert[a.tag] : undefined;
+   if(!t)
+     return;
+   var old$ = a.$;
+   a.tag = t.toLowerCase();
+   a.$ = $('<'+ a.tag +'>').append(old$.contents());
+   old$.replaceWith(a.$);
+ }
+ /**
+  * Mutation step: unwrap the element (replace it by its child nodes) when its tag
+  * is not configured in opt.tags, or when its direct parent's rule lists the tag
+  * under `exclude`. If exactly one child is left in its place and it has a state
+  * object, that child is queued again.
+  *
+  * @param {Object} a               tag state object
+  * @param {Function} addUnstable   queueing callback supplied by stateMachine
+  * @returns {boolean|undefined} false when the element was unwrapped (stops the
+  *                              remaining mutation steps), otherwise undefined
+  */
+ function exclude(a, addUnstable){
+   var tags = a.opt.tags || {},
+       parentTag = tag(a.$.parent()),
+       // Own properties only, so tag names that collide with Object.prototype
+       // members (e.g. <constructor>) are not mistaken for configured tags.
+       t = has(tags, a.tag) ? tags[a.tag] : undefined,
+       pt = has(tags, parentTag) ? tags[parentTag] : undefined;
+   if(!t || (pt && get(pt, ['exclude', a.tag]))){
+     var c = a.$.contents();
+     a.$.replaceWith(c);
+     c.length===1 && c[0].a && addUnstable(c[0].a);
+     return false;
+   }
+ }
+ /**
+  * Mutation step: try to move a leading and a trailing space out of the element
+  * (see moveSpace). If anything moved, drop the element when it ended up without
+  * text, otherwise re-check nesting order for its first child.
+  *
+  * @param {Object} a               tag state object
+  * @param {Function} addUnstable   queueing callback supplied by stateMachine
+  */
+ function moveSpaceUp(a, addUnstable){
+   var node = a.$[0];
+   // both sides are always attempted, front first
+   var movedFront = moveSpace(node, true);
+   var movedBack = moveSpace(node, false);
+   if(!(movedFront + movedBack)) {
+     return;
+   }
+   if(node.textContent==='') {
+     empty(a);
+     return;
+   }
+   var first = a.$.contents()[0];
+   if(first && first.a) {
+     parentOrderWrap(first.a, addUnstable);
+   }
+ }
+ /**
+  * Move one leading (bef = true) or trailing (bef = false) space out of element
+  * `n`: onto the adjacent sibling, or into the parent when there is no sibling.
+  * Only elements for which opt.blockTag() is false are touched.
+  *
+  * @param {Node} n        element whose first/last text child is inspected;
+  *                        expected to carry its state object as n.a
+  * @param {boolean} bef   true for the leading side, false for the trailing side
+  * @returns {number} 1 when a space was stripped from the element, 0 otherwise
+  */
+ function moveSpace(n, bef) {
+   // All helpers are declared locally; they used to leak as implicit globals,
+   // which is a ReferenceError in strict code.
+   var childRe = bef? /^ / : / $/,
+       parentRe = bef? / $/ : /^ /,
+       c = bef? 'firstChild' : 'lastChild',
+       s = bef? 'previousSibling' : 'nextSibling',
+       sAdd = bef? 'after' : 'before',
+       pAdd = bef? 'prepend' : 'append',
+       n2;
+   // Use the text node's own text, not wholeText: wholeText also contains the text
+   // of adjacent text nodes and would copy it into this node when written back.
+   if(!n || !n[c] || n[c].nodeType !== n.TEXT_NODE || !n[c].textContent.match(childRe)) {
+     return 0;
+   }
+   if((n2 = n[s]) && !n.a.opt.blockTag(n.a)) {
+     if(n2.nodeType === 3 && !n2.textContent.match(parentRe)) {
+       // text sibling without a space on the touching edge: add one there
+       n2.textContent = (bef?'':' ') + n2.textContent + (bef?' ':'');
+     } else if(n2.nodeType === 1) {
+       // element sibling: put a space text node between the two elements
+       $(n2)[sAdd](' ');
+     }
+   } else if((n2 = n.parentNode) && !n.a.opt.blockTag(n.a)) {
+     // no sibling on that side: the space goes to the edge of the parent
+     $(n2)[pAdd](' ');
+   } else {
+     return 0;
+   }
+   n[c].textContent = n[c].textContent.replace(childRe, '');
+   if(!n[c].textContent.length)
+     $(n[c]).remove();
+   return 1;
+ }
+ /**
+  * Mutation step: merge the element into its following sibling when both are the
+  * same non-block tag with equal attributes. The element's child nodes are moved
+  * to the front of the sibling and the element is handed to empty().
+  * With opt.spaceMerge set, a sibling text node holding a single space is first
+  * folded into the element after it, and the merge is retried with that element.
+  *
+  * @param {Object} a               tag state object
+  * @param {Function} addUnstable   queueing callback supplied by stateMachine
+  * @param {jQuery} [t]             sibling to merge into (defaults to the next node)
+  */
+ function next(a, addUnstable, t){
+   var t2; // declared locally; it used to leak as an implicit global
+   t = t || joint(a.$, true);
+   if(!t.length || a.opt.blockTag(a))
+     return;
+   if(a.opt.spaceMerge && t.length===1 && t[0].nodeType === 3 && t[0].wholeText===' '){
+     t2 = joint(t, true);
+     // nodes without a state object (e.g. text nodes) cannot be merged into
+     if(!t2.length || !t2[0].a || a.opt.blockTag(t2[0].a))
+       return;
+     t.remove();
+     t2.prepend(' ');
+     return next(a, addUnstable, t2);
+   }
+   if(!t[0].a || a.tag !== t[0].a.tag || !sameAttrs(a, t[0].a))
+     return;
+   t.prepend(a.$.contents());
+   // NOTE(review): empty() keeps elements that still carry attributes, so a merged
+   // element with attributes stays behind as an empty shell - confirm this is intended.
+   empty(a);
+   addUnstable(t[0].a);
+   // re-check the first child of the merge target, if it has a state object
+   (t = t.children(":first")).length && t[0].a && addUnstable(t[0].a);
+ }
+ /**
+  * Remove the node when it is an empty shell: no child nodes, no attributes, and
+  * its tag rule does not carry the `empty` flag (as 'br' does).
+  * Nodes without an attribute collection (e.g. comment nodes) count as having no
+  * attributes instead of raising a TypeError.
+  *
+  * @param {Object} a  tag state object (.$, .opt, .tag)
+  * @returns {boolean|undefined} true when the node was removed, otherwise undefined
+  */
+ function empty(a){
+   var t = a.opt.tags[a.tag],
+       attributes = a.$[0].attributes;
+   if((!t || !t.empty) && !a.$.contents().length && !(attributes && attributes.length)){
+     a.$.remove();
+     return true; // NOTE true/false - different API than in exclude
+   }
+ }
+ /**
+  * Mutation step: when the element is the only child node of its parent and the
+  * parent's hierarchy order is greater than the element's own, swap the nesting so
+  * the element wraps the parent. Both are queued again afterwards.
+  *
+  * @param {Object} a               tag state object
+  * @param {Function} addUnstable   queueing callback supplied by stateMachine
+  */
+ function parentOrderWrap(a, addUnstable){
+   var outer$ = a.$.parent();
+   var siblings = outer$.contents();
+   var tags = a.opt.tags;
+   if(siblings.length !== 1 || siblings[0] !== a.$[0]) {
+     return;
+   }
+   var outerOpt = tags[tag(outer$)];
+   if(!outerOpt || !(outerOpt.order > tags[a.tag].order)) {
+     return;
+   }
+   // 1. pull the element out, right behind its parent
+   outer$.after(a.$);
+   // 2. hand the element's child nodes over to the former parent
+   outer$.append(a.$.contents());
+   // 3. nest the former parent inside the element
+   a.$.append(outer$);
+   addUnstable(outer$[0].a);
+   addUnstable(a);
+ }