|
@@ -1,138 +1,284 @@
|
|
|
-;(function(){
|
|
|
- function normalize(opt){
|
|
|
- var el = $(this);
|
|
|
- opt = opt || $.extend(true, normalize.opt, opt||{});
|
|
|
- el.children().each(function(){
|
|
|
- var a = {$: $(this), opt: opt};
|
|
|
- a.tag = normalize.tag(a.$);
|
|
|
- $(a.opt.mutate).each(function(i,fn){
|
|
|
- fn && fn(a);
|
|
|
- });
|
|
|
- })
|
|
|
- return el;
|
|
|
- };
|
|
|
- var n = normalize, u;
|
|
|
- n.get = function(o, p){
|
|
|
- p = p.split('.');
|
|
|
- var i = 0, l = p.length, u;
|
|
|
- while((o = o[p[i++]]) != null && i < l){};
|
|
|
- return i < l ? u : o;
|
|
|
- }
|
|
|
- n.has = function(o,p){
|
|
|
- return Object.prototype.hasOwnProperty.call(o, p);
|
|
|
- }
|
|
|
- n.tag = function(e){
|
|
|
- return (($(e)[0]||{}).nodeName||'').toLowerCase();
|
|
|
- }
|
|
|
- n.attrs = function(e, cb){
|
|
|
- var attr = {};
|
|
|
- (e = $(e)) && e.length && $(e[0].attributes||[]).each(function(v,n){
|
|
|
- n = n.nodeName||n.name;
|
|
|
- v = e.attr(n);
|
|
|
- v = cb? cb(v,n,e) : v;
|
|
|
- if(v !== u && v !== false){ attr[n] = v }
|
|
|
- });
|
|
|
- return attr;
|
|
|
- }
|
|
|
- n.joint = function(e, d){
|
|
|
- d = (d? 'next' : 'previous') + 'Sibling'
|
|
|
- return $(($(e)[0]||{})[d]);
|
|
|
- }
|
|
|
- var h = {
|
|
|
- attr: function(a$, av, al){
|
|
|
- var l = function(i,v){
|
|
|
- var t = v;
|
|
|
- i = al? v : i;
|
|
|
- v = al? av[v.toLowerCase()] : t;
|
|
|
- a$.attr(i, v);
|
|
|
- }
|
|
|
- al? $(al.sort()).each(l) : $.each(av,l);
|
|
|
- }
|
|
|
- }
|
|
|
- n.opt = { // some reasonable defaults, limited to content alone.
|
|
|
- tags: {
|
|
|
- 'a': {attrs:{'src':1}, exclude:{'a':1}},
|
|
|
- 'b': {exclude:{'b':1}},
|
|
|
- //'blockquote':1,
|
|
|
- 'br': {empty: 1},
|
|
|
- 'div': 1,
|
|
|
- //'code':1,
|
|
|
- 'i': {exclude:{'i':1}},
|
|
|
- 'img': {attrs:{'src':1}, empty: 1},
|
|
|
- 'li':1, 'ol':1,
|
|
|
- 'p': {exclude:{'p':1,'div':1}},
|
|
|
- //'pre':1,
|
|
|
- 's': {exclude:{'s':1}},
|
|
|
- 'sub':1, 'sup':1,
|
|
|
- 'span': {exclude:{'p':1,'ul':1,'ol':1,'li':1,'br':1}},
|
|
|
- 'u': {exclude:{'u':1,'p':1}},
|
|
|
- 'ul':1
|
|
|
- }
|
|
|
- // a, audio, b, br, div, i, img, li, ol, p, s, span, sub, sup, u, ul, video
|
|
|
- // button, canvas, embed, form, iframe, input, style, svg, table,
|
|
|
- // Text: bold, italics, underline, align, bullet, list,
|
|
|
- ,convert: {
|
|
|
- 'em': 'i', 'strong': 'b'
|
|
|
- }
|
|
|
- ,attrs: {
|
|
|
- 'id':1
|
|
|
- ,'class':1
|
|
|
- ,'style':1
|
|
|
- }
|
|
|
- ,mutate: [
|
|
|
- function(a){ // attr
|
|
|
- a.attrs = [];
|
|
|
- a.attr = $.extend(a.opt.attrs, n.get(a.opt,'tags.'+ a.tag +'attrs'));
|
|
|
- a.attr = n.attrs(a.$, function(v,i){
|
|
|
- a.$.removeAttr(i);
|
|
|
- if(a.attr[i.toLowerCase()]){
|
|
|
- a.attrs.push(i)
|
|
|
- return v;
|
|
|
- }
|
|
|
- });
|
|
|
- // if this tag is gonna get converted, wait to add attr back till after the convert
|
|
|
- if(a.attrs && !n.get(a.opt, 'convert.' + a.tag)){
|
|
|
- h.attr(a.$, a.attr, a.attrs);
|
|
|
- }
|
|
|
- }
|
|
|
- ,function(a, tmp){ // convert
|
|
|
- if(!(tmp = n.get(a.opt,'convert.' + a.tag))){ return }
|
|
|
- a.attr = a.attr || n.attrs(a.$);
|
|
|
- a.$.replaceWith(a.$ = $('<'+ (a.tag = tmp.toLowerCase()) +'>').append(a.$.contents()));
|
|
|
- h.attr(a.$, a.attr, a.attrs);
|
|
|
- }
|
|
|
- ,function(a, tmp){ // lookahead
|
|
|
- if((tmp = n.joint(a.$,1)) && (tmp = tmp.contents()).length === 1 && a.tag === n.tag(tmp = tmp.first())){
|
|
|
- a.$.append(tmp.parent()); // no need to unwrap the child, since the recursion will do it for us
|
|
|
- }
|
|
|
- }
|
|
|
- ,function(a){ // recurse
|
|
|
- // this needs to precede the exclusion and empty.
|
|
|
- normalize(a);
|
|
|
- }
|
|
|
- ,function(a, tmp){ // exclude
|
|
|
- if(!n.get(a.opt,'tags.' + a.tag)
|
|
|
- || ((tmp = n.get(a.opt,'tags.'+ a.tag +'.exclude'))
|
|
|
- && a.$.parents($.map(tmp,function(i,v){return v})+' ').length)
|
|
|
- ){
|
|
|
- a.$.replaceWith(a.$.contents());
|
|
|
- }
|
|
|
- }
|
|
|
- ,function(a, tmp){ // prior
|
|
|
- if((tmp = n.joint(a.$)).length && a.tag === n.tag(tmp)){
|
|
|
- tmp.append(a.$.contents());
|
|
|
- }
|
|
|
- }
|
|
|
- ,function(a){ // empty
|
|
|
- // should always go last, since the element will be removed!
|
|
|
- if(a.opt.empty || !n.has(a.opt,'empty')){
|
|
|
- if(!n.get(a.opt,'tags.'+ a.tag +'.empty')
|
|
|
- && !a.$.contents().length){
|
|
|
- a.$.remove();
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- ]
|
|
|
- }
|
|
|
- $.fn.normalize = normalize;
|
|
|
-}());
|
|
|
+(function(){
|
|
|
+
|
|
|
+ $.normalize = function(html, customOpt){
|
|
|
+ var html, root$, wrapped, opt;
|
|
|
+ opt = html.opt || (customOpt ? prepareOptTags($.extend(true, baseOpt, customOpt))
|
|
|
+ : defaultOpt);
|
|
|
+ if(!html.opt){
|
|
|
+ // first call
|
|
|
+ unstableList.length = 0; // drop state from previous run (in case there has been error)
|
|
|
+ root$ = $('<div>'+html+'</div>');
|
|
|
+ }
|
|
|
+ // initial recursion
|
|
|
+ (html.$ || root$).contents().each(function(){
|
|
|
+ if(this.nodeType === this.TEXT_NODE) {
|
|
|
+ this.textContent = this.textContent.replace(/^[ \n]+|[ \n]+$/g, ' ');
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ var a = {$: $(this), opt: opt};
|
|
|
+ initTag(a);
|
|
|
+ $.normalize(a);
|
|
|
+ });
|
|
|
+ if(root$){
|
|
|
+ stateMachine();
|
|
|
+ return root$.html();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ var baseOpt = {
|
|
|
+ hierarchy: ['div', 'pre', 'ol', 'ul', 'li',
|
|
|
+ 'h1', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'a', // block
|
|
|
+ 'b', 'code', 'i', 'span', 's', 'sub', 'sup', 'u', // inline
|
|
|
+ 'br'] // empty
|
|
|
+ ,tags: {
|
|
|
+ 'a': {attrs:{'href':1}, exclude:{'a':1}},
|
|
|
+ 'b': {exclude:{'b':1,'p':1}},
|
|
|
+ 'br': {empty: 1},
|
|
|
+ 'i': {exclude:{'i':1,'p':1}},
|
|
|
+ 'span': {exclude:{'p':1,'ul':1,'ol':1,'li':1,'br':1}},
|
|
|
+ 's': {space:1},
|
|
|
+ 'u': {exclude:{'u':1,'p':1},space:1},
|
|
|
+ }
|
|
|
+ ,convert: {
|
|
|
+ 'em': 'i', 'strong': 'b', 'strike': 's',
|
|
|
+ }
|
|
|
+ ,attrs: {
|
|
|
+ 'id':1
|
|
|
+ ,'class':1
|
|
|
+ ,'style':1
|
|
|
+ }
|
|
|
+ ,blockTag: function(a){
|
|
|
+ return a.opt.tags[a.tag].order < a.opt.tags.a.order;
|
|
|
+ }
|
|
|
+ ,mutate: [exclude, moveSpaceUp, next, parentOrderWrap]
|
|
|
+ }
|
|
|
+
|
|
|
+ var defaultOpt = prepareOptTags($.extend(true, {}, baseOpt));
|
|
|
+
|
|
|
+ var unstableList = [];
|
|
|
+
|
|
|
+ function addUnstable(a) { // NOT ES5
|
|
|
+ if(!a.tag) { throw Error("not tag in ", a) }
|
|
|
+ if(a.unstable) return;
|
|
|
+ unstableList.push(a);
|
|
|
+ a.unstable = true;
|
|
|
+ }
|
|
|
+
|
|
|
+ function initTag(a) {
|
|
|
+ // initial handling (container, convert, attributes):
|
|
|
+ a.tag = tag(a.$);
|
|
|
+ if(empty(a)) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ parseAndRemoveAttrs(a);
|
|
|
+ convert(a);
|
|
|
+ setAttrs(a);
|
|
|
+ a.$[0].a = a; // link from dom element back to a
|
|
|
+ // state machine init
|
|
|
+ unstableList.push(a);
|
|
|
+ a.unstable = true;
|
|
|
+ return a;
|
|
|
+ }
|
|
|
+
|
|
|
+ function stateMachine() {
|
|
|
+ if(unstableList.length===0)
|
|
|
+ return;
|
|
|
+ var a, i = -1;
|
|
|
+ while (a = unstableList.pop()) { // PERF: running index is probably faster than shift (mutates array)
|
|
|
+ a.unstable = false;
|
|
|
+ $(a.opt.mutate).each(function(i,fn){
|
|
|
+ return fn && fn(a, addUnstable);
|
|
|
+ });
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ function prepareOptTags(opt) {
|
|
|
+ var name, tag, tags = opt.tags;
|
|
|
+ for(name in tags) {
|
|
|
+ if(opt.hierarchy.indexOf(name)===-1)
|
|
|
+ throw Error('tag "'+name+'" is missing hierachy definition');
|
|
|
+ }
|
|
|
+ opt.hierarchy.forEach(function(name){
|
|
|
+ if(!tags[name]){
|
|
|
+ tags[name] = {attrs: opt.attrs};
|
|
|
+ }
|
|
|
+ (tag=tags[name]).attrs = $.extend(tag.attrs||{}, opt.attrs);
|
|
|
+ tag.name = name; // not used, debug help (REMOVE later?)
|
|
|
+ // order
|
|
|
+ tag.order = opt.hierarchy.indexOf(name)
|
|
|
+ if(tag.order === -1) {
|
|
|
+ throw Error("Order of '"+name+"' not defined in hierarchy");
|
|
|
+ }
|
|
|
+ });
|
|
|
+ return opt;
|
|
|
+ }
|
|
|
+
|
|
|
+ // GENERAL UTILS
|
|
|
+
|
|
|
+ function get(o, args){ // path argments as separate string parameters
|
|
|
+ if(typeof args === 'string')
|
|
|
+ return o[args[0]];
|
|
|
+ var i = 0, l = args.length, u;
|
|
|
+ while((o = o[args[i++]]) != null && i < l){};
|
|
|
+ return i < l ? u : o;
|
|
|
+ }
|
|
|
+
|
|
|
+ function has(obj,prop){
|
|
|
+ return Object.prototype.hasOwnProperty.call(obj, prop);
|
|
|
+ }
|
|
|
+
|
|
|
+ // ELEMENT UTILS
|
|
|
+
|
|
|
+ function tag(e){
|
|
|
+ return (($(e)[0]||{}).nodeName||'').toLowerCase();
|
|
|
+ }
|
|
|
+
|
|
|
+ function joint(e, d){
|
|
|
+ d = (d? 'next' : 'previous') + 'Sibling';
|
|
|
+ return $(($(e)[0]||{})[d]);
|
|
|
+ }
|
|
|
+
|
|
|
+ // create key val attributes object from elements attributes
|
|
|
+ function attrsAsObj(e, filterCb){
|
|
|
+ var attrObj = {};
|
|
|
+ (e = $(e)) && e.length && $(e[0].attributes||[]).each(function(value,name){
|
|
|
+ name = name.nodeName||name.name;
|
|
|
+ value = e.attr(name);
|
|
|
+ value = filterCb? filterCb(value,name,e) : value;
|
|
|
+ if(value !== undefined && value !== false)
|
|
|
+ attrObj[name] = value;
|
|
|
+ });
|
|
|
+ return attrObj;
|
|
|
+ }
|
|
|
+
|
|
|
+ // TODO: PERF testing - for loop to compare through?
|
|
|
+ function sameAttrs(a, b) {
|
|
|
+ return JSON.stringify(a.attr) === JSON.stringify(b.attr);
|
|
|
+ }
|
|
|
+
|
|
|
+ // INITIAL MUTATORS
|
|
|
+
|
|
|
+ function parseAndRemoveAttrs(a) {
|
|
|
+ a.attrs = [];
|
|
|
+ var tag = a.opt.convert[a.tag] || a.tag,
|
|
|
+ tOpt = a.opt.tags[tag];
|
|
|
+ a.attr = tOpt && attrsAsObj(a.$, function(value,name){
|
|
|
+ a.$.removeAttr(name);
|
|
|
+ if(tOpt.attrs[name.toLowerCase()]){
|
|
|
+ a.attrs.push(name)
|
|
|
+ return value;
|
|
|
+ }
|
|
|
+ });
|
|
|
+ }
|
|
|
+
|
|
|
+ function setAttrs(a){
|
|
|
+ var l = function(ind,name){
|
|
|
+ var t = name;
|
|
|
+ name = a.attrs? name : ind;
|
|
|
+ var value = a.attrs? a.attr[name.toLowerCase()] : t;
|
|
|
+ a.$.attr(name, value);
|
|
|
+ }
|
|
|
+ a.attrs? $(a.attrs.sort()).each(l) : $.each(a.attr,l);
|
|
|
+ }
|
|
|
+
|
|
|
+ function convert(a){
|
|
|
+ var t;
|
|
|
+ if(t = a.opt.convert[a.tag]){
|
|
|
+ a.$.replaceWith(a.$ = $('<'+ (a.tag = t.toLowerCase()) +'>').append(a.$.contents()));
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // LOOPING (STATE MACHINE) MUTATORS
|
|
|
+
|
|
|
+ function exclude(a, addUnstable){
|
|
|
+ var t = get(a.opt, ['tags', a.tag]),
|
|
|
+ pt = get(a.opt, ['tags', tag(a.$.parent())]);
|
|
|
+ if(!t || (pt && get(pt, ['exclude', a.tag]))){
|
|
|
+ var c = a.$.contents();
|
|
|
+ a.$.replaceWith(c);
|
|
|
+ c.length===1 && c[0].a && addUnstable(c[0].a);
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ function moveSpaceUp(a, addUnstable){
|
|
|
+ var n = a.$[0];
|
|
|
+ if(moveSpace(n, true) + moveSpace(n, false)) {
|
|
|
+ // either front, back or both spaces moved
|
|
|
+ var c;
|
|
|
+ if(n.textContent==='') {
|
|
|
+ empty(a);
|
|
|
+ } else if((c = a.$.contents()[0]) && c.a) {
|
|
|
+ parentOrderWrap(c.a, addUnstable)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ function moveSpace(n, bef) {
|
|
|
+ var childRe = bef? /^ / : / $/,
|
|
|
+ parentRe = bef? / $/ : /^ /,
|
|
|
+ c = bef? 'firstChild' : 'lastChild',
|
|
|
+ s = bef? 'previousSibling' : 'nextSibling';
|
|
|
+ sAdd = bef? 'after' : 'before';
|
|
|
+ pAdd = bef? 'prepend' : 'append';
|
|
|
+ if(!n || !n[c] || n[c].nodeType !== n.TEXT_NODE || !n[c].wholeText.match(childRe)) {
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+ if((n2 = n[s]) && !n.a.opt.blockTag(n.a)) {
|
|
|
+ if(n2.nodeType === 3 && !n2.textContent.match(parentRe)) {
|
|
|
+ n2.textContent = (bef?'':' ') + n2.textContent + (bef?' ':'');
|
|
|
+ } else if(n2.nodeType === 1) {
|
|
|
+ $(n2)[sAdd](' ');
|
|
|
+ }
|
|
|
+ } else if((n2 = n.parentNode) && !n.a.opt.blockTag(n.a)) {
|
|
|
+ $(n2)[pAdd](' ');
|
|
|
+ } else {
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+ n[c].textContent = n[c].wholeText.replace(childRe, '');
|
|
|
+ if(!n[c].wholeText.length)
|
|
|
+ $(n[c]).remove();
|
|
|
+ return 1;
|
|
|
+ }
|
|
|
+
|
|
|
+ function next(a, addUnstable, t){
|
|
|
+ var t = t || joint(a.$, true), sm;
|
|
|
+ if(!t.length || a.opt.blockTag(a))
|
|
|
+ return;
|
|
|
+ if(a.opt.spaceMerge && t.length===1 && t[0].nodeType === 3 && t[0].wholeText===' '){
|
|
|
+ if(!(t2 = joint(t, true)).length || a.opt.blockTag(t2[0].a))
|
|
|
+ return;
|
|
|
+ t.remove();
|
|
|
+ t2.prepend(' ');
|
|
|
+ return next(a, addUnstable, t2);
|
|
|
+ }
|
|
|
+ if(!t[0].a || a.tag !== t[0].a.tag || !sameAttrs(a, t[0].a))
|
|
|
+ return;
|
|
|
+ t.prepend(a.$.contents());
|
|
|
+ empty(a);
|
|
|
+ addUnstable(t[0].a);
|
|
|
+ (t = t.children(":first")).length && addUnstable(t[0].a);
|
|
|
+ }
|
|
|
+
|
|
|
+ function empty(a){
|
|
|
+ var t = a.opt.tags[a.tag];
|
|
|
+ if((!t || !t.empty) && !a.$.contents().length && !a.$[0].attributes.length){
|
|
|
+ a.$.remove();
|
|
|
+ return true; // NOTE true/false - different API than in exclude
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ function parentOrderWrap(a, addUnstable){
|
|
|
+ var parent = a.$.parent(), children = parent.contents(),
|
|
|
+ tags = a.opt.tags, ptag;
|
|
|
+
|
|
|
+ if(children.length===1 && children[0] === a.$[0]
|
|
|
+ && (ptag=tags[tag(parent)]) && ptag.order > tags[a.tag].order){
|
|
|
+ parent.after(a.$);
|
|
|
+ parent.append(a.$.contents());
|
|
|
+ a.$.append(parent);
|
|
|
+ addUnstable(parent[0].a);
|
|
|
+ addUnstable(a);
|
|
|
+ }
|
|
|
+ }
|
|
|
+})();
|