/* * Copyright (c) 2010 - The OWASP Foundation * * The jquery-encoder is published by OWASP under the MIT license. You should read and accept the * LICENSE before you use, modify, and/or redistribute this software. */ (function($) { var default_immune = { 'js' : [',','.','_',' '] }; var attr_whitelist_classes = { 'default': [',','.','-','_',' '] }; var attr_whitelist = { 'width': ['%'], 'height': ['%'] }; var css_whitelist_classes = { 'default': ['-',' ','%'], 'color': ['#',' ','(',')'], 'image': ['(',')',':','/','?','&','-','.','"','=',' '] }; var css_whitelist = { 'background': ['(',')',':','%','/','?','&','-',' ','.','"','=','#'], 'background-image': css_whitelist_classes['image'], 'background-color': css_whitelist_classes['color'], 'border-color': css_whitelist_classes['color'], 'border-image': css_whitelist_classes['image'], 'color': css_whitelist_classes['color'], 'icon': css_whitelist_classes['image'], 'list-style-image': css_whitelist_classes['image'], 'outline-color': css_whitelist_classes['color'] }; // In addition to whitelist filtering for proper encoding - there are some things that should just simply be // considered to be unsafe. Setting javascript events or style properties with the encodeHTMLAttribute method and // using javascript: urls should be looked at as bad form all the way around and should be avoided. The blacklisting // feature of the plugin can be disabled by calling $.encoder.disableBlacklist() prior to the first call encoding // takes place (ES5 Compatibility Only) var unsafeKeys = { // Style and JS Event attributes should be set through the appropriate methods encodeForCSS, encodeForURL, or // encodeForJavascript 'attr_name' : ['on[a-z]{1,}', 'style', 'href', 'src'], // Allowing Javascript url's in untrusted data is a bad idea. 'attr_val' : ['javascript:'], // These css keys and values are considered to be unsafe to pass in untrusted data into. 
'css_key' : ['behavior', '-moz-behavior', '-ms-behavior'], 'css_val' : ['expression'] }; var options = { blacklist: true }; var hasBeenInitialized = false; /** * Encoder is the static container for the encodeFor* series and canonicalize methods. They are contained within * the encoder object so the plugin can take advantage of object freezing provided in ES5 to protect these methods * from being tampered with at runtime. */ $.encoder = { author: 'Chris Schmidt (chris.schmidt@owasp.org)', version: '${project.version}', /** * Allows configuration of runtime options prior to using the plugin. Once the plugin has been initialized, * options cannot be changed. * * Possible Options: *
* Options Description Default * ---------------------------------------------------------------------------- * blacklist Enable blacklist validation true ** * @param opts */ init: function(opts) { if ( hasBeenInitialized ) throw "jQuery Encoder has already been initialized - cannot set options after initialization"; hasBeenInitialized = true; $.extend( options, opts ); }, /** * Encodes the provided input in a manner safe to place between to HTML tags * @param input The untrusted input to be encoded */ encodeForHTML: function(input) { hasBeenInitialized = true; var div = document.createElement('div'); $(div).text(input); return $(div).html(); }, /** * Encodes the provided input in a manner safe to place in the value (between to "'s) in an HTML attribute. * * Unless directed not to, this method will return the full
attr="value"
as a string. If
* omitAttributeName
is true, the method will only return the value
. Both the attribute
* name and value are canonicalized and verified with whitelist and blacklist prior to returning.
*
* Example:
* * $('#container').html('<div ' + $.encoder.encodeForHTMLAttribute('class', untrustedData) + '/>'); ** * @param attr The attribute to encode for * @param input The untrusted input to be encoded * @param omitAttributeName Whether to omit the attribute name and the enclosing quotes or not from the encoded * output. * @throws String Reports error when an unsafe attribute name or value is used (unencoded) * @throws String Reports error when attribute name contains invalid characters (unencoded) */ encodeForHTMLAttribute: function(attr,input,omitAttributeName) { hasBeenInitialized = true; // Check for unsafe attributes attr = $.encoder.canonicalize(attr).toLowerCase(); input = $.encoder.canonicalize(input); if ( $.inArray(attr, unsafeKeys['attr_name']) >= 0 ) { throw "Unsafe attribute name used: " + attr; } for ( var a=0; a < unsafeKeys['attr_val']; a++ ) { if ( input.toLowerCase().match(unsafeKeys['attr_val'][a]) ) { throw "Unsafe attribute value used: " + input; } } immune = attr_whitelist[attr]; // If no whitelist exists for the attribute, use the minimal default whitelist if ( !immune ) immune = attr_whitelist_classes['default']; var encoded = ''; if (!omitAttributeName) { for (var p = 0; p < attr.length; p++ ) { var pc = attr.charAt(p); if (!pc.match(/[a-zA-Z\-0-9]/)) { throw "Invalid attribute name specified"; } encoded += pc; } encoded += '="'; } for (var i = 0; i < input.length; i++) { var ch = input.charAt(i), cc = input.charCodeAt(i); if (!ch.match(/[a-zA-Z0-9]/) && $.inArray(ch, immune) < 0) { var hex = cc.toString(16); encoded += '' + hex + ';'; } else { encoded += ch; } } if (!omitAttributeName) { encoded += '"'; } return encoded; }, /** * Encodes the provided input in a manner safe to place in the value of an elements
style
attribute
*
* Unless directed not to, this method will return the full property: value
as a string. If
* omitPropertyName
is true
, the method will only return the value
. Both
* the property name and value are canonicalized and verified with whitelist and blacklist prior to returning.
*
* Example:
* * $('#container').html('<div style="' + $.encoder.encodeForCSS('background-image', untrustedData) + '"/>'); ** * @param propName The property name that is being set * @param input The untrusted input to be encoded * @param omitPropertyName Whether to omit the property name from the encoded output * * @throws String Reports error when an unsafe property name or value is used * @throws String Reports error when illegal characters passed in property name */ encodeForCSS: function(propName,input,omitPropertyName) { hasBeenInitialized = true; // Check for unsafe properties propName = $.encoder.canonicalize(propName).toLowerCase(); input = $.encoder.canonicalize(input); if ( $.inArray(propName, unsafeKeys['css_key'] ) >= 0 ) { throw "Unsafe property name used: " + propName; } for ( var a=0; a < unsafeKeys['css_val'].length; a++ ) { if ( input.toLowerCase().indexOf(unsafeKeys['css_val'][a]) >= 0 ) { throw "Unsafe property value used: " + input; } } immune = css_whitelist[propName]; // If no whitelist exists for that property, use the minimal default whitelist if ( !immune ) immune = css_whitelist_classes['default']; var encoded = ''; if (!omitPropertyName) { for (var p = 0; p < propName.length; p++) { var pc = propName.charAt(p); if (!pc.match(/[a-zA-Z\-]/)) { throw "Invalid Property Name specified"; } encoded += pc; } encoded += ': '; } for (var i = 0; i < input.length; i++) { var ch = input.charAt(i), cc = input.charCodeAt(i); if (!ch.match(/[a-zA-Z0-9]/) && $.inArray(ch, immune) < 0) { var hex = cc.toString(16); var pad = '000000'.substr((hex.length)); encoded += '\\' + pad + hex; } else { encoded += ch; } } return encoded; }, /** * Encodes the provided input in a manner safe to place in the value of a POST or GET parameter on a request. 
This * is primarily used to mitigate parameter-splitting attacks and ensure that parameter values are within specification * * @param input The untrusted data to be encoded * @param attr (optional) If passed in, the method will return the full string
attr="value"
where
* the value will be encoded for a URL and both the attribute and value will be canonicalized prior
* to encoding the value.
*/
encodeForURL: function(input,attr) {
hasBeenInitialized = true;
var encoded = '';
if (attr) {
if (attr.match(/^[A-Za-z\-0-9]{1,}$/)) {
encoded += $.encoder.canonicalize(attr).toLowerCase();
} else {
throw "Illegal Attribute Name Specified";
}
encoded += '="';
}
encoded += encodeURIComponent(input);
encoded += attr ? '"' : '';
return encoded;
},
/**
* Encodes the provided input in a manner safe to place in a javascript context, such as the value of an entity
* event like onmouseover. This encoding is slightly different than just encoding for an html attribute value as
* it follows the escaping rules of javascript. Use this method when dynamically writing out html to an element
* as opposed to building an element up using the DOM - as with the .html() method.
*
* Example $('#element').html('<a onclick=somefunction(\'"' + $.encodeForJavascript($('#input').val()) + '\');">Blargh</a>');
*
* @param input The untrusted input to be encoded
*/
encodeForJavascript: function(input) {
hasBeenInitialized = true;
if ( !immune ) immune = default_immune['js'];
var encoded = '';
for (var i=0; i < input.length; i++ ) {
var ch = input.charAt(i), cc = input.charCodeAt(i);
if ($.inArray(ch, immune) >= 0 || hex[cc] == null ) {
encoded += ch;
continue;
}
var temp = cc.toString(16), pad;
if ( cc < 256 ) {
pad = '00'.substr(temp.length);
encoded += '\\x' + pad + temp.toUpperCase();
} else {
pad = '0000'.substr(temp.length);
encoded += '\\u' + pad + temp.toUpperCase();
}
}
return encoded;
},
/**
* Encodes the provided input to allow only alphanumeric characters, '-' and '_'. Other charactesr are replaced with '_'.
* This encoding allows for using the resulting value as a CSS or jQuery selector, but it cannot be reversed.
*
* @param input The untrusted input to be encoded
*/
encodeForAlphaNumeric: function(input) {
hasBeenInitialized = true;
input = $.encoder.canonicalize(input);
var encoded = '';
for (var i = 0; i < input.length; i++) {
var ch = input.charAt(i), cc = input.charCodeAt(i);
if (!ch.match(/[a-zA-Z0-9-_]/)) {
encoded += '_';
} else {
encoded += ch;
}
}
return encoded;
},
canonicalize: function(input,strict) {
hasBeenInitialized = true;
if (input===null) return null;
var out = input, cycle_out = input;
var decodeCount = 0, cycles = 0;
var codecs = [ new HTMLEntityCodec(), new PercentCodec(), new CSSCodec() ];
while (true) {
cycle_out = out;
for (var i=0; i < codecs.length; i++ ) {
var new_out = codecs[i].decode(out);
if (new_out != out) {
decodeCount++;
out = new_out;
}
}
if (cycle_out == out) {
break;
}
cycles++;
}
if (strict && decodeCount > 1) {
throw "Attack Detected - Multiple/Double Encodings used in input";
}
return out;
}
};
// Lookup table indexed by character code for codes 0x00 through 0xFE: null for ASCII letters and digits,
// otherwise the code written in lower-case hexadecimal without padding.
var hex = [];
for ( var c = 0; c < 0xFF; c++ ) {
    var isAlphaNumeric = /[0-9A-Za-z]/.test(String.fromCharCode(c));
    hex.push(isAlphaNumeric ? null : c.toString(16));
}
// Encodes every non-function own property of opts.map - or the single opts.name/opts.unsafe pair when no
// map is given - with encodeOne(name, value) and returns the results keyed by the same names.
function encodeMapEntries(opts, encodeOne) {
    // Plain objects, not arrays: the result is passed on to jQuery's .css()/.attr() as a property map.
    var work = {};
    var out = {};

    if (opts.map) {
        work = opts.map;
    } else {
        work[opts.name] = opts.unsafe;
    }

    for (var k in work) {
        // Call hasOwnProperty through Object.prototype so a caller-supplied map cannot shadow it.
        if (typeof work[k] != 'function' && Object.prototype.hasOwnProperty.call(work, k)) {
            out[k] = encodeOne(k, work[k]);
        }
    }

    return out;
}

// Per-context encoders used by $.fn.encode. Each takes the resolved options object and returns the value
// handed to the matching jQuery setter: a string for 'html', a name-to-encoded-value object for 'css' and
// 'attr' (values are encoded without their property/attribute name).
var methods = {
    html: function(opts) {
        return $.encoder.encodeForHTML(opts.unsafe);
    },

    css: function(opts) {
        return encodeMapEntries(opts, function(name, value) {
            return $.encoder.encodeForCSS(name, value, true);
        });
    },

    attr: function(opts) {
        return encodeMapEntries(opts, function(name, value) {
            return $.encoder.encodeForHTMLAttribute(name, value, true);
        });
    }
};
/**
 * Use this instead of setting the content of an element manually with untrusted user supplied data. The context can
 * be one of 'html', 'css', or 'attr'.
 *
 * Accepted call forms:
 *   .encode(options)                  - object with any of: context, unsafe, name, map
 *   .encode('html', unsafe)           - encode unsafe and set it as the element content
 *   .encode('css' | 'attr', map)      - encode every value of map and set them all
 *   .encode(context, name, unsafe)    - encode a single named property/attribute value
 *
 * @returns whatever the underlying jQuery setter (.html, .css or .attr) returns
 * @throws String when the context is not one of 'html', 'css' or 'attr'
 */
$.fn.encode = function() {
    hasBeenInitialized = true;

    var opts = {
        'context' : 'html',
        'unsafe' : null,
        'name' : null,
        'map' : null,
        'setter' : null,
        'strict' : true
    };

    if (arguments.length == 1 && typeof arguments[0] == 'object') {
        $.extend(opts, arguments[0]);
    } else {
        opts.context = arguments[0];

        if (arguments.length == 2) {
            if (opts.context == 'html') {
                opts.unsafe = arguments[1];
            } else if (opts.context == 'attr' || opts.context == 'css') {
                // The second argument is a name-to-value map for these contexts.
                opts.map = arguments[1];
            }
        } else {
            opts.name = arguments[1];
            opts.unsafe = arguments[2];
        }
    }

    if (opts.context == 'html') {
        opts.setter = this.html;
    } else if (opts.context == 'css') {
        opts.setter = this.css;
    } else if (opts.context == 'attr') {
        opts.setter = this.attr;
    }

    // Fail with a readable message instead of a TypeError from calling a missing encoder or setter.
    if (typeof opts.setter != 'function' || typeof methods[opts.context] != 'function') {
        throw "Unsupported encoding context: " + opts.context;
    }

    return opts.setter.call(this, methods[opts.context].call(this, opts));
};
/**
 * The pushback string is used by Codecs to allow them to push decoded characters back onto a string for further
 * decoding. This is necessary to detect double-encoding.
 *
 * It is a cursor over an input string with room for one pushed-back value, which is handed out before any
 * further input, plus a single mark/reset slot that snapshots both the cursor position and the pushed-back value.
 */
var PushbackString = Class.extend({
    _input: null,
    _pushback: null,
    _temp: null,
    _index: 0,
    _mark: 0,

    // True while the cursor has not reached the end of the input (ignores any pushed-back value).
    _hasNext: function() {
        var source = this._input;
        return source != null && source.length != 0 && this._index < source.length;
    },

    // Stores the string to read from.
    init: function(input) {
        this._input = input;
    },

    // Stores c so that it is the next value returned by next().
    pushback: function(c) {
        this._pushback = c;
    },

    // Current cursor position within the input.
    index: function() {
        return this._index;
    },

    // True when a pushed-back value is waiting or input remains.
    hasNext: function() {
        return this._pushback != null || this._hasNext();
    },

    // Returns the pushed-back value if there is one (clearing it), otherwise the next input character and
    // advances the cursor; null when nothing is left.
    next: function() {
        var buffered = this._pushback;
        if (buffered == null) {
            return this._hasNext() ? this._input.charAt(this._index++) : null;
        }
        this._pushback = null;
        return buffered;
    },

    // Like next(), but yields null unless the value read is a hexadecimal digit. The value is consumed
    // either way.
    nextHex: function() {
        var candidate = this.next();
        return (candidate != null && candidate.match(/[0-9A-Fa-f]/)) ? candidate : null;
    },

    // Without an argument: returns the upcoming value without consuming it (null at the end).
    // With a truthy argument: reports whether the pushed-back value or the upcoming input character equals c.
    peek: function(c) {
        var buffered = this._pushback;

        if (!c) {
            if (buffered) {
                return buffered;
            }
            return this._hasNext() ? this._input.charAt(this._index) : null;
        }

        if (buffered && buffered == c) {
            return true;
        }
        if (!this._hasNext()) {
            return false;
        }
        return this._input.charAt(this._index) == c;
    },

    // Remembers the current position and pushed-back value for a later reset().
    mark: function() {
        this._mark = this._index;
        this._temp = this._pushback;
    },

    // Restores the position and pushed-back value saved by the last mark().
    reset: function() {
        this._index = this._mark;
        this._pushback = this._temp;
    },

    // Everything not yet consumed: the pushed-back value (if any) followed by the unread input.
    remainder: function() {
        var rest = this._input.substr( this._index );
        return ( this._pushback != null ) ? this._pushback + rest : rest;
    }
});
/**
 * Base class for all codecs to extend. This class defines the default behavior of codecs: decode() walks the
 * input through a PushbackString and asks decodeCharacter() for each decoded value.
 */
var Codec = Class.extend({
    // Decodes the whole input. When decodeCharacter() yields nothing for the current position, the next raw
    // value is copied through instead.
    decode: function(input) {
        var pbs = new PushbackString(input);
        var decoded = '';

        while (pbs.hasNext()) {
            var piece = this.decodeCharacter(pbs);
            decoded += (piece != null) ? piece : pbs.next();
        }

        return decoded;
    },

    /** @Abstract - the default implementation returns the next value undecoded. */
    decodeCharacter: function(pbs) {
        return pbs.next();
    }
});
/**
* Codec for decoding HTML Entities in strings. This codec will decode named entities as well as numeric and hex
* entities even with padding. For named entities, it internally uses a Trie to locate the 'best-match' and speed
* up the search.
*/
var HTMLEntityCodec = Codec.extend({
decodeCharacter: function(input) {
input.mark();
var first = input.next();
// If there is no input, or this is not an entity - return null
if ( first == null || first != '&' ) {
input.reset();
return null;
}
var second = input.next();
if ( second == null ) {
input.reset();
return null;
}
var c;
if ( second == '#' ) {
c = this._getNumericEntity(input);
if ( c != null ) return c;
} else if ( second.match(/[A-Za-z]/) ) {
input.pushback(second);
c = this._getNamedEntity(input);
if ( c != null ) return c;
}
input.reset();
return null;
},
_getNamedEntity: function(input) {
var possible = '', entry, len;
len = Math.min(input.remainder().length, ENTITY_TO_CHAR_TRIE.getMaxKeyLength());
for(var i=0;i