jsFlagState = $jsFlags;
$this->htmlOptions = $htmlOptions;
}
/*****************************************************************
* HTML parsers - main parsing function splits up document into
* component parts ('normal' HTML, scripts and styles)
******************************************************************/
// Parse a whole HTML document and return the rewritten document.
//   $input  - the raw document text
//   $insert - markup to add to the page; skipped entirely when exactly false
//   $inject - when truthy, the output of injectionJS() is added to the page
//   $footer - when truthy, markup added near the end of the page
// Processing order: one-off whole-document rewrites, then the document is
// split at script/style opening tags and each part is passed to HTML(),
// CSS() or JS(), then optional page encoding, then the insertions.
// NOTE(review): several pattern literals in this copy look truncated (their
// tag text is missing), so the exact elements they target cannot be
// confirmed from here.
function HTMLDocument($input, $insert='', $inject=false, $footer='') {
//
// Apply parsing that only needs to be done once..
//
// Remove titles if option is enabled (limit 1: only the first match is removed)
if ( $this->htmlOptions['stripTitle'] ) {
$input = preg_replace('#
#is', '', $input, 1);
}
// Remove the first matched tag and let html_stripBase() record its href
$input = preg_replace_callback('# ]{1,1000}))(?(1)\\1|)[^>]*>#i', 'html_stripBase', $input, 1);
// Proxify url= values in meta redirects (first match only)
$input = preg_replace_callback('#content\s*=\s*(["\\\'])?[0-9]+\s*;\s*url=([\\\'"]|&\#39;)?((?(?<=")[^"]+|(?(?<=\\\')[^\\\']+|[^\\\'" >]+)))(?(2)\\2|)(?(1)\\1|)#i', 'html_metaRefresh', $input, 1);
// Process forms (every match is handed to html_form())
$input = preg_replace_callback('##is', 'html_form', $input);
// Remove scripts blocks (avoids individual processing below)
if ( $this->htmlOptions['stripJS'] ) {
$input = preg_replace('##is', '', $input);
}
//
// Split up the document into its different types and parse them
//
// Build up new document into this var
$new = '';
// Position in $input up to which everything has already been consumed
$offset = 0;
// Find instances of script or style blocks
while ( preg_match('#<(s(?:cript|tyle))[^>]*>#i', $input, $match, PREG_OFFSET_CAPTURE, $offset) ) {
// What type of block is this? (lowercased tag name: script or style)
$block = strtolower($match[1][0]);
// Start position of content: outer = start of the opening tag,
// inner = first byte after the opening tag
$outerStart = $match[0][1];
$innerStart = $outerStart + strlen($match[0][0]);
// Determine type of end tag and find its position
// NOTE(review): stripos() returns false when no end tag exists and that
// case is not handled here - confirm the intended behaviour.
$endTag = "$block>";
$innerEnd = stripos($input, $endTag, $innerStart);
$outerEnd = $innerEnd + strlen($endTag);
// Parse everything up till here (including the opening tag) and add to the new document
$new .= $this->HTML(substr($input, $offset, $innerStart - $offset));
// Find parsing function: style content goes to CSS(), anything else to JS()
$parseFunction = $block == 'style' ? 'CSS' : 'JS' ;
// Add the parsed block
$new .= $this->$parseFunction(substr($input, $innerStart, $innerEnd - $innerStart));
// Move offset to new position (the end tag itself is parsed as HTML next time round)
$offset = $innerEnd;
}
// And add the final chunk (between last script/style block and end of doc)
$new .= $this->HTML(substr($input, $offset));
// Replace input with the updated document
$input = $new;
// Encode the page
if ( $this->htmlOptions['encodePage'] ) {
$input = encodePage($input);
}
//
// Now add our own code bits
//
// Insert our mini form after the opening body tag (see the attempt below)
if ( $insert !== false ) {
// Check for a frameset
if ( ( $useFrames = stripos($input, ']+src\s*=\s*([\\\'"])?((?(1)(?(?<=")[^"]{1,1000}|[^\\\']{1,1000})|[^\s"\\\'>]{1,1000}))(?(1)\\1|)#i', 'html_flagFrames', $input);
}
// Attempt to add after body; $tmp receives the number of replacements made
$input = preg_replace('#(]*>)#i', '$1' . $insert, $input, 1, $tmp);
// Check it inserted and, if not (and not a frameset), put it at the very start
if ( ! $tmp && ! $useFrames ) {
$input = $insert . $input;
}
}
// Insert our javascript library
if ( $inject ) {
// Generate javascript to insert
$inject = injectionJS();
// Add our proxy javascript directly after the first match of the pattern
$input = preg_replace('#(]*>)#i', '$1' . $inject, $input, 1, $tmp);
// If nothing matched, just prepend
if ( ! $tmp ) {
$input = $inject . $input;
}
}
// Add anything to the footer?
if ( $footer ) {
// Footer goes directly before the first match of the pattern
$input = preg_replace('#(]*>)#i', $footer . '$1', $input, 1, $tmp);
// If nothing matched, just append the footer
if ( ! $tmp ){
$input .= $footer;
}
}
// Return new document
return $input;
}
// Parse HTML sections
// Rewrite one chunk of plain HTML (no script/style content) and return it.
// Depending on htmlOptions this strips or rewrites object-related tags,
// then rewrites inline event handlers, style attributes and the URL
// attributes href, src and background.
// NOTE(review): some pattern literals in this copy look truncated (their tag
// text is missing); the comments describe only what is still visible.
function HTML($input) {
// Removing objects? Follow spec and display inner content of object tags instead.
if ( $this->htmlOptions['stripObjects'] ) {
// Remove all object tags (including those deprecated but still common)
// $tmp receives the number of opening tags removed
$input = preg_replace('#<(?>object|applet|param|embed)[^>]*>#i', '', $input, -1, $tmp);
// Found any? Remove the corresponding end tags (at most $tmp of them)
if ( $tmp ) {
$input = preg_replace('#(?>object|applet|param|embed)>#i', '', $input, $tmp);
}
} else {
// Parse tags: each match is passed to html_paramValue()
$input = preg_replace_callback('# ]+value\s*=\s*([\\\'"])?((?(1)(?(?<=")[^"]{1,1000}|[^\\\']{1,1000})|[^\s"\\\'>]{1,1000}))(?(1)\\1|)[^>]*>#i', 'html_paramValue', $input);
// To do: proxify object related URLs
}
// Show content within noscript tags by removing the tags themselves
// (preg_ seems to be faster than 2 str_ireplace() calls)
if ( $this->htmlOptions['stripJS'] ) {
$input = preg_replace('#?noscript>#i', '', $input);
}
// Parse onX events (handled by the html_eventJS method of this object)
$input = preg_replace_callback('#\b(on(?]{1,1000}))(?(2)\\2|)#i', array(&$this, 'html_eventJS'), $input);
// Parse style attributes (handled by the html_elementCSS method of this object)
$input = preg_replace_callback('#style\s*=\s*([\\\'"])?((?(1)(?(?<=")[^"]{1,1000}|[^\\\']{1,1000})|[^\s"\\\'>]{1,1000}))(?(1)\\1|)#i', array(&$this, 'html_elementCSS'), $input);
// Proxify URL attributes - this is the bottleneck but optimized
// as much as possible (or at least, as much as I can).
// Capture groups: 1 = attribute name, 2 = optional quote, 3 = attribute value
$input = preg_replace_callback('#(?><[A-Z][A-Z0-9]{0,15})(?>\s+[^>\s]+)*?\s*(?>(href|src|background)\s*=(?!\\\\)\s*)(?>([\\\'"])?)((?(2)(?(?<=")[^"]{1,1000}|[^\\\']{1,1000})|[^ >]{1,1000}))(?(2)\\2|)#i', 'html_attribute', $input);
// Return changed input
return $input;
}
// Proxify an onX javascript event
function html_eventJS($input) {
    // Callback for inline event handler attributes.
    // $input[1] = attribute name, $input[2] = quote, $input[3] = handler code.
    // With script stripping enabled the whole attribute is removed.
    if ( $this->htmlOptions['stripJS'] ) {
        return '';
    }

    $attribute = $input[1];
    $quote     = $input[2];

    // Rebuild the attribute around the parsed handler code
    return $attribute . '=' . $quote . $this->JS($input[3]) . $quote;
}
// Proxify a style="CSS" attribute
function html_elementCSS($input) {
    // Callback for style attributes.
    // $input[1] = quote (may be empty), $input[2] = the CSS text.
    $quote  = $input[1];
    $parsed = $this->CSS($input[2]);

    // Rebuild the attribute with the same quoting as the original
    return 'style=' . $quote . $parsed . $quote;
}
/*****************************************************************
* CSS parser - main parsing function
* CSS parsing is complicated by the caching of CSS files. We need
* to consider (A) cross-domain caching and (B) the unique URLs option.
* A) If possible, use a relative URL so the saved URLs do not explicitly
* point to a single domain.
* B) There is a second set of callback functions with "_unique" suffixed
* and these return the original URL to be reparsed.
******************************************************************/
// The URLs depend on the unique and path info settings. The type parameter allows
// us to specify the unique callbacks.
function CSS($input, $storeUnique=false) {
    // Rewrite the URLs found in a piece of CSS and return the result.
    // $storeUnique selects the "_unique" family of callbacks instead of
    // the normal proxifying callbacks.
    $suffix = '';
    if ( $storeUnique ) {
        $suffix = '_unique';
    }

    // Callback base name => pattern, applied in this order:
    // url(...), @import "..." and src=...
    $rules = array(
        'css_URL'    => '#\burl\s*\(\s*[\\\'"]?([^\\\'"\)]+)[\\\'"]?\s*\)#i',
        'css_import' => '#@import\s*[\\\'"]([^\\\'"\(\)]+)[\\\'"]#i',
        'css_src'    => '#\bsrc\s*=\s*([\\\'"])?([^)\\\'"]+)(?(1)\\1|)#i',
    );

    foreach ( $rules as $callback => $pattern ) {
        $input = preg_replace_callback($pattern, $callback . $suffix, $input);
    }

    return $input;
}
/*****************************************************************
* Javascript parser - main parsing function
*
* The specific parts that need 'proxifying' depends on which javascript
* functions we've been able to override. On first page load, the browser
* capabilities are tested to see what we can do client-side and the results
* sent back to us. This allows us to parse only what we have to.
* If $CONFIG['override_javascript'] is disabled, all commands are parsed
* server-side. This will use much more CPU!
*
* Commands to proxify only if no override at all:
* document.write()
* document.writeln()
* window.open()
* eval()
*
* Commands to proxify, regardless of browser capabilities:
* location.replace()
* .innerHTML=
*
* Commands to proxify if the extra "watch" flag is set
* (the browser doesn't support the .watch() method):
* location=
* x.location=
* location.href=
*
* Commands to proxify if the extra "setters" flag is set
* (the browser doesn't support the __defineSetter__() method):
* .src=
* .href=
* .background=
* .action=
*
* Commands to proxify if the extra "ajax" flag is set
* (the browser failed to override the .open() method):
* XMLHttpRequest.open()
******************************************************************/
// Rewrite a piece of javascript so that selected commands pass through the
// proxy helper functions (parseURL, parseHTML, parseJS) and return the result.
// Returns an empty string when the stripJS option is enabled.
// Which commands are rewritten depends on $this->jsFlagState:
//   - an array: used as the list of flags as-is
//   - anything else: the ajax, watch and setters flags are all applied
//   - exactly null: the base flag is added as well
function JS($input) {

    // Stripping?
    if ( $this->htmlOptions['stripJS'] ) {
        return '';
    }

    // Get our flags
    $flags = $this->jsFlagState;

    // Unless we know we don't need to, apply all the browser-specific flags
    if ( ! is_array($this->jsFlagState) ) {
        $flags = array('ajax', 'watch', 'setters');
    }

    // If override is disabled, add a "base" flag
    if ( $this->jsFlagState === null ) {
        $flags[] = 'base';
    }

    // Start parsing!
    $search = array();

    // Create shortcuts to various search patterns:
    // "before" - matches preceding character (string of single char) [ignoring whitespace]
    // "after" - matches next character (string of single char) [ignoring whitespace]
    // "id" - key for identifying the original match (e.g. if we have >1 of the same key)
    $assignmentPattern = array('before' => '.', 'after' => '=');
    $methodPattern     = array('before' => '.', 'after' => '(');
    $functionPattern   = array('after' => '(');

    // Configure strings to search for, starting with always replaced commands
    $search['innerHTML'][] = $assignmentPattern;
    $search['location'][]  = array('after' => '.', 'id' => 'replace()');
    # ^ This is only for location.replace() - other forms are handled later

    // Look for attribute assignments
    if ( in_array('setters', $flags) ) {
        $search['src'][]        = $assignmentPattern;
        $search['href'][]       = $assignmentPattern;
        $search['action'][]     = $assignmentPattern;
        $search['background'][] = $assignmentPattern;
    }

    // Look for location changes
    // location.href will be handled above, location= is handled here
    if ( in_array('watch', $flags) ) {
        $search['location'][] = array('after' => '=', 'id' => 'assignment');
    }

    // Look for .open() if either AJAX (XMLHttpRequest.open) or
    // base (window.open) flags are present
    if ( in_array('ajax', $flags) || in_array('base', $flags) ) {
        $search['open'][] = $methodPattern;
    }

    // Add the basic code if no override
    if ( in_array('base', $flags) ) {
        $search['eval'][]    = $functionPattern;
        $search['writeln'][] = $methodPattern;
        $search['write'][]   = $methodPattern;
    }

    // Set up starting parameters
    $offset        = 0;
    $length        = strlen($input);
    $searchStrings = array_keys($search);

    while ( $offset < $length ) {

        // Start off by assuming no more items (i.e. the next position
        // of interest is the end of the document)
        $commandPos = $length;

        // Loop through the search subjects and keep the nearest one.
        // On a tie the earlier entry in $searchStrings wins, which is why
        // writeln is registered before write.
        foreach ( $searchStrings as $item ) {

            // Any more instances of this?
            if ( ( $tmp = strpos($input, $item, $offset) ) === false ) {
                // Nope, skip to next item
                continue;
            }

            // Closer to the currently held 'next' position?
            if ( $tmp < $commandPos ) {
                $commandPos = $tmp;
                $command    = $item;
            }

        }

        // No matches found? Finish parsing.
        if ( $commandPos == $length ) {
            break;
        }

        // We've found the main point of interest; now use the
        // search parameters to check the surrounding chars to validate
        // the match.
        $valid = false;

        foreach ( $search[$command] as $pattern ) {

            // Check the preceding chars
            if ( isset($pattern['before']) && str_checkprev($input, $pattern['before'], $commandPos-1) === false ) {
                continue;
            }

            // Check next chars; $postCharPos ends up just past the "after"
            // character and any whitespace following it
            if ( isset($pattern['after']) && ( $postCharPos = str_checknext($input, $pattern['after'], $commandPos + strlen($command), false, true) ) === false ) {
                continue;
            }

            // Still here? Match must be OK so generate a match ID
            if ( isset($pattern['id']) ) {
                $valid = $command . $pattern['id'];
            } else {
                $valid = $command;
            }

            break;

        }

        // What we do next depends on which match (if any) we've found...
        switch ( $valid ) {

            // Assignment
            case 'src':
            case 'href':
            case 'background':
            case 'action':
            case 'locationassignment':
            case 'innerHTML':

                // Check our post-char position for = as well (could be equality
                // test rather than assignment, i.e. == )
                if ( ! isset($input[$postCharPos]) || $input[$postCharPos] == '=' ) {
                    break;
                }

                // Find the end of this statement
                $endPos      = analyze_js($input, $postCharPos);
                $valueLength = $endPos - $postCharPos;

                // Produce replacement command
                $replacement = sprintf('parse%s(%s)', $command=='innerHTML' ? 'HTML' : 'URL', substr($input, $postCharPos, $valueLength));

                // Make the replacement
                $input = substr_replace($input, $replacement, $postCharPos, $valueLength);

                // Re-measure the document rather than guessing the delta: the
                // wrapper adds a different number of chars for parseURL/parseHTML
                $length = strlen($input);

                // Move offset to just past the wrapped expression
                $offset = $postCharPos + strlen($replacement);

                // Go get next match
                continue 2;

            // Function calls - we don't know for certain if these are in fact members of the
            // appropriate objects (window/XMLHttpRequest for .open(), document for .write() and
            // .writeln) so we won't change anything. Main.js still overrides these functions but
            // does nothing with them by default. We add an extra parameter to tell our override
            // to kick in.
            case 'open':
            case 'write':
            case 'writeln':

                // Find the end position (the closing ")" for the function call)
                $endPos = analyze_js($input, $postCharPos);

                // Insert our additional argument just before that
                $input = substr_replace($input, ',"gl"', $endPos, 0);

                // Keep the tracked length in step with the document
                $length = strlen($input);

                // And move the offset past the inserted argument
                $offset = $endPos + 5;

                // Get next match
                continue 2;

            // Eval() is just as easy since we can just wrap the entire thing in parseJS().
            case 'eval':

                // Ensure this is a call to eval(), not anotherfunctionendingineval().
                // Only look at the preceding char when there is one: a negative
                // string offset would read from the END of the string.
                if ( $commandPos > 0 && preg_match('#[A-Za-z0-9_$]#', $input[$commandPos-1]) ) {
                    break;
                }

                // Find the end position (the closing ")" for the function call)
                $endPos      = analyze_js($input, $postCharPos);
                $valueLength = $endPos - $postCharPos;

                // Generate our replacement
                $replacement = sprintf('parseJS(%s)', substr($input, $postCharPos, $valueLength));

                // Make the replacement
                $input = substr_replace($input, $replacement, $postCharPos, $valueLength);

                // Keep the tracked length in step with the document
                $length = strlen($input);

                // And move the offset to just past the wrapped expression
                $offset = $postCharPos + strlen($replacement);

                continue 2;

            // location.replace() is a tricky one. We have the position of the char
            // after . as $postCharPos and need to ensure we're calling replace(),
            // then parse the entire URL
            case 'locationreplace()':

                // Validate the match (\G anchors the pattern at $postCharPos)
                if ( ! preg_match('#\Greplace\s*\(#', $input, $tmp, 0, $postCharPos) ) {
                    break;
                }

                // Move $postCharPos to inside the brackets of .replace()
                $postCharPos += strlen($tmp[0]);

                // Find the end position (the closing ")" for the function call)
                $endPos      = analyze_js($input, $postCharPos);
                $valueLength = $endPos - $postCharPos;

                // Generate our replacement
                $replacement = sprintf('parseURL(%s)', substr($input, $postCharPos, $valueLength));

                // Make the replacement
                $input = substr_replace($input, $replacement, $postCharPos, $valueLength);

                // Keep the tracked length in step with the document
                $length = strlen($input);

                // And move the offset to just past the wrapped expression
                $offset = $postCharPos + strlen($replacement);

                continue 2;

        }

        // Still here? A match didn't validate so adjust offset to just after
        // current position
        $offset = $commandPos + 1;

    }

    // Ignore document.domain
    $input = str_replace('document.domain', 'ignore', $input);

    // Return changed
    return $input;

}
}
/*****************************************************************
* HTML callbacks
******************************************************************/
// Remove and record the href
function html_stripBase($input) {
    // Callback: remember capture group 2 of the match in the global $base
    // and return an empty string so the matched text is removed.
    $GLOBALS['base'] = $input[2];

    return '';
}
// Proxify the location of a meta refresh
function html_metaRefresh($input) {
    // Callback: capture group 3 holds the redirect target. Swap it for its
    // proxified form inside the full matched text.
    $target  = $input[3];
    $proxied = proxifyURL($target);

    return str_replace($target, $proxied, $input[0]);
}
// Proxify URL in
function html_paramValue($input) {
    // Callback: $input[0] is the whole matched tag, $input[2] its value.
    // Only tags that mention "movie" (case-insensitive) are rewritten;
    // everything else is returned untouched.
    $tag = $input[0];

    if ( stripos($tag, 'movie') !== false ) {
        $value = $input[2];
        $tag   = str_replace($value, proxifyURL($value), $tag);
    }

    return $tag;
}
// Process forms - the query string is used by the proxy script
// and GET data needs to be encoded anyway. We convert all GET
// forms to POST and then the proxy script will forward it properly.
// Callback for forms. $input[1] is treated as the attribute text of the
// opening form tag and $input[2] as the form content (see usage below).
// NOTE(review): the $add value and the final return statement are empty
// strings in this copy and look truncated - as written, the function
// returns an empty string and the edits made to $input are discarded.
function html_form($input) {
// Check for a given method
if ( preg_match('#\bmethod\s*=\s*["\\\']?(get|post)["\\\']?#i', $input[1], $tmp) ) {
// Not POST?
if ( strtolower($tmp[1]) != 'post' ) {
// Convert to post and flag as a conversion
$input[1] = str_replace($tmp[0], 'method="post"', $input[1]);
$converted = true;
}
} else {
// Append a POST method (no method given and GET is default)
$input[1] .= ' method="post"';
$converted = true;
}
// Prepare the extra input to insert (only for forms that were converted;
// $converted is left unset for forms that were already POST)
$add = empty($converted) ? '' : ' ';
// To do: javascript onsubmit event to immediately redirect to the appropriate
// location using GET data, without an intermediate POST to the proxy script.
// Proxify the form action
$input[1] = preg_replace_callback('#\baction\s*=\s*([\\\'"])?((?(1)(?(?<=")[^"]{1,1000}|[^\\\']{1,1000})|[^\s"\\\'>]{1,1000}))(?(1)\\1|)#i', 'html_formAction', $input[1]);
// What type of form is this? Due to register_globals support, PHP converts
// a number of characters to _ in incoming variable names. To get around this,
// we can use the raw post data from php://input but this is not available
// for multipart forms. Instead we must encode the input names in these forms.
// (stripos() result is used as a boolean, so a match at position 0 counts as "not found")
if ( stripos($input[1], 'multipart/form-data') ) {
$input[2] = preg_replace_callback('#name\s*=\s*([\\\'"])?((?(1)(?(?<=")[^"]{1,1000}|[^\\\']{1,1000})|[^\s"\\\'>]{1,1000}))(?(1)\\1|)#i', 'html_inputName', $input[2]);
}
// Return updated form
return '';
}
// Proxify the action="URL" value in forms
function html_formAction($input) {
    // Callback: $input[1] = quote (may be empty), $input[2] = the action URL.
    // Rebuild the attribute around the proxified URL with the same quoting.
    $quote = $input[1];
    $url   = proxifyURL($input[2]);

    return 'action=' . $quote . $url . $quote;
}
// Encode input names
function html_inputName($input) {
    // Callback: $input[1] = quote (may be empty), $input[2] = the field name.
    // Rebuild the attribute around the encoded name with the same quoting.
    $quote   = $input[1];
    $encoded = inputEncode($input[2]);

    return 'name=' . $quote . $encoded . $quote;
}
// Proxify URL values in attributes
function html_attribute($input) {
    // Callback: $input[0] is the matched tag text, $input[3] the URL value.
    // "iframe" directly after the first character of the match marks an
    // iframe, which is flagged as a frame for proxifyURL().
    $isIframe = ( stripos($input[0], 'iframe') === 1 );
    $flag     = $isIframe ? 'frame' : '';

    // The attribute value is HTML-escaped in the document: decode it the way
    // a browser would, proxify it, then escape it again before re-inserting.
    $decoded = htmlspecialchars_decode($input[3]);
    $proxied = htmlspecialchars(proxifyURL($decoded, $flag));

    return str_replace($input[3], $proxied, $input[0]);
}
// Flag frames in a frameset so only the first one shows the mini-form.
// This could be done in the above callback but adds extra processing
// when 99% of the time, it won't be needed.
function html_flagFrames($input) {
    // Remembers across calls whether a frame has already been seen
    static $addFlag;

    if ( isset($addFlag) ) {
        // Second and later frames: append the frame flag to the URL, as a
        // query parameter when the URL has a "?" past position 0, otherwise
        // as a path segment.
        $url    = $input[2];
        $suffix = strpos($url, '?') ? '&f=frame' : 'fframe/';

        return str_replace($url, $url . $suffix, $input[0]);
    }

    // First frame: leave it untouched and remember that we have seen one
    $addFlag = true;

    return $input[0];
}
/*****************************************************************
* CSS callbacks
******************************************************************/
// Proxify CSS url(LOCATION)
function css_URL($input) {
    // Callback: $input[1] is the location inside url(...). Surrounding
    // whitespace is trimmed before the URL is proxified.
    $location = trim($input[1]);

    return sprintf('url(%s)', proxifyURL($location));
}
// Proxify CSS @import "URL"
function css_import($input) {
    // Callback: $input[1] is the imported URL. The result is always
    // written with double quotes.
    $proxied = proxifyURL($input[1]);

    return '@import "' . $proxied . '"';
}
// Proxify CSS src=
function css_src($input) {
    // Callback: $input[1] = quote (may be empty), $input[2] = the URL.
    // Rebuild src= around the proxified URL with the same quoting.
    $quote   = $input[1];
    $proxied = proxifyURL($input[2]);

    return 'src=' . $quote . $proxied . $quote;
}
// Callbacks for use with unique URLs and cached CSS
// The acts as a marker for quick and easy processing later
// Unique CSS url(LOCATION)
// "_unique" counterpart of css_URL(), selected by CSS() when $storeUnique is set.
// NOTE(review): as written this ignores $input and returns an empty url();
// the literal looks truncated in this copy (a marker around the original
// URL is presumably missing) - confirm against the upstream source.
function css_URL_unique($input) {
return 'url()';
}
// Unique CSS @import "URL"
// "_unique" counterpart of css_import(), selected by CSS() when $storeUnique is set.
// NOTE(review): as written this ignores $input and returns an empty import;
// the literal looks truncated in this copy (a marker around the original
// URL is presumably missing) - confirm against the upstream source.
function css_import_unique($input) {
return '@import ""';
}
// Unique CSS src=
// "_unique" counterpart of css_src(), selected by CSS() when $storeUnique is set.
// $input[1] is the quote character (may be empty).
// NOTE(review): as written the value between the quotes is an empty string;
// the literal looks truncated in this copy (a marker around the original
// URL is presumably missing) - confirm against the upstream source.
function css_src_unique($input) {
return 'src=' . $input[1] . '' . $input[1];
}
/*****************************************************************
* Helper functions
******************************************************************/
// Take a string, and check that the next non-whitespace char is the
// passed in char (X). Return false if non-whitespace and non-X char is
// found. Otherwise, return the position of X.
// If $inverse is true, the next non-whitespace char must NOT be in $char
// If $pastChar is true, skip any whitespace that follows X and return
// the position of the first character after that whitespace.
function str_checknext($input, $char, $offset, $inverse = false, $pastChar = false) {
    // Looks at the first non-whitespace character at or after $offset.
    //   normal mode:  returns its position when it is $char, otherwise false
    //   $inverse:     returns its position when it is NOT $char, otherwise false
    //   $pastChar:    (normal mode only) returns the position just past $char
    //                 and any whitespace that follows it
    // Returns false when only whitespace (or nothing) remains.
    $whitespace = " \t\r\n";
    $total      = strlen($input);
    $pos        = $offset;

    // Step over leading whitespace
    while ( $pos < $total && strpos($whitespace, $input[$pos]) !== false ) {
        ++$pos;
    }

    // Ran off the end without finding anything to examine
    if ( $pos >= $total ) {
        return false;
    }

    $isTarget = ( $input[$pos] == $char );

    // Inverse mode: any character except $char is a hit
    if ( $inverse ) {
        return $isTarget ? false : $pos;
    }

    if ( ! $isTarget ) {
        return false;
    }

    if ( ! $pastChar ) {
        return $pos;
    }

    // Move past $char and the whitespace after it
    $next = $pos + 1;

    return $next + strspn($input, $whitespace, $next);
}
// Same as above but go backwards
function str_checkprev($input, $char, $offset, $inverse = false) {
    // Walks backwards from $offset to the first non-whitespace character.
    //   normal mode: returns its position when it is $char, otherwise false
    //   $inverse:    returns its position when it is NOT $char, otherwise false
    // When only whitespace (or nothing) precedes $offset the result is
    // $inverse itself: false in normal mode, true in inverse mode.
    // Callers must compare the result with === false because position 0
    // is a valid return value.

    // The loop includes index 0 so a character at the very start of the
    // string is examined too. A negative $offset never enters the loop.
    for ( $i = $offset; $i >= 0; --$i ) {

        $current = $input[$i];

        // Ignore whitespace
        if ( $current == ' ' || $current == "\t" || $current == "\r" || $current == "\n" ) {
            continue;
        }

        // Found the wanted char
        if ( $current == $char ) {
            return $inverse ? false : $i;
        }

        // Found some other non-whitespace char
        return $inverse ? $i : false;

    }

    return $inverse;
}
// Analyze javascript and return offset positions.
// Default is to find the end of the statement, indicated by:
// (1) ; while not in string
// (2) newline which, if not there, would create invalid syntax
// (3) a closing bracket (object, language construct or function call) for which
// no corresponding opening bracket was detected AFTER the passed offset
// If (int) $argPos is true, we return an array of the start and end position
// for the nth argument, where n = $argPos. The $start position must be just inside
// the parenthesis of the function call we're interested in.
// Scan $input forwards from $start and return the offset at which the
// current statement (or, with $argPos, the requested argument) ends.
// Returns an int offset, or array($start, $end) when $argPos is truthy.
// Falls back to the length of $input when no end is found.
function analyze_js($input, $start, $argPos = false) {
// Set chars we're interested in
$specialChars = ";\n\r\"'+{}()[]";
// Add , if looking for an argument position
if ( $argPos ) {
$specialChars .= ',';
$currentArg = 1;
}
// Loop through the input, stopping only at special chars.
// Each pass jumps $i to the next special char via strcspn().
// NOTE(review): the jump is also used as a truth test, so the loop exits
// immediately when the jump lands on offset 0 (i.e. $start is 0 and the
// first char is special) - confirm callers never pass such input.
for ( $i = $start, $length = strlen($input), $end = false, $openObjects = $openBrackets = $openArrays = 0;
$end === false && ( $i += strcspn($input, $specialChars, $i) ) && $i < $length && ( $char = $input[$i] );
++$i ) {
switch ( $char ) {
// Starting string delimiters
case '"':
case "'":
// A delimiter directly preceded by a backslash is skipped
if ( $input[$i-1] == '\\' ) {
break;
}
// Skip straight to end of string
// Find the corresponding end delimiter and ensure it's not escaped
while ( ( $i = strpos($input, $char, $i+1) ) && $input[$i-1] == '\\' );
// Check for false, in which case we assume the end is the end of the doc
if ( $i === false ) {
break 2;
}
break;
// End of operation?
case ';':
$end = $i;
break;
// New lines
case "\n":
case "\r":
// Newlines are OK if occurring within open brackets, arrays or objects
// (and always when looking for an argument position).
if ( $openObjects || $openBrackets || $openArrays || $argPos ) {
break;
}
// Newlines are also OK if followed by an opening function OR concatenation
// e.g. someFunc\n(params) or someVar \n + anotherVar
// Find next non-whitespace char position
// ($tmp is the last whitespace position, so $tmp+1 is the next real char)
$tmp = $i + strspn($input, " \t\r\n", $i+1);
// And compare to allowed chars
if ( isset($input[$tmp+1]) && ( $input[$tmp+1] == '(' || $input[$tmp+1] == '+' ) ) {
$i = $tmp;
break;
}
// Newline not indicated as OK, set the end to here
$end = $i;
break;
// Concatenation
case '+':
// Our interest in the + operator is its use in allowing an expression
// to span multiple lines. If we come across a +, move past all whitespace,
// including newlines (which would otherwise indicate end of expression).
$i += strspn($input, " \t\r\n", $i+1);
break;
// Opening chars (objects, parenthesis and arrays)
case '{':
++$openObjects;
break;
case '(':
++$openBrackets;
break;
case '[':
++$openArrays;
break;
// Closing chars - is there a corresponding open char?
// Yes = reduce stored count. No = end of statement.
case '}':
$openObjects ? --$openObjects : $end = $i;
break;
case ')':
$openBrackets ? --$openBrackets : $end = $i;
break;
case ']':
$openArrays ? --$openArrays : $end = $i;
break;
// Commas - tell us which argument it is
// (only reachable when $argPos is set, since , is only then a special char)
case ',':
// Ignore commas inside other functions or whatnot
if ( $openObjects || $openBrackets || $openArrays ) {
break;
}
// End now: this comma closes the argument we were asked for
if ( $currentArg == $argPos ) {
$end = $i;
}
// Increase the current argument number
++$currentArg;
// If we're not after the first arg, start now?
if ( $currentArg == $argPos ) {
$start = $i+1;
}
break;
}
}
// End not found? Use end of document
if ( $end === false ) {
$end = $length;
}
// Return array of start/end
if ( $argPos ) {
return array($start, $end);
}
// Return end
return $end;
}
/*****************************************************************
* Page encoding functions
******************************************************************/
// Encode page - splits into HTML/script sections and encodes HTML
// Encode a page: the text between matched blocks is passed through
// encodeBlock() while the matched blocks themselves are copied unchanged.
// When nothing matches, the whole input is passed to encodeBlock().
// NOTE(review): both pattern literals look truncated in this copy (their
// tag text is missing), so which blocks are matched cannot be confirmed.
function encodePage($input) {
// Look for script blocks
if ( preg_match_all('##is', $input, $scripts, PREG_OFFSET_CAPTURE) ) {
// Create starting offset - only start encoding after the matched
// opening tag as this seems to help browsers cope!
$offset = preg_match('#]*>(.)#is', $input, $tmp, PREG_OFFSET_CAPTURE) ? $tmp[1][1] : 0;
// Everything before the starting offset is copied through unencoded
$new = $offset ? substr($input, 0, $offset) : '';
// Go through all the matches
foreach ( $scripts[0] as $id => $match ) {
// Determine position of the preceding non-script block
// ($match[1] is the byte offset of this match within $input)
$end = $match[1] ? $match[1]-1 : 0;
$start = $offset;
$length = $end - $start;
// Add encoded block to page if there is one
if ( $length )
$new .= encodeBlock(substr($input, $start, $length));
// Add unencoded script to page
$new .= $match[0];
// Move offset up to just past this match
$offset = $match[1] + strlen($match[0]);
}
// Add final block
if ( $remainder = substr($input, $offset) ) {
$new .= encodeBlock($remainder);
}
// Update input with new
$input = $new;
} else {
// No scripts is easy - just encode the lot
$input = encodeBlock($input);
}
// Return the encoded page
return $input;
}
// Encode block - applies the actual encoding (or rather "escaping")
// Encode block - applies the actual encoding (or rather "escaping").
// $s lists the characters to escape (letters, single quote, CR, LF, hyphen)
// and $r the matching percent-escapes, in the same order.
// NOTE(review): as written neither array nor $input is used and an empty
// string is returned; the return literal looks truncated in this copy
// (presumably it wrapped the escaped text in a javascript decoder) -
// confirm against the upstream source.
function encodeBlock($input) {
// Escape values
$s = array('a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','\'',"\r","\n",'-');
$r = array('%61','%62','%63','%64','%65','%66','%67','%68','%69','%6a','%6b','%6c','%6d','%6e','%6f','%70','%71','%72','%73','%74','%75','%76','%77','%78','%79','%7a','%41','%42','%43','%44','%45','%46','%47','%48','%49','%4a','%4b','%4c','%4d','%4e','%4f','%50','%51','%52','%53','%54','%55','%56','%57','%58','%59','%5a','%27','%0d','%0a','%2D');
// Return javascript decoder
return '';
}