JavaScript xdiff_string_patch
!No description available for xdiff_string_patch. @php.js developers: Please update the function summary text file.
function xdiff_string_patch (originalStr, patch, flags, error) {
  // Applies a unified-diff patch to a string and returns the patched string
  // (or reverses the patch when XDIFF_PATCH_REVERSE is requested).
  //
  // version: 1109.2015
  // discuss at: http://phpjs.org/functions/xdiff_string_patch
  // +   original by: Brett Zamir (http://brett-zamir.me)
  // +   improved by: Steven Levithan (stevenlevithan.com)
  // %        note 1: The XDIFF_PATCH_IGNORESPACE flag is not currently supported. No error text is
  // %                ever produced, so the global named by the error argument is always set to ''.
  // %        note 2: This has not been widely tested
  // %        note 3: Returns false when originalStr is not a string or patch is not a non-empty string.
  // %        note 4: Throws the string 'Unrecognized initial character in unidiff line' when a hunk
  // %                line does not start with '+', '-' or ' '.
  // *     example 1: xdiff_string_patch('', '@@ -0,0 +1,1 @@\n+Hello world!');
  // *     returns 1: 'Hello world!'

  // First two functions were adapted from Steven Levithan, also under an MIT license
  // Adapted from XRegExp 1.5.0
  // (c) 2007-2010 Steven Levithan
  // MIT License
  // <http://xregexp.com>
  var getNativeFlags = function (regex) {
      return (regex.global ? 'g' : '') +
        (regex.ignoreCase ? 'i' : '') +
        (regex.multiline ? 'm' : '') +
        (regex.extended ? 'x' : '') + // Proposed for ES4; included in AS3
        (regex.sticky ? 'y' : '');
    },
    cbSplit = function (str, s /* separator */ ) {
      // If separator `s` is not a regex, use the native `split`
      if (!(s instanceof RegExp)) { // Had problems to get it to work here using prototype test
        return String.prototype.split.apply(str, arguments);
      }
      str = str + '';
      var output = [],
        lastLastIndex = 0,
        match, lastLength, limit = Infinity;

      // This is required if not `s.global`, and it avoids needing to set `s.lastIndex` to zero
      // and restore it to its original value when we're done using the regex
      var x = s._xregexp;
      // Strip an existing 'g' first so an already-global separator does not yield the invalid flags 'gg'
      s = new RegExp(s.source, getNativeFlags(s).replace('g', '') + 'g'); // Brett paring down
      if (x) {
        s._xregexp = {
          source: x.source,
          captureNames: x.captureNames ? x.captureNames.slice(0) : null
        };
      }

      while ((match = s.exec(str))) { // Run the altered `exec` (required for `lastIndex` fix, etc.)
        if (s.lastIndex > lastLastIndex) {
          output.push(str.slice(lastLastIndex, match.index));

          if (match.length > 1 && match.index < str.length) {
            Array.prototype.push.apply(output, match.slice(1));
          }

          lastLength = match[0].length;
          lastLastIndex = s.lastIndex;

          if (output.length >= limit) {
            break;
          }
        }

        // Zero-length match: step forward manually to avoid an infinite loop
        if (s.lastIndex === match.index) {
          s.lastIndex++;
        }
      }

      if (lastLastIndex === str.length) {
        if (!s.test('') || lastLength) {
          output.push('');
        }
      } else {
        output.push(str.slice(lastLastIndex));
      }

      return output.length > limit ? output.slice(0, limit) : output;
    },
    i = 0,
    ll = 0,
    ranges = [],
    lastLinePos = 0,
    linePos = 0,
    firstChar = '',
    rangeExp = /^@@\s+-(\d+),(\d+)\s+\+(\d+),(\d+)\s+@@$/,
    lineBreaks = /\r?\n/,
    lines = [],
    origLines = [],
    newStrArr = [],
    errors = '',
    // Both string & integer (constant) input is allowed
    optTemp = 0,
    isReverse = false,
    startGroup = 1,
    skipMarker = '-',
    insertMarker = '+',
    root = null,
    OPTS = { // Unsure of actual PHP values, so better to rely on string
      'XDIFF_PATCH_NORMAL': 1,
      'XDIFF_PATCH_REVERSE': 2,
      'XDIFF_PATCH_IGNORESPACE': 4
    };

  // Input defaulting & sanitation. This must run before the patch is touched:
  // calling patch.replace() on a missing patch would throw instead of returning false.
  if (typeof originalStr !== 'string' || typeof patch !== 'string' || !patch) {
    return false;
  }
  if (!flags) {
    flags = 'XDIFF_PATCH_NORMAL';
  }

  if (typeof flags !== 'number') { // Allow for a single string or an array of string flags
    flags = [].concat(flags);
    for (i = 0; i < flags.length; i++) {
      // Resolve string input to bitwise e.g. 'XDIFF_PATCH_NORMAL' becomes 1
      if (OPTS[flags[i]]) {
        optTemp = optTemp | OPTS[flags[i]];
      }
    }
    flags = optTemp;
  }

  // Trailing line breaks of the patch are not part of any hunk
  lines = cbSplit(patch.replace(/(\r?\n)+$/, ''), lineBreaks);
  // An empty input has zero lines (splitting '' would yield one empty line)
  origLines = originalStr === '' ? [] : cbSplit(originalStr, lineBreaks);

  // If neither mode bit is set, nothing is applied and '' is returned (unchanged behaviour).
  if (flags & (OPTS.XDIFF_PATCH_NORMAL | OPTS.XDIFF_PATCH_REVERSE)) {
    // NORMAL wins when both bits are set, as before
    isReverse = !(flags & OPTS.XDIFF_PATCH_NORMAL);
    // Reverse mode reads positions from the "+" range and swaps the meaning of '+' and '-'
    startGroup = isReverse ? 3 : 1;
    skipMarker = isReverse ? '+' : '-';
    insertMarker = isReverse ? '-' : '+';

    for (i = 0, ll = lines.length; i < ll; i++) {
      ranges = lines[i].match(rangeExp);
      if (!ranges) {
        continue;
      }

      lastLinePos = linePos;
      // Header positions are 1-based; a start of 0 (empty range) maps to index 0, never -1
      linePos = Math.max(ranges[startGroup] - 1, 0);

      // Copy the untouched input lines that precede this hunk
      while (lastLinePos < linePos && lastLinePos < origLines.length) {
        newStrArr[newStrArr.length] = origLines[lastLinePos++];
      }

      // Consume hunk lines until the next header, an empty line, or the end of the patch
      while (lines[++i] && rangeExp.exec(lines[i]) === null) {
        firstChar = lines[i].charAt(0);
        if (firstChar === skipMarker) {
          ++linePos; // Skip including that line
        } else if (firstChar === insertMarker) {
          newStrArr[newStrArr.length] = lines[i].slice(1);
        } else if (firstChar === ' ') {
          newStrArr[newStrArr.length] = origLines[linePos++];
        } else {
          throw 'Unrecognized initial character in unidiff line'; // Reconcile with returning errors arg?
        }
      }

      // Stopped on the next header: step back so the outer loop processes it
      if (lines[i]) {
        i--;
      }
    }

    // Keep the input lines that follow the last hunk
    while (linePos < origLines.length) {
      newStrArr[newStrArr.length] = origLines[linePos++];
    }
  }

  // Only publish the error text when a variable name was actually supplied
  if (typeof error === 'string') {
    root = (this && this.window) ||
      (typeof window !== 'undefined' ? window : (typeof globalThis !== 'undefined' ? globalThis : null));
    if (root) {
      root[error] = errors;
    }
  }

  return newStrArr.join('\n');
}
Examples
Running
1 | xdiff_string_patch('', '@@ -0,0 +1,1 @@\n+Hello world!'); |
Should return
1 | 'Hello world!' |
Dependencies
No dependencies, you can use this function standalone.
Open syntax issues
php.js uses JsLint to help us keep our code consistent and prevent some common bugs.
Eventually we want all code to pass or at least take into consideration most fixes suggested by JsLint, following this JsLint configuration we’ve decided on.
Authors
Thanks to the following developers, you get to have xdiff_string_patch goodness in JavaScript.
Hi,
My own patching code does not validate its input. If you want some code that does, the following may work. It is based on code by Imgen Tata, though I have reformatted it to work as self-contained functions in the spirit of php.js (if combined, they could replace xdiff_string_patch):
/**
 * Patches original text to generate new text.
 *
 * Every hunk header ("@@ -a,b +c,d @@") is checked: the hunk must start at or
 * after the end of the previous one and inside the original text, the new-side
 * start must match the number of lines produced so far, and the announced sizes
 * must equal the number of context/removed/added lines actually found in the
 * hunk. Context lines are copied from the original text by position; their
 * content is not compared with the patch. Empty patch lines are ignored.
 *
 * @author Imgen Tata (http://www.myipdf.com/)
 * @see Unified diff format on http://en.wikipedia.org/wiki/Diff#Unified_format
 * @param {String} ori_text The original text
 * @param {String} unidiff_patch The patch in unidiff format. Will be validated.
 * @returns {String} The generated new text
 * @throws {Error} 'Bad type' when either argument is not a string
 * @throws {String} 'invalid patch header' when a header is malformed or inconsistent
 * @throws {String} 'Unrecognized initial character in unidiff line' for any other bad line
 */
function string_unidiff_patch (ori_text, unidiff_patch) {
    var NEW_LINE = '\n',
        HEADER_PREFIX = '@@ ',
        CONTEXT_INDICATOR = ' ',
        DELETION_INDICATOR = '-',
        ADDITION_INDICATOR = '+',
        // Anchored at the start of the line; text after the closing "@@" is tolerated
        range_extractor = /^@@ \s*-\s*(\d+),\s*(\d+)\s*\+(\d+),\s*(\d+)\s* @@/,
        ori_lines,
        ori_len,
        patch_lines,
        patch_line,
        new_lines = [],
        line_index = 0,
        last_line_index = 0,
        ori_hunk_start = 0,
        ori_hunk_size = 0,
        new_hunk_start = 0,
        new_hunk_size = 0,
        context_size = 0,
        deletion_size = 0,
        addition_size = 0,
        ranges,
        i, j,
        /*
         * Splits text into lines; the empty string has no lines at all
         */
        split_into_lines = function (text) {
            return text === '' ? [] : text.split(NEW_LINE);
        },
        /*
         * Checks the sizes announced by a hunk header against the lines counted in the hunk
         */
        validate_hunk_sizes = function (ctx_count, ori_size, del_count, new_size, add_count) {
            if (ori_size !== ctx_count + del_count ||
                new_size !== ctx_count + add_count) {
                throw 'invalid patch header';
            }
        };

    if (typeof ori_text !== 'string' || typeof unidiff_patch !== 'string') {
        throw new Error('Bad type');
    }

    ori_lines = split_into_lines(ori_text);
    ori_len = ori_lines.length;
    patch_lines = split_into_lines(unidiff_patch);

    for (i = 0; i < patch_lines.length; i++) {
        patch_line = patch_lines[i];
        if (patch_line === '') { // Skip empty line
            continue;
        }

        // Only a line that STARTS with "@@ " is a header; content lines may contain "@@ " freely
        if (patch_line.indexOf(HEADER_PREFIX) === 0) {
            // Validate the previous hunk (all counters are zero before the first one)
            validate_hunk_sizes(context_size, ori_hunk_size, deletion_size,
                new_hunk_size, addition_size);

            ranges = patch_line.match(range_extractor);
            if (ranges === null) {
                throw 'invalid patch header';
            }
            // Work with real numbers from here on instead of relying on string coercion
            ori_hunk_start = parseInt(ranges[1], 10);
            ori_hunk_size = parseInt(ranges[2], 10);
            new_hunk_start = parseInt(ranges[3], 10);
            new_hunk_size = parseInt(ranges[4], 10);

            last_line_index = line_index;
            // Header positions are 1-based; a start of 0 (empty range) maps to index 0
            line_index = Math.max(ori_hunk_start - 1, 0);

            if (ori_len > 0) {
                // Hunks must be ordered and must start inside the original text
                if (last_line_index > line_index || line_index > ori_len) {
                    throw 'invalid patch header';
                }
                // Copy the untouched lines between the previous hunk and this one
                for (j = last_line_index; j < line_index; j++) {
                    new_lines.push(ori_lines[j]);
                }
            }

            // The new-side start must agree with what has been generated so far
            if (new_hunk_start > 0 && new_hunk_start !== new_lines.length + 1) {
                throw 'invalid patch header';
            }

            // Reset sizes
            context_size = 0;
            deletion_size = 0;
            addition_size = 0;
            continue;
        }

        switch (patch_line.charAt(0)) {
        case CONTEXT_INDICATOR:
            new_lines.push(ori_lines[line_index]);
            line_index++;
            context_size++;
            break;
        case DELETION_INDICATOR:
            // Drop the original line by stepping over it
            line_index++;
            deletion_size++;
            break;
        case ADDITION_INDICATOR:
            new_lines.push(patch_line.slice(1));
            addition_size++;
            break;
        default:
            throw 'Unrecognized initial character in unidiff line';
        }
    }

    // Validate the last hunk
    validate_hunk_sizes(context_size, ori_hunk_size, deletion_size,
        new_hunk_size, addition_size);

    // Append the remaining lines
    while (line_index < ori_len) {
        new_lines.push(ori_lines[line_index]);
        line_index++;
    }

    return new_lines.join(NEW_LINE);
}
/**
 * Reverse patches new text to regenerate the original text.
 *
 * Every hunk header ("@@ -a,b +c,d @@") is checked: the hunk must start at or
 * after the end of the previous one and inside the new text, the original-side
 * start must match the number of lines restored so far, and the announced sizes
 * must equal the number of context/removed/added lines actually found in the
 * hunk. Context lines are copied from the new text by position; their content
 * is not compared with the patch. Empty patch lines are ignored.
 *
 * @author Imgen Tata (http://www.myipdf.com/)
 * @see Unified diff format on http://en.wikipedia.org/wiki/Diff#Unified_format
 * @param {String} new_text The new text
 * @param {String} unidiff_patch The patch in unidiff format. Will be validated.
 * @returns {String} The generated original text
 * @throws {Error} 'Bad type' when either argument is not a string
 * @throws {String} 'invalid patch header' when a header is malformed or inconsistent
 * @throws {String} 'Unrecognized initial character in unidiff line' for any other bad line
 */
function string_unidiff_reverse_patch (new_text, unidiff_patch) {
    var NEW_LINE = '\n',
        HEADER_PREFIX = '@@ ',
        CONTEXT_INDICATOR = ' ',
        DELETION_INDICATOR = '-',
        ADDITION_INDICATOR = '+',
        // Anchored at the start of the line; text after the closing "@@" is tolerated
        range_extractor = /^@@ \s*-\s*(\d+),\s*(\d+)\s*\+(\d+),\s*(\d+)\s* @@/,
        new_lines,
        new_len,
        patch_lines,
        patch_line,
        ori_lines = [],
        line_index = 0,
        last_line_index = 0,
        ori_hunk_start = 0,
        ori_hunk_size = 0,
        new_hunk_start = 0,
        new_hunk_size = 0,
        context_size = 0,
        deletion_size = 0,
        addition_size = 0,
        ranges,
        i, j,
        /*
         * Splits text into lines; the empty string has no lines at all
         */
        split_into_lines = function (text) {
            return text === '' ? [] : text.split(NEW_LINE);
        },
        /*
         * Checks the sizes announced by a hunk header against the lines counted in the hunk
         */
        validate_hunk_sizes = function (ctx_count, ori_size, del_count, new_size, add_count) {
            if (ori_size !== ctx_count + del_count ||
                new_size !== ctx_count + add_count) {
                throw 'invalid patch header';
            }
        };

    if (typeof new_text !== 'string' || typeof unidiff_patch !== 'string') {
        throw new Error('Bad type');
    }

    new_lines = split_into_lines(new_text);
    new_len = new_lines.length;
    patch_lines = split_into_lines(unidiff_patch);

    for (i = 0; i < patch_lines.length; i++) {
        patch_line = patch_lines[i];
        if (patch_line === '') { // Skip empty line
            continue;
        }

        // Only a line that STARTS with "@@ " is a header; content lines may contain "@@ " freely
        if (patch_line.indexOf(HEADER_PREFIX) === 0) {
            // Validate the previous hunk (all counters are zero before the first one)
            validate_hunk_sizes(context_size, ori_hunk_size, deletion_size,
                new_hunk_size, addition_size);

            ranges = patch_line.match(range_extractor);
            if (ranges === null) {
                throw 'invalid patch header';
            }
            // Work with real numbers from here on instead of relying on string coercion
            ori_hunk_start = parseInt(ranges[1], 10);
            ori_hunk_size = parseInt(ranges[2], 10);
            new_hunk_start = parseInt(ranges[3], 10);
            new_hunk_size = parseInt(ranges[4], 10);

            last_line_index = line_index;
            // The input is the NEW text, so positions come from the "+" range (1-based)
            line_index = Math.max(new_hunk_start - 1, 0);

            if (new_len > 0) {
                // Hunks must be ordered and must start inside the new text
                if (last_line_index > line_index || line_index > new_len) {
                    throw 'invalid patch header';
                }
                // Copy the untouched lines between the previous hunk and this one
                for (j = last_line_index; j < line_index; j++) {
                    ori_lines.push(new_lines[j]);
                }
            }

            // The original-side start must agree with what has been restored so far
            if (ori_hunk_start > 0 && ori_hunk_start !== ori_lines.length + 1) {
                throw 'invalid patch header';
            }

            // Reset sizes
            context_size = 0;
            deletion_size = 0;
            addition_size = 0;
            continue;
        }

        switch (patch_line.charAt(0)) {
        case CONTEXT_INDICATOR:
            ori_lines.push(new_lines[line_index]);
            line_index++;
            context_size++;
            break;
        case DELETION_INDICATOR:
            // Put deleted line back
            ori_lines.push(patch_line.slice(1));
            deletion_size++;
            break;
        case ADDITION_INDICATOR:
            // Drop the added line by stepping over it
            line_index++;
            addition_size++;
            break;
        default:
            throw 'Unrecognized initial character in unidiff line';
        }
    }

    // Validate the last hunk
    validate_hunk_sizes(context_size, ori_hunk_size, deletion_size,
        new_hunk_size, addition_size);

    // Append the remaining lines
    while (line_index < new_len) {
        ori_lines.push(new_lines[line_index]);
        line_index++;
    }

    return ori_lines.join(NEW_LINE);
}


Brett Zamir
11 Apr '10