1
0
mirror of synced 2025-12-12 09:23:21 +08:00
Files
Mergely/src/diff.js
2024-07-31 19:44:47 +01:00

317 lines
9.2 KiB
JavaScript

// Time budget, in seconds, that a single _sms() call may spend searching before
// it gives up and returns a fallback split point. A falsy value (e.g. 0)
// disables the timeout check entirely.
const SMS_TIMEOUT_SECONDS = 1.0;
/**
 * Computes the differences between two inputs. Must be invoked with `new`,
 * because the result is stored on `this`.
 *
 * The inputs are converted to comparison codes by CodeifyText, `_lcs` marks
 * the entries that differ, `_optimize` post-processes the marks on each side,
 * and `_create_diffs` turns the marks into change items stored in `this.items`.
 *
 * @param {string|Array} lhs - Left-hand input (forwarded to CodeifyText).
 * @param {string|Array} rhs - Right-hand input (forwarded to CodeifyText).
 * @param {Object} [options]
 * @param {boolean} [options.ignorews=false] - Forwarded to CodeifyText.
 * @param {boolean} [options.ignoreaccents=false] - Forwarded to CodeifyText.
 * @param {boolean} [options.ignorecase=false] - Forwarded to CodeifyText.
 * @param {string} [options.split='lines'] - Forwarded to CodeifyText.
 */
function diff(lhs, rhs, options = {}) {
  // Only the recognised options are forwarded; anything else on `options`
  // is dropped.
  const {
    ignorews: skipWhitespace = false,
    ignoreaccents: skipAccents = false,
    ignorecase: skipCase = false,
    split: splitMode = 'lines',
  } = options;

  const codeify = new CodeifyText(lhs, rhs, {
    ignorews: skipWhitespace,
    ignoreaccents: skipAccents,
    ignorecase: skipCase,
    split: splitMode,
  });
  this.codeify = codeify;

  // One working context per side: the codes plus a sparse "modified" map.
  const makeContext = (side) => ({
    codes: codeify.getCodes(side),
    modified: {},
  });
  const left = makeContext('lhs');
  const right = makeContext('rhs');

  // The two scratch vectors are shared by every recursive _lcs/_sms call.
  const scratchUp = [];
  const scratchDown = [];
  this._lcs(left, 0, left.codes.length, right, 0, right.codes.length, scratchUp, scratchDown);

  this._optimize(left);
  this._optimize(right);
  this.items = this._create_diffs(left, right);
}
/**
 * Returns the change items stored on this instance.
 *
 * @returns {Array<Object>} The same array held in `this.items` (not a copy).
 */
diff.prototype.changes = function() {
  const { items } = this;
  return items;
};
/**
 * Returns the retained input entries for one side by delegating to the
 * instance's `codeify` object.
 *
 * @param {string} side - Side key passed through to `codeify.getLines`
 *   (the constructor uses 'lhs' and 'rhs').
 * @returns {*} Whatever `codeify.getLines(side)` returns.
 */
diff.prototype.getLines = function(side) {
  const { codeify } = this;
  return codeify.getLines(side);
};
/**
 * Renders the change items as text in "normal diff" style.
 *
 * Each change item produces a header line `<lhs range><a|c|d><rhs range>`:
 *   - `a` when nothing was deleted and something was inserted,
 *   - `d` when something was deleted and nothing was inserted,
 *   - `c` otherwise.
 * A range is `start + 1` for a single entry, `start` for an empty range, and
 * `first,last` (1-based, inclusive) for several entries.
 *
 * When both sides' entries are available from `getLines`, the header is
 * followed by the deleted entries prefixed with `< `, a `---` separator if
 * the item both deletes and inserts, and the inserted entries prefixed
 * with `> `.
 *
 * @returns {string} The rendered text; empty when there are no change items.
 */
diff.prototype.normal_form = function() {
  // The retained entries do not depend on the change item, so fetch them once
  // instead of once per item.
  const lhs_lines = this.getLines('lhs');
  const rhs_lines = this.getLines('rhs');
  const hasLines = Boolean(rhs_lines && lhs_lines);

  // Formats a 0-based (start, count) pair as a 1-based range.
  const formatRange = (start, count) => {
    if (count === 1) {
      return `${start + 1}`;
    }
    if (count === 0) {
      // Empty range: report the position just before the change.
      return `${start}`;
    }
    return `${start + 1},${start + count}`;
  };

  let nf = '';
  for (const item of this.items) {
    const { lhs_start, rhs_start, lhs_deleted_count, rhs_inserted_count } = item;

    let change = 'c';
    if (lhs_deleted_count === 0 && rhs_inserted_count > 0) {
      change = 'a';
    } else if (lhs_deleted_count > 0 && rhs_inserted_count === 0) {
      change = 'd';
    }

    const lhs_str = formatRange(lhs_start, lhs_deleted_count);
    const rhs_str = formatRange(rhs_start, rhs_inserted_count);
    nf += `${lhs_str}${change}${rhs_str}\n`;

    if (!hasLines) {
      continue;
    }
    // Entries were retained on both sides: output the contextual diff.
    for (let i = lhs_start; i < lhs_start + lhs_deleted_count; ++i) {
      nf += `< ${lhs_lines[i]}\n`;
    }
    if (rhs_inserted_count && lhs_deleted_count) {
      nf += '---\n';
    }
    for (let i = rhs_start; i < rhs_start + rhs_inserted_count; ++i) {
      nf += `> ${rhs_lines[i]}\n`;
    }
  }
  return nf;
};
/**
 * Recursively marks the entries that differ between the half-open ranges
 * [lhs_lower, lhs_upper) of `lhs_ctx.codes` and [rhs_lower, rhs_upper) of
 * `rhs_ctx.codes`.
 *
 * Matching entries at the start and end of the ranges are skipped first.
 * If one side is then empty, every remaining entry on the other side is
 * flagged in that side's `modified` map. Otherwise `_sms` supplies a split
 * point and the two halves are processed recursively.
 *
 * @param {{codes: Object, modified: Object}} lhs_ctx - Left context.
 * @param {number} lhs_lower - Inclusive start index on the left.
 * @param {number} lhs_upper - Exclusive end index on the left.
 * @param {{codes: Object, modified: Object}} rhs_ctx - Right context.
 * @param {number} rhs_lower - Inclusive start index on the right.
 * @param {number} rhs_upper - Exclusive end index on the right.
 * @param {Array} vector_u - Scratch storage handed through to `_sms`.
 * @param {Array} vector_d - Scratch storage handed through to `_sms`.
 */
diff.prototype._lcs = function(lhs_ctx, lhs_lower, lhs_upper, rhs_ctx, rhs_lower, rhs_upper, vector_u, vector_d) {
  const lhsCodes = lhs_ctx.codes;
  const rhsCodes = rhs_ctx.codes;
  let lhsFrom = lhs_lower;
  let lhsTo = lhs_upper;
  let rhsFrom = rhs_lower;
  let rhsTo = rhs_upper;

  const bothNonEmpty = () => lhsFrom < lhsTo && rhsFrom < rhsTo;

  // Trim the common prefix.
  while (bothNonEmpty() && lhsCodes[lhsFrom] === rhsCodes[rhsFrom]) {
    lhsFrom += 1;
    rhsFrom += 1;
  }
  // Trim the common suffix.
  while (bothNonEmpty() && lhsCodes[lhsTo - 1] === rhsCodes[rhsTo - 1]) {
    lhsTo -= 1;
    rhsTo -= 1;
  }

  if (lhsFrom === lhsTo) {
    // Nothing left on the left: whatever remains on the right was inserted.
    for (let i = rhsFrom; i < rhsTo; i += 1) {
      rhs_ctx.modified[i] = true;
    }
    return;
  }

  if (rhsFrom === rhsTo) {
    // Nothing left on the right: whatever remains on the left was deleted.
    for (let i = lhsFrom; i < lhsTo; i += 1) {
      lhs_ctx.modified[i] = true;
    }
    return;
  }

  // Both sides still have entries: split at the point found by _sms and
  // handle each half on its own.
  const { x, y } = this._sms(lhs_ctx, lhsFrom, lhsTo, rhs_ctx, rhsFrom, rhsTo, vector_u, vector_d);
  this._lcs(lhs_ctx, lhsFrom, x, rhs_ctx, rhsFrom, y, vector_u, vector_d);
  this._lcs(lhs_ctx, x, lhsTo, rhs_ctx, y, rhsTo, vector_u, vector_d);
};
/**
 * Finds a split point (x, y) inside the ranges [lhs_lower, lhs_upper) and
 * [rhs_lower, rhs_upper); `_lcs` uses it to divide the comparison into two
 * smaller sub-problems.
 *
 * A forward search starts at (lhs_lower, rhs_lower) and records, per
 * diagonal k = x - y, the furthest x reached in `vector_d`. A reverse search
 * starts at (lhs_upper, rhs_upper) and records its progress in `vector_u`.
 * The function returns as soon as the two searches overlap on a diagonal.
 * NOTE(review): this looks like the "middle snake" search of Myers' O(ND)
 * diff algorithm -- confirm against the reference before relying on that.
 *
 * @param {{codes: Object}} lhs_ctx - Left context; only `codes` is read.
 * @param {number} lhs_lower - Inclusive start index on the left.
 * @param {number} lhs_upper - Exclusive end index on the left.
 * @param {{codes: Object}} rhs_ctx - Right context; only `codes` is read.
 * @param {number} rhs_lower - Inclusive start index on the right.
 * @param {number} rhs_upper - Exclusive end index on the right.
 * @param {Array} vector_u - Scratch storage for the reverse search (written).
 * @param {Array} vector_d - Scratch storage for the forward search (written).
 * @returns {{x: number, y: number}} The split point, or
 *   `{ x: lhs_lower, y: rhs_upper }` when the time budget is exceeded.
 * @throws {string} A plain string (not an Error) if the search loop ends
 *   without finding an overlap.
 */
diff.prototype._sms = function(lhs_ctx, lhs_lower, lhs_upper, rhs_ctx, rhs_lower, rhs_upper, vector_u, vector_d) {
// Deadline for this call only; every _sms invocation gets its own budget.
const timeout = Date.now() + SMS_TIMEOUT_SECONDS * 1000;
const max = lhs_ctx.codes.length + rhs_ctx.codes.length + 1;
// Diagonals (k = x - y) on which the forward and reverse searches start.
const kdown = lhs_lower - rhs_lower;
const kup = lhs_upper - rhs_upper;
const delta = (lhs_upper - lhs_lower) - (rhs_upper - rhs_lower);
// The parity of delta decides which of the two searches checks for overlap.
const odd = (delta & 1) != 0;
// With these offsets, diagonal k is stored at index max + (k - kdown) in
// vector_d and at index max + (k - kup) in vector_u.
const offset_down = max - kdown;
const offset_up = max - kup;
const maxd = ((lhs_upper - lhs_lower + rhs_upper - rhs_lower) / 2) + 1;
// Seed both searches with their starting x positions.
vector_d[ offset_down + kdown + 1 ] = lhs_lower;
vector_u[ offset_up + kup - 1 ] = lhs_upper;
const ret = { x:0, y:0 }
let x;
let y;
for (let d = 0; d <= maxd; ++d) {
if (SMS_TIMEOUT_SECONDS && Date.now() > timeout) {
// bail if taking too long; with this split _lcs ends up marking the whole
// remaining lhs and rhs ranges as modified
return { x: lhs_lower, y: rhs_upper };
}
// Extend the forward path.
for (let k = kdown - d; k <= kdown + d; k += 2) {
if (k === kdown - d) {
x = vector_d[ offset_down + k + 1 ];//down
}
else {
x = vector_d[ offset_down + k - 1 ] + 1;//right
if ((k < (kdown + d)) && (vector_d[ offset_down + k + 1 ] >= x)) {
x = vector_d[ offset_down + k + 1 ];//down
}
}
y = x - k;
// find the end of the furthest reaching forward D-path in diagonal k.
while ((x < lhs_upper) && (y < rhs_upper) && (lhs_ctx.codes[x] === rhs_ctx.codes[y])) {
x++; y++;
}
vector_d[ offset_down + k ] = x;
// overlap ?
if (odd && (kup - d < k) && (k < kup + d)) {
if (vector_u[offset_up + k] <= vector_d[offset_down + k]) {
ret.x = vector_d[offset_down + k];
ret.y = vector_d[offset_down + k] - k;
return (ret);
}
}
}
// Extend the reverse path.
for (let k = kup - d; k <= kup + d; k += 2) {
// find the only or better starting point
if (k === kup + d) {
x = vector_u[offset_up + k - 1]; // up
} else {
x = vector_u[offset_up + k + 1] - 1; // left
if ((k > kup - d) && (vector_u[offset_up + k - 1] < x))
x = vector_u[offset_up + k - 1]; // up
}
y = x - k;
// walk backwards along the diagonal while the preceding entries match
while ((x > lhs_lower) && (y > rhs_lower) && (lhs_ctx.codes[x - 1] === rhs_ctx.codes[y - 1])) {
// diagonal
x--;
y--;
}
vector_u[offset_up + k] = x;
// overlap ?
if (!odd && (kdown - d <= k) && (k <= kdown + d)) {
if (vector_u[offset_up + k] <= vector_d[offset_down + k]) {
ret.x = vector_d[offset_down + k];
ret.y = vector_d[offset_down + k] - k;
return (ret);
}
}
}
}
// Reached only if no overlap was found within maxd iterations.
throw "the algorithm should never come here.";
};
/**
 * Post-processes one side's `modified` map in place.
 *
 * For each run of consecutive entries flagged `true`, if the first entry of
 * the run has the same code as the entry immediately after the run, the run
 * is slid forward by one position (the first entry is unflagged, the entry
 * after the run is flagged). This repeats until the run can no longer move.
 *
 * @param {{codes: Object, modified: Object}} ctx - Context to adjust;
 *   `codes` must expose a `length`.
 */
diff.prototype._optimize = function(ctx) {
  const { codes, modified } = ctx;
  const isUnflagged = (index) => modified[index] === undefined || modified[index] === false;

  let runStart = 0;
  while (runStart < codes.length) {
    // Advance to the first flagged entry.
    while (runStart < codes.length && isUnflagged(runStart)) {
      runStart += 1;
    }

    // Find the first entry past the flagged run.
    let runEnd = runStart;
    while (runEnd < codes.length && modified[runEnd] === true) {
      runEnd += 1;
    }

    const canSlide = runEnd < codes.length && codes[runStart] === codes[runEnd];
    if (canSlide) {
      // Shift the run one step forward; runStart is deliberately not moved
      // here so the next pass re-examines the shifted run.
      modified[runStart] = false;
      modified[runEnd] = true;
    } else {
      runStart = runEnd;
    }
  }
};
/**
 * Converts the per-entry `modified` flags of both sides into a list of
 * change items.
 *
 * Both sides are walked in parallel. Positions where neither side is flagged
 * are skipped as equal. Otherwise a change starts: consecutive flagged
 * entries on the left are consumed as deletions and consecutive flagged
 * entries on the right as insertions. Once one side is exhausted, everything
 * left on the other side is consumed as well.
 *
 * @param {{codes: Object, modified: Object}} lhs_ctx - Left context.
 * @param {{codes: Object, modified: Object}} rhs_ctx - Right context.
 * @returns {Array<{lhs_start: number, rhs_start: number,
 *   lhs_deleted_count: number, rhs_inserted_count: number}>} Change items in
 *   ascending position order, with 0-based start indices.
 */
diff.prototype._create_diffs = function(lhs_ctx, rhs_ctx) {
  const lhsTotal = lhs_ctx.codes.length;
  const rhsTotal = rhs_ctx.codes.length;
  const changes = [];
  let lhsPos = 0;
  let rhsPos = 0;

  while (lhsPos < lhsTotal || rhsPos < rhsTotal) {
    const lhsUnchanged = lhsPos < lhsTotal && !lhs_ctx.modified[lhsPos];
    const rhsUnchanged = rhsPos < rhsTotal && !rhs_ctx.modified[rhsPos];

    if (lhsUnchanged && rhsUnchanged) {
      // Equal entries: step over them together.
      lhsPos += 1;
      rhsPos += 1;
      continue;
    }

    // Possibly deleted and/or inserted entries start here.
    const lhsBegin = lhsPos;
    const rhsBegin = rhsPos;

    while (lhsPos < lhsTotal && (rhsPos >= rhsTotal || lhs_ctx.modified[lhsPos])) {
      lhsPos += 1;
    }
    while (rhsPos < rhsTotal && (lhsPos >= lhsTotal || rhs_ctx.modified[rhsPos])) {
      rhsPos += 1;
    }

    const advanced = lhsBegin < lhsPos || rhsBegin < rhsPos;
    if (advanced) {
      changes.push({
        lhs_start: lhsBegin,
        rhs_start: rhsBegin,
        lhs_deleted_count: lhsPos - lhsBegin,
        rhs_inserted_count: rhsPos - rhsBegin,
      });
    }
  }

  return changes;
};
/**
 * Holds the two inputs as arrays of entries and the state needed to assign
 * each distinct entry a comparison code. Must be invoked with `new`.
 *
 * String inputs are split according to `options.split`:
 *   - 'lines' (default): split on '\n';
 *   - 'words': split on every single whitespace character, so consecutive
 *     whitespace yields empty-string entries;
 *   - 'chars': one entry per letter, number, punctuation or symbol character
 *     together with the combining marks that follow it, or per whitespace
 *     character. NOTE: characters matching none of these classes (for
 *     example control or format characters) do not appear in the result.
 * Non-string inputs are stored as given, without copying or splitting.
 *
 * The caller's `options` object is copied, never modified.
 *
 * @param {string|Array} lhs - Left-hand input.
 * @param {string|Array} rhs - Right-hand input.
 * @param {Object} [options] - Settings; `ignorews`, `ignorecase` and
 *   `ignoreaccents` are read later by `_codeify`.
 * @param {string} [options.split='lines'] - 'lines', 'words' or 'chars'.
 * @throws {TypeError} If a string input has to be split and `options.split`
 *   is not one of the supported modes.
 */
function CodeifyText(lhs, rhs, options = {}) {
  this._max_code = 0;
  this._diff_codes = {};
  this.ctxs = {};
  // Work on a copy so the default for `split` never leaks into the caller's
  // object.
  this.options = { ...options, split: options.split || 'lines' };

  const mode = this.options.split;
  // A base character followed by its combining marks counts as one unit.
  const exp = /\p{Letter}\p{Mark}*|\p{Number}\p{Mark}*|\p{Punctuation}\p{Mark}*|\p{Symbol}\p{Mark}*|\p{White_Space}/gu;

  // Turns one input into its list of entries.
  const toEntries = (input) => {
    if (typeof input !== 'string') {
      return input;
    }
    switch (mode) {
      case 'chars':
        return input.match(exp) || [];
      case 'words':
        return input.split(/\s/);
      case 'lines':
        return input.split('\n');
      default:
        // Fail here with a clear message instead of leaving the side
        // undefined and crashing later while codes are computed.
        throw new TypeError(`Unsupported split option: ${String(mode)}`);
    }
  };

  this.lhs = toEntries(lhs);
  this.rhs = toEntries(rhs);
}
/**
 * Returns the comparison codes for one side, computing and caching them on
 * first use.
 *
 * On the first call for a side, a context is built from `this[side]` via
 * `_diff_ctx`, stored in `this.ctxs`, and its `codes` object is given a
 * `length` equal to its number of keys. Later calls return the cached
 * `codes` object.
 *
 * @param {string} side - Property name of the input to encode (e.g. 'lhs').
 * @returns {Object} Index-keyed codes with a `length` property.
 */
CodeifyText.prototype.getCodes = function(side) {
  const { ctxs } = this;
  const alreadyBuilt = Object.prototype.hasOwnProperty.call(ctxs, side);
  if (alreadyBuilt) {
    return ctxs[side].codes;
  }

  const built = this._diff_ctx(this[side]);
  ctxs[side] = built;
  // `codes` is a plain object, so it needs an explicit length.
  built.codes.length = Object.keys(built.codes).length;
  return built.codes;
};
/**
 * Returns the entries stored in the cached context of one side.
 *
 * The context must already exist in `this.ctxs` (it is created by
 * `getCodes`); otherwise the lookup throws a TypeError.
 *
 * @param {string} side - Key of the cached context (e.g. 'lhs').
 * @returns {*} The `lines` value of that context.
 */
CodeifyText.prototype.getLines = function(side) {
  const { lines } = this.ctxs[side];
  return lines;
};
/**
 * Builds a fresh context for one side and fills in its codes.
 *
 * @param {Array} lines - Entries to encode; stored by reference as `lines`.
 * @returns {{i: number, codes: Object, lines: Array}} The context, after
 *   `_codeify` has been run on it.
 */
CodeifyText.prototype._diff_ctx = function(lines) {
  const codes = {};
  const ctx = { i: 0, codes, lines };
  this._codeify(lines, ctx);
  return ctx;
};
/**
 * Assigns an integer code to every entry of `lines` and stores it in
 * `ctx.codes` under the entry's index. Entries that are equal after
 * normalisation get the same code. The code table (`this._diff_codes`) and
 * counter (`this._max_code`) live on the instance, so codes are shared
 * between all calls on the same instance.
 *
 * Normalisation, applied in this order when the option is truthy:
 *   - `ignorews`: remove all whitespace;
 *   - `ignorecase`: convert to lower case;
 *   - `ignoreaccents`: decompose (NFD) and remove code points in
 *     U+0300-U+036F.
 *
 * Only own properties of the code table count as known entries, so text
 * that happens to match a name inherited from Object.prototype (such as
 * "constructor" or "__proto__") still receives a proper integer code.
 *
 * @param {Array<string>} lines - Entries to encode.
 * @param {{codes: Object}} ctx - Context whose `codes` map is filled in.
 */
CodeifyText.prototype._codeify = function(lines, ctx) {
  const { ignorews, ignorecase, ignoreaccents } = this.options;
  const hasOwn = Object.prototype.hasOwnProperty;

  for (let i = 0; i < lines.length; ++i) {
    let line = lines[i];
    if (ignorews) {
      line = line.replace(/\s+/g, '');
    }
    if (ignorecase) {
      line = line.toLowerCase();
    }
    if (ignoreaccents) {
      line = line.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
    }

    if (hasOwn.call(this._diff_codes, line)) {
      ctx.codes[i] = this._diff_codes[line];
      continue;
    }

    this._max_code += 1;
    // defineProperty creates an own property even for the key "__proto__",
    // where a plain assignment would go to the inherited setter instead.
    Object.defineProperty(this._diff_codes, line, {
      value: this._max_code,
      writable: true,
      enumerable: true,
      configurable: true,
    });
    ctx.codes[i] = this._max_code;
  }
};
// CommonJS export: the diff constructor is the only value this module exposes.
module.exports = diff;