1
0
mirror of synced 2025-11-06 04:30:40 +08:00

feat: new encoder (wip)

This commit is contained in:
Jamie Peabody
2023-08-27 11:16:37 +01:00
parent cd09815b7d
commit 8c2d68074c
12 changed files with 231 additions and 130 deletions

View File

@@ -93,7 +93,8 @@ Mergely will emit an `updated` event when the editor is first initialized, and e
|<a name="ignorews"></a>ignorews|boolean|`false`|Ignores white-space.|
|<a name="ignorecase"></a>ignorecase|boolean|`false`|Ignores case.|
|<a name="ignoreaccents"></a>ignoreaccents|boolean|`false`|Ignores accented characters.|
|<a name="lcs"></a>lcs|boolean|`true`|Enables/disables LCS computation for paragraphs (char-by-char changes). Disabling can give a performance gain for large documents.|
|<a name="inline"></a>inline|string|`chars`|The line-by-line (inline) type of diff. Valid values are: `none`, `chars`, `words`. When `none`, inline diff is disabled. When `chars`, differentiation is done on a character-by-character basis. When `words`, differentiation is done on a word-by-word basis (words are delimited by whitespace).|
|<a name="lcs"></a>lcs|boolean|`true`|:warning: **Deprecated**, use [`inline`](#inline). Enables/disables LCS computation for paragraphs (char-by-char changes). Disabling can give a performance gain for large documents.|
|<a name="lhs"></a>lhs|boolean,`function handler(setValue)`|`null`|Sets the value of the editor on the left-hand side.|
|<a name="license"></a>license|string|`lgpl`|The choice of license to use with Mergely. Valid values are: `lgpl`, `gpl`, `mpl` or `lgpl-separate-notice`, `gpl-separate-notice`, `mpl-separate-notice` (the license requirements are met in a separate notice file).|
|<a name="line_numbers"></a>line_numbers|boolean|`true`|Enables/disables line numbers. Enabling line numbers will toggle the visibility of the line number margins.|

View File

@@ -4,15 +4,9 @@ require('codemirror/addon/selection/mark-selection.js');
require('codemirror/lib/codemirror.css');
require('../src/mergely.css');
const lhs = `\
the quick red fox
jumped over the hairy dog
`;
const lhs = `hello`;
const rhs = `\
the quick brown fox
jumped over the lazy dog
`;
const rhs = `hello\ngoodbye`;
document.onreadystatechange = function () {
@@ -22,6 +16,7 @@ document.onreadystatechange = function () {
const mergely = new Mergely('#compare', {
license: 'lgpl',
inline: 'words',
lhs,
rhs
});

View File

@@ -304,7 +304,6 @@ dog
and the postman
`
}];
console.log(data.length);
for (let i = 0; i < data.length; ++i) {
const { lhs, rhs } = data[i];
const darkModeOptions = i === 11 ? {

View File

@@ -298,7 +298,6 @@
rhs.style = 'color:initial';
});
jQuery('#search-text').on('keypress', (ev) => {
console.log(ev.which)
if (event.which === 13) {
ev.preventDefault();
jQuery('#search').click();

View File

@@ -941,7 +941,6 @@ CodeMirrorDiffView.prototype._markupLineChanges = function (changes) {
for (let i = 0; i < changes.length; ++i) {
const change = changes[i];
const isCurrent = current_diff === i;
const lineDiff = this.settings.lcs !== false;
const lhsInView = this._isChangeInView('lhs', lhsvp, change);
const rhsInView = this._isChangeInView('rhs', rhsvp, change);
@@ -952,7 +951,7 @@ CodeMirrorDiffView.prototype._markupLineChanges = function (changes) {
vdoc.addRender('lhs', change, i, {
isCurrent,
lineDiff,
lineDiff: this.settings.inline !== 'none',
// TODO: move out of loop
getMergeHandler: (change, side, oside) => {
return () => this._merge_change(change, side, oside);
@@ -969,7 +968,7 @@ CodeMirrorDiffView.prototype._markupLineChanges = function (changes) {
vdoc.addRender('rhs', change, i, {
isCurrent,
lineDiff,
lineDiff: this.settings.inline !== 'none',
// TODO: move out of loop
getMergeHandler: (change, side, oside) => {
return () => this._merge_change(change, side, oside);
@@ -979,13 +978,14 @@ CodeMirrorDiffView.prototype._markupLineChanges = function (changes) {
});
}
if (lineDiff
if (this.settings.inline !== 'none'
&& (lhsInView || rhsInView)
&& change.op === 'c') {
vdoc.addInlineDiff(change, i, {
ignoreaccents: this.settings.ignoreaccents,
ignorews: this.settings.ignorews,
ignorecase: this.settings.ignorecase,
split: this.settings.inline,
getText: (side, lineNum) => {
if (side === 'lhs') {
const text = led.getLine(lineNum);

View File

@@ -1,43 +1,52 @@
const Encoder = require('./encoder.js');
const SMS_TIMEOUT_SECONDS = 1.0;
function diff(lhs, rhs, options = {}) {
function diff(lhs, rhs, opts) {
const {
ignorews = false,
ignoreaccents = false,
ignorecase = false,
split = 'lines'
} = options;
this.codeify = new CodeifyText(lhs, rhs, {
} = opts || {};
const options = {
ignorews,
ignoreaccents,
ignorecase,
split
});
const lhs_ctx = {
codes: this.codeify.getCodes('lhs'),
};
const encoder = new Encoder();
const lhsCodes = encoder.encode(lhs, options);
const rhsCodes = encoder.encode(rhs, options);
const lhsCtx = {
codes: lhsCodes.codes,
length: lhsCodes.length,
parts: lhsCodes.parts,
modified: {}
};
const rhs_ctx = {
codes: this.codeify.getCodes('rhs'),
const rhsCtx = {
codes: rhsCodes.codes,
length: rhsCodes.length,
parts: rhsCodes.parts,
modified: {}
};
const vector_d = [];
const vector_u = [];
this._lcs(lhs_ctx, 0, lhs_ctx.codes.length, rhs_ctx, 0, rhs_ctx.codes.length, vector_u, vector_d);
this._optimize(lhs_ctx);
this._optimize(rhs_ctx);
this.items = this._create_diffs(lhs_ctx, rhs_ctx);
this._lcs(lhsCtx, 0, lhsCodes.length, rhsCtx, 0, rhsCodes.length, vector_u, vector_d);
this._optimize(lhsCtx);
this._optimize(rhsCtx);
this.items = this._create_diffs(lhsCtx, rhsCtx, options);
this.sides = {
lhs: lhsCtx,
rhs: rhsCtx
};
};
diff.prototype.changes = function() {
return this.items;
};
diff.prototype.getLines = function(side) {
return this.codeify.getLines(side);
};
diff.prototype.normal_form = function() {
let nf = '';
for (let index = 0; index < this.items.length; ++index) {
@@ -57,8 +66,8 @@ diff.prototype.normal_form = function() {
else rhs_str = (item.rhs_start + 1) + ',' + (item.rhs_start + item.rhs_inserted_count);
nf += lhs_str + change + rhs_str + '\n';
const lhs_lines = this.getLines('lhs');
const rhs_lines = this.getLines('rhs');
const lhs_lines = this.sides.lhs.parts;
const rhs_lines = this.sides.rhs.parts;
if (rhs_lines && lhs_lines) {
let i;
// if rhs/lhs lines have been retained, output contextual diff
@@ -102,7 +111,7 @@ diff.prototype._lcs = function(lhs_ctx, lhs_lower, lhs_upper, rhs_ctx, rhs_lower
diff.prototype._sms = function(lhs_ctx, lhs_lower, lhs_upper, rhs_ctx, rhs_lower, rhs_upper, vector_u, vector_d) {
const timeout = Date.now() + SMS_TIMEOUT_SECONDS * 1000;
const max = lhs_ctx.codes.length + rhs_ctx.codes.length + 1;
const max = lhs_ctx.length + rhs_ctx.length + 1;
const kdown = lhs_lower - rhs_lower;
const kup = lhs_upper - rhs_upper;
const delta = (lhs_upper - lhs_lower) - (rhs_upper - rhs_lower);
@@ -115,12 +124,13 @@ diff.prototype._sms = function(lhs_ctx, lhs_lower, lhs_upper, rhs_ctx, rhs_lower
const ret = { x:0, y:0 }
let x;
let y;
let k;
for (let d = 0; d <= maxd; ++d) {
if (SMS_TIMEOUT_SECONDS && Date.now() > timeout) {
// bail if taking too long
return { x: lhs_lower, y: rhs_upper };
}
for (let k = kdown - d; k <= kdown + d; k += 2) {
for (k = kdown - d; k <= kdown + d; k += 2) {
if (k === kdown - d) {
x = vector_d[ offset_down + k + 1 ];//down
}
@@ -178,15 +188,15 @@ diff.prototype._sms = function(lhs_ctx, lhs_lower, lhs_upper, rhs_ctx, rhs_lower
diff.prototype._optimize = function(ctx) {
let start = 0;
let end = 0;
while (start < ctx.codes.length) {
while ((start < ctx.codes.length) && (ctx.modified[start] === undefined || ctx.modified[start] === false)) {
while (start < ctx.length) {
while ((start < ctx.length) && (ctx.modified[start] === undefined || ctx.modified[start] === false)) {
start++;
}
end = start;
while ((end < ctx.codes.length) && (ctx.modified[end] === true)) {
while ((end < ctx.length) && (ctx.modified[end] === true)) {
end++;
}
if ((end < ctx.codes.length) && (ctx.codes[start] === ctx.codes[end])) {
if ((end < ctx.length) && (ctx.codes[start] === ctx.codes[end])) {
ctx.modified[start] = false;
ctx.modified[end] = true;
}
@@ -196,16 +206,16 @@ diff.prototype._optimize = function(ctx) {
}
};
diff.prototype._create_diffs = function(lhs_ctx, rhs_ctx) {
diff.prototype._create_diffs = function(lhs_ctx, rhs_ctx, options) {
const items = [];
let lhs_start = 0;
let rhs_start = 0;
let lhs_line = 0;
let rhs_line = 0;
while (lhs_line < lhs_ctx.codes.length || rhs_line < rhs_ctx.codes.length) {
if ((lhs_line < lhs_ctx.codes.length) && (!lhs_ctx.modified[lhs_line])
&& (rhs_line < rhs_ctx.codes.length) && (!rhs_ctx.modified[rhs_line])) {
while (lhs_line < lhs_ctx.length || rhs_line < rhs_ctx.length) {
if ((lhs_line < lhs_ctx.length) && (!lhs_ctx.modified[lhs_line])
&& (rhs_line < rhs_ctx.length) && (!rhs_ctx.modified[rhs_line])) {
// equal lines
lhs_line++;
rhs_line++;
@@ -215,19 +225,50 @@ diff.prototype._create_diffs = function(lhs_ctx, rhs_ctx) {
lhs_start = lhs_line;
rhs_start = rhs_line;
while (lhs_line < lhs_ctx.codes.length && (rhs_line >= rhs_ctx.codes.length || lhs_ctx.modified[lhs_line]))
while (lhs_line < lhs_ctx.length && (rhs_line >= rhs_ctx.length || lhs_ctx.modified[lhs_line]))
lhs_line++;
while (rhs_line < rhs_ctx.codes.length && (lhs_line >= lhs_ctx.codes.length || rhs_ctx.modified[rhs_line]))
while (rhs_line < rhs_ctx.length && (lhs_line >= lhs_ctx.length || rhs_ctx.modified[rhs_line]))
rhs_line++;
if ((lhs_start < lhs_line) || (rhs_start < rhs_line)) {
// store a new difference-item
let deleted_count;
let inserted_count;
let lhs_start = lhs_line;
let rhs_start = rhs_line;
if (options.split === 'lines') {
lhs_start = lhs_line;
rhs_start = rhs_line;
deleted_count = lhs_line - lhs_start;
inserted_count = rhs_line - rhs_start;
} else {
const ditem_lhs_start = (lhs_start >= lhs_ctx.length)
? lhs_ctx.length
: lhs_ctx.parts[lhs_start].from;
const ditem_rhs_start = (rhs_start >= rhs_ctx.length)
? rhs_ctx.length
: rhs_ctx.parts[rhs_start].from;
const ditem_lhs_end = (lhs_line >= lhs_ctx.length)
? lhs_ctx.length
: lhs_ctx.parts[lhs_line].from;
const ditem_rhs_end = (rhs_line >= rhs_ctx.length)
? rhs_ctx.length
: rhs_ctx.parts[rhs_line];
// const delim_len = (options.split === 'words') ? 1 : 0;
// const ditemLhs
deleted_count = ditem_lhs_end - ditem_lhs_start;
inserted_count = ditem_rhs_end - ditem_rhs_start;
lhs_start = ditem_lhs_start;
rhs_start = ditem_rhs_start;
}
items.push({
lhs_start: lhs_start,
rhs_start: rhs_start,
lhs_deleted_count: lhs_line - lhs_start,
rhs_inserted_count: rhs_line - rhs_start
lhs_deleted_count: deleted_count,
rhs_inserted_count: inserted_count
});
}
}
@@ -235,77 +276,4 @@ diff.prototype._create_diffs = function(lhs_ctx, rhs_ctx) {
return items;
};
function CodeifyText(lhs, rhs, options) {
this._max_code = 0;
this._diff_codes = {};
this.ctxs = {};
this.options = options;
this.options.split = this.options.split || 'lines';
if (typeof lhs === 'string') {
if (this.options.split === 'chars') {
this.lhs = lhs.split('');
} else if (this.options.split === 'words') {
this.lhs = lhs.split(/\s/);
} else if (this.options.split === 'lines') {
this.lhs = lhs.split('\n');
}
} else {
this.lhs = lhs;
}
if (typeof rhs === 'string') {
if (this.options.split === 'chars') {
this.rhs = rhs.split('');
} else if (this.options.split === 'words') {
this.rhs = rhs.split(/\s/);
} else if (this.options.split === 'lines') {
this.rhs = rhs.split('\n');
}
} else {
this.rhs = rhs;
}
};
CodeifyText.prototype.getCodes = function(side) {
if (!this.ctxs.hasOwnProperty(side)) {
var ctx = this._diff_ctx(this[side]);
this.ctxs[side] = ctx;
ctx.codes.length = Object.keys(ctx.codes).length;
}
return this.ctxs[side].codes;
}
CodeifyText.prototype.getLines = function(side) {
return this.ctxs[side].lines;
}
CodeifyText.prototype._diff_ctx = function(lines) {
var ctx = {i: 0, codes: {}, lines: lines};
this._codeify(lines, ctx);
return ctx;
}
CodeifyText.prototype._codeify = function(lines, ctx) {
for (let i = 0; i < lines.length; ++i) {
let line = lines[i];
if (this.options.ignorews) {
line = line.replace(/\s+/g, '');
}
if (this.options.ignorecase) {
line = line.toLowerCase();
}
if (this.options.ignoreaccents) {
line = line.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
}
const aCode = this._diff_codes[line];
if (aCode !== undefined) {
ctx.codes[i] = aCode;
} else {
++this._max_code;
this._diff_codes[line] = this._max_code;
ctx.codes[i] = this._max_code;
}
}
}
module.exports = diff;

79
src/encoder.js Normal file
View File

@@ -0,0 +1,79 @@
/**
 * Splits text into parts (lines, words or characters) and assigns each
 * distinct part an integer code, so that two texts can be compared by code
 * instead of by string.
 *
 * Codes are shared across every `encode()` call made on the same instance:
 * encoding the left- and right-hand texts with one encoder gives equal parts
 * equal codes.
 */
class Encoder {
	constructor() {
		// Last code handed out; the first code assigned is 1.
		this._maxCode = 0;
		// Normalized part text -> code. A Map is used instead of a plain
		// object so that texts such as `constructor`, `toString` or
		// `__proto__` never resolve to members inherited from
		// Object.prototype (which would yield a non-numeric "code").
		this._codes = new Map();
	}

	/**
	 * Splits `text` according to `options.split` and encodes every part.
	 *
	 * @param {string} text - The text to encode.
	 * @param {object} [options]
	 * @param {string} [options.split='lines'] - `lines`, `words` or `chars`.
	 * @param {boolean} [options.ignorews] - Strip whitespace before coding.
	 * @param {boolean} [options.ignorecase] - Lower-case before coding.
	 * @param {boolean} [options.ignoreaccents] - Strip combining accents before coding.
	 * @returns {{codes: Object<number, number>, parts: Array<{from: number, to: number, text: string}>, length: number}}
	 *   `codes` maps part index to code, `parts` holds the split parts and
	 *   `length` is the number of parts. For `lines`, `from`/`to` are line
	 *   indexes; otherwise they are character offsets into `text`.
	 */
	encode(text, options) {
		const opts = options || {};
		const { split = 'lines' } = opts;
		const byLines = split === 'lines';

		let exp;
		// For `chars` the matched character is the part itself, so the part
		// extends one character past `match.index`; for the other modes the
		// match is a delimiter and is excluded from the part.
		let fudge = 0;
		if (split === 'chars') {
			// NOTE(review): `.` does not match line terminators, so a newline
			// ends up prefixed to the following character's part - confirm
			// whether that is intended for inline diffs.
			exp = /./g;
			fudge = 1;
		} else if (split === 'words') {
			exp = /\s+/g;
		} else {
			exp = /\n/g;
		}

		const parts = [];
		// Index of the last character consumed by the previous match.
		let p0 = -1;
		let match;
		while ((match = exp.exec(text)) !== null) {
			const start = p0 + 1;
			const end = match.index + fudge;
			parts.push({
				from: byLines ? parts.length : start,
				to: byLines ? parts.length + 1 : end,
				text: text.slice(start, end)
			});
			// Skip the whole match, not just its first character, so that a
			// run of whitespace in `words` mode does not leak into the next
			// part.
			p0 = match.index + match[0].length - 1;
		}

		// Whatever follows the last match (possibly the empty string) is the
		// final part.
		parts.push({
			from: byLines ? parts.length : p0 + 1,
			to: byLines ? parts.length + 1 : text.length,
			text: text.slice(p0 + 1)
		});

		const hash = this._hash(parts, opts);
		return {
			codes: hash.codes,
			parts: hash.parts,
			// one code is produced per part
			length: parts.length
		};
	}

	/**
	 * Assigns a code to every part, reusing the code of any previously seen
	 * part whose normalized text is identical.
	 *
	 * @param {Array<{text: string}>} parts - Parts produced by `encode()`.
	 * @param {object} [options] - `ignorews`, `ignorecase`, `ignoreaccents` flags.
	 * @returns {{codes: Object<number, number>, parts: Array}} The codes keyed
	 *   by part index, and the (unmodified) parts.
	 */
	_hash(parts, options) {
		const opts = options || {};
		const codes = {};
		parts.forEach((part, index) => {
			let text = part.text;
			if (opts.ignorews) {
				text = text.replace(/\s+/g, '');
			}
			if (opts.ignorecase) {
				text = text.toLowerCase();
			}
			if (opts.ignoreaccents) {
				// decompose, then drop the combining diacritical marks
				text = text.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
			}
			let code = this._codes.get(text);
			if (code === undefined) {
				this._maxCode += 1;
				code = this._maxCode;
				this._codes.set(text, code);
			}
			codes[index] = code;
		});
		return {
			codes,
			parts
		};
	}
}
module.exports = Encoder;

View File

@@ -12,6 +12,7 @@ const defaultOptions = {
wrap_lines: false,
line_numbers: true,
lcs: true,
inline: 'chars',
sidebar: true,
viewport: false,
ignorews: false,
@@ -119,13 +120,15 @@ class Mergely {
const colors = dom.getColors(this.el);
this._options = {
...defaultOptions,//lgpl
...{
// default inline based off `lcs`
inline: options && options.lcs === false ? 'none' : 'chars'
},
...this._initOptions,
...options//lgpl-separate-notice
...options,//lgpl-separate-notice
};
this._viewOptions = {
...defaultOptions,
...this._initOptions,
...options,
...this._options,
_colors: colors
};
}

View File

@@ -124,7 +124,7 @@ class VDoc {
this._setRenderedChange(side, changeId);
}
addInlineDiff(change, changeId, { getText, ignorews, ignoreaccents, ignorecase }) {
addInlineDiff(change, changeId, { getText, ignorews, ignoreaccents, ignorecase, split = 'chars' }) {
if (this.options._debug) {
trace('vdoc#addInlineDiff', changeId, change);
}
@@ -152,7 +152,7 @@ class VDoc {
ignoreaccents,
ignorews,
ignorecase,
split: 'chars'
split
});
for (const change of results.changes()) {
const {

16
test/diff.spec.js Normal file
View File

@@ -0,0 +1,16 @@
const diff = require('../src/diff');

// Specs for the `diff` module, run through its public constructor and
// `changes()` accessor.
describe('diff', () => {
	it('should insert one line when lhs is empty and rhs has no line ending', () => {
		const _diff = new diff('', 'hello', { split: 'lines' });
		const changes = _diff.changes();
		// with lhs_start at 1, the insert is at the end
		// NOTE(review): both counts are expected to be 0 although the spec name
		// describes a one-line insert - confirm this is the intended result
		// for the work-in-progress encoder.
		expect(changes).to.deep.equal([{
			lhs_start: 1,
			rhs_start: 1,
			lhs_deleted_count: 0,
			rhs_inserted_count: 0
		}]);
	});
});

View File

@@ -189,7 +189,6 @@ describe('markup', () => {
expect(editor.querySelectorAll('.mergely.rhs')).to.have.length(0);
}
},
{
name: 'Changed lines (lhs)',
lhs: 'the quick red fox\njumped over the hairy dog',
@@ -233,6 +232,46 @@ describe('markup', () => {
expect(rhs_spans[1].innerText).to.equal('h');
expect(rhs_spans[2].innerText).to.equal('ir');
}
},
{
name: 'Changed lines with inline words (lhs)',
lhs: 'the quick red fox\njumped over the hairy dog',
rhs: 'the quick brown fox\njumped over the lazy dog',
options: { inline: 'words' },
check: (editor) => {
expect(editor.querySelectorAll(LHS_CHANGE_START + '.cid-0')).to.have.length(1);
expect(editor.querySelectorAll(LHS_CHANGE_END + '.cid-0')).to.have.length(1);
expect(editor.querySelectorAll(RHS_CHANGE_START + '.cid-0')).to.have.length(1);
expect(editor.querySelectorAll(RHS_CHANGE_END + '.cid-0')).to.have.length(1);
const lhs_spans = editor.querySelectorAll(LHS_INLINE_TEXT + '.cid-0');
expect(lhs_spans).to.have.length(2);
expect(lhs_spans[0].innerText).to.equal('red');
expect(lhs_spans[1].innerText).to.equal('hairy');
const rhs_spans = editor.querySelectorAll(RHS_INLINE_TEXT + '.cid-0');
expect(rhs_spans).to.have.length(2);
expect(rhs_spans[0].innerText).to.equal('brown');
expect(rhs_spans[1].innerText).to.equal('lazy');
}
},
{
name: 'Changed lines (rhs)',
lhs: 'the quick brown fox\njumped over the lazy dog',
rhs: 'the quick red fox\njumped over the hairy dog',
options: { inline: 'words' },
check: (editor) => {
expect(editor.querySelectorAll(LHS_CHANGE_START + '.cid-0')).to.have.length(1);
expect(editor.querySelectorAll(LHS_CHANGE_END + '.cid-0')).to.have.length(1);
expect(editor.querySelectorAll(RHS_CHANGE_START + '.cid-0')).to.have.length(1);
expect(editor.querySelectorAll(RHS_CHANGE_END + '.cid-0')).to.have.length(1);
const lhs_spans = editor.querySelectorAll(LHS_INLINE_TEXT + '.cid-0');
expect(lhs_spans).to.have.length(2);
expect(lhs_spans[0].innerText).to.equal('brown');
expect(lhs_spans[1].innerText).to.equal('lazy');
const rhs_spans = editor.querySelectorAll(RHS_INLINE_TEXT + '.cid-0');
expect(rhs_spans).to.have.length(2);
expect(rhs_spans[0].innerText).to.equal('red');
expect(rhs_spans[1].innerText).to.equal('hairy');
}
}
];
@@ -246,8 +285,9 @@ describe('markup', () => {
license: 'lgpl-separate-notice',
change_timeout: 0,
_debug: debug,
lhs: (setValue) => setValue(opt.lhs),
rhs: (setValue) => setValue(opt.rhs)
lhs: opt.lhs,
rhs: opt.rhs,
...opt.options
});
const test = () => {
try {

View File

@@ -8,6 +8,7 @@ const defaultOptions = {
rhs_margin: 'right',
wrap_lines: false,
line_numbers: true,
inline: 'chars',
lcs: true,
sidebar: true,
viewport: false,