From d3d2090ca5de67cc13e86f384d6282da138db5f4 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Tue, 23 Nov 2021 21:31:38 -0500 Subject: [PATCH] Changeset: Migrate from `smartOpAssembler()` to `canonicalizeOps()` --- CHANGELOG.md | 2 + src/node/db/Pad.js | 11 +- src/static/js/Changeset.js | 185 +++++++++--------- src/static/js/ace2_inner.js | 30 +-- src/static/js/contentcollector.js | 15 +- src/tests/frontend/easysync-helper.js | 18 +- .../frontend/specs/easysync-assembler.js | 14 +- .../frontend/specs/easysync-mutations.js | 51 +++-- 8 files changed, 162 insertions(+), 164 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4883cf4be..1c8d64813 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,6 +39,8 @@ * `opAssembler()`: Deprecated in favor of the new `serializeOps()` function. * `mergingOpAssembler()`: Deprecated in favor of the new `squashOps()` generator function (combined with `serializeOps()`). + * `smartOpAssembler()`: Deprecated in favor of the new `canonicalizeOps()` + generator function (combined with `serializeOps()`). * `appendATextToAssembler()`: Deprecated in favor of the new `opsFromAText()` generator function. * `newOp()`: Deprecated in favor of the new `Op` class. diff --git a/src/node/db/Pad.js b/src/node/db/Pad.js index b7bbdb867..545c35e3c 100644 --- a/src/node/db/Pad.js +++ b/src/node/db/Pad.js @@ -494,21 +494,20 @@ Pad.prototype.copyPadWithoutHistory = async function (destinationID, force) { const oldAText = this.atext; - // based on Changeset.makeSplice - const assem = Changeset.smartOpAssembler(); - for (const op of Changeset.opsFromAText(oldAText)) assem.append(op); - assem.endDocument(); + let newLength; + const serializedOps = Changeset.serializeOps((function* () { + newLength = yield* Changeset.canonicalizeOps(Changeset.opsFromAText(oldAText), true); + })()); // although we have instantiated the newPad with '\n', an additional '\n' is // added internally, so the pad text on the revision 0 is "\n\n" const oldLength = 2; - const newLength = assem.getLengthChange(); const newText = oldAText.text; // create a changeset that removes the previous text and add the newText with // all atributes present on the source pad - const changeset = Changeset.pack(oldLength, newLength, assem.toString(), newText); + const changeset = Changeset.pack(oldLength, newLength, serializedOps, newText); newPad.appendRevision(changeset); await hooks.aCallAll('padCopy', {originalPad: this, destinationID}); diff --git a/src/static/js/Changeset.js b/src/static/js/Changeset.js index 5a46de603..6a35e2eaf 100644 --- a/src/static/js/Changeset.js +++ b/src/static/js/Changeset.js @@ -432,7 +432,7 @@ class MergingOpAssembler { * @returns {Generator} The done value indicates how much the sequence of operations * changes the length of the document (in characters). */ -const canonicalizeOps = function* (ops, finalize) { +exports.canonicalizeOps = function* (ops, finalize) { let minusOps = []; let plusOps = []; let keepOps = []; @@ -519,6 +519,8 @@ const opsFromText = function* (opcode, text, attribs = '', pool = null) { * - strips final "=" * - ignores 0-length changes * - reorders consecutive + and - (which MergingOpAssembler doesn't do) + * + * @deprecated Use `canonicalizeOps` with `serializeOps` instead. */ class SmartOpAssembler { constructor() { @@ -533,7 +535,7 @@ class SmartOpAssembler { _serialize(finalize) { this._serialized = exports.serializeOps((function* () { - this._lengthChange = yield* canonicalizeOps(this._ops, finalize); + this._lengthChange = yield* exports.canonicalizeOps(this._ops, finalize); }).call(this)); } @@ -586,54 +588,58 @@ exports.checkRep = (cs) => { const unpacked = exports.unpack(cs); const oldLen = unpacked.oldLen; const newLen = unpacked.newLen; - const ops = unpacked.ops; let charBank = unpacked.charBank; - const assem = new SmartOpAssembler(); let oldPos = 0; let calcNewLen = 0; - for (const o of exports.deserializeOps(ops)) { - switch (o.opcode) { - case '=': - oldPos += o.chars; - calcNewLen += o.chars; - break; - case '-': - oldPos += o.chars; - assert(oldPos <= oldLen, `${oldPos} > ${oldLen} in ${cs}`); - break; - case '+': - { - assert(charBank.length >= o.chars, 'Invalid changeset: not enough chars in charBank'); - const chars = charBank.slice(0, o.chars); - const nlines = (chars.match(/\n/g) || []).length; - assert(nlines === o.lines, - 'Invalid changeset: number of newlines in insert op does not match the charBank'); - assert(o.lines === 0 || chars.endsWith('\n'), - 'Invalid changeset: multiline insert op does not end with a newline'); - charBank = charBank.slice(o.chars); - calcNewLen += o.chars; - assert(calcNewLen <= newLen, `${calcNewLen} > ${newLen} in ${cs}`); - break; + const ops = (function* () { + for (const o of exports.deserializeOps(unpacked.ops)) { + switch (o.opcode) { + case '=': + oldPos += o.chars; + calcNewLen += o.chars; + break; + case '-': + oldPos += o.chars; + assert(oldPos <= oldLen, `${oldPos} > ${oldLen} in ${cs}`); + break; + case '+': { + assert(charBank.length >= o.chars, 'Invalid changeset: not enough chars in charBank'); + const chars = charBank.slice(0, o.chars); + const nlines = (chars.match(/\n/g) || []).length; + assert(nlines === o.lines, + 'Invalid changeset: number of newlines in insert op does not match the charBank'); + assert(o.lines === 0 || chars.endsWith('\n'), + 'Invalid changeset: multiline insert op does not end with a newline'); + charBank = charBank.slice(o.chars); + calcNewLen += o.chars; + assert(calcNewLen <= newLen, `${calcNewLen} > ${newLen} in ${cs}`); + break; + } + default: + assert(false, `Invalid changeset: Unknown opcode: ${JSON.stringify(o.opcode)}`); } - default: - assert(false, `Invalid changeset: Unknown opcode: ${JSON.stringify(o.opcode)}`); + yield o; } - assem.append(o); - } + })(); + const serializedOps = exports.serializeOps(exports.canonicalizeOps(ops, true)); calcNewLen += oldLen - oldPos; assert(calcNewLen === newLen, 'Invalid changeset: claimed length does not match actual length'); assert(charBank === '', 'Invalid changeset: excess characters in the charBank'); - assem.endDocument(); - const normalized = exports.pack(oldLen, calcNewLen, assem.toString(), unpacked.charBank); + const normalized = exports.pack(oldLen, calcNewLen, serializedOps, unpacked.charBank); assert(normalized === cs, 'Invalid changeset: not in canonical form'); return cs; }; /** + * @deprecated Use `canonicalizeOps` with `serializeOps` instead. * @returns {SmartOpAssembler} */ -exports.smartOpAssembler = () => new SmartOpAssembler(); +exports.smartOpAssembler = () => { + padutils.warnDeprecated( + 'Changeset.smartOpAssembler() is deprecated; use Changeset.canonicalizeOps() instead'); + return new SmartOpAssembler(); +}; /** * @deprecated Use `squashOps` with `serializeOps` instead. @@ -1082,22 +1088,22 @@ class TextLinesMutator { * @returns {string} the integrated changeset */ const applyZip = (in1, in2, func) => { - const ops1 = exports.deserializeOps(in1); - const ops2 = exports.deserializeOps(in2); - let next1 = ops1.next(); - let next2 = ops2.next(); - const assem = new SmartOpAssembler(); - while (!next1.done || !next2.done) { - if (!next1.done && !next1.value.opcode) next1 = ops1.next(); - if (!next2.done && !next2.value.opcode) next2 = ops2.next(); - if (next1.value == null) next1.value = new Op(); - if (next2.value == null) next2.value = new Op(); - if (!next1.value.opcode && !next2.value.opcode) break; - const opOut = func(next1.value, next2.value); - if (opOut && opOut.opcode) assem.append(opOut); - } - assem.endDocument(); - return assem.toString(); + const ops = (function* () { + const ops1 = exports.deserializeOps(in1); + const ops2 = exports.deserializeOps(in2); + let next1 = ops1.next(); + let next2 = ops2.next(); + while (!next1.done || !next2.done) { + if (!next1.done && !next1.value.opcode) next1 = ops1.next(); + if (!next2.done && !next2.value.opcode) next2 = ops2.next(); + if (next1.value == null) next1.value = new Op(); + if (next2.value == null) next2.value = new Op(); + if (!next1.value.opcode && !next2.value.opcode) break; + const opOut = func(next1.value, next2.value); + if (opOut && opOut.opcode) yield opOut; + } + })(); + return exports.serializeOps(exports.canonicalizeOps(ops, true)); }; /** @@ -1540,15 +1546,13 @@ exports.makeSplice = (orig, start, ndel, ins, attribs, pool) => { if (start > orig.length) start = orig.length; if (ndel > orig.length - start) ndel = orig.length - start; const deleted = orig.substring(start, start + ndel); - const assem = new SmartOpAssembler(); const ops = (function* () { yield* opsFromText('=', orig.substring(0, start)); yield* opsFromText('-', deleted); yield* opsFromText('+', ins, attribs, pool); })(); - for (const op of ops) assem.append(op); - assem.endDocument(); - return exports.pack(orig.length, orig.length + ins.length - ndel, assem.toString(), ins); + const serializedOps = exports.serializeOps(exports.canonicalizeOps(ops, true)); + return exports.pack(orig.length, orig.length + ins.length - ndel, serializedOps, ins); }; /** @@ -1670,11 +1674,8 @@ exports.moveOpsToNewPool = (cs, oldPool, newPool) => { * @param {string} text - text to insert * @returns {string} */ -exports.makeAttribution = (text) => { - const assem = new SmartOpAssembler(); - for (const op of opsFromText('+', text)) assem.append(op); - return assem.toString(); -}; +exports.makeAttribution = + (text) => exports.serializeOps(exports.canonicalizeOps(opsFromText('+', text), false)); /** * Iterates over attributes in exports, attribution string, or attribs property of an op and runs @@ -1928,8 +1929,7 @@ exports.attribsAttributeValue = (attribs, key, pool) => { * @returns {Builder} */ exports.builder = (oldLen) => { - const assem = new SmartOpAssembler(); - const o = new Op(); + const ops = []; const charBank = exports.stringAssembler(); const self = { @@ -1944,12 +1944,12 @@ exports.builder = (oldLen) => { * @returns {Builder} this */ keep: (N, L, attribs, pool) => { - o.opcode = '='; + const o = new Op('='); o.attribs = typeof attribs === 'string' ? attribs : new AttributeMap(pool).update(attribs || []).toString(); o.chars = N; o.lines = (L || 0); - assem.append(o); + ops.push(o); return self; }, @@ -1962,7 +1962,7 @@ exports.builder = (oldLen) => { * @returns {Builder} this */ keepText: (text, attribs, pool) => { - for (const op of opsFromText('=', text, attribs, pool)) assem.append(op); + ops.push(...opsFromText('=', text, attribs, pool)); return self; }, @@ -1975,7 +1975,7 @@ exports.builder = (oldLen) => { * @returns {Builder} this */ insert: (text, attribs, pool) => { - for (const op of opsFromText('+', text, attribs, pool)) assem.append(op); + ops.push(...opsFromText('+', text, attribs, pool)); charBank.append(text); return self; }, @@ -1987,18 +1987,22 @@ exports.builder = (oldLen) => { * @returns {Builder} this */ remove: (N, L) => { - o.opcode = '-'; + const o = new Op('-'); o.attribs = ''; o.chars = N; o.lines = (L || 0); - assem.append(o); + ops.push(o); return self; }, toString: () => { - assem.endDocument(); - const newLen = oldLen + assem.getLengthChange(); - return exports.pack(oldLen, newLen, assem.toString(), charBank.toString()); + /** @type {number} */ + let lengthChange; + const serializedOps = exports.serializeOps((function* () { + lengthChange = yield* exports.canonicalizeOps(ops, true); + })()); + const newLen = oldLen + lengthChange; + return exports.pack(oldLen, newLen, serializedOps, charBank.toString()); }, }; @@ -2033,11 +2037,11 @@ exports.makeAttribsString = (opcode, attribs, pool) => { exports.subattribution = (astr, start, optEnd) => { const attOps = exports.deserializeOps(astr); let attOpsNext = attOps.next(); - const assem = new SmartOpAssembler(); let attOp = new Op(); - const csOp = new Op(); + const csOp = new Op('-'); + csOp.chars = start; - const doCsOp = () => { + const doCsOp = function* () { if (!csOp.chars) return; while (csOp.opcode && (attOp.opcode || !attOpsNext.done)) { if (!attOp.opcode) { @@ -2049,30 +2053,25 @@ exports.subattribution = (astr, start, optEnd) => { csOp.lines++; } const opOut = slicerZipperFunc(attOp, csOp, null); - if (opOut.opcode) assem.append(opOut); + if (opOut.opcode) yield opOut; } }; - csOp.opcode = '-'; - csOp.chars = start; - - doCsOp(); - - if (optEnd === undefined) { - if (attOp.opcode) { - assem.append(attOp); + const ops = (function* () { + yield* doCsOp(); + if (optEnd === undefined) { + if (attOp.opcode) yield attOp; + while (!attOpsNext.done) { + yield attOpsNext.value; + attOpsNext = attOps.next(); + } + } else { + csOp.opcode = '='; + csOp.chars = optEnd - start; + yield* doCsOp(); } - while (!attOpsNext.done) { - assem.append(attOpsNext.value); - attOpsNext = attOps.next(); - } - } else { - csOp.opcode = '='; - csOp.chars = optEnd - start; - doCsOp(); - } - - return assem.toString(); + })(); + return exports.serializeOps(exports.canonicalizeOps(ops, false)); }; exports.inverse = (cs, lines, alines, pool) => { diff --git a/src/static/js/ace2_inner.js b/src/static/js/ace2_inner.js index 77912057e..8f1f1a083 100644 --- a/src/static/js/ace2_inner.js +++ b/src/static/js/ace2_inner.js @@ -522,18 +522,24 @@ function Ace2Inner(editorInfo, cssManagers) { const numLines = rep.lines.length(); const upToLastLine = rep.lines.offsetOfIndex(numLines - 1); const lastLineLength = rep.lines.atIndex(numLines - 1).text.length; - const assem = Changeset.smartOpAssembler(); - const o = new Changeset.Op('-'); - o.chars = upToLastLine; - o.lines = numLines - 1; - assem.append(o); - o.chars = lastLineLength; - o.lines = 0; - assem.append(o); - for (const op of Changeset.opsFromAText(atext)) assem.append(op); - const newLen = oldLen + assem.getLengthChange(); - const changeset = Changeset.checkRep( - Changeset.pack(oldLen, newLen, assem.toString(), atext.text.slice(0, -1))); + const ops = (function* () { + const op1 = new Changeset.Op('-'); + op1.chars = upToLastLine; + op1.lines = numLines - 1; + yield op1; + const op2 = new Changeset.Op('-'); + op2.chars = lastLineLength; + op2.lines = 0; + yield op2; + yield* Changeset.opsFromAText(atext); + })(); + let lengthChange; + const serializedOps = Changeset.serializeOps((function* () { + lengthChange = yield* Changeset.canonicalizeOps(ops, false); + })()); + const newLen = oldLen + lengthChange; + const changeset = + Changeset.checkRep(Changeset.pack(oldLen, newLen, serializedOps, atext.text.slice(0, -1))); performDocumentApplyChangeset(changeset); performSelectionChange( diff --git a/src/static/js/contentcollector.js b/src/static/js/contentcollector.js index 7dd70e512..266400d7a 100644 --- a/src/static/js/contentcollector.js +++ b/src/static/js/contentcollector.js @@ -82,31 +82,30 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) const lines = (() => { const textArray = []; const attribsArray = []; - let attribsBuilder = null; - const op = new Changeset.Op('+'); + let ops = null; const self = { length: () => textArray.length, atColumnZero: () => textArray[textArray.length - 1] === '', startNew: () => { textArray.push(''); self.flush(true); - attribsBuilder = Changeset.smartOpAssembler(); + ops = []; }, textOfLine: (i) => textArray[i], appendText: (txt, attrString = '') => { textArray[textArray.length - 1] += txt; + const op = new Changeset.Op('+'); op.attribs = attrString; op.chars = txt.length; - attribsBuilder.append(op); + ops.push(op); }, textLines: () => textArray.slice(), attribLines: () => attribsArray, // call flush only when you're done flush: (withNewline) => { - if (attribsBuilder) { - attribsArray.push(attribsBuilder.toString()); - attribsBuilder = null; - } + if (ops == null) return; + attribsArray.push(Changeset.serializeOps(Changeset.canonicalizeOps(ops, false))); + ops = null; }, }; self.startNew(); diff --git a/src/tests/frontend/easysync-helper.js b/src/tests/frontend/easysync-helper.js index 6b4bfc959..d79366d5b 100644 --- a/src/tests/frontend/easysync-helper.js +++ b/src/tests/frontend/easysync-helper.js @@ -168,26 +168,22 @@ const randomTestChangeset = (origText, withAttribs) => { const charBank = Changeset.stringAssembler(); let textLeft = origText; // always keep final newline const outTextAssem = Changeset.stringAssembler(); - const opAssem = Changeset.smartOpAssembler(); + const ops = []; const oldLen = origText.length; - const nextOp = new Changeset.Op(); - const appendMultilineOp = (opcode, txt) => { - nextOp.opcode = opcode; - if (withAttribs) { - nextOp.attribs = randomTwoPropAttribs(opcode); - } + const attribs = withAttribs ? randomTwoPropAttribs(opcode) : ''; txt.replace(/\n|[^\n]+/g, (t) => { + const nextOp = new Changeset.Op(opcode); + nextOp.attribs = attribs; if (t === '\n') { nextOp.chars = 1; nextOp.lines = 1; - opAssem.append(nextOp); } else { nextOp.chars = t.length; nextOp.lines = 0; - opAssem.append(nextOp); } + ops.push(nextOp); return ''; }); }; @@ -214,8 +210,8 @@ const randomTestChangeset = (origText, withAttribs) => { while (textLeft.length > 1) doOp(); for (let i = 0; i < 5; i++) doOp(); // do some more (only insertions will happen) const outText = `${outTextAssem.toString()}\n`; - opAssem.endDocument(); - const cs = Changeset.pack(oldLen, outText.length, opAssem.toString(), charBank.toString()); + const serializedOps = Changeset.serializeOps(Changeset.canonicalizeOps(ops, true)); + const cs = Changeset.pack(oldLen, outText.length, serializedOps, charBank.toString()); Changeset.checkRep(cs); return [cs, outText]; }; diff --git a/src/tests/frontend/specs/easysync-assembler.js b/src/tests/frontend/specs/easysync-assembler.js index b035b64e4..fe15ee605 100644 --- a/src/tests/frontend/specs/easysync-assembler.js +++ b/src/tests/frontend/specs/easysync-assembler.js @@ -8,20 +8,18 @@ describe('easysync-assembler', function () { expect(Changeset.serializeOps(Changeset.deserializeOps(x))).to.equal(x); }); - it('smartOpAssembler', async function () { + it('canonicalizeOps', async function () { const x = '-c*3*4+6|3=az*asdf0*1*2*3+1=1-1+1*0+1=1-1+1|c=c-1'; - const assem = Changeset.smartOpAssembler(); - for (const op of Changeset.deserializeOps(x)) assem.append(op); - assem.endDocument(); - expect(assem.toString()).to.equal(x); + expect(Changeset.serializeOps(Changeset.canonicalizeOps(Changeset.deserializeOps(x), true))) + .to.equal(x); }); describe('append atext to assembler', function () { const testAppendATextToAssembler = (testId, atext, correctOps) => { it(`testAppendATextToAssembler#${testId}`, async function () { - const assem = Changeset.smartOpAssembler(); - for (const op of Changeset.opsFromAText(atext)) assem.append(op); - expect(assem.toString()).to.equal(correctOps); + const serializedOps = + Changeset.serializeOps(Changeset.canonicalizeOps(Changeset.opsFromAText(atext), false)); + expect(serializedOps).to.equal(correctOps); }); }; diff --git a/src/tests/frontend/specs/easysync-mutations.js b/src/tests/frontend/specs/easysync-mutations.js index 7cf43c8b7..5594213f8 100644 --- a/src/tests/frontend/specs/easysync-mutations.js +++ b/src/tests/frontend/specs/easysync-mutations.js @@ -15,37 +15,36 @@ describe('easysync-mutations', function () { }; const mutationsToChangeset = (oldLen, arrayOfArrays) => { - const assem = Changeset.smartOpAssembler(); - const op = new Changeset.Op(); const bank = Changeset.stringAssembler(); let oldPos = 0; let newLen = 0; - arrayOfArrays.forEach((a) => { - if (a[0] === 'skip') { - op.opcode = '='; - op.chars = a[1]; - op.lines = (a[2] || 0); - assem.append(op); - oldPos += op.chars; - newLen += op.chars; - } else if (a[0] === 'remove') { - op.opcode = '-'; - op.chars = a[1]; - op.lines = (a[2] || 0); - assem.append(op); - oldPos += op.chars; - } else if (a[0] === 'insert') { - op.opcode = '+'; - bank.append(a[1]); - op.chars = a[1].length; - op.lines = (a[2] || 0); - assem.append(op); - newLen += op.chars; + const ops = (function* () { + for (const a of arrayOfArrays) { + const op = new Changeset.Op(); + if (a[0] === 'skip') { + op.opcode = '='; + op.chars = a[1]; + op.lines = (a[2] || 0); + oldPos += op.chars; + newLen += op.chars; + } else if (a[0] === 'remove') { + op.opcode = '-'; + op.chars = a[1]; + op.lines = (a[2] || 0); + oldPos += op.chars; + } else if (a[0] === 'insert') { + op.opcode = '+'; + bank.append(a[1]); + op.chars = a[1].length; + op.lines = (a[2] || 0); + newLen += op.chars; + } + yield op; } - }); + })(); + const serializedOps = Changeset.serializeOps(Changeset.canonicalizeOps(ops, true)); newLen += oldLen - oldPos; - assem.endDocument(); - return Changeset.pack(oldLen, newLen, assem.toString(), bank.toString()); + return Changeset.pack(oldLen, newLen, serializedOps, bank.toString()); }; const runMutationTest = (testId, origLines, muts, correct) => {