// pad.libre-service.eu-etherpad/src/node/utils/padDiff.js
var Changeset = require("../../static/js/Changeset");
var async = require("async");
var exportHtml = require('./ExportHtml');
// thenify(...).withCallback wraps the callback-style prototype methods defined below
const thenify = require("thenify").withCallback;
/**
 * Creates a diff helper for one pad between two revisions.
 *
 * The requested range is normalised through `pad.getValidRevisionRange`; the
 * resulting `startRev` / `endRev` are what the instance works with.
 *
 * @param {Object} pad - pad object; must expose `id`, `atext`, `pool` and
 *     `getValidRevisionRange(fromRev, toRev)`
 * @param {*} fromRev - requested start revision (validated by the pad)
 * @param {*} toRev - requested end revision (validated by the pad)
 * @throws {Error} 'Invalid pad' when the pad is missing or lacks id/atext/pool
 * @throws {Error} 'Invalid revision range. ...' when the pad rejects the range
 */
function PadDiff(pad, fromRev, toRev) {
  // check parameters
  if (!pad || !pad.id || !pad.atext || !pad.pool) {
    throw new Error('Invalid pad');
  }

  const range = pad.getValidRevisionRange(fromRev, toRev);
  if (!range) {
    throw new Error(`Invalid revision range. startRev: ${fromRev} endRev: ${toRev}`);
  }

  this._pad = pad;
  this._fromRev = range.startRev;
  this._toRev = range.endRev;
  // lazily filled by getHtml()
  this._html = null;
  // filled while the diff atext is built
  this._authors = [];
}
/**
 * Tells whether a changeset does nothing except apply the anonymous author
 * attribute (`['author', '']`) to the whole text.
 *
 * All of the following must hold:
 *  - the char bank is empty and old/new lengths are equal (no text change),
 *  - there is exactly one operator and it is a keep (`=`),
 *  - that operator spans `oldLen` or `oldLen - 1` characters,
 *  - exactly one attribute number is used and it resolves, in the pad's pool,
 *    to `['author', '']`.
 *
 * @param {string} changeset - packed changeset
 * @returns {boolean} true when the changeset only clears authorship
 */
PadDiff.prototype._isClearAuthorship = function (changeset) {
  const unpacked = Changeset.unpack(changeset);

  // nothing may be inserted ...
  if (unpacked.charBank !== '') {
    return false;
  }

  // ... and the text length must stay the same
  if (unpacked.oldLen !== unpacked.newLen) {
    return false;
  }

  const iterator = Changeset.opIterator(unpacked.ops);

  // the first operator should be the clear operator, and the only one
  const clearOperator = iterator.next();
  if (iterator.hasNext() === true) {
    return false;
  }

  // it must not change text
  if (clearOperator.opcode !== '=') {
    return false;
  }

  // it must cover the complete text: if the text ends with a new line the
  // operator is exactly one character shorter, otherwise it has the same length
  if (clearOperator.chars !== unpacked.oldLen - 1 && clearOperator.chars !== unpacked.oldLen) {
    return false;
  }

  const attributes = [];
  Changeset.eachAttribNumber(changeset, (attrNum) => {
    attributes.push(attrNum);
  });

  // only one attribute may be used
  if (attributes.length !== 1) {
    return false;
  }

  // and it has to be the anonymous author attribute
  const appliedAttribute = this._pad.pool.getAttrib(attributes[0]);
  if (appliedAttribute[0] !== 'author' || appliedAttribute[1] !== '') {
    return false;
  }

  return true;
};
/**
 * Builds a changeset that keeps the complete text of revision `rev` and
 * applies the anonymous author attribute (`['author', '']`) to it.
 *
 * @param {number} rev - revision whose text the changeset is built for
 * @param {function(?Error, string=)} callback - receives the packed changeset,
 *     or the error reported by `pad.getInternalRevisionAText`
 */
PadDiff.prototype._createClearAuthorship = thenify(function (rev, callback) {
  this._pad.getInternalRevisionAText(rev, (err, atext) => {
    if (err) {
      return callback(err);
    }

    // build clearAuthorship changeset
    const builder = Changeset.builder(atext.text.length);
    builder.keepText(atext.text, [['author', '']], this._pad.pool);

    callback(null, builder.toString());
  });
});
/**
 * Produces the atext of revision `rev` with the clear-authorship changeset
 * (see `_createClearAuthorship`) applied to it.
 *
 * @param {number} rev - revision to start the diff from
 * @param {function(?Error, Object=)} callback - receives the resulting atext;
 *     errors from fetching the atext, creating the changeset, or applying it
 *     are passed as the first argument
 */
PadDiff.prototype._createClearStartAtext = thenify(function (rev, callback) {
  const self = this;

  // get the atext of this revision
  self._pad.getInternalRevisionAText(rev, function (atextErr, atext) {
    if (atextErr) {
      return callback(atextErr);
    }

    // create the clearAuthorship changeset
    self._createClearAuthorship(rev, function (changesetErr, changeset) {
      if (changesetErr) {
        return callback(changesetErr);
      }

      let newAText;
      try {
        // apply the clearAuthorship changeset
        newAText = Changeset.applyToAText(changeset, atext, self._pad.pool);
      } catch (applyErr) {
        return callback(applyErr);
      }

      // deliberately outside the try: an exception thrown by the callback
      // itself must not be fed back into the same callback
      callback(null, newAText);
    });
  });
});
/**
 * Loads up to `count` consecutive revisions starting at `startRev`, never
 * going past the pad's head revision.
 *
 * @param {number} startRev - first revision to load
 * @param {number} count - maximum number of revisions to load
 * @param {function(?Error, Array, Array)} callback - receives
 *     `(err, changesets, authors)`; index `i` of both arrays belongs to
 *     revision `startRev + i`. When `err` is set the arrays may be only
 *     partially filled.
 */
PadDiff.prototype._getChangesetsInBulk = thenify(function (startRev, count, callback) {
  const self = this;

  // find out which revisions we need
  const revisions = [];
  for (let i = startRev; i < (startRev + count) && i <= this._pad.head; i++) {
    revisions.push(i);
  }

  const changesets = [];
  const authors = [];

  // get all needed revisions
  async.forEach(revisions, function (rev, done) {
    self._pad.getRevision(rev, function (err, revision) {
      if (err) {
        return done(err);
      }

      // store by offset so the result order does not depend on completion order
      const arrayNum = rev - startRev;
      changesets[arrayNum] = revision.changeset;
      authors[arrayNum] = revision.meta.author;

      done();
    });
  },
  function (err) {
    callback(err, changesets, authors);
  });
});
/**
 * Adds every author from `authors` to this diff's author list, skipping the
 * ones that are already present.
 *
 * @param {Array} authors - author ids to merge into `this._authors`
 */
PadDiff.prototype._addAuthors = function (authors) {
  authors.forEach((author) => {
    // add to array if not in the array
    if (!this._authors.includes(author)) {
      this._authors.push(author);
    }
  });
};
/**
 * Builds the atext that visualises the diff between `_fromRev` and `_toRev`.
 *
 * Steps:
 *  1. take the start atext with authorship cleared (`_createClearStartAtext`),
 *  2. load the following revisions in bulks, skip clear-authorship changesets,
 *     tag every other changeset with its author and compose them into one
 *     "super" changeset (`Changeset.composeWithDeletions`),
 *  3. apply the super changeset and then the deletion changeset derived from
 *     it (`_createDeletionChangeset`) to the start atext.
 *
 * Authors of the composed changesets are collected through `_addAuthors`.
 *
 * @param {function(?Error, Object=)} callback - receives the diff atext. Any
 *     error from loading revisions, or any exception thrown while composing or
 *     applying changesets, is passed as the first argument.
 */
PadDiff.prototype._createDiffAtext = thenify(function (callback) {
  const self = this;
  const bulkSize = 100;

  // get the cleaned startAText
  self._createClearStartAtext(self._fromRev, function (startErr, atext) {
    if (startErr) {
      return callback(startErr);
    }

    let superChangeset = null;
    let rev = self._fromRev + 1;

    // async while loop
    async.whilst(
      // loop condition
      function () {
        return rev <= self._toRev;
      },

      // loop body
      function (next) {
        // get the bulk
        self._getChangesetsInBulk(rev, bulkSize, function (bulkErr, changesets, authors) {
          // a failed bulk may carry partially filled arrays; stop instead of
          // working on undefined changesets or silently dropping revisions
          if (bulkErr) {
            return next(bulkErr);
          }

          const addedAuthors = [];

          try {
            // run through all changesets
            for (let i = 0; i < changesets.length && (rev + i) <= self._toRev; i++) {
              let changeset = changesets[i];

              // skip clearAuthorship Changesets
              if (self._isClearAuthorship(changeset)) {
                continue;
              }

              changeset = self._extendChangesetWithAuthor(changeset, authors[i], self._pad.pool);

              // add this author to the author array
              addedAuthors.push(authors[i]);

              // compose it with the superChangeset
              if (superChangeset === null) {
                superChangeset = changeset;
              } else {
                superChangeset = Changeset.composeWithDeletions(superChangeset, changeset, self._pad.pool);
              }
            }
          } catch (composeErr) {
            return next(composeErr);
          }

          // add the authors to the PadDiff author array
          self._addAuthors(addedAuthors);

          // let's continue with the next bulk
          rev += bulkSize;
          next();
        });
      },

      // after the loop has ended
      function (loopErr) {
        if (loopErr) {
          return callback(loopErr);
        }

        // if there are only clearAuthorship changesets, we don't get a
        // superChangeset, so we can skip this step
        if (superChangeset) {
          try {
            // must be derived from the atext as it was BEFORE the super changeset is applied
            const deletionChangeset = self._createDeletionChangeset(superChangeset, atext, self._pad.pool);

            // apply the superChangeset, which includes all additions
            atext = Changeset.applyToAText(superChangeset, atext, self._pad.pool);

            // apply the deletionChangeset, which adds the deletions
            atext = Changeset.applyToAText(deletionChangeset, atext, self._pad.pool);
          } catch (applyErr) {
            return callback(applyErr);
          }
        }

        callback(null, atext);
      }
    );
  });
});
/**
 * Renders the diff as HTML. The result is cached on the instance, so the diff
 * is only computed once.
 *
 * @param {function(?Error, string=)} callback - receives the HTML produced by
 *     `exportHtml.getHTMLFromAtext` for the diff atext and the pad's author
 *     colors. Errors from any step, including an exception thrown by the HTML
 *     export, are passed as the first argument.
 */
PadDiff.prototype.getHtml = thenify(function (callback) {
  // cache the html
  if (this._html != null) {
    return callback(null, this._html);
  }

  const self = this;
  let atext;
  let html;
  let authorColors;

  async.series([
    // get the diff atext
    function (done) {
      self._createDiffAtext(function (err, _atext) {
        if (err) {
          return done(err);
        }

        atext = _atext;
        done();
      });
    },

    // get the authorColor table
    function (done) {
      self._pad.getAllAuthorColors(function (err, _authorColors) {
        if (err) {
          return done(err);
        }

        authorColors = _authorColors;
        done();
      });
    },

    // convert the atext to html
    function (done) {
      try {
        html = exportHtml.getHTMLFromAtext(self._pad, atext, authorColors);
      } catch (exportErr) {
        // report instead of letting the exception escape the async flow
        return done(exportErr);
      }

      self._html = html;
      done();
    },
  ],
  function (err) {
    callback(err, html);
  });
});
/**
 * Returns the authors that contributed to the diff.
 *
 * The author list is filled as a side effect of building the HTML, so the
 * HTML is generated first if that has not happened yet.
 *
 * @param {function(?Error, Array=)} callback - receives `this._authors`, or
 *     the error reported by `getHtml`
 */
PadDiff.prototype.getAuthors = thenify(function (callback) {
  const self = this;

  // html already produced: the author array is complete
  if (self._html != null) {
    return callback(null, self._authors);
  }

  // producing the html generates the author array at the same time
  self.getHtml(function (err) {
    if (err) {
      return callback(err);
    }

    callback(null, self._authors);
  });
});
/**
 * Returns a copy of `changeset` whose operators carry the given author:
 *  - delete operators (`-`) get their attribs replaced by the author
 *    attribute plus a `['removed', true]` attribute,
 *  - keep operators (`=`) that already change attributes get the author
 *    attribute appended,
 *  - all other operators are passed through unchanged.
 *
 * Both attributes are registered in `apool` via `putAttrib`.
 *
 * @param {string} changeset - packed changeset
 * @param {string} author - author id; a falsy value is stored as `''`
 * @param {Object} apool - attribute pool used to look up / register attributes
 * @returns {string} the repacked changeset (same lengths and char bank)
 */
PadDiff.prototype._extendChangesetWithAuthor = function (changeset, author, apool) {
  const unpacked = Changeset.unpack(changeset);
  const iterator = Changeset.opIterator(unpacked.ops);
  const assem = Changeset.opAssembler();

  // create deleted attribs
  const authorAttrib = apool.putAttrib(['author', author || '']);
  const deletedAttrib = apool.putAttrib(['removed', true]);
  const authorAttribString = '*' + Changeset.numToString(authorAttrib);
  const deletionAttribs = authorAttribString + '*' + Changeset.numToString(deletedAttrib);

  // iterate over the operators of the changeset
  while (iterator.hasNext()) {
    const operator = iterator.next();

    if (operator.opcode === '-') {
      // this is a delete operator, extend it with the author
      operator.attribs = deletionAttribs;
    } else if (operator.opcode === '=' && operator.attribs) {
      // this operator changes only attributes, let's mark which author did that
      operator.attribs += authorAttribString;
    }

    // append the (possibly modified) operator to our assembler
    assem.append(operator);
  }

  // return the modified changeset
  return Changeset.pack(unpacked.oldLen, unpacked.newLen, assem.toString(), unpacked.charBank);
};
// This method is 80% like Changeset.inverse. Instead of reverting the changeset, it adds the deletions and attribute changes to the atext.
/**
 * Builds the changeset that makes deletions and attribute changes of `cs`
 * visible. It is meant to be applied AFTER `cs` has been applied to
 * `startAText` (the builder is created for `cs`'s new length).
 *
 * For every operator of `cs`:
 *  - `+` (insert): the inserted text is kept as is;
 *  - `-` (delete): the removed text is read from `startAText` and inserted
 *    again, carrying its original attribs followed by the delete operator's
 *    attribs;
 *  - `=` without attribs: kept as is;
 *  - `=` with attribs (attribute change): for every non-linebreak run the old
 *    text is inserted again with its previous attribute values plus a
 *    `['removed', true]` attribute and the author attribute found on the
 *    operator (anonymous author if none), followed by a keep of the changed
 *    text. Line breaks are only kept.
 *
 * @param {string} cs - packed changeset that was / will be applied to startAText
 * @param {{text: string, attribs: string}} startAText - atext `cs` is based on
 * @param {Object} apool - attribute pool used by `cs` and `startAText`
 * @returns {string} the packed changeset, passed through `Changeset.checkRep`
 */
PadDiff.prototype._createDeletionChangeset = function (cs, startAText, apool) {
  const lines = Changeset.splitTextLines(startAText.text);
  const alines = Changeset.splitAttributionLines(startAText.attribs, startAText.text);

  // lines and alines are what the exports is meant to apply to.
  // They may be arrays or objects with .get(i) and .length methods.
  // They include final newlines on lines.

  function lines_get(idx) {
    if (lines.get) {
      return lines.get(idx);
    } else {
      return lines[idx];
    }
  }

  function alines_get(idx) {
    if (alines.get) {
      return alines.get(idx);
    } else {
      return alines[idx];
    }
  }

  // cursor into startAText: current line, offset into that line, and the
  // attribution-op iterator (plus partially consumed op) of the current line
  let curLine = 0;
  let curChar = 0;
  let curLineOpIter = null;
  let curLineOpIterLine;
  const curLineNextOp = Changeset.newOp('+');

  const unpacked = Changeset.unpack(cs);
  const csIter = Changeset.opIterator(unpacked.ops);
  const builder = Changeset.builder(unpacked.newLen);

  // Advances the cursor by numChars and calls func(len, attribs, endsLine)
  // once per attribution run that is crossed.
  function consumeAttribRuns(numChars, func /*(len, attribs, endsLine)*/) {
    if ((!curLineOpIter) || (curLineOpIterLine != curLine)) {
      // create curLineOpIter and advance it to curChar
      curLineOpIter = Changeset.opIterator(alines_get(curLine));
      curLineOpIterLine = curLine;
      let indexIntoLine = 0;
      let done = false;
      while (!done) {
        curLineOpIter.next(curLineNextOp);
        if (indexIntoLine + curLineNextOp.chars >= curChar) {
          curLineNextOp.chars -= (curChar - indexIntoLine);
          done = true;
        } else {
          indexIntoLine += curLineNextOp.chars;
        }
      }
    }

    while (numChars > 0) {
      if ((!curLineNextOp.chars) && (!curLineOpIter.hasNext())) {
        // current line exhausted: move on to the next one
        curLine++;
        curChar = 0;
        curLineOpIterLine = curLine;
        curLineNextOp.chars = 0;
        curLineOpIter = Changeset.opIterator(alines_get(curLine));
      }

      if (!curLineNextOp.chars) {
        curLineOpIter.next(curLineNextOp);
      }

      const charsToUse = Math.min(numChars, curLineNextOp.chars);

      func(charsToUse, curLineNextOp.attribs, charsToUse == curLineNextOp.chars && curLineNextOp.lines > 0);
      numChars -= charsToUse;
      curLineNextOp.chars -= charsToUse;
      curChar += charsToUse;
    }

    if ((!curLineNextOp.chars) && (!curLineOpIter.hasNext())) {
      curLine++;
      curChar = 0;
    }
  }

  // Advances the cursor by L whole lines, or by N characters when L is falsy.
  function skip(N, L) {
    if (L) {
      curLine += L;
      curChar = 0;
    } else {
      if (curLineOpIter && curLineOpIterLine == curLine) {
        consumeAttribRuns(N, function () {});
      } else {
        curChar += N;
      }
    }
  }

  // Returns the next numChars characters of the start text, beginning at the
  // cursor, without moving the cursor.
  function nextText(numChars) {
    let len = 0;
    const assem = Changeset.stringAssembler();
    const firstString = lines_get(curLine).substring(curChar);
    len += firstString.length;
    assem.append(firstString);

    let lineNum = curLine + 1;

    while (len < numChars) {
      const nextString = lines_get(lineNum);
      len += nextString.length;
      assem.append(nextString);
      lineNum++;
    }

    return assem.toString().substring(0, numChars);
  }

  // Memoizes a string -> string function (falsy results are recomputed).
  function cachedStrFunc(func) {
    const cache = {};

    return function (s) {
      if (!cache[s]) {
        cache[s] = func(s);
      }
      return cache[s];
    };
  }

  const attribKeys = [];
  const attribValues = [];

  // iterate over all operators of this changeset
  while (csIter.hasNext()) {
    const csOp = csIter.next();

    if (csOp.opcode == '=') {
      const textBank = nextText(csOp.chars);

      // decide if this equal operator is an attribution change or not. We can see this by checking if attribs is set.
      // If the text this operator applies to is only a star, then this is a false positive and should be ignored
      if (csOp.attribs && textBank != "*") {
        const deletedAttrib = apool.putAttrib(["removed", true]);
        let authorAttrib = apool.putAttrib(["author", ""]);

        attribKeys.length = 0;
        attribValues.length = 0;
        Changeset.eachAttribNumber(csOp.attribs, function (n) {
          attribKeys.push(apool.getAttribKey(n));
          attribValues.push(apool.getAttribValue(n));

          // prefer the author recorded on the operator over the anonymous one
          if (apool.getAttribKey(n) === "author") {
            authorAttrib = n;
          }
        });

        // maps the attribs of an old run to the attribs needed to restore the
        // values this operator overwrote
        const undoBackToAttribs = cachedStrFunc(function (attribs) {
          const backAttribs = [];
          for (let i = 0; i < attribKeys.length; i++) {
            const appliedKey = attribKeys[i];
            const appliedValue = attribValues[i];
            const oldValue = Changeset.attribsAttributeValue(attribs, appliedKey, apool);

            if (appliedValue != oldValue) {
              backAttribs.push([appliedKey, oldValue]);
            }
          }

          return Changeset.makeAttribsString('=', backAttribs, apool);
        });

        const oldAttribsAddition = "*" + Changeset.numToString(deletedAttrib) + "*" + Changeset.numToString(authorAttrib);

        let textLeftToProcess = textBank;

        while (textLeftToProcess.length > 0) {
          // process till the next line break or process only one line break
          let lengthToProcess = textLeftToProcess.indexOf("\n");
          let lineBreak = false;
          switch (lengthToProcess) {
            case -1:
              lengthToProcess = textLeftToProcess.length;
              break;
            case 0:
              lineBreak = true;
              lengthToProcess = 1;
              break;
          }

          // get the text we want to process in this step
          const processText = textLeftToProcess.substr(0, lengthToProcess);

          textLeftToProcess = textLeftToProcess.substr(lengthToProcess);

          if (lineBreak) {
            builder.keep(1, 1); // just skip linebreaks, don't do a insert + keep for a linebreak

            // consume the attributes of this linebreak
            consumeAttribRuns(1, function () {});
          } else {
            // add the old text via an insert, but add a deletion attribute + the author attribute of the author who deleted it
            let textBankIndex = 0;
            consumeAttribRuns(lengthToProcess, function (len, attribs, endsLine) {
              // get the old attributes back
              const restoredAttribs = (undoBackToAttribs(attribs) || "") + oldAttribsAddition;

              builder.insert(processText.substr(textBankIndex, len), restoredAttribs);
              textBankIndex += len;
            });

            builder.keep(lengthToProcess, 0);
          }
        }
      } else {
        skip(csOp.chars, csOp.lines);
        builder.keep(csOp.chars, csOp.lines);
      }
    } else if (csOp.opcode == '+') {
      builder.keep(csOp.chars, csOp.lines);
    } else if (csOp.opcode == '-') {
      const textBank = nextText(csOp.chars);
      let textBankIndex = 0;

      consumeAttribRuns(csOp.chars, function (len, attribs, endsLine) {
        // re-insert the removed text with its old attribs plus the delete operator's attribs
        builder.insert(textBank.substr(textBankIndex, len), attribs + csOp.attribs);
        textBankIndex += len;
      });
    }
  }

  return Changeset.checkRep(builder.toString());
};
// export the constructor
module.exports = PadDiff;