/** * This code is mostly from the old Etherpad. Please help us to comment this code. * This helps other people to understand this code better and helps them to improve it. * TL;DR COMMENTS ON THIS FILE ARE HIGHLY APPRECIATED */ // THIS FILE IS ALSO AN APPJET MODULE: etherpad.collab.ace.contentcollector // %APPJET%: import("etherpad.collab.ace.easysync2.Changeset"); // %APPJET%: import("etherpad.admin.plugins"); /** * Copyright 2009 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS-IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ const _MAX_LIST_LEVEL = 16; const UNorm = require('unorm'); const Changeset = require('./Changeset'); const hooks = require('./pluginfw/hooks'); const _ = require('./underscore'); function sanitizeUnicode(s) { return UNorm.nfc(s); } function makeContentCollector(collectStyles, abrowser, apool, domInterface, className2Author) { abrowser = abrowser || {}; // I don't like the above. const dom = domInterface || { isNodeText(n) { return (n.nodeType == 3); }, nodeTagName(n) { return n.tagName; }, nodeValue(n) { return n.nodeValue; }, nodeNumChildren(n) { if (n.childNodes == null) return 0; return n.childNodes.length; }, nodeChild(n, i) { if (n.childNodes.item == null) { return n.childNodes[i]; } return n.childNodes.item(i); }, nodeProp(n, p) { return n[p]; }, nodeAttr(n, a) { if (n.getAttribute != null) return n.getAttribute(a); if (n.attribs != null) return n.attribs[a]; return null; }, optNodeInnerHTML(n) { return n.innerHTML; }, }; const _blockElems = { div: 1, p: 1, pre: 1, li: 1, }; _.each(hooks.callAll('ccRegisterBlockElements'), (element) => { _blockElems[element] = 1; }); function isBlockElement(n) { return !!_blockElems[(dom.nodeTagName(n) || '').toLowerCase()]; } function textify(str) { return sanitizeUnicode( str.replace(/(\n | \n)/g, ' ').replace(/[\n\r ]/g, ' ').replace(/\xa0/g, ' ').replace(/\t/g, ' ')); } function getAssoc(node, name) { return dom.nodeProp(node, `_magicdom_${name}`); } const lines = (function () { const textArray = []; const attribsArray = []; let attribsBuilder = null; const op = Changeset.newOp('+'); var self = { length() { return textArray.length; }, atColumnZero() { return textArray[textArray.length - 1] === ''; }, startNew() { textArray.push(''); self.flush(true); attribsBuilder = Changeset.smartOpAssembler(); }, textOfLine(i) { return textArray[i]; }, appendText(txt, attrString) { textArray[textArray.length - 1] += txt; // dmesg(txt+" / "+attrString); op.attribs = attrString; op.chars = txt.length; attribsBuilder.append(op); }, textLines() { return textArray.slice(); }, attribLines() { return attribsArray; }, // call flush only when you're done flush(withNewline) { if (attribsBuilder) { attribsArray.push(attribsBuilder.toString()); attribsBuilder = null; } }, }; self.startNew(); return self; }()); const cc = {}; function _ensureColumnZero(state) { if (!lines.atColumnZero()) { cc.startNewLine(state); } } let selection, startPoint, endPoint; let selStart = [-1, -1]; let selEnd = [-1, -1]; function _isEmpty(node, state) { // consider clean blank lines pasted in IE to be empty if (dom.nodeNumChildren(node) == 0) return true; if (dom.nodeNumChildren(node) == 1 && getAssoc(node, 'shouldBeEmpty') && dom.optNodeInnerHTML(node) == ' ' && !getAssoc(node, 'unpasted')) { if (state) { const child = dom.nodeChild(node, 0); _reachPoint(child, 0, state); _reachPoint(child, 1, state); } return true; } return false; } function _pointHere(charsAfter, state) { const ln = lines.length() - 1; let chr = lines.textOfLine(ln).length; if (chr == 0 && !_.isEmpty(state.lineAttributes)) { chr += 1; // listMarker } chr += charsAfter; return [ln, chr]; } function _reachBlockPoint(nd, idx, state) { if (!dom.isNodeText(nd)) _reachPoint(nd, idx, state); } function _reachPoint(nd, idx, state) { if (startPoint && nd == startPoint.node && startPoint.index == idx) { selStart = _pointHere(0, state); } if (endPoint && nd == endPoint.node && endPoint.index == idx) { selEnd = _pointHere(0, state); } } cc.incrementFlag = function (state, flagName) { state.flags[flagName] = (state.flags[flagName] || 0) + 1; }; cc.decrementFlag = function (state, flagName) { state.flags[flagName]--; }; cc.incrementAttrib = function (state, attribName) { if (!state.attribs[attribName]) { state.attribs[attribName] = 1; } else { state.attribs[attribName]++; } _recalcAttribString(state); }; cc.decrementAttrib = function (state, attribName) { state.attribs[attribName]--; _recalcAttribString(state); }; function _enterList(state, listType) { if (!listType) return; const oldListType = state.lineAttributes.list; if (listType != 'none') { state.listNesting = (state.listNesting || 0) + 1; // reminder that listType can be "number2", "number3" etc. if (listType.indexOf('number') !== -1) { state.start = (state.start || 0) + 1; } } if (listType === 'none' || !listType) { delete state.lineAttributes.list; } else { state.lineAttributes.list = listType; } _recalcAttribString(state); return oldListType; } function _exitList(state, oldListType) { if (state.lineAttributes.list) { state.listNesting--; } if (oldListType && oldListType != 'none') { state.lineAttributes.list = oldListType; } else { delete state.lineAttributes.list; delete state.lineAttributes.start; } _recalcAttribString(state); } function _enterAuthor(state, author) { const oldAuthor = state.author; state.authorLevel = (state.authorLevel || 0) + 1; state.author = author; _recalcAttribString(state); return oldAuthor; } function _exitAuthor(state, oldAuthor) { state.authorLevel--; state.author = oldAuthor; _recalcAttribString(state); } function _recalcAttribString(state) { const lst = []; for (const a in state.attribs) { if (state.attribs[a]) { // The following splitting of the attribute name is a workaround // to enable the content collector to store key-value attributes // see https://github.com/ether/etherpad-lite/issues/2567 for more information // in long term the contentcollector should be refactored to get rid of this workaround const ATTRIBUTE_SPLIT_STRING = '::'; // see if attributeString is splittable const attributeSplits = a.split(ATTRIBUTE_SPLIT_STRING); if (attributeSplits.length > 1) { // the attribute name follows the convention key::value // so save it as a key value attribute lst.push([attributeSplits[0], attributeSplits[1]]); } else { // the "normal" case, the attribute is just a switch // so set it true lst.push([a, 'true']); } } } if (state.authorLevel > 0) { const authorAttrib = ['author', state.author]; if (apool.putAttrib(authorAttrib, true) >= 0) { // require that author already be in pool // (don't add authors from other documents, etc.) lst.push(authorAttrib); } } state.attribString = Changeset.makeAttribsString('+', lst, apool); } function _produceLineAttributesMarker(state) { // TODO: This has to go to AttributeManager. const attributes = [ ['lmkr', '1'], ['insertorder', 'first'], ].concat( _.map(state.lineAttributes, (value, key) => [key, value]), ); lines.appendText('*', Changeset.makeAttribsString('+', attributes, apool)); } cc.startNewLine = function (state) { if (state) { const atBeginningOfLine = lines.textOfLine(lines.length() - 1).length == 0; if (atBeginningOfLine && !_.isEmpty(state.lineAttributes)) { _produceLineAttributesMarker(state); } } lines.startNew(); }; cc.notifySelection = function (sel) { if (sel) { selection = sel; startPoint = selection.startPoint; endPoint = selection.endPoint; } }; cc.doAttrib = function (state, na) { state.localAttribs = (state.localAttribs || []); state.localAttribs.push(na); cc.incrementAttrib(state, na); }; cc.collectContent = function (node, state) { if (!state) { state = { flags: { /* name -> nesting counter*/ }, localAttribs: null, attribs: { /* name -> nesting counter*/ }, attribString: '', // lineAttributes maintain a map from attributes to attribute values set on a line lineAttributes: { /* example: 'list': 'bullet1', */ }, }; } const localAttribs = state.localAttribs; state.localAttribs = null; const isBlock = isBlockElement(node); const isEmpty = _isEmpty(node, state); if (isBlock) _ensureColumnZero(state); const startLine = lines.length() - 1; _reachBlockPoint(node, 0, state); if (dom.isNodeText(node)) { let txt = dom.nodeValue(node); var tname = dom.nodeAttr(node.parentNode, 'name'); const txtFromHook = hooks.callAll('collectContentLineText', { cc: this, state, tname, node, text: txt, styl: null, cls: null, }); if (typeof (txtFromHook) === 'object') { txt = dom.nodeValue(node); } else if (txtFromHook) { txt = txtFromHook; } let rest = ''; let x = 0; // offset into original text if (txt.length == 0) { if (startPoint && node == startPoint.node) { selStart = _pointHere(0, state); } if (endPoint && node == endPoint.node) { selEnd = _pointHere(0, state); } } while (txt.length > 0) { let consumed = 0; if (state.flags.preMode) { const firstLine = txt.split('\n', 1)[0]; consumed = firstLine.length + 1; rest = txt.substring(consumed); txt = firstLine; } else { /* will only run this loop body once */ } if (startPoint && node == startPoint.node && startPoint.index - x <= txt.length) { selStart = _pointHere(startPoint.index - x, state); } if (endPoint && node == endPoint.node && endPoint.index - x <= txt.length) { selEnd = _pointHere(endPoint.index - x, state); } let txt2 = txt; if ((!state.flags.preMode) && /^[\r\n]*$/.exec(txt)) { // prevents textnodes containing just "\n" from being significant // in safari when pasting text, now that we convert them to // spaces instead of removing them, because in other cases // removing "\n" from pasted HTML will collapse words together. txt2 = ''; } const atBeginningOfLine = lines.textOfLine(lines.length() - 1).length == 0; if (atBeginningOfLine) { // newlines in the source mustn't become spaces at beginning of line box txt2 = txt2.replace(/^\n*/, ''); } if (atBeginningOfLine && !_.isEmpty(state.lineAttributes)) { _produceLineAttributesMarker(state); } lines.appendText(textify(txt2), state.attribString); x += consumed; txt = rest; if (txt.length > 0) { cc.startNewLine(state); } } } else { var tname = (dom.nodeTagName(node) || '').toLowerCase(); if (tname == 'img') { const collectContentImage = hooks.callAll('collectContentImage', { cc, state, tname, styl, cls, node, }); } else { // THIS SEEMS VERY HACKY! -- Please submit a better fix! delete state.lineAttributes.img; } if (tname == 'br') { this.breakLine = true; const tvalue = dom.nodeAttr(node, 'value'); const induceLineBreak = hooks.callAll('collectContentLineBreak', { cc: this, state, tname, tvalue, styl: null, cls: null, }); const startNewLine = (typeof (induceLineBreak) === 'object' && induceLineBreak.length == 0) ? true : induceLineBreak[0]; if (startNewLine) { cc.startNewLine(state); } } else if (tname == 'script' || tname == 'style') { // ignore } else if (!isEmpty) { var styl = dom.nodeAttr(node, 'style'); var cls = dom.nodeAttr(node, 'class'); let isPre = (tname == 'pre'); if ((!isPre) && abrowser.safari) { isPre = (styl && /\bwhite-space:\s*pre\b/i.exec(styl)); } if (isPre) cc.incrementFlag(state, 'preMode'); let oldListTypeOrNull = null; let oldAuthorOrNull = null; // LibreOffice Writer puts in weird items during import or copy/paste, we should drop them. if (cls === 'Numbering_20_Symbols' || cls === 'Bullet_20_Symbols') { styl = null; cls = null; // We have to return here but this could break things in the future, for now it shows how to fix the problem return; } if (collectStyles) { hooks.callAll('collectContentPre', { cc, state, tname, styl, cls, }); if (tname == 'b' || (styl && /\bfont-weight:\s*bold\b/i.exec(styl)) || tname == 'strong') { cc.doAttrib(state, 'bold'); } if (tname == 'i' || (styl && /\bfont-style:\s*italic\b/i.exec(styl)) || tname == 'em') { cc.doAttrib(state, 'italic'); } if (tname == 'u' || (styl && /\btext-decoration:\s*underline\b/i.exec(styl)) || tname == 'ins') { cc.doAttrib(state, 'underline'); } if (tname == 's' || (styl && /\btext-decoration:\s*line-through\b/i.exec(styl)) || tname == 'del') { cc.doAttrib(state, 'strikethrough'); } if (tname == 'ul' || tname == 'ol') { if (node.attribs) { var type = node.attribs.class; } else { var type = null; } const rr = cls && /(?:^| )list-([a-z]+[0-9]+)\b/.exec(cls); // lists do not need to have a type, so before we make a wrong guess, check if we find a better hint within the node's children if (!rr && !type) { for (var i in node.children) { if (node.children[i] && node.children[i].name == 'ul') { type = node.children[i].attribs.class; if (type) { break; } } } } if (rr && rr[1]) { type = rr[1]; } else { if (tname == 'ul') { if ((type && type.match('indent')) || (node.attribs && node.attribs.class && node.attribs.class.match('indent'))) { type = 'indent'; } else { type = 'bullet'; } } else { type = 'number'; } type += String(Math.min(_MAX_LIST_LEVEL, (state.listNesting || 0) + 1)); } oldListTypeOrNull = (_enterList(state, type) || 'none'); } else if ((tname == 'div' || tname == 'p') && cls && cls.match(/(?:^| )ace-line\b/)) { // This has undesirable behavior in Chrome but is right in other browsers. // See https://github.com/ether/etherpad-lite/issues/2412 for reasoning if (!abrowser.chrome) oldListTypeOrNull = (_enterList(state, type) || 'none'); } else if ((tname === 'li')) { state.lineAttributes.start = state.start || 0; _recalcAttribString(state); if (state.lineAttributes.list.indexOf('number') !== -1) { /* Nested OLs are not -->