/** * This code is mostly from the old Etherpad. Please help us to comment this code. * This helps other people to understand this code better and helps them to improve it. * TL;DR COMMENTS ON THIS FILE ARE HIGHLY APPRECIATED */ // THIS FILE IS ALSO AN APPJET MODULE: etherpad.collab.ace.contentcollector // %APPJET%: import("etherpad.collab.ace.easysync2.Changeset"); // %APPJET%: import("etherpad.admin.plugins"); /** * Copyright 2009 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS-IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ var _MAX_LIST_LEVEL = 16; var UNorm = require('unorm'); var Changeset = require('./Changeset'); var hooks = require('./pluginfw/hooks'); var _ = require('./underscore'); function sanitizeUnicode(s) { return UNorm.nfc(s); } function makeContentCollector(collectStyles, abrowser, apool, domInterface, className2Author) { abrowser = abrowser || {}; // I don't like the above. var dom = domInterface || { isNodeText: function(n) { return (n.nodeType == 3); }, nodeTagName: function(n) { return n.tagName; }, nodeValue: function(n) { return n.nodeValue; }, nodeNumChildren: function(n) { if(n.childNodes == null) return 0; return n.childNodes.length; }, nodeChild: function(n, i) { if(n.childNodes.item == null){ return n.childNodes[i]; } return n.childNodes.item(i); }, nodeProp: function(n, p) { return n[p]; }, nodeAttr: function(n, a) { if(n.getAttribute != null) return n.getAttribute(a); if(n.attribs != null) return n.attribs[a]; return null; }, optNodeInnerHTML: function(n) { return n.innerHTML; } }; var _blockElems = { "div": 1, "p": 1, "pre": 1, "li": 1 }; _.each(hooks.callAll('ccRegisterBlockElements'), function(element){ _blockElems[element] = 1; }); function isBlockElement(n) { return !!_blockElems[(dom.nodeTagName(n) || "").toLowerCase()]; } function textify(str) { return sanitizeUnicode( str.replace(/(\n | \n)/g, ' ').replace(/[\n\r ]/g, ' ').replace(/\xa0/g, ' ').replace(/\t/g, ' ')); } function getAssoc(node, name) { return dom.nodeProp(node, "_magicdom_" + name); } var lines = (function() { var textArray = []; var attribsArray = []; var attribsBuilder = null; var op = Changeset.newOp('+'); var self = { length: function() { return textArray.length; }, atColumnZero: function() { return textArray[textArray.length - 1] === ""; }, startNew: function() { textArray.push(""); self.flush(true); attribsBuilder = Changeset.smartOpAssembler(); }, textOfLine: function(i) { return textArray[i]; }, appendText: function(txt, attrString) { textArray[textArray.length - 1] += txt; //dmesg(txt+" / "+attrString); op.attribs = attrString; op.chars = txt.length; attribsBuilder.append(op); }, textLines: function() { return textArray.slice(); }, attribLines: function() { return attribsArray; }, // call flush only when you're done flush: function(withNewline) { if (attribsBuilder) { attribsArray.push(attribsBuilder.toString()); attribsBuilder = null; } } }; self.startNew(); return self; }()); var cc = {}; function _ensureColumnZero(state) { if (!lines.atColumnZero()) { cc.startNewLine(state); } } var selection, startPoint, endPoint; var selStart = [-1, -1], selEnd = [-1, -1]; function _isEmpty(node, state) { // consider clean blank lines pasted in IE to be empty if (dom.nodeNumChildren(node) == 0) return true; if (dom.nodeNumChildren(node) == 1 && getAssoc(node, "shouldBeEmpty") && dom.optNodeInnerHTML(node) == " " && !getAssoc(node, "unpasted")) { if (state) { var child = dom.nodeChild(node, 0); _reachPoint(child, 0, state); _reachPoint(child, 1, state); } return true; } return false; } function _pointHere(charsAfter, state) { var ln = lines.length() - 1; var chr = lines.textOfLine(ln).length; if (chr == 0 && !_.isEmpty(state.lineAttributes)) { chr += 1; // listMarker } chr += charsAfter; return [ln, chr]; } function _reachBlockPoint(nd, idx, state) { if (!dom.isNodeText(nd)) _reachPoint(nd, idx, state); } function _reachPoint(nd, idx, state) { if (startPoint && nd == startPoint.node && startPoint.index == idx) { selStart = _pointHere(0, state); } if (endPoint && nd == endPoint.node && endPoint.index == idx) { selEnd = _pointHere(0, state); } } cc.incrementFlag = function(state, flagName) { state.flags[flagName] = (state.flags[flagName] || 0) + 1; } cc.decrementFlag = function(state, flagName) { state.flags[flagName]--; } cc.incrementAttrib = function(state, attribName) { if (!state.attribs[attribName]) { state.attribs[attribName] = 1; } else { state.attribs[attribName]++; } _recalcAttribString(state); } cc.decrementAttrib = function(state, attribName) { state.attribs[attribName]--; _recalcAttribString(state); } function _enterList(state, listType) { if(!listType) return; var oldListType = state.lineAttributes['list']; if (listType != 'none') { state.listNesting = (state.listNesting || 0) + 1; // reminder that listType can be "number2", "number3" etc. if(listType.indexOf("number") !== -1){ state.start = (state.start || 0) + 1; } } if(listType === 'none' || !listType ){ delete state.lineAttributes['list']; } else{ state.lineAttributes['list'] = listType; } _recalcAttribString(state); return oldListType; } function _exitList(state, oldListType) { if (state.lineAttributes['list']) { state.listNesting--; } if (oldListType && oldListType != 'none') { state.lineAttributes['list'] = oldListType; } else { delete state.lineAttributes['list']; delete state.lineAttributes['start']; } _recalcAttribString(state); } function _enterAuthor(state, author) { var oldAuthor = state.author; state.authorLevel = (state.authorLevel || 0) + 1; state.author = author; _recalcAttribString(state); return oldAuthor; } function _exitAuthor(state, oldAuthor) { state.authorLevel--; state.author = oldAuthor; _recalcAttribString(state); } function _recalcAttribString(state) { var lst = []; for (var a in state.attribs) { if (state.attribs[a]) { // The following splitting of the attribute name is a workaround // to enable the content collector to store key-value attributes // see https://github.com/ether/etherpad-lite/issues/2567 for more information // in long term the contentcollector should be refactored to get rid of this workaround var ATTRIBUTE_SPLIT_STRING = "::"; // see if attributeString is splittable var attributeSplits = a.split(ATTRIBUTE_SPLIT_STRING); if (attributeSplits.length > 1) { // the attribute name follows the convention key::value // so save it as a key value attribute lst.push([attributeSplits[0], attributeSplits[1]]); } else { // the "normal" case, the attribute is just a switch // so set it true lst.push([a, 'true']); } } } if (state.authorLevel > 0) { var authorAttrib = ['author', state.author]; if (apool.putAttrib(authorAttrib, true) >= 0) { // require that author already be in pool // (don't add authors from other documents, etc.) lst.push(authorAttrib); } } state.attribString = Changeset.makeAttribsString('+', lst, apool); } function _produceLineAttributesMarker(state) { // TODO: This has to go to AttributeManager. var attributes = [ ['lmkr', '1'], ['insertorder', 'first'] ].concat( _.map(state.lineAttributes,function(value,key){ return [key, value]; }) ); lines.appendText('*', Changeset.makeAttribsString('+', attributes , apool)); } cc.startNewLine = function(state) { if (state) { var atBeginningOfLine = lines.textOfLine(lines.length() - 1).length == 0; if (atBeginningOfLine && !_.isEmpty(state.lineAttributes)) { _produceLineAttributesMarker(state); } } lines.startNew(); } cc.notifySelection = function(sel) { if (sel) { selection = sel; startPoint = selection.startPoint; endPoint = selection.endPoint; } }; cc.doAttrib = function(state, na) { state.localAttribs = (state.localAttribs || []); state.localAttribs.push(na); cc.incrementAttrib(state, na); }; cc.collectContent = function(node, state) { if (!state) { state = { flags: { /*name -> nesting counter*/ }, localAttribs: null, attribs: { /*name -> nesting counter*/ }, attribString: '', // lineAttributes maintain a map from attributes to attribute values set on a line lineAttributes: { /* example: 'list': 'bullet1', */ } }; } var localAttribs = state.localAttribs; state.localAttribs = null; var isBlock = isBlockElement(node); var isEmpty = _isEmpty(node, state); if (isBlock) _ensureColumnZero(state); var startLine = lines.length() - 1; _reachBlockPoint(node, 0, state); if (dom.isNodeText(node)) { var txt = dom.nodeValue(node); var tname = dom.nodeAttr(node.parentNode,"name"); var txtFromHook = hooks.callAll('collectContentLineText', { cc: this, state: state, tname: tname, node:node, text:txt, styl: null, cls: null }); if(typeof(txtFromHook)=='object'){ txt = dom.nodeValue(node) }else{ if(txtFromHook){ txt = txtFromHook }; } var rest = ''; var x = 0; // offset into original text if (txt.length == 0) { if (startPoint && node == startPoint.node) { selStart = _pointHere(0, state); } if (endPoint && node == endPoint.node) { selEnd = _pointHere(0, state); } } while (txt.length > 0) { var consumed = 0; if (state.flags.preMode) { var firstLine = txt.split('\n', 1)[0]; consumed = firstLine.length + 1; rest = txt.substring(consumed); txt = firstLine; } else { /* will only run this loop body once */ } if (startPoint && node == startPoint.node && startPoint.index - x <= txt.length) { selStart = _pointHere(startPoint.index - x, state); } if (endPoint && node == endPoint.node && endPoint.index - x <= txt.length) { selEnd = _pointHere(endPoint.index - x, state); } var txt2 = txt; if ((!state.flags.preMode) && /^[\r\n]*$/.exec(txt)) { // prevents textnodes containing just "\n" from being significant // in safari when pasting text, now that we convert them to // spaces instead of removing them, because in other cases // removing "\n" from pasted HTML will collapse words together. txt2 = ""; } var atBeginningOfLine = lines.textOfLine(lines.length() - 1).length == 0; if (atBeginningOfLine) { // newlines in the source mustn't become spaces at beginning of line box txt2 = txt2.replace(/^\n*/, ''); } if (atBeginningOfLine && !_.isEmpty(state.lineAttributes)) { _produceLineAttributesMarker(state); } lines.appendText(textify(txt2), state.attribString); x += consumed; txt = rest; if (txt.length > 0) { cc.startNewLine(state); } } } else { var tname = (dom.nodeTagName(node) || "").toLowerCase(); if (tname == "img"){ var collectContentImage = hooks.callAll('collectContentImage', { cc: cc, state: state, tname: tname, styl: styl, cls: cls, node: node }); }else{ // THIS SEEMS VERY HACKY! -- Please submit a better fix! delete state.lineAttributes.img } if (tname == "br") { this.breakLine = true; var tvalue = dom.nodeAttr(node, 'value'); var induceLineBreak = hooks.callAll('collectContentLineBreak', { cc: this, state: state, tname: tname, tvalue:tvalue, styl: null, cls: null }); var startNewLine= (typeof(induceLineBreak)=='object'&&induceLineBreak.length==0)?true:induceLineBreak[0]; if(startNewLine){ cc.startNewLine(state); } } else if (tname == "script" || tname == "style") { // ignore } else if (!isEmpty) { var styl = dom.nodeAttr(node, "style"); var cls = dom.nodeAttr(node, "class"); var isPre = (tname == "pre"); if ((!isPre) && abrowser.safari) { isPre = (styl && /\bwhite-space:\s*pre\b/i.exec(styl)); } if (isPre) cc.incrementFlag(state, 'preMode'); var oldListTypeOrNull = null; var oldAuthorOrNull = null; // LibreOffice Writer puts in weird items during import or copy/paste, we should drop them. if (cls === "Numbering_20_Symbols" || cls === "Bullet_20_Symbols") { styl = null; cls = null; // We have to return here but this could break things in the future, for now it shows how to fix the problem return; } if (collectStyles) { hooks.callAll('collectContentPre', { cc: cc, state: state, tname: tname, styl: styl, cls: cls }); if (tname == "b" || (styl && /\bfont-weight:\s*bold\b/i.exec(styl)) || tname == "strong") { cc.doAttrib(state, "bold"); } if (tname == "i" || (styl && /\bfont-style:\s*italic\b/i.exec(styl)) || tname == "em") { cc.doAttrib(state, "italic"); } if (tname == "u" || (styl && /\btext-decoration:\s*underline\b/i.exec(styl)) || tname == "ins") { cc.doAttrib(state, "underline"); } if (tname == "s" || (styl && /\btext-decoration:\s*line-through\b/i.exec(styl)) || tname == "del") { cc.doAttrib(state, "strikethrough"); } if (tname == "ul" || tname == "ol") { if(node.attribs){ var type = node.attribs.class; }else{ var type = null; } var rr = cls && /(?:^| )list-([a-z]+[0-9]+)\b/.exec(cls); // lists do not need to have a type, so before we make a wrong guess, check if we find a better hint within the node's children if(!rr && !type){ for (var i in node.children){ if(node.children[i] && node.children[i].name=='ul'){ type = node.children[i].attribs.class if(type){ break } } } } if(rr && rr[1]){ type = rr[1] } else { if(tname == "ul"){ if((type && type.match("indent")) || (node.attribs && node.attribs.class && node.attribs.class.match("indent"))){ type = "indent" } else { type = "bullet" } } else { type = "number" } type = type + String(Math.min(_MAX_LIST_LEVEL, (state.listNesting || 0) + 1)); } oldListTypeOrNull = (_enterList(state, type) || 'none'); } else if ((tname == "div" || tname == "p") && cls && cls.match(/(?:^| )ace-line\b/)) { // This has undesirable behavior in Chrome but is right in other browsers. // See https://github.com/ether/etherpad-lite/issues/2412 for reasoning if(!abrowser.chrome) oldListTypeOrNull = (_enterList(state, type) || 'none'); } else if ((tname === "li")){ state.lineAttributes['start'] = state.start || 0; _recalcAttribString(state); if(state.lineAttributes.list.indexOf("number") !== -1){ /* Nested OLs are not -->
  1. 1
    1. nested
They are -->
  1. 1
    1. nested
Note how the
    item has to be inside a
  1. Because of this we don't increment the start number */ if(node.parent && node.parent.name !== "ol"){ /* TODO: start number has to increment based on indentLevel(numberX) This means we have to build an object IE { 1: 4 2: 3 3: 5 } But the browser seems to handle it fine using CSS.. Why can't we do the same with exports? We can.. But let's leave this comment in because it might be useful in the future.. */ state.start++; // not if it's parent is an OL or UL. } } // UL list items never modify the start value. if(node.parent && node.parent.name === "ul"){ state.start++; // TODO, this is hacky. // Because if the first item is an UL it will increment a list no? // A much more graceful way would be to say, ul increases if it's within an OL // But I don't know a way to do that because we're only aware of the previous Line // As the concept of parent's doesn't exist when processing each domline... } }else{ // Below needs more testin if it's neccesary as _exitList should take care of this. // delete state.start; // delete state.listNesting; // _recalcAttribString(state); } if (className2Author && cls) { var classes = cls.match(/\S+/g); if (classes && classes.length > 0) { for (var i = 0; i < classes.length; i++) { var c = classes[i]; var a = className2Author(c); if (a) { oldAuthorOrNull = (_enterAuthor(state, a) || 'none'); break; } } } } } var nc = dom.nodeNumChildren(node); for (var i = 0; i < nc; i++) { var c = dom.nodeChild(node, i); cc.collectContent(c, state); } if (collectStyles) { hooks.callAll('collectContentPost', { cc: cc, state: state, tname: tname, styl: styl, cls: cls }); } if (isPre) cc.decrementFlag(state, 'preMode'); if (state.localAttribs) { for (var i = 0; i < state.localAttribs.length; i++) { cc.decrementAttrib(state, state.localAttribs[i]); } } if (oldListTypeOrNull) { _exitList(state, oldListTypeOrNull); } if (oldAuthorOrNull) { _exitAuthor(state, oldAuthorOrNull); } } } if (!abrowser.msie) { _reachBlockPoint(node, 1, state); } if (isBlock) { if (lines.length() - 1 == startLine) { // added additional check to resolve https://github.com/JohnMcLear/ep_copy_paste_images/issues/20 // this does mean that images etc can't be pasted on lists but imho that's fine // If we're doing an export event we need to start a new lines // Export events don't have window available. // commented out to solve #2412 - https://github.com/ether/etherpad-lite/issues/2412 if((state.lineAttributes && !state.lineAttributes.list) || typeof window === "undefined"){ cc.startNewLine(state); } } else { _ensureColumnZero(state); } } if (abrowser.msie) { // in IE, a point immediately after a DIV appears on the next line _reachBlockPoint(node, 1, state); } state.localAttribs = localAttribs; }; // can pass a falsy value for end of doc cc.notifyNextNode = function(node) { // an "empty block" won't end a line; this addresses an issue in IE with // typing into a blank line at the end of the document. typed text // goes into the body, and the empty line div still looks clean. // it is incorporated as dirty by the rule that a dirty region has // to end a line. if ((!node) || (isBlockElement(node) && !_isEmpty(node))) { _ensureColumnZero(null); } }; // each returns [line, char] or [-1,-1] var getSelectionStart = function() { return selStart; }; var getSelectionEnd = function() { return selEnd; }; // returns array of strings for lines found, last entry will be "" if // last line is complete (i.e. if a following span should be on a new line). // can be called at any point cc.getLines = function() { return lines.textLines(); }; cc.finish = function() { lines.flush(); var lineAttribs = lines.attribLines(); var lineStrings = cc.getLines(); lineStrings.length--; lineAttribs.length--; var ss = getSelectionStart(); var se = getSelectionEnd(); function fixLongLines() { // design mode does not deal with with really long lines! var lineLimit = 2000; // chars var buffer = 10; // chars allowed over before wrapping var linesWrapped = 0; var numLinesAfter = 0; for (var i = lineStrings.length - 1; i >= 0; i--) { var oldString = lineStrings[i]; var oldAttribString = lineAttribs[i]; if (oldString.length > lineLimit + buffer) { var newStrings = []; var newAttribStrings = []; while (oldString.length > lineLimit) { //var semiloc = oldString.lastIndexOf(';', lineLimit-1); //var lengthToTake = (semiloc >= 0 ? (semiloc+1) : lineLimit); var lengthToTake = lineLimit; newStrings.push(oldString.substring(0, lengthToTake)); oldString = oldString.substring(lengthToTake); newAttribStrings.push(Changeset.subattribution(oldAttribString, 0, lengthToTake)); oldAttribString = Changeset.subattribution(oldAttribString, lengthToTake); } if (oldString.length > 0) { newStrings.push(oldString); newAttribStrings.push(oldAttribString); } function fixLineNumber(lineChar) { if (lineChar[0] < 0) return; var n = lineChar[0]; var c = lineChar[1]; if (n > i) { n += (newStrings.length - 1); } else if (n == i) { var a = 0; while (c > newStrings[a].length) { c -= newStrings[a].length; a++; } n += a; } lineChar[0] = n; lineChar[1] = c; } fixLineNumber(ss); fixLineNumber(se); linesWrapped++; numLinesAfter += newStrings.length; newStrings.unshift(i, 1); lineStrings.splice.apply(lineStrings, newStrings); newAttribStrings.unshift(i, 1); lineAttribs.splice.apply(lineAttribs, newAttribStrings); } } return { linesWrapped: linesWrapped, numLinesAfter: numLinesAfter }; } var wrapData = fixLongLines(); return { selStart: ss, selEnd: se, linesWrapped: wrapData.linesWrapped, numLinesAfter: wrapData.numLinesAfter, lines: lineStrings, lineAttribs: lineAttribs }; } return cc; } exports.sanitizeUnicode = sanitizeUnicode; exports.makeContentCollector = makeContentCollector;