'use strict'; /** * Copyright 2009 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS-IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ var async = require('async'); var Changeset = require('ep_etherpad-lite/static/js/Changeset'); var padManager = require('../db/PadManager'); var eRR = require('async-stacktrace'); var _ = require('underscore'); var Security = require('ep_etherpad-lite/static/js/security'); var hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks'); var eejs = require('ep_etherpad-lite/node/eejs'); var _analyzeLine = require('./ExportHelper')._analyzeLine; var _encodeWhitespace = require('./ExportHelper')._encodeWhitespace; // copied from ACE var _REGEX_WORDCHAR = /[\u0030-\u0039\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u1FFF\u3040-\u9FFF\uF900-\uFDFF\uFE70-\uFEFE\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFDC]/; var _REGEX_URLCHAR = new RegExp('(' + (/[-:@a-zA-Z0-9_.,~%+/\\?=&#;()$]/).source + '|' + _REGEX_WORDCHAR.source + ')'); var _REGEX_URL = new RegExp(/(?:(?:https?|s?ftp|ftps|file|smb|afp|nfs|(x-)?man|gopher|txmt):\/\/|mailto:)/.source + _REGEX_URLCHAR.source + '*(?![:.,;])' + _REGEX_URLCHAR.source, 'g'); // returns null if no URLs, or [[startIndex1, url1], [startIndex2, url2], ...] // copied from ACE function _processSpaces (s) { var doesWrap = true; if (s.indexOf('<') < 0 && !doesWrap) { // short-cut return s.replace(/ /g, ' '); } var parts = []; s.replace(/<[^>]*>?| |[^ <]+/g, function (m) { parts.push(m); }); if (doesWrap) { var endOfLine = true; var beforeSpace = false; // last space in a run is normal, others are nbsp, // end of line is nbsp for (var i = parts.length - 1; i >= 0; i--) { var p = parts[i]; if (p === ' ') { if (endOfLine || beforeSpace) parts[i] = ' '; endOfLine = false; beforeSpace = true; } else if (p.charAt(0) !== '<') { endOfLine = false; beforeSpace = false; } } // beginning of line is nbsp for (i = 0; i < parts.length; i++) { p = parts[i]; if (p === ' ') { parts[i] = ' '; break; } else if (p.charAt(0) !== '<') { break; } } } else { for (i = 0; i < parts.length; i++) { p = parts[i]; if (p === ' ') { parts[i] = ' '; } } } return parts.join(''); } function _findURLs (text) { _REGEX_URL.lastIndex = 0; var urls = null; var execResult; while (execResult = _REGEX_URL.exec(text)) { urls = urls || []; var startIndex = execResult.index; var url = execResult[0]; urls.push([startIndex, url]); } return urls; } /** * Return docx from input html * * @param {text} [pad] text * @param {text} [atext] text * @param {text} [authorColors] text * * @returns {html} html * * @public */ function getHTMLFromAtext (pad, atext, authorColors) { var apool = pad.apool(); var textLines = atext.text.slice(0, -1).split('\n'); var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text); var tags = ['h1', 'h2', 'strong', 'em', 'u', 's']; var props = ['heading1', 'heading2', 'bold', 'italic', 'underline', 'strikethrough']; // prepare tags stored as ['tag', true] to be exported hooks.aCallAll('exportHtmlAdditionalTags', pad, function (err, newProps) { if (err) { return err; } newProps.forEach(function (propName) { tags.push(propName); props.push(propName); }); }); // prepare tags stored as ['tag', 'value'] to be exported. This will generate HTML // with tags like hooks.aCallAll('exportHtmlAdditionalTagsWithData', pad, function (err, newProps) { if (err) { return err; } newProps.forEach(function (propName) { tags.push('span data-' + propName[0] + '="' + propName[1] + '"'); props.push(propName); }); }); // holds a map of used styling attributes (*1, *2, etc) in the apool // and maps them to an index in props // *3:2 -> the attribute *3 means strong // *2:5 -> the attribute *2 means s(trikethrough) var anumMap = {}; var css = ''; var stripDotFromAuthorID = function (id) { return id.replace(/\./g, '_'); }; if (authorColors) { css += ''; } // iterates over all props(h1,h2,strong,...), checks if it is used in // this pad, and if yes puts its attrib id->props value into anumMap props.forEach(function (propName, i) { var attrib = [propName, true]; if (_.isArray(propName)) { // propName can be in the form of ['color', 'red'], // see hook exportHtmlAdditionalTagsWithData attrib = propName; } var propTrueNum = apool.putAttrib(attrib, true); if (propTrueNum >= 0) { anumMap[propTrueNum] = i; } }); function getLineHTML (text, attribs) { // Use order of tags (b/i/u) as order of nesting, for simplicity // and decent nesting. For example, // Just bold Bold and italics Just italics // becomes // Just bold Bold and italics Just italics var taker = Changeset.stringIterator(text); var assem = Changeset.stringAssembler(); var openTags = []; function getSpanClassFor (i) { //return if author colors are disabled if (!authorColors) return false; var property = props[i]; // we are not insterested on properties in the form of ['color', 'red'], // see hook exportHtmlAdditionalTagsWithData if (_.isArray(property)) { return false; } if (property.substr(0, 6) === 'author') { return stripDotFromAuthorID(property); } if (property === 'removed') { return 'removed'; } return false; } // tags added by exportHtmlAdditionalTagsWithData will be exported as with // data attributes function isSpanWithData (i) { var property = props[i]; return _.isArray(property); } function emitOpenTag (i) { openTags.unshift(i); var spanClass = getSpanClassFor(i); if (spanClass) { assem.append(''); } else { assem.append('<'); assem.append(tags[i]); assem.append('>'); } } // this closes an open tag and removes its reference from openTags function emitCloseTag (i) { openTags.shift(); var spanClass = getSpanClassFor(i); var spanWithData = isSpanWithData(i); if (spanClass || spanWithData) { assem.append(''); } else { assem.append(''); } } var urls = _findURLs(text); var idx = 0; function processNextChars (numChars) { if (numChars <= 0) { return; } var iter = Changeset.opIterator(Changeset.subattribution(attribs, idx, idx + numChars)); idx += numChars; // this iterates over every op string and decides which tags to open or to close // based on the attribs used while (iter.hasNext()) { var o = iter.next(); var usedAttribs = []; // mark all attribs as used Changeset.eachAttribNumber(o.attribs, function (a) { if (a in anumMap) { usedAttribs.push(anumMap[a]); // i = 0 => bold, etc. } }); var outermostTag = -1; // find the outer most open tag that is no longer used for (var i = openTags.length - 1; i >= 0; i--) { if (usedAttribs.indexOf(openTags[i]) === -1) { outermostTag = i; break; } } // close all tags upto the outer most if (outermostTag !== -1) { while (outermostTag >= 0) { emitCloseTag(openTags[0]); outermostTag--; } } // open all tags that are used but not open for (i = 0; i < usedAttribs.length; i++) { if (openTags.indexOf(usedAttribs[i]) === -1) { emitOpenTag(usedAttribs[i]); } } var chars = o.chars; if (o.lines) { chars--; // exclude newline at end of line, if present } var s = taker.take(chars); //removes the characters with the code 12. Don't know where they come //from but they break the abiword parser and are completly useless s = s.replace(String.fromCharCode(12), ''); assem.append(_encodeWhitespace(Security.escapeHTML(s))); } // end iteration over spans in line // close all the tags that are open after the last op while (openTags.length > 0) { emitCloseTag(openTags[0]); } } // end processNextChars if (urls) { urls.forEach(function (urlData) { var startIndex = urlData[0]; var url = urlData[1]; var urlLength = url.length; processNextChars(startIndex - idx); assem.append(''); processNextChars(urlLength); assem.append(''); }); } processNextChars(text.length - idx); return _processSpaces(assem.toString()); } // end getLineHTML var pieces = [css]; // Need to deal with constraints imposed on HTML lists; can // only gain one level of nesting at once, can't change type // mid-list, etc. // People might use weird indenting, e.g. skip a level, // so we want to do something reasonable there. We also // want to deal gracefully with blank lines. // => keeps track of the parents level of indentation var openLists = []; for (var i = 0; i < textLines.length; i++) { var context; var line = _analyzeLine(textLines[i], attribLines[i], apool); var lineContent = getLineHTML(line.text, line.aline); if (line.listLevel) { //If we are inside a list context = { line: line, lineContent: lineContent, apool: apool, attribLine: attribLines[i], text: textLines[i], padId: pad.id }; var prevLine = null; var nextLine = null; if (i > 0) { prevLine = _analyzeLine(textLines[i - 1], attribLines[i - 1], apool); } if (i < textLines.length) { nextLine = _analyzeLine(textLines[i + 1], attribLines[i + 1], apool); } hooks.callAll('getLineHTMLForExport', context); //To create list parent elements if ((!prevLine || prevLine.listLevel !== line.listLevel) || (prevLine && line.listTypeName !== prevLine.listTypeName)) { //pieces.push('
    '); var exists = _.find(openLists, function (item) { return (item.level === line.listLevel && item.type === line.listTypeName); }); if (!exists) { var prevLevel = prevLine.listLevel || 0; if (prevLine && line.listTypeName !== prevLine.listTypeName) { prevLevel = 0; } for (var diff = prevLevel; diff < line.listLevel; diff++) { openLists.push({level: diff, type: line.listTypeName}); var prevPiece = pieces[pieces.length - 1]; if (prevPiece.indexOf('') === 0) { pieces.push('
  • '); } if (line.listTypeName === 'number') { pieces.push('
      '); } else { pieces.push('
        '); } } } } pieces.push('
      • ', context.lineContent); // To close list elements if (nextLine && nextLine.listLevel === line.listLevel && line.listTypeName === nextLine.listTypeName) { pieces.push('
      • '); } if ((!nextLine || !nextLine.listLevel || nextLine.listLevel < line.listLevel) || (nextLine && line.listTypeName !== nextLine.listTypeName)) { var nextLevel = nextLine.listLevel || 0; if (nextLine && line.listTypeName !== nextLine.listTypeName) { nextLevel = 0; } for (var diff = nextLevel; diff < line.listLevel; diff++) { openLists = openLists.filter(function(el) { return el.level !== diff && el.type !== line.listTypeName; }); if (pieces[pieces.length - 1].indexOf(''); } if (line.listTypeName === 'number') { pieces.push('
    '); } else { pieces.push('
'); } } } } else { //outside any list, need to close line.listLevel of lists context = { line: line, lineContent: lineContent, apool: apool, attribLine: attribLines[i], text: textLines[i], padId: pad.id }; hooks.callAll('getLineHTMLForExport', context); pieces.push(context.lineContent, '
'); } } return pieces.join(''); } function getPadHTML (pad, revNum, callback) { var atext = pad.atext; var html; async.waterfall( [ // fetch revision atext function (callback) { if (revNum !== undefined) { pad.getInternalRevisionAText(revNum, function (err, revisionAtext) { if (eRR(err, callback)) return; atext = revisionAtext; return callback(); }); } else { return callback(null); } }, // convert atext to html function (callback) { html = getHTMLFromAtext(pad, atext); callback(null); } ], // run final callback function (err) { if (eRR(err, callback)) return; callback(null, html); } ); } exports.getPadHTML = getPadHTML; exports.getHTMLFromAtext = getHTMLFromAtext; exports.getPadHTMLDocument = function (padId, revNum, callback) { padManager.getPad(padId, function (err, pad) { if (eRR(err, callback)) return; var stylesForExportCSS = ''; // Include some Styles into the Head for Export hooks.aCallAll('stylesForExport', padId, function (err, stylesForExport) { if (eRR(err, callback)) return; stylesForExport.forEach(function (css) { stylesForExportCSS += css; }); getPadHTML(pad, revNum, function (err, html) { if (eRR(err, callback)) return; var exportedDoc = eejs.require('ep_etherpad-lite/templates/export_html.html', { body: html, padId: Security.escapeHTML(padId), extraCSS: stylesForExportCSS }); callback(null, exportedDoc); }); }); }); };