export html to original structure

This commit is contained in:
ilmar 2018-04-24 12:13:31 +03:00
parent 517b249394
commit d6fa065ef2

View file

@ -1,5 +1,3 @@
'use strict';
/**
* Copyright 2009 Google Inc.
*
@ -17,97 +15,63 @@
*/
var async = require('async');
var Changeset = require('ep_etherpad-lite/static/js/Changeset');
var padManager = require('../db/PadManager');
var eRR = require('async-stacktrace');
var async = require("async");
var Changeset = require("ep_etherpad-lite/static/js/Changeset");
var padManager = require("../db/PadManager");
var ERR = require("async-stacktrace");
var _ = require('underscore');
var Security = require('ep_etherpad-lite/static/js/security');
var hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks');
var eejs = require('ep_etherpad-lite/node/eejs');
var _analyzeLine = require('./ExportHelper')._analyzeLine;
var _encodeWhitespace = require('./ExportHelper')._encodeWhitespace;
// URL-detection patterns, copied from ACE.
// _REGEX_WORDCHAR matches one "word" character: ASCII digits and letters plus
// the listed Latin-1 and wider Unicode ranges.
var _REGEX_WORDCHAR = /[\u0030-\u0039\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u1FFF\u3040-\u9FFF\uF900-\uFDFF\uFE70-\uFEFE\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFDC]/;
// _REGEX_URLCHAR matches one character allowed inside a URL: either one of the
// listed punctuation/ASCII characters or any _REGEX_WORDCHAR character.
var _REGEX_URLCHAR = new RegExp('(' + (/[-:@a-zA-Z0-9_.,~%+/\\?=&#;()$]/).source + '|' + _REGEX_WORDCHAR.source + ')');
// _REGEX_URL matches a whole URL: a known scheme followed by "://" (or
// "mailto:"), then URL characters. The negative lookahead keeps the final
// character from being one of ":", ".", ",", ";" so trailing punctuation is
// not swallowed. The pattern is global, so it carries lastIndex state between
// exec() calls.
var _REGEX_URL = new RegExp(/(?:(?:https?|s?ftp|ftps|file|smb|afp|nfs|(x-)?man|gopher|txmt):\/\/|mailto:)/.source + _REGEX_URLCHAR.source + '*(?![:.,;])' + _REGEX_URLCHAR.source, 'g');
// returns null if no URLs, or [[startIndex1, url1], [startIndex2, url2], ...]
// copied from ACE
function _processSpaces (s) {
var doesWrap = true;
if (s.indexOf('<') < 0 && !doesWrap) {
// short-cut
return s.replace(/ /g, '&nbsp;');
}
var parts = [];
s.replace(/<[^>]*>?| |[^ <]+/g, function (m) {
parts.push(m);
function getPadHTML(pad, revNum, callback)
{
var atext = pad.atext;
var html;
async.waterfall([
// fetch revision atext
function (callback)
{
if (revNum != undefined)
{
pad.getInternalRevisionAText(revNum, function (err, revisionAtext)
{
if(ERR(err, callback)) return;
atext = revisionAtext;
callback();
});
if (doesWrap) {
var endOfLine = true;
var beforeSpace = false;
// last space in a run is normal, others are nbsp,
// end of line is nbsp
for (var i = parts.length - 1; i >= 0; i--) {
var p = parts[i];
if (p === ' ') {
if (endOfLine || beforeSpace) parts[i] = '&nbsp;';
endOfLine = false;
beforeSpace = true;
} else if (p.charAt(0) !== '<') {
endOfLine = false;
beforeSpace = false;
}
}
// beginning of line is nbsp
for (i = 0; i < parts.length; i++) {
p = parts[i];
if (p === ' ') {
parts[i] = '&nbsp;';
break;
} else if (p.charAt(0) !== '<') {
break;
}
}
} else {
for (i = 0; i < parts.length; i++) {
p = parts[i];
if (p === ' ') {
parts[i] = '&nbsp;';
}
else
{
callback(null);
}
},
// convert atext to html
function (callback)
{
html = getHTMLFromAtext(pad, atext);
callback(null);
}],
// run final callback
function (err)
{
if(ERR(err, callback)) return;
callback(null, html);
});
}
return parts.join('');
}
exports.getPadHTML = getPadHTML;
exports.getHTMLFromAtext = getHTMLFromAtext;
/**
 * Collects every URL that _REGEX_URL finds in the given text.
 *
 * @param {string} text line of text to scan
 * @returns {?Array} null when nothing matched, otherwise a list of
 *   [startIndex, url] pairs in order of appearance
 */
function _findURLs (text) {
  var matches = [];
  var hit;
  // The pattern is global and shared, so rewind it before scanning.
  _REGEX_URL.lastIndex = 0;
  while ((hit = _REGEX_URL.exec(text)) !== null) {
    matches.push([hit.index, hit[0]]);
  }
  return matches.length > 0 ? matches : null;
}
/**
* Return docx from input html
*
* @param {text} [pad] text
* @param {text} [atext] text
* @param {text} [authorColors] text
*
* @returns {html} html
*
* @public
*/
function getHTMLFromAtext (pad, atext, authorColors) {
function getHTMLFromAtext(pad, atext, authorColors)
{
var apool = pad.apool();
var textLines = atext.text.slice(0, -1).split('\n');
var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text);
@ -116,22 +80,16 @@ function getHTMLFromAtext (pad, atext, authorColors) {
var props = ['heading1', 'heading2', 'bold', 'italic', 'underline', 'strikethrough'];
// prepare tags stored as ['tag', true] to be exported
hooks.aCallAll('exportHtmlAdditionalTags', pad, function (err, newProps) {
if (err) {
return err;
}
newProps.forEach(function (propName) {
hooks.aCallAll("exportHtmlAdditionalTags", pad, function(err, newProps){
newProps.forEach(function (propName, i){
tags.push(propName);
props.push(propName);
});
});
// prepare tags stored as ['tag', 'value'] to be exported. This will generate HTML
// with tags like <span data-tag='value'>
hooks.aCallAll('exportHtmlAdditionalTagsWithData', pad, function (err, newProps) {
if (err) {
return err;
}
newProps.forEach(function (propName) {
// with tags like <span data-tag="value">
hooks.aCallAll("exportHtmlAdditionalTagsWithData", pad, function(err, newProps){
newProps.forEach(function (propName, i){
tags.push('span data-' + propName[0] + '="' + propName[1] + '"');
props.push(propName);
});
@ -142,49 +100,51 @@ function getHTMLFromAtext (pad, atext, authorColors) {
// *3:2 -> the attribute *3 means strong
// *2:5 -> the attribute *2 means s(trikethrough)
var anumMap = {};
var css = '';
var css = "";
// Replaces every "." in an author ID with "_". The result is appended to
// "author" to form the class name that is written into the generated
// <style> rules as "." + propName.
var stripDotFromAuthorID = function(id){
  var withoutDots = id.replace(/\./g,'_');
  return withoutDots;
};
if(authorColors){
css += '<style>\n';
css+="<style>\n";
for (var a in apool.numToAttrib) {
if (apool.numToAttrib.hasOwnProperty.call(a)) {
var attr = apool.numToAttrib[a];
var newLength = null;
var propName = null;
//skip non author attributes
if (attr[0] === 'author' && attr[1] !== '') {
if (attr[0] === "author" && attr[1] !== ""){
//add to props array
propName = 'author' + stripDotFromAuthorID(attr[1]);
propName = "author" + stripDotFromAuthorID(attr[1]);
newLength = props.push(propName);
anumMap[a] = newLength - 1;
css += '.' + propName + ' {background-color: ' + authorColors[attr[1]] + '}\n';
} else if (attr[0] === 'removed') {
propName = 'removed';
css+="." + propName + " {background-color: " + authorColors[attr[1]]+ "}\n";
} else if(attr[0] === "removed") {
propName = "removed";
newLength = props.push(propName);
anumMap[a] = newLength -1;
css += '.removed {text-decoration: line-through; ' +
'-ms-filter:\'progid:DXImageTransform.Microsoft.Alpha(Opacity=80)\'; ' +
'filter: alpha(opacity=80); ' +
'opacity: 0.8; ' +
'}\n';
css+=".removed {text-decoration: line-through; " +
"-ms-filter:'progid:DXImageTransform.Microsoft.Alpha(Opacity=80)'; "+
"filter: alpha(opacity=80); "+
"opacity: 0.8; "+
"}\n";
}
}
}
css += '</style>';
css+="</style>";
}
// iterates over all props(h1,h2,strong,...), checks if it is used in
// this pad, and if yes puts its attrib id->props value into anumMap
props.forEach(function (propName, i) {
props.forEach(function (propName, i)
{
var attrib = [propName, true];
if (_.isArray(propName)) {
// propName can be in the form of ['color', 'red'],
@ -192,12 +152,14 @@ function getHTMLFromAtext (pad, atext, authorColors) {
attrib = propName;
}
var propTrueNum = apool.putAttrib(attrib, true);
if (propTrueNum >= 0) {
if (propTrueNum >= 0)
{
anumMap[propTrueNum] = i;
}
});
function getLineHTML (text, attribs) {
function getLineHTML(text, attribs)
{
// Use order of tags (b/i/u) as order of nesting, for simplicity
// and decent nesting. For example,
// <b>Just bold<b> <b><i>Bold and italics</i></b> <i>Just italics</i>
@ -219,12 +181,12 @@ function getHTMLFromAtext (pad, atext, authorColors) {
return false;
}
if (property.substr(0, 6) === 'author') {
if(property.substr(0,6) === "author"){
return stripDotFromAuthorID(property);
}
if (property === 'removed') {
return 'removed';
if(property === "removed"){
return "removed";
}
return false;
@ -234,11 +196,11 @@ function getHTMLFromAtext (pad, atext, authorColors) {
// data attributes
// Tells whether the prop at index i is stored as an array, which is how the
// ['tag', 'value'] entries pushed by the exportHtmlAdditionalTagsWithData
// hook are represented in props.
function isSpanWithData(i){
  return _.isArray(props[i]);
}
function emitOpenTag (i) {
function emitOpenTag(i)
{
openTags.unshift(i);
var spanClass = getSpanClassFor(i);
@ -254,7 +216,8 @@ function getHTMLFromAtext (pad, atext, authorColors) {
}
// this closes an open tag and removes its reference from openTags
function emitCloseTag (i) {
function emitCloseTag(i)
{
openTags.shift();
var spanClass = getSpanClassFor(i);
var spanWithData = isSpanWithData(i);
@ -272,8 +235,10 @@ function getHTMLFromAtext (pad, atext, authorColors) {
var idx = 0;
function processNextChars (numChars) {
if (numChars <= 0) {
function processNextChars(numChars)
{
if (numChars <= 0)
{
return;
}
@ -282,42 +247,52 @@ function getHTMLFromAtext (pad, atext, authorColors) {
// this iterates over every op string and decides which tags to open or to close
// based on the attribs used
while (iter.hasNext()) {
while (iter.hasNext())
{
var o = iter.next();
var usedAttribs = [];
// mark all attribs as used
Changeset.eachAttribNumber(o.attribs, function (a) {
if (a in anumMap) {
Changeset.eachAttribNumber(o.attribs, function (a)
{
if (a in anumMap)
{
usedAttribs.push(anumMap[a]); // i = 0 => bold, etc.
}
});
var outermostTag = -1;
// find the outer most open tag that is no longer used
for (var i = openTags.length - 1; i >= 0; i--) {
if (usedAttribs.indexOf(openTags[i]) === -1) {
for (var i = openTags.length - 1; i >= 0; i--)
{
if (usedAttribs.indexOf(openTags[i]) === -1)
{
outermostTag = i;
break;
}
}
// close all tags upto the outer most
if (outermostTag !== -1) {
while (outermostTag >= 0) {
if (outermostTag !== -1)
{
while ( outermostTag >= 0 )
{
emitCloseTag(openTags[0]);
outermostTag--;
}
}
// open all tags that are used but not open
for (i = 0; i < usedAttribs.length; i++) {
if (openTags.indexOf(usedAttribs[i]) === -1) {
for (i=0; i < usedAttribs.length; i++)
{
if (openTags.indexOf(usedAttribs[i]) === -1)
{
emitOpenTag(usedAttribs[i]);
}
}
var chars = o.chars;
if (o.lines) {
if (o.lines)
{
chars--; // exclude newline at end of line, if present
}
@ -325,19 +300,21 @@ function getHTMLFromAtext (pad, atext, authorColors) {
//removes the characters with the code 12. Don't know where they come
//from but they break the abiword parser and are completly useless
s = s.replace(String.fromCharCode(12), '');
s = s.replace(String.fromCharCode(12), "");
assem.append(_encodeWhitespace(Security.escapeHTML(s)));
} // end iteration over spans in line
// close all the tags that are open after the last op
while (openTags.length > 0) {
while (openTags.length > 0)
{
emitCloseTag(openTags[0]);
}
} // end processNextChars
if (urls) {
urls.forEach(function (urlData) {
if (urls)
{
urls.forEach(function (urlData)
{
var startIndex = urlData[0];
var url = urlData[1];
var urlLength = url.length;
@ -347,12 +324,10 @@ function getHTMLFromAtext (pad, atext, authorColors) {
assem.append('</a>');
});
}
processNextChars(text.length - idx);
return _processSpaces(assem.toString());
} // end getLineHTML
var pieces = [css];
// Need to deal with constraints imposed on HTML lists; can
@ -363,13 +338,14 @@ function getHTMLFromAtext (pad, atext, authorColors) {
// want to deal gracefully with blank lines.
// => keeps track of the parents level of indentation
var openLists = [];
for (var i = 0; i < textLines.length; i++) {
for (var i = 0; i < textLines.length; i++)
{
var context;
var line = _analyzeLine(textLines[i], attribLines[i], apool);
var lineContent = getLineHTML(line.text, line.aline);
if (line.listLevel) { //If we are inside a list
if (line.listLevel)//If we are inside a list
{
context = {
line: line,
lineContent: lineContent,
@ -380,22 +356,26 @@ function getHTMLFromAtext (pad, atext, authorColors) {
};
var prevLine = null;
var nextLine = null;
if (i > 0) {
if (i > 0)
{
prevLine = _analyzeLine(textLines[i -1], attribLines[i -1], apool);
}
if (i < textLines.length) {
if (i < textLines.length)
{
nextLine = _analyzeLine(textLines[i + 1], attribLines[i + 1], apool);
}
hooks.callAll('getLineHTMLForExport', context);
//To create list parent elements
if ((!prevLine || prevLine.listLevel !== line.listLevel) || (prevLine && line.listTypeName !== prevLine.listTypeName)) {
//pieces.push('<ul class="' + line.listTypeName + '">');
var exists = _.find(openLists, function (item) {
if ((!prevLine || prevLine.listLevel !== line.listLevel) || (prevLine && line.listTypeName !== prevLine.listTypeName))
{
var exists = _.find(openLists, function (item)
{
return (item.level === line.listLevel && item.type === line.listTypeName);
});
if (!exists) {
var prevLevel = prevLine.listLevel || 0;
if (prevLine && line.listTypeName !== prevLine.listTypeName) {
if (prevLine && line.listTypeName !== prevLine.listTypeName)
{
prevLevel = 0;
}
@ -403,49 +383,63 @@ function getHTMLFromAtext (pad, atext, authorColors) {
openLists.push({level: diff, type: line.listTypeName});
var prevPiece = pieces[pieces.length - 1];
if (prevPiece.indexOf('<ul') === 0 || prevPiece.indexOf('<ol') === 0 || prevPiece.indexOf('</li>') === 0) {
pieces.push('<li>');
if (prevPiece.indexOf("<ul") === 0 || prevPiece.indexOf("<ol") === 0 || prevPiece.indexOf("</li>") === 0)
{
pieces.push("<li>");
}
if (line.listTypeName === 'number') {
pieces.push('<ol class="' + line.listTypeName + '">');
} else {
pieces.push('<ul class="' + line.listTypeName + '">');
if (line.listTypeName === "number")
{
pieces.push("<ol class=\"" + line.listTypeName + "\">");
}
else
{
pieces.push("<ul class=\"" + line.listTypeName + "\">");
}
}
}
}
}
pieces.push('<li>', context.lineContent);
pieces.push("<li>", context.lineContent);
// To close list elements
if (nextLine && nextLine.listLevel === line.listLevel && line.listTypeName === nextLine.listTypeName) {
pieces.push('</li>');
if (nextLine && nextLine.listLevel === line.listLevel && line.listTypeName === nextLine.listTypeName)
{
pieces.push("</li>");
}
if ((!nextLine || !nextLine.listLevel || nextLine.listLevel < line.listLevel) || (nextLine && line.listTypeName !== nextLine.listTypeName)) {
if ((!nextLine || !nextLine.listLevel || nextLine.listLevel < line.listLevel) || (nextLine && line.listTypeName !== nextLine.listTypeName))
{
var nextLevel = nextLine.listLevel || 0;
if (nextLine && line.listTypeName !== nextLine.listTypeName) {
if (nextLine && line.listTypeName !== nextLine.listTypeName)
{
nextLevel = 0;
}
for (var diff = nextLevel; diff < line.listLevel; diff++) {
openLists = openLists.filter(function(el) {
for (var diff = nextLevel; diff < line.listLevel; diff++)
{
openLists = openLists.filter(function(el)
{
return el.level !== diff && el.type !== line.listTypeName;
});
if (pieces[pieces.length - 1].indexOf('</ul') === 0 || pieces[pieces.length - 1].indexOf('</ol') === 0) {
pieces.push('</li>');
if (pieces[pieces.length - 1].indexOf("</ul") === 0 || pieces[pieces.length - 1].indexOf("</ol") === 0)
{
pieces.push("</li>");
}
if (line.listTypeName === 'number') {
pieces.push('</ol>');
} else {
pieces.push('</ul>');
if (line.listTypeName === "number")
{
pieces.push("</ol>");
}
else
{
pieces.push("</ul>");
}
}
}
} else { //outside any list, need to close line.listLevel of lists
}
else//outside any list, need to close line.listLevel of lists
{
context = {
line: line,
lineContent: lineContent,
@ -455,66 +449,31 @@ function getHTMLFromAtext (pad, atext, authorColors) {
padId: pad.id
};
hooks.callAll('getLineHTMLForExport', context);
pieces.push(context.lineContent, '<br>');
hooks.callAll("getLineHTMLForExport", context);
pieces.push(context.lineContent, "<br>");
}
}
return pieces.join('');
}
function getPadHTML (pad, revNum, callback) {
var atext = pad.atext;
var html;
async.waterfall(
[
// fetch revision atext
function (callback) {
if (revNum !== undefined) {
pad.getInternalRevisionAText(revNum, function (err, revisionAtext) {
if (eRR(err, callback)) return;
atext = revisionAtext;
exports.getPadHTMLDocument = function (padId, revNum, callback)
{
padManager.getPad(padId, function (err, pad)
{
if(ERR(err, callback)) return;
return callback();
});
} else {
return callback(null);
}
},
// convert atext to html
function (callback) {
html = getHTMLFromAtext(pad, atext);
callback(null);
}
],
// run final callback
function (err) {
if (eRR(err, callback)) return;
callback(null, html);
}
);
}
exports.getPadHTML = getPadHTML;
exports.getHTMLFromAtext = getHTMLFromAtext;
exports.getPadHTMLDocument = function (padId, revNum, callback) {
padManager.getPad(padId, function (err, pad) {
if (eRR(err, callback)) return;
var stylesForExportCSS = '';
var stylesForExportCSS = "";
// Include some Styles into the Head for Export
hooks.aCallAll('stylesForExport', padId, function (err, stylesForExport) {
if (eRR(err, callback)) return;
hooks.aCallAll("stylesForExport", padId, function(err, stylesForExport){
stylesForExport.forEach(function(css){
stylesForExportCSS += css;
});
getPadHTML(pad, revNum, function (err, html) {
if (eRR(err, callback)) return;
var exportedDoc = eejs.require('ep_etherpad-lite/templates/export_html.html', {
getPadHTML(pad, revNum, function (err, html)
{
if(ERR(err, callback)) return;
var exportedDoc = eejs.require("ep_etherpad-lite/templates/export_html.html", {
body: html,
padId: Security.escapeHTML(padId),
extraCSS: stylesForExportCSS
@ -524,3 +483,81 @@ exports.getPadHTMLDocument = function (padId, revNum, callback) {
});
});
};
// URL-detection patterns, copied from ACE.
// _REGEX_WORDCHAR matches one "word" character: ASCII digits and letters plus
// the listed Latin-1 and wider Unicode ranges.
var _REGEX_WORDCHAR = /[\u0030-\u0039\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u1FFF\u3040-\u9FFF\uF900-\uFDFF\uFE70-\uFEFE\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFDC]/;
// _REGEX_SPACE matches a single whitespace character.
var _REGEX_SPACE = /\s/;
// _REGEX_URLCHAR matches one character allowed inside a URL: either one of the
// listed punctuation/ASCII characters or any _REGEX_WORDCHAR character.
var _REGEX_URLCHAR = new RegExp('(' + /[-:@a-zA-Z0-9_.,~%+\/\\?=&#;()$]/.source + '|' + _REGEX_WORDCHAR.source + ')');
// _REGEX_URL matches a whole URL: a known scheme followed by "://" (or
// "mailto:"), then URL characters. The negative lookahead keeps the final
// character from being one of ":", ".", ",", ";" so trailing punctuation is
// not swallowed. The pattern is global, so it carries lastIndex state between
// exec() calls.
var _REGEX_URL = new RegExp(/(?:(?:https?|s?ftp|ftps|file|smb|afp|nfs|(x-)?man|gopher|txmt):\/\/|mailto:)/.source + _REGEX_URLCHAR.source + '*(?![:.,;])' + _REGEX_URLCHAR.source, 'g');
/**
 * Scans text for URLs using the shared global pattern _REGEX_URL.
 *
 * @param {string} text the text to search
 * @returns {?Array} null if no URLs, or
 *   [[startIndex1, url1], [startIndex2, url2], ...]
 */
function _findURLs(text)
{
  var found = [];
  var match;

  // _REGEX_URL is global and reused across calls; start from the beginning.
  _REGEX_URL.lastIndex = 0;

  for (match = _REGEX_URL.exec(text); match; match = _REGEX_URL.exec(text))
  {
    found.push([match.index, match[0]]);
  }

  if (found.length === 0)
  {
    return null;
  }
  return found;
}
/**
 * Rewrites the plain spaces of one line of HTML so that whitespace survives
 * rendering while the line can still wrap (adapted from ACE).
 *
 * The line is split into tags, single spaces and runs of other text. Tags are
 * never modified and are ignored when deciding what neighbours a space. Then:
 *   - a space at the end of the line becomes `&nbsp;`;
 *   - in a run of consecutive spaces, every space except the last becomes
 *     `&nbsp;`;
 *   - a space at the beginning of the line becomes `&nbsp;`.
 *
 * The previous version carried a `doesWrap` flag hard-coded to true, which
 * left a short-cut branch and a "convert every space" branch unreachable;
 * both are removed here without changing the output.
 *
 * @param {string} s one line of HTML text
 * @returns {string} the same line with selected spaces replaced by `&nbsp;`
 */
function _processSpaces(s){
  // Tokenize: an (optionally unterminated) tag, a single space, or a run of
  // anything else. match() returns null when s is empty.
  var parts = s.match(/<[^>]*>?| |[^ <]+/g) || [];
  var endOfLine = true;
  var beforeSpace = false;
  var i;
  var p;

  // Walk backwards: last space in a run is normal, others are nbsp,
  // end of line is nbsp.
  for (i = parts.length - 1; i >= 0; i--){
    p = parts[i];
    if (p === " "){
      if (endOfLine || beforeSpace) parts[i] = '&nbsp;';
      endOfLine = false;
      beforeSpace = true;
    }
    else if (p.charAt(0) !== "<"){
      // Real text resets both flags; tags leave them untouched.
      endOfLine = false;
      beforeSpace = false;
    }
  }

  // Beginning of line is nbsp: only the first non-tag token is examined.
  for (i = 0; i < parts.length; i++){
    p = parts[i];
    if (p === " "){
      parts[i] = '&nbsp;';
      break;
    }
    if (p.charAt(0) !== "<"){
      break;
    }
  }

  return parts.join('');
}