pad.libre-service.eu-etherpad/src/node/utils/ExportHtml.js

589 lines
17 KiB
JavaScript
Raw Normal View History

2011-07-06 14:57:07 +02:00
/**
* Copyright 2009 Google Inc.
2013-01-27 17:45:09 +01:00
*
2011-07-06 14:57:07 +02:00
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
2013-01-27 17:45:09 +01:00
*
2011-07-06 14:57:07 +02:00
* http://www.apache.org/licenses/LICENSE-2.0
2013-01-27 17:45:09 +01:00
*
2011-07-06 14:57:07 +02:00
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS-IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
2011-08-10 18:31:20 +02:00
2012-02-28 21:19:10 +01:00
2011-07-06 14:57:07 +02:00
var async = require("async");
var Changeset = require("ep_etherpad-lite/static/js/Changeset");
2011-07-27 19:52:23 +02:00
var padManager = require("../db/PadManager");
var ERR = require("async-stacktrace");
var Security = require('ep_etherpad-lite/static/js/security');
2012-08-11 20:02:01 +02:00
var hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks');
var _analyzeLine = require('./ExportHelper')._analyzeLine;
var _encodeWhitespace = require('./ExportHelper')._encodeWhitespace;
2011-07-06 14:57:07 +02:00
2011-08-10 18:31:20 +02:00
/**
 * Renders a pad, or one specific revision of it, as an HTML fragment.
 *
 * @param {Object} pad - pad object; its `atext` is used when no revision is
 *   requested, otherwise `pad.getInternalRevisionAText` is queried
 * @param {Number} [revNum] - revision to export; `undefined`/`null` exports
 *   the pad's current `atext`
 * @param {Function} callback - called as `callback(err)` on failure or
 *   `callback(null, html)` on success
 */
function getPadHTML(pad, revNum, callback)
{
  var atext = pad.atext;
  var html;

  async.waterfall([
    // fetch revision atext
    function (callback)
    {
      // loose comparison on purpose: matches both undefined and null
      if (revNum != undefined)
      {
        pad.getInternalRevisionAText(revNum, function (err, revisionAtext)
        {
          if(ERR(err, callback)) return;
          atext = revisionAtext;
          callback();
        });
      }
      else
      {
        callback(null);
      }
    },

    // convert atext to html
    function (callback)
    {
      html = getHTMLFromAtext(pad, atext);
      callback(null);
    }],
  // run final callback
  function (err)
  {
    if(ERR(err, callback)) return;
    callback(null, html);
  });
}
// public API of this module
exports.getPadHTML = getPadHTML;
exports.getHTMLFromAtext = getHTMLFromAtext;
2013-01-27 18:25:50 +01:00
/**
 * Converts an attributed text into an HTML fragment.
 *
 * Inline attributes (headings, bold, italic, ...) become nested tags, URLs
 * found in the text are wrapped in anchors, and list lines are emitted as
 * nested <ul>/<ol> elements. When `authorColors` is given, a <style> element
 * with one class per author is prepended and authored/removed text is wrapped
 * in <span class="..."> elements.
 *
 * @param {Object} pad - pad object; only `pad.apool()` is read here, the pad
 *   itself is handed to the "exportHtmlAdditionalTags" hook
 * @param {Object} atext - attributed text with `text` and `attribs`
 * @param {Object} [authorColors] - map of author id to CSS color; enables the
 *   author/removed highlighting when truthy
 * @returns {String} the HTML fragment (no <html>/<body> wrapper)
 */
function getHTMLFromAtext(pad, atext, authorColors)
{
  var apool = pad.apool();
  // the text always ends with a newline; drop it before splitting into lines
  var textLines = atext.text.slice(0, -1).split('\n');
  var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text);

  var tags = ['h1', 'h2', 'strong', 'em', 'u', 's'];
  var props = ['heading1', 'heading2', 'bold', 'italic', 'underline', 'strikethrough'];

  // let plugins register additional attributes that map 1:1 to a tag name
  // NOTE(review): tags/props are extended inside the hook callback and `err`
  // is not inspected; if the hook framework invokes the callback
  // asynchronously the additions arrive after rendering -- confirm against
  // hooks.aCallAll.
  hooks.aCallAll("exportHtmlAdditionalTags", pad, function(err, newProps){
    newProps.forEach(function (propName, i){
      tags.push(propName);
      props.push(propName);
    });
  });

  // holds a map of used styling attributes (*1, *2, etc) in the apool
  // and maps them to an index in props
  // *3:2 -> the attribute *3 means strong
  // *2:5 -> the attribute *2 means s(trikethrough)
  var anumMap = {};
  var css = "";

  var stripDotFromAuthorID = function(id){
    return id.replace(/\./g,'_');
  };

  if(authorColors){
    css+="<style>\n";

    for (var a in apool.numToAttrib) {
      var attr = apool.numToAttrib[a];
      var propName;
      var newLength;

      //skip non author attributes
      if(attr[0] === "author" && attr[1] !== ""){
        //add to props array
        propName = "author" + stripDotFromAuthorID(attr[1]);
        newLength = props.push(propName);
        anumMap[a] = newLength -1;

        css+="." + propName + " {background-color: " + authorColors[attr[1]]+ "}\n";
      } else if(attr[0] === "removed") {
        propName = "removed";
        newLength = props.push(propName);
        anumMap[a] = newLength -1;

        css+=".removed {text-decoration: line-through; " +
             "-ms-filter:'progid:DXImageTransform.Microsoft.Alpha(Opacity=80)'; "+
             "filter: alpha(opacity=80); "+
             "opacity: 0.8; "+
             "}\n";
      }
    }

    css+="</style>";
  }

  // iterates over all props(h1,h2,strong,...), checks if it is used in
  // this pad, and if yes puts its attrib id->props value into anumMap
  props.forEach(function (propName, i)
  {
    var propTrueNum = apool.putAttrib([propName, true], true);
    if (propTrueNum >= 0)
    {
      anumMap[propTrueNum] = i;
    }
  });

  /**
   * Renders the text of a single line (without list markup) as HTML.
   *
   * @param {String} text - plain text of the line
   * @param {String} attribs - attribution string belonging to `text`
   * @returns {String} HTML for the line
   */
  function getLineHTML(text, attribs)
  {
    // Use order of tags (b/i/u) as order of nesting, for simplicity
    // and decent nesting.  For example,
    // <b>Just bold<b> <b><i>Bold and italics</i></b> <i>Just italics</i>
    // becomes
    // <b>Just bold <i>Bold and italics</i></b> <i>Just italics</i>
    var taker = Changeset.stringIterator(text);
    var assem = Changeset.stringAssembler();
    // indices into props/tags of the currently open tags, innermost first
    var openTags = [];

    // returns the CSS class to use for props[i], or false when props[i]
    // has to be rendered as a plain tag
    function getSpanClassFor(i){
      //return if author colors are disabled
      if (!authorColors) return false;

      var property = props[i];

      if(property.substr(0,6) === "author"){
        return stripDotFromAuthorID(property);
      }

      if(property === "removed"){
        return "removed";
      }

      return false;
    }

    // opens the tag (or span) for props[i] and records it as innermost
    function emitOpenTag(i)
    {
      openTags.unshift(i);
      var spanClass = getSpanClassFor(i);

      if(spanClass){
        assem.append('<span class="');
        assem.append(spanClass);
        assem.append('">');
      } else {
        assem.append('<');
        assem.append(tags[i]);
        assem.append('>');
      }
    }

    // this closes an open tag and removes its reference from openTags
    function emitCloseTag(i)
    {
      openTags.shift();
      var spanClass = getSpanClassFor(i);

      if(spanClass){
        assem.append('</span>');
      } else {
        assem.append('</');
        assem.append(tags[i]);
        assem.append('>');
      }
    }

    var urls = _findURLs(text);

    // number of characters of `text` that have been emitted so far
    var idx = 0;

    // emits the next numChars characters of the line with their inline tags
    function processNextChars(numChars)
    {
      if (numChars <= 0)
      {
        return;
      }

      var iter = Changeset.opIterator(Changeset.subattribution(attribs, idx, idx + numChars));
      idx += numChars;

      // this iterates over every op string and decides which tags to open or to close
      // based on the attribs used
      while (iter.hasNext())
      {
        var o = iter.next();
        var usedAttribs = [];

        // mark all attribs as used
        Changeset.eachAttribNumber(o.attribs, function (a)
        {
          if (a in anumMap)
          {
            usedAttribs.push(anumMap[a]); // i = 0 => bold, etc.
          }
        });
        var outermostTag = -1;
        // find the outer most open tag that is no longer used
        for (var i = openTags.length - 1; i >= 0; i--)
        {
          if (usedAttribs.indexOf(openTags[i]) === -1)
          {
            outermostTag = i;
            break;
          }
        }

        // close all tags upto the outer most
        if (outermostTag != -1)
        {
          while ( outermostTag >= 0 )
          {
            emitCloseTag(openTags[0]);
            outermostTag--;
          }
        }

        // open all tags that are used but not open
        for (i=0; i < usedAttribs.length; i++)
        {
          if (openTags.indexOf(usedAttribs[i]) === -1)
          {
            emitOpenTag(usedAttribs[i]);
          }
        }

        var chars = o.chars;
        if (o.lines)
        {
          chars--; // exclude newline at end of line, if present
        }

        var s = taker.take(chars);

        //removes the characters with the code 12 (form feed). Don't know
        //where they come from but they break the abiword parser and are
        //completely useless. A global regex is required: replace() with a
        //string pattern only removes the first occurrence.
        s = s.replace(/\x0c/g, "");

        assem.append(_encodeWhitespace(Security.escapeHTML(s)));
      } // end iteration over spans in line

      // close all the tags that are open after the last op
      while (openTags.length > 0)
      {
        emitCloseTag(openTags[0]);
      }
    } // end processNextChars

    if (urls)
    {
      urls.forEach(function (urlData)
      {
        var startIndex = urlData[0];
        var url = urlData[1];
        var urlLength = url.length;
        // text before the URL, then the URL itself wrapped in an anchor
        processNextChars(startIndex - idx);
        assem.append('<a href="' + Security.escapeHTMLAttribute(url) + '">');
        processNextChars(urlLength);
        assem.append('</a>');
      });
    }
    processNextChars(text.length - idx);

    return _processSpaces(assem.toString());
  } // end getLineHTML

  var pieces = [css];

  // Need to deal with constraints imposed on HTML lists; can
  // only gain one level of nesting at once, can't change type
  // mid-list, etc.
  // People might use weird indenting, e.g. skip a level,
  // so we want to do something reasonable there.  We also
  // want to deal gracefully with blank lines.
  // => keeps track of the parents level of indentation
  var lists = []; // e.g. [[1,'bullet'], [3,'bullet'], ...]
  var listLevels = []; // listLevel of every line processed so far
  for (var i = 0; i < textLines.length; i++)
  {
    var line = _analyzeLine(textLines[i], attribLines[i], apool);
    var lineContent = getLineHTML(line.text, line.aline);
    listLevels.push(line.listLevel);

    if (line.listLevel)//If we are inside a list
    {
      // do list stuff
      // index of the first open list that is at least as deep as this line,
      // or lists.length when this line is deeper than every open list
      var whichList = lists.length;
      for (var j = lists.length - 1; j >= 0; j--)
      {
        if (line.listLevel <= lists[j][0])
        {
          whichList = j;
        }
      }

      if (whichList >= lists.length)//means we are on a deeper level of indentation than the previous line
      {
        if(lists.length > 0){
          pieces.push('</li>');
        }
        lists.push([line.listLevel, line.listTypeName]);

        // if there is a previous list we need to open x tags, where x is the difference of the levels
        // if there is no previous list we need to open x tags, where x is the wanted level
        var toOpen = lists.length > 1 ? line.listLevel - lists[lists.length - 2][0] - 1 : line.listLevel - 1;

        if(line.listTypeName == "number")
        {
          if(toOpen > 0){
            pieces.push(new Array(toOpen + 1).join('<ol>'));
          }
          pieces.push('<ol class="'+line.listTypeName+'"><li>', lineContent || '<br>');
        }
        else
        {
          if(toOpen > 0){
            pieces.push(new Array(toOpen + 1).join('<ul>'));
          }
          pieces.push('<ul class="'+line.listTypeName+'"><li>', lineContent || '<br>');
        }
      }
      else//means we are getting closer to the lowest level of indentation or are at the same level
      {
        // number of levels to climb back, relative to the previous line
        var toClose = lists.length > 0 ? listLevels[listLevels.length - 2] - line.listLevel : 0;
        if( toClose > 0){
          pieces.push('</li>');
          if(lists[lists.length - 1][1] == "number")
          {
            pieces.push(new Array(toClose+1).join('</ol>'));
            pieces.push('<li>', lineContent || '<br>');
          }
          else
          {
            pieces.push(new Array(toClose+1).join('</ul>'));
            pieces.push('<li>', lineContent || '<br>');
          }
          lists = lists.slice(0,whichList+1);
        } else {
          pieces.push('</li><li>', lineContent || '<br>');
        }
      }
    }
    else//outside any list, need to close line.listLevel of lists
    {
      if(lists.length > 0){
        if(lists[lists.length - 1][1] == "number"){
          pieces.push('</li></ol>');
          // new Array(n).join(x) repeats x n-1 times: one list was closed above
          pieces.push(new Array(listLevels[listLevels.length - 2]).join('</ol>'));
        } else {
          pieces.push('</li></ul>');
          pieces.push(new Array(listLevels[listLevels.length - 2]).join('</ul>'));
        }
      }
      lists = [];

      // plugins may provide their own rendering for a non-list line
      var lineContentFromHook = hooks.callAllStr("getLineHTMLForExport",
      {
        line: line,
        apool: apool,
        attribLine: attribLines[i],
        text: textLines[i]
      }, " ", " ", "");
      if (lineContentFromHook)
      {
        pieces.push(lineContentFromHook, '');
      }
      else
      {
        pieces.push(lineContent, '<br>');
      }
    }
  }

  // close the lists that are still open at the end of the document
  for (var k = lists.length - 1; k >= 0; k--)
  {
    if(lists[k][1] == "number")
    {
      pieces.push('</li></ol>');
    }
    else
    {
      pieces.push('</li></ul>');
    }
  }

  return pieces.join('');
}
2011-08-10 18:31:20 +02:00
/**
 * Exports a pad as a complete HTML document.
 *
 * Loads the pad through padManager, collects additional CSS from the
 * "stylesForExport" hook, renders the pad body with getPadHTML and wraps it
 * in the document head/foot.
 *
 * @param {String} padId - id of the pad to export; also used (escaped) as title
 * @param {Number} [revNum] - revision to export, passed through to getPadHTML
 * @param {Boolean} noDocType - when truthy, the doctype and the whole <head>
 *   (title, charset, styles) are left out
 * @param {Function} callback - called as `callback(err)` or
 *   `callback(null, htmlDocument)`
 */
exports.getPadHTMLDocument = function (padId, revNum, noDocType, callback)
{
  padManager.getPad(padId, function (err, pad)
  {
    if(ERR(err, callback)) return;

    var stylesForExportCSS = "";
    // Include some Styles into the Head for Export
    hooks.aCallAll("stylesForExport", padId, function(err, stylesForExport){
      // forward hook failures instead of crashing on an undefined result
      if(ERR(err, callback)) return;

      stylesForExport.forEach(function(css){
        stylesForExportCSS += css;
      });

      // Core inclusion of head etc.
      var head =
        (noDocType ? '' : '<!doctype html>\n') +
        '<html lang="en">\n' + (noDocType ? '' : '<head>\n' +
          '<title>' + Security.escapeHTML(padId) + '</title>\n' +
          '<meta charset="utf-8">\n' +
          '<style> * { font-family: arial, sans-serif;\n' +
            'font-size: 13px;\n' +
            'line-height: 17px; }' +
            'ul.indent { list-style-type: none; }' +
            'ol { list-style-type: none; }' +
            'body > ol { counter-reset: first second; }' +
            'ol > li:before {' +
            'content: counter(first) ". " ;'+
            'counter-increment: first;}' +
            'ol > li > ol > li:before {' +
            'content: counter(first) "." counter(second) ". " ;'+
            'counter-increment: second;}' +
            'ol > li > ol > li > ol > li:before {' +
            'content: counter(first) "." counter(second) "." counter(third) ". ";'+
            'counter-increment: third;}' +
            'ol > li > ol > li > ol > li > ol > li:before {' +
            'content: counter(first) "." counter(second) "." counter(third) "." counter(fourth) ". ";'+
            'counter-increment: fourth;}' +
            'ol > li > ol > li > ol > li > ol > li > ol > li:before {' +
            'content: counter(first) "." counter(second) "." counter(third) "." counter(fourth) "." counter(fifth) ". ";'+
            'counter-increment: fifth;}' +
            'ol > li > ol > li > ol > li > ol > li > ol > li > ol > li:before {' +
            'content: counter(first) "." counter(second) "." counter(third) "." counter(fourth) "." counter(fifth) "." counter(sixth) ". ";'+
            'counter-increment: sixth;}' +
            stylesForExportCSS +
            '</style>\n' + '</head>\n') +
        '<body>';

      var foot = '</body>\n</html>\n';

      getPadHTML(pad, revNum, function (err, html)
      {
        if(ERR(err, callback)) return;
        callback(null, head + html + foot);
      });
    });
  });
};
2011-07-06 14:57:07 +02:00
// copied from ACE
// a single "word" character: ASCII digits/letters plus a set of non-ASCII
// letter ranges
var _REGEX_WORDCHAR = /[\u0030-\u0039\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u1FFF\u3040-\u9FFF\uF900-\uFDFF\uFE70-\uFEFE\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFDC]/;
var _REGEX_SPACE = /\s/;
// a single character that may appear inside a URL
var _REGEX_URLCHAR = new RegExp('(' + /[-:@a-zA-Z0-9_.,~%+\/\\?=&#;()$]/.source + '|' + _REGEX_WORDCHAR.source + ')');
// scheme (or "mailto:") followed by URL characters; the lookahead keeps a
// trailing ':', '.', ',' or ';' out of the match
var _REGEX_URL = new RegExp(/(?:(?:https?|s?ftp|ftps|file|smb|afp|nfs|(x-)?man|gopher|txmt):\/\/|mailto:)/.source + _REGEX_URLCHAR.source + '*(?![:.,;])' + _REGEX_URLCHAR.source, 'g');

/**
 * Finds all URLs in a piece of text.
 *
 * @param {String} text - text to scan
 * @returns {?Array} null if no URLs, or
 *   [[startIndex1, url1], [startIndex2, url2], ...]
 */
function _findURLs(text)
{
  // _REGEX_URL is global and therefore stateful: always scan from the start
  _REGEX_URL.lastIndex = 0;
  var urls = null;
  var execResult;
  while ((execResult = _REGEX_URL.exec(text)))
  {
    urls = (urls || []);
    var startIndex = execResult.index;
    var url = execResult[0];
    urls.push([startIndex, url]);
  }

  return urls;
}
// copied from ACE
/**
 * Replaces plain spaces in a line of HTML by &nbsp; where a browser would
 * otherwise collapse them, while still allowing the line to wrap:
 * the last space of a run stays a normal space, all other spaces of the run,
 * a space at the end of the line and a space at the beginning of the line
 * become &nbsp;. Tags are passed through untouched and are ignored when
 * deciding what is the beginning/end of the line.
 *
 * @param {String} s - HTML of a single line
 * @returns {String} the line with the affected spaces replaced by &nbsp;
 */
function _processSpaces(s){
  // tokenize into tags, single spaces and runs of any other text
  var parts = s.match(/<[^>]*>?| |[^ <]+/g) || [];
  var i;
  var p;

  var endOfLine = true;
  var beforeSpace = false;
  // last space in a run is normal, others are nbsp,
  // end of line is nbsp
  for (i = parts.length - 1; i >= 0; i--){
    p = parts[i];
    if (p === " "){
      if (endOfLine || beforeSpace) parts[i] = '&nbsp;';
      endOfLine = false;
      beforeSpace = true;
    }
    else if (p.charAt(0) !== "<"){
      // real text (not a tag) ends the current run of spaces
      endOfLine = false;
      beforeSpace = false;
    }
  }

  // beginning of line is nbsp
  for (i = 0; i < parts.length; i++){
    p = parts[i];
    if (p === " "){
      parts[i] = '&nbsp;';
      break;
    }
    else if (p.charAt(0) !== "<"){
      break;
    }
  }

  return parts.join('');
}