beautify exporthtml

This commit is contained in:
Peter 'Pita' Martischka 2011-08-10 17:31:20 +01:00
parent 33ec98dd35
commit dc10985ba3

View file

@ -13,27 +13,30 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
var async = require("async"); var async = require("async");
var Changeset = require("./Changeset"); var Changeset = require("./Changeset");
var padManager = require("../db/PadManager"); var padManager = require("../db/PadManager");
function getPadPlainText(pad, revNum)
function getPadPlainText(pad, revNum) { {
var atext = ((revNum !== undefined) ? pad.getInternalRevisionAText(revNum) : var atext = ((revNum !== undefined) ? pad.getInternalRevisionAText(revNum) : pad.atext());
pad.atext()); var textLines = atext.text.slice(0, -1).split('\n');
var textLines = atext.text.slice(0,-1).split('\n');
var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text); var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text);
var apool = pad.pool(); var apool = pad.pool();
var pieces = []; var pieces = [];
for(var i=0;i<textLines.length;i++) { for (var i = 0; i < textLines.length; i++)
{
var line = _analyzeLine(textLines[i], attribLines[i], apool); var line = _analyzeLine(textLines[i], attribLines[i], apool);
if (line.listLevel) { if (line.listLevel)
var numSpaces = line.listLevel*2-1; {
var numSpaces = line.listLevel * 2 - 1;
var bullet = '*'; var bullet = '*';
pieces.push(new Array(numSpaces+1).join(' '), bullet, ' ', line.text, '\n'); pieces.push(new Array(numSpaces + 1).join(' '), bullet, ' ', line.text, '\n');
} }
else { else
{
pieces.push(line.text, '\n'); pieces.push(line.text, '\n');
} }
} }
@ -41,52 +44,68 @@ function getPadPlainText(pad, revNum) {
return pieces.join(''); return pieces.join('');
} }
function getPadHTML(pad, revNum, callback) { function getPadHTML(pad, revNum, callback)
{
var atext = pad.atext; var atext = pad.atext;
var html; var html;
async.waterfall([ async.waterfall([
// fetch revision atext // fetch revision atext
function (callback) {
if (revNum != undefined) {
pad.getInternalRevisionAText(revNum, function (err, revisionAtext) { function (callback)
atext = revisionAtext; {
callback(err); if (revNum != undefined)
}); {
} else { pad.getInternalRevisionAText(revNum, function (err, revisionAtext)
callback(null); {
} atext = revisionAtext;
}, callback(err);
});
// convert atext to html }
function (callback) { else
html = getHTMLFromAtext(pad, atext); {
callback(null); callback(null);
} }
], },
// run final callback
function (err) { // convert atext to html
callback(err, html);
}
); function (callback)
{
html = getHTMLFromAtext(pad, atext);
callback(null);
}],
// run final callback
function (err)
{
callback(err, html);
});
} }
function getHTMLFromAtext(pad, atext) { function getHTMLFromAtext(pad, atext)
{
var apool = pad.apool(); var apool = pad.apool();
var textLines = atext.text.slice(0,-1).split('\n'); var textLines = atext.text.slice(0, -1).split('\n');
var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text); var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text);
var tags = ['h1', 'h2', 'strong','em','u','s']; var tags = ['h1', 'h2', 'strong', 'em', 'u', 's'];
var props = ['heading1', 'heading2', 'bold','italic','underline','strikethrough']; var props = ['heading1', 'heading2', 'bold', 'italic', 'underline', 'strikethrough'];
var anumMap = {}; var anumMap = {};
props.forEach(function(propName, i) { props.forEach(function (propName, i)
var propTrueNum = apool.putAttrib([propName,true], true); {
if (propTrueNum >= 0) { var propTrueNum = apool.putAttrib([propName, true], true);
if (propTrueNum >= 0)
{
anumMap[propTrueNum] = i; anumMap[propTrueNum] = i;
} }
}); });
function getLineHTML(text, attribs) { function getLineHTML(text, attribs)
{
var propVals = [false, false, false]; var propVals = [false, false, false];
var ENTER = 1; var ENTER = 1;
var STAY = 2; var STAY = 2;
@ -97,16 +116,18 @@ function getHTMLFromAtext(pad, atext) {
// <b>Just bold<b> <b><i>Bold and italics</i></b> <i>Just italics</i> // <b>Just bold<b> <b><i>Bold and italics</i></b> <i>Just italics</i>
// becomes // becomes
// <b>Just bold <i>Bold and italics</i></b> <i>Just italics</i> // <b>Just bold <i>Bold and italics</i></b> <i>Just italics</i>
var taker = Changeset.stringIterator(text); var taker = Changeset.stringIterator(text);
var assem = Changeset.stringAssembler(); var assem = Changeset.stringAssembler();
function emitOpenTag(i) { function emitOpenTag(i)
{
assem.append('<'); assem.append('<');
assem.append(tags[i]); assem.append(tags[i]);
assem.append('>'); assem.append('>');
} }
function emitCloseTag(i) {
function emitCloseTag(i)
{
assem.append('</'); assem.append('</');
assem.append(tags[i]); assem.append(tags[i]);
assem.append('>'); assem.append('>');
@ -115,101 +136,123 @@ function getHTMLFromAtext(pad, atext) {
var urls = _findURLs(text); var urls = _findURLs(text);
var idx = 0; var idx = 0;
function processNextChars(numChars) {
if (numChars <= 0) { function processNextChars(numChars)
{
if (numChars <= 0)
{
return; return;
} }
var iter = Changeset.opIterator(Changeset.subattribution(attribs, var iter = Changeset.opIterator(Changeset.subattribution(attribs, idx, idx + numChars));
idx, idx+numChars));
idx += numChars; idx += numChars;
while (iter.hasNext()) { while (iter.hasNext())
{
var o = iter.next(); var o = iter.next();
var propChanged = false; var propChanged = false;
Changeset.eachAttribNumber(o.attribs, function(a) { Changeset.eachAttribNumber(o.attribs, function (a)
if (a in anumMap) { {
if (a in anumMap)
{
var i = anumMap[a]; // i = 0 => bold, etc. var i = anumMap[a]; // i = 0 => bold, etc.
if (! propVals[i]) { if (!propVals[i])
{
propVals[i] = ENTER; propVals[i] = ENTER;
propChanged = true; propChanged = true;
} }
else { else
{
propVals[i] = STAY; propVals[i] = STAY;
} }
} }
}); });
for(var i=0;i<propVals.length;i++) { for (var i = 0; i < propVals.length; i++)
if (propVals[i] === true) { {
if (propVals[i] === true)
{
propVals[i] = LEAVE; propVals[i] = LEAVE;
propChanged = true; propChanged = true;
} }
else if (propVals[i] === STAY) { else if (propVals[i] === STAY)
{
propVals[i] = true; // set it back propVals[i] = true; // set it back
} }
} }
// now each member of propVal is in {false,LEAVE,ENTER,true} // now each member of propVal is in {false,LEAVE,ENTER,true}
// according to what happens at start of span // according to what happens at start of span
if (propChanged)
if (propChanged) { {
// leaving bold (e.g.) also leaves italics, etc. // leaving bold (e.g.) also leaves italics, etc.
var left = false; var left = false;
for(var i=0;i<propVals.length;i++) { for (var i = 0; i < propVals.length; i++)
{
var v = propVals[i]; var v = propVals[i];
if (! left) { if (!left)
if (v === LEAVE) { {
if (v === LEAVE)
{
left = true; left = true;
} }
} }
else { else
if (v === true) { {
if (v === true)
{
propVals[i] = STAY; // tag will be closed and re-opened propVals[i] = STAY; // tag will be closed and re-opened
} }
} }
} }
for(var i=propVals.length-1; i>=0; i--) { for (var i = propVals.length - 1; i >= 0; i--)
if (propVals[i] === LEAVE) { {
if (propVals[i] === LEAVE)
{
emitCloseTag(i); emitCloseTag(i);
propVals[i] = false; propVals[i] = false;
} }
else if (propVals[i] === STAY) { else if (propVals[i] === STAY)
{
emitCloseTag(i); emitCloseTag(i);
} }
} }
for(var i=0; i<propVals.length; i++) { for (var i = 0; i < propVals.length; i++)
if (propVals[i] === ENTER || propVals[i] === STAY) { {
if (propVals[i] === ENTER || propVals[i] === STAY)
{
emitOpenTag(i); emitOpenTag(i);
propVals[i] = true; propVals[i] = true;
} }
} }
// propVals is now all {true,false} again // propVals is now all {true,false} again
} // end if (propChanged) } // end if (propChanged)
var chars = o.chars; var chars = o.chars;
if (o.lines) { if (o.lines)
{
chars--; // exclude newline at end of line, if present chars--; // exclude newline at end of line, if present
} }
var s = taker.take(chars); var s = taker.take(chars);
assem.append(_escapeHTML(s)); assem.append(_escapeHTML(s));
} // end iteration over spans in line } // end iteration over spans in line
for (var i = propVals.length - 1; i >= 0; i--)
for(var i=propVals.length-1; i>=0; i--) { {
if (propVals[i]) { if (propVals[i])
{
emitCloseTag(i); emitCloseTag(i);
propVals[i] = false; propVals[i] = false;
} }
} }
} // end processNextChars } // end processNextChars
if (urls)
if (urls) { {
urls.forEach(function(urlData) { urls.forEach(function (urlData)
{
var startIndex = urlData[0]; var startIndex = urlData[0];
var url = urlData[1]; var url = urlData[1];
var urlLength = url.length; var urlLength = url.length;
processNextChars(startIndex - idx); processNextChars(startIndex - idx);
assem.append('<a href="'+url.replace(/\"/g, '&quot;')+'">'); assem.append('<a href="' + url.replace(/\"/g, '&quot;') + '">');
processNextChars(urlLength); processNextChars(urlLength);
assem.append('</a>'); assem.append('</a>');
}); });
@ -218,7 +261,6 @@ function getHTMLFromAtext(pad, atext) {
return _processSpaces(assem.toString()); return _processSpaces(assem.toString());
} // end getLineHTML } // end getLineHTML
var pieces = []; var pieces = [];
// Need to deal with constraints imposed on HTML lists; can // Need to deal with constraints imposed on HTML lists; can
@ -228,79 +270,98 @@ function getHTMLFromAtext(pad, atext) {
// so we want to do something reasonable there. We also // so we want to do something reasonable there. We also
// want to deal gracefully with blank lines. // want to deal gracefully with blank lines.
var lists = []; // e.g. [[1,'bullet'], [3,'bullet'], ...] var lists = []; // e.g. [[1,'bullet'], [3,'bullet'], ...]
for(var i=0;i<textLines.length;i++) { for (var i = 0; i < textLines.length; i++)
{
var line = _analyzeLine(textLines[i], attribLines[i], apool); var line = _analyzeLine(textLines[i], attribLines[i], apool);
var lineContent = getLineHTML(line.text, line.aline); var lineContent = getLineHTML(line.text, line.aline);
if (line.listLevel || lists.length > 0) { if (line.listLevel || lists.length > 0)
{
// do list stuff // do list stuff
var whichList = -1; // index into lists or -1 var whichList = -1; // index into lists or -1
if (line.listLevel) { if (line.listLevel)
{
whichList = lists.length; whichList = lists.length;
for(var j=lists.length-1;j>=0;j--) { for (var j = lists.length - 1; j >= 0; j--)
if (line.listLevel <= lists[j][0]) { {
if (line.listLevel <= lists[j][0])
{
whichList = j; whichList = j;
} }
} }
} }
if (whichList >= lists.length) { if (whichList >= lists.length)
{
lists.push([line.listLevel, line.listTypeName]); lists.push([line.listLevel, line.listTypeName]);
pieces.push('<ul><li>', lineContent || '<br>'); pieces.push('<ul><li>', lineContent || '<br>');
} }
else if (whichList == -1) { else if (whichList == -1)
if (line.text) { {
if (line.text)
{
// non-blank line, end all lists // non-blank line, end all lists
pieces.push(new Array(lists.length+1).join('</li></ul\n>')); pieces.push(new Array(lists.length + 1).join('</li></ul\n>'));
lists.length = 0; lists.length = 0;
pieces.push(lineContent, '<br>'); pieces.push(lineContent, '<br>');
} }
else { else
{
pieces.push('<br><br>'); pieces.push('<br><br>');
} }
} }
else { else
while (whichList < lists.length-1) { {
while (whichList < lists.length - 1)
{
pieces.push('</li></ul>'); pieces.push('</li></ul>');
lists.length--; lists.length--;
} }
pieces.push('</li><li>', lineContent || '<br>'); pieces.push('</li><li>', lineContent || '<br>');
} }
} }
else { else
{
pieces.push(lineContent, '<br>'); pieces.push(lineContent, '<br>');
} }
} }
pieces.push(new Array(lists.length+1).join('</li></ul>')); pieces.push(new Array(lists.length + 1).join('</li></ul>'));
return pieces.join(''); return pieces.join('');
} }
function _analyzeLine(text, aline, apool) { function _analyzeLine(text, aline, apool)
{
var line = {}; var line = {};
// identify list // identify list
var lineMarker = 0; var lineMarker = 0;
line.listLevel = 0; line.listLevel = 0;
if (aline) { if (aline)
{
var opIter = Changeset.opIterator(aline); var opIter = Changeset.opIterator(aline);
if (opIter.hasNext()) { if (opIter.hasNext())
{
var listType = Changeset.opAttributeValue(opIter.next(), 'list', apool); var listType = Changeset.opAttributeValue(opIter.next(), 'list', apool);
if (listType) { if (listType)
{
lineMarker = 1; lineMarker = 1;
listType = /([a-z]+)([12345678])/.exec(listType); listType = /([a-z]+)([12345678])/.exec(listType);
if (listType) { if (listType)
{
line.listTypeName = listType[1]; line.listTypeName = listType[1];
line.listLevel = Number(listType[2]); line.listLevel = Number(listType[2]);
} }
} }
} }
} }
if (lineMarker) { if (lineMarker)
{
line.text = text.substring(1); line.text = text.substring(1);
line.aline = Changeset.subattribution(aline, 1); line.aline = Changeset.subattribution(aline, 1);
} }
else { else
{
line.text = text; line.text = text;
line.aline = aline; line.aline = aline;
} }
@ -308,37 +369,32 @@ function _analyzeLine(text, aline, apool) {
return line; return line;
} }
exports.getPadHTMLDocument = function(padId, revNum, noDocType, callback) { exports.getPadHTMLDocument = function (padId, revNum, noDocType, callback)
padManager.getPad(padId, function(err, pad) {
padManager.getPad(padId, function (err, pad)
{ {
if(err) if (err)
{ {
callback(err); callback(err);
return; return;
} }
var head = (noDocType?'':'<!doctype html>\n')+ var head = (noDocType ? '' : '<!doctype html>\n') + '<html lang="en">\n' + (noDocType ? '' : '<head>\n' + '<meta charset="utf-8">\n' + '<style> * { font-family: arial, sans-serif;\n' + 'font-size: 13px;\n' + 'line-height: 17px; }</style>\n' + '</head>\n') + '<body>';
'<html lang="en">\n'+
(noDocType?'':
'<head>\n'+
'<meta charset="utf-8">\n'+
'<style> * { font-family: arial, sans-serif;\n'+
'font-size: 13px;\n'+
'line-height: 17px; }</style>\n' +
'</head>\n')+
'<body>';
var foot = '</body>\n</html>\n'; var foot = '</body>\n</html>\n';
getPadHTML(pad, revNum, function (err, html) { getPadHTML(pad, revNum, function (err, html)
{
callback(err, head + html + foot); callback(err, head + html + foot);
}); });
}); });
} }
function _escapeHTML(s) { function _escapeHTML(s)
{
var re = /[&<>]/g; var re = /[&<>]/g;
if (! re.MAP) { if (!re.MAP)
{
// persisted across function calls! // persisted across function calls!
re.MAP = { re.MAP = {
'&': '&amp;', '&': '&amp;',
@ -346,53 +402,72 @@ function _escapeHTML(s) {
'>': '&gt;', '>': '&gt;',
}; };
} }
return s.replace(re, function(c) { return re.MAP[c]; }); return s.replace(re, function (c)
{
return re.MAP[c];
});
} }
// copied from ACE // copied from ACE
function _processSpaces(s) {
function _processSpaces(s)
{
var doesWrap = true; var doesWrap = true;
if (s.indexOf("<") < 0 && ! doesWrap) { if (s.indexOf("<") < 0 && !doesWrap)
{
// short-cut // short-cut
return s.replace(/ /g, '&nbsp;'); return s.replace(/ /g, '&nbsp;');
} }
var parts = []; var parts = [];
s.replace(/<[^>]*>?| |[^ <]+/g, function(m) { parts.push(m); }); s.replace(/<[^>]*>?| |[^ <]+/g, function (m)
if (doesWrap) { {
parts.push(m);
});
if (doesWrap)
{
var endOfLine = true; var endOfLine = true;
var beforeSpace = false; var beforeSpace = false;
// last space in a run is normal, others are nbsp, // last space in a run is normal, others are nbsp,
// end of line is nbsp // end of line is nbsp
for(var i=parts.length-1;i>=0;i--) { for (var i = parts.length - 1; i >= 0; i--)
{
var p = parts[i]; var p = parts[i];
if (p == " ") { if (p == " ")
if (endOfLine || beforeSpace) {
parts[i] = '&nbsp;'; if (endOfLine || beforeSpace) parts[i] = '&nbsp;';
endOfLine = false; endOfLine = false;
beforeSpace = true; beforeSpace = true;
} }
else if (p.charAt(0) != "<") { else if (p.charAt(0) != "<")
endOfLine = false; {
beforeSpace = false; endOfLine = false;
beforeSpace = false;
} }
} }
// beginning of line is nbsp // beginning of line is nbsp
for(var i=0;i<parts.length;i++) { for (var i = 0; i < parts.length; i++)
{
var p = parts[i]; var p = parts[i];
if (p == " ") { if (p == " ")
parts[i] = '&nbsp;'; {
break; parts[i] = '&nbsp;';
break;
} }
else if (p.charAt(0) != "<") { else if (p.charAt(0) != "<")
break; {
break;
} }
} }
} }
else { else
for(var i=0;i<parts.length;i++) { {
for (var i = 0; i < parts.length; i++)
{
var p = parts[i]; var p = parts[i];
if (p == " ") { if (p == " ")
parts[i] = '&nbsp;'; {
parts[i] = '&nbsp;';
} }
} }
} }
@ -403,15 +478,19 @@ function _processSpaces(s) {
// copied from ACE // copied from ACE
var _REGEX_WORDCHAR = /[\u0030-\u0039\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u1FFF\u3040-\u9FFF\uF900-\uFDFF\uFE70-\uFEFE\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFDC]/; var _REGEX_WORDCHAR = /[\u0030-\u0039\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u1FFF\u3040-\u9FFF\uF900-\uFDFF\uFE70-\uFEFE\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFDC]/;
var _REGEX_SPACE = /\s/; var _REGEX_SPACE = /\s/;
var _REGEX_URLCHAR = new RegExp('('+/[-:@a-zA-Z0-9_.,~%+\/\\?=&#;()$]/.source+'|'+_REGEX_WORDCHAR.source+')'); var _REGEX_URLCHAR = new RegExp('(' + /[-:@a-zA-Z0-9_.,~%+\/\\?=&#;()$]/.source + '|' + _REGEX_WORDCHAR.source + ')');
var _REGEX_URL = new RegExp(/(?:(?:https?|s?ftp|ftps|file|smb|afp|nfs|(x-)?man|gopher|txmt):\/\/|mailto:)/.source+_REGEX_URLCHAR.source+'*(?![:.,;])'+_REGEX_URLCHAR.source, 'g'); var _REGEX_URL = new RegExp(/(?:(?:https?|s?ftp|ftps|file|smb|afp|nfs|(x-)?man|gopher|txmt):\/\/|mailto:)/.source + _REGEX_URLCHAR.source + '*(?![:.,;])' + _REGEX_URLCHAR.source, 'g');
// returns null if no URLs, or [[startIndex1, url1], [startIndex2, url2], ...] // returns null if no URLs, or [[startIndex1, url1], [startIndex2, url2], ...]
function _findURLs(text) {
function _findURLs(text)
{
_REGEX_URL.lastIndex = 0; _REGEX_URL.lastIndex = 0;
var urls = null; var urls = null;
var execResult; var execResult;
while ((execResult = _REGEX_URL.exec(text))) { while ((execResult = _REGEX_URL.exec(text)))
{
urls = (urls || []); urls = (urls || []);
var startIndex = execResult.index; var startIndex = execResult.index;
var url = execResult[0]; var url = execResult[0];