// Source: pad.libre-service.eu-etherpad/src/node/utils/ExportTxt.js
'use strict';
/**
 * TXT export
 */

/*
 * 2013 John McLear
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS-IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
const Changeset = require('../../static/js/Changeset');
2020-11-23 19:24:19 +01:00
const padManager = require('../db/PadManager');
const _analyzeLine = require('./ExportHelper')._analyzeLine;
2013-02-10 18:34:34 +01:00
/**
 * Renders a pad as plain text.
 *
 * This is slightly different from the HTML export in that the selected atext
 * is handed to getTXTFromAtext.
 *
 * @param {object} pad - Pad to export. `pad.atext` is used when no revision is
 *     requested; otherwise `pad.getInternalRevisionAText()` is awaited.
 * @param {number} [revNum] - Revision to export. When `undefined`, the pad's
 *     current atext is exported.
 * @returns {Promise<string>} The result of getTXTFromAtext for the chosen atext.
 */
const getPadTXT = async (pad, revNum) => {
  // No revision requested: export the pad's current atext as-is.
  if (revNum === undefined) {
    return getTXTFromAtext(pad, pad.atext);
  }

  // Otherwise fetch the atext of the requested revision and convert that to TXT.
  const revisionAtext = await pad.getInternalRevisionAText(revNum);
  return getTXTFromAtext(pad, revisionAtext);
};
2013-02-10 18:34:34 +01:00
/**
 * Converts an attributed text into its plain-text (TXT) export.
 *
 * This is different from the functionality provided in ExportHtml: inline
 * formatting is not rendered at all. Only list structure is expressed:
 *   - every list line is indented with one tab per list level,
 *   - bullet items are prefixed with `* `,
 *   - numbered items are prefixed with their hierarchical number, e.g.
 *
 *       1. foo
 *         1.1. bah
 *       2. latte
 *         2.1. latte
 *
 * @param {object} pad - Pad being exported; only `pad.apool()` is used, and
 *     its result is passed on to `_analyzeLine`.
 * @param {object} atext - Attributed text; `atext.text` and `atext.attribs`
 *     are read.
 * @param {*} [authorColors] - Unused. Kept so existing callers that pass a
 *     third argument keep working.
 * @returns {string} The exported text; every line is terminated by `\n`.
 */
const getTXTFromAtext = (pad, atext, authorColors) => {
  const apool = pad.apool();
  // The last character of the text is dropped before splitting (presumably the
  // pad's trailing newline), so it does not produce an extra empty line.
  const textLines = atext.text.slice(0, -1).split('\n');
  const attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text);

  /**
   * Rebuilds the text of a single line by walking the attribute operations
   * that cover it.
   *
   * The previous implementation also tracked bold/italic/heading state per
   * operation, but never used the result; a plain-text export has no markup,
   * so that bookkeeping has been removed.
   *
   * @param {string} text - Line text as returned by `_analyzeLine`.
   * @param {string} attribs - Attribution string of the line.
   * @returns {string} The text taken for the line.
   */
  const getLineTXT = (text, attribs) => {
    const assem = Changeset.stringAssembler();
    if (text.length > 0) {
      const taker = Changeset.stringIterator(text);
      const iter = Changeset.opIterator(Changeset.subattribution(attribs, 0, text.length));
      while (iter.hasNext()) {
        const op = iter.next();
        // exclude newline at end of line, if present
        const chars = op.lines ? op.chars - 1 : op.chars;
        assem.append(taker.take(chars));
      }
    }
    return assem.toString();
  };

  const pieces = [];
  // Running item counters of the current numbered list, keyed by list level.
  const listNumbers = {};

  for (let i = 0; i < textLines.length; i++) {
    const line = _analyzeLine(textLines[i], attribLines[i], apool);
    let lineContent = getLineTXT(line.text, line.aline);

    if (line.listTypeName === 'bullet') {
      lineContent = `* ${lineContent}`; // add a bullet
    }

    if (line.listTypeName !== 'number') {
      // We're no longer in a numbered list, so counting starts over.
      for (const level of Object.keys(listNumbers)) {
        delete listNumbers[level];
      }
    }

    if (line.listLevel > 0) {
      // One tab of indentation per list level.
      pieces.push('\t'.repeat(line.listLevel));

      if (line.listTypeName === 'number') {
        // Arriving at this level restarts every deeper level. Dropping only
        // the previously visited level would leave stale counters behind
        // (e.g. levels 1, 2, 3, 1, 2 used to end in "2.2." instead of "2.1.").
        for (const level of Object.keys(listNumbers)) {
          if (Number(level) > line.listLevel) {
            delete listNumbers[level];
          }
        }
        listNumbers[line.listLevel] = (listNumbers[line.listLevel] || 0) + 1;

        // Prefix the numbers of all ancestor levels. People might skip a
        // level when indenting; such a level has no counter yet and is
        // shown as 1 rather than the literal text "undefined".
        for (let level = 1; level < line.listLevel; level++) {
          pieces.push(`${listNumbers[level] || 1}.`);
        }
        pieces.push(`${listNumbers[line.listLevel]}. `);
      }
    }

    pieces.push(lineContent, '\n');
  }

  return pieces.join('');
};
exports.getTXTFromAtext = getTXTFromAtext;
2013-02-10 18:34:34 +01:00
2021-01-21 22:06:52 +01:00
exports.getPadTXTDocument = async (padId, revNum) => {
2020-11-23 19:24:19 +01:00
const pad = await padManager.getPad(padId);
return getPadTXT(pad, revNum);
2020-11-23 19:24:19 +01:00
};