2011-11-17 10:17:52 +01:00
|
|
|
/**
|
|
|
|
* Copyright Yaco Sistemas S.L. 2011.
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS-IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
2020-11-23 19:24:19 +01:00
|
|
|
const log4js = require('log4js');
|
|
|
|
const Changeset = require('ep_etherpad-lite/static/js/Changeset');
|
|
|
|
const contentcollector = require('ep_etherpad-lite/static/js/contentcollector');
|
|
|
|
const cheerio = require('cheerio');
|
|
|
|
const rehype = require('rehype');
|
2020-12-05 09:14:09 +01:00
|
|
|
const minifyWhitespace = require('rehype-minify-whitespace');
|
2011-11-17 10:17:52 +01:00
|
|
|
|
2020-09-17 03:06:15 +02:00
|
|
|
exports.setPadHTML = async (pad, html) => {
|
2020-11-23 19:24:19 +01:00
|
|
|
const apiLogger = log4js.getLogger('ImportHtml');
|
2011-11-17 10:17:52 +01:00
|
|
|
|
2020-06-05 21:54:16 +02:00
|
|
|
rehype()
|
2020-12-16 22:51:43 +01:00
|
|
|
.use(minifyWhitespace, {newlines: false})
|
|
|
|
.process(html, (err, output) => {
|
|
|
|
html = String(output);
|
|
|
|
});
|
2020-06-05 21:54:16 +02:00
|
|
|
|
2020-11-23 19:24:19 +01:00
|
|
|
const $ = cheerio.load(html);
|
2013-09-21 17:11:56 +02:00
|
|
|
|
2014-11-26 16:19:22 +01:00
|
|
|
// Appends a line break, used by Etherpad to ensure a caret is available
|
|
|
|
// below the last line of an import
|
2020-11-23 19:24:19 +01:00
|
|
|
$('body').append('<p></p>');
|
2014-11-26 16:19:22 +01:00
|
|
|
|
2020-11-23 19:24:19 +01:00
|
|
|
const doc = $('html')[0];
|
2011-11-17 10:17:52 +01:00
|
|
|
apiLogger.debug('html:');
|
|
|
|
apiLogger.debug(html);
|
|
|
|
|
|
|
|
// Convert a dom tree into a list of lines and attribute liens
|
|
|
|
// using the content collector object
|
2020-11-23 19:24:19 +01:00
|
|
|
const cc = contentcollector.makeContentCollector(true, null, pad.pool);
|
2019-02-08 23:20:57 +01:00
|
|
|
try {
|
|
|
|
// we use a try here because if the HTML is bad it will blow up
|
2014-11-25 18:26:09 +01:00
|
|
|
cc.collectContent(doc);
|
2020-11-23 19:24:19 +01:00
|
|
|
} catch (e) {
|
|
|
|
apiLogger.warn('HTML was not properly formed', e);
|
2019-02-08 23:20:57 +01:00
|
|
|
|
|
|
|
// don't process the HTML because it was bad
|
2019-01-31 09:55:36 +01:00
|
|
|
throw e;
|
2013-02-13 00:23:44 +01:00
|
|
|
}
|
|
|
|
|
2020-11-23 19:24:19 +01:00
|
|
|
const result = cc.finish();
|
2013-02-13 00:23:44 +01:00
|
|
|
|
2011-11-17 10:17:52 +01:00
|
|
|
apiLogger.debug('Lines:');
|
2019-02-08 23:20:57 +01:00
|
|
|
|
2020-11-23 19:24:19 +01:00
|
|
|
let i;
|
2019-01-31 09:55:36 +01:00
|
|
|
for (i = 0; i < result.lines.length; i++) {
|
2020-11-23 19:24:19 +01:00
|
|
|
apiLogger.debug(`Line ${i + 1} text: ${result.lines[i]}`);
|
|
|
|
apiLogger.debug(`Line ${i + 1} attributes: ${result.lineAttribs[i]}`);
|
2011-11-17 10:17:52 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Get the new plain text and its attributes
|
2020-11-23 19:24:19 +01:00
|
|
|
const newText = result.lines.join('\n');
|
2011-11-17 10:17:52 +01:00
|
|
|
apiLogger.debug('newText:');
|
|
|
|
apiLogger.debug(newText);
|
2020-11-23 19:24:19 +01:00
|
|
|
const newAttribs = `${result.lineAttribs.join('|1+1')}|1+1`;
|
2011-11-17 10:17:52 +01:00
|
|
|
|
2020-11-23 19:24:19 +01:00
|
|
|
function eachAttribRun(attribs, func /* (startInNewText, endInNewText, attribs)*/) {
|
|
|
|
const attribsIter = Changeset.opIterator(attribs);
|
|
|
|
let textIndex = 0;
|
|
|
|
const newTextStart = 0;
|
|
|
|
const newTextEnd = newText.length;
|
2019-02-08 23:20:57 +01:00
|
|
|
while (attribsIter.hasNext()) {
|
2020-11-23 19:24:19 +01:00
|
|
|
const op = attribsIter.next();
|
|
|
|
const nextIndex = textIndex + op.chars;
|
2019-02-08 23:20:57 +01:00
|
|
|
if (!(nextIndex <= newTextStart || textIndex >= newTextEnd)) {
|
2011-11-17 10:17:52 +01:00
|
|
|
func(Math.max(newTextStart, textIndex), Math.min(newTextEnd, nextIndex), op.attribs);
|
|
|
|
}
|
|
|
|
textIndex = nextIndex;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// create a new changeset with a helper builder object
|
2020-11-23 19:24:19 +01:00
|
|
|
const builder = Changeset.builder(1);
|
2011-11-17 10:17:52 +01:00
|
|
|
|
|
|
|
// assemble each line into the builder
|
2020-11-23 19:24:19 +01:00
|
|
|
eachAttribRun(newAttribs, (start, end, attribs) => {
|
2011-11-17 10:17:52 +01:00
|
|
|
builder.insert(newText.substring(start, end), attribs);
|
|
|
|
});
|
|
|
|
|
|
|
|
// the changeset is ready!
|
2020-11-23 19:24:19 +01:00
|
|
|
const theChangeset = builder.toString();
|
2019-02-08 23:20:57 +01:00
|
|
|
|
2020-11-23 19:24:19 +01:00
|
|
|
apiLogger.debug(`The changeset: ${theChangeset}`);
|
2020-09-17 03:06:15 +02:00
|
|
|
await Promise.all([
|
|
|
|
pad.setText('\n'),
|
|
|
|
pad.appendRevision(theChangeset),
|
|
|
|
]);
|
2020-11-23 19:24:19 +01:00
|
|
|
};
|