tests for spaces (#4594)

This commit is contained in:
webzwo0i 2020-12-20 07:18:19 +01:00 committed by GitHub
parent a637920e55
commit 040057239e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 315 additions and 1 deletions

View file

@ -67,7 +67,163 @@ const testImports = {
input: '<html><body><ul class="indent"><li>indent</li><li>indent</ul></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body><ul class="indent"><li>indent</li><li>indent</ul><br></body></html>',
expectedText: '\tindent\n\tindent\n\n'
}
},
lineWithMultipleSpaces: {
description: 'Multiple spaces should be collapsed',
input: '<html><body>Text with more than one space.<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>Text with more than one space.<br><br></body></html>',
expectedText: 'Text with more than one space.\n\n'
},
lineWithMultipleNonBreakingAndNormalSpaces: {
// XXX the HTML between "than" and "one" looks strange
description: 'non-breaking space should be preserved, but can be replaced when it',
input: '<html><body>Text&nbsp;with&nbsp; more&nbsp;&nbsp;&nbsp;than &nbsp;one space.<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>Text with&nbsp; more&nbsp;&nbsp; than&nbsp; one space.<br><br></body></html>',
expectedText: 'Text with more than one space.\n\n'
},
multiplenbsp: {
description: 'Multiple non-breaking space should be preserved',
input: '<html><body>&nbsp;&nbsp;<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>&nbsp;&nbsp;<br><br></body></html>',
expectedText: ' \n\n'
},
multipleNonBreakingSpaceBetweenWords: {
description: 'A normal space is always inserted before a word',
input: '<html><body>&nbsp;&nbsp;word1&nbsp;&nbsp;word2&nbsp;&nbsp;&nbsp;word3<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>&nbsp; word1&nbsp; word2&nbsp;&nbsp; word3<br><br></body></html>',
expectedText: ' word1 word2 word3\n\n'
},
nonBreakingSpacePreceededBySpaceBetweenWords: {
description: 'A non-breaking space preceeded by a normal space',
input: '<html><body> &nbsp;word1 &nbsp;word2 &nbsp;word3<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>&nbsp;word1&nbsp; word2&nbsp; word3<br><br></body></html>',
expectedText: ' word1 word2 word3\n\n'
},
nonBreakingSpaceFollowededBySpaceBetweenWords: {
description: 'A non-breaking space followed by a normal space',
input: '<html><body>&nbsp; word1&nbsp; word2&nbsp; word3<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>&nbsp; word1&nbsp; word2&nbsp; word3<br><br></body></html>',
expectedText: ' word1 word2 word3\n\n'
},
spacesAfterNewline: {
description: 'Collapse spaces that follow a newline',
input:'<!doctype html><html><body>something<br> something<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>something<br>something<br><br></body></html>',
expectedText: 'something\nsomething\n\n'
},
spacesAfterNewlineP: {
description: 'Collapse spaces that follow a paragraph',
input:'<!doctype html><html><body>something<p></p> something<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>something<br><br>something<br><br></body></html>',
expectedText: 'something\n\nsomething\n\n'
},
spacesAtEndOfLine: {
description: 'Collapse spaces that preceed/follow a newline',
input:'<html><body>something <br> something<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>something<br>something<br><br></body></html>',
expectedText: 'something\nsomething\n\n'
},
spacesAtEndOfLineP: {
description: 'Collapse spaces that preceed/follow a paragraph',
input:'<html><body>something <p></p> something<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>something<br><br>something<br><br></body></html>',
expectedText: 'something\n\nsomething\n\n'
},
nonBreakingSpacesAfterNewlines: {
description: 'Don\'t collapse non-breaking spaces that follow a newline',
input:'<html><body>something<br>&nbsp;&nbsp;&nbsp;something<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>something<br>&nbsp;&nbsp; something<br><br></body></html>',
expectedText: 'something\n something\n\n'
},
nonBreakingSpacesAfterNewlinesP: {
description: 'Don\'t collapse non-breaking spaces that follow a paragraph',
input:'<html><body>something<p></p>&nbsp;&nbsp;&nbsp;something<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>something<br><br>&nbsp;&nbsp; something<br><br></body></html>',
expectedText: 'something\n\n something\n\n'
},
collapseSpacesInsideElements: {
description: 'Preserve only one space when multiple are present',
input: '<html><body>Need <span> more </span> space<i> s </i> !<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>Need more space<em> s </em>!<br><br></body></html>',
expectedText: 'Need more space s !\n\n'
},
collapseSpacesAcrossNewlines: {
description: 'Newlines and multiple spaces across newlines should be collapsed',
input: `
<html><body>Need
<span> more </span>
space
<i> s </i>
!<br></body></html>`,
expectedHTML: '<!DOCTYPE HTML><html><body>Need more space <em>s </em>!<br><br></body></html>',
expectedText: 'Need more space s !\n\n'
},
multipleNewLinesAtBeginning: {
description: 'Multiple new lines and paragraphs at the beginning should be preserved',
input: '<html><body><br><br><p></p><p></p>first line<br><br>second line<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body><br><br><br><br>first line<br><br>second line<br><br></body></html>',
expectedText: '\n\n\n\nfirst line\n\nsecond line\n\n'
},
multiLineParagraph:{
description: "A paragraph with multiple lines should not loose spaces when lines are combined",
input:`<html><body>
<p>
а б в г ґ д е є ж з и і ї й к л м н о
п р с т у ф х ц ч ш щ ю я ь
</p>
</body></html>`,
expectedHTML: '<!DOCTYPE HTML><html><body>&#1072; &#1073; &#1074; &#1075; &#1169; &#1076; &#1077; &#1108; &#1078; &#1079; &#1080; &#1110; &#1111; &#1081; &#1082; &#1083; &#1084; &#1085; &#1086; &#1087; &#1088; &#1089; &#1090; &#1091; &#1092; &#1093; &#1094; &#1095; &#1096; &#1097; &#1102; &#1103; &#1100;<br><br></body></html>',
expectedText: 'а б в г ґ д е є ж з и і ї й к л м н о п р с т у ф х ц ч ш щ ю я ь\n\n'
},
multiLineParagraphWithPre:{
//XXX why is there &nbsp; before "in"?
description: "lines in preformatted text should be kept intact",
input:`<html><body>
<p>
а б в г ґ д е є ж з и і ї й к л м н о<pre>multiple
lines
in
pre
</pre></p><p>п р с т у ф х ц ч ш щ ю я
ь</p>
</body></html>`,
expectedHTML: '<!DOCTYPE HTML><html><body>&#1072; &#1073; &#1074; &#1075; &#1169; &#1076; &#1077; &#1108; &#1078; &#1079; &#1080; &#1110; &#1111; &#1081; &#1082; &#1083; &#1084; &#1085; &#1086;<br>multiple<br>&nbsp;&nbsp; lines<br>&nbsp;in<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; pre<br><br>&#1087; &#1088; &#1089; &#1090; &#1091; &#1092; &#1093; &#1094; &#1095; &#1096; &#1097; &#1102; &#1103; &#1100;<br><br></body></html>',
expectedText: 'а б в г ґ д е є ж з и і ї й к л м н о\nmultiple\n lines\n in\n pre\n\nп р с т у ф х ц ч ш щ ю я ь\n\n'
},
preIntroducesASpace: {
description: "pre should be on a new line not preceeded by a space",
input:`<html><body><p>
1
<pre>preline
</pre></p></body></html>`,
expectedHTML: '<!DOCTYPE HTML><html><body>1<br>preline<br><br><br></body></html>',
expectedText: '1\npreline\n\n\n'
},
dontDeleteSpaceInsideElements: {
description: 'Preserve spaces inside elements',
input: '<html><body>Need<span> more </span>space<i> s </i>!<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>Need more space<em> s </em>!<br><br></body></html>',
expectedText: 'Need more space s !\n\n'
},
dontDeleteSpaceOutsideElements: {
description: 'Preserve spaces outside elements',
input: '<html><body>Need <span>more</span> space <i>s</i> !<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>Need more space <em>s</em> !<br><br></body></html>',
expectedText: 'Need more space s !\n\n'
},
dontDeleteSpaceAtEndOfElement: {
description: 'Preserve spaces at the end of an element',
input: '<html><body>Need <span>more </span>space <i>s </i>!<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>Need more space <em>s </em>!<br><br></body></html>',
expectedText: 'Need more space s !\n\n'
},
dontDeleteSpaceAtBeginOfElements: {
description: 'Preserve spaces at the start of an element',
input: '<html><body>Need<span> more</span> space<i> s</i> !<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>Need more space<em> s</em> !<br><br></body></html>',
expectedText: 'Need more space s !\n\n'
},
};
describe(__filename, function () {

View file

@ -1,6 +1,14 @@
'use strict';
/* eslint-disable max-len */
/*
* While importexport tests target the `setHTML` API endpoint, which is nearly identical to what happens
* when a user manually imports a document via the UI, the contentcollector tests here don't use rehype to process
* the document. Rehype removes spaces and newlines where applicable, so the expected results here can
* differ from importexport.js.
*
* If you add tests here, please also add them to importexport.js
*/
const contentcollector = require('../../../src/static/js/contentcollector');
const AttributePool = require('../../../src/static/js/AttributePool');
@ -113,6 +121,156 @@ const tests = {
expectedLineAttribs: ['+5'],
expectedText: ['empty'],
},
lineWithMultipleSpaces: {
description: 'Multiple spaces should be preserved',
html: '<html><body>Text with more than one space.<br></body></html>',
expectedLineAttribs: [ '+10' ],
expectedText: ['Text with more than one space.']
},
lineWithMultipleNonBreakingAndNormalSpaces: {
description: 'non-breaking and normal space should be preserved',
html: '<html><body>Text&nbsp;with&nbsp; more&nbsp;&nbsp;&nbsp;than &nbsp;one space.<br></body></html>',
expectedLineAttribs: [ '+10' ],
expectedText: ['Text with more than one space.']
},
multiplenbsp: {
description: 'Multiple nbsp should be preserved',
html: '<html><body>&nbsp;&nbsp;<br></body></html>',
expectedLineAttribs: [ '+2' ],
expectedText: [' ']
},
multipleNonBreakingSpaceBetweenWords: {
description: 'Multiple nbsp between words ',
html: '<html><body>&nbsp;&nbsp;word1&nbsp;&nbsp;word2&nbsp;&nbsp;&nbsp;word3<br></body></html>',
expectedLineAttribs: [ '+m' ],
expectedText: [' word1 word2 word3']
},
nonBreakingSpacePreceededBySpaceBetweenWords: {
description: 'A non-breaking space preceeded by a normal space',
html: '<html><body> &nbsp;word1 &nbsp;word2 &nbsp;word3<br></body></html>',
expectedLineAttribs: [ '+l' ],
expectedText: [' word1 word2 word3']
},
nonBreakingSpaceFollowededBySpaceBetweenWords: {
description: 'A non-breaking space followed by a normal space',
html: '<html><body>&nbsp; word1&nbsp; word2&nbsp; word3<br></body></html>',
expectedLineAttribs: [ '+l' ],
expectedText: [' word1 word2 word3']
},
spacesAfterNewline: {
description: 'Don\'t collapse spaces that follow a newline',
html:'<!doctype html><html><body>something<br> something<br></body></html>',
expectedLineAttribs: ['+9', '+m'],
expectedText: ['something', ' something']
},
spacesAfterNewlineP: {
description: 'Don\'t collapse spaces that follow a empty paragraph',
html:'<!doctype html><html><body>something<p></p> something<br></body></html>',
expectedLineAttribs: ['+9', '', '+m'],
expectedText: ['something', '', ' something']
},
spacesAtEndOfLine: {
description: 'Don\'t collapse spaces that preceed/follow a newline',
html:'<html><body>something <br> something<br></body></html>',
expectedLineAttribs: ['+l', '+m'],
expectedText: ['something ', ' something']
},
spacesAtEndOfLineP: {
description: 'Don\'t collapse spaces that preceed/follow a empty paragraph',
html:'<html><body>something <p></p> something<br></body></html>',
expectedLineAttribs: ['+l', '', '+m'],
expectedText: ['something ', '', ' something']
},
nonBreakingSpacesAfterNewlines: {
description: 'Don\'t collapse non-breaking spaces that follow a newline',
html:'<html><body>something<br>&nbsp;&nbsp;&nbsp;something<br></body></html>',
expectedLineAttribs: ['+9', '+c'],
expectedText: ['something', ' something']
},
nonBreakingSpacesAfterNewlinesP: {
description: 'Don\'t collapse non-breaking spaces that follow a paragraph',
html:'<html><body>something<p></p>&nbsp;&nbsp;&nbsp;something<br></body></html>',
expectedLineAttribs: ['+9', '', '+c'],
expectedText: ['something', '', ' something']
},
preserveSpacesInsideElements: {
description: 'Preserve all spaces when multiple are present',
html: '<html><body>Need <span> more </span> space<i> s </i> !<br></body></html>',
expectedLineAttribs: ['+h*0+4+2'],
expectedText: ['Need more space s !'],
},
preserveSpacesAcrossNewlines: {
description: 'Newlines and multiple spaces across newlines should be preserved',
html: `
<html><body>Need
<span> more </span>
space
<i> s </i>
!<br></body></html>`,
expectedLineAttribs: [ '+19*0+4+b' ],
expectedText: [ 'Need more space s !' ]
},
multipleNewLinesAtBeginning: {
description: 'Multiple new lines at the beginning should be preserved',
html: '<html><body><br><br><p></p><p></p>first line<br><br>second line<br></body></html>',
expectedLineAttribs: ['', '', '', '', '+a', '', '+b'],
expectedText: [ '', '', '', '', 'first line', '', 'second line']
},
multiLineParagraph:{
description: "A paragraph with multiple lines should not loose spaces when lines are combined",
html:`<html><body><p>
а б в г ґ д е є ж з и і ї й к л м н о
п р с т у ф х ц ч ш щ ю я ь</p>
</body></html>`,
expectedLineAttribs: [ '+1t' ],
expectedText: ["а б в г ґ д е є ж з и і ї й к л м н о п р с т у ф х ц ч ш щ ю я ь"]
},
multiLineParagraphWithPre:{
description: "lines in preformatted text should be kept intact",
html:`<html><body><p>
а б в г ґ д е є ж з и і ї й к л м н о<pre>multiple
lines
in
pre
</pre></p><p>п р с т у ф х ц ч ш щ ю я
ь</p>
</body></html>`,
expectedLineAttribs: [ '+11', '+8', '+5', '+2', '+3', '+r' ],
expectedText: ['а б в г ґ д е є ж з и і ї й к л м н о', 'multiple', 'lines', 'in', 'pre', 'п р с т у ф х ц ч ш щ ю я ь']
},
preIntroducesASpace: {
description: "pre should be on a new line not preceeded by a space",
html:`<html><body><p>
1
<pre>preline
</pre></p></body></html>`,
expectedLineAttribs: [ '+6', '+7' ],
expectedText: [' 1 ', 'preline']
},
dontDeleteSpaceInsideElements: {
description: 'Preserve spaces on the beginning and end of a element',
html: '<html><body>Need<span> more </span>space<i> s </i>!<br></body></html>',
expectedLineAttribs: ['+f*0+3+1'],
expectedText: ['Need more space s !']
},
dontDeleteSpaceOutsideElements: {
description: 'Preserve spaces outside elements',
html: '<html><body>Need <span>more</span> space <i>s</i> !<br></body></html>',
expectedLineAttribs: ['+g*0+1+2'],
expectedText: ['Need more space s !']
},
dontDeleteSpaceAtEndOfElement: {
description: 'Preserve spaces at the end of an element',
html: '<html><body>Need <span>more </span>space <i>s </i>!<br></body></html>',
expectedLineAttribs: ['+g*0+2+1'],
expectedText: ['Need more space s !']
},
dontDeleteSpaceAtBeginOfElements: {
description: 'Preserve spaces at the start of an element',
html: '<html><body>Need<span> more</span> space<i> s</i> !<br></body></html>',
expectedLineAttribs: ['+f*0+2+2'],
expectedText: ['Need more space s !']
},
};
describe(__filename, function () {