Add ' and * to acceptable URL characters

Both characters are in the RFC 3986 reserved set (they are sub-delims).

These characters are also added to the set of characters that are not
treated as part of a URL when they are its last character, to avoid
mislinkification.
This commit is contained in:
Richard Hansen 2020-12-12 18:26:48 -05:00 committed by John McLear
parent 7d23278ed0
commit a44debdcfe
2 changed files with 4 additions and 4 deletions

View file

@ -60,10 +60,10 @@ const wordCharRegex = new RegExp(`[${[
const urlRegex = (() => {
// TODO: wordCharRegex matches many characters that are not permitted in URIs. Are they included
// here as an attempt to support IRIs? (See https://tools.ietf.org/html/rfc3987.)
const urlChar = `[-:@_.,~%+/?=&#!;()$${wordCharRegex.source.slice(1, -1)}]`;
const urlChar = `[-:@_.,~%+/?=&#!;()$'*${wordCharRegex.source.slice(1, -1)}]`;
// Matches a single character that should not be considered part of the URL if it is the last
// character that matches urlChar.
const postUrlPunct = '[:.,;?!)]';
const postUrlPunct = '[:.,;?!)\'*]';
// Schemes that must be followed by ://
const withAuth = `(?:${[
'(?:x-)?man',

View file

@ -31,7 +31,7 @@ describe('urls', function () {
});
describe('special characters inside URL', function () {
for (const char of '-:@_.,~%+/?=&#!;()$') {
for (const char of '-:@_.,~%+/?=&#!;()$\'*') {
const url = `https://etherpad.org/${char}foo`;
it(url, async function () {
await helper.edit(url);
@ -44,7 +44,7 @@ describe('urls', function () {
});
describe('punctuation after URL is ignored', function () {
for (const char of ':.,;?!)]') {
for (const char of ':.,;?!)\'*]') {
const want = 'https://etherpad.org';
const input = want + char;
it(input, async function () {