From 699aa299f8ccb9d8121899694f002d79bd1a7afc Mon Sep 17 00:00:00 2001 From: Daniel Perez Alvarez Date: Mon, 13 Aug 2012 17:09:02 +0100 Subject: [PATCH] Normalize inserted text using UNorm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For some reason, the client was sending the server a Unicode-normalized version of inserted strings. So if, for example, we inserted the decomposed string 'ä' (i.e. 'a' followed by the combining diaeresis U+0308, UTF-8 bytes \x61\xCC\x88) into the document, what would be sent to the server would be the precomposed 'ä' (i.e. U+00E4, UTF-8 bytes \xC3\xA4). This wouldn't be a problem on its own, but JavaScript reports that the length of the first string is 2, while the length of the second one is 1. So the command that was being sent to the server was 'Z:1>2*0+1$ä', when it should really be 'Z:1>1*0+1$ä'. When the `checkRep` method checks the length of the inserted string, it finds an inconsistency and disconnects the client. We now normalize the inserted string (to NFC) before the command is generated, so the length is always correct. --- src/static/js/contentcollector.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/static/js/contentcollector.js b/src/static/js/contentcollector.js index 777b3421c..b27dfc5e0 100644 --- a/src/static/js/contentcollector.js +++ b/src/static/js/contentcollector.js @@ -25,13 +25,14 @@ var _MAX_LIST_LEVEL = 8; +var UNorm = require('./unorm'); var Changeset = require('./Changeset'); var hooks = require('./pluginfw/hooks'); var _ = require('./underscore'); function sanitizeUnicode(s) { - return s.replace(/[\uffff\ufffe\ufeff\ufdd0-\ufdef\ud800-\udfff]/g, '?'); + return UNorm.nfc(s).replace(/[\uffff\ufffe\ufeff\ufdd0-\ufdef\ud800-\udfff]/g, '?'); } function makeContentCollector(collectStyles, browser, apool, domInterface, className2Author)