From 9d68c619c7f03151b965f4a2272827d41ce6cf49 Mon Sep 17 00:00:00 2001 From: Loris Bettazza Date: Fri, 20 Sep 2024 17:15:26 +0200 Subject: [PATCH] :bug: Fix attachment regex to allow lowercase characters and underscores Also, since we're now being less strict with the regex, I'll add the restriction that the attachment should be the first thing in the message to help avoid false positives. HOPEFULLY this assumption won't bite me in the ass later As always we also have to check for the LTR / RTL characters because whatsapp really loves them Closes #260 --- src/parser.ts | 3 ++- tests/parser.test.ts | 9 +++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index be6a769..24916d2 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -19,7 +19,8 @@ const regexParserSystem = new RegExp( sharedRegex.source + messageRegex.source, 'i', ); -const regexAttachment = /<.+:(.+)>|([A-Z\d-]+\.\w+)\s[(<].+[)>]/; +const regexAttachment = + /^(?:\u200E|\u200F)*(?:<.+:(.+)>|([\w-]+\.\w+)\s[(<].+[)>])/; /** * Takes an array of lines and detects the lines that are part of a previous diff --git a/tests/parser.test.ts b/tests/parser.test.ts index 2e72f07..26075b7 100644 --- a/tests/parser.test.ts +++ b/tests/parser.test.ts @@ -207,11 +207,14 @@ describe('parser.js', () => { '3/6/18, 1:55 p.m. - a: IMG-20210428-WA0001.jpg (file attached)'; const format3 = '3/6/18, 1:55 p.m. - a: 2015-08-04-PHOTO-00004762.jpg <‎attached>'; + const format4 = + '3/6/18, 1:55 p.m. - a: ‎4f2680f1db95a8454775cc2eefc95bfc.jpg (Datei angehängt)\nDir auch frohe Ostern.'; const messages = [ { system: false, msg: format1 }, { system: false, msg: '3/6/18, 1:55 p.m. - a: m' }, { system: false, msg: format2 }, { system: false, msg: format3 }, + { system: false, msg: format4 }, ]; const parsedWithoutAttachments = parseMessages(messages, { parseAttachments: false, @@ -245,6 +248,12 @@ describe('parser.js', () => { '2015-08-04-PHOTO-00004762.jpg', ); }); + + it('should correctly parse the attachment string with format #4', () => { + expect(parsedWithAttachments[4]?.attachment?.fileName).toBe( + '4f2680f1db95a8454775cc2eefc95bfc.jpg', + ); + }); }); }); });