Git Product home page Git Product logo

ecm-msg's Introduction

Outlook .msg files need a UI

One of my main clients uses the Outlook email reader. Their Web-based UI needs to read and view those messages. This is where such things are taken care of.

The HTML parser, generator and image inliner: lib/html.js

To make this easy on the (old) server we’ll inline the images into the HTML from the “attachments” portion of the .msg file.

First we need to parse the HTML. That’s node-html-parser.

We also need to know the type of the image: image-type to the rescue.

npm install node-html-parser image-type
import { parse } from 'node-html-parser';
import  { default as iconvLite } from 'iconv-lite';
import { deEncapsulateSync } from 'rtf-stream-parser';
import { decompressRTF } from '@kenjiuno/decompressrtf';
import { escapeHTML } from '@wordpress/escape-html'
import fs from 'fs'
import imageType from 'image-type';

  /**
   * Produce the HTML body of an Outlook message.
   *
   * When the message carries compressed RTF, it is decompressed and
   * de-encapsulated; if the result is in `html` mode its text is returned
   * as-is. Otherwise the plain-text body is returned inside a `<pre>` element.
   *
   * @param {object} msg - reader exposing `getFileData()`; it is given an
   *   empty `parserConfig` when it has none.
   * @returns {string} HTML markup for the message body.
   */
  export const msgInternalHtml = (msg) => {
    msg.parserConfig = msg.parserConfig || {};
    const msgInfo = msg.getFileData();

    if (msgInfo.compressedRtf) {
      const rtfBytes = Buffer.from(decompressRTF(msgInfo.compressedRtf));
      const rtf = deEncapsulateSync(rtfBytes, { decode: iconvLite.decode });
      if (rtf && rtf.mode === 'html') {
        return rtf.text;
      }
    }

    // The fallback body is plain text: escape it so characters such as `<`
    // and `&` are displayed literally instead of being parsed as markup.
    // A missing body becomes an empty <pre> rather than the text "undefined".
    const plainBody = msgInfo.body == null ? '' : String(msgInfo.body);
    return `<pre>${escapeHTML(plainBody)}</pre>`;
  }

  /**
   * Parse a message's HTML body into a document tree.
   *
   * @param {object} msg - message reader, handed on to `msgInternalHtml`.
   * @returns {object} whatever `parse` returns for that HTML string.
   */
  export const msgInternalDoc = (msg) => parse(msgInternalHtml(msg));

  /**
   * Collect every `<img>` element of a parsed message document.
   *
   * @param {object} msg - not read by this function.
   * @param {object} doc - parsed document exposing `querySelectorAll`.
   * @returns {*} the result of `doc.querySelectorAll('img')`.
   */
  export const msgDocInlineImgs = (msg, doc) => doc.querySelectorAll('img');

  /**
   * Replace a `cid:` image reference with an inline `data:` URI built from
   * the matching message attachment.
   *
   * The element is left untouched when it has no `src`, when the `src` is not
   * a `cid:` reference, when no attachment carries that content id, or when
   * the attachment's image type cannot be detected.
   *
   * @param {object} msg - reader exposing `getFileData()` and `getAttachment()`.
   * @param {object} img - element exposing `getAttribute` / `setAttribute`.
   * @returns {Promise<object>} the same `img` element.
   */
  export const inlineCidImg = async (msg, img) => {
    const src = img.getAttribute('src');
    if (typeof src !== 'string' || !src.startsWith('cid:')) {
      return img;
    }

    const cid = src.substring(4);
    const attachments = msg.getFileData().attachments || [];
    const atto = attachments.find((i) => i.pidContentId === cid);
    if (!atto) {
      // Nothing to inline: keep the original reference instead of throwing,
      // so one dangling cid does not abort the whole message.
      return img;
    }

    const content = msg.getAttachment(atto).content;
    const type = await imageType(content);
    if (!type) {
      // Undetected format: a data URI cannot be built without a MIME type.
      return img;
    }

    const b64 = Buffer.from(content).toString('base64');
    img.setAttribute('src', `data:${type.mime};base64,${b64}`);

    return img;
  }

/**
 * Render a message body as HTML with its `cid:` images inlined.
 *
 * The body is parsed into a document, every `<img>` is passed through
 * `inlineCidImg` (all in parallel), and the document is serialised again.
 *
 * @param {object} msg - message reader handed to the helper functions.
 * @returns {Promise<string>} the document's `outerHTML` after inlining.
 */
export const msgBodyHtml = async (msg) => {
  // Parse once: msgInternalDoc already derives the HTML string itself, so
  // computing it separately would only repeat the RTF decoding work.
  const doc = msgInternalDoc(msg);
  const imgs = msgDocInlineImgs(msg, doc);

  await Promise.all(
    imgs.map((img) => inlineCidImg(msg, img))
  );

  return doc.outerHTML;
}




The bin/ecm-msg.js script

This is the heart of it for now. We want to export the message to JSON and grab any attachment.

import { program } from 'commander';
import fs from 'fs';
import path from 'path';
import pkg from '@kenjiuno/msgreader';
const MsgReader = pkg.default ;

import  { default as iconvLite } from 'iconv-lite';
import { deEncapsulateSync } from 'rtf-stream-parser';
import { decompressRTF } from '@kenjiuno/decompressrtf';
import { escapeHTML } from '@wordpress/escape-html'

import { msgInternalHtml,
         msgInternalDoc,
         msgDocInlineImgs,
         inlineCidImg,
         msgBodyHtml
       } from '../lib/html.js'


// `body-html`: print (or save) the message body as HTML with inlined images.
program
  .command('body-html <msgFilePath> [saveToHtmlFilePath]')
  .description('Parse msg file and return rtf body as HTML')
  .action(async (msgFilePath, saveToHtmlFilePath) => {
    const msgFileBuffer = fs.readFileSync(msgFilePath);
    const msg = new MsgReader(msgFileBuffer);
    const html = await msgBodyHtml(msg);

    // Honour the optional output path the same way the `rtf` command does;
    // without it the HTML goes to stdout.
    if (typeof saveToHtmlFilePath === 'string' && saveToHtmlFilePath.length >= 1) {
      fs.writeFileSync(saveToHtmlFilePath, html);
    }
    else {
      console.log(html);
    }
  });


// `json`: print the parsed message, either in full or as a compact summary
// that also contains the de-encapsulated RTF.
program
  .command('json <msgFilePath>')
  .description('Parse msg file and print json structure')
  .option('-f, --full-json', 'print full JSON')
  .action((msgFilePath, options) => {
    const msgFileBuffer = fs.readFileSync(msgFilePath);
    const testMsg = new MsgReader(msgFileBuffer);
    testMsg.parserConfig = testMsg.parserConfig || {};
    const testMsgInfo = testMsg.getFileData();

    if (options.fullJson) {
      // The full dump never uses the decoded RTF, so skip that work entirely.
      console.log(JSON.stringify(testMsgInfo, null, 2));
      return;
    }

    const { attachments,
            recipients,
            subject,
            senderName,
            inetAcctName,
            body,
            compressedRtf } = testMsgInfo;

    // Not every message carries compressed RTF; report `null` for those
    // instead of failing inside decompressRTF.
    const rtf = compressedRtf
          ? deEncapsulateSync(Buffer.from(decompressRTF(compressedRtf)),
                              { decode: iconvLite.decode })
          : null;

    console.log(
      JSON.stringify({
        sender: {
          name: senderName,
          email: inetAcctName
        },
        recipients,
        subject,
        body, rtf, attachments
      })
    );
  });
// `rtf`: print the decompressed RTF of a message, or save it to a file.
program
  .command('rtf <msgFilePath> [saveToRtfFilePath]')
  .description('Parse msg file and print decompressed rtf')
  .action((msgFilePath, saveToRtfFilePath) => {
    const msgFileBuffer = fs.readFileSync(msgFilePath);
    const testMsg = new MsgReader(msgFileBuffer);
    const { compressedRtf } = testMsg.getFileData();

    // Messages without an RTF body have nothing to decompress; say so
    // instead of failing inside decompressRTF.
    if (!compressedRtf) {
      console.warn('no rtf is contained.');
      return;
    }

    const body = Buffer.from(decompressRTF(compressedRtf));

    if (typeof saveToRtfFilePath === "string" && saveToRtfFilePath.length >= 1) {
      fs.writeFileSync(saveToRtfFilePath, body);
    }
    else {
      console.log(body.toString('utf-8'));
    }
  });


// `body-html` registers an async action, so parse with parseAsync: it waits
// for the action to settle and lets a rejection be reported here.
program
  .parseAsync(process.argv)
  .catch((err) => {
    console.error(err);
    process.exitCode = 1;
  });




RTF Stream Parser

Because Outlook messages contain an RTF file within them a lot of the time, and that RTF is often HTML in disguise (FFS!), and we want to use HTML to output them, well, as luck would have it, we are not the only ones.

Repo Here.

npm install rtf-stream-parser iconv-lite '@wordpress/escape-html'

The npm package

$ npm create vite@latest
Need to install the following packages:
  [email protected]
Ok to proceed? (y) y
✔ Project name: … ecm-msg
✔ Select a framework: › Vanilla
✔ Select a variant: › JavaScript

Scaffolding project in /home/drewc/me/ecm/src/ecm-msg/ecm-msg...

Done. Now run:

  cd ecm-msg
  npm install
  npm run dev

cd ecm-msg ; mv * ../ ; mv .gitignore ../;
cd .. ; rmdir ecm-msg

The msgreader JS

npm install '@kenjiuno/msgreader'

CLI

npm install 'commander'

Forked from the repo: https://github.com/HiraokaHyperTools/msgreader/blob/master/cli.js

const program = require('commander');

const MsgReader = require('@kenjiuno/msgreader').default;
const { props, typeNames } = require('@kenjiuno/msgreader/lib/Defs');
const { Reader } = require('@kenjiuno/msgreader/lib/Reader');

const fs = require('fs');
const path = require('path');
const { decompressRTF } = require('@kenjiuno/decompressrtf');

// `parse`: read a .msg file and print its parsed fields, either as the raw
// object or as pretty-printed JSON.
program
  .command('parse <msgFilePath>')
  .description('Parse msg file and print parsed structure')
  .option('-f, --full-json', 'print full JSON')
  .option('-i, --include-raw-props', 'include raw (and also unknown) props')
  .action((msgFilePath, options) => {
    const reader = new MsgReader(fs.readFileSync(msgFilePath));

    // Make sure a config object exists before toggling flags on it.
    reader.parserConfig = reader.parserConfig || {};
    if (options.includeRawProps) {
      reader.parserConfig.includeRawProps = true;
    }

    const fields = reader.getFileData();
    const output = options.fullJson
      ? JSON.stringify(fields, null, 2)
      : fields;
    console.log(output);
  });

// `rtf`: decompress the message's RTF body and either save it to the given
// file or print it as UTF-8 text.
program
  .command('rtf <msgFilePath> [saveToRtfFilePath]')
  .description('Parse msg file and print decompressed rtf')
  .action((msgFilePath, saveToRtfFilePath) => {
    const reader = new MsgReader(fs.readFileSync(msgFilePath));
    const { compressedRtf } = reader.getFileData();
    const rtfBytes = Buffer.from(decompressRTF(compressedRtf));

    const wantsFile = typeof saveToRtfFilePath === "string"
      && saveToRtfFilePath.length >= 1;
    if (!wantsFile) {
      console.log(rtfBytes.toString("utf8"));
      return;
    }

    fs.writeFileSync(saveToRtfFilePath, rtfBytes);
  });

/**
 * Flatten the attachments of a message, descending into embedded messages.
 *
 * An embedded message is listed as `<name>.msg`, followed by its own
 * attachments. Every nested entry is prefixed with the names of all the
 * embedded messages that contain it, joined by `delimiter`.
 *
 * @param {object} fieldsData - parsed message fields with an `attachments` array.
 * @param {string} delimiter - separator placed after each embedded message name.
 * @returns {Array<{fileName: string, attachmentRef: object}>} entries in
 *   depth-first order.
 */
function listAttachmentsRecursively(fieldsData, delimiter) {
  const attachments = [];

  const walk = (fields, prefix) => {
    for (const att of fields.attachments) {
      if (att.innerMsgContent) {
        attachments.push({
          fileName: prefix + att.name + ".msg",
          attachmentRef: att,
        });
        // Carry the accumulated prefix down: without it, attachments nested
        // two or more levels deep lose their outer message names and files
        // from different embedded messages can end up with the same name.
        walk(att.innerMsgContentFields, prefix + att.name + delimiter);
      }
      else {
        attachments.push({
          fileName: prefix + att.fileName,
          attachmentRef: att,
        });
      }
    }
  };

  walk(fieldsData, "");

  return attachments;
}

// `list-att`: print one entry per attachment, including those found inside
// embedded messages.
program
  .command('list-att <msgFilePath>')
  .description('Parse msg file and list attachment file names')
  .action((msgFilePath) => {
    const reader = new MsgReader(fs.readFileSync(msgFilePath));

    listAttachmentsRecursively(reader.getFileData(), "_")
      .forEach((entry) => {
        console.log(entry);
      });
  });

// `save-att`: write every attachment (including those of embedded messages)
// into the target directory, creating it when needed.
program
  .command('save-att <msgFilePath> <saveToDir>')
  .description('Parse msg file and write all attachment files')
  .action((msgFilePath, saveToDir) => {
    const msgFileBuffer = fs.readFileSync(msgFilePath);
    const testMsg = new MsgReader(msgFileBuffer);
    const testMsgInfo = testMsg.getFileData();

    const targetDir = path.resolve(saveToDir);
    fs.mkdirSync(targetDir, { recursive: true });

    const attachments = listAttachmentsRecursively(testMsgInfo, "_");
    for (const attachment of attachments) {
      // Attachment names come from the message itself. Keep only the final
      // path component so a name containing directory parts (for example
      // "../x") cannot place a file outside the target directory.
      const safeName = path.basename(attachment.fileName);
      const attFilePath = path.join(targetDir, safeName);
      fs.writeFileSync(attFilePath, testMsg.getAttachment(attachment.attachmentRef).content);
    }
  });

// `dump`: log every property the reader reports while parsing the file,
// numbering each fields object in the order it is first seen.
program
  .command('dump <msgFilePath>')
  .description('Dump msg file and print data')
  .option('-p, --print-raw-data', 'print raw data')
  .action((msgFilePath, options) => {
    const reader = new MsgReader(fs.readFileSync(msgFilePath));
    let nextMsgIndex = 0;

    const describeProperty = (fields, tag, raw) => {
      // Tag each fields object with a running index on first sight.
      if (fields.msgIndex === undefined) {
        fields.msgIndex = nextMsgIndex;
        nextMsgIndex += 1;
      }

      // The tag is shown as 8 upper-case hex digits; the last four select
      // the entry looked up in `typeNames`.
      const key = tag.toString(16).padStart(8, "0").toUpperCase();
      const prop = props.find((it) => it.key === key);
      const type = typeNames[parseInt(key.substr(4), 16)];

      console.info(
        "msgIdx:", fields.msgIndex,
        "dataType:", `'${fields.dataType}'`,
        "tag:", `0x${key}`,
        "name:", (prop && prop.name) || null,
        "type:", type || null,
        "size:", raw && raw.byteLength,
        "data:", options.printRawData ? raw : undefined,
      );
    };

    reader.parserConfig = { propertyObserver: describeProperty };

    // Parsing is run only for the observer callbacks; the result is not used.
    reader.getFileData();
  });

// `expose`: copy every file and folder of the compound file onto disk,
// mirroring its folder structure below <exportToDir>.
program
  .command('expose <msgFilePath> <exportToDir>')
  .description('Expose files/folders in Compound File Binary Format (CFBF)')
  .action((msgFilePath, exportToDir, options) => {
    const msgFileBuffer = fs.readFileSync(msgFilePath);
    const store = new Reader(msgFileBuffer);
    store.parse();

    function expose(folder, saveTo) {
      // Created synchronously so a failure surfaces as an error instead of
      // silently skipping the whole folder.
      fs.mkdirSync(saveTo, { recursive: true });

      for (const fileName of folder.fileNames()) {
        const array = folder.readFile(fileName);
        const filePath = saveTo + "/" + fileName;
        console.info(filePath);
        // writeFileSync accepts strings and binary views, not plain arrays,
        // so a missing stream is written as an empty buffer.
        fs.writeFileSync(filePath, (array === null) ? Buffer.alloc(0) : array);
      }
      for (const subFolder of folder.subFolders()) {
        expose(subFolder, saveTo + "/" + subFolder.name);
      }
    }

    expose(store.rootFolder(), exportToDir);
  });

// `html`: print the message's HTML, preferring the binary `html` field
// (decoded with the chosen encoding) over the `bodyHtml` field.
program
  .command('html <msgFilePath>')
  .description('Parse msg file and display 1013001f:bodyHtml or 10130102:html')
  .option('-e, --encoding <encoding>', 'The encoding type to decode binary html.', 'utf8')
  .action((msgFilePath, options) => {
    const reader = new MsgReader(fs.readFileSync(msgFilePath));
    const fields = reader.getFileData();

    if (fields.html !== undefined) {
      console.log(Buffer.from(fields.html).toString(options.encoding));
      return;
    }

    if (fields.bodyHtml !== undefined) {
      console.log(fields.bodyHtml);
      return;
    }

    console.warn("no html is contained.");
  });

// `walk`: visit every folder of the compound file and check that each file
// can be read in full.
program
  .command('walk <msgFilePath>')
  .description('Walk entire msg file as a raw CFBF')
  .action((msgFilePath, options) => {
    const msgFileBuffer = fs.readFileSync(msgFilePath);
    const reader = new Reader(msgFileBuffer);
    reader.parse();

    function walk(folder, prefix) {
      console.info("Walking folder:", prefix);
      for (let fileSet of folder.fileNameSets()) {
        const contents = fileSet.provider();
        console.info("Verify file:", fileSet.name, "(", fileSet.length, ")", "read", contents.length, "bytes");
        if (fileSet.length != contents.length) {
          // Name the offending file and both sizes so the failure can be
          // diagnosed from the error alone.
          throw new Error(
            `Length mismatch for ${prefix}${fileSet.name}: ` +
            `expected ${fileSet.length} bytes, read ${contents.length}`
          );
        }
      }
      for (let subFolder of folder.subFolders()) {
        walk(subFolder, `${prefix}${subFolder.name}/`);
      }
    }

    walk(reader.rootFolder(), "/");
  });



// `dummy1`: parse the fixed sample file test/msgInMsg.msg and print its
// first attachment.
program
  .command('dummy1')
  .action(() => {
    const reader = new MsgReader(fs.readFileSync('test/msgInMsg.msg'));

    // The parsed fields are not used here, but the call is kept so the
    // reader goes through the same steps as before.
    reader.getFileData();

    console.log(reader.getAttachment(0));
  });

// `dummy2`: parse the fixed sample file test/voteItems.msg and print its
// parsed fields.
program
  .command('dummy2')
  .action(() => {
    const reader = new MsgReader(fs.readFileSync('test/voteItems.msg'));
    const fields = reader.getFileData();
    console.log(fields);
  });

// Dispatch to the command selected on the command line.
program.parse(process.argv);

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Some thing interesting about web. New door for the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Some thing interesting about game, make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.