最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

javascript - How to get plain text from body in Node.js IMAP module - Stack Overflow

programmeradmin2浏览0评论

I am using the IMAP module for Node.js to parse the body of IMAP email. I can get the body returned to me as raw HTML data but this includes tags and other unnecessary data. I would like the text of what was typed (removing any divs, style, etc.)

Here is the code I am currently using:

// Open the inbox, fetch the FROM header and the TEXT body part for the
// sequence range starting at the mailbox's message count, and log progress
// and the decoded content of each part.
openInbox(function(err, box) {
      if (err) throw err;
      var f = imap.seq.fetch(box.messages.total + ':*', { bodies: ['HEADER.FIELDS (FROM)','TEXT'] });
      f.on('message', function(msg, seqno) {
        console.log('Message #%d', seqno);
        var prefix = '(#' + seqno + ') ';
        // Fired once per requested body part (the header fields and 'TEXT').
        msg.on('body', function(stream, info) {
          if (info.which === 'TEXT')
            console.log(prefix + '\n\nBody [%s] found, %d total bytes\n\n\n', inspect(info.which), info.size);
          // Keep the raw chunks and decode once at the end: decoding every
          // chunk on its own corrupts a multi-byte UTF-8 character that
          // happens to be split across two chunks.
          var chunks = [], count = 0;
          stream.on('data', function(chunk) {
            count += chunk.length;
            // Tolerate string chunks in case an encoding was set on the stream.
            chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk, 'utf8'));
            if (info.which === 'TEXT')
              console.log(prefix + 'Body [%s] (%d/%d)', inspect(info.which), count, info.size);
          });
          stream.once('end', function() {
            var buffer = Buffer.concat(chunks).toString('utf8');
            if (info.which !== 'TEXT')
              console.log(prefix + 'Parsed header: %s', inspect(Imap.parseHeader(buffer)));
            else
              console.log(prefix + 'Body [%s] Finished', inspect(info.which));
            // Dump the raw part; for 'TEXT' this is still undecoded MIME/HTML.
            console.log('\n\n\n\n'+buffer+'\n\n\n\n\n\n');
          });
        });
        msg.once('attributes', function(attrs) {
          console.log(prefix + 'Attributes: %s', inspect(attrs, false, 8));
        });
        msg.once('end', function() {
          console.log(prefix + 'Finished');
        });
      });
      f.once('error', function(err) {
        console.log('Fetch error: ' + err);
      });
      // Close the IMAP connection once the fetch has completed.
      f.once('end', function() {
        console.log('Done fetching all messages!');
        imap.end();
      });
    });

Is there a way to parse as pure text without any tags or other HTML information?

I am using the IMAP module for Node.js to parse the body of IMAP email. I can get the body returned to me as raw HTML data but this includes tags and other unnecessary data. I would like the text of what was typed (removing any divs, style, etc.)

Here is the code I am currently using:

// Open the inbox, fetch the FROM header and the TEXT body part for the
// sequence range starting at the mailbox's message count, and log progress
// and the decoded content of each part.
openInbox(function(err, box) {
      if (err) throw err;
      var f = imap.seq.fetch(box.messages.total + ':*', { bodies: ['HEADER.FIELDS (FROM)','TEXT'] });
      f.on('message', function(msg, seqno) {
        console.log('Message #%d', seqno);
        var prefix = '(#' + seqno + ') ';
        // Fired once per requested body part (the header fields and 'TEXT').
        msg.on('body', function(stream, info) {
          if (info.which === 'TEXT')
            console.log(prefix + '\n\nBody [%s] found, %d total bytes\n\n\n', inspect(info.which), info.size);
          // Keep the raw chunks and decode once at the end: decoding every
          // chunk on its own corrupts a multi-byte UTF-8 character that
          // happens to be split across two chunks.
          var chunks = [], count = 0;
          stream.on('data', function(chunk) {
            count += chunk.length;
            // Tolerate string chunks in case an encoding was set on the stream.
            chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk, 'utf8'));
            if (info.which === 'TEXT')
              console.log(prefix + 'Body [%s] (%d/%d)', inspect(info.which), count, info.size);
          });
          stream.once('end', function() {
            var buffer = Buffer.concat(chunks).toString('utf8');
            if (info.which !== 'TEXT')
              console.log(prefix + 'Parsed header: %s', inspect(Imap.parseHeader(buffer)));
            else
              console.log(prefix + 'Body [%s] Finished', inspect(info.which));
            // Dump the raw part; for 'TEXT' this is still undecoded MIME/HTML.
            console.log('\n\n\n\n'+buffer+'\n\n\n\n\n\n');
          });
        });
        msg.once('attributes', function(attrs) {
          console.log(prefix + 'Attributes: %s', inspect(attrs, false, 8));
        });
        msg.once('end', function() {
          console.log(prefix + 'Finished');
        });
      });
      f.once('error', function(err) {
        console.log('Fetch error: ' + err);
      });
      // Close the IMAP connection once the fetch has completed.
      f.once('end', function() {
        console.log('Done fetching all messages!');
        imap.end();
      });
    });

Is there a way to parse as pure text without any tags or other HTML information?

Share Improve this question edited Jun 29, 2015 at 18:09 Sebastian Nette (7,842 reputation; 2 gold badges, 19 silver badges, 18 bronze badges) asked Jun 29, 2015 at 17:52 feztheforeigner (367 reputation; 1 gold badge, 4 silver badges, 13 bronze badges)
Add a comment  | 

1 Answer 1

Reset to default 7

There is a node module designed for that: https://www.npmjs.com/package/html-to-text

// Convert an HTML fragment to plain text with the html-to-text package
// and print the result.
var htmlToText = require('html-to-text');

var html = '<h1>Hello World</h1>';
var options = { wordwrap: 130 };

var text = htmlToText.fromString(html, options);
console.log(text);

It also nicely parses tables to text.

发布评论

评论列表(0)

  1. 暂无评论