I have a Node.js server that requests data from two web servers: bbc.co.uk and sky.com. The RSS feeds are then parsed, and the user sees two lists: one from the BBC and one from Sky.
Here is the code.
var feed = require('feed-read');
var http = require('http');
var async = require('async');
var request = require('request');
var LIMIT = 10;
var UNABLE_TO_CONNECT = "Unable to connect.";
var BBC_URL = 'http://feeds.bbci.co.uk/news/rss.xml';
var SKY_URL = 'http://news.sky.com/feeds/rss/home.xml';
var server = http.createServer(onRequest);
server.listen(9000);
function onRequest(req, res) {
res.writeHead(200, {
'Content-Type' : 'text/html; charset=utf-8'
});
async.parallel([ function(callback) {
feed(BBC_URL, onRssFetched);
// TODO: where to call callback()?
}, function(callback) {
feed(SKY_URL, onRssFetched);
// TODO: where to call callback()?
} ], function done(err, results) {
console.log("Done");
if (err) {
throw err;
}
});
}
function onRssFetched(err, articles) {
console.log("RSS fetched");
var html = [];
if (err) {
html.push("<p>", UNABLE_TO_CONNECT = "</p>");
} else {
html.push("<ol>");
var i = 0;
articles.forEach(function(entry) {
if (i == LIMIT) {
return;
}
html.push("<li><a href='" + entry.link + "'>" + entry.title
+ "</a></li>");
i++;
});
}
console.log(html.join(""));
}
Now I don't know how to add the result to the web page. If I call callback() right after calling the feed method, callback() will be executed without waiting until feed has completed its job. On the other hand, I can't pass callback to feed. Maybe the approach is wrong and I need some other module for RSS parsing.
- I'm confused, why isn't feed processing for BBC and Sky a separate request on the server side? Can't you just make two different requests on the client side? Shouldn't that solve your problem? – Anatoli, Nov 29, 2013 at 5:31
4 Answers
@Maksim I know your original question included the async module, but I propose an alternative:
why not stream each article to the client as it comes in, rather than waiting for all RSS feeds to return before sending a response...?
By using async.parallel you are telling node:
"wait until we have a response from all these news services
and only then combine the articles into a single response to the client ..."
This uses up memory for each connected client while you wait for all responses (from the RSS news services) ... wasteful.
So I've written my answer without resorting to async.
And, instead of waiting for ages (while async combines all the feeds into one),
the client sees news as soon as the first rss feed returns!
var feed = require('feed-read'), // require the feed-read module
    http = require("http"),
    urls = [
        "http://feeds.bbci.co.uk/news/rss.xml",
        "http://news.sky.com/feeds/rss/home.xml",
        "http://www.techmeme.com/feed.xml"
    ]; // Example RSS Feeds
http.createServer(function (req, res) {
    // send basic http headers to client
    res.writeHead(200, {
        "Content-Type": "text/html",
        "Transfer-Encoding": "chunked"
    });
    // setup simple html page:
    res.write("<html>\n<head>\n<title>RSS Feeds</title>\n</head>\n<body>");
    var pending = urls.length; // how many feeds we are still waiting on
    // loop through our list of RSS feed urls
    urls.forEach(function (url) {
        // fetch rss feed for the url:
        feed(url, function (err, articles) {
            if (!err) {
                // loop through the list of articles returned
                for (var i = 0; i < articles.length; i++) {
                    // stream article title (and whatever else you want) to client
                    res.write("<h3>" + articles[i].title + "</h3>");
                }
            }
            // once the last feed has answered, end the http response
            if (--pending === 0) {
                res.end("</body>\n</html>");
            }
        }); // end call to feed (feed-read) method
    }); // end urls loop
}).listen(9000);
Key Advantages:
- The people connecting to your app will see news/results Much faster (almost instantly!)
- Your app uses much less memory
- You don't have to edit/update any code when you add new RSS news feeds!
For even more detail/notes on this solution see: https://github.com/nelsonic/node-parse-rss
No, you don't need another library. But what you need to do is to hand over callback to your feed function instead of onRssFetched. This way the single RSS feeds are handed over to the final callback in your async.parallel call, using the result variable.
In this variable you then have access to both RSS feeds at the same time, and you can do whatever you want to do with them.
So, basically your logic needs to be:
async.parallel({
bbc: function (callback) {
feed(BBC_URL, callback);
},
sky: function (callback) {
feed(SKY_URL, callback);
}
}, function (err, result) {
if (err) {
// Somewhere, something went wrong…
}
var rssBbc = result.bbc,
rssSky = result.sky;
// Merge the two feeds or deliver them to the client or do
// whatever you want to do with them.
});
And that's it :-).
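For instance, here is a minimal sketch of the "deliver them to the client" part, reusing the asker's res, LIMIT and the result.bbc / result.sky arrays from above (renderList is just an illustrative helper name, not part of any library):
function renderList(articles) {
    // build an <ol> fragment from one feed's articles (hypothetical helper)
    var items = articles.slice(0, LIMIT).map(function (entry) {
        return "<li><a href='" + entry.link + "'>" + entry.title + "</a></li>";
    });
    return "<ol>" + items.join("") + "</ol>";
}
// inside onRequest, in the final async.parallel callback:
// res.write(renderList(result.bbc));
// res.write(renderList(result.sky));
// res.end();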
To amplify @nelsonic's answer (enough that I feel this warrants its own answer), feed-read already processes asynchronously. At its heart, it's still running on http.request. If you look at the code, you see that you can even pass in an array of URLs directly and it will loop through them, but it uses more of an "async.eachSeries" approach, where the next call only occurs after the previous one completes, which appears not to be what you're looking for.
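For illustration, that array form would look roughly like the following; the exact shape of the combined result depends on the feed-read version, so treat this as a sketch based on the description above rather than a definitive API reference:
var feed = require('feed-read');
// hand the whole list to feed-read and let it work through the URLs in series
feed([BBC_URL, SKY_URL], function (err, articles) {
    if (err) return console.error(err);
    // `articles` holds the entries gathered from the feeds, one URL after another
    console.log(articles.length + ' articles fetched');
});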
If you truly want to wait for calls to complete first before handling them, you're better off asynchronously buffering the data, then using underscore's _.after() to run after all URLs have finished.
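A minimal sketch of that buffering idea, assuming underscore is installed and BBC_URL / SKY_URL are defined as in the question:
var _ = require('underscore');
var feed = require('feed-read');
var urls = [BBC_URL, SKY_URL];
var buffered = {}; // url -> articles
// _.after returns a function that only runs its body once it has been
// called `urls.length` times, i.e. once every feed has answered
var allDone = _.after(urls.length, function () {
    // every feed has been buffered; build and send the page here
    console.log(Object.keys(buffered));
});
urls.forEach(function (url) {
    feed(url, function (err, articles) {
        buffered[url] = err ? [] : articles;
        allDone();
    });
});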
But odds are, what you really want to do (unless you're just looking for an example to try out async) is @nelsonic's answer.
I would ideally stream the rss data, instead of aggregating in memory. @nelsonic has explained the correct approach to solve this problem.
Still, if we were to make your code work, consider the following code:
var util = require('util');
var http = require('http');
var async = require('async');
var feed = require('feed-read');
var request = require('request');
var LIMIT = 10;
var UNABLE_TO_CONNECT = 'Unable to connect.';
var BBC_URL = 'http://feeds.bbci.co.uk/news/rss.xml';
var SKY_URL = 'http://news.sky.com/feeds/rss/home.xml';
var server = http.createServer(onRequest);
server.listen(9000);
function onRequest(req, res) {
util.log('Request received!');
res.writeHead(200, {
'Content-Type': 'text/html; charset=utf-8'
});
async.parallel({
bbc: function (callback) {
feed(BBC_URL, function (err, articles) {
var html = onRssFetched(err, articles);
callback(err, html);
});
},
sky: function (callback) {
feed(SKY_URL, function (err, articles) {
var html = onRssFetched(err, articles);
callback(err, html);
});
}
}, done);
function done(err, results) {
util.log('Received results: ' + Object.keys(results || {}).join(','));
if (!err && results) {
var entry, html;
for (entry in results) {
html = results[entry];
res.write(html.join(''));
}
util.log('Send complete!');
res.end();
} else {
console.log(err || 'no data in results');
res.end('Unable to process your request');
}
}
}
function onRssFetched(err, articles) {
var html = [];
if (err) {
html.push('<p>', UNABLE_TO_CONNECT, '</p>');
} else {
// limit number of articles (only when we actually have some)
articles = articles.slice(0, LIMIT);
html.push('<ol>');
articles.forEach(function (entry) {
html.push('<li><a href="' + entry.link + '">' + entry.title + '</a></li>');
});
html.push('</ol>');
}
return html;
}
// -- Test Code ---------------------------------------------------------
if (require.main === module) {
(function () {
var req, res = {
writeHead: console.log,
write: console.log,
end: console.log
};
// onRequest(req, res);
})();
}
Let me know if you face any issues.