I'm working on a simple app using Node.js which needs to do the following when given a valid URL
1. Retrieve the HTML of the remote page, save it locally.
2. Spider the HTML (using cheerio) and record all JS and CSS file references.
3. Make HTTP request for each JS/CSS file and save it to the server by file name.
4. Zip up the html, css, and js files and stream the resulting file to the browser.
I've got 1 and 2 working, and the first half of #3 but I'm running into issues with the asynchronous nature of the downloads. My code is running too fast and generating file names for the CSS and JS files, but none of the content. I'm guessing this is because my code isn't synchronous. The problem is that I don't know in advance how many files there might be and all of them have to be there before the ZIP file can be generated.
Here's the flow of my app as it currently exists. I've left out the helper methods as they don't affect synchronicity. Can any of you provide input as to what I should do?
// Fetch the page at `fullurl`, save it and the JS/CSS files it references
// under a freshly named directory, then hand that directory to writeZip().
http.get(fullurl, function(res) {
// NOTE(review): a response body can arrive as more than one 'data' chunk, in
// which case everything below would run once per chunk on partial HTML.
// Confirm, and consider buffering the body until 'end'.
res.on('data', function (chunk) {
// ''+chunk coerces the chunk to a string before it is passed to $.
// getJS/getCSS/pw/rewritePaths are helpers not shown here; from their use
// below, js and css are presumably arrays of URL strings -- TODO confirm.
var $source = $(''+chunk),
js = getJS($source, domain),
css = getCSS($source, domain),
uniqueName = pw(),
dir = [baseDir,'jsd-', uniqueName, '/'].join(''),
jsdir = dir + 'js/',
cssdir = dir + 'css/',
html = rewritePaths($source);
// create tmp directory
fs.mkdirSync(dir);
console.log('creating index.html');
// save index file
fs.writeFileSync(dir + 'index.html', html);
// create js directory
fs.mkdirSync(jsdir);
// Save JS files
js.forEach(function(jsfile){
// The local file name is the last '/'-separated segment of the URL.
var filename = jsfile.split('/').reverse()[0];
// This only starts the download: the write stream is created right away,
// but the content arrives later and nothing here waits for it.
request(jsfile).pipe(fs.createWriteStream(jsdir + filename));
console.log('creating ' + filename);
});
// create css directory
fs.mkdirSync(cssdir);
// Save CSS files
css.forEach(function(cssfile){
var filename = cssfile.split('/').reverse()[0];
// Same fire-and-forget download as for the JS files above.
request(cssfile).pipe(fs.createWriteStream(cssdir + filename));
console.log('creating ' + filename);
});
// write zip file to /tmp
// This runs as soon as the loops above have started the downloads, not when
// they have finished, so the files can still be empty at this point.
writeZip(dir,uniqueName);
//
// /
});
}).on('error', function(e) {
// Only errors emitted by the http.get request itself are reported here.
console.log("Got error: " + e.message);
});
I'm working on a simple app using Node.js which needs to do the following when given a valid URL
1. Retrieve the HTML of the remote page, save it locally.
2. Spider the HTML (using cheerio) and record all JS and CSS file references.
3. Make HTTP request for each JS/CSS file and save it to the server by file name.
4. Zip up the html, css, and js files and stream the resulting file to the browser.
I've got 1 and 2 working, and the first half of #3 but I'm running into issues with the asynchronous nature of the downloads. My code is running too fast and generating file names for the CSS and JS files, but none of the content. I'm guessing this is because my code isn't synchronous. The problem is that I don't know in advance how many files there might be and all of them have to be there before the ZIP file can be generated.
Here's the flow of my app as it currently exists. I've left out the helper methods as they don't affect synchronicity. Can any of you provide input as to what I should do?
// Fetch the page at `fullurl`, save it and the JS/CSS files it references
// under a freshly named directory, then hand that directory to writeZip().
http.get(fullurl, function(res) {
// NOTE(review): a response body can arrive as more than one 'data' chunk, in
// which case everything below would run once per chunk on partial HTML.
// Confirm, and consider buffering the body until 'end'.
res.on('data', function (chunk) {
// ''+chunk coerces the chunk to a string before it is passed to $.
// getJS/getCSS/pw/rewritePaths are helpers not shown here; from their use
// below, js and css are presumably arrays of URL strings -- TODO confirm.
var $source = $(''+chunk),
js = getJS($source, domain),
css = getCSS($source, domain),
uniqueName = pw(),
dir = [baseDir,'jsd-', uniqueName, '/'].join(''),
jsdir = dir + 'js/',
cssdir = dir + 'css/',
html = rewritePaths($source);
// create tmp directory
fs.mkdirSync(dir);
console.log('creating index.html');
// save index file
fs.writeFileSync(dir + 'index.html', html);
// create js directory
fs.mkdirSync(jsdir);
// Save JS files
js.forEach(function(jsfile){
// The local file name is the last '/'-separated segment of the URL.
var filename = jsfile.split('/').reverse()[0];
// This only starts the download: the write stream is created right away,
// but the content arrives later and nothing here waits for it.
request(jsfile).pipe(fs.createWriteStream(jsdir + filename));
console.log('creating ' + filename);
});
// create css directory
fs.mkdirSync(cssdir);
// Save CSS files
css.forEach(function(cssfile){
var filename = cssfile.split('/').reverse()[0];
// Same fire-and-forget download as for the JS files above.
request(cssfile).pipe(fs.createWriteStream(cssdir + filename));
console.log('creating ' + filename);
});
// write zip file to /tmp
// This runs as soon as the loops above have started the downloads, not when
// they have finished, so the files can still be empty at this point.
writeZip(dir,uniqueName);
// Candidate zip libraries (URLs repaired from a garbled paste -- verify):
// https://npmjs.org/package/node-zip
// http://stuk.github.io/jszip/
});
}).on('error', function(e) {
// Only errors emitted by the http.get request itself are reported here.
console.log("Got error: " + e.message);
});
Share
Improve this question
edited Dec 8, 2013 at 21:59
Andrea
12.4k17 gold badges68 silver badges74 bronze badges
asked Mar 26, 2013 at 4:53
madelimitedmadelimited
5,1297 gold badges45 silver badges82 bronze badges
3
- 1 a) I don't think there is a way to do synchronous http request in node b) Doing all this synchronously would be a really bad idea – DeadAlready Commented Mar 26, 2013 at 7:00
- @DeadAlready: +1 for bad idea, though there's npmjs.org/package/httpsync – Bergi Commented Mar 26, 2013 at 9:25
- DeadAlready, the requirement is that all of the CSS/JS files referenced in index.html must be downloaded so that they can be zipped up. Whether it's done synchronously or asynchronously makes no difference when all files must be present before proceeding. – madelimited Commented Mar 26, 2013 at 13:18
1 Answer
The way you are downloading files through the request module is asynchronous:
request(cssfile).pipe(fs.createWriteStream(cssdir + filename));
Instead of downloading like that, create a separate function that invokes a callback once the file has been written:
/**
 * Download `remotePath` into `localFile` and report back when finished.
 *
 * @param {string} localFile  Path of the file to write.
 * @param {string} remotePath URL passed to `request` as `uri`.
 * @param {function(?Error, ?string)} callback Called exactly once: with
 *   `(null, localFile)` after the write stream has closed, or with
 *   `(err, null)` when the status code is not 200 or the request or the
 *   file write fails.
 */
function download (localFile, remotePath, callback) {
  var finished = false;
  var localStream = null;

  // Several events can signal completion (e.g. 'error' followed by 'close');
  // only the first one is forwarded to the caller.
  function done (err, result) {
    if (finished) {
      return;
    }
    finished = true;
    callback(err, result);
  }

  var out = request({ uri: remotePath });

  // Without this handler a failed request would never reach the callback.
  out.on('error', function (err) {
    if (localStream) {
      // Release the file handle; a partially written file may remain on disk.
      localStream.destroy();
    }
    done(err, null);
  });

  out.on('response', function (resp) {
    if (resp.statusCode !== 200) {
      done(new Error("No file found at given url."), null);
      return;
    }
    // Open the file only once the download is known to be good, so a failed
    // request does not leave an empty file behind.
    localStream = fs.createWriteStream(localFile);
    localStream.on('error', function (err) {
      done(err, null);
    });
    localStream.on('close', function () {
      done(null, localFile);
    });
    out.pipe(localStream);
  });
}
You need to use the async module by caolan (https://github.com/caolan/async) to run the downloads and continue only once they have all finished:
// Save the JS files, then the CSS files, and only then build the zip.
// Each stage is started from the previous stage's completion callback, so
// writeZip() runs only after every download() has reported success.
// Save JS files
async.forEach(js, function (jsfile, cb) {
  // The local file name is the last '/'-separated segment of the URL.
  var filename = jsfile.split('/').reverse()[0];
  download(jsdir + filename, jsfile, function (err) {
    if (err) {
      // Propagate the failure; the final callback below then skips the zip.
      return cb(err);
    }
    console.log('creating ' + filename);
    cb();
  });
}, function (err) {
  if (err) {
    // A JS file is missing, so the archive would be incomplete.
    console.log("Got error: " + err.message);
    return;
  }
  // create css directory
  fs.mkdirSync(cssdir);
  // Save CSS files
  async.forEach(css, function (cssfile, cb) {
    var filename = cssfile.split('/').reverse()[0];
    // Use download() here too: a bare request(...).pipe(...) would let
    // writeZip() run before the CSS content has been written.
    download(cssdir + filename, cssfile, function (err) {
      if (err) {
        return cb(err);
      }
      console.log('creating ' + filename);
      cb();
    });
  }, function (err) {
    if (err) {
      console.log("Got error: " + err.message);
      return;
    }
    // write zip file to /tmp
    writeZip(dir, uniqueName);
  });
});