最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

javascript - NodeJS Puppeteer setDownloadBehavior issue - Stack Overflow

programmeradmin1浏览0评论

I am trying to set a custom download path, but Chrome puts the files into the default Downloads folder anyway.

const puppeteer = require('puppeteer');

// Question script: launch a local Chrome, open the page that holds the download
// link, ask Chrome over the DevTools protocol to save downloads into a custom
// folder, click the link, wait, then close the browser.
(async () => {
   const browser = await puppeteer.launch({
      executablePath: 'C:/Program Files (x86)/Google/Chrome/Application/chrome.exe',
      defaultViewport: {
          width: 1920,
          height: 1080
      },
      headless: false,
      userDataDir: "./user_data"
   });

   const page = await browser.newPage();

   // The URL must be non-empty; this is the placeholder address of the page
   // containing the `a.download-btn` link.
   await page.goto(
     'https://example.com/page-with-the-file-link',
      { waitUntil: 'domcontentloaded' },
   );

   // NOTE(review): `_client` is an underscore-prefixed member of the page, i.e.
   // presumably internal API that may change between puppeteer versions.
   await page._client.send('Page.setDownloadBehavior', {
         behavior: 'allow',
         downloadPath: 'C:/Users/Me/Downloads/custom/folder/'
   });

   console.log('Start downloading');

   await page.click('a.download-btn');

   // Fixed 5000 ms pause intended to give the download time to finish before
   // the browser is closed.
   await page.waitFor(5000);

   console.log('Complete');
   await browser.close();
})();

So it ignores the downloadPath option and puts the file into the default C:/Users/Me/Downloads folder.

Also, it does not wait for 5 seconds: it only has time to download the file and exits right after clicking the download link.

Start downloading
Complete
(node:51016) UnhandledPromiseRejectionWarning: Error: WebSocket is not open: readyState 3 (CLOSED)
    at WebSocket.send (C:\Users\Me\Downloads\puppeteer\node_modules\ws\lib\websocket.js:329:19)
    at WebSocketTransport.send (C:\Users\Me\Downloads\puppeteer\node_modules\puppeteer\lib\WebSocketTransport.js:60:14)
    at Connection._rawSend (C:\Users\Me\Downloads\puppeteer\node_modules\puppeteer\lib\Connection.js:86:21)
    at Connection.send (C:\Users\Me\Downloads\puppeteer\node_modules\puppeteer\lib\Connection.js:72:21)
    at gracefullyCloseChrome (C:\Users\Me\Downloads\puppeteer\node_modules\puppeteer\lib\Launcher.js:194:20)
    at Browser.close (C:\Users\Me\Downloads\puppeteer\node_modules\puppeteer\lib\Browser.js:255:31)
    at Browser.<anonymous> (C:\Users\Me\Downloads\puppeteer\node_modules\puppeteer\lib\helper.js:112:23)
    at C:\Users\Me\Downloads\puppeteer\test-download-file.js:97:18
    at <anonymous>
(node:51016) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). (rejection id: 3)
(node:51016) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.

Why does this script exit right after clicking the download link, and how do I do this correctly? Is there any function for catching the download status and monitoring progress?

Any advice will be appreciated, thanks!

I am trying to set a custom download path, but Chrome puts the files into the default Downloads folder anyway.

const puppeteer = require('puppeteer');

// Question script: launch a local Chrome, open the page that holds the download
// link, ask Chrome over the DevTools protocol to save downloads into a custom
// folder, click the link, wait, then close the browser.
(async () => {
   const browser = await puppeteer.launch({
      executablePath: 'C:/Program Files (x86)/Google/Chrome/Application/chrome.exe',
      defaultViewport: {
          width: 1920,
          height: 1080
      },
      headless: false,
      userDataDir: "./user_data"
   });

   const page = await browser.newPage();

   // Placeholder address of the page containing the `a.download-btn` link
   // (the host needs a complete domain name to be a valid URL).
   await page.goto(
     'https://example.com/page-with-the-file-link',
      { waitUntil: 'domcontentloaded' },
   );

   // NOTE(review): `_client` is an underscore-prefixed member of the page, i.e.
   // presumably internal API that may change between puppeteer versions.
   await page._client.send('Page.setDownloadBehavior', {
         behavior: 'allow',
         downloadPath: 'C:/Users/Me/Downloads/custom/folder/'
   });

   console.log('Start downloading');

   await page.click('a.download-btn');

   // Fixed 5000 ms pause intended to give the download time to finish before
   // the browser is closed.
   await page.waitFor(5000);

   console.log('Complete');
   await browser.close();
})();

So it ignores the downloadPath option and puts the file into the default C:/Users/Me/Downloads folder.

Also, it does not wait for 5 seconds: it only has time to download the file and exits right after clicking the download link.

Start downloading
Complete
(node:51016) UnhandledPromiseRejectionWarning: Error: WebSocket is not open: readyState 3 (CLOSED)
    at WebSocket.send (C:\Users\Me\Downloads\puppeteer\node_modules\ws\lib\websocket.js:329:19)
    at WebSocketTransport.send (C:\Users\Me\Downloads\puppeteer\node_modules\puppeteer\lib\WebSocketTransport.js:60:14)
    at Connection._rawSend (C:\Users\Me\Downloads\puppeteer\node_modules\puppeteer\lib\Connection.js:86:21)
    at Connection.send (C:\Users\Me\Downloads\puppeteer\node_modules\puppeteer\lib\Connection.js:72:21)
    at gracefullyCloseChrome (C:\Users\Me\Downloads\puppeteer\node_modules\puppeteer\lib\Launcher.js:194:20)
    at Browser.close (C:\Users\Me\Downloads\puppeteer\node_modules\puppeteer\lib\Browser.js:255:31)
    at Browser.<anonymous> (C:\Users\Me\Downloads\puppeteer\node_modules\puppeteer\lib\helper.js:112:23)
    at C:\Users\Me\Downloads\puppeteer\test-download-file.js:97:18
    at <anonymous>
(node:51016) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). (rejection id: 3)
(node:51016) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.

Why does this script exit right after clicking the download link, and how do I do this correctly? Is there any function for catching the download status and monitoring progress?

Any advice will be appreciated, thanks!

Share asked Jul 5, 2019 at 14:46 mr.incrediblemr.incredible 4,22510 gold badges44 silver badges80 bronze badges
Add a comment  | 

1 Answer 1

Reset to default 8

How to get around this with workarounds

1) Wait for the file to appear in the default 'Downloads' directory

/**
 * Waits until `filePath` exists and is readable.
 *
 * The file is probed once immediately and again every time the containing
 * directory reports a `rename` event for its base name.
 *
 * @param {string} filePath - Path of the file to wait for.
 * @param {number} timeout - Maximum time to wait, in milliseconds.
 * @returns {Promise<void>} Resolves once the file is readable. Rejects with an
 *   Error when the timeout elapses, or with the watcher's error when the
 *   directory cannot be watched and the file is not already present.
 */
function checkExistsWithTimeout(filePath, timeout) {
    return new Promise(function (resolve, reject) {
        var dir = path.dirname(filePath);
        var basename = path.basename(filePath);
        var watcher = null;
        var watchError = null;
        var settled = false;

        // Single exit point: settles the promise once and releases the timer
        // and the watcher exactly once, whichever event wins.
        function finish(err) {
            if (settled) {
                return;
            }
            settled = true;
            clearTimeout(timer);
            if (watcher) {
                watcher.close();
            }
            if (err) {
                reject(err);
            } else {
                resolve();
            }
        }

        // Resolves when the file is readable right now. If it is not and the
        // directory could not be watched, waiting is pointless, so fail early.
        function probe() {
            fs.access(filePath, fs.constants.R_OK, function (accessErr) {
                if (!accessErr) {
                    finish();
                } else if (watchError) {
                    finish(watchError);
                }
            });
        }

        var timer = setTimeout(function () {
            finish(new Error('File did not exists and was not created during the timeout.'));
        }, timeout);

        try {
            watcher = fs.watch(dir, function (eventType, filename) {
                // 'rename' is reported for deletions as well as creations, so
                // confirm the file is really there before resolving.
                if (eventType === 'rename' && filename === basename) {
                    probe();
                }
            });
            watcher.on('error', finish);
        } catch (err) {
            // fs.watch throws synchronously, e.g. when the directory is missing.
            watchError = err;
        }

        probe();
    });
}

Check file just in case (optionally)

/**
 * Checks that a file exists.
 *
 * @param {string} path - Path of the file to check.
 * @returns {Promise<void>} Resolves (after logging 'File exists') when the
 *   file is visible to the process; rejects with the error reported by
 *   `fs.access` (its `code`, e.g. 'ENOENT', is preserved) otherwise.
 */
function checkFile(path) {
    return new Promise(function (resolve, reject) {
        fs.access(path, fs.constants.F_OK, (err) => {
            if (err) {
                reject(err);
                // Stop here so a missing file is not reported as existing.
                return;
            }

            //file exists
            console.log('File exists');
            resolve();
        });
    });
}

Move the file wherever we need after the download completes (optional)

/**
 * Moves (renames) a file.
 *
 * @param {string} fromPath - Current path of the file.
 * @param {string} toPath - Destination path.
 * @returns {Promise<void>} Resolves (after logging 'File moved') once the
 *   rename succeeded; rejects with an Error whose message is
 *   'File did not move.' and whose `cause` is the underlying `fs.rename` error.
 */
function moveFile(fromPath, toPath) {
    return new Promise(function (resolve, reject) {
        fs.rename(fromPath, toPath, function (err) {
            if (err) {
                // Report the failure through the promise only: throwing from
                // inside this callback would surface as an uncaught exception.
                var failure = new Error('File did not move.');
                failure.cause = err;
                reject(failure);
                return;
            }
            console.log('File moved');
            resolve();
        });
    });
}

Example

const fs = require('fs');
const path = require('path');

// Read the button label before clicking; the snippet uses it as the name of the
// downloaded file (presumably the label matches the file name - verify on the page).
const fileName = await page.evaluate(
   () => document.querySelector('.download-file-btn').textContent.trim()
);

// Where the browser drops the file, and where it should end up.
const downloadedPath = `C:/Users/Me/Downloads/${fileName}`;
const targetPath = `C:/Users/me/Desktop/Videos/${fileName}`;

await page.click('.download-file-btn');
await checkExistsWithTimeout(downloadedPath, 10000);
await moveFile(downloadedPath, targetPath);

2) Download the file from a URL using the 'request' package

function download(uri, filename, callback) {
    return new Promise(function (resolve, reject) {
        request.head(uri, function (err, res, body) {
            if (!err && res.statusCode == 200) {
                console.log('content-type:', res.headers['content-type']);
                console.log('content-length:', res.headers['content-length']);

                request(uri)
                .pipe(fs.createWriteStream(filename))
                .on('error', function(response) {
                    console.log(err);
                    reject(new Error(err));
                })
                .on('close', function() {
                    callback();
                    resolve();
                });
            } else {
                reject(new Error(err));
            }
        });
    });
}

Example

// Take the source URL from the page's video element and save it straight to disk.
const videoSrc = await page.evaluate(
     () => document.querySelector('video.vjs-tech').src
);

// Runs once the file has been written.
const onDownloaded = function() {
   console.log('downloaded');
};

await download(videoSrc, "C:/Users/Me/Downloads/Videos/video.mp4", onDownloaded);

It works for me without the 'setDownloadBehavior' option; otherwise it fails. Now I can click the download button or download a file directly from a URL, just by manipulating the filesystem with Node a little bit.

Hope it will help somebody.

发布评论

评论列表(0)

  1. 暂无评论
ok 不同模板 switch ($forum['model']) { /*case '0': include _include(APP_PATH . 'view/htm/read.htm'); break;*/ default: include _include(theme_load('read', $fid)); break; } } break; case '10': // 主题外链 / thread external link http_location(htmlspecialchars_decode(trim($thread['description']))); break; case '11': // 单页 / single page $attachlist = array(); $imagelist = array(); $thread['filelist'] = array(); $threadlist = NULL; $thread['files'] > 0 and list($attachlist, $imagelist, $thread['filelist']) = well_attach_find_by_tid($tid); $data = data_read_cache($tid); empty($data) and message(-1, lang('data_malformation')); $tidlist = $forum['threads'] ? page_find_by_fid($fid, $page, $pagesize) : NULL; if ($tidlist) { $tidarr = arrlist_values($tidlist, 'tid'); $threadlist = well_thread_find($tidarr, $pagesize); // 按之前tidlist排序 $threadlist = array2_sort_key($threadlist, $tidlist, 'tid'); } $allowpost = forum_access_user($fid, $gid, 'allowpost'); $allowupdate = forum_access_mod($fid, $gid, 'allowupdate'); $allowdelete = forum_access_mod($fid, $gid, 'allowdelete'); $access = array('allowpost' => $allowpost, 'allowupdate' => $allowupdate, 'allowdelete' => $allowdelete); $header['title'] = $thread['subject']; $header['mobile_link'] = $thread['url']; $header['keywords'] = $thread['keyword'] ? $thread['keyword'] : $thread['subject']; $header['description'] = $thread['description'] ? $thread['description'] : $thread['brief']; $_SESSION['fid'] = $fid; if ($ajax) { empty($conf['api_on']) and message(0, lang('closed')); $apilist['header'] = $header; $apilist['extra'] = $extra; $apilist['access'] = $access; $apilist['thread'] = well_thread_safe_info($thread); $apilist['thread_data'] = $data; $apilist['forum'] = $forum; $apilist['imagelist'] = $imagelist; $apilist['filelist'] = $thread['filelist']; $apilist['threadlist'] = $threadlist; message(0, $apilist); } else { include _include(theme_load('single_page', $fid)); } break; default: message(-1, lang('data_malformation')); break; } ?>