te')); return $arr; }

/**
 * List the threads posted by one user.
 *
 * @param       $uid      User ID
 * @param int   $page     Page number
 * @param int   $pagesize Number of records per page
 * @param bool  $desc     Sort direction: TRUE for descending, FALSE for ascending
 * @param string $key     Column whose value is used as the key of the returned array
 * @param array $col      Columns to query
 */
function thread_tid_find_by_uid($uid, $page = 1, $pagesize = 1000, $desc = TRUE, $key = 'tid', $col = array()) {
    // Nothing to look up without a user ID.
    if (empty($uid)) {
        return array();
    }

    // -1 sorts by tid descending, 1 ascending.
    if (TRUE == $desc) {
        $direction = -1;
    } else {
        $direction = 1;
    }

    $cond = array('uid' => $uid);
    return thread_tid__find($cond, array('tid' => $direction), $page, $pagesize, $key, $col);
}

// List the tids under a forum. $fid may also be an array, e.g. $fid = array(1,2,3).
function thread_tid_find_by_fid($fid, $page = 1, $pagesize = 1000, $desc = TRUE) {
    // Nothing to look up without a forum ID.
    if (empty($fid)) {
        return array();
    }

    // -1 sorts by tid descending, 1 ascending.
    if (TRUE == $desc) {
        $direction = -1;
    } else {
        $direction = 1;
    }

    $cond = array('fid' => $fid);
    // Results are keyed by 'tid'; only the 'tid' and 'verify_date' columns are requested.
    return thread_tid__find($cond, array('tid' => $direction), $page, $pagesize, 'tid', array('tid', 'verify_date'));
}

// Delete the record for the given tid. Returns FALSE when $tid is empty.
function thread_tid_delete($tid) {
    if (empty($tid)) {
        return FALSE;
    }
    return thread_tid__delete(array('tid' => $tid));
}

// Count all records.
function thread_tid_count() {
    return thread_tid__count();
}

// Count a user's threads. Be careful with non-primary-key counts on large data sets.
function thread_uid_count($uid) {
    return thread_tid__count(array('uid' => $uid));
}

// Count a forum's threads. Be careful with non-primary-key counts on large data sets.
function thread_fid_count($fid) {
    return thread_tid__count(array('fid' => $fid));
}
?>javascript - How do I extract data from a .tar.gz file (stored in the cloud) from a browser - Stack Overflow
最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

javascript - How do I extract data from a .tar.gz file (stored in the cloud) from a browser - Stack Overflow

programmeradmin3浏览0评论

Problem

I am making a single page application that will be storing its data in one of the major cloud providers' blob storage (for example Google Cloud Storage). The data in the cloud storage is a .tar.gz file, and I want to access this from a browser app.

Inside the tar file there will be hundreds of files, and I just want to get one of these files and render it into HTML. I can already load the file, it's just 'how do I get the data out of it'.

Unsurprisingly I am currently using typescript/javascript in the single page application, but that could change if the answer was 'do it this way'.

I'm not worried about browser compatibility (I can specify things like 'only works in this browser'), but the browser doesn't have access to a file system and I can't 'shell out' to the operating system.

What I have tried

I've had a look for npm packages, and the closest I've come to is https://github.com/npm/node-tar (but that seems to need a file system). I am reasonably confident working with streams, but it doesn't feel (after reviewing the documentation) that zlib will do what I want 'out of the box'. I didn't get a lot of hits from google searching: most just gave the same advice I would: 'shell out to the operating system and have that do it with tar', but I can't follow that advice in the browser.

My alternative

If this doesn't work I will put a lambda/function in place to do the de-tarring. I like avoiding 'more moving parts' if I can in a project, but this might be needed.

Problem

I am making a single page application that will be storing its data in one of the major cloud providers' blob storage (for example Google Cloud Storage). The data in the cloud storage is a .tar.gz file, and I want to access this from a browser app.

Inside the tar file there will be hundreds of files, and I just want to get one of these files and render it into HTML. I can already load the file, it's just 'how do I get the data out of it'.

Unsurprisingly I am currently using typescript/javascript in the single page application, but that could change if the answer was 'do it this way'.

I'm not worried about browser compatibility (I can specify things like 'only works in this browser'), but the browser doesn't have access to a file system and I can't 'shell out' to the operating system.

What I have tried

I've had a look for npm packages, and the closest I've come to is https://github.com/npm/node-tar (but that seems to need a file system). I am reasonably confident working with streams, but it doesn't feel (after reviewing the documentation) that zlib will do what I want 'out of the box'. I didn't get a lot of hits from google searching: most just gave the same advice I would: 'shell out to the operating system and have that do it with tar', but I can't follow that advice in the browser.

My alternative

If this doesn't work I will put a lambda/function in place to do the de-tarring. I like avoiding 'more moving parts' if I can in a project, but this might be needed.

Share Improve this question asked Dec 25, 2020 at 8:45 Stave EscuraStave Escura 2,0981 gold badge19 silver badges24 bronze badges
Add a comment  | 

3 Answers 3

Reset to default 9

The result should be achievable by using a combination of pako (a fast zlib JavaScript port) and js-untar:

<script src="pako.min.js"></script>
<script src="untar.js"></script>
<script>
fetch('test.tar.gz').then(res => res.arrayBuffer()) // Download gzipped tar file and get ArrayBuffer
                    .then(pako.inflate)             // Decompress gzip using pako
                    .then(arr => arr.buffer)        // Get ArrayBuffer from the Uint8Array pako returns
                    .then(untar)                    // Untar
                    .then(files => {                // js-untar returns a list of files (See https://github.com/InvokIT/js-untar#file-object for details)
                        console.log(files);
                    });
</script>

test.tar.gz was made by running tar -czvf test.tar.gz test on a directory with 3 text files in it, to be able to check that both directories and files show up in the result.

Similar to @Lasse's answer but fewer dependencies and perf improvement:

  1. You can replace pako with the browser's built-in decompression API.
  2. Pipe the fetch stream into the decompression stream, so you are decompressing while fetching is still in progress.
  3. In addition, I recommend tarballjs, which is a cleaner untar implementation in my opinion and has recent repo activity. It is so simple that you can pick up the maintenance if the author quits.

// CORS Anywhere is needed for downloading from GitHub. Visit https://cors-anywhere.herokuapp.com for details
// Requests the sample gzipped tarball (the tarballjs repository itself) through the proxy.
const fetchSampleBlob = () => fetch("https://cors-anywhere.herokuapp.com/https://github.com/ankitrohatgi/tarballjs/tarball/master", {headers: {"X-Requested-With": "https://github.com"}})

// Pipes the response body through the browser's built-in gzip decompressor,
// so decompression proceeds while the download is still in progress.
const fetchStreamToDecompressionStream = (response) => response.body.pipeThrough(new DecompressionStream("gzip"));

// Collects the decompressed stream into a Blob (the raw .tar contents).
const decompressionStreamToBlob = (decompressedStream) => new Response(decompressedStream).blob();

// Hands the tar Blob to tarballjs' TarReader.
const blobToDir = (blob) => new tarball.TarReader().readFile(blob)

 

fetchSampleBlob()
  .then(fetchStreamToDecompressionStream)
  .then(decompressionStreamToBlob)
  .then(blobToDir)
  .then(console.log); // you should see a few files from the downloaded git repo tarball


/**
 * Output
 *
 * [
 *  {
 *    "name": "pax_global_header",
 *    "type": "g",
 *    "size": 52,
 *    "header_offset": 0
 *  },
 *  ...
 * ]
 */
<!-- This is served from author's server. You need to find a different host for performance and security reasons -->
<script src="https://arohatgi.info/tarballjs/tarball.js"></script>

I have tested the following code to be working just fine, using @gera2ld/tarjs and the native DecompressionStream

import { TarReader } from '@gera2ld/tarjs'
/**
 * Downloads the latest lodash tarball from the npm registry, gunzips it with the
 * native DecompressionStream, and loads the resulting tar data with TarReader.
 * Intermediate values are logged to the console.
 *
 * Throws an Error if the registry metadata has no `dist-tags`, no `latest` tag,
 * no entry for the latest version, or if the tarball is not an ArrayBuffer.
 */
async function testTar() {
  const metadataResponse = await axios.get<NpmRegistryMetadata>('https://registry.npmjs.org/lodash')
  const { data } = metadataResponse

  const distTags = data['dist-tags']
  if (!distTags) throw new Error('dist-tags not found')

  const latest = distTags.latest
  if (!latest) throw new Error('latest version not found')

  // Fail with a clear message instead of a TypeError when the version entry is missing.
  const latestVersion = data.versions?.[latest]
  if (!latestVersion) throw new Error('latest version metadata not found')

  const tarballUrl = latestVersion.dist.tarball

  const tarballResponse = await axios.get<ArrayBuffer>(tarballUrl, { responseType: 'arraybuffer' })
  const { data: tarball } = tarballResponse
  if (!(tarball instanceof ArrayBuffer)) throw new Error('tarball data is not an ArrayBuffer')

  // Wrap the already-downloaded buffer in a one-chunk stream so it can be piped
  // through DecompressionStream.
  const dataStream = new ReadableStream({
    start(controller) {
      controller.enqueue(tarball)
      controller.close()
    },
  })

  const decompressionStream = dataStream.pipeThrough<Uint8Array>(new DecompressionStream('gzip'))
  console.log('decompressionStream', decompressionStream)

  // Response is used here only as a convenient way to drain the stream into an ArrayBuffer.
  const decompressed = await new Response(decompressionStream).arrayBuffer()
  console.log('decompressed', decompressed)

  const untared = await TarReader.load(decompressed)
  console.log('untared', untared)
}
发布评论

评论列表(0)

  1. 暂无评论