node.js - How to optimize process of reading files in node js - Stack Overflow

In my application I have APIs that work with CSV files, and the files can be larger than 200 MB. For example, in one place I not only fetch the rows I need but also count the total number of rows:

async getResultAsArray(slotId: number, page = 1, size = 10) {
    const fileInfo = await this.getResultFile(slotId);

    if (!fileInfo)
      return {
        length: 0,
        data: [],
      };

    // pages are 1-based; skip is clamped to 0 for page <= 1
    const skip = (page - 1 < 1 ? 0 : page - 1) * size;
    // this is a full pass over the file just to get the total row count
    const length = await this.csvService.getFileRowsCount(fileInfo.filePath);

    if (skip >= length) throw new BadRequestException('Index Out of Range');

    const res = await this.csvService.getFileManyRows(fileInfo.filePath, {
      withHeaders: true,
      size,
      skip,
    });

    return {
      length: length,
      data: res,
    };
  }
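
The row count and the page read are two independent passes over the same file, so one small change would be to start them concurrently rather than awaiting them one after the other. A rough sketch, keeping the same method names and moving the bounds check to after both promises resolve:

    // sketch only: run the counting pass and the page read concurrently
    const [length, res] = await Promise.all([
      this.csvService.getFileRowsCount(fileInfo.filePath),
      this.csvService.getFileManyRows(fileInfo.filePath, {
        withHeaders: true,
        size,
        skip,
      }),
    ]);

    if (skip >= length) throw new BadRequestException('Index Out of Range');

    return { length, data: res };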

async getFileRowsCount(path: string) {
    const stream = fs.createReadStream(path);
    return this.getRowsCountByStream(stream);
  }

private getRowsCountByStream(stream: Readable) {
    return new Promise<number>((resolve, reject) => {
      let count = 0;

      stream
        .pipe(csv())
        // every record is parsed just to be counted, so this pass
        // reads the whole file end to end
        .on('data', () => {
          ++count;
        })
        .on('end', () => {
          resolve(count);
        })
        .on('error', (error) => reject(error));
    });
  }
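
If parsing every record just to count it is the expensive part, an alternative would be to count newline bytes in the raw chunks instead of piping through the csv parser. This is only a sketch: the getLineCountByStream name is made up, it counts the header line as well, and it assumes there are no embedded newlines inside quoted fields:

private getLineCountByStream(stream: Readable) {
    return new Promise<number>((resolve, reject) => {
      let count = 0;

      stream
        .on('data', (chunk: Buffer) => {
          // count '\n' bytes without parsing the CSV structure
          for (let j = 0; j < chunk.length; j++) {
            if (chunk[j] === 10) count++;
          }
        })
        .on('end', () => resolve(count))
        .on('error', (error) => reject(error));
    });
  }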

async getFileManyRows<T>(
    path: string,
    options: CsvOptions & {
      skip: number;
      size?: number;
    } = { skip: 0 },
  ) {
    return new Promise<T[]>((resolve, reject) => {
      // reject if either option is negative
      if (options.size < 0 || options.skip < 0) {
        reject(new Error('Index is out of range'));
        return;
      }

      let i = 0;
      const result: T[] = [];

      const readStream = fs.createReadStream(path);

      readStream
        .pipe(
          csv({
            headers: options?.withHeaders ? undefined : false,
          }),
        )
        .on('data', (data) => {
          // collect rows only after `skip` records, up to `size` rows
          if (
            i >= options.skip &&
            (!options.size || result.length < options.size)
          )
            result.push(options?.withHeaders ? data : Object.values(data));

          // once the page is full, stop reading the rest of the file
          if (options.size && result.length == options.size) {
            readStream.destroy();
            resolve(result);
          }

          ++i;
        })
        })
        .on('end', () => {
          resolve(result);
        })
        .on('error', (error) => reject(error));
    });
  }

I also have an API that returns all files associated with a user:

async getResultsFromStorage(userId: number) {
    const slots = await this.userRepository
      .createQueryBuilder('user')
      .leftJoin('user.slots', 'slot')
      .select([
        'slot.id as "id"',
        'slot.name as "name"',
        'slot.s3Folder as "s3Folder"',
      ])
      .where('user.id = :userId', { userId })
      .orderBy('slot.id', 'DESC')
      .getRawMany();

    const filteredList: StorageFile[] = [];

    const userFolder = await this.getUserS3Folder(userId);

    for (const slot of slots) {
      try {
        const slotFolderPath = this.slotsService.getSlotFolderPath(
          userFolder,
          slot.s3Folder,
        );
        const files = await fsPromises.readdir(slotFolderPath);

        for (const file of files) {
          if (extname(file) === '.csv') {
            const filePath = join(slotFolderPath, file);
            const stats = await fsPromises.stat(filePath);

            if (stats.isFile())
              filteredList.push({
                name: file,
                path: `${userFolder}/${slot.s3Folder}/${file}`,
                size: stats.size,
                mtime: stats.mtime,
                extname: '.csv',
                slotId: slot.id,
                slotName: slot.name,
              });
          }
        }
      } catch (err) {
        // skip slots whose folder is missing or unreadable
      }
    }

    return filteredList;
  }
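
The per-slot directory reads are independent of each other, so they could run concurrently instead of one slot at a time. A rough sketch of the same loop with Promise.all, using the same names as above:

    const perSlot = await Promise.all(
      slots.map(async (slot) => {
        try {
          const slotFolderPath = this.slotsService.getSlotFolderPath(
            userFolder,
            slot.s3Folder,
          );
          const files = await fsPromises.readdir(slotFolderPath);
          const csvFiles = files.filter((file) => extname(file) === '.csv');

          // stat all candidate files in the slot folder concurrently
          const stats = await Promise.all(
            csvFiles.map((file) => fsPromises.stat(join(slotFolderPath, file))),
          );

          return csvFiles
            .map((file, idx) => ({ file, stat: stats[idx] }))
            .filter(({ stat }) => stat.isFile())
            .map(({ file, stat }) => ({
              name: file,
              path: `${userFolder}/${slot.s3Folder}/${file}`,
              size: stat.size,
              mtime: stat.mtime,
              extname: '.csv',
              slotId: slot.id,
              slotName: slot.name,
            }));
        } catch (err) {
          // same behaviour as before: skip slots whose folder cannot be read
          return [];
        }
      }),
    );

    const filteredList: StorageFile[] = perSlot.flat();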

Everywhere I use async methods and streams. The problem is that the second API (getting the user's files) is fast, around 870 ms, but the first API is slow because it has to count all the rows, and when I fetch both in parallel the response takes about 6 seconds. What is the right way to optimize this?
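
One direction would be to cache the row count per file, so repeated pagination requests don't re-scan a 200MB file just to get the total. This is only a sketch: getFileRowsCountCached and the in-memory Map are made up, and it assumes fsPromises is fs.promises as in the storage snippet above:

private rowCountCache = new Map<string, { mtimeMs: number; count: number }>();

async getFileRowsCountCached(path: string) {
    const { mtimeMs } = await fsPromises.stat(path);
    const cached = this.rowCountCache.get(path);

    // reuse the previous count if the file has not changed since it was counted
    if (cached && cached.mtimeMs === mtimeMs) return cached.count;

    const count = await this.getFileRowsCount(path);
    this.rowCountCache.set(path, { mtimeMs, count });
    return count;
  }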
