In my application I have APIs that work with CSV files, and the files can be larger than 200 MB. For example, there is a part where I not only find the necessary rows but also count the total number of rows:
/**
 * Returns one page of rows from the slot's result CSV together with the
 * total row count of the file.
 *
 * @param slotId id of the slot whose result file is read
 * @param page   1-based page number (values < 1 are clamped to page 1)
 * @param size   rows per page
 * @throws BadRequestException when the requested page starts past the end
 *         of a non-empty file
 */
async getResultAsArray(slotId: number, page = 1, size = 10) {
  const fileInfo = await this.getResultFile(slotId);
  if (!fileInfo)
    return {
      length: 0,
      data: [],
    };
  // Clamp page to 1 so page = 0 or a negative page never yields a negative offset.
  const skip = (page - 1 < 1 ? 0 : page - 1) * size;
  // PERF: run the full-file row count and the page fetch concurrently.
  // Each opens its own read stream over the same file, so awaiting them
  // one after the other doubled the latency on 200MB+ files for no benefit.
  const [length, res] = await Promise.all([
    this.csvService.getFileRowsCount(fileInfo.filePath),
    this.csvService.getFileManyRows(fileInfo.filePath, {
      withHeaders: true,
      size,
      skip,
    }),
  ]);
  // An empty file requested at page 1 is a valid "no data" response
  // (consistent with the missing-file branch above), not a range error.
  if (length > 0 && skip >= length)
    throw new BadRequestException('Index Out of Range');
  return {
    length,
    data: res,
  };
}
/**
 * Counts the data rows of the CSV file at `path` by streaming it through
 * the parser, so the file is never held in memory at once.
 */
async getFileRowsCount(path: string) {
  const fileStream = fs.createReadStream(path);
  return this.getRowsCountByStream(fileStream);
}
/**
 * Consumes the given stream through the CSV parser and resolves with the
 * number of parsed rows; rejects if the parser emits an error.
 */
private getRowsCountByStream(stream: Readable) {
  return new Promise<number>((resolve, reject) => {
    let rowCount = 0;
    const parsed = stream.pipe(csv());
    parsed.on('data', () => {
      rowCount += 1;
    });
    parsed.on('end', () => resolve(rowCount));
    parsed.on('error', (error) => reject(error));
  });
}
/**
 * Streams up to `options.size` parsed rows from the CSV at `path`,
 * skipping the first `options.skip` data rows. The read stream is
 * destroyed as soon as the page is full, so large files are not read
 * to the end when only an early page is needed.
 *
 * @param path    CSV file path
 * @param options skip (required), optional size limit, plus CsvOptions
 * @returns rows as objects (withHeaders) or value arrays (no headers)
 */
async getFileManyRows<T>(
  path: string,
  options: CsvOptions & {
    skip: number;
    size?: number;
  } = { skip: 0 },
) {
  return new Promise<T[]>((resolve, reject) => {
    // BUG FIX: the original combined the checks with `&&`, so a single
    // negative argument (e.g. size = -1 with skip = 0) slipped through
    // validation. Either negative value is out of range.
    if ((options.size !== undefined && options.size < 0) || options.skip < 0) {
      reject(new Error('Index is out of range'));
      return;
    }
    let rowIndex = 0;
    const result: T[] = [];
    const readStream = fs.createReadStream(path);
    readStream
      .pipe(
        csv({
          // `headers: false` makes the parser emit positional records;
          // `undefined` lets it take the first row as the header.
          headers: options?.withHeaders ? undefined : false,
        }),
      )
      .on('data', (data) => {
        if (
          rowIndex >= options.skip &&
          (!options.size || result.length < options.size)
        )
          result.push(options?.withHeaders ? data : Object.values(data));
        if (options.size && result.length === options.size) {
          // Page is full: stop reading the (potentially huge) file early.
          // A later 'end' resolve is a no-op on the settled promise.
          readStream.destroy();
          resolve(result);
        }
        ++rowIndex;
      })
      .on('end', () => {
        // Reached EOF before the page filled (or no size limit was given).
        resolve(result);
      })
      .on('error', (error) => reject(error));
  });
}
I also have an API to get all files associated with a user:
/**
 * Lists every CSV result file stored for the user's slots, newest slot
 * first. Slot folders that are missing or unreadable are skipped
 * (best-effort listing), matching the original silent-catch behavior.
 */
async getResultsFromStorage(userId: number) {
  const slots = await this.userRepository
    .createQueryBuilder('user')
    .leftJoin('user.slots', 'slot')
    .select([
      'slot.id as "id"',
      'slot.name as "name"',
      'slot.s3Folder as "s3Folder"',
    ])
    .where('user.id = :userId', { userId })
    .orderBy('slot.id', 'DESC')
    .getRawMany();
  const userFolder = await this.getUserS3Folder(userId);
  // PERF: the original awaited readdir/stat sequentially inside nested
  // loops, serializing every filesystem call. Fan the slots out in
  // parallel (and the stat calls within each slot), then flatten the
  // results so the original slot/file ordering is preserved.
  const perSlot = await Promise.all(
    slots.map(async (slot): Promise<StorageFile[]> => {
      try {
        const slotFolderPath = this.slotsService.getSlotFolderPath(
          userFolder,
          slot.s3Folder,
        );
        const files = await fsPromises.readdir(slotFolderPath);
        const candidates = files.filter((file) => extname(file) === '.csv');
        const entries = await Promise.all(
          candidates.map(async (file) => {
            const stats = await fsPromises.stat(join(slotFolderPath, file));
            if (!stats.isFile()) return null;
            return {
              name: file,
              path: `${userFolder}/${slot.s3Folder}/${file}`,
              size: stats.size,
              mtime: stats.mtime,
              extname: '.csv',
              slotId: slot.id,
              slotName: slot.name,
            };
          }),
        );
        return entries.filter((entry): entry is StorageFile => entry !== null);
      } catch {
        // Slot folder may not exist yet; treat as "no files" rather than failing.
        return [];
      }
    }),
  );
  return perSlot.flat();
}
Everywhere I use async methods and streams. The problem: the second API (listing user files) is fast on its own — about 870 ms — but when I call the first API (slow, because it has to count all rows) and fetch the file list in parallel, the response takes about 6 seconds. What is the right way to optimize this?