
matlab - need to vectorize efficiently calculating only certain values in the matrix multiplication A * B, using a logical array


I have matrices A (m by v) and B (v by n). I also have a logical matrix L (m by n).

I am interested in calculating only the values in A * B that correspond to logical values in L (values of 1s). Essentially I am interested in the quantity ( A * B ) .* L .

For my problem, a typical L matrix has fewer than 0.1% of its values equal to 1; the vast majority are 0s. Thus it makes no sense for me to literally compute ( A * B ) .* L . It would actually be faster to loop over only the rows of A * B that I actually need, but even that is inefficient.
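For reference, here is a minimal sketch of what that row-by-row baseline looks like (assuming A, B, L, m and n are already defined as above; this is illustrative, not my actual code):

% Baseline sketch: compute only the rows of A*B that are needed, then mask them.
% Still wasteful, because most entries of each computed row are discarded.
C = sparse(m, n);
rowsNeeded = find(any(L, 2)).';       % rows of L that contain at least one 1
for i = rowsNeeded
    rowAB = A(i, :) * B;              % full 1-by-n row of the product
    C(i, L(i, :)) = rowAB(L(i, :));   % keep only the masked entries
end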


Possible solution (need help vectorizing this code if possible)

My particular problem may have a nice solution given that the logical matrix L has a nice structure.

Here's a small-scale example of L (shown as an image in the original question, with 1s in yellow and 0s in blue). In most applications L is much bigger, with far fewer 1 entries and many more 0 entries.

This L matrix is nice in that it can be represented as something like a permuted block matrix. This particular L is composed of 9 "blocks" of 1s, where each block of 1s has its own set of row and column indices, defining a particular submatrix of A * B. For instance, the highlighted area in the image is one such block of 1s, i.e. a particular submatrix of L.

My solution was to break the problem into submatrices over these blocks and do a matrix multiplication for each submatrix. I can get the row indices and column indices of each block's submatrix in L, organized in two cell arrays, "rowidxs_list" and "colidxs_list", both with the number of cells equal to the number of blocks. For instance, for subblock 1 in the example above, I could calculate those particular values of A * B by simply doing A( rowidxs_list{1} , : ) * B( : , colidxs_list{1} ) .
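To make the data structure concrete, here is a small hypothetical example of what those two cell arrays could look like (the values are made up; the real ones depend on the block structure of L):

% Hypothetical index lists for an L with 3 blocks of 1s.
% Block k covers rows rowidxs_list{k} and columns colidxs_list{k} of L.
rowidxs_list = { [1 2], [3 5 6], [4] };
colidxs_list = { [4 5 6], [1 2], [3 7] };
% The entries of A*B needed for block k are then
% A( rowidxs_list{k} , : ) * B( : , colidxs_list{k} ).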

That means that if I precomputed rowidxs_list and colidxs_list (ignore the cost of calculating these lists; it is negligible for my application), then my problem of calculating C = ( A * B ) .* L could effectively be done by:

C = sparse( m,n );
for i = 1:length( rowidxs_list )
    C( rowidxs_list{i} , colidxs_list{i} ) = ...
        A( rowidxs_list{i} , : ) * B( : , colidxs_list{i} );
end

This seems like it would be the most efficient way to solve this problem if I knew how to vectorize this for loop. Does anyone see a way to vectorize this?

There may be ways to vectorize if certain things hold, e.g. if rowidxs_list and colidxs_list were numeric matrices instead of cell arrays (where each row of the matrix is an index list, replacing rowidxs_list{i} with rowidxs_list(i,:) ). I'd prefer to use cell arrays here if possible, since different lists can have different numbers of elements.
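One partial improvement I can think of, short of truly vectorizing the loop, is to avoid the repeated indexed assignment into the sparse matrix C (which rebuilds the sparse structure on every iteration) by collecting (row, column, value) triplets and calling sparse once at the end. A sketch, assuming the same rowidxs_list and colidxs_list as above and non-overlapping blocks:

% Same block-by-block multiplication, but C is built with a single sparse() call.
nblocks = numel(rowidxs_list);
II = cell(nblocks, 1);  JJ = cell(nblocks, 1);  VV = cell(nblocks, 1);
for k = 1:nblocks
    ri = rowidxs_list{k}(:);          % rows of block k (column vector)
    ci = colidxs_list{k}(:).';        % columns of block k (row vector)
    blk = A(ri, :) * B(:, ci);        % dense block of the product
    [R, Cc] = ndgrid(ri, ci);         % row/column index of every entry in blk
    II{k} = R(:);  JJ{k} = Cc(:);  VV{k} = blk(:);
end
% Note: if blocks overlapped, sparse() would sum the duplicate entries.
C = sparse(vertcat(II{:}), vertcat(JJ{:}), vertcat(VV{:}), m, n);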


Other suggested solution (creating a MEX file?)

I first posted this question on the /r/matlab subreddit, see here for the reddit thread. The user "qtac" recommended writing a C-MEX function (i.e. linking to the C programming language):

My gut feeling is the only way to really optimize this is with a C-MEX solution; otherwise, you are going to get obliterated by overhead from subsref in these loops. With C you could loop over L until you find a nonzero element, and then do only the row-column dot product needed to populate that specific element. You will miss out on a lot of the BLAS optimizations but the computational savings may make up for it. Honestly I bet an LLM could write 90%+ of that MEX function for you; it's a well-formulated problem.

I think this could be a good solution to pursue, but I'd like other opinions as well.
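For reference, the computation such a MEX function would perform is essentially the following, written here in plain MATLAB just to illustrate the per-entry dot products (a C implementation would avoid the interpreter and indexing overhead):

% One row-column dot product per nonzero of L; equivalent to (A*B).*L.
[ii, jj] = find(L);                        % positions of the 1s in L
vals = zeros(numel(ii), 1);
for k = 1:numel(ii)
    vals(k) = A(ii(k), :) * B(:, jj(k));   % single dot product
end
C = sparse(ii, jj, vals, m, n);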

Asked by Cal, edited by chtz. Comments:
  • Reposting Bill's comment from proving ground: "How big are m, n, and v? It will be hard to beat A*B. If you compute C = A*B you can index the values you seek with D = C(L) if L is logical, or D=C(logical(L)) if L is numeric containing 0s and 1s" – Wolfie Commented 2 days ago
  • Is the pattern of your L matrix always periodic as in your example? I.e., could you compute L = repmat(L_block, [3, 4]) or similar? (Not 100% sure if that is what you mean by "permuted block matrix") – chtz Commented 2 days ago
  • rahnema1's comment from proving ground: "You may try GraphBLAS that contains masked matrix multiplication operators." Yes, "masked matrix multiplication" sounds like what this is. Thanks for the recommendation – Cal Commented 23 hours ago
  • Wolfie (Bill's comment from proving ground), pt 1: v can be very small to very big, e.g. anywhere from 20 to 20000. m and n also have very wide ranges; either one can be between 400 and 200,000. Assuming max values for m and n of 200,000, L can be at most a 40-billion-entry sparse matrix. Additionally, the larger these L matrices are, the smaller the percentage of entries that are nonzero (e.g. in the max case of 40 billion entries, I'd assume that less than 1e-5 % of the entries of L are nonzero (400,000)). – Cal Commented 23 hours ago
  • Wolfie (Bill's comment from proving ground), pt 2: "It will be hard to beat A * B. If you compute C = A * B you can index the values you seek with D = C(L) if L is logical, or D=C(logical(L)) if L is numeric containing 0s and 1s" I could not get D=C(L) to work for logical L as I received the error "Array indices must be positive integers or logical values." I think they mean D=C(find(L)) instead of D=C(L), which errors out. I tested, and D=C(find(L)) is slower: (A * B) takes 18.8 sec, (A * B) .* L takes 28.5 sec, and D=C(find(L)) takes about 92 seconds. – Cal Commented 23 hours ago

1 Answer


The (i , j)-th entry of the result matrix C can be seen as the vector product, without conjugation, of the i-th row of A (transposed) and the j-th column of B. So you can get all pairs of indices (i , j) from L and do that in a vectorized way:

% Example data:
m = 3; n = 4; v = 9;
L = logical([0 0 1 0; 0 1 1 0; 1 0 0 0]); % m by n
A = rand(m, v);
B = rand(v, n);

% Computation:
[ii, jj] = find(L); % all pairs for which the result is needed
C = sparse(m, n); % preallocate as sparse
ind = ii + (jj-1)*m; % linear index from ii and jj
C(ind) = sum(A(ii,:).'.*B(:,jj), 1); % actual computation

If you only need a vector containing the relevant entries of C, instead of the whole matrix:

% Computation:
[ii, jj] = find(L); % all pairs for which the result is needed
ind = ii + (jj-1)*m; % linear index from ii and jj
c = sum(A(ii,:).'.*B(:,jj), 1); % actual computation
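As a quick sanity check on the small example above (not part of the original answer), both variants can be compared against the brute-force result:

% Verify against the brute-force computation (small example only).
C_ref = (A * B) .* L;                     % dense reference
max(abs(full(C(:)) - C_ref(:)))           % should be ~0 up to round-off
max(abs(c(:) - C_ref(find(L))))           % same check for the vector variant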
