ter = array_value($config, 'filter');
    // NOTE(review): everything down to the next closing brace is the tail of a function
    // whose header lies above this view. The statements are unchanged; they have only
    // been re-broken onto separate lines.
    $arr = array_value($filter, $type);
    $enable = array_value($arr, 'enable');
    $wordarr = array_value($arr, 'keyword');
    if (0 == $enable || empty($wordarr)) return FALSE;
    foreach ($wordarr as $_keyword) {
        if (!$_keyword) continue;
        $r = strpos(strtolower($keyword), strtolower($_keyword));
        if (FALSE !== $r) {
            $error = $_keyword;
            return TRUE;
        }
    }
    return FALSE;
}

/**
 * Build the scheme + host prefix of the current request.
 *
 * Returns "https://<host>" when $_SERVER['HTTPS'] is 'on' or the
 * X-Forwarded-Proto header is 'https', otherwise "http://<host>".
 *
 * @return string e.g. http://domain.com OR https://domain.com
 */
function url_prefix()
{
    $http = ((isset($_SERVER['HTTPS']) && 'on' == $_SERVER['HTTPS']) || (isset($_SERVER['HTTP_X_FORWARDED_PROTO']) && $_SERVER['HTTP_X_FORWARDED_PROTO'] == 'https')) ? 'https://' : 'http://';
    return $http . $_SERVER['HTTP_HOST'];
}

/**
 * Generate a unique identifier.
 *
 * The uniqid() prefix is 8 hex characters cut from an md5 of the current
 * microtime plus a random 4-digit number.
 *
 * @return string
 */
function uniq_id()
{
    return uniqid(substr(md5(microtime(true) . mt_rand(1000, 9999)), 8, 8));
}

/**
 * Generate an order number from the current microtime with the decimal point
 * removed, right-padded with zeros to 14 characters.
 *
 * The float-to-string conversion drops trailing zeros, hence the padding.
 * A value that is already 14 characters or longer is returned unpadded.
 *
 * @return string
 */
function trade_no()
{
    $trade_no = str_replace('.', '', (string)microtime(true));
    return str_pad($trade_no, 14, '0');
}

/**
 * Generate a 16-character order number: the Unix timestamp in seconds followed
 * by the first 6 fractional digits of microtime().
 *
 * @return string
 */
function trade_no_16()
{
    // microtime() returns "0.uuuuuuuu ssssssssss"
    $explode = explode(' ', microtime());
    $trade_no = $explode[1] . mb_substr($explode[0], 2, 6, 'UTF-8');
    return $trade_no;
}

/**
 * Number of days in the year of $time (365, or 366 in a leap year).
 *
 * @param int|null $time Unix timestamp; the current time is used when empty/0.
 * @return int
 */
function date_year($time = NULL)
{
    $time = intval($time) ? $time : time();
    // date('L') is 1 in a leap year and 0 otherwise
    return date('L', $time) + 365;
}

/**
 * Day of the year of $time, as returned by date('z') (starts at 0).
 *
 * @param int|null $time Unix timestamp; the current time is used when empty/0.
 * @return string
 */
function date_z($time = NULL)
{
    $time = intval($time) ? $time : time();
    return date('z', $time);
}

/**
 * Day of the month of $time without leading zero (1 to 31).
 *
 * @param int|null $time Unix timestamp; the current time is used when empty/0.
 * @return string
 */
function date_j($time = NULL)
{
    $time = intval($time) ? $time : time();
    return date('j', $time);
}

/**
 * Day of the month of $time as two digits with leading zero (01 to 31).
 *
 * @param int|null $time Unix timestamp; the current time is used when empty/0.
 * @return string
 */
function date_d($time = NULL)
{
    $time = intval($time) ? $time : time();
    return date('d', $time);
}

/**
 * Day of the week of $time as a number: 1 (Monday) to 7 (Sunday).
 *
 * @param int|null $time Unix timestamp; the current time is used when empty/0.
 * @return string
 */
function date_w_n($time = NULL)
{
    $time = intval($time) ? $time : time();
    return date('N', $time);
}

/**
 * Week number of the year for $time, as returned by date('W').
 *
 * @param int|null $time Unix timestamp; the current time is used when empty/0.
 * @return string
 */
function date_d_w($time = NULL)
{
    $time = intval($time) ? $time : time();
    return date('W', $time);
}

/**
 * Month of $time without leading zero (1 to 12).
 *
 * @param int|null $time Unix timestamp; the current time is used when empty/0.
 * @return string
 */
function date_n($time = NULL)
{
    $time = intval($time) ? $time : time();
    return date('n', $time);
}

/**
 * Number of days in the month of $time.
 *
 * @param int|null $time Unix timestamp; the current time is used when empty/0.
 * @return string
 */
function date_t($time = NULL)
{
    $time = intval($time) ? $time : time();
    return date('t', $time);
}

/**
 * Timestamp of 0 o'clock on the current day.
 *
 * @return int|false
 */
function clock_zero()
{
    return strtotime(date('Ymd'));
}

/**
 * Timestamp of 24 o'clock on the current day (midnight today + 86400 seconds).
 *
 * @return int
 */
function clock_twenty_four()
{
    return strtotime(date('Ymd')) + 86400;
}

/**
 * Seconds remaining from $time until the next 8 a.m. expiry.
 *
 * Before 08:00 the expiry is 08:00 today; from 08:00 onwards it is 08:00
 * tomorrow. The hour test uses the current wall-clock time, not $time.
 *
 * The comparison used to be `<= 8`, which kept targeting today's 08:00 for the
 * whole 08:xx hour and therefore returned a negative lifetime.
 *
 * @param int|null $time Unix timestamp; the current time is used when empty/0.
 * @return int
 */
function eight_expired($time = NULL)
{
    $time = intval($time) ? $time : time();
    $today_eight = strtotime(date('Ymd')) + 28800;
    $life = date('G') < 8 ? ($today_eight - $time) : (clock_twenty_four() - $time + 28800);
    return $life;
}

/**
 * Seconds remaining from $time until 24 o'clock of the current day.
 *
 * @param int|null $time Unix timestamp; the current time is used when empty/0.
 * @return int
 */
function twenty_four_expired($time = NULL)
{
    $time = intval($time) ? $time : time();
    $twenty_four = clock_twenty_four();
    $life = $twenty_four - $time;
    return $life;
}

/**
 * Perform an HTTP(S) request, using cURL when the extension is loaded and the
 * https_post() / http_get() helpers otherwise.
 *
 * @param string       $url     Request URL.
 * @param string|array $post    POST data (arrays are url-encoded); empty means GET;
 *                              the literal string 'GET' returns the final effective URL
 *                              after redirects instead of the response body.
 * @param string|array $cookie  Cookie header value (arrays go through http_build_query).
 * @param int          $timeout Timeout, in seconds, or in milliseconds when $ms is set.
 * @param int          $ms      Set to 1 to interpret $timeout as milliseconds.
 * @return mixed FALSE for an empty URL; otherwise the curl_exec() result, or the
 *               effective URL when $post is 'GET'.
 */
function https_request($url, $post = '', $cookie = '', $timeout = 30, $ms = 0)
{
    if (empty($url)) return FALSE;

    if (version_compare(PHP_VERSION, '5.2.3', '<')) {
        $ms = 0;
        $timeout = 30;
    }

    is_array($post) and $post = http_build_query($post);

    // cURL not installed: fall back to the plain HTTP helpers (POST is supported)
    if (!extension_loaded('curl')) {
        if ($post) {
            return https_post($url, $post, $cookie, $timeout);
        } else {
            return http_get($url, $cookie, $timeout);
        }
    }

    is_array($cookie) and $cookie = http_build_query($cookie);

    $curl = curl_init();
    // Return the result instead of printing it
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);

    // The default of CURLOPT_SAFE_UPLOAD differs between PHP 5.5 and PHP 5.6
    if (class_exists('\CURLFile')) {
        curl_setopt($curl, CURLOPT_SAFE_UPLOAD, true);
    } else {
        defined('CURLOPT_SAFE_UPLOAD') and curl_setopt($curl, CURLOPT_SAFE_UPLOAD, false);
    }

    curl_setopt($curl, CURLOPT_URL, $url);

    if (ini_get('safe_mode') && ini_get('open_basedir')) {
        // Only the 'GET' (effective URL) mode asks for headers here
        if ('GET' == $post) {
            // Emit the response headers as part of the data stream
            curl_setopt($curl, CURLOPT_HEADER, true);
            curl_setopt($curl, CURLOPT_NOBODY, true);
        }
    } else {
        curl_setopt($curl, CURLOPT_HEADER, false);
        // Follow up to 10 redirects automatically
        curl_setopt($curl, CURLOPT_MAXREDIRS, 10);
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    }

    // Forward the caller's user agent, or use a mobile Safari one when absent
    $ua1 = 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1';
    $ua = empty($_SERVER["HTTP_USER_AGENT"]) ? $ua1 : $_SERVER["HTTP_USER_AGENT"];
    curl_setopt($curl, CURLOPT_USERAGENT, $ua);

    if (FALSE !== stripos($url, 'https://')) {
        // NOTE(review): certificate and host verification are disabled here, which leaves
        // HTTPS requests open to man-in-the-middle attacks. Left as-is for compatibility.
        curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, FALSE);
        curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, FALSE);
        // 1 == CURL_SSLVERSION_TLSv1 (previously passed as boolean true, which is the same value)
        curl_setopt($curl, CURLOPT_SSLVERSION, 1);
    }

    $header = array('Content-type: application/x-www-form-urlencoded;charset=UTF-8', 'X-Requested-With: XMLHttpRequest');
    $cookie and $header[] = "Cookie: $cookie";
    curl_setopt($curl, CURLOPT_HTTPHEADER, $header);

    // NOTE(review): $post === 'GET' is truthy, so that mode also sends a POST whose body
    // is the string "GET" — confirm whether this is intended.
    if ($post) {
        curl_setopt($curl, CURLOPT_POST, true);
        // Set the Referer automatically when following redirects
        curl_setopt($curl, CURLOPT_AUTOREFERER, true);
        curl_setopt($curl, CURLOPT_POSTFIELDS, $post);
    }

    if ($ms) {
        curl_setopt($curl, CURLOPT_NOSIGNAL, true);
        curl_setopt($curl, CURLOPT_TIMEOUT_MS, intval($timeout)); // milliseconds
    } else {
        curl_setopt($curl, CURLOPT_TIMEOUT, intval($timeout)); // seconds
    }

    curl_setopt($curl, CURLOPT_ENCODING, 'gzip');

    $output = curl_exec($curl);
    // In 'GET' mode return the final URL rather than the page content
    'GET' == $post and $output = curl_getinfo($curl, CURLINFO_EFFECTIVE_URL);
    curl_close($curl);

    return $output;
}

/**
 * Download the resource at $img with cURL and return its raw content.
 *
 * @param string $img URL to fetch.
 * @return string|bool The response body, or FALSE when curl_exec() fails.
 */
function save_image($img)
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $img);
    // Return the content instead of echoing it
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    // NOTE(review): TLS verification is disabled, as in https_request()
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_ENCODING, 'gzip');
    $output = curl_exec($ch);
    curl_close($ch);
    return $output;
}

/**
 * Compute the offset used to centre a TrueType string on a background.
 *
 * @param float  $size     Font size.
 * @param string $str      Text to measure.
 * @param string $font     Path to the font file.
 * @param int    $width    Background width.
 * @param int    $multiple Divisor applied to the remaining width (2 by default).
 * @return float|int
 */
function calculate_str_width($size, $str, $font, $width, $multiple = 2)
{
    $box = imagettfbbox($size, 0, $font, $str);
    // $box[4] / $box[6] are the x coordinates of the upper-right / upper-left corners
    return ($width - $box[4] - $box[6]) / $multiple;
}

/**
 * Recursively scan $path and echo every file whose extension is one of
 * php, asp, jsp, cgi, exe or dll.
 *
 * @param string $path Directory to scan; anything that is not a directory is ignored.
 * @return void
 */
function search_directory($path)
{
    if (is_dir($path)) {
        $paths = scandir($path);
        foreach ($paths as $val) {
            $sub_path = $path . '/' . $val;
            if ('.' == $val || '..' == $val) {
                continue;
            } else if (is_dir($sub_path)) {
                search_directory($sub_path);
            } else {
                $ext = strtolower(file_ext($sub_path));
                if (in_array($ext, array('php', 'asp', 'jsp', 'cgi', 'exe', 'dll'), TRUE)) {
                    echo '异常文件:' . $sub_path . "\n";
                }
            }
        }
    }
}

/**
 * Flatten a one-dimensional array into "key=value&key=value" strings.
 *
 * The 'sign' key is removed, keys are sorted, and entries whose value is
 * empty() or an array are skipped.
 *
 * @param array  $arr  One-dimensional array.
 * @param string $sign Receives the raw string to be signed.
 * @param string $url  Receives the url-encoded query string, passed through htmlspecialchars().
 * @return void
 * @throws Exception When $arr is multi-dimensional.
 */
function array_to_string($arr, &$sign = '', &$url = '')
{
    if (count($arr) != count($arr, 1)) throw new Exception('Does not support multi-dimensional array to string');

    // The signature itself never takes part in signing
    unset($arr['sign']);
    ksort($arr);
    reset($arr);

    $url = '';
    $sign = '';
    foreach ($arr as $key => $val) {
        if (empty($val) || is_array($val)) continue;
        $url .= $key . '=' . urlencode($val) . '&';
        $sign .= $key . '=' . $val . '&';
    }

    // Drop the trailing '&'
    $url = substr($url, 0, -1);
    $url = htmlspecialchars($url);
    $sign = substr($sign, 0, -1);
}

/**
 * Sign $data with an RSA private key.
 *
 * @param string $data      Data to sign.
 * @param string $key       Key body without the PEM header/footer; it is wrapped to
 *                          64-character lines and enclosed in RSA PRIVATE KEY markers here.
 * @param string $sign_type 'RSA2' signs with SHA256, anything else with SHA1.
 * @return string Base64-encoded signature (empty when openssl_sign() fails).
 * @throws Exception When OpenSSL or SHA256 support is missing.
 */
function rsa_create_sign($data, $key, $sign_type = 'RSA')
{
    if (!function_exists('openssl_sign')) throw new Exception('OpenSSL extension is not enabled');
    if (!defined('OPENSSL_ALGO_SHA256')) throw new Exception('Only versions above PHP 5.4.8 support SHA256');

    $key = wordwrap($key, 64, "\n", true);
    if (FALSE === $key) throw new Exception('Private Key Error');
    $key = "-----BEGIN RSA PRIVATE KEY-----\n$key\n-----END RSA PRIVATE KEY-----";

    // Initialised so that a failed openssl_sign() does not hand NULL to base64_encode()
    $sign = '';
    if ('RSA2' == $sign_type) {
        openssl_sign($data, $sign, $key, OPENSSL_ALGO_SHA256);
    } else {
        openssl_sign($data, $sign, $key, OPENSSL_ALGO_SHA1);
    }

    return base64_encode($sign);
}

/**
 * Verify a base64-encoded RSA signature with a public key.
 *
 * @param string $data      Data that was signed.
 * @param string $sign      Base64-encoded signature.
 * @param string $key       Key body without the PEM header/footer.
 * @param string $sign_type 'RSA2' verifies with SHA256, anything else with SHA1.
 * @return bool TRUE only when openssl_verify() returns 1.
 * @throws Exception On a key wrapping failure.
 */
function rsa_verify_sign($data, $sign, $key, $sign_type = 'RSA')
{
    $key = wordwrap($key, 64, "\n", true);
    if (FALSE === $key) throw new Exception('Public Key Error');
    $key = "-----BEGIN PUBLIC KEY-----\n$key\n-----END PUBLIC KEY-----";

    // openssl_verify(): 1 = valid, 0 = invalid, -1 = error
    if ('RSA2' == $sign_type) {
        $result = openssl_verify($data, base64_decode($sign), $key, OPENSSL_ALGO_SHA256);
    } else {
        $result = openssl_verify($data, base64_decode($sign), $key, OPENSSL_ALGO_SHA1);
    }

    return $result === 1;
}

/**
 * Convert a flat array to XML, e.g. array('appid' => 'appid', 'code' => 'success').
 *
 * Numeric values are emitted as plain text, all other values inside CDATA.
 *
 * NOTE(review): the markup literals in this function had been stripped from the
 * file (non-numeric values were dropped and no element was ever closed). The
 * <xml> root, the closing tags and the CDATA wrapper are a reconstruction that
 * matches xml_to_array()'s LIBXML_NOCDATA parsing — confirm against the
 * upstream source.
 *
 * @param array $arr Non-empty array of tag name => scalar value.
 * @return string
 * @throws Exception When $arr is not a non-empty array.
 */
function array_to_xml($arr)
{
    if (!is_array($arr) || empty($arr)) throw new Exception('Array Error');

    $xml = '<xml>';
    foreach ($arr as $key => $val) {
        if (is_numeric($val)) {
            $xml .= '<' . $key . '>' . $val . '</' . $key . '>';
        } else {
            $xml .= '<' . $key . '><![CDATA[' . $val . ']]></' . $key . '>';
        }
    }
    $xml .= '</xml>';

    return $xml;
}

/**
 * Parse an XML string into an array (top-level elements only, via an array cast).
 *
 * @param string $xml XML document.
 * @return array
 * @throws Exception When $xml is empty.
 */
function xml_to_array($xml)
{
    if (!$xml) throw new Exception('XML error');

    // libxml_disable_entity_loader() is deprecated as of PHP 8.0, where external
    // entity loading is already off by default; only toggle it on older versions.
    $old = NULL;
    if (PHP_VERSION_ID < 80000 && function_exists('libxml_disable_entity_loader')) {
        $old = libxml_disable_entity_loader(true);
    }

    $result = (array)simplexml_load_string($xml, null, LIBXML_NOCDATA | LIBXML_COMPACT);

    // Restore the previous setting if the loader had been enabled
    if (FALSE === $old) libxml_disable_entity_loader(false);

    return $result;
}

/**
 * Read a file lazily, yielding one trimmed line at a time.
 *
 * The loop stops when fgets() reports end of file, so no extra empty string is
 * yielded after the last line, and the handle is closed even when the consumer
 * stops iterating early.
 *
 * @param string $file Path of the file to read.
 * @return Generator Yields each line with surrounding whitespace trimmed.
 */
function well_import($file)
{
    $handle = fopen($file, 'r');
    if (!$handle) return;

    try {
        while (FALSE !== ($line = fgets($handle))) {
            yield trim($line);
        }
    } finally {
        fclose($handle);
    }
}

/**
 * Count the lines of a file, memoised per request and cached for 300 seconds.
 *
 * NOTE(review): both caches are keyed by $key only, not by $file, so two files
 * counted with the default key share one result.
 *
 * @param string $file Path of the file to count.
 * @param string $key  Cache key.
 * @return int
 */
function well_import_total($file, $key = 'well_import_total')
{
    static $cache = array();
    if (isset($cache[$key])) return $cache[$key];

    $count = cache_get($key);
    if (NULL === $count) {
        $count = iterator_count(well_import($file));
        $count and cache_set($key, $count, 300);
    }

    return $cache[$key] = $count;
}

// Accumulator shared by well_search_dir(); FALSE until the first call
$g_dir_file = FALSE;

/**
 * Recursively collect the paths of all files below $path.
 *
 * Results are appended to the global $g_dir_file, so repeated calls keep
 * accumulating into the same list.
 *
 * @param string $path Directory to scan.
 * @return array The global list of file paths collected so far.
 */
function well_search_dir($path)
{
    global $g_dir_file;
    FALSE === $g_dir_file and $g_dir_file = array();

    if (is_dir($path)) {
        $paths = scandir($path);
        foreach ($paths as $val) {
            $sub_path = $path . '/' . $val;
            if ('.' == $val || '..' == $val) {
                continue;
            } else if (is_dir($sub_path)) {
                well_search_dir($sub_path);
            } else {
                $g_dir_file[] = $sub_path;
            }
        }
    }

    return $g_dir_file;
}

?>gcc - Standard way of calling math functions in C when using OpenMP &amp; its offloading feature(s)? - Stack Overflow
最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

gcc - Standard way of calling math functions in C when using OpenMP & its offloading feature(s)? - Stack Overflow

programmeradmin2浏览0评论

I am writing some code in C in which I want to add the optional ability to have certain sections of the code accelerated using OpenMP, and with an additional optional ability to have them accelerated with devices such as GPUs. For example, my matrix multiplication function leverages GPU acceleration:

/* ... */
/*
 * Multiply two matrices stored as flat row-major arrays: C = A * B.
 *
 * pt_a holds t_m x t_k elements, pt_b holds t_k x t_n elements and pt_c
 * receives t_m x t_n elements (see the index arithmetic and map clauses below).
 * When compiled with OpenMP, the two outer loops are collapsed and offloaded
 * with a target directive: pt_a and pt_b are mapped to the device and pt_c is
 * mapped back from it. Always returns 0.
 */
int numeric_matmul(const float_t *pt_a, const float_t *pt_b, float_t *pt_c, uintmax_t t_m, uintmax_t t_k, uintmax_t t_n)
{
#ifdef _OPENMP
#pragma omp target teams distribute parallel for collapse(2) schedule(dynamic) map(to: pt_a[0 : t_m * t_k], pt_b[0 : t_k * t_n]) map(from: pt_c[0 : t_m * t_n])
#endif
    for(uintmax_t l_i = 0; l_i < t_m; l_i++)
    {
        for(uintmax_t l_j = 0; l_j < t_n; l_j++)
        {
/* Compute the sum: dot product of row l_i of A with column l_j of B. */
            float_t l_sum = 0.0;
            for(uintmax_t l_p = 0; l_p < t_k; l_p++) l_sum += pt_a[l_i * t_k + l_p] * pt_b[l_p * t_n + l_j];

/* Store the result in element (l_i, l_j) of C. */
            pt_c[l_i * t_n + l_j] = l_sum;
        }
    }

/* Return with success. */
    return 0;
}

And it works fine. However, when I try to use common mathematics-related functions (from math.h), I face an obstacle; how can I use those functions for both the CPU and the GPU? I have this function here:

/**
 *  @brief Compute the logistic sigmoid, 1 / (1 + e^(-x)), of a value.
 *  @param t_x The input value.
 *  @param pt_y The output value; written through the pointer without a null check.
 *  @return The result status code. In this case, it'll always return 0.
 */
static inline int numeric_sigmoid(float_t t_x, float_t *pt_y)
{
/* Set the output value to the sigmoid of the input value. The unsuffixed 1.0
   constants have type double, so the float returned by expf() is promoted and
   the addition and division are evaluated in double before being stored. */
    *pt_y = 1.0 / (1.0 + expf(-t_x));

/* Return with success. */
    return 0;
}

This relies on the expf function. I want this function to be able to run on both the CPU and the GPU. It runs fine on the CPU side of my codebase, but as soon as I try to leverage the GPU's power:

#pragma omp target teams distribute parallel for schedule(dynamic) map(to: pt_feedforward->ppt_hidden_layer_bias_buffer[l_i][0 : l_next_layer_activation_buffer_size]) map(from: pl_next_layer_activation_buffer[0 : l_next_layer_activation_buffer_size])
for(uintmax_t l_j = 0; l_j < l_next_layer_activation_buffer_size; l_j++)
{
    pl_next_layer_activation_buffer[l_j] += pt_feedforward->ppt_hidden_layer_bias_buffer[l_i][l_j];
    numeric_sigmoid(pl_next_layer_activation_buffer[l_j], &pl_next_layer_activation_buffer[l_j]);
}

I face a runtime error:

libgomp: pointer target not mapped for attach

I get GCC to compile with GPU offloading using my NVIDIA card by telling CMake to use some additional parameters:

cmake -DCMAKE_C_COMPILER=gcc -DCMAKE_C_FLAGS="-fopenmp -foffload=nvptx-none -foffload-options=-misa=sm_80 -fcf-protection=none -fno-stack-protector -no-pie" ..

I also tried using -ffast-math, but that didn't seem to do anything either...

Does anyone here know how to properly use math.h and its features in GPU-accelerated contexts as well when dealing with OpenMP's GPU offloading feature?

I am writing some code in C in which I want to add the optional ability to have certain sections of the code accelerated using OpenMP, and with an additional optional ability to have them accelerated with devices such as GPUs. For example, my matrix multiplication function leverages GPU acceleration:

/* ... */
/*
 * Multiply two matrices stored as flat row-major arrays: C = A * B.
 *
 * pt_a holds t_m x t_k elements, pt_b holds t_k x t_n elements and pt_c
 * receives t_m x t_n elements (see the index arithmetic and map clauses below).
 * When compiled with OpenMP, the two outer loops are collapsed and offloaded
 * with a target directive: pt_a and pt_b are mapped to the device and pt_c is
 * mapped back from it. Always returns 0.
 */
int numeric_matmul(const float_t *pt_a, const float_t *pt_b, float_t *pt_c, uintmax_t t_m, uintmax_t t_k, uintmax_t t_n)
{
#ifdef _OPENMP
#pragma omp target teams distribute parallel for collapse(2) schedule(dynamic) map(to: pt_a[0 : t_m * t_k], pt_b[0 : t_k * t_n]) map(from: pt_c[0 : t_m * t_n])
#endif
    for(uintmax_t l_i = 0; l_i < t_m; l_i++)
    {
        for(uintmax_t l_j = 0; l_j < t_n; l_j++)
        {
/* Compute the sum: dot product of row l_i of A with column l_j of B. */
            float_t l_sum = 0.0;
            for(uintmax_t l_p = 0; l_p < t_k; l_p++) l_sum += pt_a[l_i * t_k + l_p] * pt_b[l_p * t_n + l_j];

/* Store the result in element (l_i, l_j) of C. */
            pt_c[l_i * t_n + l_j] = l_sum;
        }
    }

/* Return with success. */
    return 0;
}

And it works fine. However, when I try to use common mathematics-related functions (from math.h), I face an obstacle; how can I use those functions for both the CPU and the GPU? I have this function here:

/**
 *  @brief Compute the logistic sigmoid, 1 / (1 + e^(-x)), of a value.
 *  @param t_x The input value.
 *  @param pt_y The output value; written through the pointer without a null check.
 *  @return The result status code. In this case, it'll always return 0.
 */
static inline int numeric_sigmoid(float_t t_x, float_t *pt_y)
{
/* Set the output value to the sigmoid of the input value. The unsuffixed 1.0
   constants have type double, so the float returned by expf() is promoted and
   the addition and division are evaluated in double before being stored. */
    *pt_y = 1.0 / (1.0 + expf(-t_x));

/* Return with success. */
    return 0;
}

This relies on the expf function. I want this function to be able to run on both the CPU and the GPU. It runs fine on the CPU side of my codebase, but as soon as I try to leverage the GPU's power:

#pragma omp target teams distribute parallel for schedule(dynamic) map(to: pt_feedforward->ppt_hidden_layer_bias_buffer[l_i][0 : l_next_layer_activation_buffer_size]) map(from: pl_next_layer_activation_buffer[0 : l_next_layer_activation_buffer_size])
for(uintmax_t l_j = 0; l_j < l_next_layer_activation_buffer_size; l_j++)
{
    pl_next_layer_activation_buffer[l_j] += pt_feedforward->ppt_hidden_layer_bias_buffer[l_i][l_j];
    numeric_sigmoid(pl_next_layer_activation_buffer[l_j], &pl_next_layer_activation_buffer[l_j]);
}

I face a runtime error:

libgomp: pointer target not mapped for attach

I get GCC to compile with GPU offloading using my NVIDIA card by telling CMake to use some additional parameters:

cmake -DCMAKE_C_COMPILER=gcc -DCMAKE_C_FLAGS="-fopenmp -foffload=nvptx-none -foffload-options=-misa=sm_80 -fcf-protection=none -fno-stack-protector -no-pie" ..

I also tried using -ffast-math, but that didn't seem to do anything either...

Does anyone here know how to properly use math.h and its features in GPU-accelerated contexts as well when dealing with OpenMP's GPU offloading feature?

Share Improve this question edited Mar 31 at 17:22 Marco Bonelli 69.7k21 gold badges127 silver badges146 bronze badges asked Mar 30 at 19:01 Matthew G.Matthew G. 1241 silver badge9 bronze badges 4
  • A few side notes: matrix multiplication is implemented on recent (Nvidia) GPUs and will certainly not be used by the kernel. MM units are generally much faster. 1.0 / (1.0 + expf(-t_x)); certainly uses double precision for the computation, so you should use 1.0f / (1.0f + expf(-t_x)); instead. Double precision is generally (much) slower on GPUs (especially on client-side ones). – Jérôme Richard Commented Mar 30 at 19:36
  • Which gcc version do you use? According to the gcc wiki, you need to specify -foffload=-lm for older gcc versions: gcc.gnu.org/wiki/Offloading – Joachim Commented Mar 30 at 19:54
  • @JérômeRichard Thanks for the suggestion! I didn't know NVIDIA supported matrix multiplication as an independent operator but I'll certainly look into it! Thanks for the suggestion with fixing the double-precision problem as well, I didn't notice that! – Matthew G. Commented Mar 30 at 20:21
  • @Joachim I am using GCC 14. Specifically, 14.2.1. – Matthew G. Commented Mar 30 at 20:21
Add a comment  | 

2 Answers 2

Reset to default 2

From OpenMP spec perspective, calling functions from the math library is not different from calling any other function.

To call a function from within a target region, the function must be declared as a target function (#pragma omp declare target) and also compiled for the targeted device.

Some math functions might have a specialized instruction or implementation available for the targeted architecture. In such cases, the compiler replaces the function call with a call to the specialized implementation. For math functions, most OpenMP implementations ship libraries compiled for the targeted architecture. As with any other library you want to use from application code, you also need to link the math library (-lm). I think that many OpenMP programmers are not used to explicitly linking the math library when needed, because for a long time gcc's -fopenmp implicitly linked the math library.

With gcc-13, you need to link with -lm -foffload=-lm. With gcc-14, it is sufficient to link with -lm, the offloading math library is implied then.

This looks like neural network code. There's a rather fundamental problem that you are completely overlooking here. GPUs physically have their own memory. If you are trying to execute operations on both the CPU and the GPU, modern GPUs can hide some of the complexity, but you simply cannot hide the time it physically takes to move data between different memories.

Hence, what you absolutely want to do is to execute one big chunk of operations on the GPU. That means you copy the raw input in, and copy the output out. Leave the CPU for things like file reading, things that are not mathematically hard.

Specifically, use CUDA/CuDNN for all operations that they directly implement. Your OpenMP code is never going to be nearly as fast.

与本文相关的文章

发布评论

评论列表(0)

  1. 暂无评论