.= 'tag.htm'; break; case 'flag': $pre .= $default_pre .= 'flag.htm'; break; case 'my': $pre .= $default_pre .= 'my.htm'; break; case 'my_password': $pre .= $default_pre .= 'my_password.htm'; break; case 'my_bind': $pre .= $default_pre .= 'my_bind.htm'; break; case 'my_avatar': $pre .= $default_pre .= 'my_avatar.htm'; break; case 'home_article': $pre .= $default_pre .= 'home_article.htm'; break; case 'home_comment': $pre .= $default_pre .= 'home_comment.htm'; break; case 'user': $pre .= $default_pre .= 'user.htm'; break; case 'user_login': $pre .= $default_pre .= 'user_login.htm'; break; case 'user_create': $pre .= $default_pre .= 'user_create.htm'; break; case 'user_resetpw': $pre .= $default_pre .= 'user_resetpw.htm'; break; case 'user_resetpw_complete': $pre .= $default_pre .= 'user_resetpw_complete.htm'; break; case 'user_comment': $pre .= $default_pre .= 'user_comment.htm'; break; case 'single_page': $pre .= $default_pre .= 'single_page.htm'; break; case 'search': $pre .= $default_pre .= 'search.htm'; break; case 'operate_sticky': $pre .= $default_pre .= 'operate_sticky.htm'; break; case 'operate_close': $pre .= $default_pre .= 'operate_close.htm'; break; case 'operate_delete': $pre .= $default_pre .= 'operate_delete.htm'; break; case 'operate_move': $pre .= $default_pre .= 'operate_move.htm'; break; case '404': $pre .= $default_pre .= '404.htm'; break; case 'read_404': $pre .= $default_pre .= 'read_404.htm'; break; case 'list_404': $pre .= $default_pre .= 'list_404.htm'; break; default: $pre .= $default_pre .= theme_mode_pre(); break; } if ($config['theme']) { $conffile = APP_PATH . 'view/template/' . $config['theme'] . '/conf.json'; $json = is_file($conffile) ? xn_json_decode(file_get_contents($conffile)) : array(); } !empty($json['installed']) and $path_file = APP_PATH . 'view/template/' . $config['theme'] . '/htm/' . ($id ? $id . '_' : '') . $pre; (empty($path_file) || !is_file($path_file)) and $path_file = APP_PATH . 'view/template/' . $config['theme'] . 
'/htm/' . $pre; if (!empty($config['theme_child']) && is_array($config['theme_child'])) { foreach ($config['theme_child'] as $theme) { if (empty($theme) || is_array($theme)) continue; $path_file = APP_PATH . 'view/template/' . $theme . '/htm/' . ($id ? $id . '_' : '') . $pre; !is_file($path_file) and $path_file = APP_PATH . 'view/template/' . $theme . '/htm/' . $pre; } } !is_file($path_file) and $path_file = APP_PATH . ($dir ? 'plugin/' . $dir . '/view/htm/' : 'view/htm/') . $default_pre; return $path_file; }

/**
 * Return the default template file name for the configured website mode.
 *
 * Reads $config['setting']['website_mode'] from the global configuration:
 * a value loosely equal to 1 selects the "portal" templates, a value loosely
 * equal to 2 selects the "flat" templates, and anything else selects the
 * "index" templates.
 *
 * NOTE(review): assumes $config['setting']['website_mode'] is always set by
 * the time this runs - confirm against the code that loads the settings.
 *
 * @param int $type When loosely equal to 2 the "*_category.htm" variant is
 *                  returned; any other value yields the plain variant.
 * @return string Template file name, e.g. 'portal.htm' or 'flat_category.htm'.
 */
function theme_mode_pre($type = 0)
{
    global $config;

    $website_mode = $config['setting']['website_mode'];
    $wants_category = (2 == $type);

    // Loose comparisons are kept on purpose so numeric strings such as '1'
    // coming from stored settings still match.
    if (1 == $website_mode) {
        return $wants_category ? 'portal_category.htm' : 'portal.htm';
    }

    if (2 == $website_mode) {
        return $wants_category ? 'flat_category.htm' : 'flat.htm';
    }

    return $wants_category ? 'index_category.htm' : 'index.htm';
}
?>python - How to sample Pandas DataFrame using a normal distribution by using random_state and numpy Generators - Stack Overflow
最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

python - How to sample Pandas DataFrame using a normal distribution by using random_state and numpy Generators - Stack Overflow

programmeradmin0浏览0评论

I am trying to write Pandas code that would allow me to sample a DataFrame using a normal distribution. The most convenient way would be to use the random_state parameter of the sample method to draw random samples, but somehow employ numpy.random.Generator.normal so that the samples are drawn using a normal (Gaussian) distribution.

import pandas as pd
import numpy as np
import random

# Generate a list of unique random numbers
# (100 distinct values drawn from range(1, 101), i.e. a shuffled 1..100)
temp = random.sample(range(1, 101), 100)
df = pd.DataFrame({'temperature': temp})

# Sample normal
rng = np.random.default_rng()
# NOTE(review): `triangle_df` is never defined in this snippet; the frame
# built above is named `df`.
# NOTE(review): rng.normal() is called without arguments, so a single drawn
# value (not the generator itself) is passed as random_state; the surrounding
# text reports that this call does not work.
triangle_df.sample(n=10, random_state=rng.normal())

This obviously doesn't work. There is an issue with random_state=rng.normal().

I am trying to write Pandas code that would allow me to sample a DataFrame using a normal distribution. The most convenient way would be to use the random_state parameter of the sample method to draw random samples, but somehow employ numpy.random.Generator.normal so that the samples are drawn using a normal (Gaussian) distribution.

import pandas as pd
import numpy as np
import random

# Generate a list of unique random numbers
# (100 distinct values drawn from range(1, 101), i.e. a shuffled 1..100)
temp = random.sample(range(1, 101), 100)
df = pd.DataFrame({'temperature': temp})

# Sample normal
rng = np.random.default_rng()
# NOTE(review): `triangle_df` is never defined in this snippet; the frame
# built above is named `df`.
# NOTE(review): rng.normal() is called without arguments, so a single drawn
# value (not the generator itself) is passed as random_state; the surrounding
# text reports that this call does not work.
triangle_df.sample(n=10, random_state=rng.normal())

This obviously doesn't work. There is an issue with random_state=rng.normal().

Share Improve this question asked Jan 30 at 12:38 pjercicpjercic 4731 gold badge7 silver badges19 bronze badges 2
  • I don't think you understand correctly what is a random state. See stackoverflow/questions/28064634/… for an explanation. So we can help you further, could you mathematically define what drawing random samples using a normal distribution means in your case? – deep-learnt-nerd Commented Jan 30 at 12:51
  • I do understand Generators, but what I don't understand is why I cannot use a specific one - instead of the default uniform generator. Your link explains the case of reproducibility using pseudorandom numbers and seeds, which is not the issue in my question. – pjercic Commented Jan 30 at 20:54
Add a comment  | 

1 Answer 1

Reset to default 2

Passing a Generator to sample just changes the way the generator is initialized; it won't change the distribution that is used. Random sampling is uniform (choice is used internally [source]), and you can't change that directly with the random_state parameter.

Also note that normal sampling doesn't really make sense for discrete values (like the rows of a DataFrame).

Now, let's assume that you want to sample the rows of your DataFrame in a non-uniform way (for example, with weights that follow a normal distribution). In that case, you could use the weights parameter to pass custom weights for each row.

Here is an example with normal weights (although I'm not sure if this makes much sense):

# One standard-normal draw per row; absolute values are taken so that every
# weight is non-negative.
rng = np.random.default_rng()
row_count = len(df)
weights = np.abs(rng.normal(size=row_count))

# Draw 10000 rows with replacement, each row picked in proportion to its weight.
sampled = df.sample(n=10000, replace=True, weights=weights)

Another example based on this Q/A. Here we'll give higher probabilities to the rows from the middle of the DataFrame:

from scipy.stats import norm

# Weight each row by a normal density centred on the middle row position
# (N // 2) with a standard deviation of 5 positions.
N = len(df)
positions = np.arange(N)
weights = norm.pdf(positions, loc=N // 2, scale=5)

# Draw 10 weighted rows and show them in index order.
df.sample(n=10, weights=weights).sort_index()

Output (mostly rows around 50):

    temperature
43           94
44           50
47           80
48           99
50           63
51           52
52            1
53           20
54           41
63            3

Probabilities of sampling with a bias for the center (and sampled points):

与本文相关的文章

发布评论

评论列表(0)

  1. 暂无评论