te')); return $arr; } /* 遍历用户所有主题 * @param $uid 用户ID * @param int $page 页数 * @param int $pagesize 每页记录条数 * @param bool $desc 排序方式 TRUE降序 FALSE升序 * @param string $key 返回的数组用那一列的值作为 key * @param array $col 查询哪些列 */ function thread_tid_find_by_uid($uid, $page = 1, $pagesize = 1000, $desc = TRUE, $key = 'tid', $col = array()) { if (empty($uid)) return array(); $orderby = TRUE == $desc ? -1 : 1; $arr = thread_tid__find($cond = array('uid' => $uid), array('tid' => $orderby), $page, $pagesize, $key, $col); return $arr; } // 遍历栏目下tid 支持数组 $fid = array(1,2,3) function thread_tid_find_by_fid($fid, $page = 1, $pagesize = 1000, $desc = TRUE) { if (empty($fid)) return array(); $orderby = TRUE == $desc ? -1 : 1; $arr = thread_tid__find($cond = array('fid' => $fid), array('tid' => $orderby), $page, $pagesize, 'tid', array('tid', 'verify_date')); return $arr; } function thread_tid_delete($tid) { if (empty($tid)) return FALSE; $r = thread_tid__delete(array('tid' => $tid)); return $r; } function thread_tid_count() { $n = thread_tid__count(); return $n; } // 统计用户主题数 大数量下严谨使用非主键统计 function thread_uid_count($uid) { $n = thread_tid__count(array('uid' => $uid)); return $n; } // 统计栏目主题数 大数量下严谨使用非主键统计 function thread_fid_count($fid) { $n = thread_tid__count(array('fid' => $fid)); return $n; } ?>python - Convert dictionary of lists with entries as dictionaries into dataframe with top level key as additional column value i
最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

python - Convert dictionary of lists with entries as dictionaries into dataframe with top level key as additional column value i

programmeradmin3浏览0评论

I have a dictionary of lists, each with a key string value (stock ticker) and value consisting of a list of dicts which looks like this:

data
Out[88]: 
{'NVDA': [{'open': 144.75, 'high': 144.21, 'low': 174.33, 'close': 210.47},
  {'open': 123.97, 'high': 128.5, 'low': 110.25, 'close': 154.09},
  {'open': 118.19, 'high': 134.81, 'low': 104.37, 'close': 149.72},
  {'open': 225.35, 'high': 126.81, 'low': 104.77, 'close': 209.46},
  {'open': 247.2, 'high': 243.25, 'low': 220.44, 'close': 186.01}],
 'MSFT': [{'open': 175.78, 'high': 213.98, 'low': 229.75, 'close': 206.59},
  {'open': 142.98, 'high': 168.42, 'low': 188.33, 'close': 232.52},
  {'open': 184.14, 'high': 163.42, 'low': 194.81, 'close': 153.03},
  {'open': 199.54, 'high': 130.26, 'low': 101.05, 'close': 102.1},
  {'open': 243.91, 'high': 119.21, 'low': 190.2, 'close': 223.31}],
 'AAPL': [{'open': 202.06, 'high': 162.54, 'low': 212.3, 'close': 226.78},
  {'open': 191.17, 'high': 153.49, 'low': 135.13, 'close': 151.83},
  {'open': 187.15, 'high': 149.75, 'low': 123.28, 'close': 247.32},
  {'open': 194.29, 'high': 175.34, 'low': 244.14, 'close': 207.45},
  {'open': 228.9, 'high': 133.26, 'low': 100.59, 'close': 129.35}]}


import random  # required by random.uniform below; the snippet did not import it

ticks = ['NVDA', 'MSFT', 'AAPL']

# Build five random price records per ticker. Each field is drawn
# independently from [100, 250], so 'high'/'low' are not guaranteed to
# bracket 'open'/'close' -- this is sample data only.
data = {
    s: [
        {
            'open': round(random.uniform(100, 250), 2),
            'high': round(random.uniform(100, 250), 2),
            'low': round(random.uniform(100, 250), 2),
            'close': round(random.uniform(100, 250), 2),
        }
        for _ in range(5)
    ]
    for s in ticks
}

I'd like to convert this to a dataframe which looks like this:

df
Out[98]: 
    tick    open    high     low   close
0   NVDA  215.44  124.29  121.61  244.35
1   NVDA  214.89  184.49  157.39  239.31
2   NVDA  221.42  204.17  148.83  215.00
3   NVDA  182.49  104.29  175.36  226.59
4   NVDA  127.31  182.31  228.92  173.52
5   MSFT  217.79  147.98  120.40  239.97
6   MSFT  108.66  222.83  177.20  172.62
7   MSFT  138.16  116.36  241.62  231.15
8   MSFT  160.53  234.88  154.93  127.49
9   MSFT  168.22  127.77  224.75  207.59
10  AAPL  119.95  106.36  150.28  195.93
11  AAPL  117.71  142.54  210.08  116.37
12  AAPL  147.07  204.46  223.98  104.91
13  AAPL  135.71  211.83  210.11  102.34
14  AAPL  216.45  136.08  130.27  236.48

I have a dictionary of lists, each with a key string value (stock ticker) and value consisting of a list of dicts which looks like this:

data
Out[88]: 
{'NVDA': [{'open': 144.75, 'high': 144.21, 'low': 174.33, 'close': 210.47},
  {'open': 123.97, 'high': 128.5, 'low': 110.25, 'close': 154.09},
  {'open': 118.19, 'high': 134.81, 'low': 104.37, 'close': 149.72},
  {'open': 225.35, 'high': 126.81, 'low': 104.77, 'close': 209.46},
  {'open': 247.2, 'high': 243.25, 'low': 220.44, 'close': 186.01}],
 'MSFT': [{'open': 175.78, 'high': 213.98, 'low': 229.75, 'close': 206.59},
  {'open': 142.98, 'high': 168.42, 'low': 188.33, 'close': 232.52},
  {'open': 184.14, 'high': 163.42, 'low': 194.81, 'close': 153.03},
  {'open': 199.54, 'high': 130.26, 'low': 101.05, 'close': 102.1},
  {'open': 243.91, 'high': 119.21, 'low': 190.2, 'close': 223.31}],
 'AAPL': [{'open': 202.06, 'high': 162.54, 'low': 212.3, 'close': 226.78},
  {'open': 191.17, 'high': 153.49, 'low': 135.13, 'close': 151.83},
  {'open': 187.15, 'high': 149.75, 'low': 123.28, 'close': 247.32},
  {'open': 194.29, 'high': 175.34, 'low': 244.14, 'close': 207.45},
  {'open': 228.9, 'high': 133.26, 'low': 100.59, 'close': 129.35}]}


import random  # required by random.uniform below; the snippet did not import it

ticks = ['NVDA', 'MSFT', 'AAPL']

# Build five random price records per ticker. Each field is drawn
# independently from [100, 250], so 'high'/'low' are not guaranteed to
# bracket 'open'/'close' -- this is sample data only.
data = {
    s: [
        {
            'open': round(random.uniform(100, 250), 2),
            'high': round(random.uniform(100, 250), 2),
            'low': round(random.uniform(100, 250), 2),
            'close': round(random.uniform(100, 250), 2),
        }
        for _ in range(5)
    ]
    for s in ticks
}

I'd like to convert this to a dataframe which looks like this:

df
Out[98]: 
    tick    open    high     low   close
0   NVDA  215.44  124.29  121.61  244.35
1   NVDA  214.89  184.49  157.39  239.31
2   NVDA  221.42  204.17  148.83  215.00
3   NVDA  182.49  104.29  175.36  226.59
4   NVDA  127.31  182.31  228.92  173.52
5   MSFT  217.79  147.98  120.40  239.97
6   MSFT  108.66  222.83  177.20  172.62
7   MSFT  138.16  116.36  241.62  231.15
8   MSFT  160.53  234.88  154.93  127.49
9   MSFT  168.22  127.77  224.75  207.59
10  AAPL  119.95  106.36  150.28  195.93
11  AAPL  117.71  142.54  210.08  116.37
12  AAPL  147.07  204.46  223.98  104.91
13  AAPL  135.71  211.83  210.11  102.34
14  AAPL  216.45  136.08  130.27  236.48
Share Improve this question asked Feb 17 at 19:35 Chris 1,700 4 gold badges 19 silver badges 29 bronze badges 3
  • Have you looked at pd.json_normalize()? – Barmar Commented Feb 17 at 19:48
  • @Barmar, hadn't, but doesn't appear to return what I need (gives tickers as column headers and associated list values as entries--not dissimilar to what pd.DataFrame() gives) – Chris Commented Feb 17 at 19:55
  • You only need a minor remodeling to be able to use json_normalize. See below. – mozway Commented Feb 18 at 2:41
Add a comment  | 

5 Answers 5

Reset to default 3

This is a simple transformation to do if you want to get it into a format the pd.DataFrame constructor understands (a list of dicts):

df = pd.DataFrame(
    [
        {"ticker":k, **v}
        for k, vs in data.items()
        for v in vs
    ]
)

This will require auxiliary memory though.

You can read this with json_normalize if you make the input a list of records:

df = pd.json_normalize([{'tick': k, 'data': v} for k, v in data.items()],
                       'data', meta='tick')

This should be relatively lightweight since the lists will be shared in memory with the original ones of data.

Output:

      open    high     low   close  tick
0   202.53  159.85  192.78  159.08  NVDA
1   161.14  165.17  189.66  155.31  NVDA
2   216.04  194.22  127.27  114.98  NVDA
3   204.64  137.89  103.44  111.93  NVDA
4   245.47  131.42  138.11  177.44  NVDA
5   197.37  140.20  190.76  180.82  MSFT
6   213.40  237.43  118.40  238.46  MSFT
7   127.91  192.21  186.09  221.07  MSFT
8   216.28  249.58  162.59  111.86  MSFT
9   100.44  149.07  223.15  185.34  MSFT
10  138.62  215.26  107.22  110.75  AAPL
11  188.77  104.89  193.78  183.34  AAPL
12  151.65  128.45  239.33  249.28  AAPL
13  151.82  142.17  241.76  134.61  AAPL
14  239.02  180.75  158.85  184.81  AAPL

Another option, chain and read the data, insert the tickers afterwards:

from itertools import chain
import numpy as np

df = pd.DataFrame(chain.from_iterable(data.values()))
df.insert(0, 'tick', np.repeat(list(data), [len(l) for l in  data.values()]))

Output:

    tick    open    high     low   close
0   NVDA  202.53  159.85  192.78  159.08
1   NVDA  161.14  165.17  189.66  155.31
2   NVDA  216.04  194.22  127.27  114.98
3   NVDA  204.64  137.89  103.44  111.93
4   NVDA  245.47  131.42  138.11  177.44
5   MSFT  197.37  140.20  190.76  180.82
6   MSFT  213.40  237.43  118.40  238.46
7   MSFT  127.91  192.21  186.09  221.07
8   MSFT  216.28  249.58  162.59  111.86
9   MSFT  100.44  149.07  223.15  185.34
10  AAPL  138.62  215.26  107.22  110.75
11  AAPL  188.77  104.89  193.78  183.34
12  AAPL  151.65  128.45  239.33  249.28
13  AAPL  151.82  142.17  241.76  134.61
14  AAPL  239.02  180.75  158.85  184.81

Another possible solution, which is also based on json_normalize:

# `data` is the ticker -> list-of-records dict from the question.
# Each per-ticker frame keeps its own 0..n-1 index, so index labels repeat
# in the concatenated result.
out = pd.concat([pd.json_normalize(data[x]).assign(ticker=x) for x in data])

If the ticker column really needs to be the first one, please use:

out[np.roll(out.columns, 1)]

Output:

  ticker    open    high     low   close
0   NVDA  144.75  144.21  174.33  210.47
1   NVDA  123.97  128.50  110.25  154.09
2   NVDA  118.19  134.81  104.37  149.72
3   NVDA  225.35  126.81  104.77  209.46
4   NVDA  247.20  243.25  220.44  186.01
0   MSFT  175.78  213.98  229.75  206.59
1   MSFT  142.98  168.42  188.33  232.52
2   MSFT  184.14  163.42  194.81  153.03
3   MSFT  199.54  130.26  101.05  102.10
4   MSFT  243.91  119.21  190.20  223.31
0   AAPL  202.06  162.54  212.30  226.78
1   AAPL  191.17  153.49  135.13  151.83
2   AAPL  187.15  149.75  123.28  247.32
3   AAPL  194.29  175.34  244.14  207.45
4   AAPL  228.90  133.26  100.59  129.35

Here is one option with pd.concat + pd.join

pd.concat(
    map(lambda x: pd.DataFrame({'tick':[x[0]]*len(x[1])})
        .join(pd.DataFrame(x[1])),
        data.items())).reset_index(drop = True)

or, as suggested by @juanpa.arrivillaga in the comments, to avoid the join:

(df := pd.concat(
    [pd.DataFrame(vs).assign(tick=k) for k, vs in data.items()], ignore_index=True
))[np.roll(df.columns,1)]

which gives

    tick    open    high     low   close
0   NVDA  144.75  144.21  174.33  210.47
1   NVDA  123.97  128.50  110.25  154.09
2   NVDA  118.19  134.81  104.37  149.72
3   NVDA  225.35  126.81  104.77  209.46
4   NVDA  247.20  243.25  220.44  186.01
5   MSFT  175.78  213.98  229.75  206.59
6   MSFT  142.98  168.42  188.33  232.52
7   MSFT  184.14  163.42  194.81  153.03
8   MSFT  199.54  130.26  101.05  102.10
9   MSFT  243.91  119.21  190.20  223.31
10  AAPL  202.06  162.54  212.30  226.78
11  AAPL  191.17  153.49  135.13  151.83
12  AAPL  187.15  149.75  123.28  247.32
13  AAPL  194.29  175.34  244.14  207.45
14  AAPL  228.90  133.26  100.59  129.35

One way to do it:

import pandas as pd

data = {'NVDA': [{'open': 144.75, 'high': 144.21, 'low': 174.33, 'close': 210.47},
  {'open': 123.97, 'high': 128.5, 'low': 110.25, 'close': 154.09},
  {'open': 118.19, 'high': 134.81, 'low': 104.37, 'close': 149.72},
  {'open': 225.35, 'high': 126.81, 'low': 104.77, 'close': 209.46},
  {'open': 247.2, 'high': 243.25, 'low': 220.44, 'close': 186.01}],
 'MSFT': [{'open': 175.78, 'high': 213.98, 'low': 229.75, 'close': 206.59},
  {'open': 142.98, 'high': 168.42, 'low': 188.33, 'close': 232.52},
  {'open': 184.14, 'high': 163.42, 'low': 194.81, 'close': 153.03},
  {'open': 199.54, 'high': 130.26, 'low': 101.05, 'close': 102.1},
  {'open': 243.91, 'high': 119.21, 'low': 190.2, 'close': 223.31}],
 'AAPL': [{'open': 202.06, 'high': 162.54, 'low': 212.3, 'close': 226.78},
  {'open': 191.17, 'high': 153.49, 'low': 135.13, 'close': 151.83},
  {'open': 187.15, 'high': 149.75, 'low': 123.28, 'close': 247.32},
  {'open': 194.29, 'high': 175.34, 'low': 244.14, 'close': 207.45},
  {'open': 228.9, 'high': 133.26, 'low': 100.59, 'close': 129.35}]}

# Flatten to one record per row, tagging each with its ticker. A new dict is
# built for every row so the dicts stored in `data` are not modified (the
# earlier loop wrote a 'tick' key into them as a side effect).
dfs = [{'tick': k, **d} for k, v in data.items() for d in v]

# Passing `columns` fixes the column order with 'tick' first.
df = pd.DataFrame(dfs, columns=['tick', 'open', 'high', 'low', 'close'])

print(df)

Result

    tick    open    high     low   close
0   NVDA  144.75  144.21  174.33  210.47
1   NVDA  123.97  128.50  110.25  154.09
2   NVDA  118.19  134.81  104.37  149.72
3   NVDA  225.35  126.81  104.77  209.46
4   NVDA  247.20  243.25  220.44  186.01
5   MSFT  175.78  213.98  229.75  206.59
6   MSFT  142.98  168.42  188.33  232.52
7   MSFT  184.14  163.42  194.81  153.03
8   MSFT  199.54  130.26  101.05  102.10
9   MSFT  243.91  119.21  190.20  223.31
10  AAPL  202.06  162.54  212.30  226.78
11  AAPL  191.17  153.49  135.13  151.83
12  AAPL  187.15  149.75  123.28  247.32
13  AAPL  194.29  175.34  244.14  207.45
14  AAPL  228.90  133.26  100.59  129.35

与本文相关的文章

发布评论

评论列表(0)

  1. 暂无评论