te')); return $arr; } /* 遍历用户所有主题 * @param $uid 用户ID * @param int $page 页数 * @param int $pagesize 每页记录条数 * @param bool $desc 排序方式 TRUE降序 FALSE升序 * @param string $key 返回的数组用那一列的值作为 key * @param array $col 查询哪些列 */ function thread_tid_find_by_uid($uid, $page = 1, $pagesize = 1000, $desc = TRUE, $key = 'tid', $col = array()) { if (empty($uid)) return array(); $orderby = TRUE == $desc ? -1 : 1; $arr = thread_tid__find($cond = array('uid' => $uid), array('tid' => $orderby), $page, $pagesize, $key, $col); return $arr; } // 遍历栏目下tid 支持数组 $fid = array(1,2,3) function thread_tid_find_by_fid($fid, $page = 1, $pagesize = 1000, $desc = TRUE) { if (empty($fid)) return array(); $orderby = TRUE == $desc ? -1 : 1; $arr = thread_tid__find($cond = array('fid' => $fid), array('tid' => $orderby), $page, $pagesize, 'tid', array('tid', 'verify_date')); return $arr; } function thread_tid_delete($tid) { if (empty($tid)) return FALSE; $r = thread_tid__delete(array('tid' => $tid)); return $r; } function thread_tid_count() { $n = thread_tid__count(); return $n; } // 统计用户主题数 大数量下严谨使用非主键统计 function thread_uid_count($uid) { $n = thread_tid__count(array('uid' => $uid)); return $n; } // 统计栏目主题数 大数量下严谨使用非主键统计 function thread_fid_count($fid) { $n = thread_tid__count(array('fid' => $fid)); return $n; } ?>python - Pandas DataFrame returning only 1 column after creating from a list - Stack Overflow
最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

python - Pandas DataFrame returning only 1 column after creating from a list - Stack Overflow

programmeradmin3浏览0评论

I'm using something similar to this as input.txt

 040525 $$$$$   9999         12345
 040525 $$$$$   8888         12345
 040525 $$$$$   7777         12345
 040525 $$$$$   6666         12345

Due to the way this input is pre-processed, I cannot use pd.read_csv correctly. I must first create a list from the input, and then create a DataFrame from that list.

data_list = []
with open('input.txt', 'r') as data:
    for line in data:
        data_list.append(line.strip())
df = pd.DataFrame(data_list)

This results in each whole line being treated as a single value, so the DataFrame has only 1 column:

print(df.shape)
print(df)
print(df.columns.tolist())

(4, 1)
                                   0
0  040525 $$$$$   9999         12345
1  040525 $$$$$   8888         12345
2  040525 $$$$$   7777         12345
3  040525 $$$$$   6666         12345
[0]

How can I create 4 columns in this DataFrame? Desired output would be:

(4, 4)
       a      b     c      d
0  40525  $$$$$  9999  12345
1  40525  $$$$$  8888  12345
2  40525  $$$$$  7777  12345
3  40525  $$$$$  6666  12345
['a', 'b', 'c', 'd']

I'm using something similar to this as input.txt

 040525 $$$$$   9999         12345
 040525 $$$$$   8888         12345
 040525 $$$$$   7777         12345
 040525 $$$$$   6666         12345

Due to the way this input is pre-processed, I cannot use pd.read_csv correctly. I must first create a list from the input, and then create a DataFrame from that list.

data_list = []
with open('input.txt', 'r') as data:
    for line in data:
        data_list.append(line.strip())
df = pd.DataFrame(data_list)

This results in each whole line being treated as a single value, so the DataFrame has only 1 column:

print(df.shape)
print(df)
print(df.columns.tolist())

(4, 1)
                                   0
0  040525 $$$$$   9999         12345
1  040525 $$$$$   8888         12345
2  040525 $$$$$   7777         12345
3  040525 $$$$$   6666         12345
[0]

How can I create 4 columns in this DataFrame? Desired output would be:

(4, 4)
       a      b     c      d
0  40525  $$$$$  9999  12345
1  40525  $$$$$  8888  12345
2  40525  $$$$$  7777  12345
3  40525  $$$$$  6666  12345
['a', 'b', 'c', 'd']
Share · Improve this question · asked Feb 17 at 15:23 by yodish (805 reputation; 4 gold badges, 13 silver badges, 30 bronze badges) · 4 comments
  • You already asked basically the same question the other day, receiving plenty of input, explaining that it shouldn't be a problem to do this with pd.read_csv. If you think it is a problem, that's of course fine, but then please edit your original question with more details and a proper minimal reproducible example so that users can run your code. This question has nothing new that should alter users' opinion. – ouroboros1 Commented Feb 17 at 15:31
  • With 'input.txt' as provided, pd.read_csv('input.txt', sep=r'\s+', names=[*'abcd']) should normally work perfectly fine. If it doesn't, please update the original question with the result of trying that, together with 1) any error message, 2) unexpected output vs expected output, 3) version of pd. – ouroboros1 Commented Feb 17 at 15:41
  • This question is similar to: looping over every line and appending to Pandas dataframe. If you believe it’s different, please edit the question, make it clear how it’s different and/or how the answers on that question are not helpful for your problem. – ouroboros1 Commented Feb 17 at 15:43
  • 1 @ouroboros1 yep, i'm doing that now. Thanks! – yodish Commented Feb 17 at 15:48
Add a comment  | 

1 Answer 1

Reset to default 1

In your loop, you should split the strings into a list of substrings for the fields:

for line in input_txt:
    data_list.append(line.strip().split())

This will give you the correct number of columns.

Alternatively, keep your loop as it is, but build a Series from the list and call str.split with expand=True. This might be less efficient, but could be more robust if the lines don't all have the same number of fields:

data_list = []
with open('input.txt', 'r') as data:
    for line in data:
        data_list.append(line.strip())
df = pd.Series(data_list).str.split(expand=True)

Output:

        0      1     2      3
0  040525  $$$$$  9999  12345
1  040525  $$$$$  8888  12345
2  040525  $$$$$  7777  12345
3  040525  $$$$$  6666  12345

For the first approach, if you want column names:

df = pd.DataFrame(data_list, columns=['a', 'b', 'c', 'd'])

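Output (the values are still strings, so 040525 keeps its leading zero; convert that column with pd.to_numeric if you need the integer 40525 shown in the question):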

        a      b     c      d
0  040525  $$$$$  9999  12345
1  040525  $$$$$  8888  12345
2  040525  $$$$$  7777  12345
3  040525  $$$$$  6666  12345
发布评论

评论列表(0)

  1. 暂无评论