This is my dataframe:
df = pd.DataFrame({
'a': [0, 0, 1, -1, -1, 0, 0, 0, 0, 0, -1, 0, 0, 1, 0]
})
Expected output is creating column b
:
a b
0 0 0
1 0 0
2 1 0
3 -1 1
4 -1 -1
5 0 -1
6 0 -1
7 0 -1
8 0 0
9 0 0
10 -1 0
11 0 -1
12 0 -1
13 1 -1
14 0 1
Logic:
I explain the logic by some examples:
I want to create column b to df
I want to have a window of three rows
for example for row number 3 I want to look at three previous rows and capture the last non 0 value. if all of the values are 0 then 'b' is 0. in this case the last non zero value is 1. so column b is 1
for example for row number 4 . The last non zero value is -1 so column b is -1
I want to do the same for all rows.
This is what I have tried so far. I think there must be a better way.
import pandas as pd
df = pd.DataFrame({
'a': [0, 0, 1, -1, -1, 0, 0, 0, 0, 0, -1, 0, 0, 1, 0]
})
def last_nonzero(x):
# x is a pandas Series representing a window
nonzero = x[x != 0]
if not nonzero.empty:
# Return the last non-zero value in the window (i.e. the one closest to the current row)
return nonzero.iloc[-1]
return 0
# Shift by 1 so that the rolling window looks only at previous rows.
# Use a window size of 3 and min_periods=1 to allow early rows.
df['b'] = df['a'].shift(1).rolling(window=3, min_periods=1).apply(last_nonzero, raw=False).astype(int)
This is my dataframe:
df = pd.DataFrame({
'a': [0, 0, 1, -1, -1, 0, 0, 0, 0, 0, -1, 0, 0, 1, 0]
})
Expected output is creating column b
:
a b
0 0 0
1 0 0
2 1 0
3 -1 1
4 -1 -1
5 0 -1
6 0 -1
7 0 -1
8 0 0
9 0 0
10 -1 0
11 0 -1
12 0 -1
13 1 -1
14 0 1
Logic:
I explain the logic by some examples:
I want to create column b to df
I want to have a window of three rows
for example for row number 3 I want to look at three previous rows and capture the last non 0 value. if all of the values are 0 then 'b' is 0. in this case the last non zero value is 1. so column b is 1
for example for row number 4 . The last non zero value is -1 so column b is -1
I want to do the same for all rows.
This is what I have tried so far. I think there must be a better way.
import pandas as pd
df = pd.DataFrame({
'a': [0, 0, 1, -1, -1, 0, 0, 0, 0, 0, -1, 0, 0, 1, 0]
})
def last_nonzero(x):
# x is a pandas Series representing a window
nonzero = x[x != 0]
if not nonzero.empty:
# Return the last non-zero value in the window (i.e. the one closest to the current row)
return nonzero.iloc[-1]
return 0
# Shift by 1 so that the rolling window looks only at previous rows.
# Use a window size of 3 and min_periods=1 to allow early rows.
df['b'] = df['a'].shift(1).rolling(window=3, min_periods=1).apply(last_nonzero, raw=False).astype(int)
Share
Improve this question
asked Mar 19 at 10:04
AmirXAmirX
2,7492 gold badges14 silver badges35 bronze badges
5 Answers
Reset to default 4I don't think there is a much more straightforward approach. There is currently no rolling.last
method.
You could however simplify a bit your code:
def last_nonzero(s):
return 0 if (x:=s[s != 0]).empty else x.iloc[-1]
df['b'] = (df['a'].shift(1, fill_value=0)
.rolling(window=3, min_periods=1).apply(last_nonzero)
.convert_dtypes()
)
With a lambda:
df['b'] = (df['a'].shift(1, fill_value=0)
.rolling(window=3, min_periods=1)
.apply(lambda s: 0 if (x:=s[s != 0]).empty else x.iloc[-1])
.convert_dtypes()
)
Actually, if you have a range index, you could also use a merge_asof
on the indices:
window = 3
out = pd.merge_asof(
df,
df['a'].shift(1, fill_value=0).loc[lambda x: x != 0].rename('b'),
left_index=True,
right_index=True,
tolerance=window-1,
direction='backward',
).fillna({'b': 0})
Output:
a b
0 0 0
1 0 0
2 1 0
3 -1 1
4 -1 -1
5 0 -1
6 0 -1
7 0 -1
8 0 0
9 0 0
10 -1 0
11 0 -1
12 0 -1
13 1 -1
14 0 1
import pandas as pd
import torch
df = pd.DataFrame({
'a': [0, 0, 1, -1, -1, 0, 0, 0, 0, 0, -1, 0, 0, 1, 0]
})
# Convert the 'a' column of the DataFrame to a PyTorch integer tensor.
atensor = torch.tensor(df['a'].values, dtype=torch.int32)
# Create a PyTorch tensor 'btensor' with the same shape as 'atensor', filled with zeros.
btensor = torch.zeros_like(atensor)
# Iterate through the 'atensor' starting from the 4th element (index 3).
for i in range(3, len(atensor)):
# Extract a window of 3 elements from 'atensor' preceding the current index 'i'.
win = atensor[i - 3: i]
# Select only the non-zero elements from the 'win' tensor.
nonzero = win[win != 0]
# Check if there are any non-zero elements in the 'nonzero' tensor.
if len(nonzero) > 0:
# If there are non-zero elements, assign the last non-zero element to 'btensor[i]'.
btensor[i] = nonzero[-1]
df['res'] = btensor.numpy()
print(df)
'''
a res
0 0 0
1 0 0
2 1 0
3 -1 1
4 -1 -1
5 0 -1
6 0 -1
7 0 -1
8 0 0
9 0 0
10 -1 0
11 0 -1
12 0 -1
13 1 -1
14 0 1
'''
import pandas as pd
import torch
df = pd.DataFrame({
'a': [0, 0, 1, -1, -1, 0, 0, 0, 0, 0, -1, 0, 0, 1, 0]
})
# Convert the 'a' column to a PyTorch integer tensor
atensor = torch.tensor(df['a'].values, dtype=torch.int32)
a_len = len(atensor) # Store the length of the tensor for later use
# Create a column vector of indices from 0 to len(atensor) - 1
row_indices = torch.arange(len(atensor)).unsqueeze(1)
'''
row_indices :
tensor([[ 0],
[ 1],
[ 2],
[ 3],
[ 4],
[ 5],
[ 6],
[ 7],
[ 8],
[ 9],
[10],
[11],
[12],
[13],
[14]])
'''
# Create a row vector of offsets [1, 2, 3]
backward_steps = torch.arange(1, 4).unsqueeze(0)
'''
tensor([[1, 2, 3]])
'''
# Calculate indices for rolling windows of size 3
# Each row represents the indices of elements 1, 2, and 3 positions before the current element
idx = row_indices - backward_steps
'''
tensor([[-1, -2, -3],
[ 0, -1, -2],
[ 1, 0, -1],
[ 2, 1, 0],
[ 3, 2, 1],
[ 4, 3, 2],
[ 5, 4, 3],
[ 6, 5, 4],
[ 7, 6, 5],
[ 8, 7, 6],
[ 9, 8, 7],
[10, 9, 8],
[11, 10, 9],
[12, 11, 10],
[13, 12, 11]])
'''
# Clamp the indices to be non-negative (replace negative indices with 0)
idxClamp = torch.clamp(idx, 0)
'''
tensor([[ 0, 0, 0],
[ 0, 0, 0],
[ 1, 0, 0],
[ 2, 1, 0],
[ 3, 2, 1],
[ 4, 3, 2],
[ 5, 4, 3],
[ 6, 5, 4],
[ 7, 6, 5],
[ 8, 7, 6],
[ 9, 8, 7],
[10, 9, 8],
[11, 10, 9],
[12, 11, 10],
[13, 12, 11]])
'''
# Gather values from 'atensor' using the clamped indices to create rolling windows
win = atensor[idxClamp]
'''
tensor([[ 0, 0, 0],
[ 0, 0, 0],
[ 0, 0, 0],
[ 1, 0, 0],
[-1, 1, 0],
[-1, -1, 1],
[ 0, -1, -1],
[ 0, 0, -1],
[ 0, 0, 0],
[ 0, 0, 0],
[ 0, 0, 0],
[-1, 0, 0],
[ 0, -1, 0],
[ 0, 0, -1],
[ 1, 0, 0]], dtype=torch.int32)
'''
# Create a tensor where nonzero elements in 'win' are replaced with their
# reverse indices [3, 2, 1] and zero elements are replaced with -infinity
validIdx = torch.where(win != 0, torch.arange(3, 0, -1), float('-inf'))
# Find the index of the maximum value in each row of 'validIdx'
# This gives the index of the last nonzero element in each rolling window
last_nonzero_idx = validIdx.argmax(dim=1)
# Gather the last nonzero values from 'win' using the 'last_nonzero_idx'
last_nonzero_values = win[torch.arange(len(atensor)), last_nonzero_idx]
'''
tensor([ 0, 0, 0, 1, -1, -1, -1, -1, 0, 0, 0, -1, -1, -1, 1],
dtype=torch.int32)
'''
df['res'] = last_nonzero_values
print(df)
'''
a res
0 0 0
1 0 0
2 1 0
3 -1 1
4 -1 -1
5 0 -1
6 0 -1
7 0 -1
8 0 0
9 0 0
10 -1 0
11 0 -1
12 0 -1
13 1 -1
14 0 1
'''
Method in Numpy :
import numpy as np
import pandas as pd
df = pd.DataFrame({'a': [0, 0, 1, -1, -1, 0, 0, 0, 0, 0, -1, 0, 0, 1, 0]})
# Extract the 'a' column as a NumPy array
aa = df['a'].values
# Create an array of offsets [1, 2, 3]
idx2 = np.arange(1, 4)
# Create a column vector of indices [0, 1, ..., 14]
# [:, None] adds a new dimension, making it a column
idx1 = np.arange(len(aa))[:, None]
# Calculate indices for rolling windows of size 3
# Each row represents the indices of elements 1, 2, and 3 positions before the current element
idx = idx1 - idx2
# Ensure indices are within the bounds of the array 'aa'
# Replace indices outside the valid range [0, len(aa) - 1] with the closest valid index
idx_within_bounds = np.clip(idx, 0, len(aa) - 1)
# Gather values from 'aa' using the adjusted indices to create rolling windows
win = aa[idx_within_bounds]
'''
win:
[[ 0 0 0]
[ 0 0 0]
[ 0 0 0]
[ 1 0 0]
[-1 1 0]
[-1 -1 1]
[ 0 -1 -1]
[ 0 0 -1]
[ 0 0 0]
[ 0 0 0]
[ 0 0 0]
[-1 0 0]
[ 0 -1 0]
[ 0 0 -1]
[ 1 0 0]]
'''
# Create a mask to identify nonzero elements in 'win'
mask = win != 0
# Create an array where nonzero elements in 'win' are replaced
# with their reverse indices [3, 2, 1] and zero elements
# are replaced with -infinity.
lastNonZeroIdx = np.where(mask, np.arange(3, 0, -1), -np.inf)
# Find the index of the maximum value in each row of 'lastNonZeroIdx'
# This gives the index of the last nonzero element in each rolling window
max_indices = lastNonZeroIdx.argmax(axis=1)
#[0 0 0 0 0 0 1 2 0 0 0 0 1 2 0]
# Gather the last nonzero values from 'win' using the 'max_indices'
lastNonZeroVals = win[np.arange(len(aa)), max_indices]
#[ 0 0 0 1 -1 -1 -1 -1 0 0 0 -1 -1 -1 1]
df['res'] = lastNonZeroVals
print(df)
'''
a res
0 0 0
1 0 0
2 1 0
3 -1 1
4 -1 -1
5 0 -1
6 0 -1
7 0 -1
8 0 0
9 0 0
10 -1 0
11 0 -1
12 0 -1
13 1 -1
14 0 1
'''
Here is an option:
df['a'].rolling(3).agg(lambda x: x.where(x.ne(0)).bfill().ffill().iloc[-1]).shift().fillna(0)
Output:
0 0.0
1 0.0
2 0.0
3 1.0
4 -1.0
5 -1.0
6 -1.0
7 -1.0
8 0.0
9 0.0
10 0.0
11 -1.0
12 -1.0
13 -1.0
14 1.0