In the below dataframe, I'm trying to forward fill the Pos and Stop columns based on the following criteria:
- If (Prior
Pos== -1) & (CurrentHigh< PriorStop) - If (Prior
Pos== 1 ) & (CurrentLow> PriorStop)
Once either of these conditions is violated, then the values should remain unchanged until the next non-zero instance of Pos and Stop at which point the above criteria should again be evaluated.
import numpy as np
import pandas as pd
Open = {'Open':np.array([126.81999969, 126.55999756, 123.16000366, 125.23000336,127.81999969, 126.01000214, 127.81999969, 126.95999908,126.44000244, 125.56999969, 125.08000183, 124.27999878,124.68000031, 124.06999969, 126.16999817, 126.59999847,127.20999908])}
High = {'High': np.array([126.93000031, 126.98999786, 124.91999817, 127.72000122,128. , 127.94000244, 128.32000732, 127.38999939, 127.63999939, 125.80000305, 125.34999847, 125.23999786, 124.84999847, 126.16000366, 126.31999969, 128.46000671, 127.75 ])}
Low = {'Low' : np.array([125.16999817, 124.77999878, 122.86000061, 125.09999847,125.20999908, 125.94000244, 126.31999969, 126.41999817, 125.08000183, 124.55000305, 123.94000244, 124.05000305, 123.12999725, 123.84999847, 124.83000183, 126.20999908, 126.51999664])}
Close = {'Close': np.array([126.26999664, 124.84999847, 124.69000244, 127.30999756,125.43000031, 127.09999847, 126.90000153, 126.84999847, 125.27999878, 124.61000061, 124.27999878, 125.05999756, 123.54000092, 125.88999939, 125.90000153, 126.73999786, 127.12999725])}
Pos = {'Pos': np.array([ 0, 0, 1, 0, -1, 0, 0, 0, 0, -1, 0, 1, 0, 1, 0, 0, 0])}
Stop = {'Stop': np.array([ 0. , 0. , 122.86000061, 0. , 128. , 0. , 0. , 0. ,0. , 125.80000305, 0. , 124.05000305, 0. , 123.84999847, 0. , 0. , 0. ])}
index = pd.date_range('2022-1-1',periods = 17)
df = pd.DataFrame(dict(Open, **High, **Low, **Close, **Pos, **Stop), index = index)
df
Open High Low Close Pos Stop
Date
2022-01-01 126.82 126.93 125.17 126.27 0 0.00
2022-01-02 126.56 126.99 124.78 124.85 0 0.00
2022-01-03 123.16 124.92 122.86 124.69 1 122.86
2022-01-04 125.23 127.72 125.10 127.31 0 0.00
2022-01-05 127.82 128.00 125.21 125.43 -1 128.00
2022-01-06 126.01 127.94 125.94 127.10 0 0.00
2022-01-07 127.82 128.32 126.32 126.90 0 0.00
2022-01-08 126.96 127.39 126.42 126.85 0 0.00
2022-01-09 126.44 127.64 125.08 125.28 0 0.00
2022-01-10 125.57 125.80 124.55 124.61 -1 125.80
2022-01-11 125.08 125.35 123.94 124.28 0 0.00
2022-01-12 124.28 125.24 124.05 125.06 1 124.05
2022-01-13 124.68 124.85 123.13 123.54 0 0.00
2022-01-14 124.07 126.16 123.85 125.89 1 123.85
2022-01-15 126.17 126.32 124.83 125.90 0 0.00
2022-01-16 126.60 128.46 126.21 126.74 0 0.00
2022-01-17 127.21 127.75 126.52 127.13 0 0.00
The desired result is:
Open High Low Close Pos Stop
Date
2022-01-01 126.82 126.93 125.17 126.27 0 0.00
2022-01-02 126.56 126.99 124.78 124.85 0 0.00
2022-01-03 123.16 124.92 122.86 124.69 1 122.86
2022-01-04 125.23 127.72 125.10 127.31 1 122.86
2022-01-05 127.82 128.00 125.21 125.43 -1 128.00
2022-01-06 126.01 127.94 125.94 127.10 -1 128.00
2022-01-07 127.82 128.32 126.32 126.90 0 0.00
2022-01-08 126.96 127.39 126.42 126.85 0 0.00
2022-01-09 126.44 127.64 125.08 125.28 0 0.00
2022-01-10 125.57 125.80 124.55 124.61 -1 125.80
2022-01-11 125.08 125.35 123.94 124.28 -1 125.80
2022-01-12 124.28 125.24 124.05 125.06 1 124.05
2022-01-13 124.68 124.85 123.13 123.54 0 0.00
2022-01-14 124.07 126.16 123.85 125.89 1 123.85
2022-01-15 126.17 126.32 124.83 125.90 1 123.85
2022-01-16 126.60 128.46 126.21 126.74 1 123.85
2022-01-17 127.21 127.75 126.52 127.13 1 123.85
I've tried using the groupby and where mathods which produce a df that is close to the desired but does not keep the values unchanged for the subsequent rows in a group once the the criteria is breached.
s = df[['Pos','Stop']].mask(df['Stop'].eq(0)).ffill()
grouped = s.groupby(['Pos','Stop'])
df.update(grouped.apply(lambda g: g.where((s['Pos'] == 1) & (s['Stop'] <= df['Low']) | (s['Pos'] == -1) & (s['Stop'] >= df['High'])))
df
Open High Low Close Pos Stop
Date
2022-01-01 126.82 126.93 125.17 126.27 0.0 0.00
2022-01-02 126.56 126.99 124.78 124.85 0.0 0.00
2022-01-03 123.16 124.92 122.86 124.69 1.0 122.86
2022-01-04 125.23 127.72 125.10 127.31 1.0 122.86
2022-01-05 127.82 128.00 125.21 125.43 -1.0 128.00
2022-01-06 126.01 127.94 125.94 127.10 -1.0 128.00
2022-01-07 127.82 128.32 126.32 126.90 0.0 0.00
2022-01-08 126.96 127.39 126.42 126.85 -1.0 128.00
2022-01-09 126.44 127.64 125.08 125.28 -1.0 128.00
2022-01-10 125.57 125.80 124.55 124.61 -1.0 125.80
2022-01-11 125.08 125.35 123.94 124.28 -1.0 125.80
2022-01-12 124.28 125.24 124.05 125.06 1.0 124.05
2022-01-13 124.68 124.85 123.13 123.54 0.0 0.00
2022-01-14 124.07 126.16 123.85 125.89 1.0 123.85
2022-01-15 126.17 126.32 124.83 125.90 1.0 123.85
2022-01-16 126.60 128.46 126.21 126.74 1.0 123.85
2022-01-17 127.21 127.75 126.52 127.13 1.0 123.85
CodePudding user response:
I would suggest iterating over all rows and checking for both of the conditions. Unfortunately, I cannot reproduce your result as your code generates a different dataframe. Nonetheless, I think the following code does what you need:
import numpy as np
import pandas as pd
Open = {'Open':np.array([126.81999969, 126.55999756, 123.16000366, 125.23000336,127.81999969, 126.01000214, 127.81999969, 126.95999908,126.44000244, 125.56999969, 125.08000183, 124.27999878,124.68000031, 124.06999969, 126.16999817, 126.59999847,127.20999908])}
High = {'High': np.array([126.93000031, 126.98999786, 124.91999817, 127.72000122,128. , 127.94000244, 128.32000732, 127.38999939, 127.63999939, 125.80000305, 125.34999847, 125.23999786, 124.84999847, 126.16000366, 126.31999969, 128.46000671, 127.75 ])}
Low = {'Low' : np.array([125.16999817, 124.77999878, 122.86000061, 125.09999847,125.20999908, 125.94000244, 126.31999969, 126.41999817, 125.08000183, 124.55000305, 123.94000244, 124.05000305, 123.12999725, 123.84999847, 124.83000183, 126.20999908, 126.51999664])}
Close = {'Close': np.array([126.26999664, 124.84999847, 124.69000244, 127.30999756,125.43000031, 127.09999847, 126.90000153, 126.84999847, 125.27999878, 124.61000061, 124.27999878, 125.05999756, 123.54000092, 125.88999939, 125.90000153, 126.73999786, 127.12999725])}
Pos = {'Pos': np.array([ 0, 0, 1, 0, -1, 0, 0, 0, 0, -1, 0, 1, 0, 1, 0, 0, 0])}
Stop = {'Stop': np.array([ 0. , 0. , 122.86000061, 0. , 128. , 0. , 0. , 0. ,0. , 125.80000305, 0. , 124.05000305, 0. , 123.84999847, 0. , 0. , 0. ])}
index = pd.date_range('2022-1-1',periods = 17)
df = pd.DataFrame(dict(Open, **High, **Low, **Close, **Pos, **Stop), index = index)
# Produce iterable index
df = df.reset_index()
# Iterate over every row
for i in range(1, len(df)):
curr_pos = df.loc[i, 'Pos']
curr_stop = df.loc[i, 'Stop']
curr_low = df.loc[i, 'Low']
curr_high = df.loc[i, 'High']
prev_pos = df.loc[i-1, 'Pos']
prev_stop = df.loc[i-1, 'Stop']
# Check your conditions
if ((prev_pos == -1) and (curr_high > prev_stop)) or ((prev_pos == 1) and (curr_low < prev_stop)):
df.loc[i, 'Pos'] = prev_pos
df.loc[i, 'Stop'] = prev_stop
# Restore original index
df = df.set_index('index')
df
CodePudding user response:
Use the mask, where, groupby and cummin methods
s = df[['Pos','Stop']].mask(df['Stop'].eq(0)).ffill()
#-999 is necessary because apply drops NaNs
grouped = s.fillna(-999).groupby(['Pos','Stop'], dropna = False)
#Identify days where stop has been breached with a NaN
grouped1 = grouped.apply(
lambda g: g.where(
(s['Pos'] == 1) & (s['Stop'] <= df['Low']) |
(s['Pos'] == -1) & (s['Stop'] >= df['High'])
))
stop_breached = grouped1['Pos'].notna()
positions = s.assign(tmp = stop_breached).groupby(['Pos', 'Stop'])['tmp'].cummin().eq(1)
df[['Pos','Stop']] = grouped1.where(positions, 0)
print(df.round(2))
Open High Low Close Pos Stop
2022-01-01 126.82 126.93 125.17 126.27 0.0 0.00
2022-01-02 126.56 126.99 124.78 124.85 0.0 0.00
2022-01-03 123.16 124.92 122.86 124.69 1.0 122.86
2022-01-04 125.23 127.72 125.10 127.31 1.0 122.86
2022-01-05 127.82 128.00 125.21 125.43 -1.0 128.00
2022-01-06 126.01 127.94 125.94 127.10 -1.0 128.00
2022-01-07 127.82 128.32 126.32 126.90 0.0 0.00
2022-01-08 126.96 127.39 126.42 126.85 0.0 0.00
2022-01-09 126.44 127.64 125.08 125.28 0.0 0.00
2022-01-10 125.57 125.80 124.55 124.61 -1.0 125.80
2022-01-11 125.08 125.35 123.94 124.28 -1.0 125.80
2022-01-12 124.28 125.24 124.05 125.06 1.0 124.05
2022-01-13 124.68 124.85 123.13 123.54 0.0 0.00
2022-01-14 124.07 126.16 123.85 125.89 1.0 123.85
2022-01-15 126.17 126.32 124.83 125.90 1.0 123.85
2022-01-16 126.60 128.46 126.21 126.74 1.0 123.85
2022-01-17 127.21 127.75 126.52 127.13 1.0 123.85
