I have a directory that has 72 folders, and within each folder are 245 numpy files. The directory is called MP_Data. I would like to read all the 245 numpy files within each folder in any order. However, the loop through the folders should be in numerical order.
The 72 folders are named as follows: (And I would like to keep it that way)
r0
r5
r10
r15
r20
r25
r30
r35
r40
r45
r50
r55
r60
r65
r70
r75
r80
r85
r90
r95
r100
r105
r110
r115
r120
r125
r130
r135
r140
r145
r150
r155
r160
r165
r170
r175
r180
r185
r190
r195
r200
r205
r210
r215
r220
r225
r230
r235
r240
r245
r250
r255
r260
r265
r270
r275
r280
r285
r290
r295
r300
r305
r310
r315
r320
r325
r330
r335
r340
r345
r350
r355
Here is what I have tried so far, but it doesn't seem to work:
sequences, labels = [], []
directory_1 = r'C:\Users\.....\angle-model\MP_DATA'

# Collect the sub-folder names (r0, r5, ...) in natural (numeric) order.
direc = []
for root, subdirectories, filenames in (sorted(os.walk(directory_1))):
    subdirectories = natsorted(subdirectories)
    for dirs in subdirectories:
        direc.append(dirs)
direc = np.array(direc)

# Build the full path of each sub-folder.
for directory in direc:
    directory_2 = (os.path.join(directory_1, directory))

# NOTE(review): the original indentation was lost; this loop is reconstructed
# at top level (outside the directory loop above), which matches the reported
# symptom -- directory_2 only holds the last folder by the time it is used.
for action in actions:
    window = []
    for root, subdirectories, filenames in (sorted(os.walk(directory_2))):
        filenames = natsorted(filenames)
        for filename in filenames:
            res = np.load(os.path.join(directory_2, filename))
            window.append(res)
    sequences.append(window)
    labels.append(label_map[action])
The above code is stuck within the same folder and doesn't move to the next folder. If I print filenames, the output is repetitive:
['72.npy', '144.npy', '216.npy', '288.npy', '360.npy', '432.npy', '504.npy', '576.npy', '648.npy', '720.npy', '792.npy', '864.npy', '936.npy', '1008.npy', '1080.npy', '1152.npy', '1224.npy', '1296.npy', '1368.npy', '1440.npy', '1512.npy', '1584.npy', '1656.npy', '1728.npy', '1800.npy', '1872.npy', '1944.npy', '2016.npy', '2088.npy', '2160.npy', '2232.npy', '2304.npy', '2376.npy', '2448.npy', '2520.npy', '2592.npy', '2664.npy', '2736.npy', '2808.npy', '2880.npy', '2952.npy', '3024.npy', '3096.npy', '3168.npy', '3240.npy', '3312.npy', '3384.npy', '3456.npy', '3528.npy', '3600.npy', '3672.npy', '3744.npy', '3816.npy', '3888.npy', '3960.npy', '4032.npy', '4104.npy', '4176.npy', '4248.npy', '4320.npy', '4392.npy', '4464.npy', '4536.npy', '4608.npy', '4680.npy', '4752.npy', '4824.npy', '4896.npy', '4968.npy', '5040.npy', '5112.npy', '5184.npy', '5256.npy', '5328.npy', '5400.npy', '5472.npy', '5544.npy', '5616.npy', '5688.npy', '5760.npy', '5832.npy', '5904.npy', '5976.npy', '6048.npy', '6120.npy', '6192.npy', '6264.npy', '6336.npy', '6408.npy', '6480.npy', '6552.npy', '6624.npy', '6696.npy', '6768.npy', '6840.npy', '6912.npy', '6984.npy', '7056.npy', '7128.npy', '7200.npy', '7272.npy', '7344.npy', '7416.npy', '7488.npy', '7560.npy', '7632.npy', '7704.npy', '7776.npy', '7848.npy', '7920.npy', '7992.npy', '8064.npy', '8136.npy', '8208.npy', '8280.npy', '8352.npy', '8424.npy', '8496.npy', '8568.npy', '8640.npy', '8712.npy', '8784.npy', '8856.npy', '8928.npy', '9000.npy', '9072.npy', '9144.npy', '9216.npy', '9288.npy', '9360.npy', '9432.npy', '9504.npy', '9576.npy', '9648.npy', '9720.npy', '9792.npy', '9864.npy', '9936.npy', '10008.npy', '10080.npy', '10152.npy', '10224.npy', '10296.npy', '10368.npy', '10440.npy', '10512.npy', '10584.npy', '10656.npy', '10728.npy', '10800.npy', '10872.npy', '10944.npy', '11016.npy', '11088.npy', '11160.npy', '11232.npy', '11304.npy', '11376.npy', '11448.npy', '11520.npy', '11592.npy', '11664.npy', '11736.npy', '11808.npy', '11880.npy', 
'11952.npy', '12024.npy', '12096.npy', '12168.npy', '12240.npy', '12312.npy', '12384.npy', '12456.npy', '12528.npy', '12600.npy', '12672.npy', '12744.npy', '12816.npy', '12888.npy', '12960.npy', '13032.npy', '13104.npy', '13176.npy', '13248.npy', '13320.npy', '13392.npy', '13464.npy', '13536.npy', '13608.npy', '13680.npy', '13752.npy', '13824.npy', '13896.npy', '13968.npy', '14040.npy', '14112.npy', '14184.npy', '14256.npy', '14328.npy', '14400.npy', '14472.npy', '14544.npy', '14616.npy', '14688.npy', '14760.npy', '14832.npy', '14904.npy', '14976.npy', '15048.npy', '15120.npy', '15192.npy', '15264.npy', '15336.npy', '15408.npy', '15480.npy', '15552.npy', '15624.npy', '15696.npy', '15768.npy', '15840.npy', '15912.npy', '15984.npy', '16056.npy', '16128.npy', '16200.npy', '16272.npy', '16344.npy', '16416.npy', '16488.npy', '16560.npy', '16632.npy', '16704.npy', '16776.npy', '16848.npy', '16920.npy', '16992.npy', '17064.npy', '17136.npy', '17208.npy', '17280.npy', '17352.npy', '17424.npy', '17496.npy', '17568.npy']
.
.
.
.
['72.npy', '144.npy', '216.npy', '288.npy', '360.npy', '432.npy', '504.npy', '576.npy', '648.npy', '720.npy', '792.npy', '864.npy', '936.npy', '1008.npy', '1080.npy', '1152.npy', '1224.npy', '1296.npy', '1368.npy', '1440.npy', '1512.npy', '1584.npy', '1656.npy', '1728.npy', '1800.npy', '1872.npy', '1944.npy', '2016.npy', '2088.npy', '2160.npy', '2232.npy', '2304.npy', '2376.npy', '2448.npy', '2520.npy', '2592.npy', '2664.npy', '2736.npy', '2808.npy', '2880.npy', '2952.npy', '3024.npy', '3096.npy', '3168.npy', '3240.npy', '3312.npy', '3384.npy', '3456.npy', '3528.npy', '3600.npy', '3672.npy', '3744.npy', '3816.npy', '3888.npy', '3960.npy', '4032.npy', '4104.npy', '4176.npy', '4248.npy', '4320.npy', '4392.npy', '4464.npy', '4536.npy', '4608.npy', '4680.npy', '4752.npy', '4824.npy', '4896.npy', '4968.npy', '5040.npy', '5112.npy', '5184.npy', '5256.npy', '5328.npy', '5400.npy', '5472.npy', '5544.npy', '5616.npy', '5688.npy', '5760.npy', '5832.npy', '5904.npy', '5976.npy', '6048.npy', '6120.npy', '6192.npy', '6264.npy', '6336.npy', '6408.npy', '6480.npy', '6552.npy', '6624.npy', '6696.npy', '6768.npy', '6840.npy', '6912.npy', '6984.npy', '7056.npy', '7128.npy', '7200.npy', '7272.npy', '7344.npy', '7416.npy', '7488.npy', '7560.npy', '7632.npy', '7704.npy', '7776.npy', '7848.npy', '7920.npy', '7992.npy', '8064.npy', '8136.npy', '8208.npy', '8280.npy', '8352.npy', '8424.npy', '8496.npy', '8568.npy', '8640.npy', '8712.npy', '8784.npy', '8856.npy', '8928.npy', '9000.npy', '9072.npy', '9144.npy', '9216.npy', '9288.npy', '9360.npy', '9432.npy', '9504.npy', '9576.npy', '9648.npy', '9720.npy', '9792.npy', '9864.npy', '9936.npy', '10008.npy', '10080.npy', '10152.npy', '10224.npy', '10296.npy', '10368.npy', '10440.npy', '10512.npy', '10584.npy', '10656.npy', '10728.npy', '10800.npy', '10872.npy', '10944.npy', '11016.npy', '11088.npy', '11160.npy', '11232.npy', '11304.npy', '11376.npy', '11448.npy', '11520.npy', '11592.npy', '11664.npy', '11736.npy', '11808.npy', '11880.npy', 
'11952.npy', '12024.npy', '12096.npy', '12168.npy', '12240.npy', '12312.npy', '12384.npy', '12456.npy', '12528.npy', '12600.npy', '12672.npy', '12744.npy', '12816.npy', '12888.npy', '12960.npy', '13032.npy', '13104.npy', '13176.npy', '13248.npy', '13320.npy', '13392.npy', '13464.npy', '13536.npy', '13608.npy', '13680.npy', '13752.npy', '13824.npy', '13896.npy', '13968.npy', '14040.npy', '14112.npy', '14184.npy', '14256.npy', '14328.npy', '14400.npy', '14472.npy', '14544.npy', '14616.npy', '14688.npy', '14760.npy', '14832.npy', '14904.npy', '14976.npy', '15048.npy', '15120.npy', '15192.npy', '15264.npy', '15336.npy', '15408.npy', '15480.npy', '15552.npy', '15624.npy', '15696.npy', '15768.npy', '15840.npy', '15912.npy', '15984.npy', '16056.npy', '16128.npy', '16200.npy', '16272.npy', '16344.npy', '16416.npy', '16488.npy', '16560.npy', '16632.npy', '16704.npy', '16776.npy', '16848.npy', '16920.npy', '16992.npy', '17064.npy', '17136.npy', '17208.npy', '17280.npy', '17352.npy', '17424.npy', '17496.npy', '17568.npy']
CodePudding user response:
This is much simpler than you're making it.
I have a directory that has 72 folders, and within each folder are 245 numpy files. The directory is called MP_Data. I would like to read all the 245 numpy files within each folder in any order. However, the loop through the folders should be in numerical order.
From the documentation for os.walk:
When topdown is True, the caller can modify the dirnames list in-place (perhaps using del or slice assignment), and walk() will only recurse into the subdirectories whose names remain in dirnames; this can be used to prune the search, impose a specific order of visiting, or even to inform walk() about directories the caller creates or renames before it resumes walk() again.
So, we use that feature to make the os.walk visit all the subdirectories and files in the correct order, and we read the files as we encounter them. We should only need to os.walk once. Thus:
window = []
# Iterate os.walk() directly, without a sorted() wrapper: sorted() would run
# the whole walk before the loop body executes, so the in-place reorder below
# could no longer influence the order in which folders are visited.
for current, subdirectories, filenames in os.walk(directory_1):
    # Slice assignment mutates the list os.walk() holds, so it descends into
    # the sub-folders in natural (numeric) order: r0, r5, r10, ...
    subdirectories[:] = natsorted(subdirectories)
    for filename in filenames:
        window.append(np.load(os.path.join(current, filename)))
Having read each file once and built a list of the data, we can then just clone that data for each action in actions:
from copy import deepcopy

# Give every action its own independent copy of the loaded data ...
sequences = [deepcopy(window) for _ in actions]
# ... and record the label that corresponds to each action, in the same order.
labels = [label_map[name] for name in actions]
Assuming that you actually need separate data for each action, that is. That would depend on the rest of your program.
CodePudding user response:
Your code could simplify (aside from actions, which you don't show us) to something like
import os
from collections import defaultdict

root = "/tmp/mp_data"

# Map labels (subdirectories of root) to data
data_per_label = defaultdict(list)

# Get all top-level directories within `root`.
# Note: os.listdir() returns entries in arbitrary order.
label_dirs = [
    name for name in os.listdir(root) if os.path.isdir(os.path.join(root, name))
]
print(f"{label_dirs=}")

# Loop over each label directory
for label in label_dirs:
    label_dir = os.path.join(root, label)
    # Loop over each filename in the label directory
    for filename in os.listdir(label_dir):
        # Take care to only look at .npy files
        if filename.endswith(".npy"):
            filepath = os.path.join(label_dir, filename)
            print(f"{label=} {filename=} {filepath=}")
            # Use the full path: the bare filename is not resolvable from
            # the current working directory.
            data = filepath  # replace with np.load(filepath)
            data_per_label[label].append(data)

print(data_per_label)
Given a tree like
/tmp/mp_data
├── r1
│   └── a.npy
├── r2
│   └── b.npy
└── r3
    └── c.npy
this prints out
label_dirs=['r1', 'r3', 'r2']
label='r1' filename='a.npy' filepath='/tmp/mp_data/r1/a.npy'
label='r3' filename='c.npy' filepath='/tmp/mp_data/r3/c.npy'
label='r2' filename='b.npy' filepath='/tmp/mp_data/r2/b.npy'
defaultdict(<class 'list'>, {'r1': ['/tmp/mp_data/r1/a.npy'], 'r3': ['/tmp/mp_data/r3/c.npy'], 'r2': ['/tmp/mp_data/r2/b.npy']})
CodePudding user response:
You should move the action loop inside the directory loop. If you don't, the action loop uses only the last directory of the directory loop.
