I am trying to accomplish the following tasks:
traverse_dir() function
- read a root directory, and get the names of the sub directories.
- read the sub directories and see if 'installed-files.json' file is present.
- if the 'installed-files.json' file is present in all the directories, then open each of them and create a single Excel file from the JSON files found in the sub directories.
filter_apk() function
- read the excel file generated in the first function and create another excel file that will store only file names ending with '.apk'.
Below is the code snippet:
# The question's original attempt, kept exactly as posted (the paste lost its
# indentation). It lists the entries of rootDir, keeps the ones that isdir()
# accepts, and for matching entries loads JSON and writes it, minus the
# 'SHA256' column, to 'abc.xlsx'.
def traverse_dir(rootDir, file_name):
dir_names = []
for names in os.listdir(rootDir):
# NOTE(review): join() receives only the entry name, so rootDir is not part
# of entry_path and isdir() resolves it against the current working directory.
entry_path = os.path.join(names)
if os.path.isdir(entry_path):
dir_names.append(entry_path)
for i in dir_names:
# NOTE(review): this is a substring test on the directory name itself, not a
# check that the directory contains a file called file_name.
if file_name in i:
# NOTE(review): opens file_name as given; the directory i is not part of the
# path that is opened.
with open(file_name) as jf:
data = json.load(jf)
df = pd.DataFrame(data)
# Keep every column except 'SHA256'.
new_df = df[df.columns.difference(['SHA256'])]
# The workbook is only written when the statements above are reached.
new_df.to_excel('abc.xlsx')
def filter_apk():
    """Copy the rows of 'abc.xlsx' whose name ends with '.apk' to 'zybg.xlsx'.

    Calls ``traverse_dir(rootDir, file_name)`` first, using the module-level
    ``rootDir`` and ``file_name`` values, then reads 'abc.xlsx' and keeps the
    rows whose "Name" column ends with ".apk".

    Raises:
        FileNotFoundError: if 'abc.xlsx' does not exist after the
            ``traverse_dir`` call.
        KeyError: if the workbook has no "Name" column.
    """
    traverse_dir(rootDir, file_name)
    old_xl = pd.read_excel('abc.xlsx')
    # endswith() instead of the "\.apk" regex: the goal is names *ending* in
    # ".apk" (the pattern also matched "x.apk.bak"), and "\." in a non-raw
    # string is an invalid escape sequence. na=False keeps rows with a missing
    # name from producing an invalid boolean mask.
    is_apk = old_xl["Name"].str.endswith(".apk", na=False)
    old_xl[is_apk].to_excel('zybg.xlsx')
# Name of the JSON file looked up inside each sub directory.
file_name = "installed-files.json"
# Placeholder: replace with the directory that holds the sub directories.
rootDir = "<root path where sub folders resides>"

# Build 'abc.xlsx', then filter it down to the '.apk' entries.
filter_apk()
Note:
I have tested the code separately on a single folder, and it works like a charm. I am only facing issues when I try to work with multiple directories.
In fact, in the 1st function
traverse_dir(), I am able to list the sub directories.
I am getting the error below when executing the program.
Traceback (most recent call last):
File "Jenkins.py", line 36, in <module>
filter_apk()
File "Jenkins.py", line 30, in filter_apk
old_xl = pd.read_excel('abc.xlsx')
with open(filename, "rb") as f:
FileNotFoundError: [Errno 2] No such file or directory: 'abc.xlsx'
Why is the file not getting generated? Any suggestions?
modified code
def traverse_dir(rootDir, file_name, output_path='abc.xlsx'):
    """Write ``file_name`` from each sub directory of ``rootDir`` to its own sheet.

    Every direct sub directory of ``rootDir`` that contains ``file_name``
    contributes one worksheet, named after the sub directory, holding the JSON
    content without its 'SHA256' column. The posted version hard-coded the
    sheet names 'BRA', 'CNA', 'COA' and 'JPA' and filled all of them from the
    same data; deriving the names from the directories is assumed to be the
    intent.

    Args:
        rootDir: directory whose direct sub directories are scanned.
        file_name: name of the JSON file expected inside each sub directory.
        output_path: workbook to create; defaults to 'abc.xlsx'.

    Returns:
        The names of the sub directories that were exported, in sheet order.
        The list is empty, and no workbook is written, when no sub directory
        contains ``file_name``.
    """
    sheets = {}
    for entry in sorted(os.listdir(rootDir)):
        # Build the path from rootDir: the bare entry name would be resolved
        # against the current working directory. isfile() is also False when
        # ``entry`` is not a directory, so no separate isdir() check is needed.
        file_path = os.path.join(rootDir, entry, file_name)
        if not os.path.isfile(file_path):
            continue
        with open(file_path, encoding='utf-8') as jf:
            data = json.load(jf)
        df = pd.DataFrame(data)
        sheets[entry] = df[df.columns.difference(['SHA256'])]

    if not sheets:
        return []

    with pd.ExcelWriter(output_path) as writer:
        for sheet_name, frame in sheets.items():
            # Excel limits worksheet names to 31 characters.
            frame.to_excel(writer, sheet_name=sheet_name[:31], index=False)
    return list(sheets)
# Name of the JSON file looked up inside each sub directory.
file_name = "installed-files.json"
# Placeholder: replace with the directory that holds the sub directories.
rootDir = "<root path where sub folders resides>"

# Scan the sub directories and write the workbook.
traverse_dir(rootDir, file_name)
CodePudding user response:
The main issue is that `if file_name in i:` is always false, so no xlsx file is ever created.
You may need to make some changes to test for the file existence, for example:
import json
import os

import pandas as pd
def traverse_dir(rootDir, file_name, output_path='abc.xlsx'):
    """Merge ``file_name`` from every sub directory of ``rootDir`` into one workbook.

    Each direct sub directory of ``rootDir`` is checked for ``file_name``. The
    JSON content of every file found is loaded into a DataFrame, the frames
    are concatenated, the 'SHA256' column is left out, and the result is
    written to ``output_path`` in a single write.

    Args:
        rootDir: directory whose direct sub directories are scanned.
        file_name: name of the JSON file expected inside each sub directory.
        output_path: workbook to create; defaults to 'abc.xlsx'.

    Returns:
        The paths of the JSON files that were merged, sorted by sub directory
        name. The list is empty, and no workbook is written, when no sub
        directory contains ``file_name``.
    """
    json_paths = []
    for entry in sorted(os.listdir(rootDir)):
        # Build the path from rootDir: the bare entry name would be resolved
        # against the current working directory. isfile() is also False when
        # ``entry`` is not a directory, so no separate isdir() check is needed.
        candidate = os.path.join(rootDir, entry, file_name)
        if os.path.isfile(candidate):
            json_paths.append(candidate)

    if not json_paths:
        return json_paths

    frames = []
    for path in json_paths:
        with open(path, encoding='utf-8') as jf:
            frames.append(pd.DataFrame(json.load(jf)))

    # Write once after the loop; writing inside it would overwrite the
    # workbook for every directory and keep only the last one.
    merged = pd.concat(frames, ignore_index=True)
    merged = merged[merged.columns.difference(['SHA256'])]
    merged.to_excel(output_path, index=False)
    return json_paths
def filter_apk():
    """Copy the rows of 'abc.xlsx' whose name ends with '.apk' to 'zybg.xlsx'.

    Calls ``traverse_dir(rootDir, file_name)`` first, using the module-level
    ``rootDir`` and ``file_name`` values, then reads 'abc.xlsx' and keeps the
    rows whose "Name" column ends with ".apk".

    Raises:
        FileNotFoundError: if 'abc.xlsx' does not exist after the
            ``traverse_dir`` call.
        KeyError: if the workbook has no "Name" column.
    """
    traverse_dir(rootDir, file_name)
    old_xl = pd.read_excel('abc.xlsx')
    # endswith() instead of the "\.apk" regex: the goal is names *ending* in
    # ".apk" (the pattern also matched "x.apk.bak"), and "\." in a non-raw
    # string is an invalid escape sequence. na=False keeps rows with a missing
    # name from producing an invalid boolean mask.
    is_apk = old_xl["Name"].str.endswith(".apk", na=False)
    old_xl[is_apk].to_excel('zybg.xlsx')
# Name of the JSON file looked up inside each sub directory.
file_name = "installed-files.json"
# Placeholder: replace with the directory that holds the sub directories.
rootDir = "<root path where sub folders resides>"

# Build 'abc.xlsx', then filter it down to the '.apk' entries.
filter_apk()
