I have a list variable listA as below:
[
'abcd1-2 4d4e',
'xyz0-1 551',
'foo 3ea',
'bar1 2bd',
'mc-mqisd0-2 77a'
]
I need to build a dict from this list by expanding the digit range in the first field. The end result should look like this:
{
abcd1: 4d4e,
abcd2: 4d4e,
xyz0: 551,
xyz1: 551,
foo: 3ea,
bar1: 2bd,
mc-mqisd0: 77a,
mc-mqisd1: 77a,
mc-mqisd2: 77a,
}
I have created the function below. It works with Python 3 but throws an "unmatched group" error in some older Python versions.
Is anything wrong with the regex here?
def listFln(listA):
    """Expand '<name>[<start>-<end>] <value>' entries into a dict.

    Each entry is split on whitespace into a name and a value. If the name
    ends in a digit range such as ``1-2``, one key is produced per number in
    the (inclusive) range; otherwise the name is used as the key unchanged.

    NOTE: the ``r'\\2'`` substitution relies on ``re.sub`` replacing an
    unmatched optional group with an empty string. Python versions before
    3.5 raise ``error: unmatched group`` instead, for names without a range.

    :param listA: iterable of strings of the form ``'<name> <value>'``.
    :return: dict mapping every expanded name to its value.
    """
    import re
    fL = []
    for i in listA:
        aL = i.split()[0]
        bL = i.split()[1]
        # Group 1: base name (lazy); group 2: optional trailing "N-M" range.
        comp = re.sub(r'^(.+?)(\d+-\d+)?$', r'\1', aL)
        cmpCountR = re.sub(r'^(.+?)(\d+-\d+)?$', r'\2', aL)
        if cmpCountR.strip():
            nStart = int(cmpCountR.split('-')[0])
            nEnd = int(cmpCountR.split('-')[1])
            # The range is inclusive on both ends, hence nEnd + 1.
            for j in range(nStart, nEnd + 1):
                fL.append(comp + str(j) + ' ' + bL)
        else:
            fL.append(i)
    return dict([k.split() for k in fL])
Error:
cmpCountR = re.sub('^(.+?)(\d+-\d+)?$',r'\2',aL)
File "/usr/lib64/python2.7/re.py", line 151, in sub
return _compile(pattern, flags).sub(repl, string, count)
File "/usr/lib64/python2.7/re.py", line 275, in filter
return sre_parse.expand_template(template, match)
File "/usr/lib64/python2.7/sre_parse.py", line 800, in expand_template
raise error, "unmatched group"
CodePudding user response:
Here's a simpler version using findall instead of sub, successfully tested on Python 2.7. It also creates the dict directly instead of first building a list:
# Sample input: each entry is "<name> <value>"; a name may end in a
# "<start>-<end>" digit range (e.g. 'abcd1-2').
mylist=[
'abcd1-2 4d4e',
'xyz0-1 551',
'foo 3ea',
'bar1 2bd',
'mc-mqisd0-2 77a'
]
def listFln(listA):
    """Expand '<name>[<start>-<end>] <value>' entries directly into a dict.

    Uses ``re.findall``, which reports an optional group that did not match
    as an empty string, so no group reference can fail.

    :param listA: iterable of strings of the form ``'<name> <value>'``.
    :return: dict mapping every expanded name to its value; a name ending in
        a range ``N-M`` yields one key per number from N to M inclusive.
    """
    import re
    # Group 1: base name (lazy); group 2: optional trailing "N-M" range.
    pattern = re.compile(r'^(.+?)(\d+-\d+)?$')
    fL = {}
    for i in listA:
        fields = i.split()
        aL = fields[0]
        bL = fields[1]
        # With two groups, findall returns a list of (group1, group2) tuples.
        comp = pattern.findall(aL)[0]
        if comp[1]:
            bounds = comp[1].split('-')
            nStart = int(bounds[0])
            nEnd = int(bounds[1])
            # The range is inclusive on both ends, hence nEnd + 1.
            for j in range(nStart, nEnd + 1):
                fL[comp[0] + str(j)] = bL
        else:
            fL[comp[0]] = bL
    return fL
# Run the function on the sample list; the comments below show the dict
# reported for this input.
print(listFln(mylist))
# {'abcd1': '4d4e',
# 'abcd2': '4d4e',
# 'xyz0': '551',
# 'xyz1': '551',
# 'foo': '3ea',
# 'bar1': '2bd',
# 'mc-mqisd0': '77a',
# 'mc-mqisd1': '77a',
# 'mc-mqisd2': '77a'}
CodePudding user response:
Used Python 2.7 to reproduce:
Both patterns compile
import re
# both seem identical
# (raw strings, so that \d reaches the regex engine unchanged)
regex1 = r'^(.+?)(\d+-\d+)?$'
regex2 = r'^(.+?)(\d+-\d+)?$'
# the compiled pattern is also the same object for both: note the identical
# address in the two reprs below
re.compile(regex1) # <_sre.SRE_Pattern object at 0x7f575ef8fd40>
re.compile(regex2) # <_sre.SRE_Pattern object at 0x7f575ef8fd40>
Note: The compiled pattern using re.compile() saves time when re-using multiple times like in this loop.
Fix: test for groups found
The error-message indicates that there are groups that aren't matched.
Put another way: the replacement template passed to re.sub (see the 2.7 docs) references groups, such as the second capturing group (\2), that were not matched or captured in the given input string:
sre_constants.error: unmatched group
To fix this, we should test on groups that were found in the match.
Therefore we use re.match(regex, str) or the compiled variant pattern.match(str) to create a Match object, then Match.groups() to return all found groups as tuple.
import re
# Group 1: base name (lazy); group 2: optional trailing "N-M" digit range.
regex = r'^(.+?)(\d+-\d+)?$'
pattern = re.compile(regex) # <_sre.SRE_Pattern object at 0x7f575ef8fd40>


def listFln(listA):
    """Expand entries whose name ends in a digit range into a dict.

    Each entry is split on whitespace into a name and a value. The name is
    matched against ``pattern`` first, and ``re.sub`` is only called when the
    second group (the range) actually matched, so the ``\\2`` reference can
    never hit an unmatched group.

    NOTE(review): entries WITHOUT a range (e.g. ``'foo 3ea'``) are skipped by
    the guard below and therefore do not appear in the returned dict. As a
    consequence the final ``else`` branch is never reached. Confirm whether
    such entries should be kept instead of skipped.

    :param listA: iterable of strings of the form ``'<name> <value>'``.
    :return: dict with one key per number of each name's inclusive range.
    """
    fL = []
    for i in listA:
        aL = i.split()[0]
        bL = i.split()[1]
        # test for match and groups found
        match = pattern.match(aL)
        # tuple containing all the subgroups of the match;
        # watch: the 3rd iteration has only group(1)
        print("DEBUG groups:", match.groups())
        # skip to the next iteration here, if there is no 2nd group
        if not match or not match.group(2):
            continue
        comp = re.sub(pattern, r'\1', aL)
        cmpCountR = re.sub(pattern, r'\2', aL)
        if cmpCountR.strip():
            parts = cmpCountR.split('-')
            nStart = int(parts[0])
            nEnd = int(parts[1])
            # The range is inclusive on both ends, hence nEnd + 1.
            for j in range(nStart, nEnd + 1):
                fL.append(comp + str(j) + ' ' + bL)
        else:
            fL.append(i)
    return dict([k.split() for k in fL])
# Sample input: each entry is "<name> <value>"; a name may end in a
# "<start>-<end>" digit range (e.g. 'abcd1-2').
listA = [
'abcd1-2 4d4e',
'xyz0-1 551',
'foo 3ea',
'bar1 2bd',
'mc-mqisd0-2 77a'
]
as_dict = listFln(listA)
# Under Python 2.7 (used for this reproduction) the parenthesised arguments
# are printed as a tuple.
print("resulting dict:", as_dict)
Prints:
('DEBUG groups:', ('abcd', '1-2'))
('DEBUG groups:', ('xyz', '0-1'))
('DEBUG groups:', ('foo', None))
('DEBUG groups:', ('bar1', None))
('DEBUG groups:', ('mc-mqisd', '0-2'))
('resulting dict:', {'mc-mqisd2': '77a', 'mc-mqisd0': '77a', 'mc-mqisd1': '77a', 'xyz1': '551', 'xyz0': '551', 'abcd1': '4d4e', 'abcd2': '4d4e'})
