How to "chain" iterators?
Question:
I’m trying to chain iterators together with one iterator reading from a master file and another iterator taking each line of the master file and processing another file depending on the output of the first.
The working code that I have is as follows
class MasterReader(object):
    """Iterator over the non-blank, whitespace-stripped lines of a file.

    The file is opened in ``__init__`` and closed as soon as the end of the
    file is reached.
    """

    def __init__(self, filename):
        """Open *filename* for reading in text mode."""
        self.f = open(filename, "r")

    def __iter__(self):
        """Return ``self``; the reader is its own iterator."""
        return self

    def __next__(self):
        """Return the next non-blank line with surrounding whitespace removed.

        Raises:
            StopIteration: once the end of the file has been reached, and on
                every later call.
        """
        # A closed handle means an earlier call already hit EOF; keep
        # signalling exhaustion instead of failing on the closed file.
        if self.f.closed:
            raise StopIteration
        while True:
            raw = self.f.readline()
            # readline() returns "" only at real EOF; a blank line is "\n".
            # Checking before strip() keeps blank lines from ending iteration.
            if raw == "":
                self.f.close()
                raise StopIteration
            line = raw.strip()
            if line:
                return line
class SubReader(object):
    """Iterator over the non-blank, stripped lines of every file named by ``mr``.

    ``mr`` must be an iterator (it is advanced with ``next()``) that produces
    file names. The files are opened one at a time, and each is closed before
    the next one is opened.
    """

    def __init__(self, mr):
        """Store the iterator of file names; no file is opened yet."""
        self.mr = mr
        # Currently open sub file, or None when none is open.
        self.current = None

    def __iter__(self):
        """Return ``self`` without consuming anything from ``mr``."""
        return self

    def _close_current(self):
        """Close the sub file that is currently open, if there is one."""
        if self.current is not None:
            self.current.close()
            self.current = None

    def _next(self):
        """Close the current sub file and open the next one named by ``mr``.

        Raises:
            StopIteration: propagated from ``mr`` when it has no more names.
        """
        self._close_current()
        self.current = open(next(self.mr), "r")

    def __next__(self):
        """Return the next non-blank stripped line across all sub files.

        Raises:
            StopIteration: when ``mr`` is exhausted and the last sub file has
                been fully read.
        """
        while True:
            if self.current is None:
                # Opens lazily; StopIteration from the master ends iteration.
                self._next()
            raw = self.current.readline()
            # readline() returns "" only at real EOF; a blank line is "\n",
            # so blank lines no longer cut a sub file short.
            if raw == "":
                self._close_current()
                continue
            line = raw.strip()
            if line:
                return line
# Wire the readers together: the master reader supplies file names and the
# sub reader streams the lines of each named file in turn.
mr = MasterReader("master")
sr = SubReader(mr)

# Drain the chained reader, echoing one line per row.
for text in sr:
    print(text)
Where master is a file listing the names of other files, one per line:
file1
file2
file1
contains
1.a
1.b
1.c
file2
contains
2.a
2.b
2.c
The output is
1.a
1.b
1.c
2.a
2.b
2.c
Again, what I have works, but it feels wrong: I have a while loop in __next__, and I have to manually check for the end of each sub file and explicitly fetch the next line from the master file.
Is there a better/more pythonic way of doing this?
EDIT:
This is a simplified version of what I’m trying to accomplish. In the real version, SubReader is going to be threaded and I only want one MasterReader. Actually, this won’t work for my threading project, but I want to make sure I’m generalizing iterators before diving deeper into a mess.
Answers:
Since a file object is itself an iterator, you don’t need to implement __next__ in either class; just yield lines from it in your __iter__. Moreover, reading the file with a for loop handles EOF implicitly:
class MasterReader(object):
    """Single-use iterable over the whitespace-stripped lines of a file.

    The file is opened in ``__init__``. Iterating consumes it and closes it,
    so a second iteration fails on the closed file.
    """

    def __init__(self, filename):
        """Open *filename* for reading."""
        self.f = open(filename)

    def __iter__(self):
        """Yield each line of the file with surrounding whitespace removed.

        Once iteration has started, the file is closed when the generator
        finishes, raises, or is closed early by the consumer.
        """
        try:
            for line in self.f:
                yield line.strip()
        finally:
            # Runs on GeneratorExit too, so an abandoned loop does not leak
            # the handle.
            self.f.close()
class SubReader(object):
    """Iterable over the stripped lines of every file named by ``mr``."""

    def __init__(self, mr):
        """Store ``mr``, an iterable that produces file names."""
        self.mr = mr

    def __iter__(self):
        """Yield the whitespace-stripped lines of each named file, in order.

        Each file is opened only when its name is reached and is closed
        before the next one is opened.
        """
        # Iterate the instance's own reader, not a module-level ``mr``.
        for filename in self.mr:
            with open(filename) as f:
                for line in f:
                    yield line.strip()
You could use itertools.chain.from_iterable with the help of a small generator function that yields the stripped lines from each file:
from itertools import chain
def fgen(fname):
    """Lazily yield each line of the file *fname*, stripped of surrounding whitespace.

    The file is opened when the generator is first advanced and closed when
    the ``with`` block exits.
    """
    with open(fname) as handle:
        for stripped in map(str.strip, handle):
            yield stripped
# Each name read from the master file becomes its own line generator;
# chain.from_iterable flattens them lazily into a single stream.
for a in chain.from_iterable(map(fgen, fgen('master.txt'))):
    print(a)
I’m trying to chain iterators together with one iterator reading from a master file and another iterator taking each line of the master file and processing another file depending on the output of the first.
The working code that I have is as follows
class MasterReader(object):
    """Iterator over the non-blank, whitespace-stripped lines of a file.

    The file is opened in ``__init__`` and closed as soon as the end of the
    file is reached.
    """

    def __init__(self, filename):
        """Open *filename* for reading in text mode."""
        self.f = open(filename, "r")

    def __iter__(self):
        """Return ``self``; the reader is its own iterator."""
        return self

    def __next__(self):
        """Return the next non-blank line with surrounding whitespace removed.

        Raises:
            StopIteration: once the end of the file has been reached, and on
                every later call.
        """
        # A closed handle means an earlier call already hit EOF; keep
        # signalling exhaustion instead of failing on the closed file.
        if self.f.closed:
            raise StopIteration
        while True:
            raw = self.f.readline()
            # readline() returns "" only at real EOF; a blank line is "\n".
            # Checking before strip() keeps blank lines from ending iteration.
            if raw == "":
                self.f.close()
                raise StopIteration
            line = raw.strip()
            if line:
                return line
class SubReader(object):
    """Iterator over the non-blank, stripped lines of every file named by ``mr``.

    ``mr`` must be an iterator (it is advanced with ``next()``) that produces
    file names. The files are opened one at a time, and each is closed before
    the next one is opened.
    """

    def __init__(self, mr):
        """Store the iterator of file names; no file is opened yet."""
        self.mr = mr
        # Currently open sub file, or None when none is open.
        self.current = None

    def __iter__(self):
        """Return ``self`` without consuming anything from ``mr``."""
        return self

    def _close_current(self):
        """Close the sub file that is currently open, if there is one."""
        if self.current is not None:
            self.current.close()
            self.current = None

    def _next(self):
        """Close the current sub file and open the next one named by ``mr``.

        Raises:
            StopIteration: propagated from ``mr`` when it has no more names.
        """
        self._close_current()
        self.current = open(next(self.mr), "r")

    def __next__(self):
        """Return the next non-blank stripped line across all sub files.

        Raises:
            StopIteration: when ``mr`` is exhausted and the last sub file has
                been fully read.
        """
        while True:
            if self.current is None:
                # Opens lazily; StopIteration from the master ends iteration.
                self._next()
            raw = self.current.readline()
            # readline() returns "" only at real EOF; a blank line is "\n",
            # so blank lines no longer cut a sub file short.
            if raw == "":
                self._close_current()
                continue
            line = raw.strip()
            if line:
                return line
# Wire the readers together: the master reader supplies file names and the
# sub reader streams the lines of each named file in turn.
mr = MasterReader("master")
sr = SubReader(mr)

# Drain the chained reader, echoing one line per row.
for text in sr:
    print(text)
Where master is a file listing the names of other files, one per line:
file1
file2
file1
contains
1.a
1.b
1.c
file2
contains
2.a
2.b
2.c
The output is
1.a
1.b
1.c
2.a
2.b
2.c
Again, what I have works, but it feels wrong: I have a while loop in __next__, and I have to manually check for the end of each sub file and explicitly fetch the next line from the master file.
Is there a better/more pythonic way of doing this?
EDIT:
This is a simplified version of what I’m trying to accomplish. In the real version, SubReader is going to be threaded and I only want one MasterReader. Actually, this won’t work for my threading project, but I want to make sure I’m generalizing iterators before diving deeper into a mess.
Since a file object is itself an iterator, you don’t need to implement __next__ in either class; just yield lines from it in your __iter__. Moreover, reading the file with a for loop handles EOF implicitly:
class MasterReader(object):
    """Single-use iterable over the whitespace-stripped lines of a file.

    The file is opened in ``__init__``. Iterating consumes it and closes it,
    so a second iteration fails on the closed file.
    """

    def __init__(self, filename):
        """Open *filename* for reading."""
        self.f = open(filename)

    def __iter__(self):
        """Yield each line of the file with surrounding whitespace removed.

        Once iteration has started, the file is closed when the generator
        finishes, raises, or is closed early by the consumer.
        """
        try:
            for line in self.f:
                yield line.strip()
        finally:
            # Runs on GeneratorExit too, so an abandoned loop does not leak
            # the handle.
            self.f.close()
class SubReader(object):
    """Iterable over the stripped lines of every file named by ``mr``."""

    def __init__(self, mr):
        """Store ``mr``, an iterable that produces file names."""
        self.mr = mr

    def __iter__(self):
        """Yield the whitespace-stripped lines of each named file, in order.

        Each file is opened only when its name is reached and is closed
        before the next one is opened.
        """
        # Iterate the instance's own reader, not a module-level ``mr``.
        for filename in self.mr:
            with open(filename) as f:
                for line in f:
                    yield line.strip()
You could use itertools.chain.from_iterable with the help of a small generator function that yields the stripped lines from each file:
from itertools import chain
def fgen(fname):
    """Lazily yield each line of the file *fname*, stripped of surrounding whitespace.

    The file is opened when the generator is first advanced and closed when
    the ``with`` block exits.
    """
    with open(fname) as handle:
        for stripped in map(str.strip, handle):
            yield stripped
# Each name read from the master file becomes its own line generator;
# chain.from_iterable flattens them lazily into a single stream.
for a in chain.from_iterable(map(fgen, fgen('master.txt'))):
    print(a)