Seeing multiple events with Python watchdog library when folders are created
Question:
I am having issues where I am seeing additional events that I am not expecting.
I am watching the folder C:\Users\kvasko\Downloads\data. If I copy a folder 2017\07\25\LogFile.xml
I will see the following 3 “created” events, when I would expect to only see 1. If I create the date folder structure ahead of time (but while the application is running and watching the folders) it will only generate one event like I expect. I never get an event for just a folder creation. It is as if the events are being generated for the creation of the folders, but when inspecting the actual event message passed to my on_created(self, event), all three events look exactly the same. What is going on here?
Here is the sample output and minimum example.
2017-09-22 13:58:10,182 - root - INFO - Watchdog: file created C:\Users\kvasko\Downloads\data\2017\07\25\LogFile.xml
2017-09-22 13:58:11,184 - root - INFO - Watchdog: file created C:\Users\kvasko\Downloads\data\2017\07\25\LogFile.xml
2017-09-22 13:58:12,187 - root - INFO - Watchdog: file created C:\Users\kvasko\Downloads\data\2017\07\25\LogFile.xml
I would expect:
2017-09-22 13:58:12,187 - root - INFO - Watchdog: file created C:\Users\kvasko\Downloads\data\2017\07\25\LogFile.xml
Is there a way to detect if it's actually multiple events from folder creation?
The following is my observer configuration.
# Watch the data folder recursively and report newly created *.xml files.
# Raw strings keep the Windows backslashes literal: in a normal string "\U"
# is a syntax error and "\07" / "\25" are read as octal escapes.
folder = r"C:\Users\kvasko\Downloads\data"
# NOTE(review): watchdog's own Observer is created with Observer(), wired up
# with observer.schedule(handler, path, recursive=True) and started with
# observer.start(); the two calls below look like a project wrapper - confirm.
observer = Observer(MyProcessHandler(patterns=["*.xml"]), folder, recursive=True)
observer.start_observer()
# os.makedirs creates every missing intermediate directory
# (there is no os.mkdirs function).
os.makedirs(r"C:\Users\kvasko\Downloads\data\2017\07\25")
shutil.copy2(r"C:\temp\LogFile.xml", r"C:\Users\kvasko\Downloads\data\2017\07\25")
try:
    # Idle in the main thread until the user interrupts the script.
    while True:
        time.sleep(5)
except KeyboardInterrupt:
    # Ctrl+C is the only expected way out of the sleep loop.
    print("Error")
The following is my handler class.
import logging
from watchdog.events import PatternMatchingEventHandler
class MyProcessHandler(PatternMatchingEventHandler):
    """Pattern-filtered handler that logs every "created" event it receives."""

    def on_created(self, event):
        """Log the source path carried by *event* at INFO level."""
        created_path = str(event.src_path)
        logging.info("Watchdog: file created " + created_path)
Edit:
Here is a minimum working example:
import time
import os
import shutil
import datetime
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
class TestEventHandler(PatternMatchingEventHandler):
    """Handler that prints each "created" event with the current local time."""

    def on_created(self, event):
        """Print a timestamp followed by the string form of *event*."""
        timestamp = str(datetime.datetime.now())
        description = str(event)
        print(" ".join((timestamp, description)))
if __name__ == '__main__':
    # Raw strings keep the Windows backslashes literal; in a normal string
    # "\201", "\07", "\25" and "\t" are escape sequences and corrupt the path.
    path = r"C:\Temp"
    event_handler = TestEventHandler(patterns=["*.xml"])
    observer = Observer()
    observer.schedule(event_handler, path, recursive=True)
    observer.start()
    # Create the dated folder tree while the observer is already running,
    # then copy a single file into it.
    os.makedirs(r"C:\Temp\2017\07\25")
    shutil.copy2(r"C:\Temp2\2017\07\25\test.xml", r"C:\Temp\2017\07\25")
    try:
        # Idle in the main thread until the user presses Ctrl+C.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    # Wait for the observer to finish before exiting.
    observer.join()
Prints out:
2017-09-22 15:49:51.334262 <FileCreatedEvent: src_path='C:\Temp\2017\07\25\test.xml'>
2017-09-22 15:49:52.335468 <FileCreatedEvent: src_path='C:\Temp\2017\07\25\test.xml'>
2017-09-22 15:49:53.340998 <FileCreatedEvent: src_path='C:\Temp\2017\07\25\test.xml'>
Edit2:
Changed on_created() to on_any_event(). This is what was produced.
2017-09-23 13:14:57.288792 <FileCreatedEvent: src_path='C:\Temp\2017\07\25\test.xml'>
2017-09-23 13:14:58.291327 <FileCreatedEvent: src_path='C:\Temp\2017\07\25\test.xml'>
2017-09-23 13:14:59.293334 <FileCreatedEvent: src_path='C:\Temp\2017\07\25\test.xml'>
2017-09-23 13:14:59.293334 <FileModifiedEvent: src_path='C:\Temp\2017\07\25\test.xml'>
Answers:
You might be experiencing this bug. As a workaround, you could use the TestEventHandler
class to record the last file path created and not respond to subsequent on_created
events
unless the path is different than the last created path or if that path has been deleted:
import time
import os
import shutil
import datetime
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
class TestEventHandler(PatternMatchingEventHandler):
    """Handler that prints a "created" event only when its path differs from
    the path of the most recently printed one."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to the base handler and reset the memory."""
        super().__init__(*args, **kwargs)
        # Path of the last creation that was printed; None until one is seen
        # or once that path has been deleted.
        self.last_created = None

    def on_created(self, event):
        """Print *event* unless it repeats the last printed path."""
        created = event.src_path
        if created == self.last_created:
            return
        timestamp = str(datetime.datetime.now())
        print(timestamp + " " + str(event))
        self.last_created = created

    def on_deleted(self, event):
        """Forget the remembered path when that same path is deleted."""
        if event.src_path == self.last_created:
            self.last_created = None
if __name__ == '__main__':
    # Raw strings keep the Windows backslashes literal; in a normal string
    # "\201", "\07" and "\25" are escape sequences and corrupt the path.
    path = r"C:\Temp"
    target_dir = r"C:\Temp\2017\07\25"
    src_dir = r"C:\Temp2\2017\07\25"
    filename = 'test.xml'
    target = os.path.join(target_dir, filename)
    src = os.path.join(src_dir, filename)
    event_handler = TestEventHandler(patterns=["*.xml"])
    observer = Observer()
    observer.schedule(event_handler, path, recursive=True)
    observer.start()
    # Make sure the destination tree exists and start from a clean slate so
    # the first copy really creates the file.
    os.makedirs(target_dir, exist_ok=True)
    if os.path.exists(target):
        os.unlink(target)
    # Copy the same file three times to exercise the handler repeatedly.
    for _ in range(3):
        shutil.copy2(src, target_dir)
    try:
        # Idle in the main thread until the user presses Ctrl+C.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    # Wait for the observer to finish before exiting.
    observer.join()
Thanks for the answer! I tried solving the problem in another way.
from functools import lru_cache
import time
from watchdog.events import FileSystemEvent
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
WATCH_DIR = "..."
def on_modified(event: FileSystemEvent):
    """Hand the source path of a "modified" event to the cached trigger."""
    # Additional filtering logic etc...
    changed_path = event.src_path
    modfied_event_trigger(changed_path)
# The cache size is a tuning knob: it bounds how many distinct paths are
# remembered at the same time.
# A str argument is hashable, which lru_cache requires of its arguments.
# lru_cache keeps its internal bookkeeping coherent across threads.
@lru_cache(maxsize=256)
def modfied_event_trigger(src_path: str):
    """Run the reaction for *src_path* once per cached path.

    A repeated call with a path that is still in the cache returns the stored
    result without executing the body again.
    """
    # Insert logic here
    return None
if __name__ == "__main__":
    patterns = ["*"]
    # Keyword arguments spell out what the two booleans mean and do not
    # depend on the positional parameter order of the handler.
    event_handler = PatternMatchingEventHandler(
        patterns=patterns,
        ignore_patterns=None,
        ignore_directories=False,
        case_sensitive=True,
    )
    # Assign on modified
    event_handler.on_modified = on_modified
    observer = Observer()
    observer.schedule(event_handler, WATCH_DIR, recursive=False)
    observer.start()
    try:
        # Idle in the main thread until the user presses Ctrl+C.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    # Wait for the observer to finish before exiting.
    observer.join()
My rationale:
In case there are rapid successive calls, the first call will be executed. From the second call onwards, the cached value will be returned without executing the function. Thus, frequent calls will be prevented.
The value 256 is arbitrary. I still need to test whether 1 would suffice. Multiple different events will be triggered, and I reserved enough space for my use case.
This is a possible solution. Worked for my use case.
NOTE: Noob here! This is my first answer. Apologies for anything I may have overlooked.
I am having issues where I am seeing additional events that I am not expecting.
I am watching the folder C:\Users\kvasko\Downloads\data. If I copy a folder 2017\07\25\LogFile.xml
I will see the following 3 “created” events, when I would expect to only see 1. If I create the date folder structure ahead of time (but while the application is running and watching the folders) it will only generate one event like I expect. I never get an event for just a folder creation. It is as if the events are being generated for the creation of the folders, but when inspecting the actual event message passed to my on_created(self, event), all three events look exactly the same. What is going on here?
Here is the sample output and minimum example.
2017-09-22 13:58:10,182 - root - INFO - Watchdog: file created C:\Users\kvasko\Downloads\data\2017\07\25\LogFile.xml
2017-09-22 13:58:11,184 - root - INFO - Watchdog: file created C:\Users\kvasko\Downloads\data\2017\07\25\LogFile.xml
2017-09-22 13:58:12,187 - root - INFO - Watchdog: file created C:\Users\kvasko\Downloads\data\2017\07\25\LogFile.xml
I would expect:
2017-09-22 13:58:12,187 - root - INFO - Watchdog: file created C:\Users\kvasko\Downloads\data\2017\07\25\LogFile.xml
Is there a way to detect if it's actually multiple events from folder creation?
The following is my observer configuration.
# Watch the data folder recursively and report newly created *.xml files.
# Raw strings keep the Windows backslashes literal: in a normal string "\U"
# is a syntax error and "\07" / "\25" are read as octal escapes.
folder = r"C:\Users\kvasko\Downloads\data"
# NOTE(review): watchdog's own Observer is created with Observer(), wired up
# with observer.schedule(handler, path, recursive=True) and started with
# observer.start(); the two calls below look like a project wrapper - confirm.
observer = Observer(MyProcessHandler(patterns=["*.xml"]), folder, recursive=True)
observer.start_observer()
# os.makedirs creates every missing intermediate directory
# (there is no os.mkdirs function).
os.makedirs(r"C:\Users\kvasko\Downloads\data\2017\07\25")
shutil.copy2(r"C:\temp\LogFile.xml", r"C:\Users\kvasko\Downloads\data\2017\07\25")
try:
    # Idle in the main thread until the user interrupts the script.
    while True:
        time.sleep(5)
except KeyboardInterrupt:
    # Ctrl+C is the only expected way out of the sleep loop.
    print("Error")
The following is my handler class.
import logging
from watchdog.events import PatternMatchingEventHandler
class MyProcessHandler(PatternMatchingEventHandler):
    """Pattern-filtered handler that logs every "created" event it receives."""

    def on_created(self, event):
        """Log the source path carried by *event* at INFO level."""
        created_path = str(event.src_path)
        logging.info("Watchdog: file created " + created_path)
Edit:
Here is a minimum working example:
import time
import os
import shutil
import datetime
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
class TestEventHandler(PatternMatchingEventHandler):
    """Handler that prints each "created" event with the current local time."""

    def on_created(self, event):
        """Print a timestamp followed by the string form of *event*."""
        timestamp = str(datetime.datetime.now())
        description = str(event)
        print(" ".join((timestamp, description)))
if __name__ == '__main__':
    # Raw strings keep the Windows backslashes literal; in a normal string
    # "\201", "\07", "\25" and "\t" are escape sequences and corrupt the path.
    path = r"C:\Temp"
    event_handler = TestEventHandler(patterns=["*.xml"])
    observer = Observer()
    observer.schedule(event_handler, path, recursive=True)
    observer.start()
    # Create the dated folder tree while the observer is already running,
    # then copy a single file into it.
    os.makedirs(r"C:\Temp\2017\07\25")
    shutil.copy2(r"C:\Temp2\2017\07\25\test.xml", r"C:\Temp\2017\07\25")
    try:
        # Idle in the main thread until the user presses Ctrl+C.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    # Wait for the observer to finish before exiting.
    observer.join()
Prints out:
2017-09-22 15:49:51.334262 <FileCreatedEvent: src_path='C:\Temp\2017\07\25\test.xml'>
2017-09-22 15:49:52.335468 <FileCreatedEvent: src_path='C:\Temp\2017\07\25\test.xml'>
2017-09-22 15:49:53.340998 <FileCreatedEvent: src_path='C:\Temp\2017\07\25\test.xml'>
Edit2:
Changed on_created() to on_any_event(). This is what was produced.
2017-09-23 13:14:57.288792 <FileCreatedEvent: src_path='C:\Temp\2017\07\25\test.xml'>
2017-09-23 13:14:58.291327 <FileCreatedEvent: src_path='C:\Temp\2017\07\25\test.xml'>
2017-09-23 13:14:59.293334 <FileCreatedEvent: src_path='C:\Temp\2017\07\25\test.xml'>
2017-09-23 13:14:59.293334 <FileModifiedEvent: src_path='C:\Temp\2017\07\25\test.xml'>
You might be experiencing this bug. As a workaround, you could use the TestEventHandler
class to record the last file path created and not respond to subsequent on_created
events
unless the path is different than the last created path or if that path has been deleted:
import time
import os
import shutil
import datetime
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
class TestEventHandler(PatternMatchingEventHandler):
    """Handler that prints a "created" event only when its path differs from
    the path of the most recently printed one."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to the base handler and reset the memory."""
        super().__init__(*args, **kwargs)
        # Path of the last creation that was printed; None until one is seen
        # or once that path has been deleted.
        self.last_created = None

    def on_created(self, event):
        """Print *event* unless it repeats the last printed path."""
        created = event.src_path
        if created == self.last_created:
            return
        timestamp = str(datetime.datetime.now())
        print(timestamp + " " + str(event))
        self.last_created = created

    def on_deleted(self, event):
        """Forget the remembered path when that same path is deleted."""
        if event.src_path == self.last_created:
            self.last_created = None
if __name__ == '__main__':
    # Raw strings keep the Windows backslashes literal; in a normal string
    # "\201", "\07" and "\25" are escape sequences and corrupt the path.
    path = r"C:\Temp"
    target_dir = r"C:\Temp\2017\07\25"
    src_dir = r"C:\Temp2\2017\07\25"
    filename = 'test.xml'
    target = os.path.join(target_dir, filename)
    src = os.path.join(src_dir, filename)
    event_handler = TestEventHandler(patterns=["*.xml"])
    observer = Observer()
    observer.schedule(event_handler, path, recursive=True)
    observer.start()
    # Make sure the destination tree exists and start from a clean slate so
    # the first copy really creates the file.
    os.makedirs(target_dir, exist_ok=True)
    if os.path.exists(target):
        os.unlink(target)
    # Copy the same file three times to exercise the handler repeatedly.
    for _ in range(3):
        shutil.copy2(src, target_dir)
    try:
        # Idle in the main thread until the user presses Ctrl+C.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    # Wait for the observer to finish before exiting.
    observer.join()
Thanks for the answer! I tried solving the problem in another way.
from functools import lru_cache
import time
from watchdog.events import FileSystemEvent
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
WATCH_DIR = "..."
def on_modified(event: FileSystemEvent):
    """Hand the source path of a "modified" event to the cached trigger."""
    # Additional filtering logic etc...
    changed_path = event.src_path
    modfied_event_trigger(changed_path)
# The cache size is a tuning knob: it bounds how many distinct paths are
# remembered at the same time.
# A str argument is hashable, which lru_cache requires of its arguments.
# lru_cache keeps its internal bookkeeping coherent across threads.
@lru_cache(maxsize=256)
def modfied_event_trigger(src_path: str):
    """Run the reaction for *src_path* once per cached path.

    A repeated call with a path that is still in the cache returns the stored
    result without executing the body again.
    """
    # Insert logic here
    return None
if __name__ == "__main__":
    patterns = ["*"]
    # Keyword arguments spell out what the two booleans mean and do not
    # depend on the positional parameter order of the handler.
    event_handler = PatternMatchingEventHandler(
        patterns=patterns,
        ignore_patterns=None,
        ignore_directories=False,
        case_sensitive=True,
    )
    # Assign on modified
    event_handler.on_modified = on_modified
    observer = Observer()
    observer.schedule(event_handler, WATCH_DIR, recursive=False)
    observer.start()
    try:
        # Idle in the main thread until the user presses Ctrl+C.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    # Wait for the observer to finish before exiting.
    observer.join()
My rationale:
In case there are rapid successive calls, the first call will be executed. From the second call onwards, the cached value will be returned without executing the function. Thus, frequent calls will be prevented.
The value 256 is arbitrary. I still need to test whether 1 would suffice. Multiple different events will be triggered, and I reserved enough space for my use case.
This is a possible solution. Worked for my use case.
NOTE: Noob here! This is my first answer. Apologies for anything I may have overlooked.