Hook your own Python code
Python programmers can hook their own code to expand webchanges with custom functionality by writing such
code into a hooks.py
file located in the same directory as the job and configuration files (or as specified in the
command line using the --hooks
argument). The file will be automatically loaded as a module at startup.
An example hooks.py
file to get you started is below.
Smaller code snippets can also be run using the execute filter, for example as used here for filtering JSON dictionaries.
Important
On Linux and macOS systems, due to security reasons the hooks module will not be imported unless both the hooks file and the directory it is located in are owned and writeable by only the user who is running the job (and not by its group or by other users). To set this up:
cd ~/.config/webchanges # could be different
sudo chown $USER:$(id -g -n) . hooks.py
sudo chmod go-w . hooks.py
sudo
may or may not be required.Replace
$USER
with the username that runs webchanges if different than the use you’re logged in when making the above changes, similarly with$(id -g -n)
for the group.
Example hooks.py
file:
"""Example hooks file for webchanges (for Python >= 3.12)."""
import re
from pathlib import Path
from typing import Any, Literal, Union
from webchanges.differs import DifferBase
from webchanges.filters import AutoMatchFilter, FilterBase, RegexMatchFilter
from webchanges.handler import JobState
from webchanges.jobs import UrlJob, UrlJobBase
from webchanges.reporters import HtmlReporter, TextReporter
class CustomLoginJob(UrlJob):
"""Custom login for my webpage.
Add ``kind: hooks_custom_login`` to the job to retrieve data using this class instead of the
built-in ones.
"""
__kind__ = 'hooks_custom_login'
__required__ = ('username', 'password') # These are added to the ones from the super classes.
def retrieve(self, job_state: JobState, headless: bool = True) -> tuple[bytes | str, str, str]:
""":returns: The data retrieved, the ETag, and the mime_type (e.g. HTTP Content-Type)."""
... # custom code here to actually do the login.
return super().retrieve(job_state) # uses the existing code to then browse and capture data
class CustomBrowserJob(UrlJobBase):
"""Custom browser job.
Add ``kind: hooks_custom_browser`` to the job to retrieve data using this class instead of the
built-in ones.
"""
__kind__ = 'hooks_custom_browser'
__is_browser__ = True # This is required for execution in the correct parallel processing queue.
def retrieve(self, job_state: JobState, headless: bool = True) -> tuple[bytes | str, str, str]:
"""
:returns: The data retrieved, the ETag, and the data's MIME type (e.g. HTTP Content-Type).
"""
... # custom code here to launch browser and capture data.
return (
f'Data captured after browsing to {self.url}\n',
'The Etag (if any) or empty string',
'The Content-Type (if any) or empty string',
)
class CaseFilter(FilterBase):
"""Custom filter for changing case.
Needs to be selected manually, i.e. add `- hooks_case:` (or e.g. `- hooks_case: lower`) to the
list of filters in the job's `filter:` directive. E.g.:
.. code-block:: yaml
url: example.com/hooks/len
filter:
- hooks_case: lower
"""
__kind__ = 'hooks_case'
__supported_subfilters__ = {
'upper': 'Upper case (default)',
'lower': 'Lower case'
}
__default_subfilter__ = 'upper'
@staticmethod
def filter(
data: Union[str, bytes], mime_type: str, subfilter: dict[str, Any]
) -> tuple[Union[str, bytes], str]:
""":returns: The filtered data and its MIME type."""
if not subfilter or subfilter.get('upper'):
return data.upper(), mime_type
elif subfilter.get('lower'):
return data.lower(), mime_type
else:
raise ValueError(f'Unknown case subfilter {subfilter}')
class IndentFilter(FilterBase):
"""Custom filter for indenting.
Needs to be selected manually, i.e. add ``- hooks_indent:`` (or e.g. ``- hooks_indent: 4``) to
the list of filters in the job's ``filter:`` directive. E.g.:
.. code-block:: yaml
url: example.com/hooks/indent
filter:
- hooks_indent: 4
"""
__kind__ = 'hooks_indent'
__supported_subfilters__ = {
'indent': 'Number of spaces to indent (default 8)'
}
__default_subfilter__ = 'indent'
@staticmethod
def filter(
data: Union[str, bytes], mime_type: str, subfilter: dict[str, Any]
) -> tuple[Union[str, bytes], str]:
""":returns: The filtered data and its MIME type."""
indent = int(subfilter.get('indent', 8))
return '\n'.join((' ' * indent) + line for line in data.splitlines()), mime_type
class CustomMatchUrlFilter(AutoMatchFilter):
"""
An AutoMatchFilter applies automatically to all jobs that exactly match the MATCH properties set.
"""
MATCH = {'url': 'https://example.org/'}
@staticmethod
def filter(
data: Union[str, bytes], mime_type: str, subfilter: dict[str, Any]
) -> tuple[Union[str, bytes], str]:
""":returns: The filtered data and its MIME type."""
return data.replace('foo', 'bar'), mime_type
class CustomRegexMatchUrlFilter(RegexMatchFilter):
"""
A RegexMatchFilter applies automatically to all jobs that match the MATCH regex properties set.
"""
MATCH = {'url': re.compile(r'https://example.org/.*')}
@staticmethod
def filter(
data: Union[str, bytes], mime_type: str, subfilter: dict[str, Any]
) -> tuple[Union[str, bytes], str]:
""":returns: The filtered data and its MIME type."""
return data.replace('foo', 'bar'), mime_type
class LenDiffer(DifferBase):
"""Custom differ to show difference in length of the data.
Needs to be selected manually, i.e. add the directive ``differ: hooks_differ`` the job. E.g.:
.. code-block:: yaml
url: example.com/hooks/len
differ: hooks_lendiffer
"""
__kind__ = 'hooks_lendiffer'
__no_subdiffer__ = True
__supported__report_kinds__ = {'html'}
def differ(
self,
subdiffer: dict[str, Any],
report_kind: Literal['text', 'markdown', 'html'],
_unfiltered_diff: dict[Literal['text', 'markdown', 'html'], str] | None = None,
tz: str | None = None,
) -> dict[Literal['text', 'markdown', 'html'], str]:
len_diff = len(self.state.new_data) - len(self.state.old_data)
diff_text = f'Length of data has changed by {len_diff:+,}'
return {
'text': diff_text,
'markdown': diff_text,
'html': diff_text,
}
class CustomTextFileReporter(TextReporter):
"""Custom reporter that writes the text-only report to a file. Insert the filename in config.py
as a filename key to the text reporter.
Needs to enabled in the config.yaml file:
.. code-block:: yaml
report:
hooks_save_text_report:
enabled: true
"""
__kind__ = 'hooks_save_text_report'
def submit(self) -> None:
Path(self.config['filename']).write_text('\n'.join(super().submit()))
class CustomHtmlFileReporter(HtmlReporter):
"""Custom reporter that writes the HTML report to a file. Insert the filename in config.py
as a filename key to the html reporter.
.. code-block:: yaml
report:
hooks_save_html_report:
enabled: true
"""
__kind__ = 'hooks_save_html_report'
def submit(self) -> None:
Path(self.config['filename']).write_text('\n'.join(super().submit()))
Changed in version 3.22: The definitions of the filter method (of FilterBase and its subclasses) and of the retrieve method (of JobBase and
its subclasses) have been updated to accommodate the capturing and processing of mime_type
:
def filter(
data: Union[str, bytes], mime_type: str, subfilter: dict[str, Any]
) -> tuple[Union[str, bytes], str]:
""":returns: The filtered data and its MIME type."""
...
def retrieve(self, job_state: JobState, headless: bool = True) -> tuple[bytes | str, str, str]:
""":returns: The data retrieved, the ETag, and the data's MIME type (e.g. HTTP Content-Type)."""
...