From b527bb200f274fd3a76db19144e8f92c8314a35a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20Gruszczy=C5=84ski?=
Date: Thu, 26 Feb 2026 15:59:44 +0100
Subject: [PATCH] remove js handlers

---
 app/api.py | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/app/api.py b/app/api.py
index 5308776..74460f2 100644
--- a/app/api.py
+++ b/app/api.py
@@ -132,16 +132,31 @@ def resolve_css_imports(css_file_path, file_index):
 
     return parse_css_recursively(css_file_path)
 
+def sanitize_no_js(soup):
+    """
+    Remove JS: drop script elements and on* event-handler attributes.
+    """
+    for script in soup.find_all('script'):
+        script.decompose()
+
+    for tag in soup.find_all(True):
+        # Every HTML event-handler attribute is named "on" + event, so match
+        # that prefix instead of enumerating a list that is never complete.
+        tag.attrs = {
+            attr: value
+            for attr, value in tag.attrs.items()
+            if not attr.lower().startswith('on')
+        }
+
+    return soup
+
 def make_aio_html(input_file, file_index):
-    """Generate basic AIO HTML with embedded resources."""
+    """Generate basic AIO HTML with embedded resources (NO JS)."""
     base_path = os.path.dirname(os.path.abspath(input_file))
 
     with open(input_file, 'r', encoding='utf-8', errors='ignore') as f:
         soup = BeautifulSoup(f.read(), 'html.parser')
 
-    for script in soup.find_all('script', src=True):
-        script['src'] = embed_resource(base_path, script['src'], file_index)
-
     for elem in soup.find_all(attrs={'src': True}):
         elem['src'] = embed_resource(base_path, elem['src'], file_index)
 
@@ -149,10 +164,12 @@ def make_aio_html(input_file, file_index):
         if style.string:
             style.string = embed_css_resources(base_path, style.string, file_index)
 
+    soup = sanitize_no_js(soup)
+
     return str(soup)
 
 def make_aio_html_advanced(input_file, file_index):
-    """Advanced AIO: resolves @import CSS chains + embeds everything."""
+    """Advanced AIO: resolves @import CSS chains + embeds (NO JS)."""
     base_path = os.path.dirname(os.path.abspath(input_file))
 
     with open(input_file, 'r', encoding='utf-8', errors='ignore') as f:
@@ -189,7 +206,4 @@ def make_aio_html_advanced(input_file, file_index):
 
-    for script in soup.find_all('script', src=True):
-        script['src'] = embed_resource(base_path, script['src'], file_index)
-
     for elem in soup.find_all(attrs={'src': True}):
         elem['src'] = embed_resource(base_path, elem['src'], file_index)
 
@@ -197,8 +211,11 @@ def make_aio_html_advanced(input_file, file_index):
         if style.string:
             style.string = embed_css_resources(base_path, style.string, file_index)
 
+    soup = sanitize_no_js(soup)
+
     return str(soup)
 
+
 @api_bp.route('/upload', methods=['POST'])
 def upload_files():
     return _upload_and_generate(make_aio_html)