Remove JS handlers

This commit is contained in:
Mateusz Gruszczyński
2026-02-26 15:59:44 +01:00
parent 13734e585b
commit b527bb200f

View File

@@ -14,6 +14,31 @@ ALLOWED_EXTENSIONS = {
'svg', 'ico', 'woff', 'woff2', 'ttf', 'eot', 'json', 'map'
}
def sanitize_no_js(soup):
    """Strip all JavaScript from a BeautifulSoup document.

    Removes every ``<script>`` element and every inline event-handler
    attribute (any attribute whose name starts with ``on``, e.g.
    ``onclick``, ``onload``, ``onpointerdown``).

    :param soup: BeautifulSoup document (mutated in place).
    :return: the same ``soup`` object, for chaining.
    """
    # Drop <script> elements entirely.
    for script in soup.find_all('script'):
        script.decompose()
    # Drop inline event handlers.  Testing the "on" prefix covers every
    # handler attribute, including ones a fixed blacklist misses
    # (oninput, onpointerdown, onanimationend, ...), and no longer strips
    # benign attributes that merely happen to be a prefix of a handler
    # name -- the previous `dangerous.startswith(attr_lower)` test had
    # the comparison inverted.
    for tag in soup.find_all(True):
        tag.attrs = {
            attr: value
            for attr, value in tag.attrs.items()
            if not attr.lower().startswith('on')
        }
    return soup
def allowed_file(filename):
    """Return True if *filename* has an extension listed in ALLOWED_EXTENSIONS."""
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS
@@ -132,16 +157,44 @@ def resolve_css_imports(css_file_path, file_index):
return parse_css_recursively(css_file_path)
def sanitize_no_js(soup):
    """Strip all JavaScript from a BeautifulSoup document.

    Removes every ``<script>`` element and every inline event-handler
    attribute (any attribute whose name starts with ``on``, e.g.
    ``onclick``, ``onload``, ``onpointerdown``).

    :param soup: BeautifulSoup document (mutated in place).
    :return: the same ``soup`` object, for chaining.
    """
    # Drop <script> elements entirely.
    for script in soup.find_all('script'):
        script.decompose()
    # Optional
    # for style in soup.find_all('style'):
    #     style.decompose()
    # Drop inline event handlers.  Testing the "on" prefix covers every
    # handler attribute, including ones a fixed blacklist misses
    # (oninput, onpointerdown, onanimationend, ...), and no longer strips
    # benign attributes that merely happen to be a prefix of a handler
    # name -- the previous `dangerous.startswith(attr_lower)` test had
    # the comparison inverted.
    for tag in soup.find_all(True):
        tag.attrs = {
            attr: value
            for attr, value in tag.attrs.items()
            if not attr.lower().startswith('on')
        }
    return soup
def make_aio_html(input_file, file_index):
"""Generate basic AIO HTML with embedded resources."""
"""Generate basic AIO HTML with embedded resources (NO JS)."""
base_path = os.path.dirname(os.path.abspath(input_file))
with open(input_file, 'r', encoding='utf-8', errors='ignore') as f:
soup = BeautifulSoup(f.read(), 'html.parser')
for script in soup.find_all('script', src=True):
script['src'] = embed_resource(base_path, script['src'], file_index)
for elem in soup.find_all(attrs={'src': True}):
elem['src'] = embed_resource(base_path, elem['src'], file_index)
@@ -149,10 +202,12 @@ def make_aio_html(input_file, file_index):
if style.string:
style.string = embed_css_resources(base_path, style.string, file_index)
soup = sanitize_no_js(soup)
return str(soup)
def make_aio_html_advanced(input_file, file_index):
"""Advanced AIO: resolves @import CSS chains + embeds everything."""
"""Advanced AIO: resolves @import CSS chains + embeds (NO JS)."""
base_path = os.path.dirname(os.path.abspath(input_file))
with open(input_file, 'r', encoding='utf-8', errors='ignore') as f:
@@ -187,9 +242,6 @@ def make_aio_html_advanced(input_file, file_index):
if href and not href.startswith(('http', 'data:')):
link.decompose()
for script in soup.find_all('script', src=True):
script['src'] = embed_resource(base_path, script['src'], file_index)
for elem in soup.find_all(attrs={'src': True}):
elem['src'] = embed_resource(base_path, elem['src'], file_index)
@@ -197,8 +249,11 @@ def make_aio_html_advanced(input_file, file_index):
if style.string:
style.string = embed_css_resources(base_path, style.string, file_index)
soup = sanitize_no_js(soup)
return str(soup)
@api_bp.route('/upload', methods=['POST'])
def upload_files():
    """Handle POST /upload: delegate to the shared upload pipeline using the basic AIO HTML generator."""
    return _upload_and_generate(make_aio_html)