from fastai.vision.all import *
import gradio as gr
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import os

# Load the trained model and its class labels
learn = load_learner('nsfw_model.pkl')
labels = learn.dls.vocab
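# 'nsfw_model.pkl' is assumed to be a learner exported with fastai's
# learn.export(). A minimal, hypothetical training sketch that would produce
# such a file (paths and hyperparameters are illustrative only):
#
#   dls = ImageDataLoaders.from_folder('data', valid_pct=0.2, item_tfms=Resize(224))
#   learn = vision_learner(dls, resnet34, metrics=error_rate)
#   learn.fine_tune(3)
#   learn.export('nsfw_model.pkl')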
def analyze(url):
    """Classify the images found at the given URL and report whether the page is safe."""
    # Make sure the URL starts with http:// or https://
    # TODO: confirm that the URL points to a web page and not some other resource.
    # A regex could be useful here.
    if not url.startswith(('http://', 'https://')):
        url = 'http://' + url
    safety = 'safe'  # our return value

    # Fetch the HTML and extract all <img> tags
    html = requests.get(url)
    soup = BeautifulSoup(html.text, "html.parser")
    img_elements = soup.find_all("img")

    # Keep the attribute values that clearly look like image URLs.
    # A better approach would be to use a regex here.
    srcs = []
    for img in img_elements:
        for v in img.attrs.values():
            if isinstance(v, str):
                if v.lower().endswith(('.jpg', '.png', '.gif', '.jpeg')):
                    srcs.append(v)

    # Download and classify each image.
    # If there is a single unsafe image, report the page as unsafe.
    for src_url in srcs:
        try:
            # Resolve relative paths (e.g. /img/cat.jpg) against the page URL
            src_url = urljoin(url, src_url)
            img_data = requests.get(src_url).content
            temp = 'temp.' + src_url.lower().split('.')[-1]
            with open(temp, 'wb') as handler:
                handler.write(img_data)
            is_nsfw, _, probs = learn.predict(PILImage.create(temp))
            os.remove(temp)
            if is_nsfw == "unsafe_searches":
                safety = 'NOT safe'
                return safety
        except Exception:
            # Skip images that fail to download or decode
            pass
    return safety
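# Quick local sanity check (hypothetical URL, needs network access):
#   print(analyze('example.com'))  # prints 'safe' or 'NOT safe'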
title = "Website Safety Analyzer"
description = (
    "**The internet is not safe for children.** Even if we know the 'bad' sites, social media is hard to regulate. \n"
    "This is step one in an attempt to solve that: an image classifier that audits every image at a URL. \n"
    "In this iteration, I classify sites with sexually explicit content as **'NOT safe'**. \n\n"
    "There is a long way to go, with NLP for profanity and cyber-bullying, as well as CV for violence, substance abuse, etc. \n"
    "Another step will be to convert this into a browser extension/add-on. \n"
    "I welcome any help on this."
)
examples = ['pornhub.com', 'cnn.com', 'xvideos.com', 'www.pinterest.com']
enable_queue = True
iface = gr.Interface(
    fn=analyze,
    inputs="text",
    outputs="text",
    title=title,
    description=description,
    examples=examples,
)
iface.launch(enable_queue=enable_queue)
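# Note: passing enable_queue to launch() is deprecated in newer Gradio
# releases; the rough modern equivalent (assumption, check your version) is:
#   iface.queue().launch()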