from fastai.vision.all import *
import gradio as gr
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import os

# Load the trained model and its class labels
learn = load_learner('nsfw_model.pkl')
labels = learn.dls.vocab
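# 'nsfw_model.pkl' is assumed to be a learner exported with fastai's
# learn.export(). A minimal, hypothetical training sketch that would produce
# such a file (paths and hyperparameters are illustrative only):
#
#   dls = ImageDataLoaders.from_folder('data', valid_pct=0.2, item_tfms=Resize(224))
#   learn = vision_learner(dls, resnet34, metrics=error_rate)
#   learn.fine_tune(3)
#   learn.export('nsfw_model.pkl')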
def analyze(url):
    """Classify the images found at the given URL and report whether the page is safe."""
    # Make sure the URL starts with http:// or https://
    # TODO: confirm that the URL points to a web page and not some other resource.
    # A regex could be useful here.
    if not url.startswith(('http://', 'https://')):
        url = 'http://' + url
    safety = 'safe'  # our return value

    # Fetch the HTML and extract all <img> tags
    html = requests.get(url)
    soup = BeautifulSoup(html.text, "html.parser")
    img_elements = soup.find_all("img")

    # Keep the attribute values that clearly look like image URLs.
    # A better approach would be to use a regex here.
    srcs = []
    for img in img_elements:
        for v in img.attrs.values():
            if isinstance(v, str):
                if v.lower().endswith(('.jpg', '.png', '.gif', '.jpeg')):
                    srcs.append(v)

    # Download and classify each image.
    # If there is a single unsafe image, report the page as unsafe.
    for src_url in srcs:
        try:
            # Resolve relative paths (e.g. /img/cat.jpg) against the page URL
            src_url = urljoin(url, src_url)
            img_data = requests.get(src_url).content
            temp = 'temp.' + src_url.lower().split('.')[-1]
            with open(temp, 'wb') as handler:
                handler.write(img_data)
            is_nsfw, _, probs = learn.predict(PILImage.create(temp))
            os.remove(temp)
            if is_nsfw == "unsafe_searches":
                safety = 'NOT safe'
                return safety
        except Exception:
            # Skip images that fail to download or decode
            pass
    return safety
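# Quick local sanity check (hypothetical URL, needs network access):
#   print(analyze('example.com'))  # prints 'safe' or 'NOT safe'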
title = "Website Safety Analyzer"
description = (
    "**The internet is not safe for children.** Even if we know the 'bad' sites, social media is hard to regulate. \n"
    "This is step one in an attempt to solve that: an image classifier that audits every image at a URL. \n"
    "In this iteration, I classify sites with sexually explicit content as **'NOT safe'**. \n\n"
    "There is a long way to go, with NLP for profanity and cyber-bullying, as well as CV for violence, substance abuse, etc. \n"
    "Another step will be to convert this into a browser extension/add-on. \n"
    "I welcome any help on this."
)
examples = ['pornhub.com', 'cnn.com', 'xvideos.com', 'www.pinterest.com']
enable_queue = True
iface = gr.Interface(
    fn=analyze,
    inputs="text",
    outputs="text",
    title=title,
    description=description,
    examples=examples,
)
iface.launch(enable_queue=enable_queue)
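# Note: passing enable_queue to launch() is deprecated in newer Gradio
# releases; the rough modern equivalent (assumption, check your version) is:
#   iface.queue().launch()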