import re
from urllib.parse import urlparse

import gradio as gr
import matplotlib.pyplot as plt
import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent: many sites reject the default python-requests UA.
HEADERS = {'User-Agent': 'Mozilla/5.0'}


def fetch_html(url):
    """Download *url* and return ``(html_text, error_message)``.

    Exactly one element is truthy: ``(text, "")`` on success,
    ``(None, message)`` on any network or HTTP-status failure.
    """
    try:
        response = requests.get(url, timeout=10, headers=HEADERS)
        response.raise_for_status()
        return response.text, ""
    except requests.RequestException as e:  # narrowed from a bare except
        return None, f"❌ Error fetching the URL: {e}"


def _is_reachable(url):
    """Probe *url* with a short GET.

    Returns True on HTTP 200, False on any other status, and None when the
    request itself fails (timeout, DNS error, refused connection, ...).
    """
    try:
        return requests.get(url, timeout=5, headers=HEADERS).status_code == 200
    except requests.RequestException:
        return None


def seo_check(url):
    """Run ~25 on-page SEO checks against *url*.

    Returns a 6-tuple matching the Gradio outputs, in order:
    (score text, passed count, warning count, failed count,
     matplotlib Figure, detailed report text).
    """
    html, error = fetch_html(url)
    if error:
        # BUG FIX: the 5th output is the gr.Plot and the 6th the report
        # Textbox; the original returned ("", None) swapped here, feeding a
        # string to the Plot component and None to the Textbox.
        return error, "", "", "", None, ""

    soup = BeautifulSoup(html, 'html.parser')
    parsed = urlparse(url)
    checks = []

    def add_check(name, result, details, suggestion=""):
        # result is one of "Passed" / "Warning" / "Failed".
        checks.append((name, result, details.strip(), suggestion.strip()))

    # --- Head / metadata checks -------------------------------------------
    title = soup.title.string.strip() if soup.title and soup.title.string else ""
    add_check("Title Tag", "Passed" if title else "Failed",
              f"Found: {title}" if title else "Title tag missing.",
              # BUG FIX: the tag name had been stripped from this suggestion
              # ("Add a tag with relevant keywords.").
              "" if title else "Add a <title> tag with relevant keywords.")

    meta_desc = soup.find("meta", attrs={"name": "description"})
    meta_text = meta_desc.get("content", "").strip() if meta_desc else ""
    add_check("Meta Description", "Passed" if meta_text else "Warning",
              f"Found: {meta_text}" if meta_text else "No meta description found.",
              "Add a concise meta description (150-160 characters)." if not meta_text else "")

    h1 = soup.find("h1")
    h1_text = h1.get_text(strip=True) if h1 else ""
    add_check("H1 Tag", "Passed" if h1_text else "Failed",
              f"Found: {h1_text}" if h1_text else "No <h1> tag found.",
              "Include one <h1> tag per page.")

    h2_tags = soup.find_all("h2")
    add_check("H2 Tags", "Passed" if h2_tags else "Warning",
              f"Found {len(h2_tags)} <h2> tags." if h2_tags else "No <h2> tags found.",
              "Use <h2> tags to structure subheadings.")

    canonical = soup.find("link", rel="canonical")
    add_check("Canonical Tag", "Passed" if canonical else "Warning",
              f"Found: {canonical['href']}" if canonical and canonical.has_attr('href')
              else "No canonical tag found.",
              "Add a canonical tag to prevent duplicate content issues.")

    # --- Site-level checks -------------------------------------------------
    add_check("HTTPS", "Passed" if parsed.scheme == "https" else "Failed",
              f"URL uses {'HTTPS' if parsed.scheme == 'https' else 'HTTP'}.",
              "Use HTTPS for secure connections.")

    robots_ok = _is_reachable(f"{parsed.scheme}://{parsed.netloc}/robots.txt")
    if robots_ok is None:
        add_check("robots.txt", "Warning", "robots.txt could not be fetched.",
                  "Ensure it's accessible.")
    else:
        add_check("robots.txt", "Passed" if robots_ok else "Warning",
                  "robots.txt is accessible." if robots_ok else "robots.txt not found.",
                  "Create a robots.txt file to manage crawler access.")

    sitemap_ok = _is_reachable(f"{parsed.scheme}://{parsed.netloc}/sitemap.xml")
    if sitemap_ok is None:
        add_check("sitemap.xml", "Warning", "Could not access sitemap.xml.",
                  "Ensure it's publicly accessible.")
    else:
        add_check("sitemap.xml", "Passed" if sitemap_ok else "Warning",
                  "sitemap.xml is accessible." if sitemap_ok else "sitemap.xml not found.",
                  "Add a sitemap.xml to help search engines index your pages.")

    favicon = soup.find("link", rel=re.compile("icon", re.I))
    add_check("Favicon", "Passed" if favicon else "Warning",
              "Favicon found." if favicon else "No favicon detected.",
              "Add a favicon for branding and user experience.")

    charset = soup.find("meta", attrs={"charset": True})
    add_check("Charset", "Passed" if charset else "Warning",
              f"Found: {charset['charset']}" if charset else "No charset declared.",
              "Add a <meta charset='UTF-8'> to define character encoding.")

    lang_attr = soup.html.get("lang", "") if soup.html else ""
    add_check("Language Attribute", "Passed" if lang_attr else "Warning",
              f"Found: lang='{lang_attr}'" if lang_attr
              else "No lang attribute found in <html>.",
              "Set <html lang='en'> for proper language targeting.")

    viewport = soup.find("meta", attrs={"name": "viewport"})
    add_check("Mobile Viewport", "Passed" if viewport else "Warning",
              "Viewport tag present." if viewport else "No viewport meta tag found.",
              "Add <meta name='viewport' content='width=device-width, initial-scale=1.0'>.")

    # Plain substring scan of the raw HTML, matching gtag.js and analytics.js.
    ga_code = "google-analytics.com" in html or "gtag(" in html
    add_check("Google Analytics", "Passed" if ga_code else "Warning",
              "Google Analytics script detected." if ga_code else "No GA script found.",
              "Install GA script to track visitors.")

    og_tags = soup.find("meta", property="og:title")
    add_check("Open Graph Tags", "Passed" if og_tags else "Warning",
              "OG tags found." if og_tags else "No OG tags present.",
              "Add Open Graph meta tags to enhance social sharing.")

    twitter_card = soup.find("meta", attrs={"name": "twitter:card"})
    add_check("Twitter Card", "Passed" if twitter_card else "Warning",
              "Twitter Card tag present." if twitter_card
              else "No Twitter Card meta tag found.",
              "Add Twitter Card tags to improve tweet previews.")

    # --- Content / markup checks ------------------------------------------
    images = soup.find_all("img")
    alt_missing = sum(1 for img in images if not img.get("alt"))
    add_check("Image ALT Texts", "Passed" if alt_missing == 0 else "Warning",
              f"{len(images)} images found, {alt_missing} missing alt text.",
              "Add descriptive alt attributes to all images.")

    inline_styles = bool(soup.find(style=True))
    add_check("Inline Styles", "Warning" if inline_styles else "Passed",
              "Inline styles detected." if inline_styles else "No inline styles found.",
              "Move inline styles to external CSS.")

    font_tags = soup.find_all("font")
    add_check("Deprecated <font> Tags", "Warning" if font_tags else "Passed",
              f"Found {len(font_tags)} <font> tags." if font_tags else "No deprecated tags.",
              "Avoid deprecated tags like <font>, use CSS instead.")

    strong_tags = soup.find_all("strong") + soup.find_all("em")
    add_check("Semantic Emphasis Tags", "Passed" if strong_tags else "Warning",
              f"Found {len(strong_tags)} <strong>/<em> tags." if strong_tags
              else "No emphasis tags.",
              "Use <strong> and <em> to highlight important content.")

    noindex = soup.find("meta", attrs={"name": "robots",
                                       "content": re.compile("noindex", re.I)})
    add_check("Noindex Tag", "Warning" if noindex else "Passed",
              "Page marked noindex." if noindex else "No noindex tag found.",
              "Remove noindex to allow search indexing (if intentional).")

    ext_scripts = soup.find_all("script", src=True)
    ext_styles = soup.find_all("link", rel="stylesheet")
    add_check("External JS/CSS",
              "Passed" if len(ext_scripts) + len(ext_styles) <= 10 else "Warning",
              f"Found {len(ext_scripts)} JS and {len(ext_styles)} CSS includes.",
              "Reduce number of external scripts/styles for better performance.")

    social_links = [a['href'] for a in soup.find_all('a', href=True)
                    if any(x in a['href'] for x in ['facebook', 'twitter', 'linkedin'])]
    add_check("Social Media Links", "Passed" if social_links else "Warning",
              f"Found links: {', '.join(social_links)}" if social_links
              else "No social media links found.",
              "Add links to your social profiles to build trust.")

    page_size = len(html.encode('utf-8'))
    add_check("Page Size", "Passed" if page_size < 250000 else "Warning",
              f"Page size: {page_size / 1024:.2f} KB.",
              "Keep HTML under 250KB for faster load times.")

    # Broken outbound links: probe each UNIQUE absolute URL with a HEAD
    # request, following redirects so 3xx chains are judged by their final
    # status. Capped at 25 probes — each may take up to 5 s, so an
    # unbounded scan could stall the whole check for minutes (the original
    # probed every anchor, duplicates included).
    unique_links = list(dict.fromkeys(
        a['href'] for a in soup.find_all('a', href=True)
        if a['href'].startswith("http")))
    broken_links = []
    for href in unique_links[:25]:
        try:
            r = requests.head(href, timeout=5, headers=HEADERS,
                              allow_redirects=True)
            if r.status_code >= 400:
                broken_links.append(href)
        except requests.RequestException:
            broken_links.append(href)
    add_check("Broken Links", "Passed" if not broken_links else "Failed",
              f"Broken links: {broken_links}" if broken_links
              else "No broken links found.",
              "Fix or remove broken links.")

    # --- Score, report, and summary graph ---------------------------------
    total = len(checks)
    passed = sum(1 for _, r, _, _ in checks if r == "Passed")
    warning = sum(1 for _, r, _, _ in checks if r == "Warning")
    failed = sum(1 for _, r, _, _ in checks if r == "Failed")
    score = int((passed / total) * 100)  # only full passes count toward score

    # Build the report with a list + join (the original used quadratic +=).
    icons = {"Passed": "✅", "Warning": "⚠️", "Failed": "❌"}
    lines = []
    for name, result, details, suggestion in checks:
        lines.append(f"{icons[result]} {name} — {result}\n{details}\n")
        if suggestion:
            lines.append(f"💡 {suggestion}\n")
        lines.append("\n")
    report = "".join(lines)

    fig, ax = plt.subplots()
    ax.bar(["Passed", "Warnings", "Failed"], [passed, warning, failed],
           color=["green", "orange", "red"])
    ax.set_title(f"SEO Test Summary (Score: {score}/100)")
    ax.set_ylabel("Number of Checks")
    fig.tight_layout()

    return (f"✅ SEO Score: {score}/100", f"{passed}", f"{warning}", f"{failed}",
            fig, report.strip())


# Gradio Interface — output order must match seo_check's return tuple.
interface = gr.Interface(
    fn=seo_check,
    inputs=gr.Textbox(label="Enter Website URL"),
    outputs=[
        gr.Textbox(label="SEO Score"),
        gr.Textbox(label="Passed Tests"),
        gr.Textbox(label="Warnings"),
        gr.Textbox(label="Failed Tests"),
        gr.Plot(label="SEO Score Graph"),
        gr.Textbox(label="Detailed Report", lines=60, max_lines=100,
                   show_copy_button=True),
    ],
    title="🔍 Advanced SEO Checker",
    description="Checks 25 SEO parameters and gives detailed results, solutions, and a score graph. Built with ❤️ using Gradio."
)

if __name__ == "__main__":
    interface.launch()