# Advanced SEO Checker — Gradio app (Hugging Face Space)
| import gradio as gr | |
| import requests | |
| from bs4 import BeautifulSoup | |
| from urllib.parse import urlparse | |
| import re | |
| import matplotlib.pyplot as plt | |
def fetch_html(url):
    """Download the HTML document at *url*.

    Returns a ``(html, error)`` pair: ``(page_text, "")`` on success,
    ``(None, message)`` on failure, so callers can branch on the error
    string instead of handling exceptions themselves.
    """
    # A browser-like User-Agent avoids trivial bot blocking on many sites.
    headers = {'User-Agent': 'Mozilla/5.0'}
    try:
        response = requests.get(url, timeout=10, headers=headers)
        response.raise_for_status()
    except requests.RequestException as e:
        # RequestException covers connection failures, timeouts, invalid
        # URLs and the HTTP 4xx/5xx raised by raise_for_status() — narrower
        # than the original bare `except Exception`, which also hid
        # programming errors such as a non-string url.
        return None, f"β Error fetching the URL: {e}"
    return response.text, ""
def seo_check(url):
    """Run ~24 SEO checks against *url* and build a scored report.

    Returns a 6-tuple aligned with the Gradio outputs, in order:
    (score text, passed count, warning count, failed count,
     matplotlib summary figure, detailed text report).
    """
    html, error = fetch_html(url)
    if error:
        # BUG FIX: the original returned (error, "", "", "", "", None),
        # which fed "" into the Plot component and None into the report
        # textbox. Keep the slots aligned: Plot gets None, report gets "".
        return error, "", "", "", None, ""

    soup = BeautifulSoup(html, 'html.parser')
    parsed = urlparse(url)
    checks = []  # accumulated (name, result, details, suggestion) tuples

    def add_check(name, result, details, suggestion=""):
        # Suppress the boilerplate suggestion when a check passed, so the
        # report only advises on actual problems (the Title check already
        # did this inline; apply it uniformly here).
        if result == "Passed":
            suggestion = ""
        checks.append((name, result, details.strip(), suggestion.strip()))

    # --- Head / metadata checks -------------------------------------------
    title = soup.title.string.strip() if soup.title and soup.title.string else ""
    add_check("Title Tag", "Passed" if title else "Failed",
              f"Found: {title}" if title else "Title tag missing.",
              "" if title else "Add a <title> tag with relevant keywords.")

    meta_desc = soup.find("meta", attrs={"name": "description"})
    meta_text = meta_desc.get("content", "").strip() if meta_desc else ""
    add_check("Meta Description", "Passed" if meta_text else "Warning",
              f"Found: {meta_text}" if meta_text else "No meta description found.",
              "Add a concise meta description (150-160 characters)." if not meta_text else "")

    h1 = soup.find("h1")
    h1_text = h1.get_text(strip=True) if h1 else ""
    add_check("H1 Tag", "Passed" if h1_text else "Failed",
              f"Found: {h1_text}" if h1_text else "No <h1> tag found.",
              "Include one <h1> tag per page.")

    h2_tags = soup.find_all("h2")
    add_check("H2 Tags", "Passed" if h2_tags else "Warning",
              f"Found {len(h2_tags)} <h2> tags." if h2_tags else "No <h2> tags found.",
              "Use <h2> tags to structure subheadings.")

    canonical = soup.find("link", rel="canonical")
    add_check("Canonical Tag", "Passed" if canonical else "Warning",
              f"Found: {canonical['href']}" if canonical and canonical.has_attr('href') else "No canonical tag found.",
              "Add a canonical tag to prevent duplicate content issues.")

    add_check("HTTPS", "Passed" if parsed.scheme == "https" else "Failed",
              f"URL uses {'HTTPS' if parsed.scheme == 'https' else 'HTTP'}.",
              "Use HTTPS for secure connections.")

    # --- Site-level resources (extra requests, short timeouts) ------------
    robots_url = f"{parsed.scheme}://{parsed.netloc}/robots.txt"
    try:
        robots_response = requests.get(robots_url, timeout=5)
        robots_ok = robots_response.status_code == 200
        add_check("robots.txt", "Passed" if robots_ok else "Warning",
                  "robots.txt is accessible." if robots_ok else "robots.txt not found.",
                  "Create a robots.txt file to manage crawler access.")
    except requests.RequestException:
        # Narrowed from a bare `except:` — only network-level failures
        # should fall through to the "could not be fetched" result.
        add_check("robots.txt", "Warning", "robots.txt could not be fetched.", "Ensure it's accessible.")

    sitemap_url = f"{parsed.scheme}://{parsed.netloc}/sitemap.xml"
    try:
        sitemap_response = requests.get(sitemap_url, timeout=5)
        sitemap_ok = sitemap_response.status_code == 200
        add_check("sitemap.xml", "Passed" if sitemap_ok else "Warning",
                  "sitemap.xml is accessible." if sitemap_ok else "sitemap.xml not found.",
                  "Add a sitemap.xml to help search engines index your pages.")
    except requests.RequestException:
        add_check("sitemap.xml", "Warning", "Could not access sitemap.xml.", "Ensure it's publicly accessible.")

    favicon = soup.find("link", rel=re.compile("icon", re.I))
    add_check("Favicon", "Passed" if favicon else "Warning",
              "Favicon found." if favicon else "No favicon detected.",
              "Add a favicon for branding and user experience.")

    charset = soup.find("meta", attrs={"charset": True})
    add_check("Charset", "Passed" if charset else "Warning",
              f"Found: {charset['charset']}" if charset else "No charset declared.",
              "Add a <meta charset='UTF-8'> to define character encoding.")

    lang_attr = soup.html.get("lang", "") if soup.html else ""
    add_check("Language Attribute", "Passed" if lang_attr else "Warning",
              f"Found: lang='{lang_attr}'" if lang_attr else "No lang attribute found in <html>.",
              "Set <html lang='en'> for proper language targeting.")

    viewport = soup.find("meta", attrs={"name": "viewport"})
    add_check("Mobile Viewport", "Passed" if viewport else "Warning",
              "Viewport tag present." if viewport else "No viewport meta tag found.",
              "Add <meta name='viewport' content='width=device-width, initial-scale=1.0'>.")

    # Crude substring sniff for GA — catches both analytics.js and gtag setups.
    ga_code = "google-analytics.com" in html or "gtag(" in html
    add_check("Google Analytics", "Passed" if ga_code else "Warning",
              "Google Analytics script detected." if ga_code else "No GA script found.",
              "Install GA script to track visitors.")

    og_tags = soup.find("meta", property="og:title")
    add_check("Open Graph Tags", "Passed" if og_tags else "Warning",
              "OG tags found." if og_tags else "No OG tags present.",
              "Add Open Graph meta tags to enhance social sharing.")

    twitter_card = soup.find("meta", attrs={"name": "twitter:card"})
    add_check("Twitter Card", "Passed" if twitter_card else "Warning",
              "Twitter Card tag present." if twitter_card else "No Twitter Card meta tag found.",
              "Add Twitter Card tags to improve tweet previews.")

    # --- Content / markup hygiene -----------------------------------------
    images = soup.find_all("img")
    alt_missing = sum(1 for img in images if not img.get("alt"))
    add_check("Image ALT Texts", "Passed" if alt_missing == 0 else "Warning",
              f"{len(images)} images found, {alt_missing} missing alt text.",
              "Add descriptive alt attributes to all images.")

    inline_styles = bool(soup.find(style=True))
    add_check("Inline Styles", "Warning" if inline_styles else "Passed",
              "Inline styles detected." if inline_styles else "No inline styles found.",
              "Move inline styles to external CSS.")

    font_tags = soup.find_all("font")
    add_check("Deprecated <font> Tags", "Warning" if font_tags else "Passed",
              f"Found {len(font_tags)} <font> tags." if font_tags else "No deprecated tags.",
              "Avoid deprecated tags like <font>, use CSS instead.")

    strong_tags = soup.find_all("strong") + soup.find_all("em")
    add_check("Semantic Emphasis Tags", "Passed" if strong_tags else "Warning",
              f"Found {len(strong_tags)} <strong>/<em> tags." if strong_tags else "No emphasis tags.",
              "Use <strong> and <em> to highlight important content.")

    noindex = soup.find("meta", attrs={"name": "robots", "content": re.compile("noindex", re.I)})
    add_check("Noindex Tag", "Warning" if noindex else "Passed",
              "Page marked noindex." if noindex else "No noindex tag found.",
              "Remove noindex to allow search indexing (if intentional).")

    ext_scripts = soup.find_all("script", src=True)
    ext_styles = soup.find_all("link", rel="stylesheet")
    add_check("External JS/CSS", "Passed" if len(ext_scripts) + len(ext_styles) <= 10 else "Warning",
              f"Found {len(ext_scripts)} JS and {len(ext_styles)} CSS includes.",
              "Reduce number of external scripts/styles for better performance.")

    social_links = [a['href'] for a in soup.find_all('a', href=True)
                    if any(x in a['href'] for x in ['facebook', 'twitter', 'linkedin'])]
    add_check("Social Media Links", "Passed" if social_links else "Warning",
              f"Found links: {', '.join(social_links)}" if social_links else "No social media links found.",
              "Add links to your social profiles to build trust.")

    page_size = len(html.encode('utf-8'))
    add_check("Page Size", "Passed" if page_size < 250000 else "Warning",
              f"Page size: {page_size / 1024:.2f} KB.",
              "Keep HTML under 250KB for faster load times.")

    # --- Outbound link probe ----------------------------------------------
    # PERF FIX: the original issued a HEAD request per <a> with no dedup or
    # cap, so a page with hundreds of (often repeated) links could stall the
    # check for minutes. De-duplicate, cap the probe count, and follow
    # redirects so 301->200 chains are not misreported.
    broken_links = []
    unique_hrefs = {a['href'] for a in soup.find_all('a', href=True)
                    if a['href'].startswith("http")}
    for href in sorted(unique_hrefs)[:25]:
        try:
            r = requests.head(href, timeout=5, allow_redirects=True)
            if r.status_code >= 400:
                broken_links.append(href)
        except requests.RequestException:
            broken_links.append(href)
    add_check("Broken Links", "Passed" if not broken_links else "Failed",
              f"Broken links: {broken_links}" if broken_links else "No broken links found.",
              "Fix or remove broken links.")

    # --- Score + report -----------------------------------------------------
    total = len(checks)
    passed = sum(1 for _, r, _, _ in checks if r == "Passed")
    warning = sum(1 for _, r, _, _ in checks if r == "Warning")
    failed = sum(1 for _, r, _, _ in checks if r == "Failed")
    score = int((passed / total) * 100)

    report = ""
    for name, result, details, suggestion in checks:
        icon = {"Passed": "β ", "Warning": "β οΈ", "Failed": "β"}[result]
        report += f"{icon} {name} β {result}\n{details}\n"
        if suggestion:
            report += f"π‘ {suggestion}\n"
        report += "\n"

    # Bar chart summarising the pass/warn/fail split; the figure object is
    # returned directly for gr.Plot to render.
    fig, ax = plt.subplots()
    ax.bar(["Passed", "Warnings", "Failed"], [passed, warning, failed],
           color=["green", "orange", "red"])
    ax.set_title(f"SEO Test Summary (Score: {score}/100)")
    ax.set_ylabel("Number of Checks")
    fig.tight_layout()

    return f"β SEO Score: {score}/100", f"{passed}", f"{warning}", f"{failed}", fig, report.strip()
# ---------------------------------------------------------------------------
# Gradio UI wiring: one URL textbox in, six result widgets out. The widget
# order must match the 6-tuple returned by seo_check.
# ---------------------------------------------------------------------------
output_widgets = [
    gr.Textbox(label="SEO Score"),
    gr.Textbox(label="Passed Tests"),
    gr.Textbox(label="Warnings"),
    gr.Textbox(label="Failed Tests"),
    gr.Plot(label="SEO Score Graph"),
    gr.Textbox(label="Detailed Report", lines=60, max_lines=100, show_copy_button=True),
]

interface = gr.Interface(
    fn=seo_check,
    inputs=gr.Textbox(label="Enter Website URL"),
    outputs=output_widgets,
    title="π Advanced SEO Checker",
    description="Checks 25 SEO parameters and gives detailed results, solutions, and a score graph. Built with β€οΈ using Gradio.",
)

if __name__ == "__main__":
    interface.launch()