# Literature-search tools: query the ArXiv and PubMed APIs and return
# the matching articles as Markdown-formatted text.
| import urllib, urllib.request | |
| from pydantic import Field | |
| from datetime import datetime | |
| from markitdown import MarkItDown | |
| from Bio import Entrez | |
| import xml.etree.ElementTree as ET | |
| md = MarkItDown() | |
| def format_today(): | |
| d = datetime.now() | |
| if d.month < 10: | |
| month = f"0{d.month}" | |
| else: | |
| month = d.month | |
| if d.day < 10: | |
| day = f"0{d.day}" | |
| else: | |
| day = d.day | |
| if d.hour < 10: | |
| hour = f"0{d.hour}" | |
| else: | |
| hour = d.hour | |
| if d.minute < 10: | |
| minute = f"0{d.hour}" | |
| else: | |
| minute = d.minute | |
| today = f"{d.year}{month}{day}{hour}{minute}" | |
| two_years_ago = f"{d.year-2}{month}{day}{hour}{minute}" | |
| return today, two_years_ago | |
| def arxiv_tool(search_query: str = Field(description="The query with which to search ArXiv database")): | |
| """A tool to search ArXiv""" | |
| today, two_years_ago = format_today() | |
| query = search_query.replace(" ", "+") | |
| url = f'http://export.arxiv.org/api/query?search_query=all:{query}&submittedDate:[{two_years_ago}+TO+{today}]&start=0&max_results=3' | |
| data = urllib.request.urlopen(url) | |
| content = data.read().decode("utf-8") | |
| f = open("arxiv_results.xml", "w") | |
| f.write(content) | |
| f.close() | |
| result = md.convert("arxiv_results.xml") | |
| return result.text_content | |
| def search_pubmed(query): | |
| Entrez.email = "[email protected]" # Replace with your email | |
| handle = Entrez.esearch(db="pubmed", term=query, retmax=3) | |
| record = Entrez.read(handle) | |
| handle.close() | |
| return record["IdList"] | |
| def fetch_pubmed_details(pubmed_ids): | |
| Entrez.email = "[email protected]" # Replace with your email | |
| handle = Entrez.efetch(db="pubmed", id=pubmed_ids, rettype="medline", retmode="xml") | |
| records = handle.read() | |
| handle.close() | |
| recs = records.decode("utf-8") | |
| f = open("biomed_results.xml", "w") | |
| f.write(recs) | |
| f.close() | |
| def fetch_xml(): | |
| tree = ET.parse("biomed_results.xml") | |
| root = tree.getroot() | |
| parsed_articles = [] | |
| for article in root.findall('PubmedArticle'): | |
| # Extract title | |
| title = article.find('.//ArticleTitle') | |
| title_text = title.text if title is not None else "No title" | |
| # Extract abstract | |
| abstract = article.find('.//Abstract/AbstractText') | |
| abstract_text = abstract.text if abstract is not None else "No abstract" | |
| # Format output | |
| formatted_entry = f"## {title_text}\n\n**Abstract**:\n\n{abstract_text}" | |
| parsed_articles.append(formatted_entry) | |
| return "\n\n".join(parsed_articles) | |
| def pubmed_tool(search_query: str = Field(description="The query with which to search PubMed database")): | |
| """A tool to search PubMed""" | |
| idlist = search_pubmed(search_query) | |
| if len(idlist) == 0: | |
| return "There is no significant match in PubMed" | |
| fetch_pubmed_details(idlist) | |
| content = fetch_xml() | |
| return content | |