Spaces:
Sleeping
Sleeping
| import json | |
| from langchain_community.document_loaders import ArxivLoader, WikipediaLoader | |
| from markitdown import MarkItDown | |
| from smolagents import ( | |
| tool, | |
| ) | |
# Shared MarkItDown converter used by all file/URL tools below.
md = MarkItDown(enable_plugins=True)  # plugins enabled so extra formats (audio, Excel, ...) can be converted
def arvix_search(query: str) -> str:
    """Search Arxiv for a query and return a maximum of 3 results.
    Args:
        query: The search query."""
    docs = ArxivLoader(query=query, load_max_docs=3).load()
    rendered = []
    for doc in docs:
        source = doc.metadata["source"]
        page = doc.metadata.get("page", "")
        # Only the first 1000 characters of each paper are kept.
        excerpt = doc.page_content[:1000]
        rendered.append(
            f'<Document source="{source}" page="{page}"/>\n{excerpt}\n</Document>'
        )
    return "\n\n---\n\n".join(rendered)
def read_excel_content_to_markdown_content(file_location: str) -> str:
    """Read the content of an Excel file and convert it to markdown content.
    Args:
        file_location: The path to the Excel file."""
    # Delegate the conversion to the shared MarkItDown instance.
    return md.convert(file_location).text_content
def read_pdf_content_to_markdown(file_location: str) -> str:
    """Read the content of a PDF file and convert it to markdown.
    Args:
        file_location: The path to the PDF file."""
    conversion = md.convert(file_location)
    markdown_text = conversion.text_content
    return markdown_text
def get_audio_transcription(file_path: str) -> str:
    """Get the transcription of the audio file using the file path.
    Args:
        file_path: The path of the audio file."""
    # MarkItDown (with plugins enabled) handles the audio-to-text step.
    return md.convert(file_path).text_content
def get_python_file_content(file_name: str) -> str:
    """Get the content of a mentioned Python file.
    Args:
        file_name: The name (or path) of the file.
    Returns:
        The full text content of the file.
    Raises:
        OSError: If the file cannot be opened or read."""
    # Read as UTF-8 explicitly: Python source files are conventionally UTF-8,
    # and relying on the platform default encoding is non-portable.
    # (The original wrapped file_name in a no-op f-string; removed.)
    with open(file_name, "r", encoding="utf-8") as f:
        return f.read()
def visit_webpage_to_markdown(url: str) -> str:
    """Visit a web page and return its content in markdown format.
    Args:
        url: The URL of the web page."""
    # md.convert is called with the URL directly; MarkItDown fetches the
    # page itself before converting it to markdown.
    page = md.convert(url)
    return page.text_content
def extract_markdown_tables_from_markdown_content(markdown_content: str) -> str:
    """Extract the markdown tables from a markdown string and return them as structured JSON.
    Args:
        markdown_content: The markdown string containing the table."""
    # Imported lazily so the module loads even when mrkdwn_analysis is absent.
    from mrkdwn_analysis import MarkdownAnalyzer

    parser = MarkdownAnalyzer.from_string(markdown_content)
    parser.analyse()
    tables = parser.identify_tables()
    return json.dumps(tables)
def wiki_search(query: str) -> str:
    """Search Wikipedia for a query and return maximum 2 results.
    Args:
        query: The search query."""
    docs = WikipediaLoader(query=query, load_max_docs=2).load()
    rendered = []
    for doc in docs:
        source = doc.metadata["source"]
        page = doc.metadata.get("page", "")
        rendered.append(
            f'<Document source="{source}" page="{page}"/>\n{doc.page_content}\n</Document>'
        )
    return "\n\n---\n\n".join(rendered)