|
|
import gradio as gr |
|
|
|
|
|
import requests |
|
|
from langchain_community.document_loaders import UnstructuredURLLoader |
|
|
from youtube_transcript_api import YouTubeTranscriptApi |
|
|
|
|
|
import subprocess |
|
|
|
|
|
|
|
|
def _youtube_video_id(url: str) -> str:
    """Return the video id embedded in a YouTube URL, or "" when there is none."""
    # Function-scope import keeps this block self-contained (stdlib only).
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(url)
    host = (parsed.hostname or "").lower()

    # Short links carry the id as the first path segment: youtu.be/<id>
    if host == "youtu.be":
        return parsed.path.strip("/").split("/")[0]

    if host == "youtube.com" or host.endswith(".youtube.com"):
        # Standard watch links: /watch?v=<id>&t=...  (extra params are ignored)
        ids = parse_qs(parsed.query).get("v")
        if ids:
            return ids[0]
        # Path-style links: /shorts/<id>, /embed/<id>, /live/<id>, /v/<id>
        segments = [part for part in parsed.path.split("/") if part]
        if len(segments) >= 2 and segments[0] in ("shorts", "embed", "live", "v"):
            return segments[1]

    return ""


def text_extract(generic_url: str) -> str:
    """
    Extract the text from any website or youtube video.

    Args:
        generic_url (str): the url of website or youtube to extract text from it

    Returns:
        str: A string containing text extracted from website or youtube
    """
    # Tolerate None and surrounding whitespace; the cleaned value is used below.
    url = (generic_url or "").strip()
    if not url:
        print("Please provide the information to get started")
        return "Please provide the information to get started"

    try:
        # Reachability probe. A non-200 status is only logged, not treated as
        # fatal: the extraction below is still attempted.
        response = requests.get(url, timeout=5)
        if response.status_code == 200:
            print("URL is valid and reachable.")
        else:
            print("Unable to reach")

        video_id = _youtube_video_id(url)
        if video_id:
            # NOTE(review): get_transcript is the classic static API of
            # youtube-transcript-api; confirm the pinned version still has it.
            transcript = YouTubeTranscriptApi.get_transcript(video_id=video_id)
            return " ".join(entry["text"] for entry in transcript)

        # NOTE(review): ssl_verify=False disables TLS certificate checks for
        # the page fetch; kept as in the original -- confirm it is intended.
        loader = UnstructuredURLLoader(
            urls=[url],
            ssl_verify=False,
            headers={"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_5_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"},
        )
        docs = loader.load()
        if not docs:
            # Without this guard the user would see "list index out of range".
            return "We have encounterd the following error\nNo text could be extracted from the given url"

        text = docs[0].page_content
        # Drop blank lines and trim the surrounding whitespace of the rest.
        cleaned_lines = [line.strip() for line in text.split("\n") if line.strip()]
        return "\n".join(cleaned_lines)

    except requests.exceptions.ConnectionError as e:
        print("Error reaching the URL:", e)
        return "Pls enter valid url we have encounterd ConnectionError\n" + str(e)
    except requests.exceptions.RequestException as e:
        print("Error reaching the URL:", e)
        return "Pls enter valid url we have encounterd RequestException\n" + str(e)
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        print(f"Exception:{e}")
        return "We have encounterd the following error\n" + str(e)
|
|
|
|
|
def terminal(command: str) -> str:
    """Answer a terminal command with the dummy terminal's notice.

    Args:
        command: The command to execute (it is not run; the reply is fixed)

    Returns:
        Markdown text explaining that this terminal is a dummy
    """
    notice_parts = [
        "# Hey you are accessing a dummy terminal. \n",
        "- Its very dangerous to exposing a terminal as a tool to public. \n",
        "- If you want this terminal tool working in action, then checkout my youtube video: ",
    ]
    return "".join(notice_parts)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Tab 1: a single URL textbox wired to text_extract, with a textbox for the result.
text_extract_fn = gr.Interface(
    title="Text Extractor",
    description="Extract the text from any website or youtube video.",
    fn=text_extract,
    inputs=gr.Textbox(placeholder="Paste any website or youtube video url"),
    outputs=gr.Textbox(placeholder="Text extracted from website or youtube video"),
)
|
|
|
|
|
# Tab 2: a command textbox wired to terminal; the reply is rendered as markdown
# and flagging is switched off.
terminal_fn = gr.Interface(
    title="Shell Server",
    description="Runs the shell commands on your computer.",
    fn=terminal,
    inputs=gr.Textbox(placeholder="Enter you command"),
    outputs="markdown",
    flagging_mode="never",
)
|
|
|
|
|
|
|
|
# Combine both interfaces into one app, one tab per interface (names in order).
demo = gr.TabbedInterface(
    interface_list=[text_extract_fn, terminal_fn],
    tab_names=["Text Extractor", "Command Terminal"],
)


if __name__ == "__main__":
    # Launch only when run as a script; mcp_server=True is passed through to
    # Gradio's launch() to enable its MCP server.
    demo.launch(mcp_server=True)
|
|
|