diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml new file mode 100644 index 00000000..c73e032c --- /dev/null +++ b/.github/workflows/pylint.yml @@ -0,0 +1,23 @@ +name: Pylint + +on: [push] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10"] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pylint + - name: Analysing the code with pylint + run: | + pylint $(git ls-files '*.py') diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..dc9bcf8a Binary files /dev/null and b/.gitignore differ diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 00000000..13566b81 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 00000000..105ce2da --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 00000000..dce6d35d --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 00000000..95309e37 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/tldw.iml b/.idea/tldw.iml new file mode 100644 index 00000000..8e5446ac --- /dev/null +++ b/.idea/tldw.iml @@ -0,0 +1,14 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 00000000..35eb1ddf --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/Bin/ffmpeg.exe b/Bin/ffmpeg.exe new file mode 100644 index 00000000..db2987ea Binary files /dev/null and b/Bin/ffmpeg.exe differ diff --git a/Get_Playlist_URLs.py b/Get_Playlist_URLs.py new file mode 100644 index 00000000..aaa39aef --- /dev/null +++ b/Get_Playlist_URLs.py @@ -0,0 +1,58 @@ +import sys +import yt_dlp +from urllib.parse import urlparse, parse_qs + +def get_playlist_videos(playlist_url): + ydl_opts = { + 'extract_flat': True, 'skip_download': True, + 'quiet': False + } + + try: + with yt_dlp.YoutubeDL(ydl_opts) as ydl: + info = ydl.extract_info(playlist_url, download=False) + + if 'entries' in info: + video_urls = [entry['url'] for entry in info['entries']] + playlist_title = info['title'] + return video_urls, playlist_title + else: + print("No videos found in the playlist.") + return [], None + except Exception as e: + print(f"An error occurred: {e}") + return [], None + + +def save_to_file(video_urls, filename): + with open(filename, 'w') as file: + file.write('\n'.join(video_urls)) + print(f"Video URLs saved to {filename}") + + +def parse_playlist_url(url): + parsed_url = urlparse(url) + query_params = parse_qs(parsed_url.query) + + if 'list' in query_params: + playlist_id = query_params['list'][0] + base_url = f"{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path}" + playlist_url = f"{base_url}?list={playlist_id}" + return 
playlist_url + else: + return url + + +if __name__ == '__main__': + if len(sys.argv) < 2: + print("Please provide the playlist URL as a command-line argument.") + print("""Example:\n\t python Get_Playlist_URLs.py "https://www.youtube.com/playlist?list=PLH15HpR5qRsWalnnt-9eYELxbEcYBPB6I" """) + sys.exit(1) + + playlist_url = sys.argv[1] + parsed_playlist_url = parse_playlist_url(playlist_url) + video_urls, playlist_title = get_playlist_videos(parsed_playlist_url) + + if video_urls: + filename = f"{playlist_title}.txt" + save_to_file(video_urls, filename) \ No newline at end of file diff --git a/HF/Dockerfile b/HF/Dockerfile new file mode 100644 index 00000000..f206fe26 --- /dev/null +++ b/HF/Dockerfile @@ -0,0 +1,42 @@ +FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04 + +ARG DEBIAN_FRONTEND=noninteractive + +ENV PYTHONUNBUFFERED=1 + +RUN apt-get update && apt-get install --no-install-recommends -y \ + build-essential \ + python3.9 \ + python3-pip \ + git \ + ffmpeg \ + && apt-get clean && rm -rf /var/lib/apt/lists/* + +WORKDIR /code + +COPY ./requirements.txt /code/requirements.txt + +# Set up a new user named "user" with user ID 1000 +RUN useradd -m -u 1000 user +# Switch to the "user" user +USER user +# Set home to the user's home directory +ENV HOME=/home/user \ + PATH=/home/user/.local/bin:$PATH \ + PYTHONPATH=$HOME/app \ + PYTHONUNBUFFERED=1 \ + GRADIO_ALLOW_FLAGGING=never \ + GRADIO_NUM_PORTS=1 \ + GRADIO_SERVER_NAME=0.0.0.0 \ + GRADIO_THEME=huggingface \ + SYSTEM=spaces + +RUN pip3 install --no-cache-dir --upgrade -r /code/requirements.txt + +# Set the working directory to the user's home directory +WORKDIR $HOME/app + +# Copy the current directory contents into the container at $HOME/app setting the owner to the user +COPY --chown=user . $HOME/app + +CMD ["python3", "app.py"] \ No newline at end of file diff --git a/HF/app.py b/HF/app.py new file mode 100644 index 00000000..9f9e9ee9 --- /dev/null +++ b/HF/app.py @@ -0,0 +1,1670 @@ +#!/usr/bin/env python3 +import argparse +import configparser +import json +import logging +import os +import platform +import requests +import shutil +import subprocess +import sys +import time +import unicodedata +import zipfile + +import gradio as gr +from huggingface_hub import InferenceClient +import torch +import yt_dlp + +log_level = "DEBUG" +logging.basicConfig(level=getattr(logging, log_level), format='%(asctime)s - %(levelname)s - %(message)s') +os.environ["GRADIO_ANALYTICS_ENABLED"] = "False" +####### +# Function Sections +# +# System Checks +# Processing Paths and local file handling +# Video Download/Handling +# Audio Transcription +# Diarization +# Summarizers +# Main +# +####### + +# To Do +# Offline diarization - https://github.com/pyannote/pyannote-audio/blob/develop/tutorials/community/offline_usage_speaker_diarization.ipynb +# Dark mode changes under gradio +# +# Changes made to app.py version: +# 1. Removal of video files after conversion -> check main function +# 2. Usage of/Hardcoding HF_TOKEN as token for API calls +# 3. Usage of HuggingFace for Inference +# 4. Other stuff I can't remember. Will eventually do a diff and document them. +# + + +#### +# +# TL/DW: Too Long Didn't Watch +# +# Project originally created by https://github.com/the-crypt-keeper +# Modifications made by https://github.com/rmusser01 +# All credit to the original authors, I've just glued shit together. 
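+#
+# Configuration:
+# API keys, model names and paths are read from `config.txt` (see the "Config loading" section below).
+# A minimal sketch of that file, using the section/key names this script looks up (values are placeholders):
+#
+#   [API]
+#   openai_api_key = <your key>
+#   anthropic_api_key = <your key>
+#
+#   [Local-API]
+#   llama_api_IP = http://127.0.0.1:8080/v1/chat/completions
+#
+#   [Paths]
+#   output_path = results
+#
+#   [Processing]
+#   processing_choice = cpu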
+# +# +# Usage: +# +# Download Audio only from URL -> Transcribe audio: +# python summarize.py https://www.youtube.com/watch?v=4nd1CDZP21s` +# +# Download Audio+Video from URL -> Transcribe audio from Video:** +# python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s` +# +# Download Audio only from URL -> Transcribe audio -> Summarize using (`anthropic`/`cohere`/`openai`/`llama` ( +# llama.cpp)/`ooba` (oobabooga/text-gen-webui)/`kobold` (kobold.cpp)/`tabby` (Tabbyapi)) API:** python summarize.py +# -v https://www.youtube.com/watch?v=4nd1CDZP21s -api ` - Make sure to put your API key into +# `config.txt` under the appropriate API variable +# +# Download Audio+Video from a list of videos in a text file (can be file paths or URLs) and have them all summarized:** +# python summarize.py ./local/file_on_your/system --api_name ` +# +# Run it as a WebApp** python summarize.py -gui` - This requires you to either stuff your API keys into the +# `config.txt` file, or pass them into the app every time you want to use it. Can be helpful for setting up a shared +# instance, but not wanting people to perform inference on your server. +# +### + + +####################### +# Config loading +# + +# Read configuration from file +config = configparser.ConfigParser() +config.read('config.txt') + +# API Keys +anthropic_api_key = config.get('API', 'anthropic_api_key', fallback=None) +logging.debug(f"Loaded Anthropic API Key: {anthropic_api_key}") + +cohere_api_key = config.get('API', 'cohere_api_key', fallback=None) +logging.debug(f"Loaded cohere API Key: {cohere_api_key}") + +groq_api_key = config.get('API', 'groq_api_key', fallback=None) +logging.debug(f"Loaded groq API Key: {groq_api_key}") + +openai_api_key = config.get('API', 'openai_api_key', fallback=None) +logging.debug(f"Loaded openAI Face API Key: {openai_api_key}") + +huggingface_api_key = config.get('API', 'huggingface_api_key', fallback=None) +logging.debug(f"Loaded HuggingFace Face API Key: {huggingface_api_key}") + +# Models +anthropic_model = config.get('API', 'anthropic_model', fallback='claude-3-sonnet-20240229') +cohere_model = config.get('API', 'cohere_model', fallback='command-r-plus') +groq_model = config.get('API', 'groq_model', fallback='FIXME') +openai_model = config.get('API', 'openai_model', fallback='gpt-4-turbo') +huggingface_model = config.get('API', 'huggingface_model', fallback='CohereForAI/c4ai-command-r-plus') + +# Local-Models +kobold_api_IP = config.get('Local-API', 'kobold_api_IP', fallback='http://127.0.0.1:5000/api/v1/generate') +kobold_api_key = config.get('Local-API', 'kobold_api_key', fallback='') +llama_api_IP = config.get('Local-API', 'llama_api_IP', fallback='http://127.0.0.1:8080/v1/chat/completions') +llama_api_key = config.get('Local-API', 'llama_api_key', fallback='') +ooba_api_IP = config.get('Local-API', 'ooba_api_IP', fallback='http://127.0.0.1:5000/v1/chat/completions') +ooba_api_key = config.get('Local-API', 'ooba_api_key', fallback='') + +# Retrieve output paths from the configuration file +output_path = config.get('Paths', 'output_path', fallback='results') + +# Retrieve processing choice from the configuration file +processing_choice = config.get('Processing', 'processing_choice', fallback='cpu') + +# Log file +# logging.basicConfig(filename='debug-runtime.log', encoding='utf-8', level=logging.DEBUG) + +# API Key Shenanigans +api_key = "UNSET" + +# +# +####################### + +# Dirty hack - sue me. 
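+# (KMP_DUPLICATE_LIB_OK suppresses the "duplicate OpenMP runtime" abort that can occur when
+#  several packages, e.g. torch and faster-whisper/ctranslate2, each load their own copy of libiomp.)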
+os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' + +whisper_models = ["small", "medium", "small.en", "medium.en"] +source_languages = { + "en": "English", + "zh": "Chinese", + "de": "German", + "es": "Spanish", + "ru": "Russian", + "ko": "Korean", + "fr": "French" +} +source_language_list = [key[0] for key in source_languages.items()] + +print(r""" + _____ _ ________ _ _ +|_ _|| | / /| _ \| | | | _ + | | | | / / | | | || | | |(_) + | | | | / / | | | || |/\| | + | | | |____ / / | |/ / \ /\ / _ + \_/ \_____//_/ |___/ \/ \/ (_) + + + _ _ +| | | | +| |_ ___ ___ | | ___ _ __ __ _ +| __| / _ \ / _ \ | | / _ \ | '_ \ / _` | +| |_ | (_) || (_) | | || (_) || | | || (_| | _ + \__| \___/ \___/ |_| \___/ |_| |_| \__, |( ) + __/ ||/ + |___/ + _ _ _ _ _ _ _ + | |(_) | | ( )| | | | | | + __| | _ __| | _ __ |/ | |_ __ __ __ _ | |_ ___ | |__ + / _` || | / _` || '_ \ | __| \ \ /\ / / / _` || __| / __|| '_ \ +| (_| || || (_| || | | | | |_ \ V V / | (_| || |_ | (__ | | | | + \__,_||_| \__,_||_| |_| \__| \_/\_/ \__,_| \__| \___||_| |_| +""") + +####################################################################################################################### +# System Checks +# +# + +# Perform Platform Check +userOS = "" + +global summary + +def platform_check(): + global userOS + if platform.system() == "Linux": + print("Linux OS detected \n Running Linux appropriate commands") + userOS = "Linux" + elif platform.system() == "Windows": + print("Windows OS detected \n Running Windows appropriate commands") + userOS = "Windows" + else: + print("Other OS detected \n Maybe try running things manually?") + exit() + + +# Check for NVIDIA GPU and CUDA availability +def cuda_check(): + global processing_choice + try: + nvidia_smi = subprocess.check_output("nvidia-smi", shell=True).decode() + if "NVIDIA-SMI" in nvidia_smi: + print("NVIDIA GPU with CUDA is available.") + processing_choice = "cuda" # Set processing_choice to gpu if NVIDIA GPU with CUDA is available + else: + print("NVIDIA GPU with CUDA is not available.\nYou either have an AMD GPU, or you're stuck with CPU only.") + processing_choice = "cpu" # Set processing_choice to cpu if NVIDIA GPU with CUDA is not available + except subprocess.CalledProcessError: + print("NVIDIA GPU with CUDA is not available.\nYou either have an AMD GPU, or you're stuck with CPU only.") + processing_choice = "cpu" # Set processing_choice to cpu if nvidia-smi command fails + + +# Ask user if they would like to use either their GPU or their CPU for transcription +def decide_cpugpu(): + global processing_choice + processing_input = input("Would you like to use your GPU or CPU for transcription? (1/cuda)GPU/(2/cpu)CPU): ") + if processing_choice == "cuda" and (processing_input.lower() == "cuda" or processing_input == "1"): + print("You've chosen to use the GPU.") + logging.debug("GPU is being used for processing") + processing_choice = "cuda" + elif processing_input.lower() == "cpu" or processing_input == "2": + print("You've chosen to use the CPU.") + logging.debug("CPU is being used for processing") + processing_choice = "cpu" + else: + print("Invalid choice. 
Please select either GPU or CPU.") + + +# check for existence of ffmpeg +def check_ffmpeg(): + if shutil.which("ffmpeg") or (os.path.exists("Bin") and os.path.isfile(".\\Bin\\ffmpeg.exe")): + logging.debug("ffmpeg found installed on the local system, in the local PATH, or in the './Bin' folder") + pass + else: + logging.debug("ffmpeg not installed on the local system/in local PATH") + print( + "ffmpeg is not installed.\n\n You can either install it manually, or through your package manager of " + "choice.\n Windows users, builds are here: https://www.gyan.dev/ffmpeg/builds/") + if userOS == "Windows": + download_ffmpeg() + elif userOS == "Linux": + print( + "You should install ffmpeg using your platform's appropriate package manager, 'apt install ffmpeg'," + "'dnf install ffmpeg' or 'pacman', etc.") + else: + logging.debug("running an unsupported OS") + print("You're running an unsupported/Un-tested OS") + exit_script = input("Let's exit the script, unless you're feeling lucky? (y/n)") + if exit_script == "y" or "yes" or "1": + exit() + + +# Download ffmpeg +def download_ffmpeg(): + user_choice = input("Do you want to download ffmpeg? (y)Yes/(n)No: ") + if user_choice.lower() == 'yes' or 'y' or '1': + print("Downloading ffmpeg") + url = "https://www.gyan.dev/ffmpeg/builds/ffmpeg-release-essentials.zip" + response = requests.get(url) + + if response.status_code == 200: + print("Saving ffmpeg zip file") + logging.debug("Saving ffmpeg zip file") + zip_path = "ffmpeg-release-essentials.zip" + with open(zip_path, 'wb') as file: + file.write(response.content) + + logging.debug("Extracting the 'ffmpeg.exe' file from the zip") + print("Extracting ffmpeg.exe from zip file to '/Bin' folder") + with zipfile.ZipFile(zip_path, 'r') as zip_ref: + ffmpeg_path = "ffmpeg-7.0-essentials_build/bin/ffmpeg.exe" + + logging.debug("checking if the './Bin' folder exists, creating if not") + bin_folder = "Bin" + if not os.path.exists(bin_folder): + logging.debug("Creating a folder for './Bin', it didn't previously exist") + os.makedirs(bin_folder) + + logging.debug("Extracting 'ffmpeg.exe' to the './Bin' folder") + zip_ref.extract(ffmpeg_path, path=bin_folder) + + logging.debug("Moving 'ffmpeg.exe' to the './Bin' folder") + src_path = os.path.join(bin_folder, ffmpeg_path) + dst_path = os.path.join(bin_folder, "ffmpeg.exe") + shutil.move(src_path, dst_path) + + logging.debug("Removing ffmpeg zip file") + print("Deleting zip file (we've already extracted ffmpeg.exe, no worries)") + os.remove(zip_path) + + logging.debug("ffmpeg.exe has been downloaded and extracted to the './Bin' folder.") + print("ffmpeg.exe has been successfully downloaded and extracted to the './Bin' folder.") + else: + logging.error("Failed to download the zip file.") + print("Failed to download the zip file.") + else: + logging.debug("User chose to not download ffmpeg") + print("ffmpeg will not be downloaded.") + + +# +# +######################################################################################################################## + + +####################################################################################################################### +# Processing Paths and local file handling +# +# + +def read_paths_from_file(file_path): + """ Reads a file containing URLs or local file paths and returns them as a list. 
""" + paths = [] # Initialize paths as an empty list + with open(file_path, 'r') as file: + for line in file: + line = line.strip() + if line and not os.path.exists( + os.path.join('results', normalize_title(line.split('/')[-1].split('.')[0]) + '.json')): + logging.debug("line successfully imported from file and added to list to be transcribed") + paths.append(line) + return paths + + +def process_path(path): + """ Decides whether the path is a URL or a local file and processes accordingly. """ + if path.startswith('http'): + logging.debug("file is a URL") + info_dict = get_youtube(path) + if info_dict: + return info_dict + else: + logging.error("Failed to get Video info") + return None + elif os.path.exists(path): + logging.debug("File is a path") + return process_local_file(path) # For local files, define a function to handle them + else: + logging.error(f"Path does not exist: {path}") + return None + + +# +def process_local_file(file_path): + logging.info(f"Processing local file: {file_path}") + title = normalize_title(os.path.splitext(os.path.basename(file_path))[0]) + info_dict = {'title': title} + logging.debug(f"Creating {title} directory...") + download_path = create_download_directory(title) + logging.debug(f"Converting '{title}' to an audio file (wav).") + audio_file = convert_to_wav(file_path) # Assumes input files are videos needing audio extraction + logging.debug(f"'{title}' successfully converted to an audio file (wav).") + return download_path, info_dict, audio_file + + +# +# +######################################################################################################################## + + +####################################################################################################################### +# Video Download/Handling +# + +def process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_name, api_key, vad_filter, + download_video, download_audio, chunk_size): + video_file_path = None + print("API Name received:", api_name) # Debugging line + try: + results = main(url, api_name=api_name, api_key=api_key, num_speakers=num_speakers, + whisper_model=whisper_model, offset=offset, vad_filter=vad_filter, + download_video_flag=download_video, custom_prompt=custom_prompt) + if results: + transcription_result = results[0] + + json_file_path = transcription_result['audio_file'].replace('.wav', '.segments.json') + prettified_json_file_path = transcription_result['audio_file'].replace('.wav', '.segments_pretty.json') + + summary_file_path = json_file_path.replace('.segments.json', '_summary.txt') + + json_file_path = format_file_path(json_file_path) + prettified_json_file_path = format_file_path(prettified_json_file_path, fallback_path=json_file_path) + + summary_file_path = format_file_path(summary_file_path) + + if download_video: + video_file_path = transcription_result['video_path'] if 'video_path' in transcription_result else None + + formatted_transcription = format_transcription(transcription_result) + + summary_text = transcription_result.get('summary', 'Summary not available') + + if summary_file_path and os.path.exists(summary_file_path): + return formatted_transcription, summary_text, prettified_json_file_path, summary_file_path, video_file_path, None + else: + return formatted_transcription, summary_text, prettified_json_file_path, None, video_file_path, None + else: + return "No results found.", "Summary not available", None, None, None, None + except Exception as e: + return str(e), "Error processing the request.", None, None, None, 
None + + +def create_download_directory(title): + base_dir = "Results" + # Remove characters that are illegal in Windows filenames and normalize + safe_title = normalize_title(title) + logging.debug(f"{title} successfully normalized") + session_path = os.path.join(base_dir, safe_title) + if not os.path.exists(session_path): + os.makedirs(session_path, exist_ok=True) + logging.debug(f"Created directory for downloaded video: {session_path}") + else: + logging.debug(f"Directory already exists for downloaded video: {session_path}") + return session_path + + +def normalize_title(title): + # Normalize the string to 'NFKD' form and encode to 'ascii' ignoring non-ascii characters + title = unicodedata.normalize('NFKD', title).encode('ascii', 'ignore').decode('ascii') + title = title.replace('/', '_').replace('\\', '_').replace(':', '_').replace('"', '').replace('*', '').replace('?', + '').replace( + '<', '').replace('>', '').replace('|', '') + return title + + +def get_youtube(video_url): + ydl_opts = { + 'format': 'bestaudio[ext=m4a]', + 'noplaylist': False, + 'quiet': True, + 'extract_flat': True + } + with yt_dlp.YoutubeDL(ydl_opts) as ydl: + logging.debug("About to extract youtube info") + info_dict = ydl.extract_info(video_url, download=False) + logging.debug(f"Youtube info successfully extracted: {info_dict}") + if isinstance(info_dict, dict): + return info_dict + else: + logging.error("Invalid info_dict format") + return None + + +def get_playlist_videos(playlist_url): + ydl_opts = { + 'extract_flat': True, + 'skip_download': True, + 'quiet': True + } + + with yt_dlp.YoutubeDL(ydl_opts) as ydl: + info = ydl.extract_info(playlist_url, download=False) + + if 'entries' in info: + video_urls = [entry['url'] for entry in info['entries']] + playlist_title = info['title'] + return video_urls, playlist_title + else: + print("No videos found in the playlist.") + return [], None + + +def save_to_file(video_urls, filename): + with open(filename, 'w') as file: + file.write('\n'.join(video_urls)) + print(f"Video URLs saved to {filename}") + + +def download_video(video_url, download_path, info_dict, download_video_flag): + logging.debug("About to normalize downloaded video title") + title = normalize_title(info_dict['title']) + + if not download_video_flag: + file_path = os.path.join(download_path, f"{title}.m4a") + ydl_opts = { + 'format': 'bestaudio[ext=m4a]', + 'outtmpl': file_path, + } + with yt_dlp.YoutubeDL(ydl_opts) as ydl: + logging.debug("yt_dlp: About to download audio with youtube-dl") + ydl.download([video_url]) + logging.debug("yt_dlp: Audio successfully downloaded with youtube-dl") + return file_path + else: + video_file_path = os.path.join(download_path, f"{title}_video.mp4") + audio_file_path = os.path.join(download_path, f"{title}_audio.m4a") + ydl_opts_video = { + 'format': 'bestvideo[ext=mp4]', + 'outtmpl': video_file_path, + } + ydl_opts_audio = { + 'format': 'bestaudio[ext=m4a]', + 'outtmpl': audio_file_path, + } + + with yt_dlp.YoutubeDL(ydl_opts_video) as ydl: + logging.debug("yt_dlp: About to download video with youtube-dl") + ydl.download([video_url]) + logging.debug("yt_dlp: Video successfully downloaded with youtube-dl") + + with yt_dlp.YoutubeDL(ydl_opts_audio) as ydl: + logging.debug("yt_dlp: About to download audio with youtube-dl") + ydl.download([video_url]) + logging.debug("yt_dlp: Audio successfully downloaded with youtube-dl") + + output_file_path = os.path.join(download_path, f"{title}.mp4") + + if userOS == "Windows": + logging.debug("Running ffmpeg on Windows...") + 
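+            # Mux the separately downloaded video and audio streams into a single MP4 without re-encoding (stream copy).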
ffmpeg_command = [ + '.\\Bin\\ffmpeg.exe', + '-i', video_file_path, + '-i', audio_file_path, + '-c:v', 'copy', + '-c:a', 'copy', + output_file_path + ] + subprocess.run(ffmpeg_command, check=True) + elif userOS == "Linux": + logging.debug("Running ffmpeg on Linux...") + ffmpeg_command = [ + 'ffmpeg', + '-i', video_file_path, + '-i', audio_file_path, + '-c:v', 'copy', + '-c:a', 'copy', + output_file_path + ] + subprocess.run(ffmpeg_command, check=True) + else: + logging.error("ffmpeg: Unsupported operating system for video download and merging.") + raise RuntimeError("ffmpeg: Unsupported operating system for video download and merging.") + os.remove(video_file_path) + os.remove(audio_file_path) + + return output_file_path + + +# +# +####################################################################################################################### + + +###################################################################################################################### +# Audio Transcription +# +# Convert video .m4a into .wav using ffmpeg +# ffmpeg -i "example.mp4" -ar 16000 -ac 1 -c:a pcm_s16le "output.wav" +# https://www.gyan.dev/ffmpeg/builds/ +# + +# os.system(r'.\Bin\ffmpeg.exe -ss 00:00:00 -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{out_path}"') +def convert_to_wav(video_file_path, offset=0, overwrite=False): + out_path = os.path.splitext(video_file_path)[0] + ".wav" + + if os.path.exists(out_path) and not overwrite: + print(f"File '{out_path}' already exists. Skipping conversion.") + logging.info(f"Skipping conversion as file already exists: {out_path}") + return out_path + print("Starting conversion process of .m4a to .WAV") + out_path = os.path.splitext(video_file_path)[0] + ".wav" + + try: + if os.name == "nt": + logging.debug("ffmpeg being ran on windows") + + if sys.platform.startswith('win'): + ffmpeg_cmd = "..\\Bin\\ffmpeg.exe" + logging.debug(f"ffmpeg_cmd: {ffmpeg_cmd}") + else: + ffmpeg_cmd = 'ffmpeg' # Assume 'ffmpeg' is in PATH for non-Windows systems + + command = [ + ffmpeg_cmd, # Assuming the working directory is correctly set where .\Bin exists + "-ss", "00:00:00", # Start at the beginning of the video + "-i", video_file_path, + "-ar", "16000", # Audio sample rate + "-ac", "1", # Number of audio channels + "-c:a", "pcm_s16le", # Audio codec + out_path + ] + try: + # Redirect stdin from null device to prevent ffmpeg from waiting for input + with open(os.devnull, 'rb') as null_file: + result = subprocess.run(command, stdin=null_file, text=True, capture_output=True) + if result.returncode == 0: + logging.info("FFmpeg executed successfully") + logging.debug("FFmpeg output: %s", result.stdout) + else: + logging.error("Error in running FFmpeg") + logging.error("FFmpeg stderr: %s", result.stderr) + raise RuntimeError(f"FFmpeg error: {result.stderr}") + except Exception as e: + logging.error("Error occurred - ffmpeg doesn't like windows") + raise RuntimeError("ffmpeg failed") + elif os.name == "posix": + os.system(f'ffmpeg -ss 00:00:00 -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{out_path}"') + else: + raise RuntimeError("Unsupported operating system") + logging.info("Conversion to WAV completed: %s", out_path) + except subprocess.CalledProcessError as e: + logging.error("Error executing FFmpeg command: %s", str(e)) + raise RuntimeError("Error converting video file to WAV") + except Exception as e: + logging.error("Unexpected error occurred: %s", str(e)) + raise RuntimeError("Error converting video file to WAV") + return out_path + + +# Transcribe 
.wav into .segments.json +def speech_to_text(audio_file_path, selected_source_lang='en', whisper_model='small.en', vad_filter=False): + logging.info('Loading faster_whisper model: %s', whisper_model) + from faster_whisper import WhisperModel + model = WhisperModel(whisper_model, device=f"{processing_choice}") + time_start = time.time() + if audio_file_path is None: + raise ValueError("No audio file provided") + logging.info("Audio file path: %s", audio_file_path) + + try: + _, file_ending = os.path.splitext(audio_file_path) + out_file = audio_file_path.replace(file_ending, ".segments.json") + if os.path.exists(out_file): + logging.info("Segments file already exists: %s", out_file) + with open(out_file) as f: + segments = json.load(f) + return segments + + logging.info('Starting transcription...') + options = dict(language=selected_source_lang, beam_size=5, best_of=5, vad_filter=vad_filter) + transcribe_options = dict(task="transcribe", **options) + segments_raw, info = model.transcribe(audio_file_path, **transcribe_options) + + segments = [] + for segment_chunk in segments_raw: + chunk = { + "start": segment_chunk.start, + "end": segment_chunk.end, + "text": segment_chunk.text + } + logging.debug("Segment: %s", chunk) + segments.append(chunk) + logging.info("Transcription completed with faster_whisper") + with open(out_file, 'w') as f: + json.dump(segments, f, indent=2) + except Exception as e: + logging.error("Error transcribing audio: %s", str(e)) + raise RuntimeError("Error transcribing audio") + return segments + + +# +# +###################################################################################################################### + + +####################################################################################################################### +# Diarization +# +# TODO: https://huggingface.co/pyannote/speaker-diarization-3.1 +# embedding_model = "pyannote/embedding", embedding_size=512 +# embedding_model = "speechbrain/spkrec-ecapa-voxceleb", embedding_size=192 +# def speaker_diarize(video_file_path, segments, embedding_model = "pyannote/embedding", embedding_size=512, num_speakers=0): +# """ +# 1. Generating speaker embeddings for each segments. +# 2. Applying agglomerative clustering on the embeddings to identify the speaker for each segment. 
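+# 3. Writing the speaker-labelled segments to a .diarize.json file and a per-speaker CSV transcript.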
+# """ +# try: +# from pyannote.audio import Audio +# from pyannote.core import Segment +# from pyannote.audio.pipelines.speaker_verification import PretrainedSpeakerEmbedding +# import numpy as np +# import pandas as pd +# from sklearn.cluster import AgglomerativeClustering +# from sklearn.metrics import silhouette_score +# import tqdm +# import wave +# +# embedding_model = PretrainedSpeakerEmbedding( embedding_model, device=torch.device("cuda" if +# torch.cuda.is_available() else "cpu")) +# +# +# _,file_ending = os.path.splitext(f'{video_file_path}') +# audio_file = video_file_path.replace(file_ending, ".wav") +# out_file = video_file_path.replace(file_ending, ".diarize.json") +# +# logging.debug("getting duration of audio file") +# with contextlib.closing(wave.open(audio_file,'r')) as f: +# frames = f.getnframes() +# rate = f.getframerate() +# duration = frames / float(rate) +# logging.debug("duration of audio file obtained") +# print(f"duration of audio file: {duration}") +# +# def segment_embedding(segment): +# logging.debug("Creating embedding") +# audio = Audio() +# start = segment["start"] +# end = segment["end"] +# +# # Enforcing a minimum segment length +# if end-start < 0.3: +# padding = 0.3-(end-start) +# start -= padding/2 +# end += padding/2 +# print('Padded segment because it was too short:',segment) +# +# # Whisper overshoots the end timestamp in the last segment +# end = min(duration, end) +# # clip audio and embed +# clip = Segment(start, end) +# waveform, sample_rate = audio.crop(audio_file, clip) +# return embedding_model(waveform[None]) +# +# embeddings = np.zeros(shape=(len(segments), embedding_size)) +# for i, segment in enumerate(tqdm.tqdm(segments)): +# embeddings[i] = segment_embedding(segment) +# embeddings = np.nan_to_num(embeddings) +# print(f'Embedding shape: {embeddings.shape}') +# +# if num_speakers == 0: +# # Find the best number of speakers +# score_num_speakers = {} +# +# for num_speakers in range(2, 10+1): +# clustering = AgglomerativeClustering(num_speakers).fit(embeddings) +# score = silhouette_score(embeddings, clustering.labels_, metric='euclidean') +# score_num_speakers[num_speakers] = score +# best_num_speaker = max(score_num_speakers, key=lambda x:score_num_speakers[x]) +# print(f"The best number of speakers: {best_num_speaker} with {score_num_speakers[best_num_speaker]} score") +# else: +# best_num_speaker = num_speakers +# +# # Assign speaker label +# clustering = AgglomerativeClustering(best_num_speaker).fit(embeddings) +# labels = clustering.labels_ +# for i in range(len(segments)): +# segments[i]["speaker"] = 'SPEAKER ' + str(labels[i] + 1) +# +# with open(out_file,'w') as f: +# f.write(json.dumps(segments, indent=2)) +# +# # Make CSV output +# def convert_time(secs): +# return datetime.timedelta(seconds=round(secs)) +# +# objects = { +# 'Start' : [], +# 'End': [], +# 'Speaker': [], +# 'Text': [] +# } +# text = '' +# for (i, segment) in enumerate(segments): +# if i == 0 or segments[i - 1]["speaker"] != segment["speaker"]: +# objects['Start'].append(str(convert_time(segment["start"]))) +# objects['Speaker'].append(segment["speaker"]) +# if i != 0: +# objects['End'].append(str(convert_time(segments[i - 1]["end"]))) +# objects['Text'].append(text) +# text = '' +# text += segment["text"] + ' ' +# objects['End'].append(str(convert_time(segments[i - 1]["end"]))) +# objects['Text'].append(text) +# +# save_path = video_file_path.replace(file_ending, ".csv") +# df_results = pd.DataFrame(objects) +# df_results.to_csv(save_path) +# return df_results, 
save_path +# +# except Exception as e: +# raise RuntimeError("Error Running inference with local model", e) +# +# +###################################################################################################################### + + +####################################################################################################################### +# Summarizers +# +# + +def extract_text_from_segments(segments): + logging.debug(f"Main: extracting text from {segments}") + text = ' '.join([segment['text'] for segment in segments]) + logging.debug(f"Main: Successfully extracted text from {segments}") + return text + + +def summarize_with_openai(api_key, file_path, model, custom_prompt): + try: + logging.debug("openai: Loading json data for summarization") + with open(file_path, 'r') as file: + segments = json.load(file) + + logging.debug("openai: Extracting text from the segments") + text = extract_text_from_segments(segments) + + headers = { + 'Authorization': f'Bearer {api_key}', + 'Content-Type': 'application/json' + } + # headers = { + # 'Authorization': f'Bearer {api_key}', + # 'Content-Type': 'application/json' + # } + + logging.debug(f"openai: API Key is: {api_key}") + logging.debug("openai: Preparing data + prompt for submittal") + openai_prompt = f"{text} \n\n\n\n{custom_prompt}" + data = { + "model": model, + "messages": [ + { + "role": "system", + "content": "You are a professional summarizer." + }, + { + "role": "user", + "content": openai_prompt + } + ], + "max_tokens": 4096, # Adjust tokens as needed + "temperature": 0.7 + } + logging.debug("openai: Posting request") + response = requests.post('https://api.openai.com/v1/chat/completions', headers=headers, json=data) + + if response.status_code == 200: + global summary + summary = response.json()['choices'][0]['message']['content'].strip() + logging.debug("openai: Summarization successful") + print("Summarization successful.") + return summary + else: + logging.debug("openai: Summarization failed") + print("Failed to process summary:", response.text) + return None + except Exception as e: + logging.debug("openai: Error in processing: %s", str(e)) + print("Error occurred while processing summary with openai:", str(e)) + return None + + +def summarize_with_claude(api_key, file_path, model, custom_prompt): + try: + logging.debug("anthropic: Loading JSON data") + with open(file_path, 'r') as file: + segments = json.load(file) + + logging.debug("anthropic: Extracting text from the segments file") + text = extract_text_from_segments(segments) + + headers = { + 'x-api-key': api_key, + 'anthropic-version': '2023-06-01', + 'Content-Type': 'application/json' + } + + anthropic_prompt = custom_prompt + logging.debug("anthropic: Prompt is {anthropic_prompt}") + user_message = { + "role": "user", + "content": f"{text} \n\n\n\n{anthropic_prompt}" + } + + data = { + "model": model, + "max_tokens": 4096, # max _possible_ tokens to return + "messages": [user_message], + "stop_sequences": ["\n\nHuman:"], + "temperature": 0.7, + "top_k": 0, + "top_p": 1.0, + "metadata": { + "user_id": "example_user_id", + }, + "stream": False, + "system": "You are a professional summarizer." 
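+            # Note: the Messages API takes the system prompt as a top-level "system" field, not as a message role.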
+ } + + logging.debug("anthropic: Posting request to API") + response = requests.post('https://api.anthropic.com/v1/messages', headers=headers, json=data) + + # Check if the status code indicates success + if response.status_code == 200: + logging.debug("anthropic: Post submittal successful") + response_data = response.json() + try: + global summary + summary = response_data['content'][0]['text'].strip() + logging.debug("anthropic: Summarization successful") + print("Summary processed successfully.") + return summary + except (IndexError, KeyError) as e: + logging.debug("anthropic: Unexpected data in response") + print("Unexpected response format from Claude API:", response.text) + return None + elif response.status_code == 500: # Handle internal server error specifically + logging.debug("anthropic: Internal server error") + print("Internal server error from API. Retrying may be necessary.") + return None + else: + logging.debug(f"anthropic: Failed to summarize, status code {response.status_code}: {response.text}") + print(f"Failed to process summary, status code {response.status_code}: {response.text}") + return None + + except Exception as e: + logging.debug("anthropic: Error in processing: %s", str(e)) + print("Error occurred while processing summary with anthropic:", str(e)) + return None + + +# Summarize with Cohere +def summarize_with_cohere(api_key, file_path, model, custom_prompt): + try: + logging.basicConfig(level=logging.DEBUG) + logging.debug("cohere: Loading JSON data") + with open(file_path, 'r') as file: + segments = json.load(file) + + logging.debug(f"cohere: Extracting text from segments file") + text = extract_text_from_segments(segments) + + headers = { + 'accept': 'application/json', + 'content-type': 'application/json', + 'Authorization': f'Bearer {api_key}' + } + + cohere_prompt = f"{text} \n\n\n\n{custom_prompt}" + logging.debug("cohere: Prompt being sent is {cohere_prompt}") + + data = { + "chat_history": [ + {"role": "USER", "message": cohere_prompt} + ], + "message": "Please provide a summary.", + "model": model, + "connectors": [{"id": "web-search"}] + } + + logging.debug("cohere: Submitting request to API endpoint") + print("cohere: Submitting request to API endpoint") + response = requests.post('https://api.cohere.ai/v1/chat', headers=headers, json=data) + response_data = response.json() + logging.debug("API Response Data: %s", response_data) + + if response.status_code == 200: + if 'text' in response_data: + global summary + summary = response_data['text'].strip() + logging.debug(f"cohere: Summarization successful:\n\n{summary}\n\n") + print("Summary processed successfully.") + return summary + else: + logging.error("Expected data not found in API response.") + return "Expected data not found in API response." 
+ else: + logging.error(f"cohere: API request failed with status code {response.status_code}: {response.text}") + print(f"Failed to process summary, status code {response.status_code}: {response.text}") + return f"cohere: API request failed: {response.text}" + + except Exception as e: + logging.error("cohere: Error in processing: %s", str(e)) + return f"cohere: Error occurred while processing summary with Cohere: {str(e)}" + + +# https://console.groq.com/docs/quickstart +def summarize_with_groq(api_key, file_path, model, custom_prompt): + try: + logging.debug("groq: Loading JSON data") + with open(file_path, 'r') as file: + segments = json.load(file) + + logging.debug(f"groq: Extracting text from segments file") + text = extract_text_from_segments(segments) + + headers = { + 'Authorization': f'Bearer {api_key}', + 'Content-Type': 'application/json' + } + + groq_prompt = f"{text} \n\n\n\n{custom_prompt}" + logging.debug("groq: Prompt being sent is {groq_prompt}") + + data = { + "messages": [ + { + "role": "user", + "content": groq_prompt + } + ], + "model": model + } + + logging.debug("groq: Submitting request to API endpoint") + print("groq: Submitting request to API endpoint") + response = requests.post('https://api.groq.com/openai/v1/chat/completions', headers=headers, json=data) + + response_data = response.json() + logging.debug("API Response Data: %s", response_data) + + if response.status_code == 200: + if 'choices' in response_data and len(response_data['choices']) > 0: + global summary + summary = response_data['choices'][0]['message']['content'].strip() + logging.debug("groq: Summarization successful") + print("Summarization successful.") + return summary + else: + logging.error("Expected data not found in API response.") + return "Expected data not found in API response." 
+ else: + logging.error(f"groq: API request failed with status code {response.status_code}: {response.text}") + return f"groq: API request failed: {response.text}" + + except Exception as e: + logging.error("groq: Error in processing: %s", str(e)) + return f"groq: Error occurred while processing summary with groq: {str(e)}" + + +################################# +# +# Local Summarization + +def summarize_with_llama(api_url, file_path, token, custom_prompt): + try: + logging.debug("llama: Loading JSON data") + with open(file_path, 'r') as file: + segments = json.load(file) + + logging.debug(f"llama: Extracting text from segments file") + text = extract_text_from_segments(segments) # Define this function to extract text properly + + headers = { + 'accept': 'application/json', + 'content-type': 'application/json', + } + if len(token) > 5: + headers['Authorization'] = f'Bearer {token}' + + llama_prompt = f"{text} \n\n\n\n{custom_prompt}" + logging.debug("llama: Prompt being sent is {llama_prompt}") + + data = { + "prompt": llama_prompt + } + + logging.debug("llama: Submitting request to API endpoint") + print("llama: Submitting request to API endpoint") + response = requests.post(api_url, headers=headers, json=data) + response_data = response.json() + logging.debug("API Response Data: %s", response_data) + + if response.status_code == 200: + # if 'X' in response_data: + logging.debug(response_data) + global summary + summary = response_data['content'].strip() + logging.debug("llama: Summarization successful") + print("Summarization successful.") + return summary + else: + logging.error(f"llama: API request failed with status code {response.status_code}: {response.text}") + return f"llama: API request failed: {response.text}" + + except Exception as e: + logging.error("llama: Error in processing: %s", str(e)) + return f"llama: Error occurred while processing summary with llama: {str(e)}" + + +# https://lite.koboldai.net/koboldcpp_api#/api%2Fv1/post_api_v1_generate +def summarize_with_kobold(kobold_ip, json_file_path, kobold_token, custom_prompt): + try: + logging.debug("kobold: Loading JSON data") + with open(json_file_path, 'r') as file: + segments = json.load(file) + + logging.debug(f"kobold: Extracting text from segments file") + text = extract_text_from_segments(segments) + + # FIXME - API Key generated from copilot...kobold.cpp doesn't mention the header for it either... + headers = { + 'accept': 'application/json', + 'content-type': 'application/json', + 'X_API_KEY': kobold_token + } + + kobold_prompt = f"{text} \n\n\n\n{custom_prompt}" + logging.debug("kobold: Prompt being sent is {kobold_prompt}") + + # FIXME + # Values literally c/p from the api docs.... + data = { + "max_context_length": 8096, + "max_length": 4096, + "prompt": kobold_prompt, + } + + logging.debug("kobold: Submitting request to API endpoint") + print("kobold: Submitting request to API endpoint") + response = requests.post(kobold_ip, headers=headers, json=data) + response_data = response.json() + logging.debug("kobold: API Response Data: %s", response_data) + + if response.status_code == 200: + if 'results' in response_data and len(response_data['results']) > 0: + global summary + summary = response_data['results'][0]['text'].strip() + logging.debug("kobold: Summarization successful") + print("Summarization successful.") + return summary + else: + logging.error("Expected data not found in API response.") + return "Expected data not found in API response." 
+ else: + logging.error(f"kobold: API request failed with status code {response.status_code}: {response.text}") + return f"kobold: API request failed: {response.text}" + + except Exception as e: + logging.error("kobold: Error in processing: %s", str(e)) + return f"kobold: Error occurred while processing summary with kobold: {str(e)}" + + +# https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API +def summarize_with_oobabooga(ooba_ip, json_file_path, ooba_token, custom_prompt): + try: + logging.debug("ooba: Loading JSON data") + with open(json_file_path, 'r') as file: + segments = json.load(file) + + logging.debug(f"ooba: Extracting text from segments file\n\n\n") + text = extract_text_from_segments(segments) + logging.debug(f"ooba: Finished extracting text from segments file") + + # FIXME - Add headers for ooba auth + headers = { + 'accept': 'application/json', + 'content-type': 'application/json', + } + + # prompt_text = "I like to eat cake and bake cakes. I am a baker. I work in a French bakery baking cakes. It + # is a fun job. I have been baking cakes for ten years. I also bake lots of other baked goods, but cakes are + # my favorite." prompt_text += f"\n\n{text}" # Uncomment this line if you want to include the text variable + ooba_prompt = f"{text}\n\n\n\n{custom_prompt}" + logging.debug("ooba: Prompt being sent is {ooba_prompt}") + + data = { + "mode": "chat", + "character": "Example", + "messages": [{"role": "user", "content": ooba_prompt}] + } + + logging.debug("ooba: Submitting request to API endpoint") + print("ooba: Submitting request to API endpoint") + response = requests.post(ooba_ip, headers=headers, json=data, verify=False) + logging.debug("ooba: API Response Data: %s", response) + + if response.status_code == 200: + response_data = response.json() + global summary + summary = response.json()['choices'][0]['message']['content'] + logging.debug("ooba: Summarization successful") + print("Summarization successful.") + return summary + else: + logging.error(f"oobabooga: API request failed with status code {response.status_code}: {response.text}") + return f"ooba: API request failed with status code {response.status_code}: {response.text}" + + except Exception as e: + logging.error("ooba: Error in processing: %s", str(e)) + return f"ooba: Error occurred while processing summary with oobabooga: {str(e)}" + + +def save_summary_to_file(summary, file_path): + summary_file_path = file_path.replace('.segments.json', '_summary.txt') + logging.debug("Opening summary file for writing, *segments.json with *_summary.txt") + with open(summary_file_path, 'w') as file: + file.write(summary) + logging.info(f"Summary saved to file: {summary_file_path}") + + +# +# +######################################################################################################################## + + +####################################################################################################################### +# Gradio UI +# + +# Only to be used when configured with Gradio for HF Space +def summarize_with_huggingface(huggingface_api_key, json_file_path, custom_prompt): + logging.debug(f"huggingface: Summarization process starting...") + client = InferenceClient() + + #model = "microsoft/Phi-3-mini-128k-instruct" + model = "CohereForAI/c4ai-command-r-plus" + API_URL = f"https://api-inference.huggingface.co/models/{model}" + headers = {"Authorization": f"Bearer {huggingface_api_key}"} + + client = InferenceClient(model=f"{model}", token=f"{huggingface_api_key}") + + response 
= client.post(json={"inputs": "The goal of life is [MASK]."}, model="bert-base-uncased") + + with open(json_file_path, 'r') as file: + segments = json.load(file) + text = ''.join([segment['text'] for segment in segments]) + + hf_prompt = text + "\n\n\n\n" + custom_prompt + + if huggingface_api_key == "": + api_key = os.getenv(HF_TOKEN) + logging.debug("HUGGINGFACE API KEY CHECK: " + huggingface_api_key) + try: + logging.debug("huggingface: Loading json data for summarization") + with open(json_file_path, 'r') as file: + segments = json.load(file) + + logging.debug("huggingface: Extracting text from the segments") + text = ' '.join([segment['text'] for segment in segments]) + + #api_key = os.getenv('HF_TOKEN').replace('"', '') + logging.debug("HUGGINGFACE API KEY CHECK #2: " + huggingface_api_key) + + logging.debug("huggingface: Submitting request...") + response = client.text_generation(prompt=hf_prompt, max_new_tokens=4096) + if response is not None: + return response + #if response == FIXME: + #logging.debug("huggingface: Summarization successful") + #print("Summarization successful.") + #return response + #elif Bad Stuff: + # logging.debug(f"huggingface: Model is currently loading...{response.status_code}: {response.text}") + # global waiting_summary + # pretty_json = json.dumps(json.loads(response.text), indent=4) # Prettify JSON + # waiting_summary = f" {pretty_json} " # Use prettified JSON + # return waiting_summary + else: + logging.error(f"huggingface: Summarization failed with status code {response}") + return f"Failed to process summary, huggingface library error: {response}" + except Exception as e: + logging.error("huggingface: Error in processing: %s", str(e)) + print(f"Error occurred while processing summary with huggingface: {str(e)}") + return None + + # FIXME + # This is here for gradio authentication + # Its just not setup. + #def same_auth(username, password): + # return username == password + + +def format_transcription(transcription_result): + if transcription_result: + json_data = transcription_result['transcription'] + return json.dumps(json_data, indent=2) + else: + return "" + + +def format_file_path(file_path, fallback_path=None): + if file_path and os.path.exists(file_path): + logging.debug(f"File exists: {file_path}") + return file_path + elif fallback_path and os.path.exists(fallback_path): + logging.debug(f"File does not exist: {file_path}. Returning fallback path: {fallback_path}") + return fallback_path + else: + logging.debug(f"File does not exist: {file_path}. 
No fallback path available.") + return None + + +def update_visibility(mode): + if mode == "Advanced": + # Show all inputs below URL + return [gr.update(visible=True)] * 9 + else: + # Hide all inputs below URL + return [gr.update(visible=False)] * 9 + + +# https://www.gradio.app/guides/controlling-layout +def launch_ui(demo_mode=False): + whisper_models = ["small.en", "medium.en", "large"] + + with gr.Blocks() as iface: + with gr.Tab("Audio Transcription + Summarization"): + with gr.Row(): + # Light/Dark mode toggle switch + theme_toggle = gr.Radio(choices=["Light", "Dark"], value="Light", + label="Light/Dark Mode Toggle (Toggle to change UI color scheme) (WIP)") + + # UI Mode toggle switch + ui_mode_toggle = gr.Radio(choices=["Simple", "Advanced"], value="Simple", + label="UI Mode (Toggle to show all options) (WIP)") + + # URL input is always visible + url_input = gr.Textbox(label="URL (Mandatory)", placeholder="Enter the video URL here") + + # Inputs to be shown or hidden + num_speakers_input = gr.Number(value=2, label="Number of Speakers(Optional - Currently has no effect)", + visible=False) + whisper_model_input = gr.Dropdown(choices=whisper_models, value="small.en", + label="Whisper Model(This is the ML model used for transcription.)", + visible=False) + custom_prompt_input = gr.Textbox( + label="Custom Prompt (Customize your summarization, or ask a question about the video and have it answered)", + placeholder="Here is the transcript of a video: {{TRANSCRIPT}} Please read " + "through the transcript carefully. Identify the main topics that are discussed over the " + "course of the transcript. Then, summarize the key points about each main topic in a " + "concise bullet point. The bullet points should cover the key information conveyed about " + "each topic in the video, but should be much shorter than the full transcript. 
Please " + "output your bullet point summary inside tags.", + lines=3, visible=True) + offset_input = gr.Number(value=0, label="Offset (Seconds into the video to start transcribing at)", + visible=False) + api_name_input = gr.Dropdown( + choices=[None, "huggingface", "openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"], + value=None, + label="API Name (Mandatory Unless you just want a Transcription - Can use Cohere with no API Key)", visible=True) + api_key_input = gr.Textbox(label="API Key (Mandatory if API Name is specified)", + placeholder="Enter your API key here", visible=True) + vad_filter_input = gr.Checkbox(label="VAD Filter(Can safely ignore)", value=False, visible=False) + download_video_input = gr.Checkbox( + label="Download Video(Select to allow for file download of selected video)", value=False, visible=False) + download_audio_input = gr.Checkbox( + label="Download Audio(Select to allow for file download of selected Video's Audio)", value=False, + visible=False) + detail_level_input = gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.1, interactive=True, + label="Detail Level (Slide me)", visible=False) + + inputs = [num_speakers_input, whisper_model_input, custom_prompt_input, offset_input, api_name_input, + api_key_input, vad_filter_input, download_video_input, download_audio_input, detail_level_input] + + # Function to toggle Light/Dark Mode + def toggle_light(mode): + dark = (mode == "Dark") + return {"__theme": "dark" if dark else "light"} + + # Set the event listener for the Light/Dark mode toggle switch + theme_toggle.change(fn=toggle_light, inputs=theme_toggle, outputs=None) + + # Function to toggle visibility of advanced inputs + def toggle_ui(mode): + visible = (mode == "Advanced") + return [visible] * len(inputs) + + # Set the event listener for the UI Mode toggle switch + ui_mode_toggle.change(fn=toggle_ui, inputs=ui_mode_toggle, outputs=inputs) + + # Combine URL input and inputs + all_inputs = [url_input] + inputs + + outputs = [ + gr.Textbox(label="Transcription (Resulting Transcription from your input URL)"), + gr.Textbox(label="Summary or Status Message (Current status of Summary or Summary itself)"), + gr.File(label="Download Transcription as JSON (Download the Transcription as a file)",container=False,visible=True,render=False), + gr.File(label="Download Summary as Text (Download the Summary as a file)",container=False,visible=True,render=False), + # FIXME + # https://www.gradio.app/docs/gradio/file + gr.File(label="Download Video (Download the Video as a file)",container=False,visible=False,render=False), + gr.File(label="Download Audio (Download the Audio as a file)",container=False,visible=False,render=False) + ] + + gr.Interface( + fn=process_url, + inputs=all_inputs, + outputs=outputs, + title="TL/DW: Video Transcription and Summarization with Custom Prompt Support (Demo Page)", + description="Submit a video URL for transcription and summarization. Ensure you input all necessary " + "information including API keys." 
+ ) + + with gr.Tab("Transcription & Summarization History"): + gr.Markdown("Plan to put access to SQLite DB here") + gr.Markdown("Allow for searching/retrieval/re-prompting of previous transcriptions") + gr.Markdown("Also allow for re-transcribing videos if they're still online, while updating/adding to prior entry") + gr.Markdown("RAG here we come....:/") + + + with gr.Accordion("Open for More!", open=False): + gr.Markdown("Plan to put Prompt Samples/Templates down here") + + iface.launch(share=False) + + +# +# +##################################################################################################################################### + + +#################################################################################################################################### +# Main() +# + +def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False, + download_video_flag=False, demo_mode=False, custom_prompt=None, overwrite=False): + if input_path is None and args.user_interface: + return [] + start_time = time.monotonic() + paths = [] # Initialize paths as an empty list + if os.path.isfile(input_path) and input_path.endswith('.txt'): + logging.debug("MAIN: User passed in a text file, processing text file...") + paths = read_paths_from_file(input_path) + elif os.path.exists(input_path): + logging.debug("MAIN: Local file path detected") + paths = [input_path] + elif (info_dict := get_youtube(input_path)) and 'entries' in info_dict: + logging.debug("MAIN: YouTube playlist detected") + print( + "\n\nSorry, but playlists aren't currently supported. You can run the following command to generate a " + "text file that you can then pass into this script though! (It may not work... playlist support seems " + "spotty)" + """\n\n\tpython Get_Playlist_URLs.py \n\n\tThen,\n\n\tpython + diarizer.py \n\n""") + return + else: + paths = [input_path] + results = [] + + for path in paths: + try: + if path.startswith('http'): + logging.debug("MAIN: URL Detected") + info_dict = get_youtube(path) + if info_dict: + logging.debug("MAIN: Creating path for video file...") + download_path = create_download_directory(info_dict['title']) + logging.debug("MAIN: Path created successfully\n MAIN: Now Downloading video from yt_dlp...") + try: + video_path = download_video(path, download_path, info_dict, download_video_flag) + except RuntimeError as e: + logging.error(f"Error downloading video: {str(e)}") + #FIXME - figure something out for handling this situation.... 
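+                        # For now, skip this entry and move on to the next path in the list.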
+ continue + logging.debug("MAIN: Video downloaded successfully") + logging.debug("MAIN: Converting video file to WAV...") + audio_file = convert_to_wav(video_path, offset) + logging.debug("MAIN: Audio file converted successfully") + else: + if os.path.exists(path): + logging.debug("MAIN: Local file path detected") + download_path, info_dict, audio_file = process_local_file(path) + else: + logging.error(f"File does not exist: {path}") + continue + + if info_dict: + logging.debug("MAIN: Creating transcription file from WAV") + segments = speech_to_text(audio_file, whisper_model=whisper_model, vad_filter=vad_filter) + transcription_result = { + 'video_path': path, + 'audio_file': audio_file, + 'transcription': segments + } + results.append(transcription_result) + logging.info(f"Transcription complete: {audio_file}") + + # Perform summarization based on the specified API + logging.debug(f"MAIN: Summarization being performed by {api_name} API") + json_file_path = audio_file.replace('.wav', '.segments.json') + # UNDO + #prettified_json_file_path = transcription_result['audio_file'].replace('.wav', '.segments_pretty.json') + json_file_path = transcription_result['audio_file'].replace('.wav', '.segments.json') + prettified_json_file_path = transcription_result['audio_file'].replace('.wav', '.segments_pretty.json') + + json_file_path = format_file_path(json_file_path) + prettified_json_file_path = format_file_path(prettified_json_file_path, fallback_path=json_file_path) + if api_name == "huggingface": + huggingface_api_key = os.getenv('HF_TOKEN').replace('"', '') + if huggingface_api_key is None: + huggingface_api_key = api_key if api_key else config.get('API', 'huggingface_api_key', + fallback=None) + try: + logging.debug(f"MAIN: Trying to summarize with huggingface") + summarize_with_huggingface(huggingface_api_key, json_file_path, custom_prompt) + except requests.exceptions.ConnectionError: + requests.status_code = "Connection: " + elif api_name == "cohere": + cohere_api_key = os.getenv('COHERE_TOKEN').replace('"', '') + if cohere_api_key is None: + cohere_api_key = api_key if api_key else config.get('API', 'cohere_api_key', + fallback=None) + try: + global summary + logging.debug(f"MAIN: Trying to summarize with Cohere on HuggingFace Spaces") + summary = summarize_with_cohere(cohere_api_key, json_file_path, cohere_model, custom_prompt) + transcription_result['summary'] = summary + logging.info(f"Summary generated using {api_name} API") + save_summary_to_file(summary, json_file_path) + except requests.exceptions.ConnectionError: + requests.status_code = "Connection: " + elif api_name and api_key: + logging.debug(f"MAIN: Summarization being performed by {api_name}") + json_file_path = audio_file.replace('.wav', '.segments.json') + if api_name.lower() == 'openai': + openai_api_key = api_key if api_key else config.get('API', 'openai_api_key', fallback=None) + try: + logging.debug(f"MAIN: trying to summarize with openAI") + summary = summarize_with_openai(openai_api_key, json_file_path, openai_model, custom_prompt) + except requests.exceptions.ConnectionError: + requests.status_code = "Connection: " + elif api_name.lower() == "huggingface": + huggingface_api_key = os.getenv(HF_TOKEN) + if huggingface_api_key is None: + huggingface_api_key = api_key if api_key else config.get('API', 'huggingface_api_key', + fallback=None) + try: + logging.debug(f"MAIN: Trying to summarize with huggingface") + summarize_with_huggingface(huggingface_api_key, json_file_path, custom_prompt) + except 
requests.exceptions.ConnectionError: + requests.status_code = "Connection: " + elif api_name.lower() == "anthropic": + anthropic_api_key = api_key if api_key else config.get('API', 'anthropic_api_key', + fallback=None) + try: + logging.debug(f"MAIN: Trying to summarize with anthropic") + summary = summarize_with_claude(anthropic_api_key, json_file_path, anthropic_model, + custom_prompt) + except requests.exceptions.ConnectionError: + requests.status_code = "Connection: " + elif api_name.lower() == "cohere": + cohere_api_key = api_key if api_key else config.get('API', 'cohere_api_key', fallback=None) + try: + logging.debug(f"MAIN: Trying to summarize with cohere") + summary = summarize_with_cohere(cohere_api_key, json_file_path, cohere_model, custom_prompt) + except requests.exceptions.ConnectionError: + requests.status_code = "Connection: " + elif api_name.lower() == "groq": + groq_api_key = api_key if api_key else config.get('API', 'groq_api_key', fallback=None) + try: + logging.debug(f"MAIN: Trying to summarize with Groq") + summary = summarize_with_groq(groq_api_key, json_file_path, groq_model, custom_prompt) + except requests.exceptions.ConnectionError: + requests.status_code = "Connection: " + elif api_name.lower() == "llama": + llama_token = api_key if api_key else config.get('API', 'llama_api_key', fallback=None) + llama_ip = llama_api_IP + try: + logging.debug(f"MAIN: Trying to summarize with Llama.cpp") + summary = summarize_with_llama(llama_ip, json_file_path, llama_token, custom_prompt) + except requests.exceptions.ConnectionError: + requests.status_code = "Connection: " + elif api_name.lower() == "kobold": + kobold_token = api_key if api_key else config.get('API', 'kobold_api_key', fallback=None) + kobold_ip = kobold_api_IP + try: + logging.debug(f"MAIN: Trying to summarize with kobold.cpp") + summary = summarize_with_kobold(kobold_ip, json_file_path, kobold_token, custom_prompt) + except requests.exceptions.ConnectionError: + requests.status_code = "Connection: " + elif api_name.lower() == "ooba": + ooba_token = api_key if api_key else config.get('API', 'ooba_api_key', fallback=None) + ooba_ip = ooba_api_IP + try: + logging.debug(f"MAIN: Trying to summarize with oobabooga") + summary = summarize_with_oobabooga(ooba_ip, json_file_path, ooba_token, custom_prompt) + except requests.exceptions.ConnectionError: + requests.status_code = "Connection: " + else: + logging.warning(f"Unsupported API: {api_name}") + summary = None + + print(f"MAIN: #1 - Summary: {summary}") + if summary: + transcription_result['summary'] = summary + logging.info(f"Summary generated using {api_name} API") + save_summary_to_file(summary, json_file_path) + else: + logging.warning(f"Failed to generate summary using {api_name} API") + else: + logging.info("MAIN: #2 - No API specified. 
Summarization will not be performed") + except Exception as e: + logging.error(f"Error processing path: {path}") + logging.error(str(e)) + continue + # end_time = time.monotonic() + # print("Total program execution time: " + timedelta(seconds=end_time - start_time)) + + return results + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Transcribe and summarize videos.') + parser.add_argument('input_path', type=str, help='Path or URL of the video', nargs='?') + parser.add_argument('-v', '--video', action='store_true', help='Download the video instead of just the audio') + parser.add_argument('-api', '--api_name', type=str, help='API name for summarization (optional)') + parser.add_argument('--overwrite', action='store_true', help='Overwrite existing audio files') + parser.add_argument('-ns', '--num_speakers', type=int, default=2, help='Number of speakers (default: 2)') + parser.add_argument('-wm', '--whisper_model', type=str, default='small.en', + help='Whisper model (default: small.en)') + parser.add_argument('-off', '--offset', type=int, default=0, help='Offset in seconds (default: 0)') + parser.add_argument('-vad', '--vad_filter', action='store_true', help='Enable VAD filter') + parser.add_argument('-log', '--log_level', type=str, default='INFO', + choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], help='Log level (default: INFO)') + parser.add_argument('-ui', '--user_interface', action='store_true', help='Launch the Gradio user interface') + parser.add_argument('-demo', '--demo_mode', action='store_true', help='Enable demo mode') + parser.add_argument('-prompt', '--custom_prompt', type=str, + help='Pass in a custom prompt to be used in place of the existing one.(Probably should just modify the script itself...)') + # parser.add_argument('--log_file', action=str, help='Where to save logfile (non-default)') + args = parser.parse_args() + + custom_prompt = args.custom_prompt + if custom_prompt == "": + logging.debug(f"Custom prompt defined, will use \n\nf{custom_prompt} \n\nas the prompt") + print(f"Custom Prompt has been defined. Custom prompt: \n\n {args.custom_prompt}") + else: + logging.debug("No custom prompt defined, will use default") + args.custom_prompt = ("\n\nAbove is the transcript of a video. Please " + "read through the transcript carefully. Identify the main topics that are discussed " + "over the course of the transcript. Then, summarize the key points about each main " + "topic in a concise bullet point. The bullet points should cover the key information " + "conveyed about each topic in the video, but should be much shorter than the full " + "transcript. Please output your bullet point summary inside tags.") + print("No custom prompt defined, will use default") + + # print(f"Is CUDA available: {torch.cuda.is_available()}") + # True + # print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}") + # Tesla T4 + + # Since this is running in HF.... 
+ args.user_interface = True + if args.user_interface: + launch_ui(demo_mode=args.demo_mode) + else: + if not args.input_path: + parser.print_help() + sys.exit(1) + + logging.basicConfig(level=getattr(logging, args.log_level), format='%(asctime)s - %(levelname)s - %(message)s') + + logging.info('Starting the transcription and summarization process.') + logging.info(f'Input path: {args.input_path}') + logging.info(f'API Name: {args.api_name}') + logging.debug(f'API Key: {args.api_key}') # ehhhhh + logging.info(f'Number of speakers: {args.num_speakers}') + logging.info(f'Whisper model: {args.whisper_model}') + logging.info(f'Offset: {args.offset}') + logging.info(f'VAD filter: {args.vad_filter}') + logging.info(f'Log Level: {args.log_level}') # lol + + if args.api_name and args.api_key: + logging.info(f'API: {args.api_name}') + logging.info('Summarization will be performed.') + else: + logging.info('MAIN: #1 No API specified. Summarization will not be performed.') + + logging.debug("Platform check being performed...") + platform_check() + logging.debug("CUDA check being performed...") + cuda_check() + logging.debug("ffmpeg check being performed...") + check_ffmpeg() + + # Hey, we're in HuggingFace + launch_ui(demo_mode=args.demo_mode) + + try: + results = main(args.input_path, api_name=args.api_name, api_key=args.api_key, + num_speakers=args.num_speakers, whisper_model=args.whisper_model, offset=args.offset, + vad_filter=args.vad_filter, download_video_flag=args.video, overwrite=args.overwrite) + logging.info('Transcription process completed.') + except Exception as e: + logging.error('An error occurred during the transcription process.') + logging.error(str(e)) + sys.exit(1) diff --git a/HF/requirements.txt b/HF/requirements.txt new file mode 100644 index 00000000..3ae9a875 --- /dev/null +++ b/HF/requirements.txt @@ -0,0 +1,112 @@ +accelerate==0.29.2 +aiofiles==23.2.1 +altair==5.3.0 +annotated-types==0.6.0 +anyio==4.3.0 +attrs==23.2.0 +av==11.0.0 +Brotli==1.1.0 +certifi==2024.2.2 +charset-normalizer==3.3.2 +click==8.1.7 +colorama==0.4.6 +coloredlogs==15.0.1 +contourpy==1.2.1 +ctranslate2==4.2.1 +cycler==0.12.1 +dnspython==2.6.1 +email_validator==2.1.1 +fastapi==0.111.0 +fastapi-cli==0.0.3 +faster-whisper==1.0.1 +ffmpeg==1.4 +ffmpy==0.3.2 +filelock==3.13.4 +fire==0.6.0 +flatbuffers==24.3.25 +fonttools==4.51.0 +fsspec==2024.3.1 +gradio +#gradio==4.29.0 +gradio_client +#gradio_client==0.16.1 +h11==0.14.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.0 +huggingface-hub==0.22.2 +humanfriendly==10.0 +idna==3.7 +importlib_resources==6.4.0 +Jinja2==3.1.3 +jsonschema==4.22.0 +jsonschema-specifications==2023.12.1 +kiwisolver==1.4.5 +markdown-it-py==3.0.0 +MarkupSafe==2.1.5 +matplotlib==3.8.4 +mdurl==0.1.2 +mpmath==1.3.0 +mutagen==1.47.0 +networkx==3.3 +numpy==1.26.4 +onnxruntime==1.17.3 +orjson==3.10.3 +packaging==24.0 +pandas==2.2.2 +pillow==10.3.0 +protobuf==5.26.1 +psutil==5.9.8 +pycryptodomex==3.20.0 +pydantic==2.7.1 +pydantic_core==2.18.2 +pydub==0.25.1 +pyee==11.1.0 +Pygments==2.18.0 +pyparsing==3.1.2 +pyreadline3==3.4.1 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +python-ffmpeg==2.0.12 +python-multipart==0.0.9 +pytz==2024.1 +PyYAML==6.0.1 +referencing==0.35.1 +regex==2023.12.25 +requests==2.31.0 +rich==13.7.1 +rpds-py==0.18.1 +ruff==0.4.3 +safetensors==0.4.2 +semantic-version==2.10.0 +sentencepiece==0.2.0 +setuptools==69.5.1 +shellingham==1.5.4 +six==1.16.0 +sniffio==1.3.1 +starlette==0.37.2 +sympy==1.12 +termcolor==2.4.0 +timm==0.9.16 +tokenizers==0.15.2 +tomlkit==0.12.0 
+toolz==0.12.1 +torchvision==0.17.2 +tqdm==4.66.2 +transformers==4.39.3 +typer==0.12.3 +typing_extensions==4.11.0 +tzdata==2024.1 +ujson==5.9.0 +urllib3==2.2.1 +uvicorn==0.29.0 +watchfiles==0.21.0 +websockets +#websockets==11.0.3 +yt-dlp +#yt-dlp==2024.4.9 +--extra-index-url https://download.pytorch.org/whl/cu113 +torch +torchaudio +#torch==2.2.2+cu121 +#torchaudio==2.2.2+cu121 \ No newline at end of file diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 00000000..5fbc1291 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2024 rmusser01 + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md index 6434913d..fb05d34e 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,256 @@ -# Too Long, Didnt Watch +# **TL/DW: Too Long, Didnt Watch** +## Download, Transcribe & Summarize Videos. All automated +## [Public Demo](https://huggingface.co/spaces/oceansweep/Vid-Summarizer) -YouTube contains an incredible amount of knowledge, much of which is locked inside multi-hour videos. Let's extract and summarize with AI! +![License](https://img.shields.io/badge/license-apache2.0-green) -- `diarize.py` - download, transrcibe and diarize audio - - [yt-dlp](https://github.com/yt-dlp/yt-dlp) - download audio tracks of youtube videos - - [ffmpeg](https://github.com/FFmpeg/FFmpeg) - decompress audio - - [faster_whisper](https://github.com/SYSTRAN/faster-whisper) - speech to text - - [pyannote](https://github.com/pyannote/pyannote-audio) - diarization +### What is TL/DW? -- `chunker.py` - break text into parts and prepare each part for LLM summarization +- Take a URL, single video, list of URLs, or list of local videos + URLs and feed it into the script and have each video transcribed (and audio downloaded if not local) using faster-whisper. +- Transcriptions can then be shuffled off to an LLM API endpoint of your choice, whether that be local or remote. +- Any site supported by yt-dl is supported, so you can use this with sites besides just youtube. ( https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md ) -- `roller-*.py` - rolling summarization - - [can-ai-code](https://github.com/the-crypt-keeper/can-ai-code) - interview executors to run LLM inference +I personally recommend Sonnet. It's great quality and relatively inexpensive. +* To be clear, Microsoft Phi-3 Mini 128k is great if you don't have a lot of VRAM and want to self-host. (I think it's better than anything up to 70B for summarization...) -- `compare.py` - prepare LLM outputs for webapp -- `compare-app.py` - summary viewer webapp +### Application Demo +CLI +![tldw-summarization-cli-demo](cli-demo-video) -This project is under active development and is not ready for production use. 
+GUI +![tldw-summarization-gui-demo](./Tests/Capture.PNG) + +---------- + +### Table of Contents +- [What?](#what) +- [Using](#using) +- [Setup](#setup) +- [Pieces/What's in the Repo](#what) +- [Setting up a Local LLM Inference Engine](#localllm) +- [Credits](#credits) + + + +### Quickstart after Installation +- **Download Audio only from URL -> Transcribe audio:** + * `python summarize.py https://www.youtube.com/watch?v=4nd1CDZP21s` +- **Download Audio+Video from URL -> Transcribe audio from Video:** + * `python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s` +- **Download Audio only from URL -> Transcribe audio -> Summarize using (`anthropic`/`cohere`/`openai`/`llama` (llama.cpp)/`ooba` (oobabooga/text-gen-webui)/`kobold` (kobold.cpp)/`tabby` (Tabbyapi)) API:** + * `python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s -api ` - Make sure to put your API key into `config.txt` under the appropriate API variable +- **Download Audio+Video from a list of videos in a text file (can be file paths or URLs) and have them all summarized:** + * `python summarize.py ./local/file_on_your/system --api_name ` +- **Run it as a WebApp** + * `python summarize.py -gui` - This requires you to either stuff your API keys into the `config.txt` file, or pass them into the app every time you want to use it. + * Can be helpful for setting up a shared instance, but not wanting people to perform inference on your server. + + +### What? +- **Use the script to (download->)transcribe(->summarize) a local file or remote url.** +- **What can you transcribe and summarize?** + * **Any youtube video.** Or video hosted at any of these sites: https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md + * (Playlists you have to use the `Get_Playlist_URLs.py` with `Get_Playlist_URLs.py ` and it'll create a text file with all the URLs for each video, so you can pass the text file as input and they'll all be downloaded. Pull requests are welcome.) + * Any url youtube-dl supports _should_ work. + * **Local Videos** + * Pass in the filepath to any local video file, and it will be transcribed. + * You can also pass in a text file containing a list of videos for batch processing. +- **How does it Summarize?** + - **Remote Summarization** + * Pass an API name (anthropic/cohere/grok/openai/) as an argument, ex: `-api anthropic` + * Add your API key to the `config.txt` file + * The script when ran, will detect that you passed an API name, and will perform summarization of the resulting transcription. + - **Local Summarization** + * Alternatively, you can pass `llama`/`ooba`/`kobold`/`tabby` as the API name and have the script perform a request to your local API endpoint for summarization. + * You will need to modify the `_api_IP` value in the `config.txt` to reflect the `IP:Port` of your local server. + * Or pass the `--api_url` argument with the `IP:Port` to avoid making changes to the `config.txt` file. + * If the self-hosted server requires an API key, modify the appropriate api_key variable in the `config.txt` file. + * The current approach to summarization is currently 'dumb'/naive, and will likely be replaced or additional functionality added to reflect actual practices and not just 'dump txt in and get an answer' approach. This works for big context LLMs, but not everyone has access to them, and some transcriptions may be even longer, so we need to have an approach that can handle those cases. +- **APIs Currently Supported** + 1. Anthropic + 2. Cohere + 3. Groq + 4. Llama.cpp + 5. Kobold.cpp + 6. 
Oobabooga
+  7. HuggingFace
+- **Planned to Support**
+  1. TabbyAPI
+
+----------
+
+### Setup
+- **Linux**
+    1. Download necessary packages (Python3, ffmpeg [sudo apt install ffmpeg / dnf install ffmpeg], ?)
+    2. Create a virtual env: `python -m venv ./`
+    3. Launch/activate your virtual env: `. ./bin/activate`
+    4. See `Linux && Windows`
+- **Windows**
+    1. Download necessary packages (Python3, [ffmpeg](https://www.gyan.dev/ffmpeg/builds/), ?)
+    2. Create a virtual env: `python -m venv .\`
+    3. Launch/activate your virtual env: `. .\Scripts\activate.ps1`
+    4. See `Linux && Windows`
+- **Linux && Windows**
+    1. `pip install -r requirements.txt` - may take a bit of time...
+    2. Run `python ./summarize.py ` - The video URL does _not_ have to be a youtube URL. It can be any site that ytdl supports.
+    3. You'll then be asked if you'd like to run the transcription through GPU(1) or CPU(2).
+    4. Next, the video will be downloaded to the local directory by ytdl.
+    5. Then the video will be transcribed by faster_whisper. (You can see this in the console output)
+       * The resulting transcription output will be stored as both a json file with timestamps, as well as a txt file with no timestamps.
+    6. Finally, you can have the transcription summarized through feeding it into an LLM of your choice.
+    7. For running it locally, here are the commands to do so:
+       * FIXME
+    8. For feeding the transcriptions to the API of your choice, simply use the corresponding script for your API provider.
+       * FIXME: add scripts for OpenAI api (generic) and others
+
+
+
+### Using
+- Single file (remote URL) transcription
+  * Single URL: `python summarize.py https://example.com/video.mp4`
+- Single file (local) transcription
+  * Transcribe a local file: `python summarize.py /path/to/your/localfile.mp4`
+- Multiple files (local & remote)
+  * List of files (can be URLs and local files mixed): `python summarize.py ./path/to/your/text_file.txt`
+
+
+Save time and use the `config.txt` file; it lets you set these options once and have them applied whenever the script is run.
+```
+usage: summarize.py [-h] [-v] [-api API_NAME] [-ns NUM_SPEAKERS] [-wm WHISPER_MODEL] [-off OFFSET] [-vad]
+                    [-log {DEBUG,INFO,WARNING,ERROR,CRITICAL}] [-ui] [-demo]
+                    [input_path]
+
+Transcribe and summarize videos.
+
+positional arguments:
+  input_path            Path or URL of the video
+
+options:
+  -h, --help            show this help message and exit
+  -v, --video           Download the video instead of just the audio
+  -api API_NAME, --api_name API_NAME
+                        API name for summarization (optional)
+  -ns NUM_SPEAKERS, --num_speakers NUM_SPEAKERS
+                        Number of speakers (default: 2)
+  -wm WHISPER_MODEL, --whisper_model WHISPER_MODEL
+                        Whisper model (default: small.en)
+  -off OFFSET, --offset OFFSET
+                        Offset in seconds (default: 0)
+  -vad, --vad_filter    Enable VAD filter
+  -log {DEBUG,INFO,WARNING,ERROR,CRITICAL}, --log_level {DEBUG,INFO,WARNING,ERROR,CRITICAL}
+                        Log level (default: INFO)
+  -ui, --user_interface
+                        Launch the Gradio user interface
+  -demo, --demo_mode    Enable demo mode
+
+
+-Download Audio only from URL -> Transcribe audio:
+>python summarize.py https://www.youtube.com/watch?v=4nd1CDZP21s
+
+-Download Audio only from URL -> Transcribe audio -> Summarize using (`anthropic`/`cohere`/`openai`/`llama` i.e.
llama.cpp/`ooba`/`kobold`/`tabby`) API: +>python summarize.py https://www.youtube.com/watch?v=4nd1CDZP21s -api + +-Download Audio+Video from URL -> Transcribe audio from Video: +>python summarize.py --video https://www.youtube.com/watch?v=4nd1CDZP21s + +-Download Audio+Video from a list of videos in a text file (can be file paths or URLs) and have them all summarized: +>python summarize.py --video ./local/file_on_your/system --api_name + +By default videos, transcriptions and summaries are stored in a folder with the video's name under './Results', unless otherwise specified in the config file. +``` + + +------------ + +### Setting up a Local LLM Inference Engine +- **Setting up Local LLM Runner** + - **Llama.cpp** + - **Linux & Mac** + 1. `git clone https://github.com/ggerganov/llama.cpp` + 2. `make` in the `llama.cpp` folder + 3. `./server -m ../path/to/model -c ` + - **Windows** + 1. `git clone https://github.com/ggerganov/llama.cpp` + 2. Download + Run: https://github.com/skeeto/w64devkit/releases + 3. cd to `llama.cpp` folder make` in the `llama.cpp` folder + 4. `server.exe -m ..\path\to\model -c ` + - **Kobold.cpp** - c/p'd from: https://github.com/LostRuins/koboldcpp/wiki + - **Windows** + 1. Download from here: https://github.com/LostRuins/koboldcpp/releases/latest + 2. `Double click KoboldCPP.exe and select model OR run "KoboldCPP.exe --help" in CMD prompt to get command line arguments for more control.` + 3. `Generally you don't have to change much besides the Presets and GPU Layers. Run with CuBLAS or CLBlast for GPU acceleration.` + 4. `Select your GGUF or GGML model you downloaded earlier, and connect to the displayed URL once it finishes loading.` + - **Linux** + 1. `On Linux, we provide a koboldcpp-linux-x64 PyInstaller prebuilt binary on the releases page for modern systems. Simply download and run the binary.` + * Alternatively, you can also install koboldcpp to the current directory by running the following terminal command: `curl -fLo koboldcpp https://github.com/LostRuins/koboldcpp/releases/latest/download/koboldcpp-linux-x64 && chmod +x koboldcpp` + 2. When you can't use the precompiled binary directly, we provide an automated build script which uses conda to obtain all dependencies, and generates (from source) a ready-to-use a pyinstaller binary for linux users. Simply execute the build script with `./koboldcpp.sh dist` and run the generated binary. + - **oobabooga - text-generation-webui** - https://github.com/oobabooga/text-generation-webui + 1. Clone or download the repository. + * Clone: `git clone https://github.com/oobabooga/text-generation-webui` + * Download: https://github.com/oobabooga/text-generation-webui/releases/latest -> Download the `Soruce code (zip)` file -> Extract -> Continue below. + 2. Run the `start_linux.sh`, `start_windows.bat`, `start_macos.sh`, or `start_wsl.bat` script depending on your OS. + 3. Select your GPU vendor when asked. + 4. Once the installation ends, browse to http://localhost:7860/?__theme=dark. + - **Exvllama2** +- **Setting up a Local LLM Model** + 1. microsoft/Phi-3-mini-128k-instruct - 3.8B Model/7GB base, 4GB Q8 - https://huggingface.co/microsoft/Phi-3-mini-128k-instruct + * GGUF Quants: https://huggingface.co/pjh64/Phi-3-mini-128K-Instruct.gguf + 2. Meta Llama3-8B - 8B Model/16GB base, 8.5GB Q8 - https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct + * GGUF Quants: https://huggingface.co/lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF + + +---------- + + +### Pieces & What's in the repo? +- **Workflow** + 1. 
Setup Python + packages
+ 2. Setup ffmpeg
+ 3. Run `python summarize.py ` or `python summarize.py `
+ 4. If you want summarization, add your API keys (if not using a local LLM) to the `config.txt` file, and then re-run the script, passing in the name of the API [or URL endpoint - to be added] to the script.
+    * `python summarize.py https://www.youtube.com/watch?v=4nd1CDZP21s --api_name anthropic` - This will attempt to download the video, then upload the resulting json file to the anthropic API endpoint, referring to values set in the config file (API key and model) to request summarization.
+  - Anthropic:
+    * `claude-3-opus-20240229`
+    * `claude-3-sonnet-20240229`
+    * `claude-3-haiku-20240307`
+  - Cohere:
+    * `command-r`
+    * `command-r-plus`
+  - Groq:
+    * `llama3-8b-8192`
+    * `llama3-70b-8192`
+    * `mixtral-8x7b-32768`
+  - HuggingFace:
+    * `CohereForAI/c4ai-command-r-plus`
+    * `meta-llama/Meta-Llama-3-70B-Instruct`
+    * `meta-llama/Meta-Llama-3-8B-Instruct`
+    * Supposedly you can use any model on there, but this is for reference for the free demo instance, in case you'd like to host your own.
+  - OpenAI:
+    * `gpt-4-turbo`
+    * `gpt-4-turbo-preview`
+    * `gpt-4`
+- **What's in the repo?**
+  - `summarize.py` - download, transcribe and summarize audio
+    1. First uses [yt-dlp](https://github.com/yt-dlp/yt-dlp) to download audio (optionally video) from the supplied URL
+    2. Next, it uses [ffmpeg](https://github.com/FFmpeg/FFmpeg) to convert the resulting `.m4a` file to `.wav`
+    3. Then it uses [faster_whisper](https://github.com/SYSTRAN/faster-whisper) to transcribe the `.wav` file to `.txt`
+    4. After that, it uses [pyannote](https://github.com/pyannote/pyannote-audio) to perform diarization
+    5. Finally, it'll send the resulting txt to an LLM endpoint of your choice for summarization of the text (see the illustrative sketch below).
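For readers who want the same flow in code, here is a minimal, illustrative sketch of the single-URL pipeline. It reuses helper names that appear elsewhere in this repo (`get_youtube`, `download_video`, `convert_to_wav`, `speech_to_text`, `summarize_with_openai`); the output directory, the placeholder API key, and the hard-coded model are assumptions for the example, not the script's exact control flow.

```python
# Illustrative sketch only: one URL through download -> convert -> transcribe -> summarize.
import os
from summarize import (get_youtube, download_video, convert_to_wav,
                       speech_to_text, summarize_with_openai)

url = "https://www.youtube.com/watch?v=4nd1CDZP21s"
info = get_youtube(url)                                # 1. yt-dlp probes the URL (title, formats, ...)
download_dir = os.path.join("Results", "example")      # assumed output folder
os.makedirs(download_dir, exist_ok=True)
video_path = download_video(url, download_dir, info, False)   # False = audio only
audio_file = convert_to_wav(video_path)                # 2. ffmpeg: .m4a -> .wav
segments = speech_to_text(audio_file, whisper_model="small.en", vad_filter=False)  # 3. faster_whisper
# 4. diarization (pyannote) is a separate step and is skipped in this sketch
# 5. the summarizers consume the segments JSON written next to the .wav
summary = summarize_with_openai("YOUR_OPENAI_API_KEY",
                                audio_file.replace(".wav", ".segments.json"),
                                "gpt-4-turbo")
print(summary)
```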
+ - `chunker.py` - break text into parts and prepare each part for LLM summarization + - `roller-*.py` - rolling summarization + - [can-ai-code](https://github.com/the-crypt-keeper/can-ai-code) - interview executors to run LLM inference + - `compare.py` - prepare LLM outputs for webapp + - `compare-app.py` - summary viewer webapp + +------------ +### Similar/Other projects: +- https://github.com/Dicklesworthstone/bulk_transcribe_youtube_videos_from_playlist/tree/main +- https://github.com/akashe/YoutubeSummarizer +------------ + +### Credits +- [original](https://github.com/the-crypt-keeper/tldw) +- [yt-dlp](https://github.com/yt-dlp/yt-dlp) +- [ffmpeg](https://github.com/FFmpeg/FFmpeg) +- [faster_whisper](https://github.com/SYSTRAN/faster-whisper) +- [pyannote](https://github.com/pyannote/pyannote-audio) \ No newline at end of file diff --git a/Tests/Capture.PNG b/Tests/Capture.PNG new file mode 100644 index 00000000..e9b0ce43 Binary files /dev/null and b/Tests/Capture.PNG differ diff --git a/Tests/general_tests.py b/Tests/general_tests.py new file mode 100644 index 00000000..c1e68df3 --- /dev/null +++ b/Tests/general_tests.py @@ -0,0 +1,144 @@ +import os +import sys +import tempfile +import unittest +from unittest.mock import patch, MagicMock, PropertyMock, call, ANY +import faster_whisper + +sys.path.append("../") + +# Import the necessary functions and classes from your script +import summarize +from summarize import ( + read_paths_from_file, + process_path, + get_youtube, + download_video, + convert_to_wav, + speech_to_text, + summarize_with_openai, + summarize_with_claude, + summarize_with_cohere, + summarize_with_groq, + summarize_with_llama, + summarize_with_kobold, + summarize_with_oobabooga, + main +) + + +class TestTranscriptionScript(unittest.TestCase): + def setUp(self): + # Set up any necessary resources before each test + pass + + def tearDown(self): + # Clean up any resources after each test + pass + + def test_read_paths_from_file(self): + with tempfile.NamedTemporaryFile(mode='w', delete=False) as temp_file: + temp_file.write("http://example.com/video1.mp4\n") + temp_file.write("http://example.com/video2.mp4\n") + temp_file.write("http://example.com/video3.mp4\n") + temp_file_path = temp_file.name + + paths = read_paths_from_file(temp_file_path) + expected_paths = [ + "http://example.com/video1.mp4", + "http://example.com/video2.mp4", + "http://example.com/video3.mp4" + ] + self.assertListEqual(paths, expected_paths) + + os.unlink(temp_file_path) + + def test_process_path(self): + with patch('summarize.get_youtube', return_value={'title': 'Sample Video'}): + result = process_path("http://example.com/video.mp4") + self.assertIsNotNone(result) + + with tempfile.NamedTemporaryFile(mode='w', delete=False) as temp_file: + temp_file_path = temp_file.name + result = process_path(temp_file_path) + self.assertIsNotNone(result) + os.unlink(temp_file_path) + + result = process_path("non_existent_path") + self.assertIsNone(result) + + def test_get_youtube(self): + with patch('yt_dlp.YoutubeDL.extract_info', return_value={'title': 'Sample YouTube Video'}): + info_dict = get_youtube("http://example.com/youtube_video.mp4") + self.assertIsNotNone(info_dict) + self.assertEqual(info_dict['title'], 'Sample YouTube Video') + + def test_download_video(self): + with patch('yt_dlp.YoutubeDL.download') as mock_download: + video_path = download_video("http://example.com/video.mp4", "download_path", {'title': 'Sample Video'}, False) + self.assertIsNotNone(video_path) + 
mock_download.assert_called_once() + + def test_convert_to_wav(self): + with tempfile.NamedTemporaryFile(mode='w', delete=False) as temp_file: + temp_file_path = temp_file.name + with patch('subprocess.run', return_value=MagicMock(returncode=0)): + wav_path = convert_to_wav(temp_file_path) + self.assertIsNotNone(wav_path) + self.assertTrue(wav_path.endswith(".wav")) + os.unlink(temp_file_path) + + def test_speech_to_text(self): + with tempfile.NamedTemporaryFile(mode='w', delete=False) as temp_file: + temp_file_path = temp_file.name + with patch('faster_whisper.WhisperModel.transcribe', return_value=([], {})): + segments = speech_to_text(temp_file_path) + self.assertIsInstance(segments, list) + os.unlink(temp_file_path) + + def test_summarize_with_openai(self): + with patch('requests.post') as mock_post: + mock_post.return_value = MagicMock(status_code=200, json=lambda: {'choices': [{'message': {'content': 'Sample summary'}}]}) + summary = summarize_with_openai("api_key", "file_path", "model") + self.assertEqual(summary, 'Sample summary') + mock_post.assert_called_once_with( + 'https://api.openai.com/v1/chat/completions', + headers=ANY, + json=ANY + ) + + # Add similar tests for other summarization functions + # ... + + def test_main(self): + with patch('summarize.process_path', return_value=("download_path", {'title': 'Sample Video'}, "audio_file")): + with patch('summarize.speech_to_text', return_value=[]): + with patch('summarize.summarize_with_openai', return_value='Sample summary'): + results = main("https://www.youtube.com/watch?v=YRfN-UGoKJY", api_name="openai", api_key="api_key") + self.assertIsInstance(results, list) + self.assertEqual(len(results), 1) + self.assertIn('summary', results[0]) + + + # Add more integration tests for different scenarios + # ... + + def test_error_handling(self): + with self.assertRaises(ValueError): + speech_to_text(None) + + with self.assertRaises(RuntimeError): + with patch('subprocess.run', side_effect=subprocess.CalledProcessError(1, 'ffmpeg')): + convert_to_wav("invalid_path") + + + def test_warnings(self): + with self.assertWarns(UserWarning): + # Code that triggers a warning + pass + + # Add more tests for different aspects of the script + # ... + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/Tests/list_of_videos.txt b/Tests/list_of_videos.txt new file mode 100644 index 00000000..95cf2afd --- /dev/null +++ b/Tests/list_of_videos.txt @@ -0,0 +1,3 @@ +https://www.youtube.com/shorts/siPhZvKk0xE +https://www.youtube.com/shorts/oNM-YLoVMKI +https://www.youtube.com/shorts/quuWzw2Ih6M \ No newline at end of file diff --git a/Tests/test_transcription_summarization.py b/Tests/test_transcription_summarization.py new file mode 100644 index 00000000..c52a3259 --- /dev/null +++ b/Tests/test_transcription_summarization.py @@ -0,0 +1,191 @@ +import os +import shutil +import tempfile +import unittest +from unittest.mock import patch, MagicMock + +# Run this: python -m unittest test_transcription_summarization.py +""" +1. Test File/Path parsing +2. Test if file is identified as existing/not existing successfully +3. Implement tests for each API endpoint +4. Test wav conversion + In the test_convert_to_wav test, you can enhance it by creating a temporary video file using a library like cv2 or moviepy and then testing the conversion with the actual file. This will make the test more comprehensive. +5. Error handling + You mentioned testing error handling in the comments, but there are no specific tests for it in the script. 
Consider adding tests that intentionally raise exceptions or simulate error conditions to ensure that the script handles them gracefully. Use assertRaises to check if the expected exceptions are raised. +6. Integration tests: + The test_main function is a good example of an integration test. Consider adding more integration tests that cover different scenarios and combinations of input paths, API names, and configurations to ensure the script works end-to-end. +7. Test summarization functions: + You have a test for summarize_with_openai, but there are no tests for other summarization functions like summarize_with_claude, summarize_with_cohere, etc. Consider adding tests for each summarization function to ensure they work as expected. +8. Test coverage: + Use a test coverage tool like coverage to measure the test coverage of your script. This helps identify areas that may require additional testing. You can run the tests with coverage and generate a coverage report to see which lines of code are covered by the tests. +9. Naming conventions: +10. Docstrings and comments: +11. Parameterized tests: + Use parameterized tests to test the script with different input paths, API names, models, and other configurations. + This allows you to cover a wide range of scenarios without duplicating test code. +""" + +# Import the necessary functions and classes from your script +from diarize import ( + read_paths_from_file, + process_path, + process_local_file, + create_download_directory, + normalize_title, + get_youtube, + download_video, + convert_to_wav, + speech_to_text, + summarize_with_openai, + summarize_with_claude, + summarize_with_cohere, + summarize_with_groq, + summarize_with_llama, + summarize_with_oobabooga, + save_summary_to_file, + main +) + +class TestTranscriptionSummarization(unittest.TestCase): + def setUp(self): + self.temp_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.temp_dir) + + def test_read_paths_from_file(self): + # Create a temporary file with sample paths + file_path = os.path.join(self.temp_dir, 'paths.txt') + with open(file_path, 'w') as file: + file.write('path1\npath2\npath3') + + # Call the function and check the returned paths + paths = read_paths_from_file(file_path) + self.assertEqual(paths, ['path1', 'path2', 'path3']) + + + + @patch('diarize.process_local_file') + def test_process_local_file(self, mock_process_local_file): + mock_process_local_file.return_value = ('/path/to/download', {'title': 'Local Video'}, '/path/to/audio.wav') + result = process_path('/path/to/local/video.mp4') + self.assertEqual(result, ('/path/to/download', {'title': 'Local Video'}, '/path/to/audio.wav')) + + + + def test_normalize_title(self): + title = 'Video Title / with \\ Special: Characters*' + normalized_title = normalize_title(title) + self.assertEqual(normalized_title, 'Video Title _ with _ Special_ Characters') + + + + @patch('diarize.subprocess.run') + def test_convert_to_wav(self, mock_subprocess_run): + video_file_path = '/path/to/video.mp4' + audio_file_path = convert_to_wav(video_file_path) + self.assertEqual(audio_file_path, '/path/to/video.wav') + mock_subprocess_run.assert_called_once() + + + + @patch('diarize.process_local_file') + def test_process_path(self, mock_process_local_file, mock_get_youtube): + # Test processing a URL + mock_get_youtube.return_value = {'title': 'Video Title'} + result = process_path('https://example.com/video.mp4') + self.assertEqual(result, {'title': 'Video Title'}) + + # Test processing a local file + 
mock_process_local_file.return_value = ('/path/to/download', {'title': 'Local Video'}, '/path/to/audio.wav') + result = process_path('/path/to/local/video.mp4') + self.assertEqual(result, ('/path/to/download', {'title': 'Local Video'}, '/path/to/audio.wav')) + + + + def test_speech_to_text(self): + audio_file_path = '/path/to/audio.wav' + segments = speech_to_text(audio_file_path) + self.assertIsInstance(segments, list) + self.assertTrue(len(segments) > 0) + self.assertIn('start', segments[0]) + self.assertIn('end', segments[0]) + self.assertIn('text', segments[0]) + + + + @patch('diarize.requests.post') + def test_summarize_with_openai(self, mock_post): + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = {'choices': [{'message': {'content': 'Summary'}}]} + mock_post.return_value = mock_response + + summary = summarize_with_openai('api_key', '/path/to/audio.wav.segments.json', 'gpt-4-turbo') + self.assertEqual(summary, 'Summary') + + + + def test_integration_local_file(self): + # Create a temporary video file + video_file_path = os.path.join(self.temp_dir, 'video.mp4') + with open(video_file_path, 'wb') as file: + file.write(b'dummy video content') + + # Call the main function with the local file path + results = main(video_file_path) + + # Check the expected results + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['video_path'], video_file_path) + self.assertIsNotNone(results[0]['audio_file']) + self.assertIsInstance(results[0]['transcription'], list) + + + + def test_save_summary_to_file(self): + summary = 'This is a summary.' + file_path = '/path/to/audio.wav.segments.json' + save_summary_to_file(summary, file_path) + summary_file_path = file_path. self.assertTrue(os.path.exists(summary_file_path)) + with open(summary_file_path, 'r') as file: + content = file.read() + self.assertEqual(content, summary) + + + + @patch('diarize.get_youtube') + @patch('diarize.download_video') + @patch('diarize.convert_to_wav') + @patch('diarize.speech_to_text') + @patch('diarize.summarize_with_openai') + def test_main(self, mock_summarize, mock_speech_to_text, mock_convert_to_wav, mock_download_video, mock_get_youtube): + # Set up mock return values + mock_get_youtube.return_value = {'title': 'Video Title'} + mock_download_video.return_value = '/path/to/video.mp4' + mock_convert_to_wav.return_value = '/path/to/audio.wav' + mock_speech_to_text.return_value = [{'start': 0, 'end': 5, 'text': 'Hello'}] + mock_summarize.return_value = 'This is a summary.' + + # Call the main function with sample arguments + results = main('https://example.com/video.mp4', api_name='openai', api_key='api_key') + + # Check the expected results + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['video_path'], 'https://example.com/video.mp4') + self.assertEqual(results[0]['audio_file'], '/path/to/audio.wav') + self.assertEqual(results[0]['transcription'], [{'start': 0, 'end': 5, 'text': 'Hello'}]) + self.assertEqual(results[0]['summary'], 'This is a summary.') + + # Check that the expected functions were called with the correct arguments + mock_get_youtube.assert_called_once_with('https://example.com/video.mp4') + mock_download_video.assert_called_once() + mock_convert_to_wav.assert_called_once() + mock_speech_to_text.assert_called_once() + mock_summarize.assert_called_once_with('api_key', '/path/to/audio.wav.segments.json', 'gpt-4-turbo') + + # Add more test methods for other functions... 
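    # Illustrative sketch (not part of the original suite): the module docstring above
    # recommends parameterized tests; unittest's subTest() gives that without extra
    # dependencies. File handling mirrors test_read_paths_from_file above.
    def test_read_paths_from_file_parameterized(self):
        cases = [
            ['path1', 'path2', 'path3'],
            ['https://example.com/a.mp4', '/local/clip.mp4'],
        ]
        for expected in cases:
            with self.subTest(expected=expected):
                file_path = os.path.join(self.temp_dir, 'paths.txt')
                with open(file_path, 'w') as file:
                    file.write('\n'.join(expected))
                self.assertEqual(read_paths_from_file(file_path), expected)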
+ +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/config.txt b/config.txt new file mode 100644 index 00000000..a90803a9 --- /dev/null +++ b/config.txt @@ -0,0 +1,29 @@ +[API] +anthropic_api_key = +cohere_model = command-r-plus +groq_api_key = +groq_model = llama3-70b-8192 +openai_api_key = +openai_model = gpt-4-turbo +huggingface_api_token = +huggingface_model = CohereForAI/c4ai-command-r-plus + + +[Local-API] +kobold_api_key = +kobold_api_IP = http://127.0.0.1:5001/api/v1/generate +llama_api_key = +llama_api_IP = http://127.0.0.1:8080/completion +ooba_api_key = +ooba_api_IP = http://127.0.0.1:5000/v1/chat/completions + + +[Paths] +output_path = Results +logging_file = Logs + + +[Processing] +processing_choice = cuda \ No newline at end of file diff --git a/list_of_videos.txt b/list_of_videos.txt new file mode 100644 index 00000000..95cf2afd --- /dev/null +++ b/list_of_videos.txt @@ -0,0 +1,3 @@ +https://www.youtube.com/shorts/siPhZvKk0xE +https://www.youtube.com/shorts/oNM-YLoVMKI +https://www.youtube.com/shorts/quuWzw2Ih6M \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..ca424cbe --- /dev/null +++ b/requirements.txt @@ -0,0 +1,115 @@ +accelerate==0.29.2 +aiofiles==23.2.1 +altair==5.3.0 +annotated-types==0.6.0 +anyio==4.3.0 +attrs==23.2.0 +av==11.0.0 +Brotli==1.1.0 +certifi==2024.2.2 +charset-normalizer==3.3.2 +click==8.1.7 +colorama==0.4.6 +coloredlogs==15.0.1 +contourpy==1.2.1 +ctranslate2==4.2.1 +cycler==0.12.1 +dnspython==2.6.1 +email_validator==2.1.1 +fastapi==0.111.0 +fastapi-cli==0.0.3 +faster-whisper==1.0.1 +ffmpeg==1.4 +ffmpy==0.3.2 +filelock==3.13.4 +fire==0.6.0 +flatbuffers==24.3.25 +fonttools==4.51.0 +fsspec==2024.3.1 +gradio +#gradio==4.29.0 +gradio_client +#gradio_client==0.16.1 +h11==0.14.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.0 +huggingface-hub==0.22.2 +humanfriendly==10.0 +idna==3.7 +importlib_resources==6.4.0 +Jinja2==3.1.3 +jsonschema==4.22.0 +jsonschema-specifications==2023.12.1 +kiwisolver==1.4.5 +markdown-it-py==3.0.0 +MarkupSafe==2.1.5 +matplotlib==3.8.4 +mdurl==0.1.2 +mpmath==1.3.0 +mutagen==1.47.0 +networkx==3.3 +numpy==1.26.4 +onnxruntime==1.17.3 +openai +orjson==3.10.3 +packaging==24.0 +pandas==2.2.2 +pillow==10.3.0 +protobuf==5.26.1 +psutil==5.9.8 +pycryptodomex==3.20.0 +pydantic==2.7.1 +pydantic_core==2.18.2 +pydub==0.25.1 +pyee==11.1.0 +Pygments==2.18.0 +pyparsing==3.1.2 +pyreadline3==3.4.1 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +python-ffmpeg==2.0.12 +python-multipart==0.0.9 +pytz==2024.1 +PyYAML==6.0.1 +referencing==0.35.1 +regex==2023.12.25 +requests==2.31.0 +rich==13.7.1 +rpds-py==0.18.1 +ruff==0.4.3 +safetensors==0.4.2 +semantic-version==2.10.0 +sentencepiece==0.2.0 +setuptools==69.5.1 +shellingham==1.5.4 +six==1.16.0 +sniffio==1.3.1 +starlette==0.37.2 +sympy==1.12 +termcolor==2.4.0 +timm==0.9.16 +tokenizers==0.15.2 +tomlkit==0.12.0 +toolz==0.12.1 +torchvision==0.17.2 +tqdm==4.66.2 +transformers==4.39.3 +typer==0.12.3 +typing_extensions==4.11.0 +tzdata==2024.1 +ujson==5.9.0 +urllib3==2.2.1 +uvicorn==0.29.0 +watchfiles==0.21.0 +websockets +#websockets==11.0.3 +yt-dlp +#yt-dlp==2024.4.9 +--extra-index-url https://download.pytorch.org/whl/cu113 +torch +torchaudio +#torch==2.2.2+cu121 +#torchaudio==2.2.2+cu121 +tiktoken~=0.6.0 +openai~=1.28.1 \ No newline at end of file diff --git a/summarize.py b/summarize.py new file mode 100644 index 00000000..e6acc786 --- /dev/null +++ b/summarize.py @@ -0,0 +1,1870 @@ +#!/usr/bin/env python3 
+import argparse +import configparser +import json +import logging +import os +import platform +import shutil +import subprocess +import sys +import time +from typing import List, Tuple, Optional +import zipfile + +import gradio as gr +import requests +import unicodedata +import yt_dlp + +# OpenAI Tokenizer support +from openai import OpenAI +from tqdm import tqdm +import tiktoken +####################### + +log_level = "INFO" +logging.basicConfig(level=getattr(logging, log_level), format='%(asctime)s - %(levelname)s - %(message)s') +os.environ["GRADIO_ANALYTICS_ENABLED"] = "False" + +####### +# Function Sections +# +# Config Loading +# System Checks +# Processing Paths and local file handling +# Video Download/Handling +# Audio Transcription +# Diarization +# Chunking-related Techniques & Functions +# Tokenization-related Techniques & Functions +# Summarizers +# Gradio UI +# Main +# +####### + +# To Do +# Offline diarization - https://github.com/pyannote/pyannote-audio/blob/develop/tutorials/community/offline_usage_speaker_diarization.ipynb + + +#### +# +# TL/DW: Too Long Didn't Watch +# +# Project originally created by https://github.com/the-crypt-keeper +# Modifications made by https://github.com/rmusser01 +# All credit to the original authors, I've just glued shit together. +# +# +# Usage: +# +# Download Audio only from URL -> Transcribe audio: +# python summarize.py https://www.youtube.com/watch?v=4nd1CDZP21s` +# +# Download Audio+Video from URL -> Transcribe audio from Video:** +# python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s` +# +# Download Audio only from URL -> Transcribe audio -> Summarize using (`anthropic`/`cohere`/`openai`/`llama` (llama.cpp)/`ooba` (oobabooga/text-gen-webui)/`kobold` (kobold.cpp)/`tabby` (Tabbyapi)) API:** +# python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s -api ` - Make sure to put your API key into `config.txt` under the appropriate API variable +# +# Download Audio+Video from a list of videos in a text file (can be file paths or URLs) and have them all summarized:** +# python summarize.py ./local/file_on_your/system --api_name ` +# +# Run it as a WebApp** +# python summarize.py -gui` - This requires you to either stuff your API keys into the `config.txt` file, or pass them into the app every time you want to use it. +# Can be helpful for setting up a shared instance, but not wanting people to perform inference on your server. +# +### + +####################### +# Random issues I've encountered and how I solved them: +# 1. Something about cuda nn library missing, even though cuda is installed... +# https://github.com/tensorflow/tensorflow/issues/54784 - Basically, installing zlib made it go away. idk. 
+# +# +# +# +# + +####################### +# Config loading +# + +# Read configuration from file +config = configparser.ConfigParser() +config.read('config.txt') + +# API Keys +anthropic_api_key = config.get('API', 'anthropic_api_key', fallback=None) +logging.debug(f"Loaded Anthropic API Key: {anthropic_api_key}") + +cohere_api_key = config.get('API', 'cohere_api_key', fallback=None) +logging.debug(f"Loaded cohere API Key: {cohere_api_key}") + +groq_api_key = config.get('API', 'groq_api_key', fallback=None) +logging.debug(f"Loaded groq API Key: {groq_api_key}") + +openai_api_key = config.get('API', 'openai_api_key', fallback=None) +logging.debug(f"Loaded openAI Face API Key: {openai_api_key}") + +huggingface_api_key = config.get('API', 'huggingface_api_key', fallback=None) +logging.debug(f"Loaded HuggingFace Face API Key: {huggingface_api_key}") + +# Models +anthropic_model = config.get('API', 'anthropic_model', fallback='claude-3-sonnet-20240229') +cohere_model = config.get('API', 'cohere_model', fallback='command-r-plus') +groq_model = config.get('API', 'groq_model', fallback='FIXME') +openai_model = config.get('API', 'openai_model', fallback='gpt-4-turbo') +huggingface_model = config.get('API', 'huggingface_model', fallback='CohereForAI/c4ai-command-r-plus') + +# Local-Models +kobold_api_IP = config.get('Local-API', 'kobold_api_IP', fallback='http://127.0.0.1:5000/api/v1/generate') +kobold_api_key = config.get('Local-API', 'kobold_api_key', fallback='') +llama_api_IP = config.get('Local-API', 'llama_api_IP', fallback='http://127.0.0.1:8080/v1/chat/completions') +llama_api_key = config.get('Local-API', 'llama_api_key', fallback='') +ooba_api_IP = config.get('Local-API', 'ooba_api_IP', fallback='http://127.0.0.1:5000/v1/chat/completions') +ooba_api_key = config.get('Local-API', 'ooba_api_key', fallback='') + +# Retrieve output paths from the configuration file +output_path = config.get('Paths', 'output_path', fallback='results') + +# Retrieve processing choice from the configuration file +processing_choice = config.get('Processing', 'processing_choice', fallback='cpu') + +# Log file +# logging.basicConfig(filename='debug-runtime.log', encoding='utf-8', level=logging.DEBUG) + +# +# +####################### + +# Dirty hack - sue me. 
+os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' + +whisper_models = ["small", "medium", "small.en", "medium.en"] +source_languages = { + "en": "English", + "zh": "Chinese", + "de": "German", + "es": "Spanish", + "ru": "Russian", + "ko": "Korean", + "fr": "French" +} +source_language_list = [key[0] for key in source_languages.items()] + +print(r"""_____ _ ________ _ _ +|_ _|| | / /| _ \| | | | _ + | | | | / / | | | || | | |(_) + | | | | / / | | | || |/\| | + | | | |____ / / | |/ / \ /\ / _ + \_/ \_____//_/ |___/ \/ \/ (_) + + + _ _ +| | | | +| |_ ___ ___ | | ___ _ __ __ _ +| __| / _ \ / _ \ | | / _ \ | '_ \ / _` | +| |_ | (_) || (_) | | || (_) || | | || (_| | _ + \__| \___/ \___/ |_| \___/ |_| |_| \__, |( ) + __/ ||/ + |___/ + _ _ _ _ _ _ _ + | |(_) | | ( )| | | | | | + __| | _ __| | _ __ |/ | |_ __ __ __ _ | |_ ___ | |__ + / _` || | / _` || '_ \ | __| \ \ /\ / / / _` || __| / __|| '_ \ +| (_| || || (_| || | | | | |_ \ V V / | (_| || |_ | (__ | | | | + \__,_||_| \__,_||_| |_| \__| \_/\_/ \__,_| \__| \___||_| |_| +""") + +####################################################################################################################### +# System Checks +# +# + +# Perform Platform Check +userOS = "" + + +def platform_check(): + global userOS + if platform.system() == "Linux": + print("Linux OS detected \n Running Linux appropriate commands") + userOS = "Linux" + elif platform.system() == "Windows": + print("Windows OS detected \n Running Windows appropriate commands") + userOS = "Windows" + else: + print("Other OS detected \n Maybe try running things manually?") + exit() + + +# Check for NVIDIA GPU and CUDA availability +def cuda_check(): + global processing_choice + try: + nvidia_smi = subprocess.check_output("nvidia-smi", shell=True).decode() + if "NVIDIA-SMI" in nvidia_smi: + print("NVIDIA GPU with CUDA is available.") + processing_choice = "cuda" # Set processing_choice to gpu if NVIDIA GPU with CUDA is available + else: + print("NVIDIA GPU with CUDA is not available.\nYou either have an AMD GPU, or you're stuck with CPU only.") + processing_choice = "cpu" # Set processing_choice to cpu if NVIDIA GPU with CUDA is not available + except subprocess.CalledProcessError: + print("NVIDIA GPU with CUDA is not available.\nYou either have an AMD GPU, or you're stuck with CPU only.") + processing_choice = "cpu" # Set processing_choice to cpu if nvidia-smi command fails + + +# Ask user if they would like to use either their GPU or their CPU for transcription +def decide_cpugpu(): + global processing_choice + processing_input = input("Would you like to use your GPU or CPU for transcription? (1/cuda)GPU/(2/cpu)CPU): ") + if processing_choice == "cuda" and (processing_input.lower() == "cuda" or processing_input == "1"): + print("You've chosen to use the GPU.") + logging.debug("GPU is being used for processing") + processing_choice = "cuda" + elif processing_input.lower() == "cpu" or processing_input == "2": + print("You've chosen to use the CPU.") + logging.debug("CPU is being used for processing") + processing_choice = "cpu" + else: + print("Invalid choice. 
Please select either GPU or CPU.")
+
+
+# check for existence of ffmpeg
+def check_ffmpeg():
+    if shutil.which("ffmpeg") or (os.path.exists("Bin") and os.path.isfile(".\\Bin\\ffmpeg.exe")):
+        logging.debug("ffmpeg found installed on the local system, in the local PATH, or in the './Bin' folder")
+        pass
+    else:
+        logging.debug("ffmpeg not installed on the local system/in local PATH")
+        print(
+            "ffmpeg is not installed.\n\n You can either install it manually, or through your package manager of "
+            "choice.\n Windows users, builds are here: https://www.gyan.dev/ffmpeg/builds/")
+        if userOS == "Windows":
+            download_ffmpeg()
+        elif userOS == "Linux":
+            print(
+                "You should install ffmpeg using your platform's appropriate package manager, 'apt install ffmpeg', "
+                "'dnf install ffmpeg' or 'pacman', etc.")
+        else:
+            logging.debug("running an unsupported OS")
+            print("You're running an unsupported/untested OS")
+            exit_script = input("Let's exit the script, unless you're feeling lucky? (y/n)")
+            if exit_script.lower() in ("y", "yes", "1"):
+                exit()
+
+
+# Download ffmpeg
+def download_ffmpeg():
+    user_choice = input("Do you want to download ffmpeg? (y)Yes/(n)No: ")
+    if user_choice.lower() in ('yes', 'y', '1'):
+        print("Downloading ffmpeg")
+        url = "https://www.gyan.dev/ffmpeg/builds/ffmpeg-release-essentials.zip"
+        response = requests.get(url)
+
+        if response.status_code == 200:
+            print("Saving ffmpeg zip file")
+            logging.debug("Saving ffmpeg zip file")
+            zip_path = "ffmpeg-release-essentials.zip"
+            with open(zip_path, 'wb') as file:
+                file.write(response.content)
+
+            logging.debug("Extracting the 'ffmpeg.exe' file from the zip")
+            print("Extracting ffmpeg.exe from zip file to '/Bin' folder")
+            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+                ffmpeg_path = "ffmpeg-7.0-essentials_build/bin/ffmpeg.exe"
+
+                logging.debug("checking if the './Bin' folder exists, creating if not")
+                bin_folder = "Bin"
+                if not os.path.exists(bin_folder):
+                    logging.debug("Creating a folder for './Bin', it didn't previously exist")
+                    os.makedirs(bin_folder)
+
+                logging.debug("Extracting 'ffmpeg.exe' to the './Bin' folder")
+                zip_ref.extract(ffmpeg_path, path=bin_folder)
+
+                logging.debug("Moving 'ffmpeg.exe' to the './Bin' folder")
+                src_path = os.path.join(bin_folder, ffmpeg_path)
+                dst_path = os.path.join(bin_folder, "ffmpeg.exe")
+                shutil.move(src_path, dst_path)
+
+                logging.debug("Removing ffmpeg zip file")
+                print("Deleting zip file (we've already extracted ffmpeg.exe, no worries)")
+                os.remove(zip_path)
+
+            logging.debug("ffmpeg.exe has been downloaded and extracted to the './Bin' folder.")
+            print("ffmpeg.exe has been successfully downloaded and extracted to the './Bin' folder.")
+        else:
+            logging.error("Failed to download the zip file.")
+            print("Failed to download the zip file.")
+    else:
+        logging.debug("User chose to not download ffmpeg")
+        print("ffmpeg will not be downloaded.")
+
+
+#
+#
+#######################################################################################################################
+
+
+#######################################################################################################################
+# Processing Paths and local file handling
+#
+#
+
+def read_paths_from_file(file_path):
+    """ Reads a file containing URLs or local file paths and returns them as a list.
""" + paths = [] # Initialize paths as an empty list + with open(file_path, 'r') as file: + paths = [line.strip() for line in file] + return paths + + +def process_path(path): + """ Decides whether the path is a URL or a local file and processes accordingly. """ + if path.startswith('http'): + logging.debug("file is a URL") + # For YouTube URLs, modify to download and extract info + return get_youtube(path) + elif os.path.exists(path): + logging.debug("File is a path") + # For local files, define a function to handle them + return process_local_file(path) + else: + logging.error(f"Path does not exist: {path}") + return None + + +# FIXME +def process_local_file(file_path): + logging.info(f"Processing local file: {file_path}") + title = normalize_title(os.path.splitext(os.path.basename(file_path))[0]) + info_dict = {'title': title} + logging.debug(f"Creating {title} directory...") + download_path = create_download_directory(title) + logging.debug(f"Converting '{title}' to an audio file (wav).") + audio_file = convert_to_wav(file_path) # Assumes input files are videos needing audio extraction + logging.debug(f"'{title}' successfully converted to an audio file (wav).") + return download_path, info_dict, audio_file + + +# +# +####################################################################################################################### + + +####################################################################################################################### +# Video Download/Handling +# + +def process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_name, api_key, vad_filter, + download_video, download_audio, chunk_size): + video_file_path = None + print("API Name received:", api_name) # Debugging line + try: + results = main(url, api_name=api_name, api_key=api_key, num_speakers=num_speakers, whisper_model=whisper_model, + offset=offset, vad_filter=vad_filter, download_video_flag=download_video, + custom_prompt=custom_prompt) + if results: + transcription_result = results[0] + + json_file_path = transcription_result['audio_file'].replace('.wav', '.segments.json') + prettified_json_file_path = transcription_result['audio_file'].replace('.wav', '.segments_pretty.json') + + summary_file_path = json_file_path.replace('.segments.json', '_summary.txt') + + json_file_path = format_file_path(json_file_path) + prettified_json_file_path = format_file_path(prettified_json_file_path, fallback_path=json_file_path) + + summary_file_path = format_file_path(summary_file_path) + + if download_video: + video_file_path = transcription_result['video_path'] if 'video_path' in transcription_result else None + + formatted_transcription = format_transcription(transcription_result) + + summary_text = transcription_result.get('summary', 'Summary not available') + + if summary_file_path and os.path.exists(summary_file_path): + return formatted_transcription, summary_text, prettified_json_file_path, summary_file_path, video_file_path, None + else: + return formatted_transcription, summary_text, prettified_json_file_path, None, video_file_path, None + else: + return "No results found.", "Summary not available", None, None, None, None + except Exception as e: + return str(e), "Error processing the request.", None, None, None, None + + +def create_download_directory(title): + base_dir = "Results" + # Remove characters that are illegal in Windows filenames and normalize + safe_title = normalize_title(title) + logging.debug(f"{title} successfully normalized") + session_path = os.path.join(base_dir, 
safe_title) + if not os.path.exists(session_path): + os.makedirs(session_path, exist_ok=True) + logging.debug(f"Created directory for downloaded video: {session_path}") + else: + logging.debug(f"Directory already exists for downloaded video: {session_path}") + return session_path + + +def normalize_title(title): + # Normalize the string to 'NFKD' form and encode to 'ascii' ignoring non-ascii characters + title = unicodedata.normalize('NFKD', title).encode('ascii', 'ignore').decode('ascii') + title = title.replace('/', '_').replace('\\', '_').replace(':', '_').replace('"', '').replace('*', '').replace('?', + '').replace( + '<', '').replace('>', '').replace('|', '') + return title + + +def get_youtube(video_url): + ydl_opts = { + 'format': 'bestaudio[ext=m4a]', + 'noplaylist': False, + 'quiet': True, + 'extract_flat': True + } + with yt_dlp.YoutubeDL(ydl_opts) as ydl: + logging.debug("About to extract youtube info") + info_dict = ydl.extract_info(video_url, download=False) + logging.debug("Youtube info successfully extracted") + return info_dict + + +def get_playlist_videos(playlist_url): + ydl_opts = { + 'extract_flat': True, + 'skip_download': True, + 'quiet': True + } + + with yt_dlp.YoutubeDL(ydl_opts) as ydl: + info = ydl.extract_info(playlist_url, download=False) + + if 'entries' in info: + video_urls = [entry['url'] for entry in info['entries']] + playlist_title = info['title'] + return video_urls, playlist_title + else: + print("No videos found in the playlist.") + return [], None + + +def save_to_file(video_urls, filename): + with open(filename, 'w') as file: + file.write('\n'.join(video_urls)) + print(f"Video URLs saved to {filename}") + + +def download_video(video_url, download_path, info_dict, download_video_flag): + logging.debug("About to normalize downloaded video title") + title = normalize_title(info_dict['title']) + + if not download_video_flag: + file_path = os.path.join(download_path, f"{title}.m4a") + ydl_opts = { + 'format': 'bestaudio[ext=m4a]', + 'outtmpl': file_path, + } + with yt_dlp.YoutubeDL(ydl_opts) as ydl: + logging.debug("yt_dlp: About to download audio with youtube-dl") + ydl.download([video_url]) + logging.debug("yt_dlp: Audio successfully downloaded with youtube-dl") + return file_path + else: + video_file_path = os.path.join(download_path, f"{title}_video.mp4") + audio_file_path = os.path.join(download_path, f"{title}_audio.m4a") + ydl_opts_video = { + 'format': 'bestvideo[ext=mp4]', + 'outtmpl': video_file_path, + } + ydl_opts_audio = { + 'format': 'bestaudio[ext=m4a]', + 'outtmpl': audio_file_path, + } + + with yt_dlp.YoutubeDL(ydl_opts_video) as ydl: + logging.debug("yt_dlp: About to download video with youtube-dl") + ydl.download([video_url]) + logging.debug("yt_dlp: Video successfully downloaded with youtube-dl") + + with yt_dlp.YoutubeDL(ydl_opts_audio) as ydl: + logging.debug("yt_dlp: About to download audio with youtube-dl") + ydl.download([video_url]) + logging.debug("yt_dlp: Audio successfully downloaded with youtube-dl") + + output_file_path = os.path.join(download_path, f"{title}.mp4") + + if sys.platform.startswith('win'): + logging.debug("Running ffmpeg on Windows...") + ffmpeg_command = [ + '.\\Bin\\ffmpeg.exe', + '-i', video_file_path, + '-i', audio_file_path, + '-c:v', 'copy', + '-c:a', 'copy', + output_file_path + ] + subprocess.run(ffmpeg_command, check=True) + elif userOS == "Linux": + logging.debug("Running ffmpeg on Linux...") + ffmpeg_command = [ + 'ffmpeg', + '-i', video_file_path, + '-i', audio_file_path, + '-c:v', 'copy', + 
'-c:a', 'copy', + output_file_path + ] + subprocess.run(ffmpeg_command, check=True) + else: + logging.error("ffmpeg: Unsupported operating system for video download and merging.") + raise RuntimeError("ffmpeg: Unsupported operating system for video download and merging.") + os.remove(video_file_path) + os.remove(audio_file_path) + + return output_file_path + + +# +# +####################################################################################################################### + + +####################################################################################################################### +# Audio Transcription +# +# Convert video .m4a into .wav using ffmpeg +# ffmpeg -i "example.mp4" -ar 16000 -ac 1 -c:a pcm_s16le "output.wav" +# https://www.gyan.dev/ffmpeg/builds/ +# + + +# os.system(r'.\Bin\ffmpeg.exe -ss 00:00:00 -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{out_path}"') +def convert_to_wav(video_file_path, offset=0, overwrite=False): + out_path = os.path.splitext(video_file_path)[0] + ".wav" + + if os.path.exists(out_path) and not overwrite: + print(f"File '{out_path}' already exists. Skipping conversion.") + logging.info(f"Skipping conversion as file already exists: {out_path}") + return out_path + print("Starting conversion process of .m4a to .WAV") + out_path = os.path.splitext(video_file_path)[0] + ".wav" + + try: + if os.name == "nt": + logging.debug("ffmpeg being ran on windows") + + if sys.platform.startswith('win'): + ffmpeg_cmd = ".\\Bin\\ffmpeg.exe" + logging.debug(f"ffmpeg_cmd: {ffmpeg_cmd}") + else: + ffmpeg_cmd = 'ffmpeg' # Assume 'ffmpeg' is in PATH for non-Windows systems + + command = [ + ffmpeg_cmd, # Assuming the working directory is correctly set where .\Bin exists + "-ss", "00:00:00", # Start at the beginning of the video + "-i", video_file_path, + "-ar", "16000", # Audio sample rate + "-ac", "1", # Number of audio channels + "-c:a", "pcm_s16le", # Audio codec + out_path + ] + try: + # Redirect stdin from null device to prevent ffmpeg from waiting for input + with open(os.devnull, 'rb') as null_file: + result = subprocess.run(command, stdin=null_file, text=True, capture_output=True) + if result.returncode == 0: + logging.info("FFmpeg executed successfully") + logging.debug("FFmpeg output: %s", result.stdout) + else: + logging.error("Error in running FFmpeg") + logging.error("FFmpeg stderr: %s", result.stderr) + raise RuntimeError(f"FFmpeg error: {result.stderr}") + except Exception as e: + logging.error("Error occurred - ffmpeg doesn't like windows") + raise RuntimeError("ffmpeg failed") + elif os.name == "posix": + os.system(f'ffmpeg -ss 00:00:00 -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{out_path}"') + else: + raise RuntimeError("Unsupported operating system") + logging.info("Conversion to WAV completed: %s", out_path) + except subprocess.CalledProcessError as e: + logging.error("Error executing FFmpeg command: %s", str(e)) + raise RuntimeError("Error converting video file to WAV") + except Exception as e: + logging.error("Unexpected error occurred: %s", str(e)) + raise RuntimeError("Error converting video file to WAV") + return out_path + + +# Transcribe .wav into .segments.json +def speech_to_text(audio_file_path, selected_source_lang='en', whisper_model='small.en', vad_filter=False): + logging.info('speech-to-text: Loading faster_whisper model: %s', whisper_model) + from faster_whisper import WhisperModel + model = WhisperModel(whisper_model, device=f"{processing_choice}") + time_start = time.time() + if 
audio_file_path is None: + raise ValueError("speech-to-text: No audio file provided") + logging.info("speech-to-text: Audio file path: %s", audio_file_path) + + try: + _, file_ending = os.path.splitext(audio_file_path) + out_file = audio_file_path.replace(file_ending, ".segments.json") + prettified_out_file = audio_file_path.replace(file_ending, ".segments_pretty.json") + if os.path.exists(out_file): + logging.info("speech-to-text: Segments file already exists: %s", out_file) + with open(out_file) as f: + segments = json.load(f) + return segments + + logging.info('speech-to-text: Starting transcription...') + options = dict(language=selected_source_lang, beam_size=5, best_of=5, vad_filter=vad_filter) + transcribe_options = dict(task="transcribe", **options) + segments_raw, info = model.transcribe(audio_file_path, **transcribe_options) + + segments = [] + for segment_chunk in segments_raw: + chunk = { + "start": segment_chunk.start, + "end": segment_chunk.end, + "text": segment_chunk.text + } + logging.debug("Segment: %s", chunk) + segments.append(chunk) + logging.info("speech-to-text: Transcription completed with faster_whisper") + + # Save prettified JSON + with open(prettified_out_file, 'w') as f: + json.dump(segments, f, indent=2) + + # Save non-prettified JSON + with open(out_file, 'w') as f: + json.dump(segments, f) + + except Exception as e: + logging.error("speech-to-text: Error transcribing audio: %s", str(e)) + raise RuntimeError("speech-to-text: Error transcribing audio") + return segments + + +# +# +####################################################################################################################### + + +####################################################################################################################### +# Diarization +# +# TODO: https://huggingface.co/pyannote/speaker-diarization-3.1 +# embedding_model = "pyannote/embedding", embedding_size=512 +# embedding_model = "speechbrain/spkrec-ecapa-voxceleb", embedding_size=192 +# def speaker_diarize(video_file_path, segments, embedding_model = "pyannote/embedding", embedding_size=512, num_speakers=0): +# """ +# 1. Generating speaker embeddings for each segments. +# 2. Applying agglomerative clustering on the embeddings to identify the speaker for each segment. 
+# """ +# try: +# from pyannote.audio import Audio +# from pyannote.core import Segment +# from pyannote.audio.pipelines.speaker_verification import PretrainedSpeakerEmbedding +# import numpy as np +# import pandas as pd +# from sklearn.cluster import AgglomerativeClustering +# from sklearn.metrics import silhouette_score +# import tqdm +# import wave +# +# embedding_model = PretrainedSpeakerEmbedding( embedding_model, device=torch.device("cuda" if torch.cuda.is_available() else "cpu")) +# +# +# _,file_ending = os.path.splitext(f'{video_file_path}') +# audio_file = video_file_path.replace(file_ending, ".wav") +# out_file = video_file_path.replace(file_ending, ".diarize.json") +# +# logging.debug("getting duration of audio file") +# with contextlib.closing(wave.open(audio_file,'r')) as f: +# frames = f.getnframes() +# rate = f.getframerate() +# duration = frames / float(rate) +# logging.debug("duration of audio file obtained") +# print(f"duration of audio file: {duration}") +# +# def segment_embedding(segment): +# logging.debug("Creating embedding") +# audio = Audio() +# start = segment["start"] +# end = segment["end"] +# +# # Enforcing a minimum segment length +# if end-start < 0.3: +# padding = 0.3-(end-start) +# start -= padding/2 +# end += padding/2 +# print('Padded segment because it was too short:',segment) +# +# # Whisper overshoots the end timestamp in the last segment +# end = min(duration, end) +# # clip audio and embed +# clip = Segment(start, end) +# waveform, sample_rate = audio.crop(audio_file, clip) +# return embedding_model(waveform[None]) +# +# embeddings = np.zeros(shape=(len(segments), embedding_size)) +# for i, segment in enumerate(tqdm.tqdm(segments)): +# embeddings[i] = segment_embedding(segment) +# embeddings = np.nan_to_num(embeddings) +# print(f'Embedding shape: {embeddings.shape}') +# +# if num_speakers == 0: +# # Find the best number of speakers +# score_num_speakers = {} +# +# for num_speakers in range(2, 10+1): +# clustering = AgglomerativeClustering(num_speakers).fit(embeddings) +# score = silhouette_score(embeddings, clustering.labels_, metric='euclidean') +# score_num_speakers[num_speakers] = score +# best_num_speaker = max(score_num_speakers, key=lambda x:score_num_speakers[x]) +# print(f"The best number of speakers: {best_num_speaker} with {score_num_speakers[best_num_speaker]} score") +# else: +# best_num_speaker = num_speakers +# +# # Assign speaker label +# clustering = AgglomerativeClustering(best_num_speaker).fit(embeddings) +# labels = clustering.labels_ +# for i in range(len(segments)): +# segments[i]["speaker"] = 'SPEAKER ' + str(labels[i] + 1) +# +# with open(out_file,'w') as f: +# f.write(json.dumps(segments, indent=2)) +# +# # Make CSV output +# def convert_time(secs): +# return datetime.timedelta(seconds=round(secs)) +# +# objects = { +# 'Start' : [], +# 'End': [], +# 'Speaker': [], +# 'Text': [] +# } +# text = '' +# for (i, segment) in enumerate(segments): +# if i == 0 or segments[i - 1]["speaker"] != segment["speaker"]: +# objects['Start'].append(str(convert_time(segment["start"]))) +# objects['Speaker'].append(segment["speaker"]) +# if i != 0: +# objects['End'].append(str(convert_time(segments[i - 1]["end"]))) +# objects['Text'].append(text) +# text = '' +# text += segment["text"] + ' ' +# objects['End'].append(str(convert_time(segments[i - 1]["end"]))) +# objects['Text'].append(text) +# +# save_path = video_file_path.replace(file_ending, ".csv") +# df_results = pd.DataFrame(objects) +# df_results.to_csv(save_path) +# return df_results, 
save_path +# +# except Exception as e: +# raise RuntimeError("Error Running inference with local model", e) +# +# +####################################################################################################################### + + +####################################################################################################################### +# Chunking-related Techniques & Functions +# +# + + +# This is dirty and shameful and terrible. It should be replaced with a proper implementation. +# anyways lets get to it.... +client = OpenAI(api_key=openai_api_key) +def get_chat_completion(messages, model='gpt-4-turbo'): + response = client.chat.completions.create( + model=model, + messages=messages, + temperature=0, + ) + return response.choices[0].message.content + + +# This function chunks a text into smaller pieces based on a maximum token count and a delimiter +def chunk_on_delimiter(input_string: str, + max_tokens: int, + delimiter: str) -> List[str]: + chunks = input_string.split(delimiter) + combined_chunks, _, dropped_chunk_count = combine_chunks_with_no_minimum( + chunks, max_tokens, chunk_delimiter=delimiter, add_ellipsis_for_overflow=True) + if dropped_chunk_count > 0: + print(f"Warning: {dropped_chunk_count} chunks were dropped due to exceeding the token limit.") + combined_chunks = [f"{chunk}{delimiter}" for chunk in combined_chunks] + return combined_chunks + + +# This function combines text chunks into larger blocks without exceeding a specified token count. +# It returns the combined chunks, their original indices, and the number of dropped chunks due to overflow. +def combine_chunks_with_no_minimum( + chunks: List[str], + max_tokens: int, + chunk_delimiter="\n\n", + header: Optional[str] = None, + add_ellipsis_for_overflow=False, +) -> Tuple[List[str], List[int]]: + dropped_chunk_count = 0 + output = [] # list to hold the final combined chunks + output_indices = [] # list to hold the indices of the final combined chunks + candidate = ( + [] if header is None else [header] + ) # list to hold the current combined chunk candidate + candidate_indices = [] + for chunk_i, chunk in enumerate(chunks): + chunk_with_header = [chunk] if header is None else [header, chunk] + #FIXME MAKE NOT OPENAI SPECIFIC + if len(openai_tokenize(chunk_delimiter.join(chunk_with_header))) > max_tokens: + print(f"warning: chunk overflow") + if ( + add_ellipsis_for_overflow + # FIXME MAKE NOT OPENAI SPECIFIC + and len(openai_tokenize(chunk_delimiter.join(candidate + ["..."]))) <= max_tokens + ): + candidate.append("...") + dropped_chunk_count += 1 + continue # this case would break downstream assumptions + # estimate token count with the current chunk added + # FIXME MAKE NOT OPENAI SPECIFIC + extended_candidate_token_count = len(openai_tokenize(chunk_delimiter.join(candidate + [chunk]))) + # If the token count exceeds max_tokens, add the current candidate to output and start a new candidate + if extended_candidate_token_count > max_tokens: + output.append(chunk_delimiter.join(candidate)) + output_indices.append(candidate_indices) + candidate = chunk_with_header # re-initialize candidate + candidate_indices = [chunk_i] + # otherwise keep extending the candidate + else: + candidate.append(chunk) + candidate_indices.append(chunk_i) + # add the remaining candidate to output if it's not empty + if (header is not None and len(candidate) > 1) or (header is None and len(candidate) > 0): + output.append(chunk_delimiter.join(candidate)) + output_indices.append(candidate_indices) + return output, 
output_indices, dropped_chunk_count
+
+
+def rolling_summarize(text: str,
+                      detail: float = 0,
+                      model: str = 'gpt-4-turbo',
+                      additional_instructions: Optional[str] = None,
+                      minimum_chunk_size: Optional[int] = 500,
+                      chunk_delimiter: str = ".",
+                      summarize_recursively=False,
+                      verbose=False):
+    """
+    Summarizes a given text by splitting it into chunks, each of which is summarized individually.
+    The level of detail in the summary can be adjusted, and the process can optionally be made recursive.
+
+    Parameters:
+    - text (str): The text to be summarized.
+    - detail (float, optional): A value between 0 and 1 indicating the desired level of detail in the summary.
+      0 leads to a higher-level summary, and 1 results in a more detailed summary. Defaults to 0.
+    - model (str, optional): The model to use for generating summaries. Defaults to 'gpt-4-turbo'.
+    - additional_instructions (Optional[str], optional): Additional instructions to provide to the model for
+      customizing summaries.
+    - minimum_chunk_size (Optional[int], optional): The minimum size for text chunks. Defaults to 500.
+    - chunk_delimiter (str, optional): The delimiter used to split the text into chunks. Defaults to ".".
+    - summarize_recursively (bool, optional): If True, summaries are generated recursively, using previous
+      summaries for context.
+    - verbose (bool, optional): If True, prints detailed information about the chunking process.
+
+    Returns:
+    - str: The final compiled summary of the text.
+
+    The function first determines the number of chunks by interpolating between a minimum and a maximum chunk count
+    based on the `detail` parameter. It then splits the text into chunks and summarizes each chunk. If
+    `summarize_recursively` is True, each summary is based on the previous summaries, adding more context to the
+    summarization process. The function returns a compiled summary of all chunks.
+    """
+
+    # check detail is set correctly
+    assert 0 <= detail <= 1
+
+    # interpolate the number of chunks to hit the specified level of detail
+    max_chunks = len(chunk_on_delimiter(text, minimum_chunk_size, chunk_delimiter))
+    min_chunks = 1
+    num_chunks = int(min_chunks + detail * (max_chunks - min_chunks))
+
+    # adjust chunk_size based on interpolated number of chunks
+    # FIXME MAKE NOT OPENAI SPECIFIC
+    document_length = len(openai_tokenize(text))
+    chunk_size = max(minimum_chunk_size, document_length // num_chunks)
+    text_chunks = chunk_on_delimiter(text, chunk_size, chunk_delimiter)
+    if verbose:
+        print(f"Splitting the text into {len(text_chunks)} chunks to be summarized.")
+        # FIXME MAKE NOT OPENAI SPECIFIC
+        print(f"Chunk lengths are {[len(openai_tokenize(x)) for x in text_chunks]}")
+
+    # set system message
+    system_message_content = "Rewrite this text in summarized form."
+ if additional_instructions is not None: + system_message_content += f"\n\n{additional_instructions}" + + accumulated_summaries = [] + for chunk in tqdm(text_chunks): + if summarize_recursively and accumulated_summaries: + # Creating a structured prompt for recursive summarization + accumulated_summaries_string = '\n\n'.join(accumulated_summaries) + user_message_content = f"Previous summaries:\n\n{accumulated_summaries_string}\n\nText to summarize next:\n\n{chunk}" + else: + # Directly passing the chunk for summarization without recursive context + user_message_content = chunk + + # Constructing messages based on whether recursive summarization is applied + messages = [ + {"role": "system", "content": system_message_content}, + {"role": "user", "content": user_message_content} + ] + + # Assuming this function gets the completion and works as expected + response = get_chat_completion(messages, model=model) + accumulated_summaries.append(response) + + # Compile final summary from partial summaries + global final_summary + final_summary = '\n\n'.join(accumulated_summaries) + + return final_summary + +# +# +####################################################################################################################### + + +####################################################################################################################### +# Tokenization-related Techniques & Functions +# +# + +def openai_tokenize(text: str) -> List[str]: + encoding = tiktoken.encoding_for_model('gpt-4-turbo') + return encoding.encode(text) + +# openai summarize chunks + +# +# +####################################################################################################################### + + + +####################################################################################################################### +# Summarizers +# +# + + +def extract_text_from_segments(segments): + logging.debug(f"Main: extracting text from {segments}") + text = ' '.join([segment['text'] for segment in segments]) + logging.debug(f"Main: Successfully extracted text from {segments}") + return text + + +def summarize_with_openai(api_key, file_path, model, custom_prompt): + try: + logging.debug("openai: Loading json data for summarization") + with open(file_path, 'r') as file: + segments = json.load(file) + + logging.debug("openai: Extracting text from the segments") + text = extract_text_from_segments(segments) + + headers = { + 'Authorization': f'Bearer {api_key}', + 'Content-Type': 'application/json' + } + + logging.debug(f"openai: API Key is: {api_key}") + logging.debug("openai: Preparing data + prompt for submittal") + openai_prompt = f"{text} \n\n\n\n{custom_prompt}" + data = { + "model": model, + "messages": [ + { + "role": "system", + "content": "You are a professional summarizer." 
+ }, + { + "role": "user", + "content": openai_prompt + } + ], + "max_tokens": 4096, # Adjust tokens as needed + "temperature": 0.7 + } + logging.debug("openai: Posting request") + response = requests.post('https://api.openai.com/v1/chat/completions', headers=headers, json=data) + + if response.status_code == 200: + summary = response.json()['choices'][0]['message']['content'].strip() + logging.debug("openai: Summarization successful") + print("Summarization successful.") + return summary + else: + logging.debug("openai: Summarization failed") + print("Failed to process summary:", response.text) + return None + except Exception as e: + logging.debug("openai: Error in processing: %s", str(e)) + print("Error occurred while processing summary with openai:", str(e)) + return None + + + +def summarize_with_claude(api_key, file_path, model, custom_prompt): + try: + logging.debug("anthropic: Loading JSON data") + with open(file_path, 'r') as file: + segments = json.load(file) + + logging.debug("anthropic: Extracting text from the segments file") + text = extract_text_from_segments(segments) + + headers = { + 'x-api-key': api_key, + 'anthropic-version': '2023-06-01', + 'Content-Type': 'application/json' + } + + anthropic_prompt = custom_prompt + logging.debug("anthropic: Prompt is {anthropic_prompt}") + user_message = { + "role": "user", + "content": f"{text} \n\n\n\n{anthropic_prompt}" + } + + data = { + "model": model, + "max_tokens": 4096, # max _possible_ tokens to return + "messages": [user_message], + "stop_sequences": ["\n\nHuman:"], + "temperature": 0.7, + "top_k": 0, + "top_p": 1.0, + "metadata": { + "user_id": "example_user_id", + }, + "stream": False, + "system": "You are a professional summarizer." + } + + logging.debug("anthropic: Posting request to API") + response = requests.post('https://api.anthropic.com/v1/messages', headers=headers, json=data) + + # Check if the status code indicates success + if response.status_code == 200: + logging.debug("anthropic: Post submittal successful") + response_data = response.json() + try: + summary = response_data['content'][0]['text'].strip() + logging.debug("anthropic: Summarization successful") + print("Summary processed successfully.") + return summary + except (IndexError, KeyError) as e: + logging.debug("anthropic: Unexpected data in response") + print("Unexpected response format from Claude API:", response.text) + return None + elif response.status_code == 500: # Handle internal server error specifically + logging.debug("anthropic: Internal server error") + print("Internal server error from API. 
Retrying may be necessary.") + return None + else: + logging.debug(f"anthropic: Failed to summarize, status code {response.status_code}: {response.text}") + print(f"Failed to process summary, status code {response.status_code}: {response.text}") + return None + + except Exception as e: + logging.debug("anthropic: Error in processing: %s", str(e)) + print("Error occurred while processing summary with anthropic:", str(e)) + return None + + +# Summarize with Cohere +def summarize_with_cohere(api_key, file_path, model, custom_prompt): + try: + logging.debug("cohere: Loading JSON data") + with open(file_path, 'r') as file: + segments = json.load(file) + + logging.debug(f"cohere: Extracting text from segments file") + text = extract_text_from_segments(segments) + + headers = { + 'accept': 'application/json', + 'content-type': 'application/json', + 'Authorization': f'Bearer {api_key}' + } + + cohere_prompt = f"{text} \n\n\n\n{custom_prompt}" + logging.debug("cohere: Prompt being sent is {cohere_prompt}") + + data = { + "chat_history": [ + {"role": "USER", "message": cohere_prompt} + ], + "message": "Please provide a summary.", + "model": model, + "connectors": [{"id": "web-search"}] + } + + logging.debug("cohere: Submitting request to API endpoint") + print("cohere: Submitting request to API endpoint") + response = requests.post('https://api.cohere.ai/v1/chat', headers=headers, json=data) + response_data = response.json() + logging.debug("API Response Data: %s", response_data) + + if response.status_code == 200: + if 'text' in response_data: + summary = response_data['text'].strip() + logging.debug("cohere: Summarization successful") + print("Summary processed successfully.") + return summary + else: + logging.error("Expected data not found in API response.") + return "Expected data not found in API response." 
+ else: + logging.error(f"cohere: API request failed with status code {response.status_code}: {response.text}") + print(f"Failed to process summary, status code {response.status_code}: {response.text}") + return f"cohere: API request failed: {response.text}" + + except Exception as e: + logging.error("cohere: Error in processing: %s", str(e)) + return f"cohere: Error occurred while processing summary with Cohere: {str(e)}" + + +# https://console.groq.com/docs/quickstart +def summarize_with_groq(api_key, file_path, model, custom_prompt): + try: + logging.debug("groq: Loading JSON data") + with open(file_path, 'r') as file: + segments = json.load(file) + + logging.debug(f"groq: Extracting text from segments file") + text = extract_text_from_segments(segments) + + headers = { + 'Authorization': f'Bearer {api_key}', + 'Content-Type': 'application/json' + } + + groq_prompt = f"{text} \n\n\n\n{custom_prompt}" + logging.debug("groq: Prompt being sent is {groq_prompt}") + + data = { + "messages": [ + { + "role": "user", + "content": groq_prompt + } + ], + "model": model + } + + logging.debug("groq: Submitting request to API endpoint") + print("groq: Submitting request to API endpoint") + response = requests.post('https://api.groq.com/openai/v1/chat/completions', headers=headers, json=data) + + response_data = response.json() + logging.debug("API Response Data: %s", response_data) + + if response.status_code == 200: + if 'choices' in response_data and len(response_data['choices']) > 0: + summary = response_data['choices'][0]['message']['content'].strip() + logging.debug("groq: Summarization successful") + print("Summarization successful.") + return summary + else: + logging.error("Expected data not found in API response.") + return "Expected data not found in API response." 
+ else: + logging.error(f"groq: API request failed with status code {response.status_code}: {response.text}") + return f"groq: API request failed: {response.text}" + + except Exception as e: + logging.error("groq: Error in processing: %s", str(e)) + return f"groq: Error occurred while processing summary with groq: {str(e)}" + + +################################# +# +# Local Summarization + +def summarize_with_llama(api_url, file_path, token, custom_prompt): + try: + logging.debug("llama: Loading JSON data") + with open(file_path, 'r') as file: + segments = json.load(file) + + logging.debug(f"llama: Extracting text from segments file") + text = extract_text_from_segments(segments) # Define this function to extract text properly + + headers = { + 'accept': 'application/json', + 'content-type': 'application/json', + } + if len(token) > 5: + headers['Authorization'] = f'Bearer {token}' + + llama_prompt = f"{text} \n\n\n\n{custom_prompt}" + logging.debug("llama: Prompt being sent is {llama_prompt}") + + data = { + "prompt": llama_prompt + } + + logging.debug("llama: Submitting request to API endpoint") + print("llama: Submitting request to API endpoint") + response = requests.post(api_url, headers=headers, json=data) + response_data = response.json() + logging.debug("API Response Data: %s", response_data) + + if response.status_code == 200: + # if 'X' in response_data: + logging.debug(response_data) + summary = response_data['content'].strip() + logging.debug("llama: Summarization successful") + print("Summarization successful.") + return summary + else: + logging.error(f"llama: API request failed with status code {response.status_code}: {response.text}") + return f"llama: API request failed: {response.text}" + + except Exception as e: + logging.error("llama: Error in processing: %s", str(e)) + return f"llama: Error occurred while processing summary with llama: {str(e)}" + + +# https://lite.koboldai.net/koboldcpp_api#/api%2Fv1/post_api_v1_generate +def summarize_with_kobold(api_url, file_path, kobold_api_token, custom_prompt): + try: + logging.debug("kobold: Loading JSON data") + with open(file_path, 'r') as file: + segments = json.load(file) + + logging.debug(f"kobold: Extracting text from segments file") + text = extract_text_from_segments(segments) + + headers = { + 'accept': 'application/json', + 'content-type': 'application/json', + } + + kobold_prompt = f"{text} \n\n\n\n{custom_prompt}" + logging.debug("kobold: Prompt being sent is {kobold_prompt}") + + # FIXME + # Values literally c/p from the api docs.... + data = { + "max_context_length": 8096, + "max_length": 4096, + "prompt": kobold_prompt, + } + + logging.debug("kobold: Submitting request to API endpoint") + print("kobold: Submitting request to API endpoint") + response = requests.post(api_url, headers=headers, json=data) + response_data = response.json() + logging.debug("kobold: API Response Data: %s", response_data) + + if response.status_code == 200: + if 'results' in response_data and len(response_data['results']) > 0: + summary = response_data['results'][0]['text'].strip() + logging.debug("kobold: Summarization successful") + print("Summarization successful.") + return summary + else: + logging.error("Expected data not found in API response.") + return "Expected data not found in API response." 
+        else:
+            logging.error(f"kobold: API request failed with status code {response.status_code}: {response.text}")
+            return f"kobold: API request failed: {response.text}"
+
+    except Exception as e:
+        logging.error("kobold: Error in processing: %s", str(e))
+        return f"kobold: Error occurred while processing summary with kobold: {str(e)}"
+
+
+# https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API
+def summarize_with_oobabooga(api_url, file_path, ooba_api_token, custom_prompt):
+    try:
+        logging.debug("ooba: Loading JSON data")
+        with open(file_path, 'r') as file:
+            segments = json.load(file)
+
+        logging.debug("ooba: Extracting text from segments file")
+        text = extract_text_from_segments(segments)
+        logging.debug("ooba: Finished extracting text from segments file")
+
+        headers = {
+            'accept': 'application/json',
+            'content-type': 'application/json',
+        }
+
+        # prompt_text = "I like to eat cake and bake cakes. I am a baker. I work in a French bakery baking cakes. It
+        # is a fun job. I have been baking cakes for ten years. I also bake lots of other baked goods, but cakes are
+        # my favorite." prompt_text += f"\n\n{text}"  # Uncomment this line if you want to include the text variable
+        ooba_prompt = f"{text}\n\n\n\n{custom_prompt}"
+        logging.debug(f"ooba: Prompt being sent is {ooba_prompt}")
+
+        data = {
+            "mode": "chat",
+            "character": "Example",
+            "messages": [{"role": "user", "content": ooba_prompt}]
+        }
+
+        logging.debug("ooba: Submitting request to API endpoint")
+        print("ooba: Submitting request to API endpoint")
+        response = requests.post(api_url, headers=headers, json=data, verify=False)
+        logging.debug("ooba: API Response Data: %s", response)
+
+        if response.status_code == 200:
+            response_data = response.json()
+            summary = response_data['choices'][0]['message']['content']
+            logging.debug("ooba: Summarization successful")
+            print("Summarization successful.")
+            return summary
+        else:
+            logging.error(f"oobabooga: API request failed with status code {response.status_code}: {response.text}")
+            return f"ooba: API request failed with status code {response.status_code}: {response.text}"
+
+    except Exception as e:
+        logging.error("ooba: Error in processing: %s", str(e))
+        return f"ooba: Error occurred while processing summary with oobabooga: {str(e)}"
+
+
+def save_summary_to_file(summary, file_path):
+    logging.debug("Now saving summary to file...")
+    summary_file_path = file_path.replace('.segments.json', '_summary.txt')
+    logging.debug("Opening summary file for writing, *segments.json with *_summary.txt")
+    with open(summary_file_path, 'w') as file:
+        file.write(summary)
+    logging.info(f"Summary saved to file: {summary_file_path}")
+
+
+#
+#
+#######################################################################################################################
+
+
+#######################################################################################################################
+# Summarization with Detail
+#
+
+def summarize_with_detail_openai(text, detail, verbose=False):
+    # FIXME MAKE function not specific to the artificial intelligence example
+    summary_with_detail_variable = rolling_summarize(text, detail=detail, verbose=True)
+    print(len(openai_tokenize(summary_with_detail_variable)))
+    return summary_with_detail_variable
+
+
+def summarize_with_detail_recursive_openai(text, detail, verbose=False):
+    summary_with_recursive_summarization = rolling_summarize(text, detail=detail, summarize_recursively=True)
+    print(summary_with_recursive_summarization)
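+
+# Illustrative usage only (nothing below is executed by this script): given a transcript already flattened with
+# extract_text_from_segments() and an OpenAI key set in config.txt, `detail` interpolates between one big chunk
+# (detail=0, shortest summary) and chunks of roughly `minimum_chunk_size` tokens (detail=1, longest summary):
+#
+#     transcript_text = extract_text_from_segments(segments)
+#     quick_summary = summarize_with_detail_openai(transcript_text, detail=0.1)
+#     thorough_summary = rolling_summarize(transcript_text, detail=0.7, summarize_recursively=True, verbose=True)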
+ +# +# +####################################################################################################################### + + +####################################################################################################################### +# Gradio UI +# + +# Only to be used when configured with Gradio for HF Space +def summarize_with_huggingface(api_key, file_path, custom_prompt): + logging.debug(f"huggingface: Summarization process starting...") + try: + logging.debug("huggingface: Loading json data for summarization") + with open(file_path, 'r') as file: + segments = json.load(file) + + logging.debug("huggingface: Extracting text from the segments") + logging.debug(f"huggingface: Segments: {segments}") + text = ' '.join([segment['text'] for segment in segments]) + + print(f"huggingface: lets make sure the HF api key exists...\n\t {api_key}") + headers = { + "Authorization": f"Bearer {api_key}" + } + + model = "microsoft/Phi-3-mini-128k-instruct" + API_URL = f"https://api-inference.huggingface.co/models/{model}" + + huggingface_prompt = f"{text}\n\n\n\n{custom_prompt}" + logging.debug("huggingface: Prompt being sent is {huggingface_prompt}") + data = { + "inputs": text, + "parameters": {"max_length": 512, "min_length": 100} # You can adjust max_length and min_length as needed + } + + print(f"huggingface: lets make sure the HF api key is the same..\n\t {huggingface_api_key}") + + logging.debug("huggingface: Submitting request...") + + response = requests.post(API_URL, headers=headers, json=data) + + if response.status_code == 200: + summary = response.json()[0]['summary_text'] + logging.debug("huggingface: Summarization successful") + print("Summarization successful.") + return summary + else: + logging.error(f"huggingface: Summarization failed with status code {response.status_code}: {response.text}") + return f"Failed to process summary, status code {response.status_code}: {response.text}" + except Exception as e: + logging.error("huggingface: Error in processing: %s", str(e)) + print(f"Error occurred while processing summary with huggingface: {str(e)}") + return None + + # FIXME + # This is here for gradio authentication + # Its just not setup. + #def same_auth(username, password): + # return username == password + + +def format_transcription(transcription_result): + if transcription_result: + json_data = transcription_result['transcription'] + return json.dumps(json_data, indent=2) + else: + return "" + + + +def format_file_path(file_path, fallback_path=None): + if file_path and os.path.exists(file_path): + logging.debug(f"File exists: {file_path}") + return file_path + elif fallback_path and os.path.exists(fallback_path): + logging.debug(f"File does not exist: {file_path}. Returning fallback path: {fallback_path}") + return fallback_path + else: + logging.debug(f"File does not exist: {file_path}. 
No fallback path available.") + return None + + +def launch_ui(demo_mode=False): + whisper_models = ["small.en", "medium.en", "large"] + + with gr.Blocks() as iface: + with gr.Tab("Audio Transcription + Summarization"): + with gr.Row(): + # Light/Dark mode toggle switch + theme_toggle = gr.Radio(choices=["Light", "Dark"], value="Light", + label="Light/Dark Mode Toggle (Toggle to change UI color scheme)") + + # UI Mode toggle switch + ui_mode_toggle = gr.Radio(choices=["Simple", "Advanced"], value="Simple", + label="UI Mode (Toggle to show all options)") + + # URL input is always visible + url_input = gr.Textbox(label="URL (Mandatory)", placeholder="Enter the video URL here") + + # Inputs to be shown or hidden + num_speakers_input = gr.Number(value=2, label="Number of Speakers(Optional - Currently has no effect)", + visible=True) + whisper_model_input = gr.Dropdown(choices=whisper_models, value="small.en", + label="Whisper Model(This is the ML model used for transcription.)", + visible=True) + custom_prompt_input = gr.Textbox( + label="Custom Prompt (Customize your summarization, or ask a question about the video and have it " + "answered)", + placeholder="Above is the transcript of a video. Please read " + "through the transcript carefully. Identify the main topics that are discussed over the " + "course of the transcript. Then, summarize the key points about each main topic in a " + "concise bullet point. The bullet points should cover the key information conveyed about " + "each topic in the video, but should be much shorter than the full transcript. Please " + "output your bullet point summary inside tags.", + lines=3, visible=True) + offset_input = gr.Number(value=0, label="Offset (Seconds into the video to start transcribing at)", + visible=True) + api_name_input = gr.Dropdown( + choices=[None,"huggingface", "openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"], + value=None, + label="API Name (Mandatory Unless you just want a Transcription)", visible=True) + api_key_input = gr.Textbox(label="API Key (Mandatory if API Name is specified)", + placeholder="Enter your API key here; Ignore if using Local API or Built-in API", visible=True) + vad_filter_input = gr.Checkbox(label="VAD Filter (WIP)", value=False, visible=True) + download_video_input = gr.Checkbox( + label="Download Video(Select to allow for file download of selected video)", value=False, visible=True) + download_audio_input = gr.Checkbox( + label="Download Audio(Select to allow for file download of selected Video's Audio)", value=False, + visible=True) + # FIXME - Hide unless advance menu shown + detail_level_input = gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.1, interactive=True, + label="Summary Detail Level (Slide me) (WIP)", visible=True) + + inputs = [num_speakers_input, whisper_model_input, custom_prompt_input, offset_input, api_name_input, + api_key_input, vad_filter_input, download_video_input, download_audio_input, detail_level_input] + + # Function to toggle Light/Dark Mode + def toggle_light(mode): + dark = (mode == "Dark") + return {"__theme": "dark" if dark else "light"} + + # Set the event listener for the Light/Dark mode toggle switch + theme_toggle.change(fn=toggle_light, inputs=theme_toggle, outputs=None) + + # Function to toggle visibility of advanced inputs + def toggle_ui(mode): + visible = (mode == "Advanced") + return [visible] * len(inputs) + + # Set the event listener for the UI Mode toggle switch + ui_mode_toggle.change(fn=toggle_ui, inputs=ui_mode_toggle, outputs=inputs) + + # 
Combine URL input and inputs + all_inputs = [url_input] + inputs + + outputs = [ + gr.Textbox(label="Transcription (Resulting Transcription from your input URL)"), + gr.Textbox(label="Summary or Status Message (Current status of Summary or Summary itself)"), + gr.File(label="Download Transcription as JSON (Download the Transcription as a file)"), + gr.File(label="Download Summary as Text (Download the Summary as a file)"), + gr.File(label="Download Video (Download the Video as a file)"), + gr.File(label="Download Audio (Download the Audio as a file)") + ] + + gr.Interface( + fn=process_url, + inputs=all_inputs, + outputs=outputs, + title="Video Transcription and Summarization", + description="Submit a video URL for transcription and summarization. Ensure you input all necessary " + "information including API keys." + ) + + with gr.Tab("Transcription & Summarization History"): + gr.Markdown("Plan to put access to SQLite DB here") + gr.Markdown("Allow for searching/retrieval/re-prompting of previous transcriptions") + gr.Markdown("Also allow for re-transcribing videos if they're still online, while updating/adding to prior entry") + gr.Markdown("RAG here we come....:/") + + with gr.Accordion("Open for More!", open=False): + gr.Markdown("Plan to put Prompt Samples/Templates down here") + + iface.launch(share=False) + + +# +# +####################################################################################################################### + + +####################################################################################################################### +# Main() +# +def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False, + download_video_flag=False, demo_mode=False, custom_prompt=None, overwrite=False, + rolling_summarization=None, detail=0.01): + global summary, audio_file + if input_path is None and args.user_interface: + return [] + start_time = time.monotonic() + paths = [] # Initialize paths as an empty list + if os.path.isfile(input_path) and input_path.endswith('.txt'): + logging.debug("MAIN: User passed in a text file, processing text file...") + paths = read_paths_from_file(input_path) + elif os.path.exists(input_path): + logging.debug("MAIN: Local file path detected") + paths = [input_path] + elif (info_dict := get_youtube(input_path)) and 'entries' in info_dict: + logging.debug("MAIN: YouTube playlist detected") + print( + "\n\nSorry, but playlists aren't currently supported. You can run the following command to generate a " + "text file that you can then pass into this script though! (It may not work... playlist support seems " + "spotty)" + """\n\n\tpython Get_Playlist_URLs.py \n\n\tThen,\n\n\tpython + diarizer.py \n\n""") + return + else: + paths = [input_path] + results = [] + + for path in paths: + try: + if path.startswith('http'): + logging.debug("MAIN: URL Detected") + info_dict = get_youtube(path) + json_file_path = None + if info_dict: + logging.debug("MAIN: Creating path for video file...") + download_path = create_download_directory(info_dict['title']) + logging.debug("MAIN: Path created successfully\n MAIN: Now Downloading video from yt_dlp...") + try: + video_path = download_video(path, download_path, info_dict, download_video_flag) + except RuntimeError as e: + logging.error(f"Error downloading video: {str(e)}") + #FIXME - figure something out for handling this situation.... 
+ continue + logging.debug("MAIN: Video downloaded successfully") + logging.debug("MAIN: Converting video file to WAV...") + audio_file = convert_to_wav(video_path, offset) + logging.debug("MAIN: Audio file converted successfully") + else: + if os.path.exists(path): + logging.debug("MAIN: Local file path detected") + download_path, info_dict, audio_file = process_local_file(path) + else: + logging.error(f"File does not exist: {path}") + continue + + if info_dict: + logging.debug("MAIN: Creating transcription file from WAV") + segments = speech_to_text(audio_file, whisper_model=whisper_model, vad_filter=vad_filter) + transcription_result = { + 'video_path': path, + 'audio_file': audio_file, + 'transcription': segments + } + results.append(transcription_result) + logging.info(f"MAIN: Transcription complete: {audio_file}") + + # Perform rolling summarization based on API Name, detail level, and if an API key exists + # Will remove the API key once rolling is added for llama.cpp + if rolling_summarization: + logging.info("MAIN: Rolling Summarization") + + # Extract the text from the segments + text = extract_text_from_segments(segments) + + # Set the json_file_path + json_file_path = audio_file.replace('.wav', '.segments.json') + + # Perform rolling summarization using the detail level passed into main() + summary = summarize_with_detail_openai(text, detail=detail, verbose=False) + + # Handle the summarized output + if summary: + transcription_result['summary'] = summary + logging.info("MAIN: Rolling Summarization successful.") + save_summary_to_file(summary, json_file_path) + else: + logging.warning("MAIN: Rolling Summarization failed.") + + # if api_name and api_key: + # logging.debug(f"MAIN: Rolling summarization being performed by {api_name}") + # json_file_path = audio_file.replace('.wav', '.segments.json') + # if api_name.lower() == 'openai': + # openai_api_key = api_key if api_key else config.get('API', 'openai_api_key', + # fallback=None) + # try: + # logging.debug(f"MAIN: trying to summarize with openAI") + # summary = (openai_api_key, json_file_path, openai_model, custom_prompt) + # except requests.exceptions.ConnectionError: + # requests.status_code = "Connection: " + # Perform summarization based on the specified API + elif api_name: + logging.debug(f"MAIN: Summarization being performed by {api_name}") + json_file_path = audio_file.replace('.wav', '.segments.json') + if api_name.lower() == 'openai': + openai_api_key = api_key if api_key else config.get('API', 'openai_api_key', + fallback=None) + try: + logging.debug(f"MAIN: trying to summarize with openAI") + summary = summarize_with_openai(openai_api_key, json_file_path, openai_model, custom_prompt) + except requests.exceptions.ConnectionError: + requests.status_code = "Connection: " + elif api_name.lower() == "anthropic": + anthropic_api_key = api_key if api_key else config.get('API', 'anthropic_api_key', + fallback=None) + try: + logging.debug(f"MAIN: Trying to summarize with anthropic") + summary = summarize_with_claude(anthropic_api_key, json_file_path, anthropic_model, + custom_prompt) + except requests.exceptions.ConnectionError: + requests.status_code = "Connection: " + elif api_name.lower() == "cohere": + cohere_api_key = api_key if api_key else config.get('API', 'cohere_api_key', fallback=None) + try: + logging.debug(f"MAIN: Trying to summarize with cohere") + summary = summarize_with_cohere(cohere_api_key, json_file_path, cohere_model, custom_prompt) + except requests.exceptions.ConnectionError: + requests.status_code = "Connection: " + elif 
api_name.lower() == "groq": + groq_api_key = api_key if api_key else config.get('API', 'groq_api_key', fallback=None) + try: + logging.debug(f"MAIN: Trying to summarize with Groq") + summary = summarize_with_groq(groq_api_key, json_file_path, groq_model, custom_prompt) + except requests.exceptions.ConnectionError: + requests.status_code = "Connection: " + elif api_name.lower() == "llama": + llama_token = api_key if api_key else config.get('API', 'llama_api_key', fallback=None) + llama_ip = llama_api_IP + try: + logging.debug(f"MAIN: Trying to summarize with Llama.cpp") + summary = summarize_with_llama(llama_ip, json_file_path, llama_token, custom_prompt) + except requests.exceptions.ConnectionError: + requests.status_code = "Connection: " + elif api_name.lower() == "kobold": + kobold_token = api_key if api_key else config.get('API', 'kobold_api_key', fallback=None) + kobold_ip = kobold_api_IP + try: + logging.debug(f"MAIN: Trying to summarize with kobold.cpp") + summary = summarize_with_kobold(kobold_ip, json_file_path, kobold_token, custom_prompt) + except requests.exceptions.ConnectionError: + requests.status_code = "Connection: " + elif api_name.lower() == "ooba": + ooba_token = api_key if api_key else config.get('API', 'ooba_api_key', fallback=None) + ooba_ip = ooba_api_IP + try: + logging.debug(f"MAIN: Trying to summarize with oobabooga") + summary = summarize_with_oobabooga(ooba_ip, json_file_path, ooba_token, custom_prompt) + except requests.exceptions.ConnectionError: + requests.status_code = "Connection: " + elif api_name.lower() == "huggingface": + huggingface_api_key = api_key if api_key else config.get('API', 'huggingface_api_key', + fallback=None) + try: + logging.debug(f"MAIN: Trying to summarize with huggingface") + summary = summarize_with_huggingface(huggingface_api_key, json_file_path, custom_prompt) + except requests.exceptions.ConnectionError: + requests.status_code = "Connection: " + + else: + logging.warning(f"Unsupported API: {api_name}") + summary = None + + # final_summary is never produced in this version of the script; initialize it so the elif below cannot raise a NameError + final_summary = None + + if summary: + transcription_result['summary'] = summary + logging.info(f"Summary generated using {api_name} API") + save_summary_to_file(summary, json_file_path) + elif final_summary: + logging.info(f"Rolling summary generated using {api_name} API") + logging.info(f"Final Rolling summary is {final_summary}\n\n") + save_summary_to_file(final_summary, json_file_path) + else: + logging.warning(f"Failed to generate summary using {api_name} API") + else: + logging.info("MAIN: #2 - No API specified. 
Summarization will not be performed") + except Exception as e: + logging.error(f"Error processing path: {path}") + logging.error(str(e)) + continue + #end_time = time.monotonic() + # print("Total program execution time: " + timedelta(seconds=end_time - start_time)) + + return results + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Transcribe and summarize videos.') + parser.add_argument('input_path', type=str, help='Path or URL of the video', nargs='?') + parser.add_argument('-v', '--video', action='store_true', help='Download the video instead of just the audio') + parser.add_argument('-api', '--api_name', type=str, help='API name for summarization (optional)') + parser.add_argument('-key', '--api_key', type=str, help='API key for summarization (optional)') + parser.add_argument('-ns', '--num_speakers', type=int, default=2, help='Number of speakers (default: 2)') + parser.add_argument('-wm', '--whisper_model', type=str, default='small.en', + help='Whisper model (default: small.en)') + parser.add_argument('-off', '--offset', type=int, default=0, help='Offset in seconds (default: 0)') + parser.add_argument('-vad', '--vad_filter', action='store_true', help='Enable VAD filter') + parser.add_argument('-log', '--log_level', type=str, default='INFO', + choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], help='Log level (default: INFO)') + parser.add_argument('-ui', '--user_interface', action='store_true', help="Launch the Gradio user interface") + parser.add_argument('-demo', '--demo_mode', action='store_true', help='Enable demo mode') + parser.add_argument('-prompt', '--custom_prompt', type=str, + help='Pass in a custom prompt to be used in place of the existing one.\n (Probably should just ' + 'modify the script itself...)') + parser.add_argument('-overwrite', '--overwrite', action='store_true', help='Overwrite existing files') + parser.add_argument('-roll', '--rolling_summarization', action='store_true', help='Enable rolling summarization') + parser.add_argument('-detail', '--detail_level', type=float, help='Mandatory if rolling summarization is enabled, ' + 'defines the chunk size.\n Default is 0.01(lots ' + 'of chunks) -> 1.00 (few chunks)\n Currently ' + 'only OpenAI works. ', + default=0.01,) + # parser.add_argument('-o', '--output_path', type=str, help='Path to save the output file') + # parser.add_argument('--log_file', action=str, help='Where to save logfile (non-default)') + args = parser.parse_args() + + logging.basicConfig(level=getattr(logging, log_level), format='%(asctime)s - %(levelname)s - %(message)s') + + custom_prompt = args.custom_prompt + + if custom_prompt: + logging.debug(f"Custom prompt defined, will use \n\n{custom_prompt} \n\nas the prompt") + print(f"Custom Prompt has been defined. Custom prompt: \n\n {args.custom_prompt}") + else: + logging.debug("No custom prompt defined, will use default") + args.custom_prompt = ("\n\nAbove is the transcript of a video. " + "Please read through the transcript carefully. Identify the main topics that are " + "discussed over the course of the transcript. Then, summarize the key points about each " + "main topic in a concise bullet point. The bullet points should cover the key " + "information conveyed about each topic in the video, but should be much shorter than " + "the full transcript. 
Please output your bullet point summary inside " + "tags.") + print("No custom prompt defined, will use default") + + if args.user_interface: + launch_ui(demo_mode=False) + else: + if not args.input_path: + parser.print_help() + sys.exit(1) + + logging.info('Starting the transcription and summarization process.') + logging.info(f'Input path: {args.input_path}') + logging.info(f'API Name: {args.api_name}') + logging.info(f'Number of speakers: {args.num_speakers}') + logging.info(f'Whisper model: {args.whisper_model}') + logging.info(f'Offset: {args.offset}') + logging.info(f'VAD filter: {args.vad_filter}') + logging.info(f'Log Level: {args.log_level}') # lol + logging.info(f'Demo Mode: {args.demo_mode}') + logging.info(f'Custom Prompt: {args.custom_prompt}') + logging.info(f'Overwrite: {args.overwrite}') + logging.info(f'Rolling Summarization: {args.rolling_summarization}') + logging.info(f'User Interface: {args.user_interface}') + logging.info(f'Video Download: {args.video}') + # logging.info(f'Save File location: {args.output_path}') + # logging.info(f'Log File location: {args.log_file}') + + # Get all API keys from the config + api_keys = {key: value for key, value in config.items('API') if key.endswith('_api_key')} + + api_name = args.api_name + + # Rolling Summarization will only be performed if an API is specified and the API key is available + # and the rolling summarization flag is set + # + summary = None # Initialize to ensure it's always defined + if args.api_name and args.rolling_summarization and any( + key.startswith(args.api_name) and value is not None for key, value in api_keys.items()): + logging.info(f'MAIN: API used: {args.api_name}') + logging.info('MAIN: Rolling Summarization will be performed.') + + elif args.api_name: + logging.info(f'MAIN: API used: {args.api_name}') + logging.info('MAIN: Summarization (not rolling) will be performed.') + + else: + logging.info('No API specified. Summarization will not be performed.') + + logging.debug("Platform check being performed...") + platform_check() + logging.debug("CUDA check being performed...") + cuda_check() + logging.debug("ffmpeg check being performed...") + check_ffmpeg() + + try: + results = main(args.input_path, api_name=args.api_name, api_key=args.api_key, + num_speakers=args.num_speakers, whisper_model=args.whisper_model, offset=args.offset, + vad_filter=args.vad_filter, download_video_flag=args.video, overwrite=args.overwrite, + rolling_summarization=args.rolling_summarization, custom_prompt=args.custom_prompt, + demo_mode=args.demo_mode, detail=args.detail_level) + logging.info('Transcription process completed.') + except Exception as e: + logging.error('An error occurred during the transcription process.') + logging.error(str(e)) + sys.exit(1) diff --git a/tldw-original-scripts/README b/tldw-original-scripts/README new file mode 100644 index 00000000..c3217997 --- /dev/null +++ b/tldw-original-scripts/README @@ -0,0 +1,4 @@ +The files contained in this folder are under whole ownership of the-crypt-keeper. +I claim no ownership or copyright over these files. + +They are here for reproduction purposes and attribution of original authorship. \ No newline at end of file diff --git a/tldw-original-scripts/README.md b/tldw-original-scripts/README.md new file mode 100644 index 00000000..6434913d --- /dev/null +++ b/tldw-original-scripts/README.md @@ -0,0 +1,19 @@ +# Too Long, Didnt Watch + +YouTube contains an incredible amount of knowledge, much of which is locked inside multi-hour videos. 
Let's extract and summarize with AI! + +- `diarize.py` - download, transrcibe and diarize audio + - [yt-dlp](https://github.com/yt-dlp/yt-dlp) - download audio tracks of youtube videos + - [ffmpeg](https://github.com/FFmpeg/FFmpeg) - decompress audio + - [faster_whisper](https://github.com/SYSTRAN/faster-whisper) - speech to text + - [pyannote](https://github.com/pyannote/pyannote-audio) - diarization + +- `chunker.py` - break text into parts and prepare each part for LLM summarization + +- `roller-*.py` - rolling summarization + - [can-ai-code](https://github.com/the-crypt-keeper/can-ai-code) - interview executors to run LLM inference + +- `compare.py` - prepare LLM outputs for webapp +- `compare-app.py` - summary viewer webapp + +This project is under active development and is not ready for production use. diff --git a/chunker.py b/tldw-original-scripts/chunker.py old mode 100755 new mode 100644 similarity index 100% rename from chunker.py rename to tldw-original-scripts/chunker.py diff --git a/compare-app.py b/tldw-original-scripts/compare-app.py old mode 100755 new mode 100644 similarity index 100% rename from compare-app.py rename to tldw-original-scripts/compare-app.py diff --git a/compare.py b/tldw-original-scripts/compare.py old mode 100755 new mode 100644 similarity index 100% rename from compare.py rename to tldw-original-scripts/compare.py diff --git a/compare/aoe-english.json b/tldw-original-scripts/compare/aoe-english.json similarity index 100% rename from compare/aoe-english.json rename to tldw-original-scripts/compare/aoe-english.json diff --git a/compare/aoe.yaml b/tldw-original-scripts/compare/aoe.yaml similarity index 100% rename from compare/aoe.yaml rename to tldw-original-scripts/compare/aoe.yaml diff --git a/compare/ufo-13b-english.json b/tldw-original-scripts/compare/ufo-13b-english.json similarity index 100% rename from compare/ufo-13b-english.json rename to tldw-original-scripts/compare/ufo-13b-english.json diff --git a/compare/ufo-13b.yaml b/tldw-original-scripts/compare/ufo-13b.yaml similarity index 100% rename from compare/ufo-13b.yaml rename to tldw-original-scripts/compare/ufo-13b.yaml diff --git a/compare/ufo-english.json b/tldw-original-scripts/compare/ufo-english.json similarity index 100% rename from compare/ufo-english.json rename to tldw-original-scripts/compare/ufo-english.json diff --git a/compare/ufo.yaml b/tldw-original-scripts/compare/ufo.yaml similarity index 100% rename from compare/ufo.yaml rename to tldw-original-scripts/compare/ufo.yaml diff --git a/data/Elon Musk's BRUTALLY Honest Interview With Tucker Carlson (2023) [zaB_20bkoA4].diarize.json b/tldw-original-scripts/data/Elon Musk's BRUTALLY Honest Interview With Tucker Carlson (2023) [zaB_20bkoA4].diarize.json similarity index 100% rename from data/Elon Musk's BRUTALLY Honest Interview With Tucker Carlson (2023) [zaB_20bkoA4].diarize.json rename to tldw-original-scripts/data/Elon Musk's BRUTALLY Honest Interview With Tucker Carlson (2023) [zaB_20bkoA4].diarize.json diff --git a/data/Elon Musk's BRUTALLY Honest Interview With Tucker Carlson (2023) [zaB_20bkoA4].info.json b/tldw-original-scripts/data/Elon Musk's BRUTALLY Honest Interview With Tucker Carlson (2023) [zaB_20bkoA4].info.json similarity index 100% rename from data/Elon Musk's BRUTALLY Honest Interview With Tucker Carlson (2023) [zaB_20bkoA4].info.json rename to tldw-original-scripts/data/Elon Musk's BRUTALLY Honest Interview With Tucker Carlson (2023) [zaB_20bkoA4].info.json diff --git a/data/Elon Musk's BRUTALLY Honest Interview 
With Tucker Carlson (2023) [zaB_20bkoA4].summary.json b/tldw-original-scripts/data/Elon Musk's BRUTALLY Honest Interview With Tucker Carlson (2023) [zaB_20bkoA4].summary.json similarity index 100% rename from data/Elon Musk's BRUTALLY Honest Interview With Tucker Carlson (2023) [zaB_20bkoA4].summary.json rename to tldw-original-scripts/data/Elon Musk's BRUTALLY Honest Interview With Tucker Carlson (2023) [zaB_20bkoA4].summary.json diff --git "a/data/Elon Musk\357\274\232 A future worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].diarize.json" "b/tldw-original-scripts/data/Elon Musk\357\274\232 A future worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].diarize.json" similarity index 100% rename from "data/Elon Musk\357\274\232 A future worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].diarize.json" rename to "tldw-original-scripts/data/Elon Musk\357\274\232 A future worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].diarize.json" diff --git "a/data/Elon Musk\357\274\232 A future worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].info.json" "b/tldw-original-scripts/data/Elon Musk\357\274\232 A future worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].info.json" similarity index 100% rename from "data/Elon Musk\357\274\232 A future worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].info.json" rename to "tldw-original-scripts/data/Elon Musk\357\274\232 A future worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].info.json" diff --git "a/data/Elon Musk\357\274\232 A future worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].summary.json" "b/tldw-original-scripts/data/Elon Musk\357\274\232 A future worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].summary.json" similarity index 100% rename from "data/Elon Musk\357\274\232 A future worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].summary.json" rename to "tldw-original-scripts/data/Elon Musk\357\274\232 A future worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].summary.json" diff --git a/data/GRAND FINAL - 10,000 AoE2 Event (The Resurgence) [jnoxjLJind4].diarize.json b/tldw-original-scripts/data/GRAND FINAL - 10,000 AoE2 Event (The Resurgence) [jnoxjLJind4].diarize.json similarity index 100% rename from data/GRAND FINAL - 10,000 AoE2 Event (The Resurgence) [jnoxjLJind4].diarize.json rename to tldw-original-scripts/data/GRAND FINAL - 10,000 AoE2 Event (The Resurgence) [jnoxjLJind4].diarize.json diff --git a/data/GRAND FINAL - 10,000 AoE2 Event (The Resurgence) [jnoxjLJind4].info.json b/tldw-original-scripts/data/GRAND FINAL - 10,000 AoE2 Event (The Resurgence) [jnoxjLJind4].info.json similarity index 100% rename from data/GRAND FINAL - 10,000 AoE2 Event (The Resurgence) [jnoxjLJind4].info.json rename to tldw-original-scripts/data/GRAND FINAL - 10,000 AoE2 Event (The Resurgence) [jnoxjLJind4].info.json diff --git "a/data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].diarize.json" "b/tldw-original-scripts/data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].diarize.json" similarity index 100% rename 
from "data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].diarize.json" rename to "tldw-original-scripts/data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].diarize.json" diff --git "a/data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].info.json" "b/tldw-original-scripts/data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].info.json" similarity index 100% rename from "data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].info.json" rename to "tldw-original-scripts/data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].info.json" diff --git "a/data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].summary.json" "b/tldw-original-scripts/data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].summary.json" similarity index 100% rename from "data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].summary.json" rename to "tldw-original-scripts/data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].summary.json" diff --git a/data/Subcommittee on National Security, the Border, and Foreign Affairs Hearing [KQ7Dw-739VY].diarize.json b/tldw-original-scripts/data/Subcommittee on National Security, the Border, and Foreign Affairs Hearing [KQ7Dw-739VY].diarize.json similarity index 100% rename from data/Subcommittee on National Security, the Border, and Foreign Affairs Hearing [KQ7Dw-739VY].diarize.json rename to tldw-original-scripts/data/Subcommittee on National Security, the Border, and Foreign Affairs Hearing [KQ7Dw-739VY].diarize.json diff --git a/data/Subcommittee on National Security, the Border, and Foreign Affairs Hearing [KQ7Dw-739VY].info.json b/tldw-original-scripts/data/Subcommittee on National Security, the Border, and Foreign Affairs Hearing [KQ7Dw-739VY].info.json similarity index 100% rename from data/Subcommittee on National Security, the Border, and Foreign Affairs Hearing [KQ7Dw-739VY].info.json rename to tldw-original-scripts/data/Subcommittee on National Security, the Border, and Foreign Affairs Hearing [KQ7Dw-739VY].info.json diff --git a/diarize.py b/tldw-original-scripts/diarize.py similarity index 100% rename from diarize.py rename to tldw-original-scripts/diarize.py diff --git a/merger.py b/tldw-original-scripts/merger.py similarity index 100% rename from merger.py rename to tldw-original-scripts/merger.py diff --git a/params/summary.json b/tldw-original-scripts/params/summary.json similarity index 100% rename from params/summary.json rename to tldw-original-scripts/params/summary.json diff --git a/prompts/airoboros-l2-context.txt b/tldw-original-scripts/prompts/airoboros-l2-context.txt similarity index 100% rename from prompts/airoboros-l2-context.txt rename to tldw-original-scripts/prompts/airoboros-l2-context.txt diff --git a/pyannote.py b/tldw-original-scripts/pyannote.py similarity index 100% rename from 
pyannote.py rename to tldw-original-scripts/pyannote.py diff --git a/results/interview_aoe-small-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017407.ndjson b/tldw-original-scripts/results/interview_aoe-small-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017407.ndjson similarity index 100% rename from results/interview_aoe-small-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017407.ndjson rename to tldw-original-scripts/results/interview_aoe-small-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017407.ndjson diff --git a/results/interview_elon-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017434.ndjson b/tldw-original-scripts/results/interview_elon-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017434.ndjson similarity index 100% rename from results/interview_elon-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017434.ndjson rename to tldw-original-scripts/results/interview_elon-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017434.ndjson diff --git a/results/interview_lex-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017487.ndjson b/tldw-original-scripts/results/interview_lex-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017487.ndjson similarity index 100% rename from results/interview_lex-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017487.ndjson rename to tldw-original-scripts/results/interview_lex-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017487.ndjson diff --git a/results/interview_ufo-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017596.ndjson b/tldw-original-scripts/results/interview_ufo-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017596.ndjson similarity index 100% rename from results/interview_ufo-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017596.ndjson rename to tldw-original-scripts/results/interview_ufo-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017596.ndjson diff --git a/results/prepare_airoboros-l2-context-300_english_airoboros-l2-context.ndjson b/tldw-original-scripts/results/prepare_airoboros-l2-context-300_english_airoboros-l2-context.ndjson similarity index 100% rename from results/prepare_airoboros-l2-context-300_english_airoboros-l2-context.ndjson rename to tldw-original-scripts/results/prepare_airoboros-l2-context-300_english_airoboros-l2-context.ndjson diff --git a/results/prepare_aoe-small-300_english_airoboros-l2-context.ndjson b/tldw-original-scripts/results/prepare_aoe-small-300_english_airoboros-l2-context.ndjson similarity index 100% rename from results/prepare_aoe-small-300_english_airoboros-l2-context.ndjson rename to tldw-original-scripts/results/prepare_aoe-small-300_english_airoboros-l2-context.ndjson diff --git a/results/prepare_elon-300_english_airoboros-l2-context.ndjson b/tldw-original-scripts/results/prepare_elon-300_english_airoboros-l2-context.ndjson similarity index 100% rename from results/prepare_elon-300_english_airoboros-l2-context.ndjson rename to 
tldw-original-scripts/results/prepare_elon-300_english_airoboros-l2-context.ndjson diff --git a/results/prepare_lex-300_english_airoboros-l2-context.ndjson b/tldw-original-scripts/results/prepare_lex-300_english_airoboros-l2-context.ndjson similarity index 100% rename from results/prepare_lex-300_english_airoboros-l2-context.ndjson rename to tldw-original-scripts/results/prepare_lex-300_english_airoboros-l2-context.ndjson diff --git a/results/prepare_ufo-300_english_airoboros-l2-context.ndjson b/tldw-original-scripts/results/prepare_ufo-300_english_airoboros-l2-context.ndjson similarity index 100% rename from results/prepare_ufo-300_english_airoboros-l2-context.ndjson rename to tldw-original-scripts/results/prepare_ufo-300_english_airoboros-l2-context.ndjson diff --git a/roller-chatgpt-v2.py b/tldw-original-scripts/roller-chatgpt-v2.py similarity index 100% rename from roller-chatgpt-v2.py rename to tldw-original-scripts/roller-chatgpt-v2.py diff --git a/roller-chatgpt.py b/tldw-original-scripts/roller-chatgpt.py old mode 100755 new mode 100644 similarity index 100% rename from roller-chatgpt.py rename to tldw-original-scripts/roller-chatgpt.py diff --git a/roller-exllama.py b/tldw-original-scripts/roller-exllama.py similarity index 100% rename from roller-exllama.py rename to tldw-original-scripts/roller-exllama.py diff --git a/roller-vllm.py b/tldw-original-scripts/roller-vllm.py similarity index 100% rename from roller-vllm.py rename to tldw-original-scripts/roller-vllm.py