Files
miku-discord/stt-parakeet/setup_env.sh

182 lines
5.0 KiB
Bash
Executable File

#!/bin/bash
# Bootstrap script: prepares the environment for Parakeet ASR on ONNX Runtime.

# Stop at the first command that fails.
set -e

# Opening banner.
printf '%s\n' \
  "==========================================" \
  "Parakeet ASR Setup with onnx-asr" \
  "==========================================" \
  ""

# ANSI colour escapes, stored as literal backslash sequences; they are
# interpreted by `echo -e` at the point of use.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # resets colour
# Choose the interpreter: the newest of python3.12 / 3.11 / 3.10 that is on
# PATH (the range the script treats as GPU-capable), otherwise plain python3.
echo "Detecting Python version..."
PYTHON_CMD=python3 # used when no versioned interpreter is found
for candidate in python3.12 python3.11 python3.10; do
  if command -v "$candidate" > /dev/null 2>&1; then
    PYTHON_CMD=$candidate
    break
  fi
done

# `--version` prints "Python X.Y.Z"; keep only the second word.
PYTHON_VERSION=$("$PYTHON_CMD" --version 2>&1 | awk '{print $2}')
echo "Using Python: $PYTHON_CMD ($PYTHON_VERSION)"
# Reuse ./venv when the directory is already there; otherwise build it with
# the interpreter selected above.
if [ -d "venv" ]; then
  echo -e "${YELLOW}Virtual environment already exists${NC}"
else
  echo ""
  echo "Creating virtual environment with $PYTHON_CMD..."
  "$PYTHON_CMD" -m venv venv
  echo -e "${GREEN}✓ Virtual environment created${NC}"
fi

# Switch this shell over to the virtual environment.
printf '%s\n' "" "Activating virtual environment..."
. venv/bin/activate

# Bring pip up to date before installing anything else.
printf '%s\n' "" "Upgrading pip..."
pip install --upgrade pip
#######################################
# Extract the CUDA release number from `nvcc --version` output.
# Inputs:  the text printed by `nvcc --version`, on stdin
# Outputs: the number that follows the word "release" (e.g. "12.2") on
#          stdout; nothing when no such line is present
#######################################
parse_cuda_release() {
  sed -n 's/^.*release \([0-9][0-9.]*\).*$/\1/p' | head -n 1
}

# Check CUDA (informational only; a missing toolkit does not stop the setup)
echo ""
echo "Checking CUDA installation..."
if command -v nvcc > /dev/null 2>&1; then
  # nvcc reports a line such as
  #   "Cuda compilation tools, release 12.2, V12.2.140"
  # Selecting a fixed field and dropping its first character turned "12.2,"
  # into "2.2,", so the number is matched by the "release" keyword instead.
  CUDA_VERSION=$(nvcc --version | parse_cuda_release)
  echo -e "${GREEN}✓ CUDA found: ${CUDA_VERSION:-unknown version}${NC}"
else
  echo -e "${YELLOW}⚠ CUDA compiler (nvcc) not found${NC}"
  echo " If you have a GPU, make sure CUDA is installed:"
  echo " https://developer.nvidia.com/cuda-downloads"
fi

# Check NVIDIA GPU
echo ""
echo "Checking NVIDIA GPU..."
if ! command -v nvidia-smi > /dev/null 2>&1; then
  echo -e "${YELLOW}⚠ nvidia-smi not found${NC}"
  echo " Make sure NVIDIA drivers are installed if you have a GPU"
elif gpu_list=$(nvidia-smi --query-gpu=name,memory.total --format=csv,noheader 2> /dev/null) \
  && [ -n "$gpu_list" ]; then
  # Report success only when the query actually returned devices; the mere
  # presence of the nvidia-smi binary does not prove a usable GPU.
  echo -e "${GREEN}✓ NVIDIA GPU detected${NC}"
  while read -r gpu; do
    echo " $gpu"
  done <<< "$gpu_list"
else
  echo -e "${YELLOW}⚠ nvidia-smi is installed but could not query any GPU${NC}"
  echo " Make sure NVIDIA drivers are installed if you have a GPU"
fi
# Dependency installation banner.
printf '%s\n' \
  "" \
  "==========================================" \
  "Installing Python dependencies..." \
  "==========================================" \
  ""

# Ask the python3 that is first on PATH for its version (presumably the
# virtual environment's interpreter, since it was activated earlier).
PYTHON_MAJOR=$(python3 -c 'import sys; print(sys.version_info.major)')
PYTHON_MINOR=$(python3 -c 'import sys; print(sys.version_info.minor)')

# GPU wheels are assumed available unless the interpreter is 3.13 or newer.
USE_GPU=true
if [[ "$PYTHON_MAJOR" -eq 3 && "$PYTHON_MINOR" -ge 13 ]]; then
  USE_GPU=false
fi

if [[ "$USE_GPU" == false ]]; then
  echo -e "${YELLOW}⚠ Python 3.13+ detected${NC}"
  echo " onnxruntime-gpu is not yet available for Python 3.13+"
  echo " Installing CPU version of onnxruntime..."
  echo " For GPU support, please use Python 3.10-3.12"
else
  echo "Python version supports GPU acceleration"
fi

# onnx-asr itself: the "gpu" extra on GPU-capable interpreters, otherwise the
# plain package plus an explicit onnxruntime.
echo ""
case "$USE_GPU" in
  true)
    echo "Installing onnx-asr with GPU support..."
    pip install "onnx-asr[gpu,hub]"
    ;;
  *)
    echo "Installing onnx-asr (CPU version)..."
    pip install "onnx-asr[hub]" onnxruntime
    ;;
esac

# Remaining packages; the numpy requirement is quoted so that "<" is not
# taken as a redirection.
echo ""
echo "Installing additional dependencies..."
pip install "numpy<2.0" websockets sounddevice soundfile
#######################################
# Ask a yes/no question and read a single keypress from stdin.
# Arguments: $1 - prompt text (bash shows it only when stdin is a terminal)
# Outputs:   a newline on stdout after the keypress
# Returns:   0 when the character read is y or Y; 1 for anything else,
#            including end-of-file on stdin
#######################################
confirm() {
  local reply=""
  # `read` exits non-zero at end-of-file. The script runs under `set -e`, so
  # without this guard a closed or redirected stdin would abort the whole
  # setup at an optional question instead of simply answering "no".
  read -r -n 1 -p "$1" reply || true
  echo
  [[ $reply =~ ^[Yy]$ ]]
}

# Optional: Install TensorRT (if available)
echo ""
if confirm "Do you want to install TensorRT for faster inference? (y/n) "; then
  echo "Installing TensorRT..."
  # A failed TensorRT install is reported but does not stop the setup.
  pip install tensorrt tensorrt-cu12-libs \
    || echo -e "${YELLOW}⚠ TensorRT installation failed (optional)${NC}"
fi

# Run diagnostics
echo ""
echo "=========================================="
echo "Running system diagnostics..."
echo "=========================================="
echo ""
python3 tools/diagnose.py

# Test model download (optional)
echo ""
echo "=========================================="
echo "Model Download"
echo "=========================================="
echo ""
echo "The Parakeet model (~600MB) will be downloaded on first use."
if confirm "Do you want to download the model now? (y/n) "; then
  echo ""
  echo "Downloading model..."
  python3 -c "
import onnx_asr
print('Loading model (this will download ~600MB)...')
model = onnx_asr.load_model('nemo-parakeet-tdt-0.6b-v3', 'models/parakeet')
print('✓ Model downloaded successfully!')
"
else
  echo "Model will be downloaded when you first run the ASR pipeline."
fi
# Make sure the test_audio directory exists (no error if it already does).
mkdir -p test_audio

# Closing banner.
printf '%s\n' \
  "" \
  "==========================================" \
  "Setup Complete!" \
  "==========================================" \
  ""
echo -e "${GREEN}✓ Environment setup successful!${NC}"

# Follow-up instructions for the user.
printf '%s\n' \
  "" \
  "Next steps:" \
  " 1. Activate the virtual environment:" \
  " source venv/bin/activate" \
  "" \
  " 2. Test offline transcription:" \
  " python3 tools/test_offline.py your_audio.wav" \
  "" \
  " 3. Start the WebSocket server:" \
  " python3 server/ws_server.py" \
  "" \
  " 4. In another terminal, start the microphone client:" \
  " python3 client/mic_stream.py" \
  "" \
  "For more information, see README.md" \
  ""