Deploy OpenAI Whisper V2 Managed Endpoint — Azure ML

Balamurugan Balakreshnan
3 min read · Jul 29, 2023

Steps

  • Download the Whisper V2 large model
  • Create a conda.yaml file for the environment
  • Create a score file for the endpoint
  • Deploy it as a managed endpoint
  • Test the endpoint with a sample audio file

Score file

import os
import logging
import base64
import whisper
from transformers.pipelines.audio_utils import ffmpeg_read

SAMPLE_RATE = 16000

def init():
    """
    This function is called when the container is initialized/started, typically after create/update of the deployment.
    You can write the logic here to perform init operations like caching the model in memory.
    """
    global model
    # AZUREML_MODEL_DIR is an environment variable created during deployment.
    # It is the path to the model folder (./azureml-models/$MODEL_NAME/$VERSION).
    # Whisper downloads the large-v2 checkpoint into this folder if it is not already present.
    model = whisper.load_model("large-v2", download_root=os.getenv("AZUREML_MODEL_DIR"))
    logging.info("Init complete")

def run(data):
    """
    This function is called for every invocation of the endpoint to perform the actual scoring/prediction.
    The request body is a base64-encoded audio file: decode it, convert it to a 16 kHz waveform,
    and return the Whisper transcription.
    """
    inputs = base64.b64decode(data)
    audio_nparray = ffmpeg_read(inputs, SAMPLE_RATE)

    # run inference; whisper's transcribe() accepts the numpy waveform directly
    result = model.transcribe(audio_nparray)

    # postprocess the prediction
    return {"text": result["text"]}

Conda.yaml

channels:
  - anaconda
  - pytorch
  - conda-forge
dependencies:
  - python=3.8.16
  - pip<=23.0.1
  - ffmpeg=4.2.2
  - pip:
      - mlflow==2.3.1
      - cloudpickle==2.2.1
      - jsonpickle==3.0.1
      - mlflow-skinny==2.3.1
      - azureml-core==1.51.0.post1
      - azureml-mlflow==1.51.0
      - azureml-metrics==0.0.14.post1
      - scikit-learn==0.24.2
      - cryptography==41.0.1
      - python-dateutil==2.8.2
      - datasets==2.11.0
      - soundfile==0.12.1
      - librosa==0.10.0.post2
      - diffusers==0.14.0
      - sentencepiece==0.1.97
      - transformers==4.30.2
      - torch==2.0.1
      - torchaudio
      - accelerate==0.20.3
      - Pillow==9.4.0
      - azureml-evaluate-mlflow==0.0.14.post1
      - wget==3.2
      - more-itertools==9.1.0
      - ffmpeg-python==0.2.0
      - azureml-inference-server-http
      - openai-whisper
name: mlflow-env

Deployment Code

  • Write code to deploy to a managed endpoint
  • We are using CPU
  • Install openai-whisper locally: pip install openai-whisper or pip install git+https://github.com/openai/whisper.git
  • Import libraries
# import required libraries
from azure.ai.ml import MLClient, Input
from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment, Model, Environment, CodeConfiguration, OnlineRequestSettings, ProbeSettings
from azure.ai.ml.constants import AssetTypes
from azure.identity import DefaultAzureCredential
import datetime
  • Create a client
subscription_id = "xxxxxxxxxxxxxx"
resource_group = "rgname"
workspace = "workspacename"
# get a handle to the workspace
ml_client = MLClient(DefaultAzureCredential(), subscription_id, resource_group, workspace)
  • Set credentials
credential = DefaultAzureCredential()
credential.get_token("https://management.azure.com/.default")
registry_ml_client = MLClient(credential, registry_name="azureml")
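
The registry_ml_client handle is not used again in this walkthrough; it points at the shared azureml registry and is useful if you want to browse the curated models there instead of packaging your own. A quick sketch:

# list the names of models available in the azureml registry (large output)
for m in registry_ml_client.models.list():
    print(m.name)
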
  • Download the openai-whisper model
import whisper
import pickle

modelwhisper = whisper.load_model("large-v2")
pickle.dump(modelwhisper, open('temp_whisper.pkl', 'wb'))
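
Note that the pickle only captures the in-memory model object, and the scoring script above does not unpickle it; init() re-downloads the large-v2 checkpoint at container startup via download_root. If you would rather package the checkpoint with the registered model, a sketch of the alternative (the ./whisper_model folder name is an assumption):

import whisper

# download the large-v2 .pt checkpoint into a local folder; registering this
# folder instead of the pickle puts the weights under AZUREML_MODEL_DIR
whisper.load_model("large-v2", download_root="./whisper_model")
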
  • Create a model
from azure.ai.ml.entities import Model
from azure.ai.ml.constants import AssetTypes
file_model = Model(
    path="temp_whisper.pkl",
    type=AssetTypes.CUSTOM_MODEL,
    name="temp_whisper",
    description="Model created from OpenAI Whisper for speech to text",
)
ml_client.models.create_or_update(file_model)
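
To confirm the registration (and the version number used below), you can list the model's registered versions:

# print every registered version of temp_whisper
for m in ml_client.models.list(name="temp_whisper"):
    print(m.name, m.version)
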
  • Create endpoint name
# Define an endpoint name
endpoint_name = "whisper-largecli" + datetime.datetime.now().strftime("%m%d%H%M%f")
  • Create Endpoint
endpoint = ManagedOnlineEndpoint(
    name=endpoint_name,
    description="An online endpoint for a custom scoring-script deployment of Whisper",
    auth_mode="key",
)
ml_client.begin_create_or_update(endpoint).result()  # wait until the endpoint is provisioned
  • Get the model to deploy
model_name = "temp_whisper"
custommodel = ml_client.models.get(name=model_name, version="1")
print(custommodel.id)
  • Create environment
environment = Environment(
    conda_file="conda.yaml",
    image="mcr.microsoft.com/azureml/minimal-ubuntu20.04-py38-cpu-inference:latest",
)
  • Create a deployment
blue_deployment = ManagedOnlineDeployment(
    name="blue",
    endpoint_name=endpoint_name,
    model=custommodel.id,
    request_settings=OnlineRequestSettings(
        request_timeout_ms=90000,
        max_concurrent_requests_per_instance=1,
        max_queue_wait_ms=500,
    ),
    environment=environment,
    instance_type="Standard_DS5_v2",
    code_configuration=CodeConfiguration(
        code="./",
        scoring_script="onlineScore.py"
    ),
    instance_count=1,
)
ml_client.online_deployments.begin_create_or_update(blue_deployment)
  • Wait for the deployment to complete
  • Then assign traffic
endpoint.traffic = { "blue": 100 }
ml_client.begin_create_or_update(endpoint).result()
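
If the deployment gets stuck or the endpoint starts returning errors, the scoring container's logs are the quickest diagnostic. A short sketch using the same client:

# fetch the tail of the scoring container log for the blue deployment
logs = ml_client.online_deployments.get_logs(
    name="blue", endpoint_name=endpoint_name, lines=100
)
print(logs)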

Now test the endpoint

  • Endpoint configuration
endpoint = "https://endpointname.eastus2.inference.ml.azure.com/score"
key= "xxxxx"
  • Test endpoint
import urllib.request
import json
import os
import ssl
import base64
import mimetypes

def allowSelfSignedHttps(allowed):
    # bypass the server certificate verification on the client side
    if allowed and not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None):
        ssl._create_default_https_context = ssl._create_unverified_context

allowSelfSignedHttps(True)  # this line is needed if you use a self-signed certificate in your scoring service

# Request data goes here
# The example below assumes JSON formatting which may be updated
# depending on the format your endpoint expects.
# More information can be found here:
# https://docs.microsoft.com/azure/machine-learning/how-to-deploy-advanced-entry-script
url = 'https://endpointname.eastus2.inference.ml.azure.com/score'
#api_key = '{{key}}' # Replace this with the API key for the web service
api_key = key

# The azureml-model-deployment header will force the request to go to a specific deployment.
# Remove this header to have the request observe the endpoint traffic rules.
audiofile = './sample1.flac'
with open(audiofile, "rb") as i:
    b = i.read()

# get mimetype
content_type = mimetypes.guess_type(audiofile)[0]
headers = {'Content-Type': 'application/json', 'Authorization': ('Bearer ' + api_key), 'azureml-model-deployment': 'blue'}

body = base64.b64encode(b)
req = urllib.request.Request(url, body, headers)
try:
    response = urllib.request.urlopen(req)
    result = response.read()
    print(result)
except urllib.error.HTTPError as error:
    print("The request failed with status code: " + str(error.code))
    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
    print(error.info())
    print(error.read().decode("utf8", 'ignore'))
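
The raw result is the JSON body produced by run(), so the transcription itself is one json.loads away:

# result is raw bytes like b'{"text": " ..."}'
print(json.loads(result)["text"])
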
  • Delete the resources
# delete the endpoint and the deployment
ml_client.online_endpoints.begin_delete(endpoint_name)

Original article — Samples2023/AzureML/whisperv2manageendpoint.md at main · balakreshnan/Samples2023 · GitHub
