Azure Machine Learning SDK v2 using AutoML — Titanic dataset

Balamurugan Balakreshnan

6 min read · Aug 6, 2022

Use Azure Machine Learning SDK v2 to train a model, Register and Deploy a model

Prerequisites

Azure Account
Azure Machine Learning workspace
Azure Storage Account
Get titanic.csv
Create folders
create a data folder
Training folder
Test folder
Validate folder
Upload the files to the above folders

Code

Import libraries

# Import required libraries
from azure.identity import DefaultAzureCredential
from azure.identity import AzureCliCredential
from azure.ai.ml import automl, Input, MLClient
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml.automl import (
    classification,
    ClassificationPrimaryMetrics,
    ClassificationModels,
)

Load workspace

from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient

# Credential object handed to every MLClient constructor below.
credential = DefaultAzureCredential()
ml_client = None
try:
    # First try to build the client from a workspace config.
    ml_client = MLClient.from_config(credential)
except Exception as ex:
    # Fall back to explicit workspace coordinates when from_config fails.
    print(ex)
    # Enter details of your AzureML workspace
    subscription_id = "xxxxx-xxxxxx-xxxxxx"
    resource_group = "rgname"
    workspace = "workspacename"
    ml_client = MLClient(credential, subscription_id, resource_group, workspace)

Display the workspace

# Fetch the workspace object the client is bound to.
workspace = ml_client.workspaces.get(name=ml_client.workspace_name)

# Use the client's public property rather than a private attribute of
# the connections operations object.
subscription_id = ml_client.subscription_id
resource_group = workspace.resource_group
workspace_name = ml_client.workspace_name

# Collect the workspace coordinates into a dict for display.
output = {}
output["Workspace"] = workspace_name
output["Subscription ID"] = subscription_id
output["Resource Group"] = resource_group
output["Location"] = workspace.location
output

Configure training data

# Create MLTables for training dataset
my_training_data_input = Input(
    type=AssetTypes.MLTABLE, path="./data/training-mltable-folder"
)

Configure experiment name

# Settings shared by the AutoML job definition that follows:
# the experiment name, the compute target name, and the trial cap.
exp_name = "automlv2-Titanic-classifier-experiment"
compute_name = "cpu-cluster"
max_trials = 5

Automl Configuration

# Create the AutoML classification job with the related factory-function.
classification_job = automl.classification(
    compute=compute_name,
    experiment_name=exp_name,
    training_data=my_training_data_input,
    target_column_name="Survived",
    primary_metric="accuracy",
    n_cross_validations=5,
    enable_model_explainability=True,
    tags={"my_custom_tag": "My Titanic Automl Exp"},
)

# Limits are all optional
classification_job.set_limits(
    timeout_minutes=600,
    trial_timeout_minutes=20,
    max_trials=max_trials,
    # max_concurrent_trials = 4,
    # max_cores_per_trial: -1,
    enable_early_termination=True,
)

# Training properties are optional
classification_job.set_training(
    blocked_training_algorithms=[ClassificationModels.LOGISTIC_REGRESSION],
    enable_onnx_compatible_models=True,
)

Create a job

# Submit the AutoML job
returned_job = ml_client.jobs.create_or_update(
    classification_job
)  # submit the job to the backend

print(f"Created job: {returned_job}")

Get the details

# Stream the submitted job, identified by its name.
# NOTE(review): presumably this blocks until the job finishes — confirm.
ml_client.jobs.stream(returned_job.name)

Get the endpoint url

# Get a URL for the status of the job.
# Printed explicitly: a bare expression that is not the last statement of a
# notebook cell (or that runs in a script) is never displayed.
print(returned_job.services["Studio"].endpoint)
print(returned_job.name)

Display best model

Invoke MLflow to get the details

import mlflow

# Obtain the tracking URL from MLClient
MLFLOW_TRACKING_URI = ml_client.workspaces.get(
    name=ml_client.workspace_name
).mlflow_tracking_uri

print(MLFLOW_TRACKING_URI)

# Set the MLFLOW TRACKING URI
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

print("\nCurrent tracking uri: {}".format(mlflow.get_tracking_uri()))

Enabling MLFLow

from mlflow.tracking.client import MlflowClient

# Initialize MLFlow client
mlflow_client = MlflowClient()

job_name = returned_job.name

# Example if providing an specific Job name/ID
# job_name = "b4e95546-0aa1-448e-9ad6-002e3207b4fc"

# Get the parent run
mlflow_parent_run = mlflow_client.get_run(job_name)

print("Parent Run: ")
print(mlflow_parent_run)

get tags

# Print parent run tags. 'automl_best_child_run_id' tag should be there;
# it is the key used to look up the best child run afterwards.
print(mlflow_parent_run.data.tags)

Get the best run

# Get the best model's child run
best_child_run_id = mlflow_parent_run.data.tags["automl_best_child_run_id"]
print("Found best child run id: ", best_child_run_id)

best_run = mlflow_client.get_run(best_child_run_id)

print("Best child run: ")
print(best_run)

display the metrics

# Metrics recorded on the best child run (displayed as the cell's result).
best_run.data.metrics

Create a folder to download the machine learning model artifacts

import os

# Create local folder (no error if it already exists)
local_dir = "./artifact_downloads"
os.makedirs(local_dir, exist_ok=True)

# Download run's artifacts/outputs
local_path = mlflow_client.download_artifacts(
    best_run.info.run_id, "outputs", local_dir
)
print("Artifacts downloaded in: {}".format(local_path))
print("Artifacts: {}".format(os.listdir(local_path)))

# Show the contents of the MLFlow model folder
os.listdir("./artifact_downloads/outputs/mlflow-model")

Now create a managed online endpoint

# import required libraries
from azure.ai.ml import MLClient
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Model,
    Environment,
    CodeConfiguration,
    ProbeSettings,
)
from azure.ai.ml.constants import ModelType

# Register the best run's pickled model straight from the job outputs.
model_name = "titanic-model-v2"
model = Model(
    path=f"azureml://jobs/{best_run.info.run_id}/outputs/artifacts/outputs/model.pkl",
    name=model_name,
    description="my sample titanic mlflow model",
)

# for downloaded file
# model = Model(path="artifact_downloads/outputs/model.pkl", name=model_name)

registered_model = ml_client.models.create_or_update(model)

registered_model.id

# Creating a unique endpoint name with current datetime to avoid conflicts
import datetime

online_endpoint_name = "titanic-" + datetime.datetime.now().strftime("%m%d%H%M%f")

# create an online endpoint
endpoint = ManagedOnlineEndpoint(
    name=online_endpoint_name,
    description="this is a sample online endpoint for titanic mlflow model",
    auth_mode="key",
    tags={"foo": "bar"},
)

ml_client.begin_create_or_update(endpoint)

Setup environment

# Inference environment: a base container image plus the conda spec that
# sits in the downloaded artifacts folder.
env = Environment(
    name="automl-titanic-tabular-env",
    description="environment for automl inference",
    image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210727.v1",
    conda_file="artifact_downloads/outputs/conda_env_v_1_0_0.yml",
)

Bring in the scoring script to use for the managed endpoint API

# Point the deployment at the downloaded artifacts folder and name the
# scoring script inside it.
code_configuration = CodeConfiguration(
    code="artifact_downloads/outputs/", scoring_script="scoring_file_v_2_0_0.py"
)

Setup the deployment

# Deployment definition tying together the endpoint name, the registered
# model id, the environment and the scoring code configuration.
deployment = ManagedOnlineDeployment(
    name="titanic-deploy",
    endpoint_name=online_endpoint_name,
    model=registered_model.id,
    environment=env,
    code_configuration=code_configuration,
    instance_type="Standard_DS2_V2",
    instance_count=1,
)

update the deployment for above endpoint

# Create (or update) the deployment defined above.
# NOTE(review): begin_* presumably starts a long-running operation — confirm
# whether the result must be awaited before routing traffic to it.
ml_client.online_deployments.begin_create_or_update(deployment)

Set the traffic

# titanic-deploy deployment to take 100% traffic
endpoint.traffic = {"titanic-deploy": 100}
ml_client.begin_create_or_update(endpoint)

Details of endpoint

Test the endpoint
Configure sample data

{"data": [
    [ 1, "3", "Braund Mr. Owen Harris", "male", "22", "1", "0", "A/5 21171", 7.25, "C76", "S"],
    [ 2, "1", "Cumings Mrs. John Bradley", "female", "38", "1", "0", "PC 17599", 71.2833, "C85", "C"]
]}

call the endpoint

import urllib.error
import urllib.request
import json
import os
import ssl


def allowSelfSignedHttps(allowed):
    """Disable HTTPS certificate verification for this process when allowed.

    When ``allowed`` is truthy, the ``PYTHONHTTPSVERIFY`` environment
    variable is unset or empty, and ``ssl`` provides
    ``_create_unverified_context``, that factory is installed as
    ``ssl._create_default_https_context``. Otherwise nothing is changed.
    """
    # bypass the server certificate verification on client side
    if allowed and not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None):
        ssl._create_default_https_context = ssl._create_unverified_context


allowSelfSignedHttps(True) # this line is needed if you use self-signed certificate in your scoring service.

# Request data goes here
# The example below assumes JSON formatting which may be updated
# depending on the format your endpoint expects.
# More information can be found here:
# https://docs.microsoft.com/azure/machine-learning/how-to-deploy-advanced-entry-script
data =  {
  "Inputs": {
    "data": [
      {
        "PassengerId": 1,
        "Pclass": "3",
        "Name": "Braund Mr Owen Harris",
        "Sex": "male",
        "Age": 22,
        "SibSp": "1",
        "Parch": "0",
        "Ticket": "A/5 21171",
        "Fare": 22.5,
        "Cabin": "C76",
        "Embarked": "S"
      }
    ]
  },
  "GlobalParameters": {
    "method": "predict"
  }
}

body = str.encode(json.dumps(data))

url = 'https://xxxxxxxxxxxxxxxxxxxxxx.centralus.inference.ml.azure.com/score'
api_key = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' # Replace this with the API key for the web service

# The azureml-model-deployment header will force the request to go to a specific deployment.
# Remove this header to have the request observe the endpoint traffic rules
headers = {'Content-Type':'application/json', 'Authorization':('Bearer '+ api_key), 'azureml-model-deployment': 'titanic-deploy' }

req = urllib.request.Request(url, body, headers)

try:
    response = urllib.request.urlopen(req)

    result = response.read()
    print(result)
except urllib.error.HTTPError as error:
    print("The request failed with status code: " + str(error.code))

    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
    print(error.info())
    print(error.read().decode("utf8", 'ignore'))

Predict_proba

get endpoint details

# Get the details for online endpoint
endpoint = ml_client.online_endpoints.get(name=online_endpoint_name)
# existing traffic details
print(endpoint.traffic)
# Get the scoring URI
print(endpoint.scoring_uri)

Now delete the endpoint

# Request deletion of the online endpoint created earlier, by name.
ml_client.online_endpoints.begin_delete(name=online_endpoint_name)

Originally published at https://github.com.

Mlearning.ai Submission Suggestions

How to become a writer on Mlearning.ai

medium.com

Azure Machine Learning SDK v2 using AutoML — Titanic dataset

Use Azure Machine Learning SDK v2 to train a model, Register and Deploy a model

Prerequisites

Code

Mlearning.ai Submission Suggestions

How to become a writer on Mlearning.ai

Written by Balamurugan Balakreshnan