Azure Machine Learning Automated ML Onnx Model

Balamurugan Balakreshnan

2 min read · Apr 9, 2022

Build an Automated ML model with ONNX as the output format

Prerequisites

Azure Account
Storage Account
Azure ML
Total Size is 1.5 billion rows
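NYC Taxi yellow data set (note: the code below loads the green taxi data via `NycTlcGreen` and registers it under the name "Yellow_taxi")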

Code

Log into Azure ML workspace
Create a notebook using the Python 3.6 kernel with the Azure ML SDK installed
Bring the data

# Build a sampled NYC green-taxi DataFrame and reduce it to the columns and
# rows used for training. Produces `green_taxi_df` (sampled, trimmed columns)
# and `final_df` (filtered rows, coordinates removed).
from azureml.opendatasets import NycTlcGreen
import pandas as pd
from datetime import datetime
from dateutil.relativedelta import relativedelta

# First sampling window; the loop shifts both bounds forward month by month.
start = datetime.strptime("1/1/2015", "%m/%d/%Y")
end = datetime.strptime("1/31/2015", "%m/%d/%Y")

# Collect each month's sample in a list and concatenate once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call.
monthly_samples = []
for sample_month in range(12):
    temp_df_green = NycTlcGreen(
        start + relativedelta(months=sample_month),
        end + relativedelta(months=sample_month),
    ).to_pandas_dataframe()
    monthly_samples.append(temp_df_green.sample(2000))
green_taxi_df = pd.concat(monthly_samples)

# Notebook preview; only rendered when it is the last expression in a cell.
green_taxi_df.head(10)

columns_to_remove = ["lpepDropoffDatetime", "puLocationId", "doLocationId", "extra", "mtaTax",
                     "improvementSurcharge", "tollsAmount", "ehailFee", "tripType", "rateCodeID",
                     "storeAndFwdFlag", "paymentType", "fareAmount", "tipAmount"
                    ]
# Drop all unwanted columns in one call instead of popping them one by one.
green_taxi_df = green_taxi_df.drop(columns=columns_to_remove)

green_taxi_df.head(5)

# Keep only rows inside the latitude/longitude bounding box and with
# plausible distance, passenger count and total amount.
final_df = green_taxi_df.query("pickupLatitude>=40.53 and pickupLatitude<=40.88")
final_df = final_df.query("pickupLongitude>=-74.09 and pickupLongitude<=-73.72")
final_df = final_df.query("tripDistance>=0.25 and tripDistance<31")
final_df = final_df.query("passengerCount>0 and totalAmount>0")

# The coordinates were only needed for the filters above. drop() returns a
# new frame, which avoids mutating the result of query() in place.
columns_to_remove_for_training = ["pickupLongitude", "pickupLatitude", "dropoffLongitude", "dropoffLatitude"]
final_df = final_df.drop(columns=columns_to_remove_for_training)

# Connect to the workspace, hold out a test split, and register the training
# portion as a tabular dataset for the AutoML run.
from azureml.core import Workspace, Datastore, Dataset
from azureml.core.workspace import Workspace
from azureml.core.compute import ComputeTarget, AmlCompute
import pandas as pd
from sklearn.model_selection import train_test_split

# Reads the workspace details from the local config file; done once here
# rather than twice.
ws = Workspace.from_config()
datastore = Datastore.get(ws, 'workspaceblobstore')

# Split BEFORE registering. Registering all of `final_df` as training data
# would mean the hold-out rows in `x_test` are also seen during training,
# which makes any later evaluation on `x_test` optimistic.
x_train, x_test = train_test_split(final_df, test_size=0.2, random_state=223)

# NOTE: the data comes from the green-taxi loader; the registered name
# "Yellow_taxi" is kept because the dataset is looked up by that name.
dataset = Dataset.Tabular.register_pandas_dataframe(x_train, datastore, "Yellow_taxi", show_progress=True)

# Get the training dataset
Yellow_taxi = ws.datasets.get('Yellow_taxi')
# Find or create the training cluster, then describe the AutoML regression
# job. Expects `ws` (Workspace) and `Yellow_taxi` (registered dataset) to be
# defined already.
import logging

from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core.experiment import Experiment
from azureml.train.automl import AutoMLConfig

cluster_name = "cpu-cluster"

try:
    # Check for existing compute target
    training_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # If it doesn't already exist, create it
    try:
        compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS11_V2', max_nodes=2)
        training_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
        training_cluster.wait_for_completion(show_output=True)
    except Exception as ex:
        # Provisioning errors are only printed, so `training_cluster` may be
        # undefined after this point.
        print(ex)

# Settings forwarded to AutoMLConfig as keyword arguments.
automl_settings = {
    "iteration_timeout_minutes": 18,
    "experiment_timeout_hours": 2.0,
    "enable_early_stopping": False,
    "primary_metric": 'spearman_correlation',
    "featurization": 'auto',
    "verbosity": logging.INFO,
    "n_cross_validations": 5
}

# Regression on "totalAmount"; ONNX-compatible models are requested because
# ONNX output is the goal of this walkthrough.
automl_config = AutoMLConfig(task='regression',
                             debug_log='automated_ml_errors.log',
                             training_data=Yellow_taxi,
                             label_column_name="totalAmount",
                             compute_target=cluster_name,
                             enable_onnx_compatible_models=True,
                             **automl_settings)
# Submit the AutoML experiment, fetch the best model, and score it on the
# hold-out split. Expects `ws`, `automl_config` and `x_test` to be defined
# already.
from math import sqrt

from azureml.core.experiment import Experiment
from azureml.widgets import RunDetails
from sklearn.metrics import mean_squared_error

experiment = Experiment(ws, "Tutorial-NYCTaxi")
remote_run = experiment.submit(automl_config, show_output=True)

RunDetails(remote_run).show()

best_run, fitted_model = remote_run.get_output()
print(best_run)
print(fitted_model)

# pop() removes the label column from x_test in place, so re-running this
# part without rebuilding x_test raises KeyError.
y_test = x_test.pop("totalAmount")

y_predict = fitted_model.predict(x_test)
print(y_predict[:10])

y_actual = y_test.values.flatten().tolist()
rmse = sqrt(mean_squared_error(y_actual, y_predict))
# Notebook display; only rendered when it is the last expression in a cell.
rmse

# Total absolute error divided by the total of the actual values.
# Assumes y_actual and y_predict have the same length (one prediction per row).
sum_errors = sum(
    abs(actual_val - predict_val)
    for actual_val, predict_val in zip(y_actual, y_predict)
)
sum_actuals = sum(y_actual)
mean_abs_percent_error = sum_errors / sum_actuals

print("Model MAPE:")
print(mean_abs_percent_error)
print()
print("Model Accuracy:")
print(1 - mean_abs_percent_error)

Originally published at https://github.com.

Azure Machine Learning Automated ML Onnx Model

Build Automated ML model with Onnx as output

Prerequisites

Code

Written by Balamurugan Balakreshnan