Fine-tune Llama with Axolotl on Modal (Multi-GPU)
    from modal import App, Image as ModalImage, Volume, Secret
    

    CONFIGURATION CONSTANTS

    # Time constants
    HOURS = 60 * 60
    # GPU Configuration
    GPU_TYPE = "a100-80gb"  # Default GPU type (can be: a100-80gb, a100-40gb, l40s, etc.)
    # Training Configuration
    WANDB_PROJECT_DEFAULT = "Llama-70b-MultiGPU-finetune"
    

    MODAL APP, VOLUME, AND SECRET SETUP

    app = App("Finetuned_Llama_70b_Axolotl_MultiGPU")
    # Create volumes for persistent storage
    exp_volume = Volume.from_name("Finetuned_Llama_70b_Axolotl", create_if_missing=True)
    # Configure volume mounting points
    VOLUME_CONFIG = {
        "/data": exp_volume,
    }
    huggingface_secret = Secret.from_name("secrets-hf-wandb")
    
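    Note: the "secrets-hf-wandb" secret has to exist in your Modal workspace before any of the functions below run; they read HUGGINGFACE_TOKEN (and WANDB_API_KEY for training) from it. One way to create it is through the Modal dashboard, or from the Modal CLI with your own values in place of the placeholders, e.g. modal secret create secrets-hf-wandb HUGGINGFACE_TOKEN=<your-hf-token> WANDB_API_KEY=<your-wandb-key>.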

    MODEL IMAGE SETUP

    This is the original Axolotl image. It can be used instead of the custom image built below, but it starts JupyterLab by default, which is why the environment variables disable it:

    AXOLOTL_IMAGE = ModalImage.from_registry(
        "axolotlai/axolotl-cloud:main-latest", add_python="3.12"
    ).env(
        {
            "JUPYTER_ENABLE_LAB": "no",  # Disable JupyterLab auto-start
            "JUPYTER_TOKEN": "",  # Disable Jupyter token requirement
            "HF_HOME": "/data/.cache",  # Set HF cache root under /data
        }
    )

    # Custom CUDA image with Axolotl and dependencies pre-installed
    CUDA_VERSION = "12.8.1"
    CUDA_FLAVOR = "devel"
    CUDA_OS = "ubuntu24.04"
    CUDA_TAG = f"{CUDA_VERSION}-{CUDA_FLAVOR}-{CUDA_OS}"
    
    # Define the GPU image for fine-tuning with Axolotl
    AXOLOTL_IMAGE = (
        ModalImage.from_registry(f"nvidia/cuda:{CUDA_TAG}", add_python="3.12")
        .apt_install(
            "git",
            "build-essential",
        )
        .uv_pip_install(
            [
                "torch",
                "torchvision",
                "torchaudio",  # optional but often bundled with torch
            ]
        )
        .run_commands(
            "uv pip install --no-deps -U packaging setuptools wheel ninja --system"
        )
        .run_commands("uv pip install --no-build-isolation axolotl[deepspeed] --system")
        .run_commands(
            "UV_NO_BUILD_ISOLATION=1 uv pip install flash-attn --no-build-isolation --system"
        )
        .env(
            {
                "HF_HUB_ENABLE_HF_TRANSFER": "1",
                "HF_HOME": "/data/.cache",  # Set HF cache root under /data
            }
        )
    )
    
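    Building this image the first time can take a while, since flash-attn is compiled from source. A quick way to sanity-check the result before launching a long job is a throwaway Modal function that only reports the PyTorch/CUDA setup. This is a minimal sketch; the function name check_image and the cheap "t4" GPU choice are illustrative, not part of the pipeline below.

    @app.function(image=AXOLOTL_IMAGE, gpu="t4", timeout=10 * 60)
    def check_image():
        # Report the framework versions baked into AXOLOTL_IMAGE and confirm a GPU is visible
        import torch

        print("torch:", torch.__version__)
        print("cuda available:", torch.cuda.is_available())
        if torch.cuda.is_available():
            print("device:", torch.cuda.get_device_name(0))
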
    # =============================================================================
    # HELPER FUNCTIONS
    # =============================================================================
    def write_config_to_volume(
        train_config_yaml: str,
        config_path: str = "/data/config.yml",
        update_paths: bool = True,
    ) -> dict:
        """Write YAML configuration to volume with optional path updates."""
        import os
        import yaml
    
        config_dict = yaml.safe_load(train_config_yaml)
    
        if update_paths and "output_dir" in config_dict:
            config_dict["output_dir"] = config_dict["output_dir"].replace(
                "./outputs", "/data/outputs"
            )
    
        os.makedirs(os.path.dirname(config_path), exist_ok=True)
    
        with open(config_path, "w") as f:
            yaml.dump(config_dict, f, default_flow_style=False)
    
        exp_volume.commit()
        return config_dict
    
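    The exp_volume.commit() call matters here: Modal volumes generally need an explicit commit before writes are reliably visible to other containers, and the preprocessing, training, merge, and inference functions below all read the config and artifacts from this shared volume.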

    TRAINING CONFIGURATION
    You can find more configuration options here: https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples

    TRAIN_CONFIG_YAML = f"""
    base_model: NousResearch/Meta-Llama-3-8B-Instruct
    # Optionally specify model_type or tokenizer_type
    model_type: LlamaForCausalLM
    tokenizer_type: AutoTokenizer
    # Automatically upload checkpoint and final model to HF
    # hub_model_id: username/custom_model_name
    
    load_in_8bit: true
    load_in_4bit: false
    
    chat_template: llama3
    datasets:
      - path: fozziethebeat/alpaca_messages_2k_test
        type: chat_template
    dataset_prepared_path: /data/prepared_datasets/alpaca_2k
    val_set_size: 0.05
    output_dir: /data/outputs/lora-out
    
    sequence_len: 4096
    sample_packing: false
    
    adapter: lora
    lora_model_dir:
    lora_r: 32
    lora_alpha: 16
    lora_dropout: 0.05
    lora_target_linear: true
    
    wandb_project: {WANDB_PROJECT_DEFAULT}
    wandb_entity:
    wandb_watch:
    wandb_name:
    wandb_log_model:
    
    gradient_accumulation_steps: 4
    micro_batch_size: 8
    num_epochs: 4
    optimizer: adamw_bnb_8bit
    lr_scheduler: cosine
    learning_rate: 0.0002
    
    bf16: auto
    tf32: false
    
    gradient_checkpointing: true
    resume_from_checkpoint:
    logging_steps: 1
    flash_attention: true
    
    warmup_ratio: 0.1
    evals_per_epoch: 4
    saves_per_epoch: 4
    weight_decay: 0.0
    special_tokens:
       pad_token: <|end_of_text|>
    """
    
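    A quick note on the batch-size arithmetic before changing GPU counts: with data-parallel training, the effective global batch size is micro_batch_size × gradient_accumulation_steps × number of GPUs. A small sketch of the calculation for the values above, assuming the 4 training GPUs configured below:

    # Effective global batch size for this config (illustrative arithmetic)
    micro_batch_size = 8
    gradient_accumulation_steps = 4
    num_training_gpus = 4  # TRAIN_NUM_GPUS below
    effective_batch_size = micro_batch_size * gradient_accumulation_steps * num_training_gpus
    print(effective_batch_size)  # 128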

    PREPROCESSING FUNCTION

    # GPU Configuration for preprocessing (single GPU)
    PREPROCESS_NUM_GPUS = 1
    PREPROCESS_GPU_CONFIG = f"{GPU_TYPE}:{PREPROCESS_NUM_GPUS}"
    
    @app.function(
        image=AXOLOTL_IMAGE,
        volumes=VOLUME_CONFIG,
        secrets=[huggingface_secret],
        timeout=24 * HOURS,
        gpu=PREPROCESS_GPU_CONFIG,
    )
    def process_datasets(
        train_config_yaml: str = TRAIN_CONFIG_YAML,
        config_path: str = "/data/config.yml",
    ):
        """Preprocess and tokenize dataset before training using Axolotl."""
        import os
        import subprocess
    
        os.environ["HF_TOKEN"] = os.environ["HUGGINGFACE_TOKEN"]
    
        config_dict = write_config_to_volume(train_config_yaml, config_path, True)
        exp_volume.commit()
    
        print("Starting dataset preprocessing...")
        try:
            subprocess.run(["axolotl", "preprocess", config_path], check=True)
            print("✓ Preprocessing completed")
            exp_volume.commit()
    
            return {
                "status": "completed",
                "config_path": config_path,
                "preprocessed_data_path": config_dict.get("dataset_prepared_path"),
                "output_dir": config_dict.get("output_dir"),
            }
        except subprocess.CalledProcessError as e:
            raise RuntimeError(f"Preprocessing failed: {e}")
    
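    One way to kick this step off from your local machine is inside an ephemeral app context. This is a sketch, assuming the code above lives in a script or notebook you run locally with the Modal client installed:

    # Run preprocessing remotely on Modal and print the returned summary dict
    with app.run():
        result = process_datasets.remote()
        print(result)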

    TRAINING FUNCTION

    # GPU Configuration for training (2-8 GPUs for multi-GPU training)
    TRAIN_NUM_GPUS = 4  # Can be adjusted from 2 to 8
    TRAIN_GPU_CONFIG = f"{GPU_TYPE}:{TRAIN_NUM_GPUS}"
    
    @app.function(
        image=AXOLOTL_IMAGE,
        volumes=VOLUME_CONFIG,
        secrets=[huggingface_secret],
        timeout=24 * HOURS,
        gpu=TRAIN_GPU_CONFIG,
    )
    def train_model(
        train_config_yaml: str = TRAIN_CONFIG_YAML,
        config_path: str = "/data/config.yml",
    ):
        """
        Train or fine-tune a model using Axolotl with multi-GPU support.
        All configuration is defined in the YAML file.
        Uses accelerate for multi-GPU training.
    
        Args:
            train_config_yaml: YAML configuration content as string
            config_path: Path where config will be written on the volume
    
        Returns:
            dict: Contains training status and output paths
        """
        import os
        import subprocess
    
        os.environ["HF_TOKEN"] = os.environ["HUGGINGFACE_TOKEN"]
        os.environ["WANDB_API_KEY"] = os.environ["WANDB_API_KEY"]
        os.environ["WANDB_PROJECT"] = WANDB_PROJECT_DEFAULT
    
        # Write config to volume using global helper function
        config_dict = write_config_to_volume(
            train_config_yaml=train_config_yaml,
            config_path=config_path,
            update_paths=True,
        )
    
        exp_volume.commit()
    
        # Run Axolotl training with accelerate for multi-GPU support
        print(f"Starting training with {TRAIN_NUM_GPUS} GPUs...")
        cmd = [
            "accelerate",
            "launch",
            "--multi_gpu",
            "--num_processes",
            str(TRAIN_NUM_GPUS),
            "--num_machines",
            "1",
            "--mixed_precision",
            "bf16",
            "--dynamo_backend",
            "no",
            "-m",
            "axolotl.cli.train",
            config_path,
        ]
    
        try:
            subprocess.run(cmd, check=True)
            print("✓ Training completed")
    
            # Commit trained model to volume
            exp_volume.commit()
    
            return {
                "status": "completed",
                "config_path": config_path,
                "output_dir": config_dict.get("output_dir"),
                "base_model": config_dict.get("base_model"),
                "num_gpus": TRAIN_NUM_GPUS,
            }
    
        except subprocess.CalledProcessError as e:
            raise RuntimeError(f"Training failed: {e}")
    
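    Note that the image installs axolotl[deepspeed], but the YAML above does not set a deepspeed key, so this run is plain data parallelism: each GPU holds a full copy of the model and accelerate launches one process per GPU. For models that genuinely do not fit on a single GPU (such as an actual Llama 70B), you would also shard the model, for example by pointing a deepspeed key in the YAML at a ZeRO config (the Axolotl repository ships samples under deepspeed_configs/); treat the exact file names and launch flags as version-dependent details to check against the Axolotl docs.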

    MERGE LORA FUNCTION

    # GPU Configuration for merging LoRA (single GPU)
    MERGE_NUM_GPUS = 1
    MERGE_GPU_CONFIG = f"{GPU_TYPE}:{MERGE_NUM_GPUS}"
    
    @app.function(
        image=AXOLOTL_IMAGE,
        volumes=VOLUME_CONFIG,
        secrets=[huggingface_secret],
        timeout=4 * HOURS,
        gpu=MERGE_GPU_CONFIG,
    )
    def merge_lora(
        train_config_yaml: str = TRAIN_CONFIG_YAML,
        config_path: str = "/data/config.yml",
        lora_model_dir: str = None,
    ):
        """
        Merge trained LoRA adapters into the base model.
    
        Args:
            train_config_yaml: YAML configuration content as string
            config_path: Path where config will be written on the volume
            lora_model_dir: Path to LoRA adapter directory (optional, uses config if not provided)
    
        Returns:
            dict: Contains merge status and output paths
        """
        import os
        import subprocess
    
        os.environ["HF_TOKEN"] = os.environ["HUGGINGFACE_TOKEN"]
    
        # Write config to volume
        config_dict = write_config_to_volume(
            train_config_yaml=train_config_yaml,
            config_path=config_path,
            update_paths=True,
        )
    
        exp_volume.commit()
    
        # Build merge command
        print("Starting LoRA merge...")
        cmd = ["axolotl", "merge-lora", config_path]
    
        if lora_model_dir:
            cmd.extend(["--lora-model-dir", lora_model_dir])
    
        try:
            subprocess.run(cmd, check=True)
            print("✓ LoRA merge completed")
    
            # Commit merged model to volume
            exp_volume.commit()
    
            return {
                "status": "completed",
                "config_path": config_path,
                "output_dir": config_dict.get("output_dir"),
                "lora_model_dir": lora_model_dir or config_dict.get("lora_model_dir"),
            }
    
        except subprocess.CalledProcessError as e:
            raise RuntimeError(f"LoRA merge failed: {e}")
    
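    After a successful merge, recent Axolotl versions typically write the merged weights to a merged/ subdirectory under output_dir (here /data/outputs/lora-out), though the exact layout is version-dependent. Since everything lands on the Finetuned_Llama_70b_Axolotl volume, the artifacts can also be pulled locally with the Modal CLI, e.g. modal volume get Finetuned_Llama_70b_Axolotl outputs/lora-out.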

    INFERENCE FUNCTION

    # GPU Configuration for inference (single GPU)
    INFERENCE_NUM_GPUS = 1
    INFERENCE_GPU_CONFIG = f"{GPU_TYPE}:{INFERENCE_NUM_GPUS}"
    
    @app.function(
        image=AXOLOTL_IMAGE,
        volumes=VOLUME_CONFIG,
        secrets=[huggingface_secret],
        timeout=1 * HOURS,
        gpu=INFERENCE_GPU_CONFIG,
    )
    def run_inference(
        train_config_yaml: str = TRAIN_CONFIG_YAML,
        config_path: str = "/data/config.yml",
        prompt: str = "Hello, how are you?",
        lora_model_dir: str = None,
        base_model: str = None,
    ):
        """
        Run inference using the trained model.
    
        Args:
            train_config_yaml: YAML configuration content as string
            config_path: Path where config will be written on the volume
            prompt: Input prompt for inference
            lora_model_dir: Path to LoRA adapter directory (optional)
            base_model: Path to base or merged model (optional)
    
        Returns:
            dict: Contains inference output and metadata
        """
        import os
        import subprocess
        import tempfile
    
        os.environ["HF_TOKEN"] = os.environ["HUGGINGFACE_TOKEN"]
    
        # Write config to volume
        config_dict = write_config_to_volume(
            train_config_yaml=train_config_yaml,
            config_path=config_path,
            update_paths=True,
        )
    
        # Build inference command
        print("Starting inference...")
        print(f"Prompt: {prompt}")
        print("-" * 80)
    
        cmd = ["axolotl", "inference", config_path]
    
        if lora_model_dir:
            cmd.extend(["--lora-model-dir", lora_model_dir])
        if base_model:
            cmd.extend(["--base-model", base_model])
    
        # Write prompt to temp file and pipe it
        try:
            with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as f:
                f.write(prompt)
                prompt_file = f.name
    
            # Run inference with prompt piped from file
            with open(prompt_file, "r") as f:
                result = subprocess.run(
                    cmd,
                    stdin=f,
                    capture_output=True,
                    text=True,
                    check=True,
                )
    
            print("✓ Inference completed")
            print("\n" + "=" * 80)
            print("MODEL OUTPUT:")
            print("=" * 80)
            print(result.stdout)
            print("=" * 80)
    
            if result.stderr:
                print("\nSTDERR:")
                print(result.stderr)
    
            response_dict = {
                "status": "completed",
                "prompt": prompt,
                "output": result.stdout,
                "model": base_model or config_dict.get("base_model"),
            }
    
            return response_dict
    
        except subprocess.CalledProcessError as e:
            print(f"Error output: {e.stderr}")
            print(f"Command output: {e.stdout}")
            raise RuntimeError(f"Inference failed: {e}")
        finally:
            # Clean up the temporary prompt file (os is already imported above)
            if "prompt_file" in locals():
                os.unlink(prompt_file)
    
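    Finally, the stages can be chained into one pipeline and started with modal run. Below is a minimal sketch using a Modal local entrypoint; the entrypoint name main and the example prompt are illustrative:

    @app.local_entrypoint()
    def main():
        # Each stage runs remotely in sequence; they all share the /data volume
        print(process_datasets.remote())
        print(train_model.remote())
        print(merge_lora.remote())
        print(run_inference.remote(prompt="Give one sentence on what LoRA fine-tuning does."))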