use core::panic;
use std::{
    collections::BTreeMap,
    fs::{self},
    io::{stdin, stdout, Cursor, Read, Write},
    iter::repeat_with,
    path::PathBuf,
    sync::mpsc::channel,
};

use arboard::Clipboard;
use crossterm::{
    cursor::MoveToColumn,
    style::Print,
    terminal::{Clear, ClearType},
    ExecutableCommand, QueueableCommand,
};
use image::{buffer::ConvertBuffer, EncodableLayout, Pixel, Rgba32FImage, RgbaImage};
use pico_args::Arguments;
use serde::Deserialize;
use serde_with::{serde_as, Map};
use wgpu::{
    util::{make_spirv_raw, DeviceExt},
    Backends, BindGroupEntry, BindGroupLayoutDescriptor, BindGroupLayoutEntry, BlendComponent,
    BlendState, BufferBinding, BufferDescriptor, BufferUsages, Color, ColorTargetState,
    ColorWrites, CommandEncoderDescriptor, DeviceDescriptor, Extent3d, Features, FragmentState,
    Instance, InstanceDescriptor, InstanceFlags, Limits, MemoryHints, MultisampleState, Origin3d,
    PipelineCompilationOptions, PipelineLayoutDescriptor, PrimitiveState, PushConstantRange,
    QuerySetDescriptor, QueryType, RenderPassColorAttachment, RenderPipelineDescriptor,
    RequestAdapterOptions, ShaderModuleDescriptor, ShaderStages, TexelCopyBufferInfo,
    TexelCopyBufferLayout, TexelCopyTextureInfo, Texture, TextureDescriptor, TextureDimension,
    TextureFormat, TextureUsages, TextureViewDescriptor, VertexState,
};
use zip::ZipArchive;

/// Number of passes to run in standalone mode (the mode entered when no config
/// path is given on the command line).
///
/// NOTE(review): the code that reads this constant is outside the visible part
/// of the file — confirm how it interacts with the per-output `passes` setting.
const NUM_PERF_PASSES: u32 = 512;

/// WGSL vertex shader for a fullscreen triangle.
///
/// The entry point `fullscreen_triangle` needs no vertex buffers: it picks one of
/// three hard-coded clip-space corners from the vertex index. The triangle is
/// oversized (corners at -1 and 3), so its part inside the -1..1 clip range covers
/// the whole render target. `fragcoord` runs from 0 to 2 along the triangle's
/// edges, which works out to 0..1 across the visible area.
const VERTEX_SHADER: &str = "
struct VertOut {
    @builtin(position) pos: vec4f,
    @location(0) @interpolate(linear, centroid) fragcoord: vec2f,
};

@vertex fn fullscreen_triangle(@builtin(vertex_index) idx: u32) -> VertOut {
    let pos = array(
        vec2f(-1.0, -1.0),
        vec2f(-1.0,  3.0),
        vec2f( 3.0, -1.0),
    );

    let uv = array(
        vec2f(0.0, 0.0),
        vec2f(0.0, 2.0),
        vec2f(2.0, 0.0),
    );

    var out: VertOut;
    out.pos = vec4f(pos[idx], 0.0, 1.0);
    out.fragcoord = uv[idx];
    return out;
}
";

/// Config for a single intermediate pass (one `[passes.<name>]` table in the
/// config file).
///
/// Each pass is rendered into an `Rgba32Float` texture of the given size.
#[serde_as]
#[derive(Deserialize)]
struct PassConfig {
    /// Width of the texture rendered by this pass
    width: u32,

    /// Height of the texture rendered by this pass
    height: u32,

    /// Depth, if any. When set, the shader is run once per layer and every
    /// rendered layer is copied into a 3d texture of this depth.
    depth: Option<u32>,

    /// Path to the shader. Read relative to the config file's directory, or looked
    /// up by name in the embedded zip in standalone mode. A path ending in `.wgsl`
    /// is loaded as WGSL source, anything else as SPIR-V.
    path: String,

    /// Names of 2d texture inputs, bound in order to bindings 0-7.
    /// Slots left unused are filled with a 1x1 default texture.
    buffers: Vec<String>,

    /// Names of 3d texture inputs, bound in order to bindings 8-11.
    /// Slots left unused are filled with a 1x1x1 default texture.
    volumes: Vec<String>,

    /// Per-pass uniforms, read from a table of `name = [values]` entries.
    /// The values are packed into the uniform buffer at binding 12; an entry is
    /// aligned to 4 bytes (fewer than two values), 8 bytes (two values) or
    /// 16 bytes (three or more values). The names are not used for packing.
    #[serde_as(as = "Map<_, _>")]
    uniforms: Vec<(String, Vec<f32>)>,
}

/// Config for a single shader output (one `[outputs.<name>]` table in the
/// config file).
///
/// NOTE(review): the code consuming this struct is outside the visible part of
/// the file; the field notes below follow the `USAGE` help text — confirm.
#[serde_as]
#[derive(Deserialize)]
struct OutConfig {
    /// Width of the output
    width: u32,

    /// Height of the output
    height: u32,

    /// Number of passes. Per the help text, setting this runs the shader multiple
    /// times and averages the result.
    passes: Option<u32>,

    /// Path to the shader
    path: String,

    /// Names of 2d texture inputs (help text: same as in passes)
    buffers: Vec<String>,

    /// Names of 3d texture inputs (help text: same as in passes)
    volumes: Vec<String>,

    /// Marks the output as complex. Per the help text, a complex output is
    /// skipped unless the `--complex` argument is passed.
    complex: Option<bool>,

    /// Per-output uniforms, read from a table of `name = [values]` entries
    /// (help text: same as in passes)
    #[serde_as(as = "Map<_, _>")]
    uniforms: Vec<(String, Vec<f32>)>,
}

/// Top-level shader config, parsed from the TOML config file (or from the
/// `.toml` entry of the embedded zip in standalone mode).
///
/// Every field is read from a TOML table and kept as a list of
/// `(name, value)` pairs rather than a map.
#[serde_as]
#[derive(Deserialize)]
struct Config {
    /// Global uniforms (`[uniforms]`), packed into the global uniform buffer
    /// that is bound at binding 15
    #[serde_as(as = "Map<_, _>")]
    uniforms: Vec<(String, Vec<f32>)>,

    /// Intermediate shader passes (`[passes.<name>]`), keyed by pass name
    #[serde_as(as = "Map<_, _>")]
    passes: Vec<(String, PassConfig)>,

    /// Shader outputs (`[outputs.<name>]`), keyed by output name
    #[serde_as(as = "Map<_, _>")]
    outputs: Vec<(String, OutConfig)>,
}

/// Profiling figures for one pass or output.
///
/// NOTE(review): the code that fills in and prints this struct is outside the
/// visible part of the file; the field meanings below are inferred from the
/// field names only — confirm against the use sites.
#[derive(Debug)]
struct Timings {
    /// presumably the number of measurements taken — TODO confirm
    count: usize,
    /// presumably the number of pixels rendered — TODO confirm
    pixels: u64,
    /// presumably the measured GPU time; unit not visible here — TODO confirm
    time: u64,
}

/// Packs the fragment shader's push constants into one contiguous byte array.
///
/// The result holds three native-endian `u32` values, 4 bytes each, in this
/// order: `layer`, `width`, `height`.
fn push_const_bytes(layer: u32, width: u32, height: u32) -> [u8; 12] {
    let mut packed = [0u8; 12];

    // one fixed 4-byte slot per value, in declaration order
    packed[0..4].copy_from_slice(&layer.to_ne_bytes());
    packed[4..8].copy_from_slice(&width.to_ne_bytes());
    packed[8..12].copy_from_slice(&height.to_ne_bytes());

    packed
}

/// Help text printed when `-h`/`--help` is passed. It documents the command
/// line options and the TOML config format, including which binding each
/// resource is attached to.
const USAGE: &str = "\
Usage:
    mini-shadertoy [FILE] [OPTIONS]

Options:
    -o --output     The directory to output to (default: parent directory of the input)
    -e --extension  File extensions to use when saving (can be comma separated, default: png)
                    numpy's .npy format is supported as well
    -c --complex    Don't skip passes marked as complex
    -d --debug      Capture using the renderdoc integration, needs to be launched by renderdoc
    -p --profile    Whether to output a timings.toml file with the timings, via pipeline timestamps
    -b --backend    What WGPU backend to use (gl, vulkan, metal, dx12)

File format (toml):
    [uniforms]
    color = [0.8, 0.7, 0.5]
    # .. other named uniforms, value must be an array
    # in the shader, set = 0, binding = 15

    [passes.preproc]
    width = 256
    height = 64
    # depth = 1 # setting this will make the texture 3d
    buffers = [] # use a previous 2d texture, binds to binding 0-7
    volumes = [] # use a previous 3d texture, binds to binding 8-11
    uniforms = {} # in the shader, set = 0, binding = 12
    path = \"shader.spv\" # path to the shader

    [outputs.main]
    width = 1920
    height = 1080
    # passes = 1 # set this to run the shader multiple times and average the result
    buffers = [] # same as in passes
    volumes = [] # same as in passes
    uniforms = {} # same as in passes
    # complex = false # set to true to skip unless the --complex argument is passed
    path = \"shader.spv\"

    # linear interpolating texture sampler is bound to binding 13,
    # nearest neighbor texture sampler is bound to binding 14
";

/// Zip archive embedded into the binary at compile time. In standalone mode
/// (no config path given) the `.toml` config and the shaders it names are read
/// from this archive instead of from disk.
const STANDALONE: &[u8] = include_bytes!("../standalone.zip");

fn main() {
    // get the path of the shader config to use
    let mut args = Arguments::from_env();
    if args.contains(["-h", "--help"]) {
        print!("{}", USAGE);
        return;
    }

    let base_path: Option<String> = args.opt_free_from_str().expect("did not get config path");
    let output: Option<String> = args
        .opt_value_from_str(["-o", "--output"])
        .expect("did not get optional output path");
    let extensions: Vec<String> = args
        .opt_value_from_str(["-e", "--extension"])
        .expect("did not get image extension")
        .unwrap_or("png".to_string())
        .split(',')
        .map(str::to_string)
        .collect();
    let backend: Option<Backends> = args
        .opt_value_from_str(["-b", "--backend"])
        .expect("did not get backend")
        .map(|x: String| {
            Backends::from_name(x.to_uppercase().as_str().trim())
                .expect("No backend with that name!")
        });
    let complex: bool = args.contains(["-c", "--complex"]);
    let renderdoc: bool = args.contains(["-d", "--debug"]);
    let profile: bool = args.contains(["-p", "--profile"]) || base_path.is_none(); // profiling is a must in standalone mode
    let standalone = base_path.is_none();
    let mut archive =
        ZipArchive::new(Cursor::new(STANDALONE)).expect("failed to read standalone zip file");
    let standalone_entry = archive
        .file_names()
        .find(|x| x.ends_with(".toml"))
        .expect("no .toml config file in the standalone zip")
        .to_string();

    // parse the shader config
    let config: Config = toml::from_str(
        &if let Some(p) = &base_path {
            fs::read_to_string(p)
        } else {
            let mut file = archive
                .by_name(&standalone_entry)
                .expect("could not find standalone entry");
            let mut string = String::new();
            file.read_to_string(&mut string)
                .expect("failed to read standalone entry to string");

            Ok(string)
        }
        .expect("Failed to read config"),
    )
    .expect("Failed to parse");

    // number of items to process
    let total_items = config.passes.len() + config.outputs.len();
    let mut current_item = 0;

    // make sure the output directory exists
    let (output, base_path) = if !standalone {
        // base path without the file
        let base_path = PathBuf::from(base_path.as_ref().map(|x| x.as_str()).unwrap_or("/"));
        let base_path = base_path.parent().unwrap();
        let output = output.map(PathBuf::from).unwrap_or(base_path.to_path_buf());

        fs::create_dir_all(&output).expect("Could not create output directory");

        (output, base_path.to_path_buf())
    } else {
        println!("Running as standalone");

        // empty
        (PathBuf::new(), PathBuf::new())
    };

    // profiling output path
    let mut pass_timings = BTreeMap::new();
    let mut out_timings = BTreeMap::new();

    // set up wgpu
    let instance = Instance::new(&InstanceDescriptor {
        backends: backend.unwrap_or(Backends::all().with_env()),
        flags: if profile || renderdoc {
            InstanceFlags::debugging()
        } else {
            InstanceFlags::default()
        }
        .with_env(),
        ..InstanceDescriptor::default().with_env()
    });

    let adapter = pollster::block_on(instance.request_adapter(&RequestAdapterOptions {
        power_preference: wgpu::PowerPreference::HighPerformance,
        force_fallback_adapter: false,
        compatible_surface: None,
    }))
    .expect("No adapter");

    let (device, queue) = pollster::block_on(adapter.request_device(
        &DeviceDescriptor {
            label: None,
            required_features: Features::FLOAT32_FILTERABLE
                // needed to have blendable Rgba32Float
                | Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES
                // needed for easier layer setting
                | Features::PUSH_CONSTANTS
                // timestamp queries if we want to measure performance
                | if profile { Features::TIMESTAMP_QUERY | Features::TIMESTAMP_QUERY_INSIDE_PASSES } else { Features::empty() },
            required_limits: Limits {
                // needed for push constants
                // three u32 values (layer, width, height), 12 bytes in total
                max_push_constant_size: 12,
                ..Limits::default()
            },
            memory_hints: MemoryHints::Performance,
        },
        None,
    ))
    .expect("no device");

    // print the GPU
    println!("GPU: {}", adapter.get_info().name);
    println!("Backend: {}", adapter.get_info().backend);
    println!("Info: {}", adapter.get_info().driver_info);

    // start capture
    if renderdoc {
        device.start_capture();
    }

    // query set, if needed
    let (query_set, query_buffer, query_staging) = if profile {
        let set = device.create_query_set(&QuerySetDescriptor {
            label: Some("profile set"),
            ty: QueryType::Timestamp,
            count: 2,
        });

        let buf = device.create_buffer(&BufferDescriptor {
            label: Some("profile buffer"),
            size: 16,
            usage: BufferUsages::QUERY_RESOLVE | BufferUsages::COPY_SRC,
            mapped_at_creation: false,
        });

        let buf2 = device.create_buffer(&BufferDescriptor {
            label: Some("profile buffer"),
            size: 16,
            usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
            mapped_at_creation: false,
        });

        (Some(set), Some(buf), Some(buf2))
    } else {
        (None, None, None)
    };

    // default fullscreen triangle
    let vertex_shader = device.create_shader_module(ShaderModuleDescriptor {
        label: Some("vertex shader"),
        source: wgpu::ShaderSource::Wgsl(VERTEX_SHADER.into()),
    });

    // set up default textures
    let default_buffer = device.create_texture_with_data(
        &queue,
        &TextureDescriptor {
            label: Some("default buffer"),
            size: Extent3d {
                width: 1,
                height: 1,
                depth_or_array_layers: 1,
            },
            mip_level_count: 1,
            sample_count: 1,
            dimension: wgpu::TextureDimension::D2,
            format: wgpu::TextureFormat::Rgba32Float,
            usage: TextureUsages::TEXTURE_BINDING,
            view_formats: &[TextureFormat::Rgba32Float],
        },
        wgpu::util::TextureDataOrder::LayerMajor,
        // black
        &[0; 16],
    );

    let default_volume = device.create_texture_with_data(
        &queue,
        &TextureDescriptor {
            label: Some("default buffer"),
            size: Extent3d {
                width: 1,
                height: 1,
                depth_or_array_layers: 1,
            },
            mip_level_count: 1,
            sample_count: 1,
            dimension: wgpu::TextureDimension::D3,
            format: wgpu::TextureFormat::Rgba32Float,
            usage: TextureUsages::TEXTURE_BINDING,
            view_formats: &[TextureFormat::Rgba32Float],
        },
        wgpu::util::TextureDataOrder::LayerMajor,
        // black
        &[0; 16],
    );

    // samplers
    // with interpolation
    let interp_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
        label: Some("interpolation sampler"),
        address_mode_u: wgpu::AddressMode::ClampToEdge,
        address_mode_v: wgpu::AddressMode::ClampToEdge,
        address_mode_w: wgpu::AddressMode::ClampToEdge,
        mag_filter: wgpu::FilterMode::Linear,
        min_filter: wgpu::FilterMode::Linear,
        mipmap_filter: wgpu::FilterMode::Linear,
        lod_min_clamp: 0.0,
        lod_max_clamp: 1.0,
        compare: None,
        anisotropy_clamp: 1,
        border_color: None,
    });

    // nearest neighbors
    let nearest_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
        label: Some("nearest sampler"),
        address_mode_u: wgpu::AddressMode::ClampToEdge,
        address_mode_v: wgpu::AddressMode::ClampToEdge,
        address_mode_w: wgpu::AddressMode::ClampToEdge,
        mag_filter: wgpu::FilterMode::Nearest,
        min_filter: wgpu::FilterMode::Nearest,
        mipmap_filter: wgpu::FilterMode::Nearest,
        lod_min_clamp: 0.0,
        lod_max_clamp: 1.0,
        compare: None,
        anisotropy_clamp: 1,
        border_color: None,
    });

    // uniform buffer
    let uniforms = device.create_buffer(&BufferDescriptor {
        label: Some("uniform buffer"),
        size: 1024 * 4,
        usage: BufferUsages::UNIFORM | BufferUsages::COPY_DST,
        mapped_at_creation: false,
    });

    // global uniform buffers
    let globals = device.create_buffer(&BufferDescriptor {
        label: Some("global uniform buffer"),
        size: 1024 * 4,
        usage: BufferUsages::UNIFORM | BufferUsages::COPY_DST,
        mapped_at_creation: false,
    });

    // bind group layout
    let buffer_entry = BindGroupLayoutEntry {
        binding: 0,
        count: None,
        visibility: ShaderStages::FRAGMENT,
        ty: wgpu::BindingType::Texture {
            sample_type: wgpu::TextureSampleType::Float { filterable: true },
            view_dimension: wgpu::TextureViewDimension::D2,
            multisampled: false,
        },
    };

    let volume_entry = BindGroupLayoutEntry {
        binding: 0,
        count: None,
        visibility: ShaderStages::FRAGMENT,
        ty: wgpu::BindingType::Texture {
            sample_type: wgpu::TextureSampleType::Float { filterable: true },
            view_dimension: wgpu::TextureViewDimension::D3,
            multisampled: false,
        },
    };

    let layout = device.create_bind_group_layout(&BindGroupLayoutDescriptor {
        label: Some("default layout"),
        entries: &[
            // buffers
            BindGroupLayoutEntry {
                binding: 0,
                ..buffer_entry
            },
            BindGroupLayoutEntry {
                binding: 1,
                ..buffer_entry
            },
            BindGroupLayoutEntry {
                binding: 2,
                ..buffer_entry
            },
            BindGroupLayoutEntry {
                binding: 3,
                ..buffer_entry
            },
            BindGroupLayoutEntry {
                binding: 4,
                ..buffer_entry
            },
            BindGroupLayoutEntry {
                binding: 5,
                ..buffer_entry
            },
            BindGroupLayoutEntry {
                binding: 6,
                ..buffer_entry
            },
            BindGroupLayoutEntry {
                binding: 7,
                ..buffer_entry
            },
            // volumes
            BindGroupLayoutEntry {
                binding: 8,
                ..volume_entry
            },
            BindGroupLayoutEntry {
                binding: 9,
                ..volume_entry
            },
            BindGroupLayoutEntry {
                binding: 10,
                ..volume_entry
            },
            BindGroupLayoutEntry {
                binding: 11,
                ..volume_entry
            },
            // uniform buffer
            BindGroupLayoutEntry {
                binding: 12,
                count: None,
                visibility: ShaderStages::VERTEX_FRAGMENT,
                ty: wgpu::BindingType::Buffer {
                    ty: wgpu::BufferBindingType::Uniform,
                    has_dynamic_offset: false,
                    min_binding_size: None,
                },
            },
            // samplers
            BindGroupLayoutEntry {
                binding: 13,
                count: None,
                visibility: ShaderStages::VERTEX_FRAGMENT,
                ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
            },
            BindGroupLayoutEntry {
                binding: 14,
                count: None,
                visibility: ShaderStages::VERTEX_FRAGMENT,
                ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
            },
            // global uniform buffer
            BindGroupLayoutEntry {
                binding: 15,
                count: None,
                visibility: ShaderStages::VERTEX_FRAGMENT,
                ty: wgpu::BindingType::Buffer {
                    ty: wgpu::BufferBindingType::Uniform,
                    has_dynamic_offset: false,
                    min_binding_size: None,
                },
            },
        ],
    });

    // pipeline layout
    let pipeline_layout = device.create_pipeline_layout(&PipelineLayoutDescriptor {
        label: Some("pipeline layout"),
        bind_group_layouts: &[&layout],
        push_constant_ranges: &[PushConstantRange {
            stages: ShaderStages::FRAGMENT,
            range: 0..12,
        }],
    });

    // global uniforms
    let mut buffer = Vec::new();
    for (_, uniform) in config.uniforms {
        // ensure alignment
        let align = if uniform.len() < 2 {
            4
        } else if uniform.len() == 2 {
            8
        } else {
            16
        };

        // keep adding 0 until we are aligned
        while buffer.len() & (align - 1) != 0 {
            buffer.push(0);
        }

        // add to the buffer
        for value in uniform.iter() {
            buffer.extend_from_slice(&value.to_ne_bytes());
        }
    }

    queue.write_buffer(&globals, 0, &buffer);

    // create the buffers
    let mut buffers: BTreeMap<String, Texture> = BTreeMap::new();
    let mut volumes: BTreeMap<String, Texture> = BTreeMap::new();

    for (name, config) in config.passes {
        current_item += 1;

        // shader code
        let shader_code = if config.path.ends_with(".wgsl") {
            wgpu::ShaderSource::Wgsl(
                if standalone {
                    let mut file = archive
                        .by_name(&config.path)
                        .expect(&format!("Failed to load shader {name}"));
                    let mut shader = String::new();
                    file.read_to_string(&mut shader)
                        .expect("Failed to read shader to string");
                    shader
                } else {
                    fs::read_to_string(base_path.join(config.path))
                        .expect(&format!("Failed to load shader {name}"))
                }
                .into(),
            )
        } else {
            let bytes = if standalone {
                let mut file = archive
                    .by_name(&config.path)
                    .expect(&format!("Failed to load shader {name}"));
                let mut shader = Vec::new();
                file.read_to_end(&mut shader)
                    .expect("Failed to read shader to string");
                shader
            } else {
                fs::read(base_path.join(config.path))
                    .expect(&format!("Failed to load shader {name}"))
            };
            wgpu::ShaderSource::SpirV(make_spirv_raw(&bytes).into_owned().into())
        };

        // load the shader
        let shader = device.create_shader_module(ShaderModuleDescriptor {
            label: Some(format!("buffer - {name}").leak()),
            source: shader_code,
        });

        // make the pipeline
        let pipeline = device.create_render_pipeline(&RenderPipelineDescriptor {
            label: Some(format!("buffer - pipeline - {name}").leak()),
            layout: Some(&pipeline_layout),
            vertex: VertexState {
                module: &vertex_shader,
                entry_point: Some("fullscreen_triangle"),
                compilation_options: PipelineCompilationOptions {
                    constants: &Default::default(),
                    zero_initialize_workgroup_memory: false,
                },
                buffers: &[],
            },
            primitive: PrimitiveState {
                topology: wgpu::PrimitiveTopology::TriangleList,
                strip_index_format: None,
                front_face: wgpu::FrontFace::Cw,
                cull_mode: None,
                unclipped_depth: false,
                polygon_mode: wgpu::PolygonMode::Fill,
                conservative: false,
            },
            depth_stencil: None,
            multisample: MultisampleState {
                count: 1,
                mask: !0,
                alpha_to_coverage_enabled: false,
            },
            fragment: Some(FragmentState {
                module: &shader,
                entry_point: None,
                compilation_options: PipelineCompilationOptions {
                    constants: &Default::default(),
                    zero_initialize_workgroup_memory: false,
                },
                targets: &[Some(ColorTargetState {
                    format: TextureFormat::Rgba32Float,
                    blend: None,
                    write_mask: ColorWrites::ALL,
                })],
            }),
            multiview: None,
            cache: None,
        });

        let bufs = config
            .buffers
            .iter()
            .map(|x| {
                buffers
                    .get(x)
                    .expect(&format!("Failed to get {x}"))
                    .create_view(&TextureViewDescriptor {
                        format: None,
                        label: None,
                        dimension: None,
                        usage: None,
                        aspect: wgpu::TextureAspect::All,
                        base_mip_level: 0,
                        mip_level_count: None,
                        base_array_layer: 0,
                        array_layer_count: None,
                    })
            })
            .chain(repeat_with(|| {
                default_buffer.create_view(&TextureViewDescriptor {
                    format: None,
                    label: None,
                    dimension: None,
                    usage: None,
                    aspect: wgpu::TextureAspect::All,
                    base_mip_level: 0,
                    mip_level_count: None,
                    base_array_layer: 0,
                    array_layer_count: None,
                })
            }))
            .take(8)
            .collect::<Vec<_>>();

        let vols = config
            .volumes
            .iter()
            .map(|x| {
                volumes
                    .get(x)
                    .expect(&format!("Failed to get {x}"))
                    .create_view(&TextureViewDescriptor {
                        format: None,
                        label: None,
                        dimension: None,
                        usage: None,
                        aspect: wgpu::TextureAspect::All,
                        base_mip_level: 0,
                        mip_level_count: None,
                        base_array_layer: 0,
                        array_layer_count: None,
                    })
            })
            .chain(repeat_with(|| {
                default_volume.create_view(&TextureViewDescriptor {
                    format: None,
                    label: None,
                    dimension: None,
                    usage: None,
                    aspect: wgpu::TextureAspect::All,
                    base_mip_level: 0,
                    mip_level_count: None,
                    base_array_layer: 0,
                    array_layer_count: None,
                })
            }))
            .take(4)
            .collect::<Vec<_>>();

        // create the bind group
        let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: None,
            layout: &layout,
            entries: &[
                // buffers
                BindGroupEntry {
                    binding: 0,
                    resource: wgpu::BindingResource::TextureView(&bufs[0]),
                },
                BindGroupEntry {
                    binding: 1,
                    resource: wgpu::BindingResource::TextureView(&bufs[1]),
                },
                BindGroupEntry {
                    binding: 2,
                    resource: wgpu::BindingResource::TextureView(&bufs[2]),
                },
                BindGroupEntry {
                    binding: 3,
                    resource: wgpu::BindingResource::TextureView(&bufs[3]),
                },
                BindGroupEntry {
                    binding: 4,
                    resource: wgpu::BindingResource::TextureView(&bufs[4]),
                },
                BindGroupEntry {
                    binding: 5,
                    resource: wgpu::BindingResource::TextureView(&bufs[5]),
                },
                BindGroupEntry {
                    binding: 6,
                    resource: wgpu::BindingResource::TextureView(&bufs[6]),
                },
                BindGroupEntry {
                    binding: 7,
                    resource: wgpu::BindingResource::TextureView(&bufs[7]),
                }, // volumes
                BindGroupEntry {
                    binding: 8,
                    resource: wgpu::BindingResource::TextureView(&vols[0]),
                },
                BindGroupEntry {
                    binding: 9,
                    resource: wgpu::BindingResource::TextureView(&vols[1]),
                },
                BindGroupEntry {
                    binding: 10,
                    resource: wgpu::BindingResource::TextureView(&vols[2]),
                },
                BindGroupEntry {
                    binding: 11,
                    resource: wgpu::BindingResource::TextureView(&vols[3]),
                },
                // uniforms
                BindGroupEntry {
                    binding: 12,
                    resource: wgpu::BindingResource::Buffer(BufferBinding {
                        buffer: &uniforms,
                        offset: 0,
                        size: None,
                    }),
                },
                // samplers
                BindGroupEntry {
                    binding: 13,
                    resource: wgpu::BindingResource::Sampler(&interp_sampler),
                },
                BindGroupEntry {
                    binding: 14,
                    resource: wgpu::BindingResource::Sampler(&nearest_sampler),
                },
                // global uniforms
                BindGroupEntry {
                    binding: 15,
                    resource: wgpu::BindingResource::Buffer(BufferBinding {
                        buffer: &globals,
                        offset: 0,
                        size: None,
                    }),
                },
            ],
        });

        // write the uniforms
        let mut buffer = Vec::new();
        for (_, uniform) in config.uniforms {
            // ensure alignment
            let align = if uniform.len() < 2 {
                4
            } else if uniform.len() == 2 {
                8
            } else {
                16
            };

            // keep adding 0 until we are aligned
            while buffer.len() & (align - 1) != 0 {
                buffer.push(0);
            }

            // add to the buffer
            for value in uniform.iter() {
                buffer.extend_from_slice(&value.to_ne_bytes());
            }
        }

        queue.write_buffer(&uniforms, 0, &buffer);

        // make the texture
        let texture = device.create_texture(&TextureDescriptor {
            label: Some(format!("{name}").leak()),
            size: Extent3d {
                width: config.width,
                height: config.height,
                depth_or_array_layers: 1,
            },
            mip_level_count: 1,
            sample_count: 1,
            dimension: TextureDimension::D2,
            format: TextureFormat::Rgba32Float,
            usage: TextureUsages::TEXTURE_BINDING
                | TextureUsages::RENDER_ATTACHMENT
                | TextureUsages::COPY_SRC,
            view_formats: &[TextureFormat::Rgba32Float],
        });

        // volume texture, if needed
        let volume_target = config.depth.map(|d| {
            device.create_texture(&TextureDescriptor {
                label: Some(format!("{name}").leak()),
                size: Extent3d {
                    width: config.width,
                    height: config.height,
                    depth_or_array_layers: d,
                },
                mip_level_count: 1,
                sample_count: 1,
                dimension: TextureDimension::D3,
                format: TextureFormat::Rgba32Float,
                usage: TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_DST,
                view_formats: &[TextureFormat::Rgba32Float],
            })
        });

        // start the pipeline and render to the texture
        for layer in 0..config.depth.unwrap_or(1) {
            // indicate where we are
            stdout()
                .queue(Clear(ClearType::CurrentLine))
                .expect("Failed to clear")
                .queue(Print(format!(
                    "({current_item}/{total_items}) buffer {name}: layer {}/{}",
                    layer + 1,
                    config.depth.unwrap_or(1)
                )))
                .expect("Failed to print")
                .execute(MoveToColumn(0))
                .expect("Failed to move cursor");

            let mut encoder = device.create_command_encoder(&CommandEncoderDescriptor {
                label: Some("encoder"),
            });

            // Debug label for this layer's pass. wgpu labels are borrowed `&str`s, so a string
            // that lives until the end of the iteration is enough; leaking one allocation per
            // layer (as `String::leak` did) is unnecessary.
            let pass_label = format!("{name}: layer {layer}");

            // One colour attachment: a default view of `texture`, cleared to black before the
            // draw and stored afterwards. The all-default view descriptor is exactly what was
            // previously spelled out field by field.
            let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
                label: Some(pass_label.as_str()),
                color_attachments: &[Some(RenderPassColorAttachment {
                    view: &texture.create_view(&TextureViewDescriptor::default()),
                    resolve_target: None,
                    ops: wgpu::Operations {
                        load: wgpu::LoadOp::Clear(Color::BLACK),
                        store: wgpu::StoreOp::Store,
                    },
                })],
                depth_stencil_attachment: None,
                timestamp_writes: None,
                occlusion_query_set: None,
            });

            pass.set_blend_constant(Color::WHITE);
            pass.set_pipeline(&pipeline);
            pass.set_push_constants(
                ShaderStages::FRAGMENT,
                0,
                &push_const_bytes(layer, config.width, config.height),
            );
            pass.set_bind_group(0, &bind_group, &[]);

            if let Some(q) = &query_set {
                pass.write_timestamp(&q, 0);
            }

            pass.draw(0..3, 0..1);

            if let Some(q) = &query_set {
                pass.write_timestamp(&q, 1);
            }

            std::mem::drop(pass);

            // copy texture if needed
            if let Some(x) = &volume_target {
                encoder.copy_texture_to_texture(
                    texture.as_image_copy(),
                    TexelCopyTextureInfo {
                        texture: &x,
                        mip_level: 0,
                        origin: Origin3d {
                            x: 0,
                            y: 0,
                            z: layer,
                        },
                        aspect: wgpu::TextureAspect::All,
                    },
                    Extent3d {
                        width: config.width,
                        height: config.height,
                        depth_or_array_layers: 1,
                    },
                )
            }

            if let (Some(q), Some(b), Some(s)) = (&query_set, &query_buffer, &query_staging) {
                // resolve
                encoder.resolve_query_set(q, 0..2, b, 0);

                // copy our buffer
                encoder.copy_buffer_to_buffer(b, 0, s, 0, 16);
            }

            // run shader
            queue.submit([encoder.finish()]);

            // resolve queries
            let receiver_and_slice = if let Some(s) = &query_staging {
                // map our buffer too
                let slice = s.slice(..);
                let (sender, receiver) = channel();

                // send it over
                slice.map_async(wgpu::MapMode::Read, move |v| {
                    sender.send(v).expect("Failed to map buffer");
                });

                Some((receiver, slice))
            } else {
                None
            };

            // wait a bit, prevents the gpu from freezing
            device.poll(wgpu::MaintainBase::Wait).panic_on_timeout();

            // read the buffer
            //
            // When timestamp queries are enabled, the staging buffer holds the two resolved
            // u64 ticks written around the draw: pass start at byte 0, pass end at byte 8.
            if let (Some((r, s)), Some(b)) = (receiver_and_slice, &query_staging) {
                if let Ok(Ok(())) = r.try_recv() {
                    let data = s.get_mapped_range();
                    let tick_at = |offset: usize| {
                        let mut raw = [0u8; 8];
                        raw.copy_from_slice(&data[offset..offset + 8]);
                        u64::from_ne_bytes(raw)
                    };
                    let start = tick_at(0);
                    let end = tick_at(8);

                    // release the mapped view before unmapping so the buffer can be reused
                    std::mem::drop(data);
                    b.unmap();

                    // Ticks to nanoseconds. `saturating_sub` avoids an overflow panic should a
                    // driver ever report end < start, and f64 keeps long passes exact where an
                    // f32 would start rounding the tick count.
                    let ticks = end.saturating_sub(start);
                    let time = (ticks as f64 * f64::from(queue.get_timestamp_period())) as u64;

                    // add the timing; a single `entry` lookup creates the record on first use
                    let timing = pass_timings
                        .entry(name.to_string())
                        .or_insert_with(|| Timings {
                            count: 0,
                            time: 0,
                            pixels: config.width as u64
                                * config.height as u64
                                * config.depth.unwrap_or(1) as u64,
                        });

                    timing.count += 1;
                    timing.time += time;
                } else {
                    panic!("Failed to read buffer")
                }
            }
        }

        // add the texture to the buffer list
        if let Some(x) = volume_target {
            volumes.insert(name, x);
        } else {
            buffers.insert(name, texture);
        }
    }

    rayon::scope(|s| {
        // render output images
        for (name, config) in config.outputs {
            current_item += 1;
            // skip if it's complex
            if config.complex.unwrap_or(false) && !complex {
                // don't print this if standalone
                if !standalone {
                    println!("({current_item}/{total_items}) Skipping {name} (marked as complex, but --complex was not passed)");
                }

                continue;
            }

            // shader code
            //
            // Paths ending in `.wgsl` are loaded as WGSL text, everything else as raw SPIR-V
            // bytes. In standalone mode the file is read from `archive` by name, otherwise
            // from disk relative to `base_path`.
            //
            // Failure messages are only formatted when a load actually fails
            // (`unwrap_or_else` + `panic!`), instead of eagerly on every call as
            // `expect(&format!(..))` did; the resulting panic text is unchanged.
            let shader_code = if config.path.ends_with(".wgsl") {
                let text = if standalone {
                    let mut file = archive
                        .by_name(&config.path)
                        .unwrap_or_else(|err| panic!("Failed to load shader {name}: {err:?}"));
                    let mut shader = String::new();
                    file.read_to_string(&mut shader)
                        .expect("Failed to read shader to string");
                    shader
                } else {
                    fs::read_to_string(base_path.join(&config.path))
                        .unwrap_or_else(|err| panic!("Failed to load shader {name}: {err:?}"))
                };

                wgpu::ShaderSource::Wgsl(text.into())
            } else {
                let bytes = if standalone {
                    let mut file = archive
                        .by_name(&config.path)
                        .unwrap_or_else(|err| panic!("Failed to load shader {name}: {err:?}"));
                    let mut shader = Vec::new();
                    file.read_to_end(&mut shader)
                        .expect("Failed to read shader bytes");
                    shader
                } else {
                    fs::read(base_path.join(&config.path))
                        .unwrap_or_else(|err| panic!("Failed to load shader {name}: {err:?}"))
                };

                wgpu::ShaderSource::SpirV(make_spirv_raw(&bytes).into_owned().into())
            };

            // load the shader
            let shader = device.create_shader_module(ShaderModuleDescriptor {
                label: Some(format!("output - {name}").leak()),
                source: shader_code,
            });

            // make the pipeline
            let pipeline = device.create_render_pipeline(&RenderPipelineDescriptor {
                label: Some(format!("output - pipeline - {name}").leak()),
                layout: Some(&pipeline_layout),
                vertex: VertexState {
                    module: &vertex_shader,
                    entry_point: Some("fullscreen_triangle"),
                    compilation_options: PipelineCompilationOptions {
                        constants: &Default::default(),
                        zero_initialize_workgroup_memory: false,
                    },
                    buffers: &[],
                },
                primitive: PrimitiveState {
                    topology: wgpu::PrimitiveTopology::TriangleList,
                    strip_index_format: None,
                    front_face: wgpu::FrontFace::Cw,
                    cull_mode: None,
                    unclipped_depth: false,
                    polygon_mode: wgpu::PolygonMode::Fill,
                    conservative: false,
                },
                depth_stencil: None,
                multisample: MultisampleState {
                    count: 1,
                    mask: !0,
                    alpha_to_coverage_enabled: false,
                },
                fragment: Some(FragmentState {
                    module: &shader,
                    entry_point: None,
                    compilation_options: PipelineCompilationOptions {
                        constants: &Default::default(),
                        zero_initialize_workgroup_memory: false,
                    },
                    targets: &[Some(ColorTargetState {
                        format: TextureFormat::Rgba32Float,
                        // disable blending if we are in standalone mode
                        // this should hopefully eliminate the overhead of blending
                        // when measuring performance
                        blend: if !standalone {
                            Some(BlendState {
                                color: BlendComponent {
                                    src_factor: wgpu::BlendFactor::Constant,
                                    dst_factor: wgpu::BlendFactor::One,
                                    operation: wgpu::BlendOperation::Add,
                                },
                                alpha: BlendComponent {
                                    src_factor: wgpu::BlendFactor::Constant,
                                    dst_factor: wgpu::BlendFactor::One,
                                    operation: wgpu::BlendOperation::Add,
                                },
                            })
                        } else {
                            None
                        },
                        write_mask: ColorWrites::ALL,
                    })],
                }),
                multiview: None,
                cache: None,
            });

            // Views for the 8 buffer slots: the buffers named by this output, in config order,
            // followed by views of `default_buffer` for every slot that is left over.
            // `TextureViewDescriptor::default()` is the same all-default descriptor that was
            // previously written out by hand. Panics if a named buffer does not exist.
            let bufs = config
                .buffers
                .iter()
                .map(|x| {
                    buffers
                        .get(x)
                        .unwrap_or_else(|| panic!("Failed to get {x}"))
                        .create_view(&TextureViewDescriptor::default())
                })
                .chain(repeat_with(|| {
                    default_buffer.create_view(&TextureViewDescriptor::default())
                }))
                .take(8)
                .collect::<Vec<_>>();

            // Same scheme for the 4 volume slots, padded with views of `default_volume`.
            let vols = config
                .volumes
                .iter()
                .map(|x| {
                    volumes
                        .get(x)
                        .unwrap_or_else(|| panic!("Failed to get {x}"))
                        .create_view(&TextureViewDescriptor::default())
                })
                .chain(repeat_with(|| {
                    default_volume.create_view(&TextureViewDescriptor::default())
                }))
                .take(4)
                .collect::<Vec<_>>();

            // create the bind group
            let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
                label: None,
                layout: &layout,
                entries: &[
                    // buffers
                    BindGroupEntry {
                        binding: 0,
                        resource: wgpu::BindingResource::TextureView(&bufs[0]),
                    },
                    BindGroupEntry {
                        binding: 1,
                        resource: wgpu::BindingResource::TextureView(&bufs[1]),
                    },
                    BindGroupEntry {
                        binding: 2,
                        resource: wgpu::BindingResource::TextureView(&bufs[2]),
                    },
                    BindGroupEntry {
                        binding: 3,
                        resource: wgpu::BindingResource::TextureView(&bufs[3]),
                    },
                    BindGroupEntry {
                        binding: 4,
                        resource: wgpu::BindingResource::TextureView(&bufs[4]),
                    },
                    BindGroupEntry {
                        binding: 5,
                        resource: wgpu::BindingResource::TextureView(&bufs[5]),
                    },
                    BindGroupEntry {
                        binding: 6,
                        resource: wgpu::BindingResource::TextureView(&bufs[6]),
                    },
                    BindGroupEntry {
                        binding: 7,
                        resource: wgpu::BindingResource::TextureView(&bufs[7]),
                    }, // volumes
                    BindGroupEntry {
                        binding: 8,
                        resource: wgpu::BindingResource::TextureView(&vols[0]),
                    },
                    BindGroupEntry {
                        binding: 9,
                        resource: wgpu::BindingResource::TextureView(&vols[1]),
                    },
                    BindGroupEntry {
                        binding: 10,
                        resource: wgpu::BindingResource::TextureView(&vols[2]),
                    },
                    BindGroupEntry {
                        binding: 11,
                        resource: wgpu::BindingResource::TextureView(&vols[3]),
                    },
                    // uniforms
                    BindGroupEntry {
                        binding: 12,
                        resource: wgpu::BindingResource::Buffer(BufferBinding {
                            buffer: &uniforms,
                            offset: 0,
                            size: None,
                        }),
                    },
                    // samplers
                    BindGroupEntry {
                        binding: 13,
                        resource: wgpu::BindingResource::Sampler(&interp_sampler),
                    },
                    BindGroupEntry {
                        binding: 14,
                        resource: wgpu::BindingResource::Sampler(&nearest_sampler),
                    },
                    // global uniforms
                    BindGroupEntry {
                        binding: 15,
                        resource: wgpu::BindingResource::Buffer(BufferBinding {
                            buffer: &globals,
                            offset: 0,
                            size: None,
                        }),
                    },
                ],
            });

            // write the uniforms
            let mut buffer = Vec::new();
            for (_, uniform) in config.uniforms {
                // ensure alignment
                let align = if uniform.len() < 2 {
                    4
                } else if uniform.len() == 2 {
                    8
                } else {
                    16
                };

                // keep adding 0 until we are aligned
                while buffer.len() & (align - 1) != 0 {
                    buffer.push(0);
                }

                // add to the buffer
                for value in uniform.iter() {
                    buffer.extend_from_slice(&value.to_ne_bytes());
                }
            }

            queue.write_buffer(&uniforms, 0, &buffer);

            // make the texture
            let texture = device.create_texture(&TextureDescriptor {
                label: Some(format!("{name}").leak()),
                size: Extent3d {
                    width: config.width,
                    height: config.height,
                    depth_or_array_layers: 1,
                },
                mip_level_count: 1,
                sample_count: 1,
                dimension: TextureDimension::D2,
                format: TextureFormat::Rgba32Float,
                usage: TextureUsages::RENDER_ATTACHMENT | TextureUsages::COPY_SRC,
                view_formats: &[TextureFormat::Rgba32Float],
            });

            // start the pipeline and render to the texture

            // number of passes here, if in standalone performance mode
            // run with more to do a warm start
            let num_passes = if standalone {
                NUM_PERF_PASSES
            } else {
                config.passes.unwrap_or(1)
            };
            let inv_passes = 1.0 / num_passes as f64;
            for iter in 0..num_passes {
                // indicate where we are
                stdout()
                    .queue(Clear(ClearType::CurrentLine))
                    .expect("Failed to clear")
                    .queue(Print(format!(
                        "({current_item}/{total_items}) output {name}: pass {}/{}",
                        iter + 1,
                        num_passes
                    )))
                    .expect("Failed to print")
                    .execute(MoveToColumn(0))
                    .expect("Failed to move cursor");

                let mut encoder = device.create_command_encoder(&CommandEncoderDescriptor {
                    label: Some("encoder"),
                });

                // Debug label for this pass. wgpu labels are borrowed `&str`s, so a string
                // that lives until the end of the iteration is enough; `String::leak` here
                // leaked one allocation per pass (NUM_PERF_PASSES of them per output in
                // standalone mode).
                let pass_label = format!("output {name}: pass {iter}");

                // One colour attachment: a default view of `texture`, loaded rather than
                // cleared so earlier passes' results are kept. The all-default view
                // descriptor is exactly what was previously spelled out field by field.
                let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
                    label: Some(pass_label.as_str()),
                    color_attachments: &[Some(RenderPassColorAttachment {
                        view: &texture.create_view(&TextureViewDescriptor::default()),
                        resolve_target: None,
                        ops: wgpu::Operations {
                            load: wgpu::LoadOp::Load,
                            store: wgpu::StoreOp::Store,
                        },
                    })],
                    depth_stencil_attachment: None,
                    timestamp_writes: None,
                    occlusion_query_set: None,
                });

                pass.set_blend_constant(Color {
                    r: inv_passes,
                    g: inv_passes,
                    b: inv_passes,
                    a: inv_passes,
                });
                pass.set_pipeline(&pipeline);
                pass.set_push_constants(
                    ShaderStages::FRAGMENT,
                    0,
                    &push_const_bytes(iter, config.width, config.height),
                );
                pass.set_bind_group(0, &bind_group, &[]);

                if let Some(q) = &query_set {
                    pass.write_timestamp(&q, 0);
                }

                pass.draw(0..3, 0..1);

                if let Some(q) = &query_set {
                    pass.write_timestamp(&q, 1);
                }

                std::mem::drop(pass);

                if let (Some(q), Some(b), Some(s)) = (&query_set, &query_buffer, &query_staging) {
                    // resolve
                    encoder.resolve_query_set(q, 0..2, b, 0);

                    // copy our buffer
                    encoder.copy_buffer_to_buffer(b, 0, s, 0, 16);
                }

                // run shader
                queue.submit([encoder.finish()]);

                // resolve queries
                let receiver_and_slice = if let Some(s) = &query_staging {
                    // map our buffer too
                    let slice = s.slice(..);
                    let (sender, receiver) = channel();

                    // send it over
                    slice.map_async(wgpu::MapMode::Read, move |v| {
                        sender.send(v).expect("Failed to map buffer");
                    });

                    Some((receiver, slice))
                } else {
                    None
                };

                // wait a bit, prevents the gpu from freezing
                device.poll(wgpu::MaintainBase::Wait).panic_on_timeout();

                // read the buffer
                //
                // When timestamp queries are enabled, the staging buffer holds the two
                // resolved u64 ticks written around the draw: pass start at byte 0, pass end
                // at byte 8.
                if let (Some((r, s)), Some(b)) = (receiver_and_slice, &query_staging) {
                    if let Ok(Ok(())) = r.try_recv() {
                        let data = s.get_mapped_range();
                        let tick_at = |offset: usize| {
                            let mut raw = [0u8; 8];
                            raw.copy_from_slice(&data[offset..offset + 8]);
                            u64::from_ne_bytes(raw)
                        };
                        let start = tick_at(0);
                        let end = tick_at(8);

                        // release the mapped view before unmapping so the buffer can be reused
                        std::mem::drop(data);
                        b.unmap();

                        // Ticks to nanoseconds. `saturating_sub` avoids an overflow panic
                        // should a driver ever report end < start, and f64 keeps long passes
                        // exact where an f32 would start rounding the tick count.
                        let ticks = end.saturating_sub(start);
                        let time =
                            (ticks as f64 * f64::from(queue.get_timestamp_period())) as u64;

                        // add the timing, but only if we are at the right iteration
                        // this ensures a warm start
                        if !standalone || iter >= NUM_PERF_PASSES / 2 {
                            // a single `entry` lookup creates the record on first use
                            let timing = out_timings
                                .entry(name.to_string())
                                .or_insert_with(|| Timings {
                                    count: 0,
                                    time: 0,
                                    pixels: config.width as u64 * config.height as u64,
                                });

                            timing.count += 1;
                            timing.time += time;
                        }
                    } else {
                        panic!("Failed to read buffer")
                    }
                }
            }

            // stop here, as we don't need to output anything
            if standalone {
                continue;
            }

            // staging buffer to read from
            //
            // Rgba32Float is 16 bytes per texel. wgpu only accepts texture-to-buffer copies
            // whose `bytes_per_row` is a multiple of `COPY_BYTES_PER_ROW_ALIGNMENT`, so every
            // row is padded up to that alignment in the staging buffer and the padding is
            // stripped again once the buffer is mapped. For widths that already satisfy the
            // alignment, the padded and tight sizes are equal.
            let row_bytes = config.width * 16;
            let padded_row_bytes = row_bytes.next_multiple_of(wgpu::COPY_BYTES_PER_ROW_ALIGNMENT);

            // the label is only borrowed by wgpu, so it does not need to be leaked
            let staging_label = format!("staging {name}");
            let staging = device.create_buffer(&BufferDescriptor {
                label: Some(staging_label.as_str()),
                size: padded_row_bytes as u64 * config.height as u64,
                usage: BufferUsages::MAP_READ | BufferUsages::COPY_DST,
                mapped_at_creation: false,
            });

            // new encoder, copy to the staging buffer with this
            let mut encoder = device.create_command_encoder(&CommandEncoderDescriptor {
                label: Some("texture copier"),
            });

            encoder.copy_texture_to_buffer(
                texture.as_image_copy(),
                TexelCopyBufferInfo {
                    buffer: &staging,
                    layout: TexelCopyBufferLayout {
                        offset: 0,
                        bytes_per_row: Some(padded_row_bytes),
                        rows_per_image: None,
                    },
                },
                Extent3d {
                    width: config.width,
                    height: config.height,
                    depth_or_array_layers: 1,
                },
            );

            queue.submit([encoder.finish()]);

            // map the buffer
            let slice = staging.slice(..);
            let (sender, receiver) = channel();

            // send it over, once mapped, we can read it
            slice.map_async(wgpu::MapMode::Read, move |v| {
                sender.send(v).expect("Failed to map buffer")
            });

            // wait for all GPU operations to be done so we can read our buffer
            device.poll(wgpu::MaintainBase::wait()).panic_on_timeout();

            // read our buffers
            if let Ok(Ok(())) = receiver.try_recv() {
                // collect as f32: drop the per-row padding and clamp negatives to zero
                let mapped = slice.get_mapped_range();
                let data = mapped
                    .chunks_exact(padded_row_bytes as usize)
                    .flat_map(|row| row[..row_bytes as usize].chunks_exact(4))
                    .map(|x| f32::from_ne_bytes([x[0], x[1], x[2], x[3]]).max(0.0))
                    .collect::<Vec<f32>>();
                drop(mapped);

                // to image; this linear f32 image is never modified afterwards, so every
                // export format below sees the same source data
                let image = Rgba32FImage::from_raw(config.width, config.height, data)
                    .expect("Failed to make image");

                let exts = extensions.clone().into_iter();
                let output = output.clone();
                let task = move || {
                    // Tone-mapped 8-bit copy shared by all low-dynamic-range formats. It is
                    // built lazily from the untouched linear image, so requesting several
                    // formats neither tone-maps twice nor alters what `exr` writes.
                    let mut ldr: Option<RgbaImage> = None;

                    // save the image to our output path
                    for ext in exts {
                        if ext == "exr" {
                            // save full range f32 image
                            image
                                .save_with_format(
                                    output.join(format!("{name}.{ext}")),
                                    image::ImageFormat::OpenExr,
                                )
                                .expect("Failed to save image");
                        } else if ext == "npy" {
                            // header data
                            let mut header = format!(
                                "{{ 'descr': '<f4', 'fortran_order': False, 'shape': ({}, {}, 4), }}",
                                config.height, config.width
                            );

                            // Pad with spaces so the payload starts on a 64-byte boundary.
                            // 13 = magic (6) + version (2) + header length field (4) + the
                            // newline (1) that terminates the header.
                            while (13 + header.len()) % 64 != 0 {
                                header.push('\x20');
                            }

                            let raw = image.as_raw();
                            let mut npy = Vec::with_capacity(13 + header.len() + raw.len() * 4);

                            // see https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html
                            // header
                            npy.extend_from_slice(b"\x93NUMPY"); // magic number
                            npy.extend(b"\x03\x00"); // major version (3) and minor version (0)
                            npy.extend((header.len() as u32 + 1).to_le_bytes()); // header length
                            npy.extend(header.as_bytes()); // header
                            npy.extend(b"\n"); // newline ending the header

                            // actual data
                            npy.extend(raw.as_bytes());

                            // save
                            fs::write(output.join(format!("{name}.{ext}")), npy)
                                .expect("Failed to save numpy array");
                        } else {
                            let converted = ldr.get_or_insert_with(|| {
                                let mut mapped = image.clone();

                                // convert to nonlinear sRGB, as image expects that
                                for pixel in mapped.pixels_mut() {
                                    // simple tonemap followed by linear to nonlinear sRGB on
                                    // the colour channels; alpha is passed through unchanged
                                    *pixel = pixel.map_with_alpha(
                                        |x| (1.0 - (-x).exp()).powf(1.0 / 2.2),
                                        |a| a,
                                    );
                                    // clamp
                                    *pixel = pixel.map(|x| x.max(0.0).min(1.0));
                                }

                                // convert to rgba u8 to not crash on encoding to something like png
                                let converted: RgbaImage = mapped.convert();
                                converted
                            });

                            converted
                                .save(output.join(format!("{name}.{ext}")))
                                .expect("Failed to save image");
                        }
                    }
                };

                s.spawn(|_| task());
            } else {
                panic!("Failed to read buffer");
            }
        }
    });

    // done, end capture
    if renderdoc {
        device.stop_capture();
    }

    // clear for nicer printing
    stdout()
        .execute(Clear(ClearType::CurrentLine))
        .expect("Failed to clear");

    // write out the timings
    if profile || standalone {
        let mut file = Cursor::new(Vec::new());

        // gpu
        write!(
            file,
            "[gpu]\nname = {:?}\nbackend = {:?}\ndriver = {:?}\n\n",
            adapter.get_info().name,
            adapter.get_info().backend.to_str(),
            adapter.get_info().driver_info
        )
        .unwrap();

        write!(file, "# times are in nanoseconds\n").unwrap();

        for (name, timings) in pass_timings {
            write!(
                file,
                "[passes.{}]\ntotal = {}\naverage = {}\ncount = {}\nper-pixel = {}\n\n",
                name,
                timings.time,
                timings.time / timings.count as u64,
                timings.count,
                timings.time / timings.pixels
            )
            .unwrap();
        }

        for (name, timings) in out_timings {
            write!(
                file,
                "[outputs.{}]\ntotal = {}\naverage = {}\ncount = {}\nper-pixel = {}\n\n",
                name,
                timings.time,
                timings.time / timings.count as u64,
                timings.count,
                timings.time / timings.pixels,
            )
            .unwrap();
        }

        if !standalone {
            // write out
            fs::write(output.join("timings.toml"), file.get_ref())
                .expect("Failed to create timings file");

            println!(
                "Written timings to `{}`",
                output.join("timings.toml").to_string_lossy()
            );
        } else {
            // copy to clipboard
            let mut clip = Clipboard::new().expect("Failed to get clipboard");
            if clip
                .set_text(String::from_utf8_lossy(file.get_ref()))
                .is_err()
            {
                println!("{}", String::from_utf8_lossy(file.get_ref()));
            } else {
                println!("Timings copied to clipboard");
            }

            // wait for input so we can paste
            let mut buf = String::new();
            println!("You may now close the program (ctrl-d)");
            stdin().read_line(&mut buf).expect("failed to read stdio");
        }
    }
}
