use anyhow::Result;
use bytemuck;
use cgmath::{Deg, Matrix4, Point2, Vector3};
use std::{iter, mem};
use wgpu::{self, util::DeviceExt};
use winit::{self, window::Window};

use super::texturearray::TextureArray;
use crate::Sprite;

/// Owns the window and every GPU resource needed to draw sprites into it.
///
/// Built by `GPUState::new`. `render` draws one frame and `resize` keeps the
/// surface configuration in sync with the window size.
pub struct GPUState {
	// Core wgpu handles.
	// Note: fields are dropped in declaration order, so `surface` is dropped
	// before `window`, which it was created from.
	device: wgpu::Device,
	config: wgpu::SurfaceConfiguration,
	surface: wgpu::Surface,
	queue: wgpu::Queue,

	/// The window the surface was created from.
	pub window: Window,
	/// Size the surface was last configured with (updated by `resize`).
	pub size: winit::dpi::PhysicalSize<u32>,

	render_pipeline: wgpu::RenderPipeline,

	// Unit quad shared by every sprite (`VERTICES` / `INDICES`).
	vertex_buffer: wgpu::Buffer,
	index_buffer: wgpu::Buffer,
	// Sprite textures; provides the bind group used by `render`.
	texture_array: TextureArray,
	// Per-sprite `InstanceRaw` data, rewritten every frame by `render`.
	instance_buffer: wgpu::Buffer,
}

// Converts OpenGL-style clip space (depth in -1..=1), which cgmath assumes,
// to wgpu clip space (depth in 0..=1):
//   z' = 0.5 * z + 0.5 * w
//   w' = w
//
// `Matrix4::new` takes its arguments in COLUMN-major order, so each line
// below is one column of the matrix, not one row. (Writing the matrix
// row-by-row here silently produces its transpose.)
#[rustfmt::skip]
const OPENGL_TO_WGPU_MATRIX: Matrix4<f32> = Matrix4::new(
	1.0, 0.0, 0.0, 0.0,
	0.0, 1.0, 0.0, 0.0,
	0.0, 0.0, 0.5, 0.0,
	0.0, 0.0, 0.5, 1.0,
);

// One sprite to draw this frame, in CPU-friendly form.
// Converted to `InstanceRaw` (see `to_raw`) before being written to the GPU.
struct Instance {
	// Placement of the sprite (position, aspect, scale, rotation).
	transform: Transform,
	// Which texture to draw; `render` fills this from the texture's `index`.
	texture_index: u32,
}
impl Instance {
	// Flattens this instance into the plain-data layout stored in the
	// instance buffer: the transform becomes a 4x4 float array, the texture
	// index is copied through unchanged.
	fn to_raw(&self) -> InstanceRaw {
		let model: [[f32; 4]; 4] = self.transform.build_view_projection_matrix().into();
		let texture_index = self.texture_index;

		InstanceRaw {
			model,
			texture_index,
		}
	}
}

// GPU-side layout of one sprite instance; this is the data `render` writes
// into the instance buffer.
// `#[repr(C)]` fixes the field order, which the byte offsets in
// `InstanceRaw::desc` rely on.
#[repr(C)]
#[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
struct InstanceRaw {
	// Transform matrix, passed to the shader as four vec4 attributes
	// (shader locations 5 through 8).
	model: [[f32; 4]; 4],
	// Texture selector, passed to the shader at location 9.
	texture_index: u32,
}

impl InstanceRaw {
	// Size in bytes of one instance as stored in the instance buffer.
	//
	// This must match `array_stride` in `desc()`, so it is derived from the
	// type itself rather than hard-coded: a 4x4 f32 matrix plus one u32.
	fn get_size() -> u64 {
		mem::size_of::<Self>() as u64
	}

	// Vertex buffer layout describing how the shader reads one `InstanceRaw`.
	fn desc() -> wgpu::VertexBufferLayout<'static> {
		wgpu::VertexBufferLayout {
			array_stride: mem::size_of::<Self>() as wgpu::BufferAddress,
			// Step once per instance rather than once per vertex:
			// every vertex of a given sprite sees the same instance data.
			step_mode: wgpu::VertexStepMode::Instance,
			attributes: &[
				// A mat4 takes up 4 vertex slots, since it is really 4 vec4s.
				// We define one slot per vec4 and reassemble the mat4 in the shader.
				wgpu::VertexAttribute {
					offset: 0,
					// `Vertex` uses locations 0 and 1. Instance data starts at 5,
					// leaving 2..=4 free for additional per-vertex attributes.
					shader_location: 5,
					format: wgpu::VertexFormat::Float32x4,
				},
				wgpu::VertexAttribute {
					offset: mem::size_of::<[f32; 4]>() as wgpu::BufferAddress,
					shader_location: 6,
					format: wgpu::VertexFormat::Float32x4,
				},
				wgpu::VertexAttribute {
					offset: mem::size_of::<[f32; 8]>() as wgpu::BufferAddress,
					shader_location: 7,
					format: wgpu::VertexFormat::Float32x4,
				},
				wgpu::VertexAttribute {
					offset: mem::size_of::<[f32; 12]>() as wgpu::BufferAddress,
					shader_location: 8,
					format: wgpu::VertexFormat::Float32x4,
				},
				// Texture index, stored directly after the matrix.
				wgpu::VertexAttribute {
					offset: mem::size_of::<[f32; 16]>() as wgpu::BufferAddress,
					shader_location: 9,
					format: wgpu::VertexFormat::Uint32,
				},
			],
		}
	}
}

// Placement of a single sprite. Turned into a matrix by
// `build_view_projection_matrix`.
struct Transform {
	// Where the sprite's center ends up; used directly as the x/y translation.
	pos: Point2<f32>,
	// width / height. The sprite's height is multiplied by `1.0 / aspect`.
	// `render` passes the texture's aspect divided by the screen's aspect.
	aspect: f32,
	// Uniform scale factor applied to the unit quad.
	scale: f32,
	// Around this object's center, in degrees measured ccw from vertical
	rotate: f32,
}

impl Transform {
	fn build_view_projection_matrix(&self) -> Matrix4<f32> {
		// Apply aspect ratio and scale
		let mut scale = Matrix4::from_nonuniform_scale(1.0, 1.0 / self.aspect, 1.0);
		scale = scale * Matrix4::from_scale(self.scale);

		// Our mesh starts at (0, 0), so this will rotate around the object's center.
		// Note that we translate AFTER scaling.
		let rotate = Matrix4::from_angle_z(Deg { 0: self.rotate });

		let translate = Matrix4::from_translation(Vector3 {
			x: self.pos.x,
			y: self.pos.y,
			z: 0.0,
		});

		// Order matters!
		// These are applied right-to-left
		return OPENGL_TO_WGPU_MATRIX * translate * rotate * scale;
	}
}

// Element type of the vertex buffer: one corner of the sprite quad.
// `#[repr(C)]` fixes the field order, which the byte offsets in
// `Vertex::desc` rely on.
#[repr(C)]
#[derive(Copy, Clone, Debug, bytemuck::Pod, bytemuck::Zeroable)]
struct Vertex {
	// x, y, z of the corner before any instance transform (shader location 0).
	position: [f32; 3],
	// Texture coordinate at this corner (shader location 1).
	tex_coords: [f32; 2],
}

impl Vertex {
	// Per-vertex attributes, in field order:
	// position (3 floats) at location 0, then tex_coords (2 floats) at location 1.
	const ATTRIBUTES: [wgpu::VertexAttribute; 2] = [
		wgpu::VertexAttribute {
			offset: 0,
			shader_location: 0,
			format: wgpu::VertexFormat::Float32x3,
		},
		wgpu::VertexAttribute {
			// tex_coords follow directly after the three position floats.
			offset: mem::size_of::<[f32; 3]>() as wgpu::BufferAddress,
			shader_location: 1,
			format: wgpu::VertexFormat::Float32x2,
		},
	];

	// Vertex buffer layout describing how the shader reads one `Vertex`.
	fn desc() -> wgpu::VertexBufferLayout<'static> {
		wgpu::VertexBufferLayout {
			array_stride: mem::size_of::<Self>() as wgpu::BufferAddress,
			step_mode: wgpu::VertexStepMode::Vertex,
			attributes: &Self::ATTRIBUTES,
		}
	}
}

// The four corners of a unit quad, shared by every sprite.
// This is centered at 0,0 intentionally, so that scaling and rotation
// happen around the sprite's center.
// Texture coordinates run from (0, 0) at the top-left corner
// to (1, 1) at the bottom-right corner.
const VERTICES: &[Vertex] = &[
	// 0: top-left
	Vertex {
		position: [-0.5, 0.5, 0.0],
		tex_coords: [0.0, 0.0],
	},
	// 1: top-right
	Vertex {
		position: [0.5, 0.5, 0.0],
		tex_coords: [1.0, 0.0],
	},
	// 2: bottom-right
	Vertex {
		position: [0.5, -0.5, 0.0],
		tex_coords: [1.0, 1.0],
	},
	// 3: bottom-left
	Vertex {
		position: [-0.5, -0.5, 0.0],
		tex_coords: [0.0, 1.0],
	},
];

// Indices into `VERTICES` forming the quad from two triangles.
// Both triangles list their corners counter-clockwise, matching the
// pipeline's `FrontFace::Ccw` setting (back faces are culled).
const INDICES: &[u16] = &[0, 3, 2, 0, 2, 1];

impl GPUState {
	// We can draw at most this many sprites on the screen.
	// The instance buffer created in `new` is sized as
	// `InstanceRaw::get_size() * SPRITE_LIMIT`, and `render` panics rather
	// than drawing more sprites than this.
	// TODO: compile-time option
	pub const SPRITE_LIMIT: u64 = 100;

	pub async fn new(window: Window) -> Result<Self> {
		let size = window.inner_size();

		let instance = wgpu::Instance::new(wgpu::InstanceDescriptor {
			backends: wgpu::Backends::all(),
			..Default::default()
		});

		let surface = unsafe { instance.create_surface(&window) }.unwrap();

		// Basic setup
		let device;
		let queue;
		let config;

		{
			let adapter = instance
				.request_adapter(&wgpu::RequestAdapterOptions {
					power_preference: wgpu::PowerPreference::default(),
					compatible_surface: Some(&surface),
					force_fallback_adapter: false,
				})
				.await
				.unwrap();

			(device, queue) = adapter
				.request_device(
					&wgpu::DeviceDescriptor {
						features: wgpu::Features::TEXTURE_BINDING_ARRAY | wgpu::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING,
						// We may need limits if we compile for wasm
						limits: wgpu::Limits::default(),
						label: Some("gpu device"),
					},
					None,
				)
				.await
				.unwrap();

			// Assume sRGB
			let surface_caps = surface.get_capabilities(&adapter);
			let surface_format = surface_caps
				.formats
				.iter()
				.copied()
				.filter(|f| f.is_srgb())
				.filter(|f| f.has_stencil_aspect())
				.next()
				.unwrap_or(surface_caps.formats[0]);

			config = wgpu::SurfaceConfiguration {
				usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
				format: surface_format,
				width: size.width,
				height: size.height,
				present_mode: surface_caps.present_modes[0],
				alpha_mode: surface_caps.alpha_modes[0],
				view_formats: vec![],
			};

			surface.configure(&device, &config);
		}

		// Load textures
		let texture_array = TextureArray::new(&device, &queue)?;

		// Render pipeline
		let render_pipeline;
		let render_pipeline_layout;

		{
			let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
				label: Some("sprite shader"),
				source: wgpu::ShaderSource::Wgsl(
					include_str!(concat!(
						env!("CARGO_MANIFEST_DIR"),
						"/src/render/shaders/",
						"shader.wgsl"
					))
					.into(),
				),
			});

			render_pipeline_layout =
				device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
					label: Some("render pipeline layout"),
					bind_group_layouts: &[&texture_array.bind_group_layout],
					push_constant_ranges: &[],
				});

			render_pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
				label: Some("render pipeline"),
				layout: Some(&render_pipeline_layout),
				vertex: wgpu::VertexState {
					module: &shader,
					entry_point: "vertex_shader_main",
					buffers: &[Vertex::desc(), InstanceRaw::desc()],
				},
				fragment: Some(wgpu::FragmentState {
					module: &shader,
					entry_point: "fragment_shader_main",
					targets: &[Some(wgpu::ColorTargetState {
						format: config.format,
						blend: Some(wgpu::BlendState::ALPHA_BLENDING),
						write_mask: wgpu::ColorWrites::ALL,
					})],
				}),

				primitive: wgpu::PrimitiveState {
					topology: wgpu::PrimitiveTopology::TriangleList,
					strip_index_format: None,
					front_face: wgpu::FrontFace::Ccw,
					cull_mode: Some(wgpu::Face::Back),
					polygon_mode: wgpu::PolygonMode::Fill,
					unclipped_depth: false,
					conservative: false,
				},

				depth_stencil: None,
				multisample: wgpu::MultisampleState {
					count: 1,
					mask: !0,
					alpha_to_coverage_enabled: false,
				},
				multiview: None,
			});
		}

		let vertex_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
			label: Some("vertex buffer"),
			contents: bytemuck::cast_slice(VERTICES),
			usage: wgpu::BufferUsages::VERTEX,
		});

		let index_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
			label: Some("vertex index buffer"),
			contents: bytemuck::cast_slice(INDICES),
			usage: wgpu::BufferUsages::INDEX,
		});

		let instance_buffer = device.create_buffer(&wgpu::BufferDescriptor {
			label: Some("instance buffer"),
			usage: wgpu::BufferUsages::VERTEX | wgpu::BufferUsages::COPY_DST,
			size: InstanceRaw::get_size() * Self::SPRITE_LIMIT,
			mapped_at_creation: false,
		});

		return Ok(Self {
			surface,
			device,
			queue,
			config,
			size,
			window,
			render_pipeline,
			vertex_buffer,
			index_buffer,
			instance_buffer,
			texture_array,
		});
	}

	/// Returns a reference to the window this state renders into.
	pub fn window(&self) -> &Window {
		&self.window
	}

	/// Reconfigures the surface for a new window size.
	///
	/// A size with a zero width or height is ignored and leaves the current
	/// configuration untouched.
	pub fn resize(&mut self, new_size: winit::dpi::PhysicalSize<u32>) {
		let is_empty = new_size.width == 0 || new_size.height == 0;
		if is_empty {
			return;
		}

		self.config.width = new_size.width;
		self.config.height = new_size.height;
		self.size = new_size;
		self.surface.configure(&self.device, &self.config);
	}

	/// Update hook. Currently does nothing.
	pub fn update(&mut self) {}

	pub fn render(&mut self, sprites: &Vec<Sprite>) -> Result<(), wgpu::SurfaceError> {
		let output = self.surface.get_current_texture()?;
		let view = output
			.texture
			.create_view(&wgpu::TextureViewDescriptor::default());

		let mut encoder = self
			.device
			.create_command_encoder(&wgpu::CommandEncoderDescriptor {
				label: Some("sprite render encoder"),
			});

		let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
			label: Some("sprite render pass"),

			color_attachments: &[Some(wgpu::RenderPassColorAttachment {
				view: &view,
				resolve_target: None,
				ops: wgpu::Operations {
					load: wgpu::LoadOp::Clear(wgpu::Color {
						r: 0.0,
						g: 0.0,
						b: 0.0,
						a: 1.0,
					}),
					store: wgpu::StoreOp::Store,
				},
			})],
			depth_stencil_attachment: None,
			occlusion_query_set: None,
			timestamp_writes: None,
		});

		// Correct for screen aspect ratio
		// (it may not be square!)
		let screen_aspect = self.size.width as f32 / self.size.height as f32;

		// TODO: warning when too many sprites are drawn.
		let mut instances: Vec<Instance> = Vec::new();
		for s in sprites {
			let mut pos: Point2<f32> = (s.position.0 as f32, s.position.1 as f32).into();

			// TODO: dynamic
			pos.x /= 400.0;
			pos.y /= 400.0;

			let texture = self.texture_array.get_texture(&s.name[..]);

			instances.push(Instance {
				transform: Transform {
					pos,
					aspect: texture.aspect / screen_aspect,
					scale: 0.25,
					rotate: s.angle,
				},
				texture_index: texture.index,
			})
		}

		// Enforce sprite limit
		if sprites.len() as u64 >= Self::SPRITE_LIMIT {
			// TODO: no panic, handle this better.
			panic!("Sprite limit exceeded!")
		}

		// Write new sprite data to buffer
		let instance_data: Vec<_> = instances.iter().map(Instance::to_raw).collect();
		self.queue.write_buffer(
			&self.instance_buffer,
			0,
			bytemuck::cast_slice(&instance_data),
		);

		render_pass.set_pipeline(&self.render_pipeline);
		render_pass.set_bind_group(0, &self.texture_array.bind_group, &[]);
		render_pass.set_vertex_buffer(0, self.vertex_buffer.slice(..));
		render_pass.set_vertex_buffer(1, self.instance_buffer.slice(..));
		render_pass.set_index_buffer(self.index_buffer.slice(..), wgpu::IndexFormat::Uint16);
		render_pass.draw_indexed(0..INDICES.len() as u32, 0, 0..instances.len() as _);

		// begin_render_pass borrows encoder mutably, so we can't call finish()
		// without dropping this variable.
		drop(render_pass);

		self.queue.submit(iter::once(encoder.finish()));
		output.present();

		Ok(())
	}
}