From 4b44cfba678dc5fe8c4ee6987155004b80d2c98b Mon Sep 17 00:00:00 2001 From: Martijn Gerritsen Date: Mon, 8 Dec 2025 16:43:09 +0100 Subject: [PATCH] aoc3 --- src/aoc/aoc_3.rs | 67 +++++++++++++++++++++++++++++++----------------- 1 file changed, 44 insertions(+), 23 deletions(-) diff --git a/src/aoc/aoc_3.rs b/src/aoc/aoc_3.rs index 1195e4e..7524cfb 100644 --- a/src/aoc/aoc_3.rs +++ b/src/aoc/aoc_3.rs @@ -1,3 +1,4 @@ +use std::simd::prelude::*; pub fn solve_p1(input: &[u8]) -> u32 { let mut answer = 0; let mut i = 1; @@ -36,37 +37,58 @@ pub fn solve_p1(input: &[u8]) -> u32 { pub fn solve_p1_fast(input: &[u8]) -> u32 { let mut answer = 0; - let mut i = 1; - let len = input.len(); + let size = input.len()/100 -1; + //let size = 1; + const LANES: usize = 33; + let mut idx = 0; + type SimdVec = Simd; + for j in 0..size { + let line = &input[idx..idx+100]; + //println!("{:?}", line); + for i in (1..=9) { + let target = SimdVec::splat(58-i); - let mut num = input[0]; - let mut num2 = input[1]; - while i < len { - let mut j = i; - num = input[j-1]; - num2 = input[j]; + let (prefix, chunks, suffix) = line.as_simd::(); - while input[j+1] != 10 { + // 4. Process the main SIMD chunks + let mask :u128 = (chunks[0].simd_eq(target).to_bitmask() as u128) | (chunks[1].simd_eq(target).to_bitmask() as u128) << LANES | (chunks[2].simd_eq(target).to_bitmask() as u128) << 2*LANES ; + let count = mask.count_ones() ; + // println!("{:?} ", count); - if input[j] > num { - num = input[j]; - num2 = input[j + 1]; - } else if input[j] > num2 { - num2 = input[j]; - } - j +=1; - if j+2 == len { + if count >= 2 { + answer += ((10 * (10 - i)) + 10 -i) as u32; + break; + } else if count == 1 { + if 58-i == suffix[0] { + answer += ((10 * (10-i)) + suffix[0] - 48) as u32; + break; + } + if (58-i)-1 == suffix[0] { + answer += ((10 * (10-i)) + suffix[0] - 48) as u32; + break; + } + let idx = mask.trailing_zeros() as usize +1; + //println!("{:?}, {}, {}", line, idx, 58-i); + let line = &line[idx..100]; + let mut num = suffix[0]; + //println!("{:?}", line); + for v in line.iter() { + if v > &num { + num = *v; + } + } + //println!("{}", num); + answer += (((10 - i) * 10) + num -48) as u32; break; } } + idx += 101; - if input[j] > num2 { - num2 =input[j ]; - } - answer += (((num - 48) * 10) + num2 - 48) as u32; + // 5. Handle the remaining elements (head and tail) via standard scalar iteration + // These are the parts of the array that didn't fit into a perfect 32-byte chunk - i = j + 3; + //println!("nines {}", count + scalar_check(prefix) + scalar_check(suffix)); } answer @@ -103,7 +125,6 @@ pub fn solve_p2(input: &[u8]) -> u64 { pub fn solve_p2_fast(input: &[u8]) -> u64 { let mut answer = 0; - for line in input.split(|&x| x == 10) { let mut end = line.len() - 12; let total = 12;