halo2's Issues

`getFftData` only works for `2^k` or `2^extended_k` right now

If you want to use #9 (FFT memory optimization) together with the recursive FFT, you need to run FFTs for lengths between `2^k` and `2^extended_k`. Right now it returns erroneous FFT data for those intermediate sizes.

I fixed this in my dev branch by making `domain.fft_data` a `HashMap`, since the range `k..=extended_k` is usually small.
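
A minimal sketch of that fix, assuming a hypothetical `FFTData` type and `get_fft_data` accessor (the actual field and method names in the branch may differ):

    use std::collections::HashMap;

    /// Hypothetical container for the precomputed data (twiddle factors etc.)
    /// that one FFT size needs.
    struct FFTData<F> {
        twiddles: Vec<F>,
    }

    struct EvaluationDomain<F> {
        k: u32,
        extended_k: u32,
        /// One entry per log-size in `k..=extended_k`; that range is small,
        /// so the map stays tiny.
        fft_data: HashMap<u32, FFTData<F>>,
    }

    impl<F> EvaluationDomain<F> {
        /// Look up the FFT data for a power-of-two length `2^log_n`.
        fn get_fft_data(&self, log_n: u32) -> &FFTData<F> {
            self.fft_data
                .get(&log_n)
                .expect("log_n must lie in k..=extended_k")
        }
    }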

[Refactor] Followup on prover memory optimization PR

As discussed today, #6 will be merged and the review items will be logged so that they can be revisited in 3-4 weeks to improve the code.

Some helpful background for understanding what the advice and instance columns mean:

Original idea:

Tasks:

  • Document the ConstraintCluster purpose

    #[derive(Clone, Default, Debug)]
    struct ConstraintCluster<C: CurveAffine> {
        /// Fixed columns used in each cluster
        used_fixed_columns: Vec<usize>,
        /// Instance columns used in each cluster
        used_instance_columns: Vec<usize>,
        /// Advice columns used in each cluster
        used_advice_columns: Vec<usize>,
        /// Custom gates evaluation
        evaluator: GraphEvaluator<C>,
        /// The index of the first constraint evaluated in each cluster
        first_constraint_idx: usize,
        /// The index of the last constraint evaluated in each cluster
        last_constraint_idx: usize,
        /// The last value source
        last_value_source: Option<ValueSource>,
    }
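
    A possible starting point for that documentation, inferred from how the struct is used in `evaluate_h` below and to be confirmed by the PR authors: constraints appear to be grouped into clusters by degree, and cluster `i` is only evaluated on every `num_parts >> i`-th part, i.e. on a domain of size `2^(k + i)` rather than the full extended domain.

        // Suggested doc comment (inferred, unverified) to paste above the struct:

        /// A group of custom-gate constraints of similar degree. Each cluster
        /// contributes to the quotient polynomial on the smallest domain that
        /// fits its degree (cluster `i` runs on every `num_parts >> i`-th
        /// part), and loads only the fixed, instance, and advice columns its
        /// constraints actually reference.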

  • Explain where the 5 comes from, potentially replacing it with its source instead of a hard-coded constant that may rot after refactoring

    • // Lookups
      for lookup in cs.lookups.iter() {
          constraint_idx += 5;

    • for (n, lookup) in lookups.iter().enumerate() {
          let (lookup_evaluator, max_degree, used_columns) = &self.lookups[n];
          let running_prod_cluster =
              Self::compute_cluster_idx(max_degree + 2, num_clusters - 1);
          if !need_to_compute(part_idx, 1)
              && !need_to_compute(part_idx, 2)
              && !need_to_compute(part_idx, running_prod_cluster)
          {
              constraint_idx += 5;
              continue;
          }

      I suspect it's linked to the number of lookup constraints evaluated per lookup argument: the lookup section of `evaluate_h` below increments `constraint_idx` by one exactly five times for each lookup.
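
      One way to keep the constant from rotting, sketched under the assumption that the 5 counts the five lookup constraints emitted per lookup argument (matching the five `constraint_idx += 1` increments in the lookup section of `evaluate_h`):

        // Hypothetical refactor: name the constant instead of hard-coding 5.
        // One slot per lookup constraint evaluated in `evaluate_h`:
        //   1. l_0(X) * (1 - z(X))
        //   2. l_last(X) * (z(X)^2 - z(X))
        //   3. the main product rule on active rows
        //   4. l_0(X) * (a'(X) - s'(X))
        //   5. (a'(X) - s'(X)) * (a'(X) - a'(\omega^{-1} X)) on active rows
        const LOOKUP_CONSTRAINTS_PER_ARGUMENT: usize = 5;

        // ...then, at both use sites:
        constraint_idx += LOOKUP_CONSTRAINTS_PER_ARGUMENT;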
  • Break down evaluate_h function

    /// Evaluate h poly
    pub(in crate::plonk) fn evaluate_h(
        &self,
        pk: &ProvingKey<C>,
        advice_polys: &[&[Polynomial<C::ScalarExt, Coeff>]],
        instance_polys: &[&[Polynomial<C::ScalarExt, Coeff>]],
        challenges: &[C::ScalarExt],
        y: C::ScalarExt,
        beta: C::ScalarExt,
        gamma: C::ScalarExt,
        theta: C::ScalarExt,
        lookups: &[Vec<lookup::prover::Committed<C>>],
        permutations: &[permutation::prover::Committed<C>],
    ) -> Polynomial<C::ScalarExt, ExtendedLagrangeCoeff> {
        let domain = &pk.vk.domain;
        let size = 1 << domain.k() as usize;
        let rot_scale = 1;
        let extended_omega = domain.get_extended_omega();
        let omega = domain.get_omega();
        let isize = size as i32;
        let one = C::ScalarExt::ONE;
        let p = &pk.vk.cs.permutation;
        let num_parts = domain.extended_len() >> domain.k();
        let num_clusters = (domain.extended_k() - domain.k() + 1) as usize;
        assert!(self.custom_gate_clusters.len() <= num_clusters);

        // Initialize the powers of y and constraint counter
        let mut y_powers = vec![C::ScalarExt::ONE; self.num_y_powers * instance_polys.len()];
        for i in 1..self.num_y_powers {
            y_powers[i] = y_powers[i - 1] * y;
        }

        let need_to_compute = |part_idx, cluster_idx| part_idx % (num_parts >> cluster_idx) == 0;
        let compute_part_idx_in_cluster =
            |part_idx, cluster_idx| part_idx >> (num_clusters - cluster_idx - 1);

        let mut value_part_clusters = Vec::new();
        value_part_clusters.resize(num_clusters, Vec::new());
        for (cluster_idx, cluster) in value_part_clusters
            .iter_mut()
            .enumerate()
            .take(num_clusters)
        {
            cluster.resize(1 << cluster_idx, domain.empty_lagrange());
        }

        // Calculate the quotient polynomial for each part
        let mut current_extended_omega = one;
        for part_idx in 0..num_parts {
            let mut fixed: Vec<Option<Polynomial<C::ScalarExt, LagrangeCoeff>>> =
                vec![None; pk.fixed_polys.len()];
            let l0 = domain.coeff_to_extended_part(pk.l0.clone(), current_extended_omega);
            let l_last = domain.coeff_to_extended_part(pk.l_last.clone(), current_extended_omega);
            let l_active_row =
                domain.coeff_to_extended_part(pk.l_active_row.clone(), current_extended_omega);

            let mut constraint_idx = 0;
            let mut cluster_last_constraint_idx = vec![0; num_clusters];

            // Core expression evaluations
            let num_threads = multicore::current_num_threads();
            for (((advice_polys, instance_polys), lookups), permutation) in advice_polys
                .iter()
                .zip(instance_polys.iter())
                .zip(lookups.iter())
                .zip(permutations.iter())
            {
                // Calculate the advice and instance cosets
                let mut advice: Vec<Option<Polynomial<C::Scalar, LagrangeCoeff>>> =
                    vec![None; advice_polys.len()];
                let mut instance: Vec<Option<Polynomial<C::Scalar, LagrangeCoeff>>> =
                    vec![None; instance_polys.len()];

                // Custom gates
                let start = start_measure("custom gates", false);
                for (cluster_idx, custom_gates) in self.custom_gate_clusters.iter().enumerate() {
                    if !need_to_compute(part_idx, cluster_idx)
                        || custom_gates.last_value_source.is_none()
                    {
                        continue;
                    }
                    let values = &mut value_part_clusters[cluster_idx]
                        [compute_part_idx_in_cluster(part_idx, cluster_idx)];
                    for fixed_idx in custom_gates.used_fixed_columns.iter() {
                        if fixed[*fixed_idx].is_none() {
                            fixed[*fixed_idx] = Some(domain.coeff_to_extended_part(
                                pk.fixed_polys[*fixed_idx].clone(),
                                current_extended_omega,
                            ));
                        }
                    }
                    for instance_idx in custom_gates.used_instance_columns.iter() {
                        if instance[*instance_idx].is_none() {
                            instance[*instance_idx] = Some(domain.coeff_to_extended_part(
                                instance_polys[*instance_idx].clone(),
                                current_extended_omega,
                            ));
                        }
                    }
                    for advice_idx in custom_gates.used_advice_columns.iter() {
                        if advice[*advice_idx].is_none() {
                            advice[*advice_idx] = Some(domain.coeff_to_extended_part(
                                advice_polys[*advice_idx].clone(),
                                current_extended_omega,
                            ));
                        }
                    }
                    let fixed_slice = &fixed[..];
                    let advice_slice = &advice[..];
                    let instance_slice = &instance[..];
                    let y_power_slice = &y_powers[..];
                    let y_power = y_powers[constraint_idx + custom_gates.first_constraint_idx
                        - cluster_last_constraint_idx[cluster_idx]];
                    multicore::scope(|scope| {
                        let chunk_size = (size + num_threads - 1) / num_threads;
                        for (thread_idx, values) in values.chunks_mut(chunk_size).enumerate() {
                            let start = thread_idx * chunk_size;
                            scope.spawn(move |_| {
                                let mut eval_data = custom_gates.evaluator.instance();
                                for (i, value) in values.iter_mut().enumerate() {
                                    let idx = start + i;
                                    *value = *value * y_power
                                        + custom_gates.evaluator.evaluate(
                                            &mut eval_data,
                                            fixed_slice,
                                            advice_slice,
                                            instance_slice,
                                            challenges,
                                            y_power_slice,
                                            &beta,
                                            &gamma,
                                            &theta,
                                            idx,
                                            rot_scale,
                                            isize,
                                        );
                                }
                            });
                        }
                    });
                    // Update the constraint index
                    cluster_last_constraint_idx[cluster_idx] =
                        constraint_idx + custom_gates.last_constraint_idx;
                }
                constraint_idx += self.num_custom_gate_constraints;
                stop_measure(start);

                // Permutations
                let start = start_measure("permutations", false);
                let sets = &permutation.sets;
                if !sets.is_empty() {
                    let blinding_factors = pk.vk.cs.blinding_factors();
                    let last_rotation = Rotation(-((blinding_factors + 1) as i32));
                    let chunk_len = pk.vk.cs.degree() - 2;
                    let delta_start = beta * &C::Scalar::ZETA;

                    let permutation_product_cosets: Vec<Polynomial<C::ScalarExt, LagrangeCoeff>> =
                        sets.iter()
                            .map(|set| {
                                domain.coeff_to_extended_part(
                                    set.permutation_product_poly.clone(),
                                    current_extended_omega,
                                )
                            })
                            .collect();

                    let first_set_permutation_product_coset =
                        permutation_product_cosets.first().unwrap();
                    let last_set_permutation_product_coset =
                        permutation_product_cosets.last().unwrap();

                    // Permutation constraints
                    constraint_idx += 1;
                    if need_to_compute(part_idx, 1) {
                        let y_power = y_powers[constraint_idx - cluster_last_constraint_idx[1]];
                        parallelize(
                            &mut value_part_clusters[1][compute_part_idx_in_cluster(part_idx, 1)],
                            |values, start| {
                                for (i, value) in values.iter_mut().enumerate() {
                                    let idx = start + i;
                                    // Enforce only for the first set.
                                    // l_0(X) * (1 - z_0(X)) = 0, degree = 2
                                    *value = *value * y_power
                                        + ((one - first_set_permutation_product_coset[idx])
                                            * l0[idx]);
                                }
                            },
                        );
                        cluster_last_constraint_idx[1] = constraint_idx;
                    }

                    constraint_idx += 1;
                    if need_to_compute(part_idx, 2) {
                        let y_power = y_powers[constraint_idx - cluster_last_constraint_idx[2]];
                        parallelize(
                            &mut value_part_clusters[2][compute_part_idx_in_cluster(part_idx, 2)],
                            |values, start| {
                                for (i, value) in values.iter_mut().enumerate() {
                                    let idx = start + i;
                                    // Enforce only for the last set.
                                    // l_last(X) * (z_l(X)^2 - z_l(X)) = 0, degree = 3
                                    *value = *value * y_power
                                        + ((last_set_permutation_product_coset[idx]
                                            * last_set_permutation_product_coset[idx]
                                            - last_set_permutation_product_coset[idx])
                                            * l_last[idx]);
                                }
                            },
                        );
                        cluster_last_constraint_idx[2] = constraint_idx;
                    }

                    constraint_idx += sets.len() - 1;
                    if need_to_compute(part_idx, 1) {
                        let y_skip = y_powers
                            [constraint_idx + 1 - sets.len() - cluster_last_constraint_idx[1]];
                        parallelize(
                            &mut value_part_clusters[1][compute_part_idx_in_cluster(part_idx, 1)],
                            |values, start| {
                                for (i, value) in values.iter_mut().enumerate() {
                                    let idx = start + i;
                                    // Except for the first set, enforce.
                                    // l_0(X) * (z_i(X) - z_{i-1}(\omega^(last) X)) = 0, degree = 2
                                    let r_last =
                                        get_rotation_idx(idx, last_rotation.0, rot_scale, isize);
                                    *value = *value * y_skip;
                                    for (set_idx, permutation_product_coset) in
                                        permutation_product_cosets.iter().enumerate()
                                    {
                                        if set_idx != 0 {
                                            *value = *value * y
                                                + ((permutation_product_coset[idx]
                                                    - permutation_product_cosets[set_idx - 1]
                                                        [r_last])
                                                    * l0[idx]);
                                        }
                                    }
                                }
                            },
                        );
                        cluster_last_constraint_idx[1] = constraint_idx;
                    }

                    constraint_idx += sets.len();
                    let running_prod_cluster =
                        Self::compute_cluster_idx(2 + chunk_len, num_clusters - 1);
                    if need_to_compute(part_idx, running_prod_cluster) {
                        for column in p.columns.iter() {
                            match column.column_type() {
                                Any::Advice(_) => {
                                    let advice = &mut advice[column.index()];
                                    if (*advice).is_none() {
                                        *advice = Some(domain.coeff_to_extended_part(
                                            advice_polys[column.index()].clone(),
                                            current_extended_omega,
                                        ));
                                    }
                                }
                                Any::Instance => {
                                    let instance = &mut instance[column.index()];
                                    if instance.is_none() {
                                        *instance = Some(domain.coeff_to_extended_part(
                                            instance_polys[column.index()].clone(),
                                            current_extended_omega,
                                        ));
                                    }
                                }
                                Any::Fixed => {
                                    let fixed = &mut fixed[column.index()];
                                    if fixed.is_none() {
                                        *fixed = Some(domain.coeff_to_extended_part(
                                            pk.fixed_polys[column.index()].clone(),
                                            current_extended_omega,
                                        ));
                                    }
                                }
                            }
                        }

                        let permutation_cosets: Vec<Polynomial<C::ScalarExt, LagrangeCoeff>> = pk
                            .permutation
                            .polys
                            .iter()
                            .map(|p| {
                                domain.coeff_to_extended_part(p.clone(), current_extended_omega)
                            })
                            .collect();

                        let y_skip = y_powers[constraint_idx
                            - sets.len()
                            - cluster_last_constraint_idx[running_prod_cluster]];

                        parallelize(
                            &mut value_part_clusters[running_prod_cluster]
                                [compute_part_idx_in_cluster(part_idx, running_prod_cluster)],
                            |values, start| {
                                let mut beta_term = current_extended_omega
                                    * omega.pow_vartime(&[start as u64, 0, 0, 0]);
                                for (i, value) in values.iter_mut().enumerate() {
                                    let idx = start + i;
                                    let r_next = get_rotation_idx(idx, 1, rot_scale, isize);

                                    *value = *value * y_skip;

                                    // And for all the sets we enforce:
                                    // (1 - (l_last(X) + l_blind(X))) * (
                                    //   z_i(\omega X) \prod_j (p(X) + \beta s_j(X) + \gamma)
                                    //   - z_i(X) \prod_j (p(X) + \delta^j \beta X + \gamma)
                                    // ), degree = 2 + chunk_len
                                    let mut current_delta = delta_start * beta_term;
                                    for (
                                        (columns, permutation_product_coset),
                                        permutation_coset_chunk,
                                    ) in p
                                        .columns
                                        .chunks(chunk_len)
                                        .zip(permutation_product_cosets.iter())
                                        .zip(permutation_cosets.chunks(chunk_len))
                                    {
                                        let mut left = permutation_product_coset[r_next];
                                        for (values, permutation) in columns
                                            .iter()
                                            .map(|&column| match column.column_type() {
                                                Any::Advice(_) => {
                                                    advice[column.index()].as_ref().unwrap()
                                                }
                                                Any::Fixed => {
                                                    fixed[column.index()].as_ref().unwrap()
                                                }
                                                Any::Instance => {
                                                    instance[column.index()].as_ref().unwrap()
                                                }
                                            })
                                            .zip(permutation_coset_chunk.iter())
                                        {
                                            left *= values[idx] + beta * permutation[idx] + gamma;
                                        }

                                        let mut right = permutation_product_coset[idx];
                                        for values in columns.iter().map(|&column| {
                                            match column.column_type() {
                                                Any::Advice(_) => {
                                                    advice[column.index()].as_ref().unwrap()
                                                }
                                                Any::Fixed => {
                                                    fixed[column.index()].as_ref().unwrap()
                                                }
                                                Any::Instance => {
                                                    instance[column.index()].as_ref().unwrap()
                                                }
                                            }
                                        }) {
                                            right *= values[idx] + current_delta + gamma;
                                            current_delta *= &C::Scalar::DELTA;
                                        }

                                        *value = *value * y + ((left - right) * l_active_row[idx]);
                                    }
                                    beta_term *= &omega;
                                }
                            },
                        );
                        cluster_last_constraint_idx[running_prod_cluster] = constraint_idx;
                    }
                }
                stop_measure(start);

                // Lookups
                let start = start_measure("lookups", false);
                for (n, lookup) in lookups.iter().enumerate() {
                    let (lookup_evaluator, max_degree, used_columns) = &self.lookups[n];
                    let running_prod_cluster =
                        Self::compute_cluster_idx(max_degree + 2, num_clusters - 1);
                    if !need_to_compute(part_idx, 1)
                        && !need_to_compute(part_idx, 2)
                        && !need_to_compute(part_idx, running_prod_cluster)
                    {
                        constraint_idx += 5;
                        continue;
                    }

                    // Polynomials required for this lookup.
                    // Calculated here so these only have to be kept in memory for the short time
                    // they are actually needed.
                    let product_coset = pk.vk.domain.coeff_to_extended_part(
                        lookup.product_poly.clone(),
                        current_extended_omega,
                    );
                    let permuted_input_coset = pk.vk.domain.coeff_to_extended_part(
                        lookup.permuted_input_poly.clone(),
                        current_extended_omega,
                    );
                    let permuted_table_coset = pk.vk.domain.coeff_to_extended_part(
                        lookup.permuted_table_poly.clone(),
                        current_extended_omega,
                    );

                    // Lookup constraints
                    constraint_idx += 1;
                    if need_to_compute(part_idx, 1) {
                        let y_power = y_powers[constraint_idx - cluster_last_constraint_idx[1]];
                        parallelize(
                            &mut value_part_clusters[1][compute_part_idx_in_cluster(part_idx, 1)],
                            |values, start| {
                                for (i, value) in values.iter_mut().enumerate() {
                                    let idx = start + i;
                                    // l_0(X) * (1 - z(X)) = 0, degree = 2
                                    *value =
                                        *value * y_power + ((one - product_coset[idx]) * l0[idx]);
                                }
                            },
                        );
                        cluster_last_constraint_idx[1] = constraint_idx;
                    }

                    constraint_idx += 1;
                    if need_to_compute(part_idx, 2) {
                        let y_power = y_powers[constraint_idx - cluster_last_constraint_idx[2]];
                        parallelize(
                            &mut value_part_clusters[2][compute_part_idx_in_cluster(part_idx, 2)],
                            |values, start| {
                                for (i, value) in values.iter_mut().enumerate() {
                                    let idx = start + i;
                                    // l_last(X) * (z(X)^2 - z(X)) = 0, degree = 3
                                    *value = *value * y_power
                                        + ((product_coset[idx] * product_coset[idx]
                                            - product_coset[idx])
                                            * l_last[idx]);
                                }
                            },
                        );
                        cluster_last_constraint_idx[2] = constraint_idx;
                    }

                    constraint_idx += 1;
                    if need_to_compute(part_idx, running_prod_cluster) {
                        for fixed_column in used_columns.0.iter() {
                            let fixed = &mut fixed[*fixed_column];
                            if fixed.is_none() {
                                *fixed = Some(domain.coeff_to_extended_part(
                                    pk.fixed_polys[*fixed_column].clone(),
                                    current_extended_omega,
                                ));
                            }
                        }
                        for instance_column in used_columns.1.iter() {
                            let instance = &mut instance[*instance_column];
                            if instance.is_none() {
                                *instance = Some(domain.coeff_to_extended_part(
                                    instance_polys[*instance_column].clone(),
                                    current_extended_omega,
                                ));
                            }
                        }
                        for advice_column in used_columns.2.iter() {
                            let advice = &mut advice[*advice_column];
                            if (*advice).is_none() {
                                *advice = Some(domain.coeff_to_extended_part(
                                    advice_polys[*advice_column].clone(),
                                    current_extended_omega,
                                ));
                            }
                        }
                        let y_power = y_powers
                            [constraint_idx - cluster_last_constraint_idx[running_prod_cluster]];
                        let fixed_slice = &fixed[..];
                        let advice_slice = &advice[..];
                        let instance_slice = &instance[..];
                        let y_power_slice = &y_powers[..];
                        parallelize(
                            &mut value_part_clusters[running_prod_cluster]
                                [compute_part_idx_in_cluster(part_idx, running_prod_cluster)],
                            |values, start| {
                                let mut eval_data = lookup_evaluator.instance();
                                for (i, value) in values.iter_mut().enumerate() {
                                    let idx = start + i;
                                    let table_value = lookup_evaluator.evaluate(
                                        &mut eval_data,
                                        fixed_slice,
                                        advice_slice,
                                        instance_slice,
                                        challenges,
                                        y_power_slice,
                                        &beta,
                                        &gamma,
                                        &theta,
                                        idx,
                                        rot_scale,
                                        isize,
                                    );
                                    let r_next = get_rotation_idx(idx, 1, rot_scale, isize);
                                    // (1 - (l_last(X) + l_blind(X))) * (
                                    //   z(\omega X) (a'(X) + \beta) (s'(X) + \gamma)
                                    //   - z(X) (\theta^{m-1} a_0(X) + ... + a_{m-1}(X) + \beta)
                                    //     (\theta^{m-1} s_0(X) + ... + s_{m-1}(X) + \gamma)
                                    // ) = 0, degree = 2 + max(deg(a)) + max(deg(s))
                                    *value = *value * y_power
                                        + ((product_coset[r_next]
                                            * (permuted_input_coset[idx] + beta)
                                            * (permuted_table_coset[idx] + gamma)
                                            - product_coset[idx] * table_value)
                                            * l_active_row[idx]);
                                }
                            },
                        );
                        cluster_last_constraint_idx[running_prod_cluster] = constraint_idx;
                    }

                    constraint_idx += 1;
                    if need_to_compute(part_idx, 1) {
                        let y_power = y_powers[constraint_idx - cluster_last_constraint_idx[1]];
                        parallelize(
                            &mut value_part_clusters[1][compute_part_idx_in_cluster(part_idx, 1)],
                            |values, start| {
                                for (i, value) in values.iter_mut().enumerate() {
                                    let idx = start + i;
                                    let a_minus_s =
                                        permuted_input_coset[idx] - permuted_table_coset[idx];
                                    // Check that the first values in the permuted input
                                    // expression and permuted fixed expression are the same.
                                    // l_0(X) * (a'(X) - s'(X)) = 0, degree = 2
                                    *value = *value * y_power + (a_minus_s * l0[idx]);
                                }
                            },
                        );
                        cluster_last_constraint_idx[1] = constraint_idx;
                    }

                    constraint_idx += 1;
                    if need_to_compute(part_idx, 2) {
                        let y_power = y_powers[constraint_idx - cluster_last_constraint_idx[2]];
                        parallelize(
                            &mut value_part_clusters[2][compute_part_idx_in_cluster(part_idx, 2)],
                            |values, start| {
                                for (i, value) in values.iter_mut().enumerate() {
                                    let idx = start + i;
                                    let r_prev = get_rotation_idx(idx, -1, rot_scale, isize);
                                    // Check that each value in the permuted lookup input
                                    // expression is either equal to the value above it, or the
                                    // value at the same index in the permuted table expression.
                                    // (1 - (l_last + l_blind)) * (a'(X) - s'(X)) * (a'(X) - a'(\omega^{-1} X)) = 0, degree = 3
                                    let a_minus_s =
                                        permuted_input_coset[idx] - permuted_table_coset[idx];
                                    *value = *value * y_power
                                        + (a_minus_s
                                            * (permuted_input_coset[idx]
                                                - permuted_input_coset[r_prev])
                                            * l_active_row[idx]);
                                }
                            },
                        );
                        cluster_last_constraint_idx[2] = constraint_idx;
                    }
                }
                stop_measure(start);
            }

            // Align the constraints by different powers of y.
            for (i, cluster) in value_part_clusters.iter_mut().enumerate() {
                if need_to_compute(part_idx, i) && cluster_last_constraint_idx[i] > 0 {
                    let y_power = y_powers[constraint_idx - cluster_last_constraint_idx[i]];
                    parallelize(
                        &mut cluster[compute_part_idx_in_cluster(part_idx, i)],
                        |values, _| {
                            for value in values.iter_mut() {
                                *value = *value * y_power;
                            }
                        },
                    );
                }
            }
            current_extended_omega *= extended_omega;
        }
        domain.lagrange_vecs_to_extended(value_part_clusters)
    }

    so that it's easier to understand, test, audit, and refactor its logic
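
    One possible shape for that breakdown, purely illustrative (the helper names and toy types below are hypothetical; the real signatures would carry the proving key, domain, and coset state):

        // Illustrative sketch: each constraint family becomes its own helper,
        // independently testable, and `evaluate_h` shrinks to a driver loop
        // over the extended-domain parts.
        struct HEvaluator {
            num_parts: usize,
        }

        impl HEvaluator {
            fn evaluate_h(&self, values: &mut [u64]) {
                for part_idx in 0..self.num_parts {
                    self.eval_custom_gates(part_idx, values);
                    self.eval_permutations(part_idx, values);
                    self.eval_lookups(part_idx, values);
                    self.align_clusters(part_idx, values);
                }
            }

            fn eval_custom_gates(&self, _part: usize, _v: &mut [u64]) { /* custom-gate logic */ }
            fn eval_permutations(&self, _part: usize, _v: &mut [u64]) { /* permutation-argument logic */ }
            fn eval_lookups(&self, _part: usize, _v: &mut [u64]) { /* lookup-argument logic */ }
            fn align_clusters(&self, _part: usize, _v: &mut [u64]) { /* final y-power alignment */ }
        }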

  • Optimize transposition

    /// Obtains a polynomial in ExtendedLagrange form when given a vector of
    /// Lagrange polynomials with total size `extended_n`; panics if the
    /// provided vector is the wrong length.
    pub fn lagrange_vec_to_extended(
        &self,
        values: Vec<Polynomial<F, LagrangeCoeff>>,
    ) -> Polynomial<F, ExtendedLagrangeCoeff> {
        assert_eq!(values.len(), (self.extended_len() >> self.k) as usize);
        assert_eq!(values[0].len(), self.n as usize);

        // Transpose the values in parallel
        let mut transposed = vec![vec![F::ZERO; values.len()]; self.n as usize];
        values.into_iter().enumerate().for_each(|(i, p)| {
            parallelize(&mut transposed, |transposed, start| {
                for (transposed, p) in transposed.iter_mut().zip(p.values[start..].iter()) {
                    transposed[i] = *p;
                }
            });
        });

        Polynomial {
            values: transposed.into_iter().flatten().collect(),
            _marker: PhantomData,
        }
    }

    The transposition could be done without the intermediate `Vec<Vec<F>>` and the `flatten` at the end, by writing directly into the final buffer (see the Rust sketch after the C examples below).

    Also, if this is a bottleneck, transposition can be sped up about 4x even serially with cache blocking.

    See my benchmarks of transposition algorithms at: https://github.com/mratsim/laser/blob/e23b5d63f58441968188fb95e16862d1498bb845/benchmarks/transpose/transpose_bench.nim#L558-L674

    The change in algorithm is simple:

    this is 3x slower

        for (int i = 0; i < M; i++)
          for (int j = 0; j < N; j++)
            po[i+j*M] = pa[j+i*N];

    than this (1D-blocking)

        // No min function in C ...
        #define min(a,b) (((a)<(b))?(a):(b))
        const int blck = 32;
    
        for (int i = 0; i < M; i+=blck)
          for (int j = 0; j < N; ++j)
            for (int ii = i; ii < min(i+blck,M); ++ii)
              po[ii+j*M] = pa[j+ii*N];

    or 4x slower than this (2D blocking)

        #define min(a,b) (((a)<(b))?(a):(b))
        const int blck = 32;
    
        for (int j = 0; j < N; j+=blck)
          for (int i = 0; i < M; i+=blck)
            for (int jj = j; jj<min(j+blck, N); ++jj)
              for (int ii = i; ii<min(i+blck,M); ++ii)
                po[ii+jj*M] = pa[jj+ii*N];
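
    For comparison, here is a direct-to-flat-buffer transpose in Rust with 1D cache blocking; a sketch only, assuming `F: Copy + Default` stands in for the field type:

        /// Transpose `m` columns of length `n` straight into one flat buffer,
        /// skipping the intermediate `Vec<Vec<F>>` and the final `flatten`.
        fn transpose_into_flat<F: Copy + Default>(values: &[Vec<F>], n: usize) -> Vec<F> {
            let m = values.len();
            let mut out = vec![F::default(); m * n];
            const BLCK: usize = 32; // tile size; tune for the target cache
            for jj in (0..n).step_by(BLCK) {
                let j_end = (jj + BLCK).min(n);
                for (i, col) in values.iter().enumerate() {
                    for j in jj..j_end {
                        // Row `j` of the output takes element `j` of column `i`.
                        out[j * m + i] = col[j];
                    }
                }
            }
            out
        }

    The 2D-blocked variant tiles the column index the same way, as in the last C example.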
