Skip to main content

ci_core/utils/
contingency_table.rs

1use ndarray::{Array, Array1, Array2};
2use ordered_float::OrderedFloat;
3use std::collections::HashMap;
4use std::hash::BuildHasher;
5
6/// Count how often each pair `(col1[i], col2[i])` occurs and return the
7/// counts as a 2D table. Rows and columns are ordered by value, so the
8/// result does not depend on the order of the inputs.
9#[must_use]
10pub fn contingency_table(col1: &Array1<f64>, col2: &Array1<f64>) -> Array2<f64> {
11    let col1_map = build_global_category_map(col1);
12    let col2_map = build_global_category_map(col2);
13    contingency_table_with_categories(col1, col2, &col1_map, &col2_map)
14}
15
16//Great replacement of the contingency_table_with_categories function in power divergence, but cannot
17//be used when contingency_table_with_categories is needed within another function
18#[must_use]
19pub fn contingency_table_from_indices<S1: BuildHasher, S2: BuildHasher>(
20    indices: &[usize],
21    x_values: &Array1<f64>,
22    y_values: &Array1<f64>,
23    x_categories: &HashMap<OrderedFloat<f64>, usize, S1>,
24    y_categories: &HashMap<OrderedFloat<f64>, usize, S2>,
25) -> Array2<f64> {
26    let mut table = Array2::<f64>::zeros((x_categories.len(), y_categories.len()));
27
28    for &i in indices {
29        let r = x_categories[&OrderedFloat(x_values[i])];
30        let c = y_categories[&OrderedFloat(y_values[i])];
31
32        table[[r, c]] += 1.0;
33    }
34
35    table
36}
37
38/// Number every distinct value in `arr` (smallest gets 0, next gets 1,
39/// and so on). Pass the same map to several calls of
40/// `contingency_table_with_categories` to get tables that share the
41/// exact same rows and columns.
42#[must_use]
43pub fn build_global_category_map(arr: &Array1<f64>) -> HashMap<OrderedFloat<f64>, usize> {
44    let mut map = HashMap::new();
45
46    for &v in arr {
47        let key = OrderedFloat(v);
48        let next_index = map.len();
49        map.entry(key).or_insert(next_index);
50    }
51
52    map
53}
54
55/// Like `contingency_table`, but the rows and columns are fixed by the
56/// maps you pass in. Use this when several tables need the same shape,
57/// even if some values are missing from a particular table.
58#[must_use]
59pub fn contingency_table_with_categories<S1: BuildHasher, S2: BuildHasher>(
60    col1: &Array1<f64>,
61    col2: &Array1<f64>,
62    col1_map: &HashMap<OrderedFloat<f64>, usize, S1>,
63    col2_map: &HashMap<OrderedFloat<f64>, usize, S2>,
64) -> Array2<f64> {
65    let mut result = Array::zeros((col1_map.len(), col2_map.len()));
66    for i in 0..col1.len() {
67        if let (Some(&r), Some(&c)) = (
68            col1_map.get(&OrderedFloat(col1[i])),
69            col2_map.get(&OrderedFloat(col2[i])),
70        ) {
71            result[[r, c]] += 1.;
72        }
73    }
74    result
75}
76
77#[cfg(test)]
78mod tests {
79    use super::*;
80    use ndarray::array;
81
82    #[test]
83    fn basic_test() {
84        let test1_x: Array1<f64> = array![1.0, 2.0, 3.0, 1.0, 1.0];
85        let test1_y: Array1<f64> = array![1.0, 2.0, 3.0, 1.0, 2.0];
86        let test1_expected: Array2<f64> = array![[2.0, 1.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]];
87        assert_eq!(test1_expected, contingency_table(&test1_x, &test1_y));
88    }
89
90    #[test]
91    fn single_value() {
92        let test3_x = array![1.0, 1.0, 1.0];
93        let test3_y = array![2.0, 2.0, 2.0];
94        let test3_expected = array![[3.0]];
95        assert_eq!(test3_expected, contingency_table(&test3_x, &test3_y));
96    }
97}