Loading
Feedback

ADDI Alzheimers Detection Challenge

ADDI Alzhemiers columns Reference

Feature Explorations

By  siddharth_singh8

Feature Explorations



ADDI Alzhemiers columns Reference

In [1]:
import pandas as pd
In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.core.display import HTML
import warnings
warnings.filterwarnings("ignore")

INPUT_DIR = '../data/'
COLS_TO_SHOW = 104
COLS_TO_SHOW_FROM_END = 100

def h(content):
    display(HTML(content))
    
def _desc(data, col, label):
    d0 = data.describe().reset_index()
    d0.columns = [col, label]
    return d0.append({col:'unique values', label:data.unique().shape[0]}, ignore_index=True) \
             .append({col:'unique values / count', label:np.round(data.unique().shape[0] / data.shape[0], 4)}, ignore_index=True) \
             .append({col:'NaNs', label:data.isnull().sum()}, ignore_index=True) \
    
def desc1(col):
    d0 = _desc(tr[col], col, 'Train')
    d3 = _desc(te[col], col, 'Validation')
    dd = d0.merge(d3)
    display(dd)
    
    if col not in ['row_id']:
        h('<b>Most popular values (NaN = -999):</b>')
        N = 10
        d0 = tr[['row_id',col]].fillna(-999).groupby(col)['row_id'].count().reset_index()
        d1 = te[['row_id',col]].fillna(-999).groupby(col)['row_id'].count().reset_index()
        dd = d0.merge(d1, how='left', on=col)
        dd['Share in train'] = np.round(dd['row_id_x'] / dd['row_id_x'].sum(), 5)
        dd['Share in validation'] = np.round(dd['row_id_y'] / dd['row_id_y'].sum(), 5)
        dd = dd.sort_values('row_id_x', ascending=False).head(N).fillna(0).reset_index(drop=True)
        dd = dd.rename({'row_id_x':'Count in train (desc)','row_id_y':'Count in validation'}, axis=1)
        display(dd)

def hist1(col):
    plt.figure(figsize=(15, 3))
    plt.subplot(121)
    plt.hist(tr[col], bins=70);
    plt.title('Train histogram: ' + col);
    plt.subplot(122)
    plt.hist(te[col], bins=70);
    plt.title('Validation histogram: ' + col);
    plt.show()

def barh1(col):
    if col not in ['row_id']:
        plt.figure(figsize=(15, 3))
        plt.subplot(121)
        tr[col].value_counts().sort_values().plot(kind = 'barh')
        plt.title('Train value counts: ' + col);
        plt.subplot(122)
        te[col].value_counts().sort_values().plot(kind = 'barh')
        plt.title('Validation value counts: ' + col);
        plt.show()
        
def corr1(col):
    N = None #10000
    num_vars = [f for f in tr.columns if tr[f].dtype != 'object']
    trx = tr.head(N) if N is not None else tr.copy()
    corrs = trx[num_vars].corrwith(trx[col]).reset_index().sort_values(0, ascending=False).reset_index(drop=True).rename({'index':'Column',0:'Correlation with ' + col}, axis=1)
    h('<b>Most correlated values with ' + col + ':</b>')
    trx = pd.concat([corrs.head(6), corrs.dropna().tail(5)])
    def linkx(val):
        return '<a href="#c_{}">{}</a>'.format(val, val) if val in included_cols else val
    trx['Column'] = trx['Column'].apply(linkx)
    h(trx.to_html(escape=False))
    
def numeric(col):
    hist1(col)
    desc1(col)
    corr1(col) 
    
def categorical(col):
    barh1(col)
    desc1(col)

def proc(col):
    h('<h3 id="c_' + col + '">' + col + '</h3>' + '<a style="font-size:11px" href="#home">(Jump to top)</a>')
    categorical(col) if tr[col].dtype == 'object' else numeric(col)
        
tr = pd.read_csv(INPUT_DIR + 'train.csv')
te = pd.read_csv(INPUT_DIR + 'validation.csv')

included_cols = list(tr.columns.values[:COLS_TO_SHOW]) + list(tr.columns.values[-COLS_TO_SHOW_FROM_END:])
split_on = ['row_id','missing_digit_1','1 dist from cen','diagnosis','euc_dist_digit_1','area_digit_1','height_digit_1','width_digit_1','variance_width']
h('<b>Links to column info:</b> ' + ', '.join([('<li>' if col in split_on else '') + '<a href="#c_' + col + '">' + col + '</a>' for col in included_cols]))

h('Train features shape: <b>' + str(tr.shape) + '</b>' + 
  '<br>Validation features shape: <b>' + str(te.shape) + '</b>')
h('Train features preview:')
display(tr.head(10))

for col in included_cols:
    if "diagnosis" not in col:
        proc(col)
Links to column info:
  • row_id, number_of_digits,
  • missing_digit_1, missing_digit_2, missing_digit_3, missing_digit_4, missing_digit_5, missing_digit_6, missing_digit_7, missing_digit_8, missing_digit_9, missing_digit_10, missing_digit_11, missing_digit_12,
  • 1 dist from cen, 10 dist from cen, 11 dist from cen, 12 dist from cen, 2 dist from cen, 3 dist from cen, 4 dist from cen, 5 dist from cen, 6 dist from cen, 7 dist from cen, 8 dist from cen, 9 dist from cen,
  • euc_dist_digit_1, euc_dist_digit_2, euc_dist_digit_3, euc_dist_digit_4, euc_dist_digit_5, euc_dist_digit_6, euc_dist_digit_7, euc_dist_digit_8, euc_dist_digit_9, euc_dist_digit_10, euc_dist_digit_11, euc_dist_digit_12,
  • area_digit_1, area_digit_2, area_digit_3, area_digit_4, area_digit_5, area_digit_6, area_digit_7, area_digit_8, area_digit_9, area_digit_10, area_digit_11, area_digit_12,
  • height_digit_1, height_digit_2, height_digit_3, height_digit_4, height_digit_5, height_digit_6, height_digit_7, height_digit_8, height_digit_9, height_digit_10, height_digit_11, height_digit_12,
  • width_digit_1, width_digit_2, width_digit_3, width_digit_4, width_digit_5, width_digit_6, width_digit_7, width_digit_8, width_digit_9, width_digit_10, width_digit_11, width_digit_12,
  • variance_width, variance_height, variance_area, deviation_dist_from_mid_axis, between_axis_digits_angle_sum, between_axis_digits_angle_var, between_digits_angle_cw_sum, between_digits_angle_cw_var, between_digits_angle_ccw_sum, between_digits_angle_ccw_var, sequence_flag_cw, sequence_flag_ccw, number_of_hands, hand_count_dummy, hour_hand_length, minute_hand_length, single_hand_length, clockhand_ratio, clockhand_diff, angle_between_hands, deviation_from_centre, intersection_pos_rel_centre, hour_proximity_from_11, minute_proximity_from_2, hour_pointing_digit, actual_hour_digit, minute_pointing_digit, actual_minute_digit, final_rotation_angle, ellipse_circle_ratio, 6 dist from cen, 7 dist from cen, 8 dist from cen, 9 dist from cen,
  • euc_dist_digit_1, euc_dist_digit_2, euc_dist_digit_3, euc_dist_digit_4, euc_dist_digit_5, euc_dist_digit_6, euc_dist_digit_7, euc_dist_digit_8, euc_dist_digit_9, euc_dist_digit_10, euc_dist_digit_11, euc_dist_digit_12,
  • area_digit_1, area_digit_2, area_digit_3, area_digit_4, area_digit_5, area_digit_6, area_digit_7, area_digit_8, area_digit_9, area_digit_10, area_digit_11, area_digit_12,
  • height_digit_1, height_digit_2, height_digit_3, height_digit_4, height_digit_5, height_digit_6, height_digit_7, height_digit_8, height_digit_9, height_digit_10, height_digit_11, height_digit_12,
  • width_digit_1, width_digit_2, width_digit_3, width_digit_4, width_digit_5, width_digit_6, width_digit_7, width_digit_8, width_digit_9, width_digit_10, width_digit_11, width_digit_12,
  • variance_width, variance_height, variance_area, deviation_dist_from_mid_axis, between_axis_digits_angle_sum, between_axis_digits_angle_var, between_digits_angle_cw_sum, between_digits_angle_cw_var, between_digits_angle_ccw_sum, between_digits_angle_ccw_var, sequence_flag_cw, sequence_flag_ccw, number_of_hands, hand_count_dummy, hour_hand_length, minute_hand_length, single_hand_length, clockhand_ratio, clockhand_diff, angle_between_hands, deviation_from_centre, intersection_pos_rel_centre, hour_proximity_from_11, minute_proximity_from_2, hour_pointing_digit, actual_hour_digit, minute_pointing_digit, actual_minute_digit, final_rotation_angle, ellipse_circle_ratio, count_defects, percentage_inside_ellipse, pred_tremor, double_major, double_minor, vertical_dist, horizontal_dist, top_area_perc, bottom_area_perc, left_area_perc, right_area_perc, hor_count, vert_count, eleven_ten_error, other_error, time_diff, centre_dot_detect,
  • diagnosis
  • Train features shape: (32777, 122)
    Validation features shape: (362, 121)
    Train features preview:
    row_id number_of_digits missing_digit_1 missing_digit_2 missing_digit_3 missing_digit_4 missing_digit_5 missing_digit_6 missing_digit_7 missing_digit_8 ... bottom_area_perc left_area_perc right_area_perc hor_count vert_count eleven_ten_error other_error time_diff centre_dot_detect diagnosis
    0 S0CIXBKIUEOUBNURP 12.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.526170 0.524975 0.474667 0 0 0 1 -105.0 0.0 normal
    1 IW1Z4Z3H720OPW8LL 12.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.000810 0.516212 0.483330 0 1 0 1 NaN NaN normal
    2 PVUGU14JRSU44ZADT 12.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.488109 0.550606 0.449042 0 0 0 0 0.0 0.0 normal
    3 RW5UTGMB9H67LWJHX 7.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 ... NaN NaN NaN 1 0 0 1 NaN NaN normal
    4 W0IM2V6F6UP5LYS3E 12.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.512818 0.511865 0.487791 0 1 0 0 0.0 1.0 normal
    5 IR9A4R5TTZJR78ZC8 12.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.513425 0.482235 0.517410 0 0 0 1 495.0 0.0 normal
    6 LS1R4PFJUOVEU0K0E 2.0 1.0 0.0 1.0 1.0 1.0 1.0 1.0 1.0 ... 0.510611 0.527788 0.471864 1 0 0 1 NaN NaN post_alzheimer
    7 OQLC2VXVZUNWI31P9 11.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 ... 0.501412 0.532871 0.466693 1 0 0 1 NaN NaN normal
    8 N0KKCFX9FJG0NSQ1E 4.0 1.0 0.0 0.0 1.0 1.0 0.0 1.0 1.0 ... NaN NaN NaN 1 0 0 1 540.0 0.0 normal
    9 3LDA1Z7RH2HXAKRR1 10.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 ... 0.491031 0.530640 0.468971 1 1 0 1 NaN NaN normal

    10 rows × 122 columns

    row_id Train Validation
    0 count 32777 362
    1 unique 32777 362
    2 top 5CFZ3FWWP9IV07XAX NZ9ZQQBDN6SS5MKZ1
    3 freq 1 1
    4 unique values 32777 362
    5 unique values / count 1.0 1.0
    6 NaNs 0 0

    number_of_digits

    (Jump to top)
    number_of_digits Train Validation
    0 count 32703.000000 359.000000
    1 mean 10.299422 9.520891
    2 std 2.345710 3.132811
    3 min 1.000000 1.000000
    4 25% 10.000000 8.500000
    5 50% 11.000000 11.000000
    6 75% 12.000000 12.000000
    7 max 17.000000 13.000000
    8 unique values 18.000000 14.000000
    9 unique values / count 0.000500 0.038700
    10 NaNs 74.000000 3.000000
    Most popular values (NaN = -999):
    number_of_digits Count in train (desc) Count in validation Share in train Share in validation
    0 12.0 12818 122.0 0.39107 0.33702
    1 11.0 7535 74.0 0.22989 0.20442
    2 10.0 4416 42.0 0.13473 0.11602
    3 9.0 2541 27.0 0.07752 0.07459
    4 8.0 1564 12.0 0.04772 0.03315
    5 7.0 932 12.0 0.02843 0.03315
    6 6.0 678 14.0 0.02069 0.03867
    7 5.0 507 11.0 0.01547 0.03039
    8 4.0 460 13.0 0.01403 0.03591
    9 1.0 390 13.0 0.01190 0.03591
    Most correlated values with number_of_digits:
    Column Correlation with number_of_digits
    0 number_of_digits 1.000000
    1 between_digits_angle_cw_sum 0.450802
    2 between_axis_digits_angle_sum 0.398671
    3 between_digits_angle_ccw_sum 0.384744
    4 count_defects 0.337459
    5 sequence_flag_cw 0.304220
    112 missing_digit_3 -0.564346
    113 missing_digit_6 -0.579419
    114 missing_digit_5 -0.597470
    115 missing_digit_4 -0.601174
    116 missing_digit_7 -0.617619

    missing_digit_1

    (Jump to top)
    missing_digit_1 Train Validation
    0 count 32703.000000 359.000000
    1 mean 0.221845 0.289694
    2 std 0.415494 0.454253
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 0.000000 1.000000
    7 max 1.000000 1.000000
    8 unique values 3.000000 3.000000
    9 unique values / count 0.000100 0.008300
    10 NaNs 74.000000 3.000000
    Most popular values (NaN = -999):
    missing_digit_1 Count in train (desc) Count in validation Share in train Share in validation
    0 0.0 25448 255 0.77640 0.70442
    1 1.0 7255 104 0.22134 0.28729
    2 -999.0 74 3 0.00226 0.00829
    Most correlated values with missing_digit_1:
    Column Correlation with missing_digit_1
    0 missing_digit_1 1.000000
    1 missing_digit_7 0.283906
    2 between_digits_angle_cw_var 0.276948
    3 missing_digit_6 0.275377
    4 between_digits_angle_ccw_var 0.272619
    5 missing_digit_4 0.272392
    107 sequence_flag_cw -0.191492
    108 count_defects -0.214796
    109 between_digits_angle_cw_sum -0.234926
    110 between_digits_angle_ccw_sum -0.275472
    111 number_of_digits -0.558238

    missing_digit_2

    (Jump to top)
    missing_digit_2 Train Validation
    0 count 32703.000000 359.000000
    1 mean 0.148243 0.178273
    2 std 0.355346 0.383277
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 0.000000 0.000000
    7 max 1.000000 1.000000
    8 unique values 3.000000 3.000000
    9 unique values / count 0.000100 0.008300
    10 NaNs 74.000000 3.000000
    Most popular values (NaN = -999):
    missing_digit_2 Count in train (desc) Count in validation Share in train Share in validation
    0 0.0 27855 295 0.84983 0.81492
    1 1.0 4848 64 0.14791 0.17680
    2 -999.0 74 3 0.00226 0.00829
    Most correlated values with missing_digit_2:
    Column Correlation with missing_digit_2
    0 missing_digit_2 1.000000
    1 other_error 0.303021
    2 missing_digit_7 0.187493
    3 missing_digit_1 0.183191
    4 sequence_flag_ccw 0.182169
    5 missing_digit_4 0.178188
    106 count_defects -0.111115
    107 between_axis_digits_angle_sum -0.115060
    108 between_digits_angle_cw_sum -0.259538
    109 between_digits_angle_ccw_sum -0.329012
    110 number_of_digits -0.406318

    missing_digit_3

    (Jump to top)
    missing_digit_3 Train Validation
    0 count 32703.000000 359.000000
    1 mean 0.125096 0.172702
    2 std 0.330832 0.378517
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 0.000000 0.000000
    7 max 1.000000 1.000000
    8 unique values 3.000000 3.000000
    9 unique values / count 0.000100 0.008300
    10 NaNs 74.000000 3.000000
    Most popular values (NaN = -999):
    missing_digit_3 Count in train (desc) Count in validation Share in train Share in validation
    0 0.0 28612 297 0.87293 0.82044
    1 1.0 4091 62 0.12481 0.17127
    2 -999.0 74 3 0.00226 0.00829
    Most correlated values with missing_digit_3:
    Column Correlation with missing_digit_3
    0 missing_digit_3 1.000000
    1 between_digits_angle_cw_var 0.349647
    2 between_digits_angle_ccw_var 0.342616
    3 missing_digit_6 0.330152
    4 missing_digit_4 0.313203
    5 missing_digit_7 0.288323
    107 between_digits_angle_ccw_sum -0.210373
    108 count_defects -0.222940
    109 between_digits_angle_cw_sum -0.332417
    110 between_axis_digits_angle_sum -0.340519
    111 number_of_digits -0.564346

    missing_digit_4

    (Jump to top)
    missing_digit_4 Train Validation
    0 count 32703.000000 359.000000
    1 mean 0.166713 0.250696
    2 std 0.372725 0.434019
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 0.000000 0.500000
    7 max 1.000000 1.000000
    8 unique values 3.000000 3.000000
    9 unique values / count 0.000100 0.008300
    10 NaNs 74.000000 3.000000
    Most popular values (NaN = -999):
    missing_digit_4 Count in train (desc) Count in validation Share in train Share in validation
    0 0.0 27251 269 0.83141 0.74309
    1 1.0 5452 90 0.16634 0.24862
    2 -999.0 74 3 0.00226 0.00829
    Most correlated values with missing_digit_4:
    Column Correlation with missing_digit_4
    0 missing_digit_4 1.000000
    1 missing_digit_5 0.350929
    2 missing_digit_7 0.349934
    3 between_digits_angle_cw_var 0.335683
    4 between_digits_angle_ccw_var 0.323871
    5 missing_digit_3 0.313203
    107 between_axis_digits_angle_sum -0.202060
    108 sequence_flag_cw -0.229831
    109 between_digits_angle_ccw_sum -0.239031
    110 between_digits_angle_cw_sum -0.292007
    111 number_of_digits -0.601174

    missing_digit_5

    (Jump to top)
    missing_digit_5 Train Validation
    0 count 32703.000000 359.000000
    1 mean 0.202153 0.261838
    2 std 0.401612 0.440249
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 0.000000 1.000000
    7 max 1.000000 1.000000
    8 unique values 3.000000 3.000000
    9 unique values / count 0.000100 0.008300
    10 NaNs 74.000000 3.000000
    Most popular values (NaN = -999):
    missing_digit_5 Count in train (desc) Count in validation Share in train Share in validation
    0 0.0 26092 265 0.79605 0.73204
    1 1.0 6611 94 0.20170 0.25967
    2 -999.0 74 3 0.00226 0.00829
    Most correlated values with missing_digit_5:
    Column Correlation with missing_digit_5
    0 missing_digit_5 1.000000
    1 between_digits_angle_cw_var 0.376451
    2 between_digits_angle_ccw_var 0.362640
    3 missing_digit_7 0.351303
    4 missing_digit_4 0.350929
    5 missing_digit_6 0.308022
    107 between_axis_digits_angle_sum -0.198862
    108 between_digits_angle_ccw_sum -0.209133
    109 between_digits_angle_cw_sum -0.266865
    110 sequence_flag_cw -0.273350
    111 number_of_digits -0.597470

    missing_digit_6

    (Jump to top)
    missing_digit_6 Train Validation
    0 count 32703.000000 359.000000
    1 mean 0.131364 0.192201
    2 std 0.337803 0.394580
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 0.000000 0.000000
    7 max 1.000000 1.000000
    8 unique values 3.000000 3.000000
    9 unique values / count 0.000100 0.008300
    10 NaNs 74.000000 3.000000
    Most popular values (NaN = -999):
    missing_digit_6 Count in train (desc) Count in validation Share in train Share in validation
    0 0.0 28407 290 0.86667 0.80110
    1 1.0 4296 69 0.13107 0.19061
    2 -999.0 74 3 0.00226 0.00829
    Most correlated values with missing_digit_6:
    Column Correlation with missing_digit_6
    0 missing_digit_6 1.000000
    1 between_axis_digits_angle_var 0.371018
    2 between_digits_angle_ccw_var 0.366339
    3 between_digits_angle_cw_var 0.365358
    4 missing_digit_7 0.341398
    5 missing_digit_3 0.330152
    107 vert_count -0.236259
    108 count_defects -0.249927
    109 between_digits_angle_cw_sum -0.327822
    110 between_axis_digits_angle_sum -0.342443
    111 number_of_digits -0.579419

    missing_digit_7

    (Jump to top)
    missing_digit_7 Train Validation
    0 count 32703.000000 359.000000
    1 mean 0.126839 0.197772
    2 std 0.332797 0.398875
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 0.000000 0.000000
    7 max 1.000000 1.000000
    8 unique values 3.000000 3.000000
    9 unique values / count 0.000100 0.008300
    10 NaNs 74.000000 3.000000
    Most popular values (NaN = -999):
    missing_digit_7 Count in train (desc) Count in validation Share in train Share in validation
    0 0.0 28555 288 0.87119 0.79558
    1 1.0 4148 71 0.12655 0.19613
    2 -999.0 74 3 0.00226 0.00829
    Most correlated values with missing_digit_7:
    Column Correlation with missing_digit_7
    0 missing_digit_7 1.000000
    1 missing_digit_8 0.361055
    2 between_digits_angle_cw_var 0.360861
    3 between_digits_angle_ccw_var 0.352368
    4 missing_digit_5 0.351303
    5 missing_digit_4 0.349934
    107 sequence_flag_cw -0.218304
    108 between_axis_digits_angle_sum -0.231261
    109 between_digits_angle_ccw_sum -0.251166
    110 between_digits_angle_cw_sum -0.332969
    111 number_of_digits -0.617619

    missing_digit_8

    (Jump to top)
    missing_digit_8 Train Validation
    0 count 32703.000000 359.000000
    1 mean 0.120723 0.186630
    2 std 0.325810 0.390158
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 0.000000 0.000000
    7 max 1.000000 1.000000
    8 unique values 3.000000 3.000000
    9 unique values / count 0.000100 0.008300
    10 NaNs 74.000000 3.000000
    Most popular values (NaN = -999):
    missing_digit_8 Count in train (desc) Count in validation Share in train Share in validation
    0 0.0 28755 292 0.87729 0.80663
    1 1.0 3948 67 0.12045 0.18508
    2 -999.0 74 3 0.00226 0.00829
    Most correlated values with missing_digit_8:
    Column Correlation with missing_digit_8
    0 missing_digit_8 1.000000
    1 missing_digit_7 0.361055
    2 between_digits_angle_cw_var 0.306664
    3 missing_digit_4 0.306659
    4 missing_digit_5 0.301447
    5 between_digits_angle_ccw_var 0.297185
    107 count_defects -0.178400
    108 sequence_flag_cw -0.183346
    109 between_axis_digits_angle_sum -0.202892
    110 between_digits_angle_cw_sum -0.235842
    111 number_of_digits -0.551926

    missing_digit_9

    (Jump to top)
    missing_digit_9 Train Validation
    0 count 32703.000000 359.000000
    1 mean 0.175183 0.289694
    2 std 0.380129 0.454253
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 0.000000 1.000000
    7 max 1.000000 1.000000
    8 unique values 3.000000 3.000000
    9 unique values / count 0.000100 0.008300
    10 NaNs 74.000000 3.000000
    Most popular values (NaN = -999):
    missing_digit_9 Count in train (desc) Count in validation Share in train Share in validation
    0 0.0 26974 255 0.82296 0.70442
    1 1.0 5729 104 0.17479 0.28729
    2 -999.0 74 3 0.00226 0.00829
    Most correlated values with missing_digit_9:
    Column Correlation with missing_digit_9
    0 missing_digit_9 1.000000
    1 between_digits_angle_cw_var 0.381017
    2 between_axis_digits_angle_var 0.366502
    3 between_digits_angle_ccw_var 0.366297
    4 missing_digit_7 0.280479
    5 missing_digit_3 0.278251
    107 hor_count -0.233388
    108 between_axis_digits_angle_sum -0.261707
    109 sequence_flag_cw -0.283803
    110 between_digits_angle_cw_sum -0.289947
    111 number_of_digits -0.530098

    missing_digit_10

    (Jump to top)
    missing_digit_10 Train Validation
    0 count 32703.000000 359.000000
    1 mean 0.147418 0.236769
    2 std 0.354527 0.425693
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 0.000000 0.000000
    7 max 1.000000 1.000000
    8 unique values 3.000000 3.000000
    9 unique values / count 0.000100 0.008300
    10 NaNs 74.000000 3.000000
    Most popular values (NaN = -999):
    missing_digit_10 Count in train (desc) Count in validation Share in train Share in validation
    0 0.0 27882 274 0.85066 0.75691
    1 1.0 4821 85 0.14708 0.23481
    2 -999.0 74 3 0.00226 0.00829
    Most correlated values with missing_digit_10:
    Column Correlation with missing_digit_10
    0 missing_digit_10 1.000000
    1 between_digits_angle_cw_var 0.312914
    2 between_digits_angle_ccw_var 0.296224
    3 missing_digit_7 0.289633
    4 missing_digit_11 0.287270
    5 missing_digit_8 0.286972
    107 count_defects -0.187419
    108 between_axis_digits_angle_sum -0.191416
    109 sequence_flag_cw -0.217173
    110 between_digits_angle_cw_sum -0.238582
    111 number_of_digits -0.556065

    missing_digit_11

    (Jump to top)
    missing_digit_11 Train Validation
    0 count 32703.000000 359.000000
    1 mean 0.168241 0.222841
    2 std 0.374086 0.416733
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 0.000000 0.000000
    7 max 1.000000 1.000000
    8 unique values 3.000000 3.000000
    9 unique values / count 0.000100 0.008300
    10 NaNs 74.000000 3.000000
    Most popular values (NaN = -999):
    missing_digit_11 Count in train (desc) Count in validation Share in train Share in validation
    0 0.0 27201 279 0.82988 0.77072
    1 1.0 5502 80 0.16786 0.22099
    2 -999.0 74 3 0.00226 0.00829
    Most correlated values with missing_digit_11:
    Column Correlation with missing_digit_11
    0 missing_digit_11 1.000000
    1 other_error 0.348206
    2 missing_digit_10 0.287270
    3 missing_digit_7 0.268994
    4 between_digits_angle_cw_var 0.265545
    5 between_digits_angle_ccw_var 0.254850
    106 count_defects -0.194526
    107 sequence_flag_cw -0.196313
    108 between_digits_angle_ccw_sum -0.233344
    109 between_digits_angle_cw_sum -0.266711
    110 number_of_digits -0.532285

    missing_digit_12

    (Jump to top)
    missing_digit_12 Train Validation
    0 count 32703.000000 359.000000
    1 mean 0.115158 0.189415
    2 std 0.319217 0.392385
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 0.000000 0.000000
    7 max 1.000000 1.000000
    8 unique values 3.000000 3.000000
    9 unique values / count 0.000100 0.008300
    10 NaNs 74.000000 3.000000
    Most popular values (NaN = -999):
    missing_digit_12 Count in train (desc) Count in validation Share in train Share in validation
    0 0.0 28937 291 0.88284 0.80387
    1 1.0 3766 68 0.11490 0.18785
    2 -999.0 74 3 0.00226 0.00829
    Most correlated values with missing_digit_12:
    Column Correlation with missing_digit_12
    0 missing_digit_12 1.000000
    1 between_axis_digits_angle_var 0.242774
    2 between_digits_angle_cw_var 0.223649
    3 missing_digit_6 0.211064
    4 between_digits_angle_ccw_var 0.210177
    5 missing_digit_3 0.198314
    107 sequence_flag_cw -0.171262
    108 between_digits_angle_ccw_sum -0.178806
    109 vert_count -0.215095
    110 between_digits_angle_cw_sum -0.247508
    111 number_of_digits -0.407222

    1 dist from cen

    (Jump to top)
    1 dist from cen Train Validation
    0 count 25448.000000 255.000000
    1 mean 361.869732 354.339930
    2 std 50.310698 57.701010
    3 min 3.354102 51.983170
    4 25% 336.580321 330.244742
    5 50% 367.434688 368.160970
    6 75% 393.898464 388.853105
    7 max 618.025889 492.941426
    8 unique values 21148.000000 255.000000
    9 unique values / count 0.645200 0.704400
    10 NaNs 7329.000000 107.000000
    Most popular values (NaN = -999):
    1 dist from cen Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 7329 107.0 0.22360 0.54872
    1 380.304155 6 0.0 0.00018 0.00000
    2 383.020887 6 0.0 0.00018 0.00000
    3 400.707187 6 0.0 0.00018 0.00000
    4 362.286144 6 0.0 0.00018 0.00000
    5 353.022662 6 0.0 0.00018 0.00000
    6 371.875315 5 0.0 0.00015 0.00000
    7 393.817851 5 0.0 0.00015 0.00000
    8 391.651950 5 0.0 0.00015 0.00000
    9 370.961588 5 0.0 0.00015 0.00000
    Most correlated values with 1 dist from cen:
    Column Correlation with 1 dist from cen
    0 1 dist from cen 1.000000
    1 2 dist from cen 0.709005
    2 12 dist from cen 0.658284
    3 3 dist from cen 0.542965
    4 11 dist from cen 0.435573
    5 4 dist from cen 0.358795
    111 area_digit_2 -0.269999
    112 height_digit_3 -0.275796
    113 width_digit_12 -0.292792
    114 height_digit_12 -0.301515
    115 area_digit_12 -0.346086

    10 dist from cen

    (Jump to top)
    10 dist from cen Train Validation
    0 count 27882.000000 274.000000
    1 mean 367.418424 362.651154
    2 std 48.060878 61.588089
    3 min 5.852350 37.643060
    4 25% 343.945581 338.489183
    5 50% 372.683512 373.309857
    6 75% 397.112940 401.833946
    7 max 628.776988 505.421853
    8 unique values 22765.000000 275.000000
    9 unique values / count 0.694500 0.759700
    10 NaNs 4895.000000 88.000000
    Most popular values (NaN = -999):
    10 dist from cen Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 4895 88.0 0.14934 0.50575
    1 358.793116 6 0.0 0.00018 0.00000
    2 365.735560 6 0.0 0.00018 0.00000
    3 369.040987 6 1.0 0.00018 0.00575
    4 404.515142 5 0.0 0.00015 0.00000
    5 413.559246 5 0.0 0.00015 0.00000
    6 389.403711 5 0.0 0.00015 0.00000
    7 395.501264 5 0.0 0.00015 0.00000
    8 379.692310 5 0.0 0.00015 0.00000
    9 372.320091 5 0.0 0.00015 0.00000
    Most correlated values with 10 dist from cen:
    Column Correlation with 10 dist from cen
    0 10 dist from cen 1.000000
    1 11 dist from cen 0.792786
    2 9 dist from cen 0.700233
    3 12 dist from cen 0.493608
    4 8 dist from cen 0.488509
    5 6 dist from cen 0.424319
    111 between_digits_angle_ccw_sum -0.250501
    112 height_digit_12 -0.267602
    113 area_digit_10 -0.276785
    114 width_digit_10 -0.295011
    115 area_digit_12 -0.296367

    11 dist from cen

    (Jump to top)
    11 dist from cen Train Validation
    0 count 27201.000000 279.000000
    1 mean 368.235873 368.939471
    2 std 48.425983 48.268612
    3 min 11.335784 215.352037
    4 25% 342.212288 343.152948
    5 50% 372.667412 371.481157
    6 75% 399.011278 403.296699
    7 max 613.843832 495.745903
    8 unique values 22258.000000 279.000000
    9 unique values / count 0.679100 0.770700
    10 NaNs 5576.000000 83.000000
    Most popular values (NaN = -999):
    11 dist from cen Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 5576 83.0 0.17012 0.47159
    1 350.089274 8 0.0 0.00024 0.00000
    2 392.698994 7 0.0 0.00021 0.00000
    3 410.766661 6 1.0 0.00018 0.00568
    4 378.123326 6 0.0 0.00018 0.00000
    5 371.726042 6 0.0 0.00018 0.00000
    6 397.109871 5 0.0 0.00015 0.00000
    7 382.382335 5 0.0 0.00015 0.00000
    8 373.915097 5 0.0 0.00015 0.00000
    9 348.500359 5 0.0 0.00015 0.00000
    Most correlated values with 11 dist from cen:
    Column Correlation with 11 dist from cen
    0 11 dist from cen 1.000000
    1 10 dist from cen 0.792786
    2 12 dist from cen 0.649639
    3 9 dist from cen 0.534832
    4 1 dist from cen 0.435573
    5 2 dist from cen 0.416683
    111 area_digit_11 -0.281544
    112 area_digit_10 -0.282686
    113 width_digit_10 -0.297822
    114 height_digit_12 -0.302116
    115 area_digit_12 -0.329968

    12 dist from cen

    (Jump to top)
    12 dist from cen Train Validation
    0 count 28937.000000 291.000000
    1 mean 370.796838 370.891134
    2 std 48.005863 56.899557
    3 min 22.102036 94.366308
    4 25% 348.353987 342.058650
    5 50% 377.180328 381.878580
    6 75% 401.186366 406.138700
    7 max 659.571073 571.075520
    8 unique values 21357.000000 292.000000
    9 unique values / count 0.651600 0.806600
    10 NaNs 3840.000000 71.000000
    Most popular values (NaN = -999):
    12 dist from cen Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 3840 71.0 0.11716 0.355
    1 386.505175 9 0.0 0.00027 0.000
    2 380.573909 8 0.0 0.00024 0.000
    3 386.000324 7 0.0 0.00021 0.000
    4 370.005405 7 0.0 0.00021 0.000
    5 383.720276 7 0.0 0.00021 0.000
    6 398.130946 7 0.0 0.00021 0.000
    7 360.699667 7 0.0 0.00021 0.000
    8 381.878580 6 1.0 0.00018 0.005
    9 402.657423 6 0.0 0.00018 0.000
    Most correlated values with 12 dist from cen:
    Column Correlation with 12 dist from cen
    0 12 dist from cen 1.000000
    1 1 dist from cen 0.658284
    2 11 dist from cen 0.649639
    3 2 dist from cen 0.561495
    4 10 dist from cen 0.493608
    5 3 dist from cen 0.491578
    111 height_digit_3 -0.319467
    112 area_digit_3 -0.322698
    113 width_digit_12 -0.351274
    114 height_digit_12 -0.412872
    115 area_digit_12 -0.440424

    2 dist from cen

    (Jump to top)
    2 dist from cen Train Validation
    0 count 27855.000000 295.000000
    1 mean 349.116177 340.248534
    2 std 53.313076 58.972478
    3 min 7.905694 95.630800
    4 25% 320.153479 307.196385
    5 50% 353.802911 348.680728
    6 75% 383.428285 379.130664
    7 max 568.624876 486.444498
    8 unique values 22905.000000 295.000000
    9 unique values / count 0.698800 0.814900
    10 NaNs 4922.000000 67.000000
    Most popular values (NaN = -999):
    2 dist from cen Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 4922 67.0 0.15017 0.42405
    1 317.539368 7 0.0 0.00021 0.00000
    2 389.168986 6 0.0 0.00018 0.00000
    3 350.708212 6 0.0 0.00018 0.00000
    4 347.940368 6 0.0 0.00018 0.00000
    5 357.220170 6 0.0 0.00018 0.00000
    6 356.863069 6 0.0 0.00018 0.00000
    7 340.009191 5 0.0 0.00015 0.00000
    8 364.900671 5 0.0 0.00015 0.00000
    9 350.058924 5 0.0 0.00015 0.00000
    Most correlated values with 2 dist from cen:
    Column Correlation with 2 dist from cen
    0 2 dist from cen 1.000000
    1 3 dist from cen 0.725259
    2 1 dist from cen 0.709005
    3 12 dist from cen 0.561495
    4 4 dist from cen 0.509722
    5 minute_hand_length 0.441396
    111 height_digit_2 -0.307129
    112 width_digit_12 -0.308594
    113 area_digit_2 -0.309652
    114 height_digit_3 -0.312967
    115 area_digit_12 -0.342764

    3 dist from cen

    (Jump to top)
    3 dist from cen Train Validation
    0 count 28612.000000 297.000000
    1 mean 337.542587 328.683359
    2 std 51.175381 57.556793
    3 min 15.206906 109.592427
    4 25% 308.950947 297.228027
    5 50% 343.432854 338.913337
    6 75% 371.767737 363.792867
    7 max 611.333379 456.444137
    8 unique values 23065.000000 298.000000
    9 unique values / count 0.703700 0.823200
    10 NaNs 4165.000000 65.000000
    Most popular values (NaN = -999):
    3 dist from cen Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 4165 65.0 0.12707 0.3869
    1 380.644848 7 0.0 0.00021 0.0000
    2 350.160677 7 0.0 0.00021 0.0000
    3 296.459525 6 0.0 0.00018 0.0000
    4 349.017550 6 0.0 0.00018 0.0000
    5 342.806797 6 0.0 0.00018 0.0000
    6 339.214165 6 0.0 0.00018 0.0000
    7 388.538930 6 0.0 0.00018 0.0000
    8 296.095002 5 0.0 0.00015 0.0000
    9 376.158544 5 0.0 0.00015 0.0000
    Most correlated values with 3 dist from cen:
    Column Correlation with 3 dist from cen
    0 3 dist from cen 1.000000
    1 4 dist from cen 0.761953
    2 2 dist from cen 0.725259
    3 5 dist from cen 0.547068
    4 1 dist from cen 0.542965
    5 12 dist from cen 0.491578
    111 area_digit_3 -0.355528
    112 height_digit_6 -0.373873
    113 area_digit_12 -0.381927
    114 width_digit_12 -0.389002
    115 height_digit_3 -0.416407

    4 dist from cen

    (Jump to top)
    4 dist from cen Train Validation
    0 count 27251.000000 269.000000
    1 mean 336.085919 327.209271
    2 std 47.456872 55.793071
    3 min 6.519202 111.803399
    4 25% 309.714788 298.724371
    5 50% 340.306480 336.473625
    6 75% 367.008855 364.111247
    7 max 580.975473 437.433424
    8 unique values 22437.000000 269.000000
    9 unique values / count 0.684500 0.743100
    10 NaNs 5526.000000 93.000000
    Most popular values (NaN = -999):
    4 dist from cen Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 5526 93.0 0.16859 0.5407
    1 319.330550 8 0.0 0.00024 0.0000
    2 362.913557 6 0.0 0.00018 0.0000
    3 337.662924 6 0.0 0.00018 0.0000
    4 352.709867 6 0.0 0.00018 0.0000
    5 359.336194 6 0.0 0.00018 0.0000
    6 367.355754 5 0.0 0.00015 0.0000
    7 318.814209 5 0.0 0.00015 0.0000
    8 340.648279 5 0.0 0.00015 0.0000
    9 323.235595 5 0.0 0.00015 0.0000
    Most correlated values with 4 dist from cen:
    Column Correlation with 4 dist from cen
    0 4 dist from cen 1.000000
    1 5 dist from cen 0.776762
    2 3 dist from cen 0.761953
    3 2 dist from cen 0.509722
    4 6 dist from cen 0.471918
    5 9 dist from cen 0.408276
    111 height_digit_4 -0.258472
    112 area_digit_12 -0.272054
    113 width_digit_12 -0.273842
    114 height_digit_6 -0.282919
    115 height_digit_3 -0.307230

    5 dist from cen

    (Jump to top)
    5 dist from cen Train Validation
    0 count 26092.000000 265.000000
    1 mean 335.550313 329.190015
    2 std 46.910977 53.567646
    3 min 7.826238 129.468143
    4 25% 309.358914 302.278431
    5 50% 339.694716 334.228963
    6 75% 366.316888 366.500341
    7 max 520.454849 478.221967
    8 unique values 21245.000000 266.000000
    9 unique values / count 0.648200 0.734800
    10 NaNs 6685.000000 97.000000
    Most popular values (NaN = -999):
    5 dist from cen Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 6685 97.0 0.20395 0.53005
    1 350.615530 8 0.0 0.00024 0.00000
    2 361.749222 7 0.0 0.00021 0.00000
    3 344.372618 6 0.0 0.00018 0.00000
    4 328.506088 6 0.0 0.00018 0.00000
    5 359.584830 6 0.0 0.00018 0.00000
    6 340.424735 6 0.0 0.00018 0.00000
    7 329.639955 6 0.0 0.00018 0.00000
    8 346.375880 6 0.0 0.00018 0.00000
    9 346.301891 6 0.0 0.00018 0.00000
    Most correlated values with 5 dist from cen:
    Column Correlation with 5 dist from cen
    0 5 dist from cen 1.000000
    1 4 dist from cen 0.776762
    2 6 dist from cen 0.635754
    3 3 dist from cen 0.547068
    4 9 dist from cen 0.415726
    5 10 dist from cen 0.412986
    111 height_digit_12 -0.243856
    112 area_digit_4 -0.252993
    113 height_digit_3 -0.258554
    114 height_digit_5 -0.270411
    115 area_digit_12 -0.278165

    6 dist from cen

    (Jump to top)
    6 dist from cen Train Validation
    0 count 28407.000000 290.000000
    1 mean 353.017822 346.825546
    2 std 47.096105 55.305610
    3 min 3.535534 61.241326
    4 25% 328.613374 321.629391
    5 50% 358.558224 355.854575
    6 75% 384.157520 382.021114
    7 max 586.950168 535.724276
    8 unique values 21664.000000 290.000000
    9 unique values / count 0.661000 0.801100
    10 NaNs 4370.000000 72.000000
    Most popular values (NaN = -999):
    6 dist from cen Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 4370 72.0 0.13333 0.4186
    1 336.544945 11 0.0 0.00034 0.0000
    2 355.431076 8 0.0 0.00024 0.0000
    3 373.505355 8 0.0 0.00024 0.0000
    4 357.022408 8 0.0 0.00024 0.0000
    5 362.500000 8 0.0 0.00024 0.0000
    6 351.569694 7 0.0 0.00021 0.0000
    7 386.369318 7 0.0 0.00021 0.0000
    8 365.041436 6 0.0 0.00018 0.0000
    9 370.178673 6 0.0 0.00018 0.0000
    Most correlated values with 6 dist from cen:
    Column Correlation with 6 dist from cen
    0 6 dist from cen 1.000000
    1 7 dist from cen 0.683981
    2 5 dist from cen 0.635754
    3 8 dist from cen 0.550448
    4 9 dist from cen 0.507077
    5 4 dist from cen 0.471918
    111 width_digit_10 -0.274481
    112 area_digit_7 -0.280505
    113 area_digit_6 -0.304178
    114 area_digit_12 -0.320258
    115 height_digit_12 -0.326151

    7 dist from cen

    (Jump to top)
    7 dist from cen Train Validation
    0 count 28555.000000 288.000000
    1 mean 368.547709 364.170914
    2 std 50.956366 61.554328
    3 min 14.422205 36.674242
    4 25% 342.429190 337.497126
    5 50% 372.874309 372.334472
    6 75% 400.218222 399.518538
    7 max 666.132119 658.555427
    8 unique values 23415.000000 289.000000
    9 unique values / count 0.714400 0.798300
    10 NaNs 4222.000000 74.000000
    Most popular values (NaN = -999):
    7 dist from cen Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 4222 74.0 0.12881 0.45399
    1 388.417623 7 0.0 0.00021 0.00000
    2 373.283404 7 0.0 0.00021 0.00000
    3 357.424817 6 0.0 0.00018 0.00000
    4 375.016333 6 0.0 0.00018 0.00000
    5 360.695509 6 0.0 0.00018 0.00000
    6 389.546210 6 0.0 0.00018 0.00000
    7 396.011364 5 0.0 0.00015 0.00000
    8 386.589252 5 0.0 0.00015 0.00000
    9 366.034493 5 0.0 0.00015 0.00000
    Most correlated values with 7 dist from cen:
    Column Correlation with 7 dist from cen
    0 7 dist from cen 1.000000
    1 8 dist from cen 0.753724
    2 6 dist from cen 0.683981
    3 9 dist from cen 0.573894
    4 10 dist from cen 0.398767
    5 5 dist from cen 0.396155
    111 final_rotation_angle -0.209483
    112 width_digit_9 -0.213543
    113 width_digit_10 -0.214479
    114 width_digit_8 -0.229184
    115 height_digit_12 -0.236198

    8 dist from cen

    (Jump to top)
    8 dist from cen Train Validation
    0 count 28755.000000 292.000000
    1 mean 370.329200 362.324910
    2 std 51.562665 64.947323
    3 min 8.139410 72.090221
    4 25% 345.253711 336.208249
    5 50% 375.153635 371.319385
    6 75% 401.983364 399.285841
    7 max 608.481717 529.761503
    8 unique values 23604.000000 292.000000
    9 unique values / count 0.720100 0.806600
    10 NaNs 4022.000000 70.000000
    Most popular values (NaN = -999):
    8 dist from cen Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 4022 70.0 0.12271 0.39773
    1 403.133043 7 0.0 0.00021 0.00000
    2 334.680818 6 0.0 0.00018 0.00000
    3 368.730932 6 0.0 0.00018 0.00000
    4 395.823509 6 0.0 0.00018 0.00000
    5 402.852951 6 0.0 0.00018 0.00000
    6 395.379565 6 0.0 0.00018 0.00000
    7 358.650944 6 0.0 0.00018 0.00000
    8 383.038510 6 0.0 0.00018 0.00000
    9 382.638864 5 0.0 0.00015 0.00000
    Most correlated values with 8 dist from cen:
    Column Correlation with 8 dist from cen
    0 8 dist from cen 1.000000
    1 7 dist from cen 0.753724
    2 9 dist from cen 0.718492
    3 6 dist from cen 0.550448
    4 10 dist from cen 0.488509
    5 5 dist from cen 0.368272
    111 width_digit_8 -0.218300
    112 height_digit_12 -0.221123
    113 width_digit_10 -0.228457
    114 area_digit_8 -0.229836
    115 between_digits_angle_ccw_sum -0.358946

    9 dist from cen

    (Jump to top)
    9 dist from cen Train Validation
    0 count 26974.000000 255.000000
    1 mean 375.631690 376.453994
    2 std 45.795291 45.449431
    3 min 14.115594 146.069333
    4 25% 353.542784 351.260403
    5 50% 381.106284 382.400641
    6 75% 404.633476 403.347755
    7 max 620.016935 486.066868
    8 unique values 20996.000000 256.000000
    9 unique values / count 0.640600 0.707200
    10 NaNs 5803.000000 107.000000
    Most popular values (NaN = -999):
    9 dist from cen Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 5803 107.0 0.17704 0.50711
    1 367.268635 10 0.0 0.00031 0.00000
    2 392.436301 8 0.0 0.00024 0.00000
    3 397.724779 7 0.0 0.00021 0.00000
    4 400.078117 6 0.0 0.00018 0.00000
    5 411.024634 6 0.0 0.00018 0.00000
    6 394.325373 6 0.0 0.00018 0.00000
    7 392.183630 6 0.0 0.00018 0.00000
    8 405.007716 6 0.0 0.00018 0.00000
    9 397.570748 6 0.0 0.00018 0.00000
    Most correlated values with 9 dist from cen:
    Column Correlation with 9 dist from cen
    0 9 dist from cen 1.000000
    1 8 dist from cen 0.718492
    2 10 dist from cen 0.700233
    3 7 dist from cen 0.573894
    4 11 dist from cen 0.534832
    5 6 dist from cen 0.507077
    111 height_digit_12 -0.245396
    112 width_digit_12 -0.248441
    113 area_digit_8 -0.258230
    114 area_digit_12 -0.280400
    115 width_digit_10 -0.281388

    euc_dist_digit_1

    (Jump to top)
    euc_dist_digit_1 Train Validation
    0 count 25400.000000 255.000000
    1 mean 30.287315 28.712415
    2 std 33.877417 31.931081
    3 min 0.000760 0.136073
    4 25% 6.680591 6.780543
    5 50% 14.935799 15.059592
    6 75% 37.094044 32.842662
    7 max 119.957644 112.333641
    8 unique values 23913.000000 256.000000
    9 unique values / count 0.729600 0.707200
    10 NaNs 7377.000000 107.000000
    Most popular values (NaN = -999):
    euc_dist_digit_1 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 7377 107.0 0.22507 0.72297
    1 3.595678 4 0.0 0.00012 0.00000
    2 5.710511 4 0.0 0.00012 0.00000
    3 4.864260 4 0.0 0.00012 0.00000
    4 7.281675 4 0.0 0.00012 0.00000
    5 12.124343 4 0.0 0.00012 0.00000
    6 0.787928 4 0.0 0.00012 0.00000
    7 8.216260 4 0.0 0.00012 0.00000
    8 9.112259 4 0.0 0.00012 0.00000
    9 1.896679 3 0.0 0.00009 0.00000
    Most correlated values with euc_dist_digit_1:
    Column Correlation with euc_dist_digit_1
    0 euc_dist_digit_1 1.000000
    1 euc_dist_digit_12 0.895557
    2 deviation_dist_from_mid_axis 0.883553
    3 euc_dist_digit_3 0.865873
    4 euc_dist_digit_11 0.852942
    5 euc_dist_digit_10 0.833996
    111 height_digit_8 -0.474169
    112 height_digit_9 -0.479460
    113 height_digit_7 -0.483864
    114 height_digit_6 -0.499671
    115 height_digit_1 -0.566982

    euc_dist_digit_2

    (Jump to top)
    euc_dist_digit_2 Train Validation
    0 count 27800.000000 295.000000
    1 mean 32.834984 35.414626
    2 std 31.828580 31.027786
    3 min 0.003261 0.383323
    4 25% 8.930655 10.757488
    5 50% 20.012485 25.436234
    6 75% 46.669563 50.958705
    7 max 119.906309 116.282230
    8 unique values 26417.000000 296.000000
    9 unique values / count 0.806000 0.817700
    10 NaNs 4977.000000 67.000000
    Most popular values (NaN = -999):
    euc_dist_digit_2 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 4977 67.0 0.15184 0.78824
    1 26.861567 4 0.0 0.00012 0.00000
    2 0.973594 4 0.0 0.00012 0.00000
    3 15.903987 3 0.0 0.00009 0.00000
    4 9.786820 3 0.0 0.00009 0.00000
    5 8.713237 3 0.0 0.00009 0.00000
    6 19.036402 3 0.0 0.00009 0.00000
    7 9.794903 3 0.0 0.00009 0.00000
    8 11.994654 3 0.0 0.00009 0.00000
    9 21.202235 3 0.0 0.00009 0.00000
    Most correlated values with euc_dist_digit_2:
    Column Correlation with euc_dist_digit_2
    0 euc_dist_digit_2 1.000000
    1 euc_dist_digit_3 0.817747
    2 euc_dist_digit_1 0.806897
    3 deviation_dist_from_mid_axis 0.780278
    4 euc_dist_digit_4 0.779112
    5 euc_dist_digit_5 0.775221
    111 height_digit_8 -0.393057
    112 height_digit_9 -0.400821
    113 height_digit_7 -0.415490
    114 height_digit_6 -0.418058
    115 height_digit_1 -0.497848

    euc_dist_digit_3

    (Jump to top)
    euc_dist_digit_3 Train Validation
    0 count 28603.000000 297.000000
    1 mean 33.031035 34.606700
    2 std 33.060628 31.404319
    3 min 0.000000 0.260000
    4 25% 8.190000 10.010000
    5 50% 18.980000 22.490000
    6 75% 47.840000 51.350000
    7 max 119.860000 114.400000
    8 unique values 912.000000 227.000000
    9 unique values / count 0.027800 0.627100
    10 NaNs 4174.000000 65.000000
    Most popular values (NaN = -999):
    euc_dist_digit_3 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.00 4174 65.0 0.12735 0.17956
    1 1.69 145 0.0 0.00442 0.00000
    2 1.43 135 2.0 0.00412 0.00552
    3 3.12 134 2.0 0.00409 0.00552
    4 3.38 131 1.0 0.00400 0.00276
    5 5.46 129 1.0 0.00394 0.00276
    6 3.51 129 2.0 0.00394 0.00552
    7 0.52 129 1.0 0.00394 0.00276
    8 0.65 128 0.0 0.00391 0.00000
    9 0.26 128 1.0 0.00391 0.00276
    Most correlated values with euc_dist_digit_3:
    Column Correlation with euc_dist_digit_3
    0 euc_dist_digit_3 1.000000
    1 deviation_dist_from_mid_axis 0.938323
    2 euc_dist_digit_4 0.905836
    3 euc_dist_digit_5 0.876202
    4 euc_dist_digit_1 0.865873
    5 euc_dist_digit_12 0.856718
    111 height_digit_8 -0.471607
    112 height_digit_9 -0.484704
    113 height_digit_7 -0.489826
    114 height_digit_6 -0.496467
    115 height_digit_1 -0.586916

    euc_dist_digit_4

    (Jump to top)
    euc_dist_digit_4 Train Validation
    0 count 27238.000000 269.000000
    1 mean 32.049520 32.086552
    2 std 31.662544 31.279015
    3 min 0.000760 0.068572
    4 25% 8.141702 7.743508
    5 50% 19.097071 18.212570
    6 75% 45.860710 48.230000
    7 max 119.937391 113.624643
    8 unique values 25887.000000 269.000000
    9 unique values / count 0.789800 0.743100
    10 NaNs 5539.000000 93.000000
    Most popular values (NaN = -999):
    euc_dist_digit_4 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 5539 93.0 0.16899 0.78151
    1 4.045676 5 0.0 0.00015 0.00000
    2 22.375006 4 0.0 0.00012 0.00000
    3 0.119260 3 0.0 0.00009 0.00000
    4 1.320989 3 0.0 0.00009 0.00000
    5 0.155009 3 0.0 0.00009 0.00000
    6 10.977007 3 0.0 0.00009 0.00000
    7 0.294989 3 0.0 0.00009 0.00000
    8 88.566730 3 0.0 0.00009 0.00000
    9 0.485927 3 0.0 0.00009 0.00000
    Most correlated values with euc_dist_digit_4:
    Column Correlation with euc_dist_digit_4
    0 euc_dist_digit_4 1.000000
    1 euc_dist_digit_3 0.905836
    2 euc_dist_digit_5 0.898449
    3 deviation_dist_from_mid_axis 0.874681
    4 euc_dist_digit_7 0.840853
    5 euc_dist_digit_8 0.828104
    111 height_digit_8 -0.447081
    112 height_digit_9 -0.454875
    113 height_digit_6 -0.462422
    114 height_digit_7 -0.466630
    115 height_digit_1 -0.548491

    euc_dist_digit_5

    (Jump to top)
    euc_dist_digit_5 Train Validation
    0 count 26082.000000 264.000000
    1 mean 30.724226 30.474200
    2 std 30.055328 29.734478
    3 min 0.002345 0.197991
    4 25% 7.908380 7.617593
    5 50% 18.527444 18.515487
    6 75% 44.763078 44.525574
    7 max 119.643227 108.472561
    8 unique values 24739.000000 265.000000
    9 unique values / count 0.754800 0.732000
    10 NaNs 6695.000000 98.000000
    Most popular values (NaN = -999):
    euc_dist_digit_5 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 6695 98.0 0.20426 0.79032
    1 0.377406 5 0.0 0.00015 0.00000
    2 2.595489 4 0.0 0.00012 0.00000
    3 1.444156 4 0.0 0.00012 0.00000
    4 5.466073 4 0.0 0.00012 0.00000
    5 0.457656 4 0.0 0.00012 0.00000
    6 14.191904 4 0.0 0.00012 0.00000
    7 0.112428 4 0.0 0.00012 0.00000
    8 12.389925 3 0.0 0.00009 0.00000
    9 8.774174 3 0.0 0.00009 0.00000
    Most correlated values with euc_dist_digit_5:
    Column Correlation with euc_dist_digit_5
    0 euc_dist_digit_5 1.000000
    1 euc_dist_digit_4 0.898449
    2 euc_dist_digit_3 0.876202
    3 deviation_dist_from_mid_axis 0.861845
    4 euc_dist_digit_6 0.836368
    5 euc_dist_digit_8 0.830755
    111 height_digit_8 -0.429745
    112 height_digit_9 -0.438334
    113 height_digit_7 -0.442409
    114 height_digit_6 -0.443804
    115 height_digit_1 -0.527828

    euc_dist_digit_6

    (Jump to top)
    euc_dist_digit_6 Train Validation
    0 count 28394.000000 290.000000
    1 mean 28.135344 27.565379
    2 std 31.245333 30.533193
    3 min 0.000000 0.000000
    4 25% 6.240000 5.492500
    5 50% 14.300000 13.975000
    6 75% 35.100000 39.227500
    7 max 119.730000 115.700000
    8 unique values 889.000000 209.000000
    9 unique values / count 0.027100 0.577300
    10 NaNs 4383.000000 72.000000
    Most popular values (NaN = -999):
    euc_dist_digit_6 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.00 4383 72.0 0.13372 0.19890
    1 4.55 173 3.0 0.00528 0.00829
    2 1.43 172 0.0 0.00525 0.00000
    3 3.64 170 3.0 0.00519 0.00829
    4 0.78 168 0.0 0.00513 0.00000
    5 3.51 168 1.0 0.00513 0.00276
    6 2.47 167 4.0 0.00510 0.01105
    7 4.68 164 1.0 0.00500 0.00276
    8 0.91 163 0.0 0.00497 0.00000
    9 1.04 162 1.0 0.00494 0.00276
    Most correlated values with euc_dist_digit_6:
    Column Correlation with euc_dist_digit_6
    0 euc_dist_digit_6 1.000000
    1 deviation_dist_from_mid_axis 0.930912
    2 euc_dist_digit_7 0.880315
    3 euc_dist_digit_9 0.866827
    4 euc_dist_digit_8 0.859128
    5 euc_dist_digit_5 0.836368
    111 height_digit_8 -0.453401
    112 height_digit_7 -0.465866
    113 height_digit_9 -0.475632
    114 height_digit_6 -0.504996
    115 height_digit_1 -0.534603

    euc_dist_digit_7

    (Jump to top)
    euc_dist_digit_7 Train Validation
    0 count 28491.000000 286.000000
    1 mean 30.886070 29.746576
    2 std 33.028061 31.199643
    3 min 0.000491 0.015987
    4 25% 6.903923 5.957112
    5 50% 15.724508 16.274192
    6 75% 44.079657 42.165667
    7 max 119.997479 115.930144
    8 unique values 26646.000000 287.000000
    9 unique values / count 0.812900 0.792800
    10 NaNs 4286.000000 76.000000
    Most popular values (NaN = -999):
    euc_dist_digit_7 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 4286 76.0 0.13076 0.7037
    1 1.007512 4 0.0 0.00012 0.0000
    2 3.442655 4 0.0 0.00012 0.0000
    3 5.199820 4 0.0 0.00012 0.0000
    4 0.532594 4 0.0 0.00012 0.0000
    5 0.641761 4 0.0 0.00012 0.0000
    6 8.888319 4 0.0 0.00012 0.0000
    7 0.062679 4 0.0 0.00012 0.0000
    8 3.947404 4 0.0 0.00012 0.0000
    9 6.066569 4 0.0 0.00012 0.0000
    Most correlated values with euc_dist_digit_7:
    Column Correlation with euc_dist_digit_7
    0 euc_dist_digit_7 1.000000
    1 euc_dist_digit_8 0.922580
    2 deviation_dist_from_mid_axis 0.891760
    3 euc_dist_digit_6 0.880315
    4 euc_dist_digit_9 0.873219
    5 euc_dist_digit_10 0.860499
    111 height_digit_8 -0.467701
    112 height_digit_9 -0.477758
    113 height_digit_7 -0.483906
    114 height_digit_6 -0.488399
    115 height_digit_1 -0.542421

    euc_dist_digit_8

    (Jump to top)
    euc_dist_digit_8 Train Validation
    0 count 28641.000000 290.000000
    1 mean 32.250843 31.948840
    2 std 33.840305 32.820881
    3 min 0.000515 0.075919
    4 25% 7.413237 7.508427
    5 50% 16.984068 18.110081
    6 75% 46.748061 43.715447
    7 max 119.838808 118.501563
    8 unique values 27017.000000 290.000000
    9 unique values / count 0.824300 0.801100
    10 NaNs 4136.000000 72.000000
    Most popular values (NaN = -999):
    euc_dist_digit_8 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 4136 72.0 0.12619 0.63717
    1 1.745511 5 0.0 0.00015 0.00000
    2 2.333012 4 0.0 0.00012 0.00000
    3 1.573821 4 0.0 0.00012 0.00000
    4 7.865760 4 0.0 0.00012 0.00000
    5 1.499511 4 0.0 0.00012 0.00000
    6 1.247594 4 0.0 0.00012 0.00000
    7 11.754236 4 0.0 0.00012 0.00000
    8 1.809596 4 0.0 0.00012 0.00000
    9 8.127321 3 0.0 0.00009 0.00000
    Most correlated values with euc_dist_digit_8:
    Column Correlation with euc_dist_digit_8
    0 euc_dist_digit_8 1.000000
    1 euc_dist_digit_7 0.922580
    2 euc_dist_digit_9 0.919703
    3 deviation_dist_from_mid_axis 0.905678
    4 euc_dist_digit_10 0.899548
    5 euc_dist_digit_11 0.871677
    111 height_digit_8 -0.467785
    112 height_digit_9 -0.478848
    113 height_digit_7 -0.478994
    114 height_digit_6 -0.497087
    115 height_digit_1 -0.556503

    euc_dist_digit_9

    (Jump to top)
    euc_dist_digit_9 Train Validation
    0 count 2.693600e+04 254.000000
    1 mean 3.125026e+01 27.349134
    2 std 3.440961e+01 30.559390
    3 min 2.960000e-14 0.130000
    4 25% 6.890000e+00 6.727500
    5 50% 1.586000e+01 15.145000
    6 75% 4.085250e+01 31.427500
    7 max 1.199900e+02 111.410000
    8 unique values 9.190000e+02 192.000000
    9 unique values / count 2.800000e-02 0.530400
    10 NaNs 5.841000e+03 108.000000
    Most popular values (NaN = -999):
    euc_dist_digit_9 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.00 5841 108.0 0.17820 0.29917
    1 0.39 157 1.0 0.00479 0.00277
    2 2.73 150 1.0 0.00458 0.00277
    3 0.91 149 0.0 0.00455 0.00000
    4 4.42 148 1.0 0.00452 0.00277
    5 0.65 145 2.0 0.00442 0.00554
    6 3.12 143 1.0 0.00436 0.00277
    7 1.82 143 1.0 0.00436 0.00277
    8 4.68 142 0.0 0.00433 0.00000
    9 1.95 141 2.0 0.00430 0.00554
    Most correlated values with euc_dist_digit_9:
    Column Correlation with euc_dist_digit_9
    0 euc_dist_digit_9 1.000000
    1 deviation_dist_from_mid_axis 0.946361
    2 euc_dist_digit_10 0.933982
    3 euc_dist_digit_8 0.919703
    4 euc_dist_digit_11 0.894293
    5 euc_dist_digit_7 0.873219
    111 height_digit_8 -0.451736
    112 height_digit_7 -0.460248
    113 height_digit_6 -0.471743
    114 height_digit_9 -0.473352
    115 height_digit_1 -0.537054

    euc_dist_digit_10

    (Jump to top)
    euc_dist_digit_10 Train Validation
    0 count 27838.000000 274.000000
    1 mean 33.247571 31.575679
    2 std 34.375507 32.721449
    3 min 0.002010 0.096238
    4 25% 7.541077 7.954299
    5 50% 17.561152 17.099009
    6 75% 50.597531 43.558049
    7 max 119.915642 114.565815
    8 unique values 26196.000000 275.000000
    9 unique values / count 0.799200 0.759700
    10 NaNs 4939.000000 88.000000
    Most popular values (NaN = -999):
    euc_dist_digit_10 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 4939 88.0 0.15068 0.74576
    1 1.787177 4 0.0 0.00012 0.00000
    2 17.805758 4 0.0 0.00012 0.00000
    3 10.184927 4 0.0 0.00012 0.00000
    4 15.407902 3 0.0 0.00009 0.00000
    5 0.116179 3 0.0 0.00009 0.00000
    6 17.705925 3 0.0 0.00009 0.00000
    7 27.263671 3 0.0 0.00009 0.00000
    8 5.303094 3 0.0 0.00009 0.00000
    9 1.868678 3 0.0 0.00009 0.00000
    Most correlated values with euc_dist_digit_10:
    Column Correlation with euc_dist_digit_10
    0 euc_dist_digit_10 1.000000
    1 euc_dist_digit_11 0.935773
    2 euc_dist_digit_9 0.933982
    3 euc_dist_digit_8 0.899548
    4 deviation_dist_from_mid_axis 0.898446
    5 euc_dist_digit_7 0.860499
    111 height_digit_8 -0.471268
    112 height_digit_9 -0.480029
    113 height_digit_7 -0.484636
    114 height_digit_6 -0.489475
    115 height_digit_1 -0.555484

    euc_dist_digit_11

    (Jump to top)
    euc_dist_digit_11 Train Validation
    0 count 27151.000000 277.000000
    1 mean 32.644335 33.387692
    2 std 34.165306 33.296209
    3 min 0.001071 0.095011
    4 25% 7.369695 7.795154
    5 50% 17.209259 18.661318
    6 75% 48.329409 54.821571
    7 max 119.855309 116.061975
    8 unique values 25628.000000 278.000000
    9 unique values / count 0.781900 0.768000
    10 NaNs 5626.000000 85.000000
    Most popular values (NaN = -999):
    euc_dist_digit_11 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 5626 85.0 0.17164 0.71429
    1 2.607928 4 0.0 0.00012 0.00000
    2 7.643070 4 0.0 0.00012 0.00000
    3 6.260653 4 0.0 0.00012 0.00000
    4 2.876322 4 0.0 0.00012 0.00000
    5 23.674066 3 0.0 0.00009 0.00000
    6 7.437821 3 0.0 0.00009 0.00000
    7 95.135146 3 0.0 0.00009 0.00000
    8 1.217428 3 0.0 0.00009 0.00000
    9 37.107898 3 0.0 0.00009 0.00000
    Most correlated values with euc_dist_digit_11:
    Column Correlation with euc_dist_digit_11
    0 euc_dist_digit_11 1.000000
    1 euc_dist_digit_10 0.935773
    2 euc_dist_digit_9 0.894293
    3 deviation_dist_from_mid_axis 0.893389
    4 euc_dist_digit_12 0.872921
    5 euc_dist_digit_8 0.871677
    111 height_digit_8 -0.467289
    112 height_digit_9 -0.479624
    113 height_digit_7 -0.488127
    114 height_digit_6 -0.491141
    115 height_digit_1 -0.560891

    euc_dist_digit_12

    (Jump to top)
    euc_dist_digit_12 Train Validation
    0 count 28909.000000 290.000000
    1 mean 28.629239 28.940690
    2 std 35.018626 34.119917
    3 min 0.000000 0.130000
    4 25% 5.200000 5.915000
    5 50% 11.960000 13.000000
    6 75% 31.330000 35.847500
    7 max 119.990000 119.600000
    8 unique values 912.000000 194.000000
    9 unique values / count 0.027800 0.535900
    10 NaNs 3868.000000 72.000000
    Most popular values (NaN = -999):
    euc_dist_digit_12 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.00 3868 72.0 0.11801 0.19890
    1 2.60 230 4.0 0.00702 0.01105
    2 1.56 209 2.0 0.00638 0.00552
    3 4.42 203 3.0 0.00619 0.00829
    4 2.34 199 2.0 0.00607 0.00552
    5 0.13 197 1.0 0.00601 0.00276
    6 1.04 194 1.0 0.00592 0.00276
    7 1.82 193 2.0 0.00589 0.00552
    8 3.38 190 3.0 0.00580 0.00829
    9 0.52 189 3.0 0.00577 0.00829
    Most correlated values with euc_dist_digit_12:
    Column Correlation with euc_dist_digit_12
    0 euc_dist_digit_12 1.000000
    1 deviation_dist_from_mid_axis 0.942951
    2 euc_dist_digit_1 0.895557
    3 euc_dist_digit_11 0.872921
    4 euc_dist_digit_3 0.856718
    5 euc_dist_digit_10 0.851229
    111 height_digit_8 -0.477431
    112 height_digit_9 -0.490168
    113 height_digit_7 -0.501306
    114 height_digit_6 -0.514303
    115 height_digit_1 -0.571832

    area_digit_1

    (Jump to top)
    area_digit_1 Train Validation
    0 count 25448.000000 255.000000
    1 mean 2308.107671 2388.235294
    2 std 1070.213451 1198.736244
    3 min 640.000000 735.000000
    4 25% 1537.000000 1539.000000
    5 50% 2065.000000 2112.000000
    6 75% 2816.000000 2928.000000
    7 max 9870.000000 8890.000000
    8 unique values 1966.000000 223.000000
    9 unique values / count 0.060000 0.616000
    10 NaNs 7329.000000 107.000000
    Most popular values (NaN = -999):
    area_digit_1 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 7329 107.0 0.22360 0.29972
    1 1512.0 143 0.0 0.00436 0.00000
    2 1456.0 128 0.0 0.00391 0.00000
    3 1350.0 117 3.0 0.00357 0.00840
    4 1680.0 112 0.0 0.00342 0.00000
    5 1560.0 109 1.0 0.00333 0.00280
    6 1300.0 109 3.0 0.00333 0.00840
    7 1740.0 105 1.0 0.00320 0.00280
    8 1400.0 104 2.0 0.00317 0.00560
    9 1728.0 103 2.0 0.00314 0.00560
    Most correlated values with area_digit_1:
    Column Correlation with area_digit_1
    0 area_digit_1 1.000000
    1 height_digit_1 0.626056
    2 area_digit_2 0.557006
    3 area_digit_3 0.544482
    4 area_digit_4 0.541271
    5 area_digit_12 0.539211
    111 11 dist from cen -0.219733
    112 2 dist from cen -0.228585
    113 3 dist from cen -0.242782
    114 1 dist from cen -0.261462
    115 12 dist from cen -0.274356

    area_digit_2

    (Jump to top)
    area_digit_2 Train Validation
    0 count 27855.000000 295.000000
    1 mean 4616.101562 4151.271186
    2 std 2365.657591 1909.520134
    3 min 768.000000 1188.000000
    4 25% 2942.000000 2756.000000
    5 50% 4104.000000 3721.000000
    6 75% 5716.000000 5045.000000
    7 max 25088.000000 12330.000000
    8 unique values 3201.000000 263.000000
    9 unique values / count 0.097700 0.726500
    10 NaNs 4922.000000 67.000000
    Most popular values (NaN = -999):
    area_digit_2 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 4922 67.0 0.15017 0.18715
    1 2640.0 68 0.0 0.00207 0.00000
    2 5040.0 65 1.0 0.00198 0.00279
    3 4200.0 63 0.0 0.00192 0.00000
    4 3120.0 61 1.0 0.00186 0.00279
    5 3024.0 57 3.0 0.00174 0.00838
    6 3360.0 57 1.0 0.00174 0.00279
    7 3960.0 56 1.0 0.00171 0.00279
    8 2880.0 56 0.0 0.00171 0.00000
    9 2520.0 56 0.0 0.00171 0.00000
    Most correlated values with area_digit_2:
    Column Correlation with area_digit_2
    0 area_digit_2 1.000000
    1 height_digit_2 0.849204
    2 width_digit_2 0.826914
    3 area_digit_3 0.646747
    4 area_digit_4 0.634894
    5 area_digit_12 0.600113
    111 11 dist from cen -0.233081
    112 1 dist from cen -0.269999
    113 12 dist from cen -0.306253
    114 3 dist from cen -0.307171
    115 2 dist from cen -0.309652

    area_digit_3

    (Jump to top)
    area_digit_3 Train Validation
    0 count 28612.000000 297.000000
    1 mean 5046.115231 4978.585859
    2 std 2569.549735 2535.028759
    3 min 828.000000 1369.000000
    4 25% 3240.000000 3315.000000
    5 50% 4508.000000 4453.000000
    6 75% 6231.000000 6237.000000
    7 max 31482.000000 16380.000000
    8 unique values 3624.000000 280.000000
    9 unique values / count 0.110600 0.773500
    10 NaNs 4165.000000 65.000000
    Most popular values (NaN = -999):
    area_digit_3 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 4165 65.0 0.12707 0.18362
    1 3600.0 61 1.0 0.00186 0.00282
    2 4080.0 60 0.0 0.00183 0.00000
    3 2520.0 57 1.0 0.00174 0.00282
    4 4620.0 55 0.0 0.00168 0.00000
    5 5040.0 54 1.0 0.00165 0.00282
    6 4200.0 53 0.0 0.00162 0.00000
    7 4050.0 53 0.0 0.00162 0.00000
    8 4320.0 51 0.0 0.00156 0.00000
    9 3900.0 51 0.0 0.00156 0.00000
    Most correlated values with area_digit_3:
    Column Correlation with area_digit_3
    0 area_digit_3 1.000000
    1 width_digit_3 0.807082
    2 height_digit_3 0.798223
    3 area_digit_4 0.680886
    4 area_digit_2 0.646747
    5 area_digit_6 0.639321
    111 1 dist from cen -0.264121
    112 2 dist from cen -0.280373
    113 between_digits_angle_ccw_sum -0.306574
    114 12 dist from cen -0.322698
    115 3 dist from cen -0.355528

    area_digit_4

    (Jump to top)
    area_digit_4 Train Validation
    0 count 27251.000000 269.000000
    1 mean 5793.115665 5627.163569
    2 std 2641.521129 2561.650984
    3 min 1036.000000 1620.000000
    4 25% 3915.000000 3780.000000
    5 50% 5270.000000 5070.000000
    6 75% 7105.500000 6820.000000
    7 max 29946.000000 19314.000000
    8 unique values 3816.000000 249.000000
    9 unique values / count 0.116400 0.687800
    10 NaNs 5526.000000 93.000000
    Most popular values (NaN = -999):
    area_digit_4 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 5526 93.0 0.16859 0.26124
    1 5040.0 70 0.0 0.00214 0.00000
    2 4680.0 58 0.0 0.00177 0.00000
    3 3696.0 55 1.0 0.00168 0.00281
    4 4536.0 53 0.0 0.00162 0.00000
    5 6720.0 52 2.0 0.00159 0.00562
    6 4320.0 50 2.0 0.00153 0.00562
    7 3600.0 50 1.0 0.00153 0.00281
    8 5940.0 49 0.0 0.00149 0.00000
    9 5100.0 46 1.0 0.00140 0.00281
    Most correlated values with area_digit_4:
    Column Correlation with area_digit_4
    0 area_digit_4 1.000000
    1 height_digit_4 0.766798
    2 width_digit_4 0.691913
    3 area_digit_3 0.680886
    4 area_digit_5 0.660022
    5 area_digit_7 0.656411
    111 5 dist from cen -0.252993
    112 1 dist from cen -0.262208
    113 2 dist from cen -0.272024
    114 12 dist from cen -0.318134
    115 3 dist from cen -0.325382

    area_digit_5

    (Jump to top)
    area_digit_5 Train Validation
    0 count 26092.000000 265.000000
    1 mean 7214.179250 6691.577358
    2 std 3474.474015 3008.717328
    3 min 1152.000000 1599.000000
    4 25% 4758.000000 4636.000000
    5 50% 6525.000000 6318.000000
    6 75% 8840.000000 8349.000000
    7 max 32200.000000 22912.000000
    8 unique values 4275.000000 252.000000
    9 unique values / count 0.130400 0.696100
    10 NaNs 6685.000000 97.000000
    Most popular values (NaN = -999):
    area_digit_5 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 6685 97.0 0.20395 0.27324
    1 5040.0 59 1.0 0.00180 0.00282
    2 4620.0 52 0.0 0.00159 0.00000
    3 5460.0 42 0.0 0.00128 0.00000
    4 7560.0 42 0.0 0.00128 0.00000
    5 6300.0 41 1.0 0.00125 0.00282
    6 5760.0 39 0.0 0.00119 0.00000
    7 6480.0 39 0.0 0.00119 0.00000
    8 4536.0 39 0.0 0.00119 0.00000
    9 5544.0 38 0.0 0.00116 0.00000
    Most correlated values with area_digit_5:
    Column Correlation with area_digit_5
    0 area_digit_5 1.000000
    1 width_digit_5 0.830252
    2 height_digit_5 0.775551
    3 variance_area 0.678825
    4 area_digit_4 0.660022
    5 area_digit_8 0.603508
    111 2 dist from cen -0.212595
    112 6 dist from cen -0.229845
    113 5 dist from cen -0.235901
    114 3 dist from cen -0.253795
    115 12 dist from cen -0.266963

    area_digit_6

    (Jump to top)
    area_digit_6 Train Validation
    0 count 28407.000000 290.000000
    1 mean 6035.063259 6095.472414
    2 std 2742.576668 2732.864358
    3 min 805.000000 1584.000000
    4 25% 4041.000000 4002.500000
    5 50% 5580.000000 5678.500000
    6 75% 7560.000000 7717.500000
    7 max 27378.000000 19608.000000
    8 unique values 3960.000000 272.000000
    9 unique values / count 0.120800 0.751400
    10 NaNs 4370.000000 72.000000
    Most popular values (NaN = -999):
    area_digit_6 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 4370 72.0 0.13333 0.20571
    1 5040.0 64 0.0 0.00195 0.00000
    2 4050.0 58 0.0 0.00177 0.00000
    3 5280.0 52 1.0 0.00159 0.00286
    4 4368.0 50 0.0 0.00153 0.00000
    5 5460.0 49 0.0 0.00149 0.00000
    6 6720.0 48 0.0 0.00146 0.00000
    7 6930.0 47 0.0 0.00143 0.00000
    8 5544.0 47 1.0 0.00143 0.00286
    9 4704.0 46 0.0 0.00140 0.00000
    Most correlated values with area_digit_6:
    Column Correlation with area_digit_6
    0 area_digit_6 1.000000
    1 height_digit_6 0.781435
    2 area_digit_7 0.684934
    3 area_digit_9 0.669600
    4 area_digit_4 0.655595
    5 area_digit_8 0.653968
    111 5 dist from cen -0.242708
    112 2 dist from cen -0.250784
    113 12 dist from cen -0.291534
    114 6 dist from cen -0.304178
    115 3 dist from cen -0.326111

    area_digit_7

    (Jump to top)
    area_digit_7 Train Validation
    0 count 28555.000000 288.000000
    1 mean 4942.821748 4876.631944
    2 std 2221.963276 2252.685580
    3 min 777.000000 1054.000000
    4 25% 3312.000000 3243.750000
    5 50% 4559.000000 4469.000000
    6 75% 6120.000000 6102.500000
    7 max 22866.000000 17030.000000
    8 unique values 3400.000000 264.000000
    9 unique values / count 0.103700 0.729300
    10 NaNs 4222.000000 74.000000
    Most popular values (NaN = -999):
    area_digit_7 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 4222 74.0 0.12881 0.20845
    1 3600.0 59 1.0 0.00180 0.00282
    2 5040.0 57 0.0 0.00174 0.00000
    3 4050.0 56 0.0 0.00171 0.00000
    4 4704.0 54 0.0 0.00165 0.00000
    5 4536.0 54 0.0 0.00165 0.00000
    6 2520.0 54 0.0 0.00165 0.00000
    7 3150.0 53 0.0 0.00162 0.00000
    8 3360.0 53 0.0 0.00162 0.00000
    9 4800.0 51 0.0 0.00156 0.00000
    Most correlated values with area_digit_7:
    Column Correlation with area_digit_7
    0 area_digit_7 1.000000
    1 height_digit_7 0.746753
    2 area_digit_8 0.697879
    3 area_digit_6 0.684934
    4 width_digit_7 0.665595
    5 area_digit_9 0.660227
    111 5 dist from cen -0.222527
    112 9 dist from cen -0.227945
    113 12 dist from cen -0.265251
    114 6 dist from cen -0.280505
    115 3 dist from cen -0.281658

    area_digit_8

    (Jump to top)
    area_digit_8 Train Validation
    0 count 28755.000000 292.000000
    1 mean 5697.203373 5814.003425
    2 std 2741.527329 2932.203603
    3 min 1054.000000 1505.000000
    4 25% 3760.000000 3986.250000
    5 50% 5145.000000 5342.500000
    6 75% 7000.000000 6720.750000
    7 max 29503.000000 25542.000000
    8 unique values 3884.000000 268.000000
    9 unique values / count 0.118500 0.740300
    10 NaNs 4022.000000 70.000000
    Most popular values (NaN = -999):
    area_digit_8 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 4022 70.0 0.12271 0.20115
    1 3600.0 65 1.0 0.00198 0.00287
    2 4320.0 60 3.0 0.00183 0.00862
    3 4368.0 58 0.0 0.00177 0.00000
    4 3696.0 58 0.0 0.00177 0.00000
    5 3150.0 51 0.0 0.00156 0.00000
    6 3360.0 51 0.0 0.00156 0.00000
    7 5096.0 50 0.0 0.00153 0.00000
    8 4080.0 50 0.0 0.00153 0.00000
    9 5940.0 49 0.0 0.00149 0.00000
    Most correlated values with area_digit_8:
    Column Correlation with area_digit_8
    0 area_digit_8 1.000000
    1 height_digit_8 0.731817
    2 width_digit_8 0.698923
    3 area_digit_7 0.697879
    4 area_digit_9 0.689404
    5 area_digit_6 0.653968
    111 11 dist from cen -0.229847
    112 9 dist from cen -0.258230
    113 6 dist from cen -0.263622
    114 3 dist from cen -0.263660
    115 12 dist from cen -0.270084

    area_digit_9

    (Jump to top)
    area_digit_9 Train Validation
    0 count 26974.000000 255.000000
    1 mean 5678.539964 5577.784314
    2 std 2563.651971 2316.266827
    3 min 870.000000 1628.000000
    4 25% 3848.000000 4022.000000
    5 50% 5280.000000 5184.000000
    6 75% 7038.000000 6856.000000
    7 max 34524.000000 14522.000000
    8 unique values 3707.000000 239.000000
    9 unique values / count 0.113100 0.660200
    10 NaNs 5803.000000 107.000000
    Most popular values (NaN = -999):
    area_digit_9 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 5803 107.0 0.17704 0.30141
    1 3600.0 65 1.0 0.00198 0.00282
    2 5940.0 59 1.0 0.00180 0.00282
    3 4368.0 55 0.0 0.00168 0.00000
    4 4320.0 54 0.0 0.00165 0.00000
    5 4590.0 52 0.0 0.00159 0.00000
    6 4536.0 52 1.0 0.00159 0.00282
    7 5096.0 50 0.0 0.00153 0.00000
    8 4080.0 49 0.0 0.00149 0.00000
    9 5040.0 48 0.0 0.00146 0.00000
    Most correlated values with area_digit_9:
    Column Correlation with area_digit_9
    0 area_digit_9 1.000000
    1 height_digit_9 0.748285
    2 area_digit_8 0.689404
    3 area_digit_6 0.669600
    4 area_digit_7 0.660227
    5 width_digit_9 0.633842
    111 9 dist from cen -0.226936
    112 2 dist from cen -0.230707
    113 6 dist from cen -0.262340
    114 3 dist from cen -0.264134
    115 12 dist from cen -0.276428

    area_digit_10

    (Jump to top)
    area_digit_10 Train Validation
    0 count 27882.000000 274.000000
    1 mean 6647.253927 6535.178832
    2 std 3161.975614 3025.937766
    3 min 888.000000 1435.000000
    4 25% 4380.000000 4366.500000
    5 50% 6075.000000 5913.500000
    6 75% 8232.000000 7945.750000
    7 max 35280.000000 18810.000000
    8 unique values 3733.000000 257.000000
    9 unique values / count 0.113900 0.709900
    10 NaNs 4895.000000 88.000000
    Most popular values (NaN = -999):
    area_digit_10 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 4895 88.0 0.14934 0.25287
    1 6160.0 54 1.0 0.00165 0.00287
    2 5544.0 51 1.0 0.00156 0.00287
    3 5040.0 50 2.0 0.00153 0.00575
    4 6480.0 48 0.0 0.00146 0.00000
    5 4680.0 48 0.0 0.00146 0.00000
    6 4620.0 47 0.0 0.00143 0.00000
    7 5460.0 45 1.0 0.00137 0.00287
    8 4800.0 44 0.0 0.00134 0.00000
    9 4158.0 41 1.0 0.00125 0.00287
    Most correlated values with area_digit_10:
    Column Correlation with area_digit_10
    0 area_digit_10 1.000000
    1 width_digit_10 0.856659
    2 height_digit_10 0.854912
    3 area_digit_11 0.646333
    4 area_digit_9 0.627542
    5 variance_area 0.626882
    111 between_digits_angle_ccw_sum -0.268909
    112 10 dist from cen -0.276785
    113 11 dist from cen -0.282686
    114 3 dist from cen -0.285387
    115 12 dist from cen -0.288583

    area_digit_11

    (Jump to top)
    area_digit_11 Train Validation
    0 count 27201.000000 279.000000
    1 mean 5393.460167 5250.641577
    2 std 2633.392241 2676.965577
    3 min 780.000000 1364.000000
    4 25% 3540.000000 3476.000000
    5 50% 4872.000000 4697.000000
    6 75% 6640.000000 6221.000000
    7 max 30338.000000 22490.000000
    8 unique values 3450.000000 266.000000
    9 unique values / count 0.105300 0.734800
    10 NaNs 5576.000000 83.000000
    Most popular values (NaN = -999):
    area_digit_11 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 5576 83.0 0.17012 0.23380
    1 5544.0 59 0.0 0.00180 0.00000
    2 4200.0 57 0.0 0.00174 0.00000
    3 4536.0 55 0.0 0.00168 0.00000
    4 5460.0 53 1.0 0.00162 0.00282
    5 3360.0 51 0.0 0.00156 0.00000
    6 3600.0 50 0.0 0.00153 0.00000
    7 4680.0 50 0.0 0.00153 0.00000
    8 4620.0 48 0.0 0.00146 0.00000
    9 5760.0 48 0.0 0.00146 0.00000
    Most correlated values with area_digit_11:
    Column Correlation with area_digit_11
    0 area_digit_11 1.000000
    1 width_digit_11 0.825194
    2 height_digit_11 0.813478
    3 area_digit_10 0.646333
    4 area_digit_9 0.578697
    5 width_digit_10 0.577713
    111 10 dist from cen -0.236584
    112 6 dist from cen -0.238165
    113 3 dist from cen -0.255290
    114 12 dist from cen -0.260157
    115 11 dist from cen -0.281544

    area_digit_12

    (Jump to top)
    area_digit_12 Train Validation
    0 count 28937.000000 291.000000
    1 mean 6998.064450 6944.563574
    2 std 3525.529979 3800.925618
    3 min 1089.000000 1748.000000
    4 25% 4473.000000 4552.000000
    5 50% 6240.000000 5916.000000
    6 75% 8701.000000 8414.000000
    7 max 28362.000000 23406.000000
    8 unique values 4332.000000 271.000000
    9 unique values / count 0.132200 0.748600
    10 NaNs 3840.000000 71.000000
    Most popular values (NaN = -999):
    area_digit_12 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 3840 71.0 0.11716 0.20461
    1 5040.0 67 0.0 0.00204 0.00000
    2 5544.0 60 2.0 0.00183 0.00576
    3 4620.0 54 2.0 0.00165 0.00576
    4 6480.0 51 0.0 0.00156 0.00000
    5 4800.0 48 2.0 0.00146 0.00576
    6 5460.0 46 1.0 0.00140 0.00288
    7 4290.0 46 0.0 0.00140 0.00000
    8 6720.0 42 0.0 0.00128 0.00000
    9 3780.0 40 1.0 0.00122 0.00288
    Most correlated values with area_digit_12:
    Column Correlation with area_digit_12
    0 area_digit_12 1.000000
    1 width_digit_12 0.857415
    2 height_digit_12 0.832087
    3 variance_area 0.622523
    4 area_digit_3 0.619287
    5 area_digit_4 0.604102
    111 11 dist from cen