import os

import mne
import pandas as pd
from scipy.stats import ttest_1samp, f_oneway

from utils.file_utils import load_preprocessed_data, get_epochs

VERBOSE_LEVEL = 'CRITICAL'

# Directory that holds the cached per-channel ERP peak tables.
ERP_PEAK_DIR = 'cached_data/erp_peaks'

# Channels and stimulus/condition pairs that are processed by default.
DEFAULT_CHANNELS = ('P7', 'PO7', 'P8', 'PO8')
EVENTS = (('face', 'intact'), ('face', 'scrambled'), ('car', 'intact'), ('car', 'scrambled'))

# Time window (in seconds) in which the negative peak is searched.
PEAK_TMIN = 0.13
PEAK_TMAX = 0.2

ERP_PEAK_COLUMNS = ['subject_id', 'stimulus', 'condition', 'peak']
PEAK_DIFF_COLUMNS = ['subject_id', 'mean_face', 'mean_car', 'peak_diff_overall', 'diff_intact',
                     'diff_scrambled', 'diff_face', 'diff_fc_ci', 'diff_fi_rest']


def _subject_label(number):
    """Return the zero-padded, three-digit subject label (e.g. 7 -> '007')."""
    return f"{number:03d}"


def _erp_peak_csv_path(channel):
    """Return the path of the cached ERP peak table of the given channel."""
    return ERP_PEAK_DIR + '/erp_peaks_' + channel + '.csv'


def _print_p_value(title, p_val):
    """Print the title of a test followed by its p-value."""
    print(title)
    print("P-Value=" + str(p_val))


def extract_erp_peak(raw, subject, stimulus, condition, channel):
    """
    Extracts the erp peak for a given subject, stimulus and condition as a single value.

    The data is epoched for the stimulus-condition pair on the given channel, averaged, and the
    most negative peak between PEAK_TMIN and PEAK_TMAX seconds is taken.

    :param raw: The raw object, from which the epochs are generated
    :param subject: The subject for which the peak is extracted
    :param stimulus: The stimulus we look at: Either 'car' or 'face'
    :param condition: The condition of the stimulus: Either 'intact' or 'scrambled'
    :param channel: The currently selected channel, for which the erp_peak should be extracted
    :return: A dictionary conforming to the data frame format:
             {'subject_id': subject, 'stimulus': stimulus, 'condition': condition, 'peak': peak}
    """
    # Epoch the data
    epochs, _ = get_epochs(raw, [(stimulus, condition)], picks=channel)

    # Check only for negative peaks, as only the channels P7, PO7, P8, PO8 are used.
    # get_peak returns (channel name, latency, amplitude); only the amplitude is needed.
    _, _, peak = epochs.average().get_peak(tmin=PEAK_TMIN, tmax=PEAK_TMAX, mode='neg',
                                           return_amplitude=True)
    return {'subject_id': subject, 'stimulus': stimulus, 'condition': condition, 'peak': peak}


def precompute_erp_df(dataset, channels=DEFAULT_CHANNELS, max_subj=40):
    """
    This method generates one .csv file per channel, in which the erp peaks for each
    stimulus-condition pair for each subject are saved.

    The files are written to ERP_PEAK_DIR (created if it does not exist yet) as
    'erp_peaks_<channel>.csv' with the columns subject_id, stimulus, condition and peak.
    Per subject the rows are written in the order of EVENTS.

    :param dataset: The dataset for which the erp peaks are computed
    :param channels: The channels for which a file is generated. Defaults to P7, PO7, P8, PO8
    :param max_subj: The maximum subject (inclusive, starting at 1) till which peaks are computed
    """
    # Make sure the cache directory exists, otherwise to_csv fails on a fresh checkout.
    os.makedirs(ERP_PEAK_DIR, exist_ok=True)

    for ch in channels:
        # Collect the rows first and build the frame once; DataFrame.append was removed in pandas 2.0.
        rows = []
        for i in range(1, max_subj + 1):
            subj = _subject_label(i)
            # Load preprocessed .fif data files.
            # NOTE(review): the data is reloaded for every channel; kept that way because it is not
            # visible here whether get_epochs modifies the raw object.
            raw = load_preprocessed_data(subj, dataset)
            # Extract ERP peaks
            for stimulus, condition in EVENTS:
                rows.append(extract_erp_peak(raw, subj, stimulus, condition, ch))
        df = pd.DataFrame(rows, columns=ERP_PEAK_COLUMNS)
        df.to_csv(_erp_peak_csv_path(ch))


def create_peak_difference_feature(df, max_subj=40):
    """
    Compute differences and means of the N170 peaks of the different conditions for all subjects.

    The returned columns are (fi = face intact, fs = face scrambled, ci = car intact,
    cs = car scrambled):

    - subject_id:        zero-padded, three-digit subject label
    - mean_face:         mean(fi, fs)
    - mean_car:          mean(ci, cs)
    - peak_diff_overall: mean_face - mean_car
    - diff_intact:       ci - fi
    - diff_scrambled:    cs - fs
    - diff_face:         fs - fi
    - diff_fc_ci:        fs - ci
    - diff_fi_rest:      fi - mean(fs, ci, cs)

    Note the sign of diff_intact, diff_scrambled and diff_face: it is the one that results from
    the row order of the cached peak files and is kept for compatibility.

    :param df: A pandas dataframe containing the peak information for all conditions and subjects
               (columns subject_id, stimulus, condition, peak). The subject ids may be integers
               or zero-padded strings
    :param max_subj: the maximum subject till which the features are computed.
    :return: A pandas dataframe containing the peak-difference for multiple condition differences
    :raises IndexError: If a stimulus-condition pair is missing for one of the subjects
    """
    # Subject ids are integers when read from the cached .csv, but zero-padded strings in a freshly
    # built frame. Normalising them once makes both variants match the loop counter.
    subject_numbers = pd.to_numeric(df['subject_id'], errors='coerce')

    rows = []
    for i in range(1, max_subj + 1):
        sub_df = df.loc[subject_numbers == i]

        # Build the masks on the subject's own rows, not on the full frame.
        is_face = sub_df['stimulus'] == 'face'
        is_car = sub_df['stimulus'] == 'car'
        is_intact = sub_df['condition'] == 'intact'
        is_scrambled = sub_df['condition'] == 'scrambled'
        peaks = sub_df['peak']

        # Select every cell explicitly, so that the result does not depend on the row order.
        face_intact = peaks[is_face & is_intact].to_numpy()[0]
        face_scrambled = peaks[is_face & is_scrambled].to_numpy()[0]
        car_intact = peaks[is_car & is_intact].to_numpy()[0]
        car_scrambled = peaks[is_car & is_scrambled].to_numpy()[0]

        # Mean of face (intact) and face (scrambled)
        mean_face = peaks[is_face].mean()
        # Mean of car (intact) and car (scrambled)
        mean_car = peaks[is_car].mean()
        # Mean of everything except face (intact)
        mean_rest = peaks[is_car | (is_face & is_scrambled)].mean()

        rows.append({
            'subject_id': _subject_label(i),
            'mean_face': mean_face,
            'mean_car': mean_car,
            # Difference of face (overall) and car (overall)
            'peak_diff_overall': mean_face - mean_car,
            # Difference of car and face (intact)
            'diff_intact': car_intact - face_intact,
            # Difference of car and face (scrambled)
            'diff_scrambled': car_scrambled - face_scrambled,
            # Difference of face scrambled and face intact
            'diff_face': face_scrambled - face_intact,
            # Difference of face scrambled and car intact
            'diff_fc_ci': face_scrambled - car_intact,
            # Difference of face intact and the mean of the remaining three cells
            'diff_fi_rest': face_intact - mean_rest,
        })

    return pd.DataFrame(rows, columns=PEAK_DIFF_COLUMNS)


def analyze_erp(channels, precompute=True):
    """
    Execute several statistical tests for different hypotheses, to analyze ERPs.
    The p-value of every test is printed for every channel.

    :param channels: The channels for which the tests are executed
    :param precompute: If true, the peak data will be computed (for the default channels) and
        written to the cache. Else it will be loaded from a precomputed file, if it exists.
        This should only be set 'False' if the method was already executed once!
    """
    if precompute:
        precompute_erp_df('N170')

    for c in channels:
        print("CHANNEL: " + c)
        erp_df = pd.read_csv(_erp_peak_csv_path(c), index_col=0)
        feature_df = create_peak_difference_feature(erp_df)

        # 1. H_a : There is a difference between the N170 peak of recognizing faces and cars
        # Run one-sample ttest against 0 mean
        _, p_val = ttest_1samp(feature_df['peak_diff_overall'].to_numpy(), 0)
        _print_p_value("Peak Difference Faces-Car (All)", p_val)

        # 2. H_a : There is a difference between the peak difference of intact faces&cars,
        #          to the peak difference of scrambled faces&cars
        # Run ANOVA for two samples. 1. Diff of intact faces&cars, 2. Diff of scrambled faces&cars
        # NOTE(review): both samples stem from the same subjects, while f_oneway treats them as
        # independent groups - confirm that this is intended.
        _, p_val = f_oneway(feature_df['diff_intact'].to_numpy(),
                            feature_df['diff_scrambled'].to_numpy())
        _print_p_value("Difference of peak-differences face-car (intact) vs. face-car (scrambled)",
                       p_val)

        # 3. H_a : There is a difference in the peak-difference of face-car (intact)
        _, p_val = ttest_1samp(feature_df['diff_intact'].to_numpy(), 0)
        _print_p_value("Peak Difference Faces-Car (Intact)", p_val)

        # 4. H_a : There is a difference in the peak-difference of face-car (scrambled)
        _, p_val = ttest_1samp(feature_df['diff_scrambled'].to_numpy(), 0)
        _print_p_value("Peak Difference Faces-Car (Scrambled)", p_val)

        # 5. H_a : There is a difference between Face (scrambled) and Face (intact)
        _, p_val = ttest_1samp(feature_df['diff_face'].to_numpy(), 0)
        _print_p_value("Peak Difference Face intact and scrambled", p_val)

        # 6. H_a : There is a difference between Face (intact) and the mean of all other cells
        _, p_val = ttest_1samp(feature_df['diff_fi_rest'].to_numpy(), 0)
        _print_p_value("Peak Difference Face intact and Rest", p_val)


if __name__ == '__main__':
    mne.set_log_level(verbose=VERBOSE_LEVEL)
    analyze_erp(['P7', 'PO7', 'P8', 'PO8'], True)