semesterproject_lecture_eeg/erp_analysis.py

import mne
import pandas as pd
from scipy.stats import ttest_1samp, f_oneway

from utils.file_utils import load_preprocessed_data, get_epochs

# Log level handed to mne.set_log_level() when this module is run as a script.
VERBOSE_LEVEL = 'CRITICAL'


def extract_erp_peak(raw, subject, stimulus, condition, channel):
    """
    Compute a single ERP peak amplitude for one subject, stimulus, condition and channel.

    The data is epoched for the requested stimulus-condition pair, the epochs are averaged and
    the negative peak between tmin=0.13 and tmax=0.2 of that average is returned.

    :param raw: The raw object, from which the epochs are generated
    :param subject: The subject for which the peak is extracted
    :param stimulus: The stimulus we look at: Either 'car' or 'face'
    :param condition: The condition of the stimulus: Either 'intact' or 'scrambled'
    :param channel: The channel handed to get_epochs as ``picks``, i.e. the channel the peak is taken from
    :return: A dictionary conforming to the data frame format:
             {'subject_id': subject, 'stimulus': stimulus, 'condition': condition, 'peak': peak}
    """
    # Restrict the epochs to the one stimulus-condition pair on the selected channel
    requested_events = [(stimulus, condition)]
    selected_epochs, _ = get_epochs(raw, requested_events, picks=channel)

    # Average over the epochs to obtain the evoked response
    evoked = selected_epochs.average()

    # Only negative peaks are searched, as this module only uses the channels P7, PO7, P8, PO8.
    # get_peak also returns the channel name and the latency, which are not needed here.
    _, _, amplitude = evoked.get_peak(tmin=0.13, tmax=0.2, mode='neg', return_amplitude=True)

    return dict(subject_id=subject, stimulus=stimulus, condition=condition, peak=amplitude)


def precompute_erp_df(dataset):
    """
    Generate one .csv file per channel containing the erp peaks of every stimulus-condition pair
    for the subjects 001 to 040.

    The files are written to 'cached_data/erp_peaks/erp_peaks_<channel>.csv'; the directory is
    created if it does not exist yet. Each file has the columns
    'subject_id', 'stimulus', 'condition' and 'peak' plus the default integer index.

    :param dataset: The dataset for which the erp peaks are computed
    """
    # Local import: only needed to make sure the output directory exists
    import os

    chs = ['P7', 'PO7', 'P8', 'PO8']
    events = [('face', 'intact'), ('face', 'scrambled'), ('car', 'intact'), ('car', 'scrambled')]
    columns = ['subject_id', 'stimulus', 'condition', 'peak']
    out_dir = 'cached_data/erp_peaks'

    # Create the target directory up front, so a missing directory is not only discovered
    # after the peaks of all subjects have already been computed
    os.makedirs(out_dir, exist_ok=True)

    for ch in chs:
        # Collect plain dictionaries and build the frame once: DataFrame.append copied the whole
        # frame on every call and no longer exists in pandas >= 2.0
        rows = []
        for i in range(1, 41):
            # Subject ids are zero-padded to three digits, e.g. '001', '040'
            subj = str(i).zfill(3)
            # Load preprocessed .fif data files
            # NOTE(review): every subject is loaded once per channel. Loading once per subject would
            # be cheaper, but requires confirming that get_epochs does not modify the raw object.
            raw = load_preprocessed_data(subj, dataset)
            # Extract ERP peaks
            for stimulus, condition in events:
                rows.append(extract_erp_peak(raw, subj, stimulus, condition, ch))
        df = pd.DataFrame(rows, columns=columns)
        df.to_csv(out_dir + '/erp_peaks_' + ch + '.csv')

def _select_peak(sub_df, subj, stimulus, condition):
    """
    Look up the peak of one stimulus-condition pair within the rows of a single subject.

    :param sub_df: The rows of the peak data frame which belong to one subject
    :param subj: The subject label, only used for the error message
    :param stimulus: The stimulus to select: Either 'car' or 'face'
    :param condition: The condition to select: Either 'intact' or 'scrambled'
    :return: The 'peak' value of the first matching row
    :raises IndexError: If the subject has no row for the requested pair
    """
    mask = (sub_df['stimulus'] == stimulus) & (sub_df['condition'] == condition)
    peaks = sub_df.loc[mask, 'peak']
    if peaks.empty:
        raise IndexError("No peak found for subject {} with stimulus '{}' and condition '{}'"
                         .format(subj, stimulus, condition))
    return peaks.iloc[0]


def create_peak_difference_feature(df, max_subj=40):
    """
    Compute the differences of the N170 peaks between the conditions for the subjects 1..max_subj.

    Every peak is looked up explicitly by its stimulus and condition, so the result does not depend
    on the order of the rows in df. The returned columns are:

    - 'subject_id': the subject number, zero-padded to three digits
    - 'mean_face': mean peak of all 'face' rows of the subject
    - 'mean_car': mean peak of all 'car' rows of the subject
    - 'peak_diff_overall': mean_face - mean_car
    - 'diff_intact': face(intact) - car(intact)
    - 'diff_scrambled': face(scrambled) - car(scrambled)
    - 'diff_face': face(intact) - face(scrambled)
    - 'diff_fc_ci': face(scrambled) - car(intact)
    - 'diff_fi_rest': face(intact) - mean of the car rows and the face(scrambled) rows

    :param max_subj: the maximum subject till which the features are computed.
    :param df: A pandas dataframe containing the peak information for all conditions and subjects.
               It needs the columns 'subject_id', 'stimulus', 'condition' and 'peak'. The subject ids
               may be integers (as read back from the .csv) or numeric strings such as '001'.
    :return: A pandas dataframe containing the peak-difference for multiple condition differences
    :raises IndexError: If one of the subjects 1..max_subj lacks one of the four stimulus-condition pairs
    """
    columns = ['subject_id', 'mean_face', 'mean_car', 'peak_diff_overall', 'diff_intact',
               'diff_scrambled', 'diff_face', 'diff_fc_ci', 'diff_fi_rest']

    # Normalise the ids once, so integer ids and zero-padded strings both match the loop counter
    subject_numbers = pd.to_numeric(df['subject_id'], errors='coerce')

    # Collect one dictionary per subject and build the frame once at the end
    # (DataFrame.append no longer exists in pandas >= 2.0)
    rows = []
    for i in range(1, max_subj + 1):
        subj = str(i).zfill(3)
        sub_df = df.loc[subject_numbers == i]

        face_intact = _select_peak(sub_df, subj, 'face', 'intact')
        face_scrambled = _select_peak(sub_df, subj, 'face', 'scrambled')
        car_intact = _select_peak(sub_df, subj, 'car', 'intact')
        car_scrambled = _select_peak(sub_df, subj, 'car', 'scrambled')

        # The masks are built from sub_df itself, so no index alignment with df is involved
        is_face = sub_df['stimulus'] == 'face'
        is_car = sub_df['stimulus'] == 'car'
        is_scrambled = sub_df['condition'] == 'scrambled'

        # Mean of face (intact) and face (scrambled)
        mean_face = sub_df.loc[is_face, 'peak'].mean()
        # Mean of car (intact) and car (scrambled)
        mean_car = sub_df.loc[is_car, 'peak'].mean()
        # Mean of everything except face (intact)
        mean_rest = sub_df.loc[is_car | (is_face & is_scrambled), 'peak'].mean()

        rows.append({
            'subject_id': subj,
            'mean_face': mean_face,
            'mean_car': mean_car,
            # Difference of face (overall) and car (overall)
            'peak_diff_overall': mean_face - mean_car,
            # Difference of face and car (intact)
            'diff_intact': face_intact - car_intact,
            # Difference of face and car (scrambled)
            'diff_scrambled': face_scrambled - car_scrambled,
            # Difference of face intact and face scrambled
            'diff_face': face_intact - face_scrambled,
            # Difference of face scrambled and car intact
            'diff_fc_ci': face_scrambled - car_intact,
            # Difference of face intact and the mean of all other pairs
            'diff_fi_rest': face_intact - mean_rest,
        })
    return pd.DataFrame(rows, columns=columns)


def analyze_erp(channels, precompute=True):
    """
    Run the statistical tests on the ERP peak differences and print the p-value of each test.

    For every channel the cached peak file 'cached_data/erp_peaks/erp_peaks_<channel>.csv' is read,
    the peak-difference features are derived from it and six tests are evaluated in a fixed order.

    :param channels: The channels for which the tests are executed
    :param precompute: If true, the erp peaks are computed and written to the cache first. Else the existing
                        cache files are used. This should only be set 'False' if the method was already
                        executed once!
    """
    # Each entry: (printed title, function running the test on the feature data frame)
    hypothesis_tests = (
        # 1. H_a : There is a difference between the N170 peak of recognizing faces and cars
        #    (one-sample t-test against 0 mean)
        ("Peak Difference Faces-Car (All)",
         lambda feats: ttest_1samp(feats['peak_diff_overall'].to_numpy(), 0)),
        # 2. H_a : The peak difference of intact faces&cars differs from the peak difference of
        #    scrambled faces&cars (ANOVA on the two samples)
        ("Difference of peak-differences face-car (intact) vs. face-car (scrambled)",
         lambda feats: f_oneway(feats['diff_intact'].to_numpy(), feats['diff_scrambled'].to_numpy())),
        # 3. H_a : There is a difference in the peak-difference of face-car (intact)
        ("Peak Difference Faces-Car (Intact)",
         lambda feats: ttest_1samp(feats['diff_intact'].to_numpy(), 0)),
        # 4. H_a : There is a difference in the peak-difference of face-car (scrambled)
        ("Peak Difference Faces-Car (Scrambled)",
         lambda feats: ttest_1samp(feats['diff_scrambled'].to_numpy(), 0)),
        # 5. H_a : There is a difference between face (scrambled) and face (intact)
        ("Peak Difference Face intact and scrambled",
         lambda feats: ttest_1samp(feats['diff_face'].to_numpy(), 0)),
        # 6. H_a : Face (intact) differs from the mean of the remaining stimulus-condition pairs
        ("Peak Difference Face intact and Rest",
         lambda feats: ttest_1samp(feats['diff_fi_rest'].to_numpy(), 0)),
    )

    if precompute:
        # Refresh the cached erp peaks before analysing them
        precompute_erp_df('N170')

    for channel in channels:
        print("CHANNEL: " + channel)
        # Load the cached erp peaks of this channel and derive the test features
        peak_table = pd.read_csv('cached_data/erp_peaks/erp_peaks_' + channel + '.csv', index_col=0)
        features = create_peak_difference_feature(peak_table)

        for title, run_test in hypothesis_tests:
            # The test is evaluated before anything is printed for it
            p_value = run_test(features).pvalue
            print(title)
            print("P-Value=" + str(p_value))


if __name__ == '__main__':
    # Script entry point: set the MNE log level, then recompute the peak cache and run all tests
    mne.set_log_level(verbose=VERBOSE_LEVEL)
    analyze_erp(channels=['P7', 'PO7', 'P8', 'PO8'], precompute=True)