Files
semesterproject_lecture_eeg/erp_analysis.py

141 lines
7.3 KiB
Python

import mne
import pandas as pd
from scipy.stats import ttest_1samp, f_oneway
from utils.file_utils import load_preprocessed_data, get_epochs
# Log level handed to mne.set_log_level() when this file is executed as a script.
VERBOSE_LEVEL = 'CRITICAL'
def extract_erp_peak(raw, subject, stimulus, condition, channel):
    """
    Reduce one stimulus/condition pair of one subject to a single negative peak amplitude.

    The raw recording is epoched for the requested (stimulus, condition) pair on the given
    channel, the epochs are averaged, and the amplitude of the negative peak found by
    ``get_peak`` with ``tmin=0.13`` and ``tmax=0.2`` is reported.

    :param raw: The raw object, from which the epochs are generated
    :param subject: The subject identifier that is copied into the result
    :param stimulus: The stimulus to look at: either 'car' or 'face'
    :param condition: The condition of the stimulus: either 'intact' or 'scrambled'
    :param channel: The channel for which the peak is extracted
    :return: A dictionary conforming to the data frame format:
        {'subject_id': subject, 'stimulus': stimulus, 'condition': condition, 'peak': peak}
    """
    # Epoch only the requested stimulus/condition pair on the requested channel
    selected_epochs, _ = get_epochs(raw, [(stimulus, condition)], picks=channel)
    evoked = selected_epochs.average()
    # Only negative peaks are searched, as only the channels P7, PO7, P8, PO8 are used.
    # get_peak also reports the channel name and the latency, which are not needed here.
    _, _, amplitude = evoked.get_peak(tmin=0.13, tmax=0.2, mode='neg', return_amplitude=True)
    record = {
        'subject_id': subject,
        'stimulus': stimulus,
        'condition': condition,
        'peak': amplitude,
    }
    return record
def precompute_erp_df(dataset):
    """
    Compute the ERP peaks of every subject and cache them as one .csv file per channel.

    For each of the channels P7, PO7, P8 and PO8 a table with the columns
    'subject_id', 'stimulus', 'condition' and 'peak' is written to
    'cached_data/erp_peaks/erp_peaks_<channel>.csv'. It contains one row per subject
    (001 .. 040) and stimulus/condition pair, in the order face/intact, face/scrambled,
    car/intact, car/scrambled.

    :param dataset: The dataset for which the erp peaks are computed; forwarded unchanged
        to load_preprocessed_data
    """
    chs = ['P7', 'PO7', 'P8', 'PO8']
    events = [('face', 'intact'), ('face', 'scrambled'), ('car', 'intact'), ('car', 'scrambled')]
    columns = ['subject_id', 'stimulus', 'condition', 'peak']
    for ch in chs:
        # Rows are gathered in a plain list and turned into a frame once per channel.
        # DataFrame.append was removed in pandas 2.0 and copied the whole frame on every call.
        rows = []
        for i in range(1, 41):
            # Subject ids are three characters wide and zero padded: '001' ... '040'
            subj = "0" + str(i).zfill(2)
            # Load preprocessed .fif data files
            # NOTE(review): the same subject file is re-read for every channel; loading it once
            # per subject would be cheaper, provided get_epochs does not modify `raw` - confirm.
            raw = load_preprocessed_data(subj, dataset)
            # Extract ERP peaks
            rows.extend(
                extract_erp_peak(raw, subj, stimulus, condition, ch)
                for stimulus, condition in events
            )
        df = pd.DataFrame(rows, columns=columns)
        df.to_csv('cached_data/erp_peaks/erp_peaks_' + ch + '.csv')
def _peak_for(sub_df, stimulus, condition):
    """Return the first 'peak' value of one stimulus/condition pair in a single-subject frame."""
    mask = (sub_df['stimulus'] == stimulus) & (sub_df['condition'] == condition)
    peaks = sub_df.loc[mask, 'peak'].values
    if len(peaks) == 0:
        # Same exception type as indexing into the empty selection would raise
        raise IndexError("no peak found for stimulus '" + stimulus + "' / condition '" + condition + "'")
    return peaks[0]


def create_peak_difference_feature(df, max_subj=40):
    """
    Compute per-subject differences of the N170 peaks between stimuli and conditions.

    Each returned row holds, for one subject:

    * ``mean_face``         - mean peak of all face rows
    * ``mean_car``          - mean peak of all car rows
    * ``peak_diff_overall`` - mean_face - mean_car
    * ``diff_intact``       - car (intact) - face (intact)
    * ``diff_scrambled``    - car (scrambled) - face (scrambled)
    * ``diff_face``         - face (scrambled) - face (intact)
    * ``diff_fc_ci``        - face (scrambled) - car (intact)
    * ``diff_fi_rest``      - face (intact) - mean of the three remaining pairs

    The differences are looked up by stimulus and condition, so they do not depend on the
    order of the rows in ``df``.

    :param df: A pandas dataframe containing the peak information for all conditions and
        subjects, with the columns 'subject_id', 'stimulus', 'condition' and 'peak'.
        'subject_id' is compared against the integers 1 .. max_subj.
    :param max_subj: the maximum subject till which the features are computed.
    :return: A pandas dataframe containing the peak-difference for multiple condition
        differences; 'subject_id' is the zero padded id string ('001', '002', ...)
    :raises IndexError: if a stimulus/condition pair is missing for one of the subjects
    """
    columns = ['subject_id', 'mean_face', 'mean_car', 'peak_diff_overall', 'diff_intact',
               'diff_scrambled', 'diff_face', 'diff_fc_ci', 'diff_fi_rest']
    rows = []
    for i in range(1, max_subj + 1):
        # Zero padded id: '001' ... '040'
        subj = "0" + str(i).zfill(2)
        sub_df = df.loc[df['subject_id'] == i]

        # Masks are built from the subject's own rows instead of the full frame, so no
        # implicit re-alignment of a longer boolean mask is needed.
        is_face = sub_df['stimulus'] == 'face'
        is_car = sub_df['stimulus'] == 'car'
        is_scrambled = sub_df['condition'] == 'scrambled'

        face_intact = _peak_for(sub_df, 'face', 'intact')
        face_scrambled = _peak_for(sub_df, 'face', 'scrambled')
        car_intact = _peak_for(sub_df, 'car', 'intact')
        car_scrambled = _peak_for(sub_df, 'car', 'scrambled')

        # Mean of face (intact) and face (scrambled)
        mean_face = sub_df.loc[is_face, 'peak'].mean()
        # Mean of car (intact) and car (scrambled)
        mean_car = sub_df.loc[is_car, 'peak'].mean()
        # Mean of everything except face (intact)
        mean_rest = sub_df.loc[is_car | (is_face & is_scrambled), 'peak'].mean()

        rows.append({
            'subject_id': subj,
            'mean_face': mean_face,
            'mean_car': mean_car,
            # Difference of face (overall) and car (overall)
            'peak_diff_overall': mean_face - mean_car,
            # Sign convention of the cached peak files is kept: car minus face
            'diff_intact': car_intact - face_intact,
            'diff_scrambled': car_scrambled - face_scrambled,
            # Face scrambled minus face intact
            'diff_face': face_scrambled - face_intact,
            # Face scrambled minus car intact (column name kept as is for existing callers)
            'diff_fc_ci': face_scrambled - car_intact,
            'diff_fi_rest': face_intact - mean_rest,
        })
    # Build the frame once; DataFrame.append was removed in pandas 2.0
    return pd.DataFrame(rows, columns=columns)
def analyze_erp(channels, precompute=True):
    """
    Run the statistical tests on the cached ERP peaks and print one p-value per hypothesis.

    For every channel the cached peak file 'cached_data/erp_peaks/erp_peaks_<channel>.csv'
    is read, turned into peak-difference features and tested.

    :param channels: The channels for which the tests are executed
    :param precompute: If true, the peak files are (re)computed for the 'N170' dataset before
        they are read. Pass 'False' only if the files already exist from an earlier run!
    """
    if precompute:
        # Precompute the erp peaks
        precompute_erp_df('N170')

    def report(title, p_value):
        # Every test result is printed as a title line followed by its p-value
        print(title)
        print("P-Value=" + str(p_value))

    # Hypotheses 3-6: one-sample t-tests against a zero mean, run after the ANOVA
    # 3. H_a : There is a difference in the peak-difference of face-car (intact)
    # 4. H_a : There is a difference in the peak-difference of face-car (scrambled)
    # 5. H_a : There is a difference between face (scrambled) and face (intact)
    # 6. H_a : There is a difference between face (intact) and the remaining pairs
    follow_up_tests = (
        ('diff_intact', "Peak Difference Faces-Car (Intact)"),
        ('diff_scrambled', "Peak Difference Faces-Car (Scrambled)"),
        ('diff_face', "Peak Difference Face intact and scrambled"),
        ('diff_fi_rest', "Peak Difference Face intact and Rest"),
    )

    for channel in channels:
        print("CHANNEL: " + channel)
        # Load the erp peak data and create the features for the tests
        peaks = pd.read_csv('cached_data/erp_peaks/erp_peaks_' + channel + '.csv', index_col=0)
        features = create_peak_difference_feature(peaks)

        # 1. H_a : There is a difference between the N170 peak of recognizing faces and cars
        #    One-sample t-test of the overall face-car difference against a zero mean
        _, p_value = ttest_1samp(features['peak_diff_overall'].to_numpy(), 0)
        report("Peak Difference Faces-Car (All)", p_value)

        # 2. H_a : The peak difference of intact faces & cars differs from the peak difference
        #    of scrambled faces & cars. ANOVA over the two difference samples.
        intact_differences = features['diff_intact'].to_numpy()
        scrambled_differences = features['diff_scrambled'].to_numpy()
        _, p_value = f_oneway(intact_differences, scrambled_differences)
        report("Difference of peak-differences face-car (intact) vs. face-car (scrambled)", p_value)

        for column, title in follow_up_tests:
            _, p_value = ttest_1samp(features[column].to_numpy(), 0)
            report(title, p_value)
if __name__ == '__main__':
    # Script entry point: set the MNE log level, then recompute the peak files and
    # run the tests on the four analysed channels.
    mne.set_log_level(verbose=VERBOSE_LEVEL)
    analyze_erp(channels=['P7', 'PO7', 'P8', 'PO8'], precompute=True)