135 lines
6.9 KiB
Python
135 lines
6.9 KiB
Python
import mne
|
|
import pandas as pd
|
|
from scipy.stats import ttest_1samp, f_oneway
|
|
|
|
from utils.file_utils import load_preprocessed_data, get_epochs
|
|
|
|
VERBOSE_LEVEL = 'CRITICAL'
|
|
|
|
|
|
def extract_erp_peak(raw, subject, stimulus, condition, channel):
    """
    Return the negative ERP peak of one subject for a single stimulus/condition pair.

    The recording is epoched for exactly the requested (stimulus, condition) event on
    the requested channel, the epochs are averaged, and the negative peak inside the
    0.13-0.2 time window of that average is looked up.

    :param raw: The raw object that is handed to get_epochs to build the epochs
    :param subject: The subject identifier; it is copied unchanged into the result
    :param stimulus: The stimulus of interest: either 'car' or 'face'
    :param condition: The condition of the stimulus: either 'intact' or 'scrambled'
    :param channel: The channel selection, forwarded to get_epochs as ``picks``
    :return: A dictionary that matches the row layout of the peak data frame:
        {'subject_id': subject, 'stimulus': stimulus, 'condition': condition, 'peak': peak}
    """
    # Epoch the data for just this event; the second return value is not needed here
    epochs, _ = get_epochs(raw, [(stimulus, condition)], picks=channel)

    # Average over all epochs before searching for the peak
    evoked = epochs.average()

    # Only negative peaks are searched, as only the channels P7, PO7, P8, PO8 are used.
    # get_peak also reports the channel name and the latency, which are discarded.
    _, _, amplitude = evoked.get_peak(tmin=0.13, tmax=0.2, mode='neg', return_amplitude=True)

    return dict(subject_id=subject, stimulus=stimulus, condition=condition, peak=amplitude)
|
|
|
|
|
|
def precompute_erp_df(dataset):
    """
    Generate one .csv file per channel holding the ERP peaks of every subject.

    For each of the channels P7, PO7, P8 and PO8 the preprocessed data of subjects
    1 to 40 is loaded, the peak of every stimulus-condition pair is extracted, and
    the resulting table (columns: subject_id, stimulus, condition, peak) is written
    to 'cached_data/erp_peaks/erp_peaks_<channel>.csv'.

    :param dataset: The dataset for which the erp peaks are computed
    """
    chs = ['P7', 'PO7', 'P8', 'PO8']
    events = [('face', 'intact'), ('face', 'scrambled'), ('car', 'intact'), ('car', 'scrambled')]
    columns = ['subject_id', 'stimulus', 'condition', 'peak']

    for ch in chs:
        # Rows are gathered in a plain list and turned into a frame once:
        # DataFrame.append no longer exists in pandas >= 2.0 and copied the
        # whole frame on every call.
        rows = []
        for i in range(1, 41):
            # Subject ids are zero-padded to three digits ('001' ... '040')
            subj = str(i).zfill(3)
            # Load preprocessed .fif data files.
            # NOTE(review): the recording is reloaded for every channel; kept that
            # way because it is not visible here whether get_epochs modifies raw.
            raw = load_preprocessed_data(subj, dataset)
            # Extract ERP peaks
            for stimulus, condition in events:
                rows.append(extract_erp_peak(raw, subj, stimulus, condition, ch))
        df = pd.DataFrame(rows, columns=columns)
        df.to_csv('cached_data/erp_peaks/erp_peaks_' + ch + '.csv')
|
|
|
|
|
|
def _condition_peak(sub_df, subject, stimulus, condition):
    """Return the first peak stored in sub_df for the given stimulus/condition pair."""
    mask = (sub_df['stimulus'] == stimulus) & (sub_df['condition'] == condition)
    peaks = sub_df.loc[mask, 'peak'].to_numpy()
    if peaks.size == 0:
        # Same exception type as indexing into the empty selection, but with context
        raise IndexError(
            "no peak for subject " + str(subject) + " (" + stimulus + ", " + condition + ")")
    return peaks[0]


def create_peak_difference_feature(df, max_subj=40):
    """
    Compute differences between the N170 peaks of the four conditions for all subjects.

    The rows of a subject are selected by comparing the 'subject_id' column with the
    integer subject number. Every peak is looked up explicitly by its
    (stimulus, condition) pair, so the result does not depend on the row order of df.

    Columns of the returned frame (one row per subject):
      - subject_id: subject number as a string, zero-padded to three digits
      - mean_face: mean of the face peaks (intact and scrambled)
      - mean_car: mean of the car peaks (intact and scrambled)
      - peak_diff_overall: mean_face - mean_car
      - diff_intact: car(intact) - face(intact)
      - diff_scrambled: car(scrambled) - face(scrambled)
      - diff_face: face(scrambled) - face(intact)
      - diff_fc_ci: face(scrambled) - car(intact)
      - diff_fi_rest: face(intact) - mean of the three other peaks

    :param df: A pandas dataframe containing the peak information for all conditions and
        subjects (columns 'subject_id', 'stimulus', 'condition', 'peak')
    :param max_subj: the maximum subject till which the features are computed.
    :return: A pandas dataframe containing the peak-difference for multiple condition differences
    :raises IndexError: if a subject has no row for one of the four stimulus/condition pairs
    """
    columns = ['subject_id', 'mean_face', 'mean_car', 'peak_diff_overall', 'diff_intact',
               'diff_scrambled', 'diff_face', 'diff_fc_ci', 'diff_fi_rest']
    # Rows are collected in a list and converted once; DataFrame.append was
    # removed in pandas 2.0.
    rows = []

    for i in range(1, max_subj + 1):
        subj = str(i).zfill(3)
        sub_df = df.loc[df['subject_id'] == i]

        # Build the masks from sub_df itself instead of relying on index
        # alignment of masks computed on the full frame.
        is_face = sub_df['stimulus'] == 'face'
        is_car = sub_df['stimulus'] == 'car'
        is_scrambled = sub_df['condition'] == 'scrambled'

        face_intact = _condition_peak(sub_df, subj, 'face', 'intact')
        face_scrambled = _condition_peak(sub_df, subj, 'face', 'scrambled')
        car_intact = _condition_peak(sub_df, subj, 'car', 'intact')
        car_scrambled = _condition_peak(sub_df, subj, 'car', 'scrambled')

        # Mean of face (intact) and face (scrambled)
        mean_face = sub_df.loc[is_face, 'peak'].mean()
        # Mean of car (intact) and car (scrambled)
        mean_car = sub_df.loc[is_car, 'peak'].mean()
        # Mean of everything except face (intact)
        mean_rest = sub_df.loc[is_car | (is_face & is_scrambled), 'peak'].mean()

        rows.append({
            'subject_id': subj,
            'mean_face': mean_face,
            'mean_car': mean_car,
            # Difference of face (overall) and car (overall)
            'peak_diff_overall': mean_face - mean_car,
            # Signs follow the previous row-order based computation (later row
            # minus earlier row for the order face/intact, face/scrambled,
            # car/intact, car/scrambled), so cached results stay comparable.
            'diff_intact': car_intact - face_intact,
            'diff_scrambled': car_scrambled - face_scrambled,
            'diff_face': face_scrambled - face_intact,
            'diff_fc_ci': face_scrambled - car_intact,
            'diff_fi_rest': face_intact - mean_rest,
        })

    return pd.DataFrame(rows, columns=columns)
|
|
|
|
|
|
def analyze_erp(channels):
    """
    Run the statistical tests on the cached ERP peaks and print their p-values.

    For every channel the file 'cached_data/erp_peaks/erp_peaks_<channel>.csv' is read,
    the peak-difference features are derived from it, and the p-value of each test is
    printed below a headline naming the test.

    :param channels: The channels for which the tests are executed
    """

    def report(headline, p_value):
        # Every test result is printed as a headline followed by its p-value
        print(headline)
        print("P-Value=" + str(p_value))

    # One-sample t-tests against a mean of 0 that follow the ANOVA, in output order:
    # 3. H_a : There is a difference in the peak-difference of face-car (intact)
    # 4. H_a : There is a difference in the peak-difference of face-car (scrambled)
    # 5. H_a : There is a difference between Face (scrambled) and Face (intact)
    # 6. Face (intact) compared with the remaining conditions
    follow_up_tests = [
        ('diff_intact', "Peak Difference Faces-Car (Intact)"),
        ('diff_scrambled', "Peak Difference Faces-Car (Scrambled)"),
        ('diff_face', "Peak Difference Face intact and scrambled"),
        ('diff_fi_rest', "Peak Difference Face intact and Rest"),
    ]

    for channel in channels:
        print("CHANNEL: " + channel)
        peaks = pd.read_csv('cached_data/erp_peaks/erp_peaks_' + channel + '.csv', index_col=0)
        features = create_peak_difference_feature(peaks)

        # 1. H_a : There is a difference between the N170 peak of recognizing faces and cars
        # One-sample t-test of the overall face-car difference against a mean of 0
        _, p_overall = ttest_1samp(features['peak_diff_overall'].to_numpy(), 0)
        report("Peak Difference Faces-Car (All)", p_overall)

        # 2. H_a : The peak difference of intact faces&cars differs from the peak
        # difference of scrambled faces&cars. ANOVA over these two samples.
        intact_sample = features['diff_intact'].to_numpy()
        scrambled_sample = features['diff_scrambled'].to_numpy()
        _, p_anova = f_oneway(intact_sample, scrambled_sample)
        report("Difference of peak-differences face-car (intact) vs. face-car (scrambled)", p_anova)

        for column, headline in follow_up_tests:
            _, p_value = ttest_1samp(features[column].to_numpy(), 0)
            report(headline, p_value)
|
|
|
|
|
|
if __name__ == '__main__':
    # Apply the configured log level to MNE before anything else runs
    mne.set_log_level(verbose=VERBOSE_LEVEL)

    # The cached peak files read by analyze_erp are written by this call;
    # enable it when they have to be (re)generated:
    # precompute_erp_df('N170')

    channels_of_interest = ['P7', 'PO7', 'P8', 'PO8']
    analyze_erp(channels_of_interest)
|