Initial commit

2021-03-27 14:58:49 +01:00
commit fc4eec6ac7
20 changed files with 1613 additions and 0 deletions

239 decoding_tf_analysis.py Normal file

@@ -0,0 +1,239 @@
import math
import mne
import numpy as np
import matplotlib.pyplot as plt
from mne.decoding import SlidingEstimator, cross_val_multiscore, Vectorizer, Scaler
from mne.time_frequency import tfr_morlet
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from utils.file_utils import load_preprocessed_data, get_epochs
from utils.plot_utils import plot_tf_cluster, plot_oscillation_bands
VERBOSE_LEVEL = 'CRITICAL'
def events_to_labels(evts, events_dict, mask=None): # TODO: write a test
"""
Converts the event labels of epochs to class labels for classification
:param evts: the event labels to be converted
:param events_dict: a dictionary of event keys
:param mask: an optional label mask with 4 entries, where:
1. entry: 'face intact', 2. entry: 'car intact', 3. entry: 'face scrambled', 4. entry: 'car scrambled'
If None the entries are [0,1,0,1] i.e. all faces are in class 0 and all cars are in class 1
:return: The list of class labels
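Example (illustrative): with events_dict = {'stimulus:1': 1, 'stimulus:141': 2} and the default
mask, events_to_labels([1, 2], events_dict) returns [0, 1]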
"""
events = evts.copy()
if mask is None:
mask = [0, 1, 0, 1]
for i in range(len(events)):
key = list(events_dict.keys())[list(events_dict.values()).index(events[i])]
k = int(key.split(':')[1])
if k < 41:
events[i] = mask[0] # Face intact
elif 40 < k < 81:
events[i] = mask[1] # Car intact
elif 100 < k < 141:
events[i] = mask[2] # Face scrambled
elif 140 < k < 181:
events[i] = mask[3] # Car scrambled
return events
def permutation_test(baseline, score, n_iter):
"""
An implementation of a permutation test for classification scores.
:param baseline: The classification scores of the baseline, i.e. selection by chance
:param score: The classification scores which are tested for significance
:param n_iter: number of permutations
:return: p-value
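Example (illustrative): with identical constant samples every permuted difference equals the
observed one, so permutation_test([0, 0, 0, 0], [0, 0, 0, 0], 100) returns 1.0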
"""
all_data = np.concatenate((baseline, score))
# Base statistic. The statistic used here is the difference of means
given_diff = np.mean(score) - np.mean(baseline)
all_diffs = [given_diff]
# Permutation iterations
for i in range(n_iter):
# Create a permutation of indices and then use indices from index 0 to len(baseline) to get data for baseline.
# Analogously for scores
perm_indices = np.random.permutation(list(range(len(all_data))))
mean_diff = np.mean(all_data[perm_indices[len(baseline):]]) - np.mean(all_data[perm_indices[:len(baseline)]])
all_diffs.append(mean_diff)
p_val = len(np.where(np.asarray(all_diffs) >= given_diff)[0]) / (n_iter + 1)
return p_val
def decoding(dataset, filename, compute_metric=True, mask=None):
"""
Runs decoding over time for all subjects
:param dataset: The dataset for which the decoding is done
:param filename: filename of either the file from which the classifier scores will be loaded
or to which they will be saved
:param compute_metric: If True the classifier will be run, else the result will be loaded from a precomputed file
:param mask: an optional label mask with 4 entries, where:
1. entry: 'face intact', 2. entry: 'car intact', 3. entry: 'face scrambled', 4. entry: 'car scrambled'
If None the entries are [0,1,0,1] i.e. all faces are in class 0 and all cars are in class 1
"""
if mask is None:
mask = [0, 1, 0, 1]
times = None
time_scale = 1100
metric = []
p_values = []
if compute_metric:
# Computes classifier scores for all subjects
for i in range(1, 41):
subj = "0" + str(i)
if len(str(i)) == 1:
subj = "0" + subj
# Load data
raw = load_preprocessed_data(subj, dataset)
epochs, events_dict = get_epochs(raw, picks=mne.pick_types(raw.info, eeg=True, eog=False))
data = epochs.get_data()
labels = events_to_labels(epochs.events[:, 2], events_dict, mask)
# Classify
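# Scaler standardizes each EEG channel using the epochs info, Vectorizer flattens each
# sample and the logistic regression decodes the two classes; SlidingEstimator then fits
# one such classifier independently at every time point, giving a score-over-time curve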
clf = make_pipeline(Scaler(epochs.info), Vectorizer(), LogisticRegression(solver='lbfgs'))
time_decode = SlidingEstimator(clf)
scores = cross_val_multiscore(time_decode, data, labels, cv=10, n_jobs=4)
metric.append(np.mean(scores, axis=0))
if times is None:
times = epochs.times
np.save('cached_data/decoding_data/' + filename, metric)
else:
# Dummy time axis created to match the epochs.times of the cached data
times = np.linspace(-0.09960938, 1, 1127)
metric = np.load('cached_data/decoding_data/' + filename + '.npy')
metric = np.asarray(metric)
# Compute the index of time point 0: the first 100ms of the 1100ms epoch are pre-stimulus
index = math.floor((len(metric[0]) / time_scale) * 100)
# Baseline scores: the pre-stimulus part of every subject's curve, i.e. chance-level accuracy
baseline = metric[:, :index].flatten()
plt.plot(np.asarray(times) * 1000, np.mean(metric, axis=0))
plt.ylabel('Accuracy')
plt.xlabel('Time (ms)')
plt.title('Mean Accuracy over Subjects for Faces vs. Cars')
plt.show()
# Compute the permutation tests
for t in range(len(metric[0][index:])):
score_t = np.asarray(metric[:, t + index])
p = permutation_test(baseline, score_t, 100)
p_values.append(p)
if t % 50 == 0:
print(str(t) + " Out of " + str(len(metric[0][index:])))
plt.plot(np.asarray(times)[index:] * 1000, p_values)
plt.ylabel('P-Value')
plt.xlabel('Time (ms)')
plt.title('P-Values for Faces vs. Cars')
plt.show()
def create_tfr(raw, condition, freqs, n_cycles, response='induced', baseline=None):
"""
Compute the time frequency representation (TFR) of data for a given condition via morlet wavelets
:param raw: the data
:param condition: the condition for which to compute the TFR. Given as a list of tuples of the form (stimulus, condition) # TODO ambiguous use of condition
:param freqs: the frequencies for which to compute the TFR
:param n_cycles: the number of cycles used by the morlet wavelets
:param response: type of expected TFR. Can be 'total', 'induced' or 'evoked'. Default is 'induced'
:param baseline: baseline used to correct the power. A tuple of the form (start, end).
Default is None, in which case no baseline correction will be applied
:return: The TFR of the given data for a given condition. Has type AverageTFR
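Example (illustrative, mirroring the call in time_frequency below):
create_tfr(raw, [('face', 'intact')], freqs, n_cycles, 'induced', (-0.2, 0))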
"""
epochs, _ = get_epochs(raw, condition, tmin=-0.2, tmax=1)
print(' ' + str(condition))
if response == 'total':
print(' Power Total')
power = tfr_morlet(epochs, freqs=freqs, n_cycles=n_cycles, return_itc=False, n_jobs=4)
elif response == 'induced':
print(' Power Induced')
power = tfr_morlet(epochs.subtract_evoked(), freqs=freqs, n_cycles=n_cycles, return_itc=False, n_jobs=4)
else:
print(' Power Evoked')
power_total = tfr_morlet(epochs, freqs=freqs, n_cycles=n_cycles, return_itc=False, n_jobs=4)
power_induced = tfr_morlet(epochs.subtract_evoked(), freqs=freqs, n_cycles=n_cycles, return_itc=False, n_jobs=4)
power = mne.combine_evoked([power_total, power_induced], weights=[1, -1])
# power.plot(picks='P7')
power.apply_baseline(mode='ratio', baseline=baseline)
# plot_oscillation_bands(power)
# power.plot(picks='P7')
return power
def time_frequency(dataset, filename, compute_tfr=True):
"""
Runs time frequency analysis
:param dataset: The dataset for which the time frequency analysis is done
:param filename: Filename of either the file from which the TFRs will be loaded
or to which they will be saved
:param compute_tfr: If True the TFRs will be created, else the TFRs will be loaded from a precomputed file
"""
# Parameters
# Frequency space (from, to, steps) -> controls the frequency resolution; num=50-80 works well for 1-50Hz
freqs = np.logspace(*np.log10([0.5, 50]), num=50)
# Number of cycles -> controls the time/frequency trade-off:
# 1 for high time resolution & frequency smoothing, freqs/2 for high frequency resolution & time smoothing
n_cycles = freqs / 2
# Baseline -> should not extend post-stimulus (i.e. > 0); best is pre-stimulus (e.g. -400 to -200ms)
baseline = [-0.5, 0]
cond1 = []
cond2 = []
times = None
if compute_tfr:
for i in range(1, 41):
subj = "0" + str(i)
if len(str(i)) == 1:
subj = "0" + subj
print("########## SUBJECT " + subj + " ##########")
# Load data
raw = load_preprocessed_data(subj, dataset)
raw.set_channel_types({'HEOG_left': 'eog', 'HEOG_right': 'eog', 'VEOG_lower': 'eog'})
raw.set_montage('standard_1020', match_case=False)
# Create the two conditions we want to compare
power_cond1 = create_tfr(raw, [('face', 'intact')], freqs, n_cycles, 'induced', (-0.2, 0))
print(' CONDITION 1 LOADED')
cond1.append(power_cond1)
power_cond2 = create_tfr(raw, [('face', 'scrambled'), ('car', None)], freqs, n_cycles, 'induced',
(-0.2, 0))
print(' CONDITION 2 LOADED')
cond2.append(power_cond2)
print(' DONE')
np.save('cached_data/tf_data/' + filename + '_cond1', cond1)
np.save('cached_data/tf_data/' + filename + '_cond2', cond2)
else:
cond1 = np.load('cached_data/tf_data/' + filename + '_cond1.npy', allow_pickle=True).tolist()
cond2 = np.load('cached_data/tf_data/' + filename + '_cond2.npy', allow_pickle=True).tolist()
if times is None:
times = cond1[0].times
mne.grand_average(cond2).plot(picks=['P7'], vmin=-3, vmax=3, title='Grand Average P7')
plot_oscillation_bands(mne.grand_average(cond1))
plot_oscillation_bands(mne.grand_average(cond2))
F, clusters, cluster_p_values, h0 = mne.stats.permutation_cluster_test(
[mne.grand_average(cond1).data, mne.grand_average(cond2).data], n_jobs=4, verbose='INFO',
seed=123)
plot_tf_cluster(F, clusters, cluster_p_values, freqs, times)
if __name__ == '__main__':
mne.set_log_level(verbose=VERBOSE_LEVEL)
ds = 'N170'
# decoding(ds, 'faces_vs_cars_100iters', False)
time_frequency(ds, 'face_intact_vs_all_0.1_50hz_ncf2', False)

134 erp_analysis.py Normal file

@@ -0,0 +1,134 @@
import mne
import pandas as pd
from scipy.stats import ttest_1samp, f_oneway
from utils.file_utils import load_preprocessed_data, get_epochs
VERBOSE_LEVEL = 'CRITICAL'
def extract_erp_peak(raw, subject, stimulus, condition, channel):
"""
Extracts the ERP peak for a given subject, stimulus and condition as a single value.
:param raw: The raw object, from which the epochs are generated
:param subject: The subject for which the peak is extracted
:param stimulus: The stimulus we look at: Either 'car' or 'face'
:param condition: The condition of the stimulus: Either 'intact' or 'scrambled'
:param channel: The currently selected channel, for which the erp_peak should be extracted
:return: A dictionary conforming to the data frame format:
{'subject_id': subject, 'stimulus': stimulus, 'condition': condition, 'peak': peak}
"""
# Epoch the data
epochs, _ = get_epochs(raw, [(stimulus, condition)], picks=channel)
# Check only for negative peaks, as only the channels P7, PO7, P8, PO8 are used
ch, latency, peak = epochs.average().get_peak(tmin=0.13, tmax=0.2, mode='neg', return_amplitude=True)
return {'subject_id': subject, 'stimulus': stimulus, 'condition': condition, 'peak': peak}
def precompute_erp_df(dataset):
"""
This method generates a .csv file where the erp peaks for each stimulus-condition pair for each subject are saved
:param dataset: The dataset for which the erp peaks are computed
"""
chs = ['P7', 'PO7', 'P8', 'PO8']
events = [('face', 'intact'), ('face', 'scrambled'), ('car', 'intact'), ('car', 'scrambled')]
for ch in chs:
df = pd.DataFrame(data={'subject_id': [], 'stimulus': [], 'condition': [], 'peak': []})
for i in range(1, 41):
subj = "0" + str(i)
if len(str(i)) == 1:
subj = "0" + subj
# Load preprocessed .fif data files
raw = load_preprocessed_data(subj, dataset)
# Extract ERP peaks
for ev in events:
row = extract_erp_peak(raw, subj, ev[0], ev[1], ch)
df = df.append(row, ignore_index=True)
df.to_csv('cached_data/erp_peaks/erp_peaks_' + ch + '.csv')
def create_peak_difference_feature(df, max_subj=40):
"""
Compute the difference of two N170 peaks for different conditions for all subjects.
i.e. the difference of face(intact)-car(intact), face(scrambled)-car(scrambled), etc.
:param max_subj: the maximum subject up to which the features are computed.
:param df: A pandas dataframe containing the peak information for all conditions and subjects
:return: A pandas dataframe containing the peak-difference for multiple condition differences
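Example (from the unit test): for peaks face(intact)=1, face(scrambled)=2, car(intact)=3,
car(scrambled)=4 the column 'peak_diff_overall' is mean_face - mean_car = 1.5 - 3.5 = -2.0
and 'diff_intact' is car(intact) - face(intact) = 2.0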
"""
peak_diff_df = pd.DataFrame(
data={'subject_id': [], 'mean_face': [], 'mean_car': [], 'peak_diff_overall': [], 'diff_intact': [],
'diff_scrambled': [], 'diff_face': [], 'diff_fc_ci': [], 'diff_fi_rest': []})
for i in range(1, max_subj + 1):
subj = "0" + str(i)
if len(str(i)) == 1:
subj = "0" + subj
sub_df = df.loc[df['subject_id'] == i]
# peak difference (car - face) for the intact condition
diff_intact = sub_df.loc[df['condition'] == 'intact']['peak'].diff().to_numpy()[1]
# peak difference (car - face) for the scrambled condition
diff_scrambled = sub_df.loc[df['condition'] == 'scrambled']['peak'].diff().to_numpy()[1]
# peak difference (scrambled - intact) for faces
diff_face = sub_df.loc[df['stimulus'] == 'face']['peak'].diff().to_numpy()[1]
# Difference of face (scrambled) and car (intact)
diff_fs_ci = sub_df.loc[(df['stimulus'] == 'face') & (df['condition'] == 'scrambled')]['peak'].values[0] - \
sub_df.loc[(df['stimulus'] == 'car') & (df['condition'] == 'intact')]['peak'].values[0]
# Mean of face (intact) and face (scrambled)
mean_face = sub_df.loc[df['stimulus'] == 'face']['peak'].mean()
# Mean of car (intact) and car (scrambled)
mean_car = sub_df.loc[df['stimulus'] == 'car']['peak'].mean()
# Mean of all conditions except face (intact)
mean_rest = sub_df.loc[(df['stimulus'] == 'car') | ((df['stimulus'] == 'face') & (df['condition'] == 'scrambled'))]['peak'].mean()
# Difference of face (intact) and the mean of the rest
diff_fi_rest = sub_df.loc[df['stimulus'] == 'face']['peak'].values[0] - mean_rest
# Difference of face (overall) and car (overall)
diff = mean_face - mean_car
peak_diff_df = peak_diff_df.append(
{'subject_id': subj, 'mean_face': mean_face, 'mean_car': mean_car, 'peak_diff_overall': diff,
'diff_intact': diff_intact, 'diff_scrambled': diff_scrambled, 'diff_face': diff_face,
'diff_fc_ci': diff_fs_ci, 'diff_fi_rest': diff_fi_rest}, ignore_index=True)
return peak_diff_df
def analyze_erp(channels):
"""
Execute several statistical tests for different hypotheses to analyze the ERPs
:param channels: The channels for which the tests are executed
"""
for c in channels:
print("CHANNEL: " + c)
erp_df = pd.read_csv('cached_data/erp_peaks/erp_peaks_' + c + '.csv', index_col=0)
feature_df = create_peak_difference_feature(erp_df)
# 1. H_a : There is a difference between the N170 peak of recognizing faces and cars
# Run one-sample ttest against 0 mean
stat, p_val = ttest_1samp(feature_df['peak_diff_overall'].to_numpy(), 0)
print("Peak Difference Faces-Car (All)")
print("P-Value=" + str(p_val))
# 2. H_a : There is a difference between the peak difference of intact faces&cars,
# to the peak difference of scrambled faces&cars
# Run ANOVA for two samples. 1. Diff of intact faces&cars, 2. Diff of scrambled faces&cars
stat, p_val = f_oneway(feature_df['diff_intact'].to_numpy(), feature_df['diff_scrambled'].to_numpy())
print("Difference of peak-differences face-car (intact) vs. face-car (scrambled)")
print("P-Value=" + str(p_val))
# # 3. H_a : There is a difference in the peak-difference of face-car (intact)
stat, p_val = ttest_1samp(feature_df['diff_intact'].to_numpy(), 0)
print("Peak Difference Faces-Car (Intact)")
print("P-Value=" + str(p_val))
# # 4. H_a : There is a difference in the peak-difference of face-car (scrambled)
stat, p_val = ttest_1samp(feature_df['diff_scrambled'].to_numpy(), 0)
print("Peak Difference Faces-Car (Scrambled)")
print("P-Value=" + str(p_val))
# # 5. H_a : There is a Difference between Face (scrambled) and Face (intact) in the peak difference
stat, p_val = ttest_1samp(feature_df['diff_face'].to_numpy(), 0)
print("Peak Difference Face intact and scrambled")
print("P-Value=" + str(p_val))
stat, p_val = ttest_1samp(feature_df['diff_fi_rest'].to_numpy(), 0)
print("Peak Difference Face intact and Rest")
print("P-Value=" + str(p_val))
if __name__ == '__main__':
mne.set_log_level(verbose=VERBOSE_LEVEL)
# precompute_erp_df('N170')
analyze_erp(['P7', 'PO7', 'P8', 'PO8'])

110 plotting.py Normal file

@@ -0,0 +1,110 @@
import mne
from mne.preprocessing import create_eog_epochs
from mne_bids import BIDSPath, read_raw_bids
from utils.ccs_eeg_utils import read_annotations_core
from utils.file_utils import get_epochs
def load_unprocessed_subject(subject, dataset):
"""
Load the eeg data of a subject
:param subject: The subject of which the data will be loaded
:param dataset: The dataset which will be loaded
:return: the subject data
"""
bids_path = BIDSPath(subject=subject, task=dataset, session=dataset, datatype='eeg', suffix='eeg',
root='Dataset\\' + dataset)
raw = read_raw_bids(bids_path)
# Add annotations
read_annotations_core(bids_path, raw)
return raw
def filter_data(raw):
"""
Filter the data of a single subject with a bandpass filter.
The lower bound is 0.5Hz to remove slow drifts.
The upper bound is 48Hz to attenuate high frequencies, including the power line noise at 60Hz
:param raw: The data to be filtered
:return: The filtered data
"""
raw.filter(0.5, 48, fir_design='firwin')
return raw
def plot_filter_data():
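"""
Plot and save the power spectral density of the band-pass filtered data for the selected subjects
"""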
ds = 'N170'
for subj in ['014']:
data = load_unprocessed_subject(subj, ds)
data.load_data()
# data.plot(n_channels=len(data.ch_names), block=True, scalings=40e-6)
filter_data(data)
fig = mne.viz.plot_raw_psd(data, fmax=80, average=True, show=False)
fig.savefig("plots/frequency_filtered_subj_" + subj + "_48Hz.png")
# data.plot(n_channels=len(data.ch_names), block=True, scalings=40e-6)
def plot_filter_data_epoched(subj):
ds = 'N170'
data = load_unprocessed_subject(subj, ds)
data.load_data()
filter_data(data)
get_epochs(data)[0].average().plot()
def plot_cleaning():
ds = 'N170'
for subj in ['014']:
data = load_unprocessed_subject(subj, ds)
data.load_data()
filter_data(data)
folder = "Dataset\\" + ds + "\\sub-" + subj + "\\ses-" + ds + "\\eeg\\"
filepath = folder + "sub-" + subj + "_task-" + ds
print(filepath)
ann = mne.read_annotations(filepath + "_" + "badannotations.csv")
data.annotations.append(ann.onset, ann.duration, ann.description)
data.plot(n_channels=len(data.ch_names), block=True, scalings=40e-6)
def plot_ica():
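"""
Fit an ICA on a 1Hz high-pass filtered copy of the data and plot the sources and
component properties for the selected subjects
"""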
ds = 'N170'
for subj in ['014']:
data = load_unprocessed_subject(subj, ds)
folder = "Dataset\\" + ds + "\\sub-" + subj + "\\ses-" + ds + "\\eeg\\"
filepath = folder + "sub-" + subj + "_task-" + ds
ann = mne.read_annotations(filepath + "_" + "badannotations.csv")
data.annotations.append(ann.onset, ann.duration, ann.description)
data.load_data()
data.set_channel_types({'HEOG_left': 'eog', 'HEOG_right': 'eog', 'VEOG_lower': 'eog'})
data.set_montage('standard_1020', match_case=False)
ica_raw = data.copy()
ica_raw.filter(l_freq=1, h_freq=None)
# Then run ICA
ica = mne.preprocessing.ICA(method="fastica",
random_state=123)  # Use a random state for reproducible results # TODO Old random state 123 or new one?
ica.fit(ica_raw, verbose=True)
ica_raw.load_data()
# ica.plot_components(inst=ica_raw, ch_type='eeg', contours=0, topomap_args={'extrapolate': 'head'},
# psd_args={'fmin': 0, 'fmax': 80})
ica.plot_sources(ica_raw)
ica.plot_properties(inst=ica_raw, dB=False, topomap_args={'extrapolate': 'head', 'contours': 0},
psd_args={'fmin': 3, 'fmax': 50}, picks=['eeg'])
def plot_joint_eog_plots():
ds = 'N170'
for subj in ['014']:
data = load_unprocessed_subject(subj, ds)
data.load_data()
data.set_channel_types({'HEOG_left': 'eog', 'HEOG_right': 'eog', 'VEOG_lower': 'eog'})
data.set_montage('standard_1020', match_case=False)
eog_evoked = create_eog_epochs(data).average()
eog_evoked.apply_baseline(baseline=(None, -0.2))
eog_evoked.plot_joint()
# plot_ica()
# plot_joint_eog_plots()
plot_filter_data_epoched('003')


@@ -0,0 +1,182 @@
import os
import mne
from mne_bids import (BIDSPath, read_raw_bids)
from utils.ccs_eeg_semesterproject import load_precomputed_badData, load_precomputed_ica
from utils.ccs_eeg_utils import read_annotations_core
def load_subject(subject, dataset):
"""
Load the eeg data of a subject
:param subject: The subject of which the data will be loaded
:param dataset: The dataset which will be loaded
:return: the subject data
"""
bids_path = BIDSPath(subject=subject, task=dataset, session=dataset, datatype='eeg', suffix='eeg',
root='Dataset\\' + dataset)
raw = read_raw_bids(bids_path)
# Add annotations
read_annotations_core(bids_path, raw)
return raw
def load_given_preprocessing_data(subject, dataset):
"""
Loads given pre-processing information for a given subject.
This is used for all subjects which were not manually preprocessed
:param subject: The subject to load the data for
:param dataset: The dataset currently viewed
:return: The bad annotations, bad channels, ica object, bad ICs
"""
anno, bc = load_precomputed_badData("Dataset\\" + dataset + "\\", subject,
dataset) # Loads annotations and bad channels
ica, bad_comp = load_precomputed_ica("Dataset\\" + dataset + "\\", subject,
dataset) # Loads ica and bad components
return anno, bc, ica, bad_comp
def save_subject(raw, subject, dataset):
"""
Save a raw object to a .fif file
:param raw: the raw object to be saved
:param subject: the subject, which the raw object belongs to
:param dataset: the dataset currently viewed
"""
folder = "Dataset\\" + dataset + "\\sub-" + subject + "\\ses-" + dataset + "\\eeg\\"
filepath = folder + "sub-" + subject + "_task-" + dataset
raw.save(filepath + "_cleaned.fif", overwrite=True)
def filter_data(raw):
"""
Filter the data of a single subject with a bandpass filter.
The lower bound is 0.5Hz to remove slow drifts.
The upper bound is 48Hz to attenuate high frequencies, including the power line noise at 60Hz
:param raw: The data to be filtered
:return: The filtered data
"""
raw.filter(0.5, 48, fir_design='firwin')
return raw
def clean_data(raw, subject, dataset, cleaned=False):
"""
Clean the data of a single subject, meaning finding the bad segments and channels of a subject.
If these were already found, they are loaded onto the data
:param raw: the subject data
:param subject: the subject whose data will be viewed
:param dataset: the dataset currently viewed
:param cleaned: If True the data was already viewed and the 'BAD_' annotations as well as the bad channels will be loaded
:return: the bad channels
"""
channels = None
folder = "Dataset\\" + dataset + "\\sub-" + subject + "\\ses-" + dataset + "\\eeg\\"
filepath = folder + "sub-" + subject + "_task-" + dataset
# If nothing was marked yet, plot the data to mark bad segments
if not cleaned:
raw.plot(n_channels=len(raw.ch_names), block=True, scalings=40e-6)
# Get indices of bad annotations
bad_idx = [idx for (idx, annot) in enumerate(raw.annotations) if annot['description'] == "BAD_"]
# If bad intervals were found save
if bad_idx:
raw.annotations[bad_idx].save(filepath + "_badannotations.csv")
if os.path.isfile(filepath + "_badannotations.csv"):
annotations = mne.read_annotations(filepath + "_badannotations.csv")
raw.annotations.append(annotations.onset, annotations.duration, annotations.description)
# Set the bad channels for each subject
if subject == '001':
channels = ['F8'] # Maybe also FP2?
elif subject == '003':
channels = []
elif subject == '014':
channels = []
return channels
def run_ica(raw, dataset, subject, search='manual'):
"""
Runs Independent Component Analysis. Depending on the 'search' mode, it is either used to find bad ICs or to exclude
bad ICs
:param raw: the data to be preprocessed
:param dataset: the dataset currently viewed
:param subject: the subject currently viewed
:param search: default value 'manual': The user views different plots for all ICs found
'eog' : Uses the eog channels to find bad ICs
'done' : Applies the bad ICs that were found
"""
# First filter the data to remove slow drifts - this is done with 1Hz, as proposed by the mne Tutorial at:
# https://mne.tools/dev/auto_tutorials/preprocessing/plot_40_artifact_correction_ica.html#filtering-to-remove-slow-drifts
ica_raw = raw.copy()
ica_raw.filter(l_freq=1, h_freq=None)
# Then run ICA
ica = mne.preprocessing.ICA(method="fastica", random_state=123)  # Use a random state for reproducible results
ica.fit(ica_raw, verbose=True)
if search == 'manual':
ica_raw.load_data()
# ica.plot_components(inst=ica_raw, ch_type='eeg', contours=0, topomap_args={'extrapolate': 'head'},
# psd_args={'fmin': 0, 'fmax': 80})
ica.plot_properties(inst=ica_raw, dB=False, topomap_args={'extrapolate': 'head', 'contours': 0},
psd_args={'fmin': 0, 'fmax': 50}, picks=['eeg'])
ica.plot_sources(ica_raw)
elif search == 'eog':
eog_indices, _ = ica.find_bads_eog(raw)
ica.exclude = eog_indices
print('BAD COMPONENTS VIA EOG: ' + str(eog_indices))
ica.plot_overlay(ica_raw, exclude=eog_indices)
elif search == 'done':
exclude = None
if subject == '001':
exclude = [0, 1, 2, 4, 8, 14, 16, 25]  # Through eog: 0,1
elif subject == '003':
exclude = [0, 2]  # Through eog: 0, 2
elif subject == '014':
exclude = [0, 1, 9]  # Through eog: 0,1
# ica.plot_overlay(ica_raw, exclude=exclude) # Plot differences through exclude
# ica.exclude = exclude
# Apply ica to the raw object
raw.load_data()
# ica.plot_overlay(ica_raw, exclude=exclude)
raw = ica.apply(raw, exclude=exclude)
# Lastly save the ica to a file
folder = "Dataset\\" + dataset + "\\sub-" + subject + "\\ses-" + dataset + "\\eeg\\"
filepath = folder + "sub-" + subject + "_task-" + dataset
ica.save(filepath + "-ica.fif")
return raw
if __name__ == '__main__':
ds = 'N170'
for i in range(1, 41):
subj = "0" + str(i)
if len(str(i)) == 1:
subj = "0" + subj
data = load_subject(subj, ds)
# Load data into memory
data.load_data()
# Filter data with a bandpass filter
filter_data(data)
if subj in ["001", "003", "014"]:
# Manual preprocessing
# Clean the data
b_ch = clean_data(data, subj, ds, True)
# Run ICA
data.set_channel_types({'HEOG_left': 'eog', 'HEOG_right': 'eog', 'VEOG_lower': 'eog'})
data.set_montage('standard_1020', match_case=False)
data = run_ica(data, ds, subj, 'done')
else:
# Provided cleaning and preprocessing information
ann, b_ch, ica_pre, bad_component = load_given_preprocessing_data(subj, ds)
data.annotations.append(ann.onset, ann.duration, ann.description)
data = ica_pre.apply(data, exclude=bad_component)
# Interpolate bad channels (b_ch holds channel names for the manual subjects and indices for the precomputed ones)
if b_ch is not None and len(b_ch) > 0:
data.info['bads'] += [ch if isinstance(ch, str) else data.ch_names[ch] for ch in b_ch]
data.interpolate_bads()
# Re-Reference the data
data_re = data.copy().set_eeg_reference('average')
# Save preprocessed and cleaned data set
save_subject(data_re, subj, ds)

86 test/test.py Normal file

@@ -0,0 +1,86 @@
import unittest
import mne
import pandas as pd
from decoding_tf_analysis import events_to_labels, permutation_test
from erp_analysis import create_peak_difference_feature
from utils.file_utils import get_keys_for_events, get_epochs
from pandas.testing import assert_frame_equal
class TestFileUtils(unittest.TestCase):
def setUp(self):
# Load true values for keys
with open('test_files/face.txt') as f:
face = f.readlines()
self.face = [x.strip() for x in face]
with open('test_files/face_intact.txt') as f:
face_intact = f.readlines()
self.face_intact = [x.strip() for x in face_intact]
with open('test_files/face_scrambled.txt') as f:
face_scrambled = f.readlines()
self.face_scrambled = [x.strip() for x in face_scrambled]
# Load true epochs
self.raw = mne.io.read_raw_fif("..\\Dataset\\n170\\sub-001\\ses-n170\\eeg\\sub-001_task-n170_cleaned.fif")
wanted_keys = get_keys_for_events("face", "intact")
events, events_dict = mne.events_from_annotations(self.raw)
events_dict_key = dict((k, events_dict[k]) for k in wanted_keys if k in events_dict)
self.given = mne.Epochs(self.raw, events, events_dict_key, tmin=-0.2, tmax=0.5, reject_by_annotation=False,
picks='P7')
self.given.drop_bad()
self.given.load_data()
def test_keys_for_events(self):
# Test only for face key generation, as the rest is generated analogously
self.assertEqual("stimulus", get_keys_for_events(stimulus=None, condition=None))
self.assertEqual(self.face, get_keys_for_events(stimulus='face', condition=None))
self.assertEqual(self.face_intact, get_keys_for_events(stimulus='face', condition='intact'))
self.assertEqual(self.face_scrambled, get_keys_for_events(stimulus='face', condition='scrambled'))
def test_get_epochs(self):
# Get an epoch to compare against
epochs, key = get_epochs(self.raw, [("face", "intact")], picks='P7', tmin=-0.2, tmax=0.5)
epochs.load_data()
self.assertEqual(self.given, epochs)
class TestERPAnalysis(unittest.TestCase):
def test_difference_features(self):
# Check if the correct features are created
subject_ids = [1, 1, 1, 1]
stimuli = ['face', 'face', 'car', 'car']
conditions = ['intact', 'scrambled', 'intact', 'scrambled']
peaks = [1, 2, 3, 4]
df = pd.DataFrame(data={'subject_id': subject_ids, 'stimulus': stimuli, 'condition': conditions,
'peak': peaks})
diff_df_true = pd.DataFrame(
data={'subject_id': ['001'], 'mean_face': [1.5], 'mean_car': [3.5], 'peak_diff_overall': [-2.0],
'diff_intact': [2.0], 'diff_scrambled': [2.0], 'diff_face': [1.0], 'diff_fc_ci': [-1.0],
'diff_fi_rest': [-2.0]})
diff_df = create_peak_difference_feature(df, 1)
assert_frame_equal(diff_df_true, diff_df)
class TestDecodingTFAnalysis(unittest.TestCase):
def test_events_to_labels(self):
# Only check for stimuli 1-40, 41-80, 101-140, 141-180 as no other stimuli are possible
events_dict = {'stimulus:1': 1, 'stimulus:40': 2, 'stimulus:41': 3, 'stimulus:80': 4, 'stimulus:101': 5,
'stimulus:140': 6, 'stimulus:141': 7, 'stimulus:180': 8}
labels = events_to_labels([1, 2, 3, 4, 5, 6, 7, 8], events_dict, [0, 1, 2, 3])
self.assertCountEqual([0, 0, 1, 1, 2, 2, 3, 3], labels)
def test_permutation_test(self):
# Check permutation test
p = permutation_test([0, 0, 0, 0], [0, 0, 0, 0], 100)
self.assertEqual(1, p)
p = permutation_test([0, 0, 0, 0, 0], [5, 10, 15, 10, 5], 100)
self.assertGreater(0.05, p)
if __name__ == '__main__':
mne.set_log_level(verbose='WARNING') # Avoid full console
unittest.main()

160 test/test_files/face.csv Normal file

@@ -0,0 +1,160 @@
stimulus:1
stimulus:2
stimulus:3
stimulus:4
stimulus:5
stimulus:6
stimulus:7
stimulus:8
stimulus:9
stimulus:10
stimulus:11
stimulus:12
stimulus:13
stimulus:14
stimulus:15
stimulus:16
stimulus:17
stimulus:18
stimulus:19
stimulus:20
stimulus:21
stimulus:22
stimulus:23
stimulus:24
stimulus:25
stimulus:26
stimulus:27
stimulus:28
stimulus:29
stimulus:30
stimulus:31
stimulus:32
stimulus:33
stimulus:34
stimulus:35
stimulus:36
stimulus:37
stimulus:38
stimulus:39
stimulus:40
stimulus:41
stimulus:42
stimulus:43
stimulus:44
stimulus:45
stimulus:46
stimulus:47
stimulus:48
stimulus:49
stimulus:50
stimulus:51
stimulus:52
stimulus:53
stimulus:54
stimulus:55
stimulus:56
stimulus:57
stimulus:58
stimulus:59
stimulus:60
stimulus:61
stimulus:62
stimulus:63
stimulus:64
stimulus:65
stimulus:66
stimulus:67
stimulus:68
stimulus:69
stimulus:70
stimulus:71
stimulus:72
stimulus:73
stimulus:74
stimulus:75
stimulus:76
stimulus:77
stimulus:78
stimulus:79
stimulus:80
stimulus:101
stimulus:102
stimulus:103
stimulus:104
stimulus:105
stimulus:106
stimulus:107
stimulus:108
stimulus:109
stimulus:110
stimulus:111
stimulus:112
stimulus:113
stimulus:114
stimulus:115
stimulus:116
stimulus:117
stimulus:118
stimulus:119
stimulus:120
stimulus:121
stimulus:122
stimulus:123
stimulus:124
stimulus:125
stimulus:126
stimulus:127
stimulus:128
stimulus:129
stimulus:130
stimulus:131
stimulus:132
stimulus:133
stimulus:134
stimulus:135
stimulus:136
stimulus:137
stimulus:138
stimulus:139
stimulus:140
stimulus:141
stimulus:142
stimulus:143
stimulus:144
stimulus:145
stimulus:146
stimulus:147
stimulus:148
stimulus:149
stimulus:150
stimulus:151
stimulus:152
stimulus:153
stimulus:154
stimulus:155
stimulus:156
stimulus:157
stimulus:158
stimulus:159
stimulus:160
stimulus:161
stimulus:162
stimulus:163
stimulus:164
stimulus:165
stimulus:166
stimulus:167
stimulus:168
stimulus:169
stimulus:170
stimulus:171
stimulus:172
stimulus:173
stimulus:174
stimulus:175
stimulus:176
stimulus:177
stimulus:178
stimulus:179
stimulus:180

80 test/test_files/face.txt Normal file

@@ -0,0 +1,80 @@
stimulus:1
stimulus:2
stimulus:3
stimulus:4
stimulus:5
stimulus:6
stimulus:7
stimulus:8
stimulus:9
stimulus:10
stimulus:11
stimulus:12
stimulus:13
stimulus:14
stimulus:15
stimulus:16
stimulus:17
stimulus:18
stimulus:19
stimulus:20
stimulus:21
stimulus:22
stimulus:23
stimulus:24
stimulus:25
stimulus:26
stimulus:27
stimulus:28
stimulus:29
stimulus:30
stimulus:31
stimulus:32
stimulus:33
stimulus:34
stimulus:35
stimulus:36
stimulus:37
stimulus:38
stimulus:39
stimulus:40
stimulus:101
stimulus:102
stimulus:103
stimulus:104
stimulus:105
stimulus:106
stimulus:107
stimulus:108
stimulus:109
stimulus:110
stimulus:111
stimulus:112
stimulus:113
stimulus:114
stimulus:115
stimulus:116
stimulus:117
stimulus:118
stimulus:119
stimulus:120
stimulus:121
stimulus:122
stimulus:123
stimulus:124
stimulus:125
stimulus:126
stimulus:127
stimulus:128
stimulus:129
stimulus:130
stimulus:131
stimulus:132
stimulus:133
stimulus:134
stimulus:135
stimulus:136
stimulus:137
stimulus:138
stimulus:139
stimulus:140


@@ -0,0 +1,40 @@
stimulus:1
stimulus:2
stimulus:3
stimulus:4
stimulus:5
stimulus:6
stimulus:7
stimulus:8
stimulus:9
stimulus:10
stimulus:11
stimulus:12
stimulus:13
stimulus:14
stimulus:15
stimulus:16
stimulus:17
stimulus:18
stimulus:19
stimulus:20
stimulus:21
stimulus:22
stimulus:23
stimulus:24
stimulus:25
stimulus:26
stimulus:27
stimulus:28
stimulus:29
stimulus:30
stimulus:31
stimulus:32
stimulus:33
stimulus:34
stimulus:35
stimulus:36
stimulus:37
stimulus:38
stimulus:39
stimulus:40


@@ -0,0 +1,40 @@
stimulus:101
stimulus:102
stimulus:103
stimulus:104
stimulus:105
stimulus:106
stimulus:107
stimulus:108
stimulus:109
stimulus:110
stimulus:111
stimulus:112
stimulus:113
stimulus:114
stimulus:115
stimulus:116
stimulus:117
stimulus:118
stimulus:119
stimulus:120
stimulus:121
stimulus:122
stimulus:123
stimulus:124
stimulus:125
stimulus:126
stimulus:127
stimulus:128
stimulus:129
stimulus:130
stimulus:131
stimulus:132
stimulus:133
stimulus:134
stimulus:135
stimulus:136
stimulus:137
stimulus:138
stimulus:139
stimulus:140


@@ -0,0 +1,19 @@
from utils.file_utils import load_preprocessed_data, get_epochs
def check_peaks():
"""
Sanity check for the "get_peaks" method
"""
import matplotlib.pyplot as plt
raw = load_preprocessed_data('002', 'N170')
epochs, _ = get_epochs(raw, [('face', 'intact')], picks='P7')
ch, latency, peak = epochs.average().get_peak(tmin=0.13, tmax=0.2, mode='neg', return_amplitude=True)
import numpy as np
plt.plot(epochs.times, np.squeeze(epochs.average().data.T))
plt.vlines([0.13, 0.2], -0.00001, 0.00001, colors='r', linestyles='dotted')
plt.vlines(latency, -0.00001, 0.00001, colors='gray', linestyles='dotted')
plt.show()
check_peaks()

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.


@@ -0,0 +1,70 @@
import os
import mne
import numpy as np
import pandas as pd
from mne_bids import (BIDSPath, read_raw_bids)
def _get_filepath(bids_root, subject_id, task):
bids_path = BIDSPath(subject=subject_id, task=task, session=task,
datatype='eeg', suffix='eeg',
root=bids_root)
# this is not a bids-conform file format, but a derivative/extension. Therefore we have to hack a bit
# Depending on path structure, this might push a warning.
fn = os.path.splitext(bids_path.fpath.__str__())[0]
assert (fn[-3:] == "eeg")
fn = fn[0:-3]
return fn
def load_precomputed_ica(bids_root, subject_id, task):
# returns ICA and badComponents (starting at component = 0).
# Note the existence of add_ica_info in case you want to plot something.
fn = _get_filepath(bids_root, subject_id, task) + 'ica'
# import the eeglab ICA. I used eeglab because the "amica" ICA is a bit more powerful than runica
ica = mne.preprocessing.read_ica_eeglab(fn + '.set')
# ica = custom_read_eeglab_ica(fn+'.set')
# Potentially for plotting one might want to copy over the raw.info, but in this function we don't have access / don't want to load it
# ica.info = raw.info
ica._update_ica_names()
badComps = np.loadtxt(fn + '.tsv', delimiter="\t")
badComps -= 1 # start counting at 0
# if only a single component is in the file, we get an error here because it is an ndarray with n-dim = 0.
if len(badComps.shape) == 0:
badComps = [float(badComps)]
return ica, badComps
def add_ica_info(raw, ica):
# This function exists due to a MNE bug: https://github.com/mne-tools/mne-python/issues/8581
# In case you want to plot your ICA components, this function will generate a ica.info
ch_raw = raw.info['ch_names']
ch_ica = ica.ch_names
ix = [k for k, c in enumerate(ch_raw) if c in ch_ica and not c in raw.info['bads']]
info = raw.info.copy()
mne.io.pick.pick_info(info, ix, copy=False)
ica.info = info
return ica
def load_precomputed_badData(bids_root, subject_id, task):
# return precomputed annotations and bad channels (first channel = 0)
fn = _get_filepath(bids_root, subject_id, task)
print(fn)
tmp = pd.read_csv(fn + 'badSegments.csv')
# print(tmp)
annotations = mne.Annotations(tmp.onset, tmp.duration, tmp.description)
# Unfortunately MNE assumes that csv files are in milliseconds and only txt files in seconds.. wth?
# annotations = mne.read_annotations(fn+'badSegments.csv')
badChannels = np.loadtxt(fn + 'badChannels.tsv', delimiter='\t')
badChannels = badChannels.astype(int)
badChannels -= 1 # start counting at 0
# badChannels = [int(b) for b in badChannels]
return annotations, badChannels

275 utils/ccs_eeg_utils.py Normal file

@@ -0,0 +1,275 @@
from osfclient import cli
import os
from mne_bids.read import _from_tsv, _drop
from mne_bids import (BIDSPath, read_raw_bids)
import mne
import numpy as np
import scipy.ndimage
import scipy.signal
from numpy import sin as sin
def read_annotations_core(bids_path, raw):
tsv = os.path.join(bids_path.directory, bids_path.update(suffix="events", extension=".tsv").basename)
_handle_events_reading_core(tsv, raw)
def _handle_events_reading_core(events_fname, raw):
"""Read associated events.tsv and populate raw.
Handle onset, duration, and description of each event.
"""
events_dict = _from_tsv(events_fname)
if ('value' in events_dict) and ('trial_type' in events_dict):
events_dict = _drop(events_dict, 'n/a', 'trial_type')
events_dict = _drop(events_dict, 'n/a', 'value')
descriptions = np.asarray([a + ':' + b for a, b in zip(events_dict["trial_type"], events_dict["value"])],
dtype=str)
# Get the descriptions of the events
elif 'trial_type' in events_dict:
# Drop events unrelated to a trial type
events_dict = _drop(events_dict, 'n/a', 'trial_type')
descriptions = np.asarray(events_dict['trial_type'], dtype=str)
# If we don't have a proper description of the events, perhaps we have
# at least an event value?
elif 'value' in events_dict:
# Drop events unrelated to value
events_dict = _drop(events_dict, 'n/a', 'value')
descriptions = np.asarray(events_dict['value'], dtype=str)
# Worst case, we go with 'n/a' for all events
else:
descriptions = 'n/a'
# Deal with "n/a" strings before converting to float
ons = [np.nan if on == 'n/a' else on for on in events_dict['onset']]
dus = [0 if du == 'n/a' else du for du in events_dict['duration']]
onsets = np.asarray(ons, dtype=float)
durations = np.asarray(dus, dtype=float)
# Keep only events where onset is known
good_events_idx = ~np.isnan(onsets)
onsets = onsets[good_events_idx]
durations = durations[good_events_idx]
descriptions = descriptions[good_events_idx]
del good_events_idx
# Add Events to raw as annotations
annot_from_events = mne.Annotations(onset=onsets,
duration=durations,
description=descriptions,
orig_time=None)
raw.set_annotations(annot_from_events)
return raw
# taken from the osfclient tutorial https://github.com/ubcbraincircuits/osfclienttutorial
class args:
def __init__(self, project, username=None, update=True, force=False, destination=None, source=None, recursive=False,
target=None, output=None, remote=None, local=None):
self.project = project
self.username = username
self.update = update # applies to upload, clone, and fetch
self.force = force # applies to fetch and upload
# upload arguments:
self.destination = destination
self.source = source
self.recursive = recursive
# remove argument:
self.target = target
# clone argument:
self.output = output
# fetch arguments:
self.remote = remote
self.local = local
def download_erpcore(task="MMN", subject=1, localpath="local/bids/"):
project = "9f5w7" # after recent change they put everything as "sessions" in one big BIDS file
arguments = args(project) # project ID
for extension in ["channels.tsv", "events.tsv", "eeg.fdt", "eeg.json", "eeg.set"]:
targetpath = '/sub-{:03d}/ses-{}/eeg/sub-{:03d}_ses-{}_task-{}_{}'.format(subject, task, subject, task, task,
extension)
print("Downloading {}".format(targetpath))
arguments.remote = "\\ERP_CORE_BIDS_Raw_Files/" + targetpath
arguments.local = localpath + targetpath
cli.fetch(arguments)
def simulate_ICA(dims=4):
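# Simulate a linear mixture x = A @ s of 2 or 4 source signals (a boxcar, a triangle
# staircase with trend and, for dims=4, additionally a sum of sinusoids and uniform noise)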
A = [[-0.3, 0.2], [.2, 0.1]]
sample_rate = 100.0
nsamples = 1000
t = np.arange(nsamples) / sample_rate
s = []
# boxcars
s.append(np.mod(np.array(range(0, nsamples)), 250) > 125)
# a triangle staircase + trend
s.append((np.mod(np.array(range(0, nsamples)), 100) + np.array(range(0, nsamples)) * 0.05) / 100)
if dims == 4:
A = np.array([[.7, 0.3, 0.2, -0.5], [0.2, -0.5, -0.2, 0.3], [-.3, 0.1, 0, 0.2], [-0.5, -0.3, -0.2, 0.8]])
# some sinusoids
s.append(np.cos(2 * np.pi * 0.5 * t) + 0.2 * np.sin(2 * np.pi * 2.5 * t + 0.1) + 0.2 * np.sin(
2 * np.pi * 15.3 * t) + 0.1 * np.sin(2 * np.pi * 16.7 * t + 0.1) + 0.1 * np.sin(2 * np.pi * 23.45 * t + .8))
# uniform noise
s.append(0.2 * np.random.rand(nsamples))
x = np.matmul(A, np.array(s))
return x
def spline_matrix(x, knots):
# bah, spline-matrices are a pain to implement.
# But package "patsy" with function "bs" crashed my notebook...
# Anyway, knots define where the spline should be anchored. The default should work
# X defines where the spline set should be evaluated.
# e.g. call using: spline_matrix(np.linspace(0,0.95,num=100))
import scipy.interpolate as si
x_tup = si.splrep(knots, knots, k=3)
nknots = len(x_tup[0])
x_i = np.empty((len(x), nknots - 4))
for i in range(nknots - 4):
vec = np.zeros(nknots)
vec[i] = 1.0
x_list = list(x_tup)
x_list[1] = vec.tolist()
x_i[:, i] = si.splev(x, x_list)
return x_i
def simulate_TF(signal=1, noise=True):
# signal can be 1 (sinusoid steps), 2 (image) or 3 (chirp)
import imageio
if signal == 2:
im = imageio.imread('ex9_tf.png')
im = im[0:60, :, 3] - im[0:60, :, 1]
# im = scipy.ndimage.zoom(im,[1,1])
im = np.flip(im, axis=0)
# plt.imshow(im,origin='lower')
# sig = (scipy.fft.irfft(im.T,axis=1))
nov = 10  # alternative: im.shape[0] * 0.5
nperseg = 50  # alternative: im.shape[0] - 1
t, sig = scipy.signal.istft(im, fs=500, noverlap=nov, nperseg=nperseg)
sig = sig / 300 # normalize
elif signal == 3:
sig = scipy.signal.chirp(t=np.arange(0, 10, 1 / 500), f0=1, f1=100, t1=2, method='linear', phi=90)
elif signal == 1:
x = np.arange(0, 2, 1 / 500)
sig_steps = np.concatenate([1.0 * sin(2 * np.pi * x * 50), 1.2 * sin(2 * np.pi * x * 55 + np.pi / 2),
0.8 * sin(2 * np.pi * x * 125 + np.pi),
1.0 * sin(2 * np.pi * x * 120 + 3 * np.pi / 2)])
sig = sig_steps
if noise:
sig = sig + 0.1 * np.std(sig) * np.random.randn(sig.shape[0])
return sig
def get_TF_dataset(subject_id='002', bids_root="../local/bids"):
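# Load the P3 task of one subject and return epochs from -1s to 2s around the response events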
bids_path = BIDSPath(subject=subject_id, task="P3", session="P3",
datatype='eeg', suffix='eeg',
root=bids_root)
raw = read_raw_bids(bids_path)
read_annotations_core(bids_path, raw)
# raw.pick_channels(["Cz"])#["Pz","Fz","Cz"])
raw.load_data()
raw.set_montage('standard_1020', match_case=False)
evts, evts_dict = mne.events_from_annotations(raw)
wanted_keys = [e for e in evts_dict.keys() if "response" in e]
evts_dict_stim = dict((k, evts_dict[k]) for k in wanted_keys if k in evts_dict)
epochs = mne.Epochs(raw, evts, evts_dict_stim, tmin=-1, tmax=2)
return epochs
def get_classification_dataset(subject=1, typeInt=4):
# TypeInt:
# Task 1 (open and close left or right fist)
# Task 2 (imagine opening and closing left or right fist)
# Task 3 (open and close both fists or both feet)
# Task 4 (imagine opening and closing both fists or both feet)
assert (typeInt >= 1)
assert (typeInt <= 4)
from mne.io import concatenate_raws, read_raw_edf
from mne.datasets import eegbci
tmin, tmax = -1., 4.
runs = [3, 7, 11]
runs = [r + typeInt - 1 for r in runs]
print("loading subject {} with runs {}".format(subject, runs))
if typeInt <= 1:
event_id = dict(left=2, right=3)
else:
event_id = dict(hands=2, feet=3)
raw_fnames = eegbci.load_data(subject, runs)
raws = [read_raw_edf(f, preload=True) for f in raw_fnames]
raw = concatenate_raws(raws)
raw.filter(7., 30., fir_design='firwin', skip_by_annotation='edge')
eegbci.standardize(raw) # set channel names
montage = mne.channels.make_standard_montage('standard_1005')
raw.set_montage(montage)
raw.rename_channels(lambda x: x.strip('.'))
events, _ = mne.events_from_annotations(raw, event_id=dict(T1=2, T2=3))
picks = mne.pick_types(raw.info, meg=False, eeg=True, stim=False, eog=False,
exclude='bads')
# Read epochs (train will be done only between 1 and 2s)
# Testing will be done with a running classifier
epochs = mne.Epochs(raw, events, event_id, tmin, tmax, proj=True, picks=picks,
baseline=None, preload=True)
return (epochs)
def ex8_simulateData(width=40, n_subjects=15, signal_mean=100, noise_between=30, noise_within=10, smooth_sd=4,
rng_seed=43):
# adapted and extended from https://mne.tools/dev/auto_tutorials/discussions/plot_background_statistics.html#sphx-glr-auto-tutorials-discussions-plot-background-statistics-py
rng = np.random.RandomState(rng_seed)
# For each "subject", make a smoothed noisy signal with a centered peak
X = noise_within * rng.randn(n_subjects, width, width)
# Add three signals
X[:, width // 6 * 2, width // 6 * 2] -= signal_mean / 3 * 3 + rng.randn(n_subjects) * noise_between
X[:, width // 6 * 4, width // 6 * 4] += signal_mean / 3 * 2 + rng.randn(n_subjects) * noise_between
X[:, width // 6 * 5, width // 6 * 5] += signal_mean / 3 * 2 + rng.randn(n_subjects) * noise_between
# Spatially smooth with a 2D Gaussian kernel
size = width // 2 - 1
gaussian = np.exp(-(np.arange(-size, size + 1) ** 2 / float(smooth_sd ** 2)))
for si in range(X.shape[0]):
for ri in range(X.shape[1]):
X[si, ri, :] = np.convolve(X[si, ri, :], gaussian, 'same')
for ci in range(X.shape[2]):
X[si, :, ci] = np.convolve(X[si, :, ci], gaussian, 'same')
# X += 10 * rng.randn(n_subjects, width, width)
return X
def stc_plot2img(h, title="SourceEstimate", closeAfterwards=False, crop=True):
h.add_text(0.1, 0.9, title, 'title', font_size=16)
screenshot = h.screenshot()
if closeAfterwards:
h.close()
if crop:
nonwhite_pix = (screenshot != 255).any(-1)
nonwhite_row = nonwhite_pix.any(1)
nonwhite_col = nonwhite_pix.any(0)
screenshot = screenshot[nonwhite_row][:, nonwhite_col]
return screenshot

89 utils/file_utils.py Normal file

@@ -0,0 +1,89 @@
import os
import mne
def load_bad_annotations(filepath, fileending="badSegments.csv"):
"""
Loads the annotations for bad segments
:param filepath: The path to the file we want to load
:param fileending: Depending on whether the subject was preprocessed manually, the filename ending differs.
The default are file endings of the given preprocessed annotations: "badSegments.csv"
For the manual preprocessed annotations, the file endings are: "badannotations.csv"
:return: The mne annotations
"""
if os.path.isfile(filepath + "_" + fileending):
return mne.read_annotations(filepath + "_" + fileending)
def load_preprocessed_data(subject, dataset):
"""
Load the raw object as well as the annotations of the preprocessed file
:param subject: The subject, for which we want to load the raw object
:param dataset: The currently viewed dataset
:return: The raw object
"""
folder = "Dataset\\" + dataset + "\\sub-" + subject + "\\ses-" + dataset + "\\eeg\\"
filepath = folder + "sub-" + subject + "_task-" + dataset
raw = mne.io.read_raw_fif(filepath + "_cleaned.fif")
return raw
def get_keys_for_events(stimulus=None, condition=None):
"""
For a given stimulus and condition get all the event keys.
:param stimulus: Either 'face' or 'car' or 'None' for no stimulus
:param condition: Either 'intact' or 'scrambled' or 'None' for no condition
:return: A list of keys, or 'stimulus' if neither stimulus nor condition was given
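Example: get_keys_for_events('face', 'intact') returns ['stimulus:1', ..., 'stimulus:40']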
"""
if stimulus == 'face':
if condition == 'intact':
return ["stimulus:{}".format(k) for k in range(1, 41)]
elif condition == 'scrambled':
return ["stimulus:{}".format(k) for k in range(101, 141)]
else: # All faces
return ["stimulus:{}".format(k) for k in list(range(1, 41)) + list(range(101, 141))]
elif stimulus == 'car':
if condition == 'intact':
return ["stimulus:{}".format(k) for k in range(41, 81)]
elif condition == 'scrambled':
return ["stimulus:{}".format(k) for k in range(141, 181)]
else: # All cars
return ["stimulus:{}".format(k) for k in list(range(41, 81)) + list(range(141, 181))]
else: # If no stimulus is given
if condition == 'intact':
return ["stimulus:{}".format(k) for k in range(1, 41) and range(41, 81)]
elif condition == 'scrambled':
return ["stimulus:{}".format(k) for k in list(range(101, 141)) + list(range(141, 181))]
else: # Every stimulus
return "stimulus"
def get_epochs(raw, conditions=None, picks=None, tmin=-0.1, tmax=1):
"""
Returns the epochs for a given dataset
:param raw: the dataset
:param conditions: a list of tuples of the form [(stimulus, condition), (stimulus, condition)],
i.e. [('face', None), ('car', 'scrambled')] returns the epochs where the stimulus is a face or the stimulus+condition is car+scrambled;
default is None, i.e. everything
:param picks: a list. Additional criteria for picking the epochs, e.g. channels, etc.
:param tmin: onset before the event
:param tmax: end after the event
:return: The epochs and the dictionary of the selected event keys
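Example (illustrative): epochs, keys = get_epochs(raw, [('face', 'intact')], picks='P7', tmin=-0.2, tmax=0.5)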
"""
events, events_dict = mne.events_from_annotations(raw)
events_dict_key = {}
if conditions is None:
conditions = [(None, None)]
for condition in conditions:
wanted_keys = get_keys_for_events(condition[0], condition[1])
if wanted_keys == "stimulus":
wanted_keys = [e for e in events_dict.keys() if "stimulus" in e]
events_dict_key.update(dict((k, events_dict[k]) for k in wanted_keys if k in events_dict))
epochs = mne.Epochs(raw, events, events_dict_key, tmin=tmin, tmax=tmax, reject_by_annotation=False, picks=picks)
epochs.drop_bad()
return epochs, events_dict_key

89 utils/plot_utils.py Normal file

@@ -0,0 +1,89 @@
import mne
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import cm
from matplotlib.colors import LogNorm
from utils.file_utils import load_preprocessed_data, get_keys_for_events
def plot_grand_average(dataset):
"""
Plot the grand average ERPs
:param dataset: the dataset for which the grand average is computed
"""
evtss = [('face', 'intact'), ('face', 'scrambled'), ('car', 'intact'), ('car', 'scrambled')]
chs = ['P7', 'PO7', 'P8', 'PO8']
for ch in chs:
fi = []
fs = []
ci = []
cs = []
for i in range(1, 41):
subj = "0" + str(i)
if len(str(i)) == 1:
subj = "0" + subj
# Load preprocessed .fif data files
raw = load_preprocessed_data(subj, dataset)
# Epoch the data
for ev in evtss:
wanted_keys = get_keys_for_events(ev[0], ev[1])
events, events_dict = mne.events_from_annotations(raw)
events_dict_key = dict((k, events_dict[k]) for k in wanted_keys if k in events_dict)
epochs = mne.Epochs(raw, events, events_dict_key, tmin=-0.1, tmax=1, reject_by_annotation=True,
picks=[ch])
# Sort the average evoked response into its condition bucket
if ev[0] == 'face' and ev[1] == 'intact':
fi.append(epochs.average(picks=[ch]))
elif ev[0] == 'face' and ev[1] == 'scrambled':
fs.append(epochs.average(picks=[ch]))
elif ev[0] == 'car' and ev[1] == 'intact':
ci.append(epochs.average(picks=[ch]))
elif ev[0] == 'car' and ev[1] == 'scrambled':
cs.append(epochs.average(picks=[ch]))
ga_fi = mne.grand_average(fi)
ga_ci = mne.grand_average(ci)
ga_fs = mne.grand_average(fs)
ga_cs = mne.grand_average(cs)
ga_fi.comment = 'Face Intact'
ga_ci.comment = 'Car Intact'
ga_fs.comment = 'Face Scrambled'
ga_cs.comment = 'Car Scrambled'
mne.viz.plot_compare_evokeds([ga_fi, ga_ci, ga_fs, ga_cs], picks=ch, colors=['blue', 'red', 'blue', 'red'],
linestyles=['solid', 'solid', 'dotted', 'dotted'])
def plot_tf_cluster(F, clusters, cluster_p_values, freqs, times):
"""
Plot the F-statistic values of permutation clusters with p-values <= 0.05 in color and > 0.05 in grey.
:param F: F-Statistics of the permutation clusters
:param clusters: all permutation clusters
:param cluster_p_values: p-values of the clusters
:param freqs: frequency domain
:param times: time domain
"""
good_c = np.nan * np.ones_like(F)
for clu, p_val in zip(clusters, cluster_p_values):
if p_val <= 0.05:
good_c[clu] = F[clu]
bbox = [times[0], times[-1], freqs[0], freqs[-1]]
plt.imshow(F, aspect='auto', origin='lower', cmap=cm.gray, extent=bbox, interpolation='None')
a = plt.imshow(good_c, cmap=cm.RdBu_r, aspect='auto', origin='lower', extent=bbox, interpolation='None')
plt.colorbar(a)
plt.xlabel('Time (s)')
plt.ylabel('Frequency (Hz)')
plt.show()
def plot_oscillation_bands(condition):
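"""
Plot baseline-corrected topographic power maps (0 to 1s) for the classical oscillation bands:
delta (0-4Hz), theta (4-8Hz), alpha (8-12Hz), beta (13-30Hz) and gamma (30-45Hz)
:param condition: the AverageTFR object (e.g. a grand average) to plot
"""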
fig, axis = plt.subplots(1, 5, figsize=(25, 5))
condition.plot_topomap(baseline=(-0.2, 0), fmin=0, fmax=4, title='Delta', axes=axis[0], show=False, vmin=0, vmax=1.5, tmin=0, tmax=1)
condition.plot_topomap(baseline=(-0.2, 0), fmin=4, fmax=8, title='Theta', axes=axis[1], show=False, vmin=0, vmax=0.7, tmin=0, tmax=1)
condition.plot_topomap(baseline=(-0.2, 0), fmin=8, fmax=12, title='Alpha', axes=axis[2], show=False, vmin=-0.15, vmax=0.2, tmin=0, tmax=1)
condition.plot_topomap(baseline=(-0.2, 0), fmin=13, fmax=30, title='Beta', axes=axis[3], show=False, vmin=-0.18, vmax=0.2, tmin=0, tmax=1)
condition.plot_topomap(baseline=(-0.2, 0), fmin=30, fmax=45, title='Gamma', axes=axis[4], vmin=0, vmax=0.2, tmin=0, tmax=1)