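"""
Preprocessing pipeline for the N170 EEG dataset.

For every subject the script loads the BIDS recording, band-pass filters it, removes bad
segments and ocular ICA components (marked manually for subjects 001, 003 and 014, otherwise
taken from the provided precomputed files), interpolates bad channels, re-references the data
to the average reference and saves the cleaned recording to a .fif file.
"""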
import os

import mne
from mne_bids import BIDSPath, read_raw_bids

from utils.ccs_eeg_semesterproject import load_precomputed_badData, load_precomputed_ica
from utils.ccs_eeg_utils_reduced import read_annotations_core
def load_subject(subject, dataset):
    """
    Load the EEG data of a single subject.
    :param subject: the subject whose data will be loaded
    :param dataset: the dataset which will be loaded
    :return: the subject's raw data
    """
    bids_path = BIDSPath(subject=subject, task=dataset, session=dataset, datatype='eeg', suffix='eeg',
                         root='Dataset\\' + dataset)
    raw = read_raw_bids(bids_path)
    # Add annotations
    read_annotations_core(bids_path, raw)
    return raw
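# Note: read_raw_bids typically returns the recording without preloading it into memory,
# which is why the __main__ block below calls data.load_data() before filtering.
# Usage sketch: raw = load_subject('001', 'N170')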
def load_given_preprocessing_data(subject, dataset):
    """
    Load the provided pre-processing information for a given subject.
    This is used for all subjects that were not manually preprocessed.
    :param subject: the subject to load the data for
    :param dataset: the dataset currently viewed
    :return: the bad annotations, bad channels, ICA object and bad ICs
    """
    anno, bc = load_precomputed_badData("Dataset\\" + dataset + "\\", subject,
                                        dataset)  # Loads annotations and bad channels
    ica, bad_comp = load_precomputed_ica("Dataset\\" + dataset + "\\", subject,
                                         dataset)  # Loads ICA and bad components
    return anno, bc, ica, bad_comp
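# The returned objects are consumed in the __main__ block below: the annotations are appended to
# raw.annotations, the bad channels are interpolated, and the precomputed ICA is applied to the
# raw data with the bad components passed via its 'exclude' argument.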
def save_subject(raw, subject, dataset):
    """
    Save a raw object to a .fif file.
    :param raw: the raw object to be saved
    :param subject: the subject that the raw object belongs to
    :param dataset: the dataset currently viewed
    """
    folder = "Dataset\\" + dataset + "\\sub-" + subject + "\\ses-" + dataset + "\\eeg\\"
    filepath = folder + "sub-" + subject + "_task-" + dataset
    raw.save(filepath + "_cleaned.fif", overwrite=True)
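# Note: since the file name ends in '_cleaned.fif' rather than the usual 'raw.fif' pattern,
# MNE will likely emit a naming-convention warning when saving; the file is still written and
# can be read back with mne.io.read_raw_fif(filepath + "_cleaned.fif").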
def filter_data(raw):
    """
    Filter the data of a single subject with a band-pass filter.
    The lower bound is 0.5Hz to remove slow drifts.
    The upper bound is 48Hz to remove high-frequency noise, including the power line spike at 60Hz.
    :param raw: the data to be filtered
    :return: the filtered data
    """
    raw.filter(0.5, 48, fir_design='firwin')
    return raw
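# A quick sanity check of the band-pass is to inspect the power spectrum after filtering,
# e.g. raw.plot_psd(fmax=80) (or raw.compute_psd(fmax=80).plot() on newer MNE versions).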
def clean_data(raw, subject, dataset, cleaned=False):
    """
    Clean the data of a single subject, i.e. find the bad segments and channels of a subject.
    If these were already found, they are loaded onto the data.
    :param raw: the subject data
    :param subject: the subject whose data will be viewed
    :param dataset: the dataset currently viewed
    :param cleaned: if True, the data was already viewed and the 'BAD_' annotations as well as the
                    bad channels will be loaded
    :return: the bad channels
    """
    channels = None
    filepath = "Dataset/preprocessed/sub-" + subject + "_task-" + dataset + "_badannotations.csv"

    # If nothing was marked yet, plot the data to mark bad segments
    if not cleaned:
        raw.plot(n_channels=len(raw.ch_names), block=True, scalings=40e-6)
        # Get indices of bad annotations
        bad_idx = [idx for (idx, annot) in enumerate(raw.annotations) if annot['description'] == "BAD_"]
        # If bad intervals were found, save them
        if bad_idx:
            raw.annotations[bad_idx].save(filepath)

    if os.path.isfile(filepath):
        annotations = mne.read_annotations(filepath)
        raw.annotations.append(annotations.onset, annotations.duration, annotations.description)

    # Set the bad channels for each subject
    if subject == '001':
        channels = ['F8']
    elif subject == '003':
        channels = []
    elif subject == '014':
        channels = []

    return channels
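# When cleaned=False, bad segments are marked interactively in the raw.plot() browser
# (annotation mode is typically toggled with the 'a' key); the default 'BAD_' label is what the
# list comprehension above filters on. Bad channels could also be marked by clicking a channel
# in the same window, but here they are hard-coded per subject instead.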
def run_ica(raw, dataset, subject, search='manual'):
    """
    Run Independent Component Analysis. Depending on the 'search' mode, it is either used to find
    bad ICs or to exclude bad ICs.
    :param raw: the data to be preprocessed
    :param dataset: the dataset currently viewed
    :param subject: the subject currently viewed
    :param search: default value 'manual': the user views different plots for all ICs found
                   'eog': uses the EOG channels to find bad ICs
                   'done': applies the bad ICs that were found
    :return: the raw data, with the bad ICs removed if search is 'done'
    """
    # First filter the data to remove slow drifts - this is done with 1Hz, as proposed by the MNE tutorial at:
    # https://mne.tools/dev/auto_tutorials/preprocessing/plot_40_artifact_correction_ica.html#filtering-to-remove-slow-drifts
    ica_raw = raw.copy()
    ica_raw.filter(l_freq=1, h_freq=None)

    # Then run ICA
    ica = mne.preprocessing.ICA(method="fastica", random_state=123)  # Use a random state for reproducible results
    ica.fit(ica_raw, verbose=True)

    if search == 'manual':
        ica_raw.load_data()
        ica.plot_properties(inst=ica_raw, dB=False, topomap_args={'extrapolate': 'head', 'contours': 0},
                            psd_args={'fmin': 0, 'fmax': 50}, picks=['eeg'])
        ica.plot_sources(ica_raw)
    elif search == 'eog':
        eog_indices, _ = ica.find_bads_eog(raw)
        ica.exclude = eog_indices
        print('BAD COMPONENTS VIA EOG: ' + str(eog_indices))
        ica.plot_overlay(ica_raw, exclude=eog_indices)
    elif search == 'done':
        exclude = None
        if subject == '001':
            exclude = [0, 1, 2, 4, 8, 14, 16, 25]  # Through eog: 0, 1
        elif subject == '003':
            exclude = [0, 2]  # Through eog: 0, 2
        elif subject == '014':
            exclude = [0, 1, 9]  # Through eog: 0, 1
        # Apply ica to the raw object
        raw.load_data()
        # ica.plot_overlay(ica_raw,
        #                  title="Signals before (red) applying ICA and after (black) applying ICA on subject 003",
        #                  exclude=exclude)
        raw = ica.apply(raw, exclude=exclude)

    # Lastly save the ica to a file
    folder = "Dataset\\" + dataset + "\\sub-" + subject + "\\ses-" + dataset + "\\eeg\\"
    filepath = folder + "sub-" + subject + "_task-" + dataset
    ica.save(filepath + "-ica.fif")
    return raw
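# The fitted ICA saved above can be restored later without refitting, e.g.
#   ica = mne.preprocessing.read_ica(filepath + "-ica.fif")
# using the same filepath as in ica.save() above.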
if __name__ == '__main__':
    ds = 'N170'
    for i in range(1, 41):
        # Zero-pad the subject number to three digits, e.g. 1 -> '001'
        subj = str(i).zfill(3)
        data = load_subject(subj, ds)
        # Load data into memory
        data.load_data()
        # Filter data with a bandpass filter
        filter_data(data)
        if subj in ["001", "003", "014"]:
            # Manual preprocessing
            # Clean the data
            b_ch = clean_data(data, subj, ds, True)
            # Run ICA
            data.set_channel_types({'HEOG_left': 'eog', 'HEOG_right': 'eog', 'VEOG_lower': 'eog'})
            data.set_montage('standard_1020', match_case=False)
            data = run_ica(data, ds, subj, 'done')
        else:
            # Provided cleaning and preprocessing information
            ann, b_ch, ica_pre, bad_component = load_given_preprocessing_data(subj, ds)
            data.annotations.append(ann.onset, ann.duration, ann.description)
            data = ica_pre.apply(data, exclude=bad_component)
        # Mark the bad channels and interpolate them
        data.info['bads'].extend(b_ch)
        data.interpolate_bads()
        # Re-reference the data to the average reference
        data_re = data.copy().set_eeg_reference('average')
        # Save the preprocessed and cleaned data set
        save_subject(data_re, subj, ds)
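# For each subject this writes one cleaned recording via save_subject() above, e.g. for subject 001:
# Dataset\N170\sub-001\ses-N170\eeg\sub-001_task-N170_cleaned.fif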