Initial commit
This commit is contained in:
182
preprocessing_and_cleaning.py
Normal file
182
preprocessing_and_cleaning.py
Normal file
@@ -0,0 +1,182 @@
|
||||
import os
|
||||
import mne
|
||||
|
||||
from mne_bids import (BIDSPath, read_raw_bids)
|
||||
from utils.ccs_eeg_semesterproject import load_precomputed_badData, load_precomputed_ica
|
||||
from utils.ccs_eeg_utils import read_annotations_core
|
||||
|
||||
|
||||
def load_subject(subject, dataset):
    """
    Read the raw EEG recording of one subject from the BIDS dataset.

    :param subject: the subject identifier whose recording is loaded
    :param dataset: the dataset name (also used as the BIDS task and session)
    :return: the raw data object with the dataset annotations attached
    """
    path = BIDSPath(subject=subject,
                    task=dataset,
                    session=dataset,
                    datatype='eeg',
                    suffix='eeg',
                    root='Dataset\\' + dataset)
    subject_raw = read_raw_bids(path)
    # Attach the annotations shipped with the dataset to the raw object
    read_annotations_core(path, subject_raw)
    return subject_raw
|
||||
|
||||
|
||||
def load_given_preprocessing_data(subject, dataset):
    """
    Fetch the precomputed cleaning information for one subject.

    Used for every subject that was not manually preprocessed.

    :param subject: the subject to load the data for
    :param dataset: the dataset currently viewed
    :return: the bad annotations, bad channels, ica object, bad ICs
    """
    root = "Dataset\\" + dataset + "\\"
    # Precomputed 'BAD_' annotations and the list of bad channels
    annotations, bad_channels = load_precomputed_badData(root, subject, dataset)
    # Precomputed ICA decomposition and the components marked as artifacts
    ica, bad_components = load_precomputed_ica(root, subject, dataset)
    return annotations, bad_channels, ica, bad_components
|
||||
|
||||
|
||||
def save_subject(raw, subject, dataset):
    """
    Write a raw object to a .fif file inside the subject's BIDS folder.

    :param raw: the raw object to be saved
    :param subject: the subject, which the raw object belongs to
    :param dataset: the dataset currently viewed
    """
    base = ("Dataset\\" + dataset + "\\sub-" + subject + "\\ses-" + dataset
            + "\\eeg\\sub-" + subject + "_task-" + dataset)
    # Overwrite any file left over from an earlier preprocessing run
    raw.save(base + "_cleaned.fif", overwrite=True)
|
||||
|
||||
|
||||
def filter_data(raw):
    """
    Band-pass filter the data of a single subject.

    The lower edge of 0.5 Hz compensates slow drifts; the upper edge of
    48 Hz attenuates high frequencies, including power-line interference.
    (The previous docstring said 50 Hz, which contradicted the code.)

    :param raw: the data to be filtered
    :return: the raw object after its filter method has been applied
    """
    low_cut, high_cut = 0.5, 48
    raw.filter(low_cut, high_cut, fir_design='firwin')
    return raw
|
||||
|
||||
|
||||
def clean_data(raw, subject, dataset, cleaned=False):
    """
    Clean the data of a single subject: mark bad segments interactively and
    look up the subject's bad channels. Segments marked in an earlier run
    are loaded onto the data from the stored annotation file.

    :param raw: the subject data
    :param subject: the subject which data will be viewed
    :param dataset: the dataset currently viewed
    :param cleaned: if True the data was already viewed; the stored 'BAD_'
                    annotations are loaded instead of plotting
    :return: the list of bad channels, or None for an unknown subject
    """
    folder = "Dataset\\" + dataset + "\\sub-" + subject + "\\ses-" + dataset + "\\eeg\\"
    filepath = folder + "sub-" + subject + "_task-" + dataset

    if not cleaned:
        # Interactive pass: plot the data so bad segments can be marked
        raw.plot(n_channels=len(raw.ch_names), block=True, scalings=40e-6)
        # Indices of the annotations the user marked as 'BAD_'
        bad = [i for i, annot in enumerate(raw.annotations)
               if annot['description'] == "BAD_"]
        if bad:
            # Persist the manually marked intervals for later runs
            raw.annotations[bad].save(filepath + "_badannotations.csv")

    # Load previously stored bad intervals onto the data, if any exist
    if os.path.isfile(filepath + "_badannotations.csv"):
        stored = mne.read_annotations(filepath + "_badannotations.csv")
        raw.annotations.append(stored.onset, stored.duration, stored.description)

    # Bad channels determined by manual inspection, per subject
    bad_channels = {
        '001': ['F8'],  # Maybe also FP2?
        '003': [],
        '014': [],
    }
    return bad_channels.get(subject)
|
||||
|
||||
|
||||
def run_ica(raw, dataset, subject, search='manual'):
    """
    Runs Independent Component Analysis. Depending on the 'search' mode, it is
    either used to find bad ICs or to exclude bad ICs.

    :param raw: the data to be preprocessed
    :param dataset: the dataset currently viewed
    :param subject: the subject currently viewed
    :param search: default value 'manual': The user views different plots for all ICs found
                   'eog' : Uses the eog channels to find bad ICs
                   'done' : Applies the bad ICs that were found
    :return: the raw object; in 'done' mode the ICA-cleaned raw object
    """
    # First filter a copy to remove slow drifts - this is done with 1Hz, as proposed by the mne Tutorial at:
    # https://mne.tools/dev/auto_tutorials/preprocessing/plot_40_artifact_correction_ica.html#filtering-to-remove-slow-drifts
    ica_raw = raw.copy()
    ica_raw.filter(l_freq=1, h_freq=None)

    # Then run ICA; fixed random state for reproducable results
    ica = mne.preprocessing.ICA(method="fastica", random_state=123)
    ica.fit(ica_raw, verbose=True)

    if search == 'manual':
        ica_raw.load_data()
        # Inspect each component's topography, spectrum and sources by eye
        ica.plot_properties(inst=ica_raw, dB=False, topomap_args={'extrapolate': 'head', 'contours': 0},
                            psd_args={'fmin': 0, 'fmax': 50}, picks=['eeg'])
        ica.plot_sources(ica_raw)
    elif search == 'eog':
        # Let mne correlate components against the EOG channels
        eog_indices, _ = ica.find_bads_eog(raw)
        ica.exclude = eog_indices
        print('BAD COMPONENTS VIA EOG: ' + str(eog_indices))
        ica.plot_overlay(ica_raw, exclude=eog_indices)
    elif search == 'done':
        # BUG FIX: the exclusion lookup previously read the module-level
        # variable 'subj' instead of the 'subject' parameter, so calling this
        # function from anywhere but this file's __main__ loop raised a
        # NameError.
        exclude = None
        if subject == '001':
            exclude = [0, 1, 2, 4, 8, 14, 16, 25]  # Through eog: 0,1
        elif subject == '003':
            exclude = [0, 2]  # Through eog: 0, 2
        elif subject == '014':
            exclude = [0, 1, 9]  # Through eog: 0,1
        # Apply ica to the raw object (the 0.5-48Hz filtered data, not the
        # 1Hz-filtered copy used for fitting)
        raw.load_data()
        raw = ica.apply(raw, exclude=exclude)

    # Lastly save the ica to a file
    # NOTE(review): original indentation was lost in this scrape; the save may
    # have belonged inside the 'done' branch only - confirm against history.
    folder = "Dataset\\" + dataset + "\\sub-" + subject + "\\ses-" + dataset + "\\eeg\\"
    filepath = folder + "sub-" + subject + "_task-" + dataset
    ica.save(filepath + "-ica.fif")
    return raw
|
||||
|
||||
|
||||
if __name__ == '__main__':
    ds = 'N170'
    # Process subjects 001 .. 040
    for i in range(1, 41):
        # Zero-pad the subject number to three digits (idiomatic replacement
        # for the previous manual "0" + str(i) padding)
        subj = str(i).zfill(3)
        data = load_subject(subj, ds)
        # Load data into memory
        data.load_data()
        # Filter data with a bandpass filter
        filter_data(data)
        if subj in ["001", "003", "014"]:
            # Manual preprocessing
            # Clean the data (annotations already marked, so cleaned=True)
            b_ch = clean_data(data, subj, ds, True)
            # Run ICA: mark the EOG channels and set a montage first
            data.set_channel_types({'HEOG_left': 'eog', 'HEOG_right': 'eog', 'VEOG_lower': 'eog'})
            data.set_montage('standard_1020', match_case=False)
            data = run_ica(data, ds, subj, 'done')
        else:
            # Provided cleaning and preprocessing information
            ann, b_ch, ica_pre, bad_component = load_given_preprocessing_data(subj, ds)
            data.annotations.append(ann.onset, ann.duration, ann.description)
            data = ica_pre.apply(data, exclude=bad_component)
        # Interpolate bad channels
        data.interpolate_bads(b_ch)
        # Re-Reference the data to the average of all electrodes
        data_re = data.copy().set_eeg_reference('average')
        # Save preprocessed and cleaned data set
        save_subject(data_re, subj, ds)
|
||||
Reference in New Issue
Block a user