Files
semesterproject_lecture_eeg/preprocessing_and_cleaning.py

183 lines
7.6 KiB
Python

import os
import mne
from mne_bids import (BIDSPath, read_raw_bids)
from utils.ccs_eeg_semesterproject import load_precomputed_badData, load_precomputed_ica
from utils.ccs_eeg_utils_reduced import read_annotations_core
def load_subject(subject, dataset):
"""
Load the eeg data of a subject
:param subject: The subject of which the data will be loaded
:param dataset: The dataset which will be loaded
:return: the subject data
"""
bids_path = BIDSPath(subject=subject, task=dataset, session=dataset, datatype='eeg', suffix='eeg',
root='Dataset\\' + dataset)
raw = read_raw_bids(bids_path)
# Add annotations
read_annotations_core(bids_path, raw)
return raw
def load_given_preprocessing_data(subject, dataset):
"""
Loads given pre-processing information for a given subject.
This is used for all subjects which were not manually preprocessed
:param subject: The subject to load the data for
:param dataset: The dataset currently viewed
:return: The bad annotations, bad channels, ica object, bad ICs
"""
anno, bc = load_precomputed_badData("Dataset\\" + dataset + "\\", subject,
dataset) # Loads annotations and bad channels
ica, bad_comp = load_precomputed_ica("Dataset\\" + dataset + "\\", subject,
dataset) # Loads ica and bad components
return anno, bc, ica, bad_comp
def save_subject(raw, subject, dataset):
"""
Save a raw object to a .fif file
:param raw: the raw object to be saved
:param subject: the subject, which the raw object belongs to
:param dataset: the dataset currently viewed
"""
folder = "Dataset\\" + dataset + "\\sub-" + subject + "\\ses-" + dataset + "\\eeg\\"
filepath = folder + "sub-" + subject + "_task-" + dataset
raw.save(filepath + "_cleaned.fif", overwrite=True)
def filter_data(raw):
"""
Filter the data of a single subject with a bandpass filter.
The lower bound ist 0.5Hz to compensate the slow drifts.
The upper bound is 50Hz to compensate the high frequencies, including the power line spike at 60Hz
:param raw: The data to be filtered
:return: The filtered data
"""
raw.filter(0.5, 48, fir_design='firwin')
return raw
def clean_data(raw, subject, dataset, cleaned=False):
"""
Clean the data of a single subject, meaning finding the bad segments and channels of a subject.
If these were already found, they are loaded onto the data
:param raw: the subject data
:param subject: the subject which data will be viewed
:param cleaned: If True the data was already viewed and the 'BAD_' annotations as well as the bad channels will be loaded
:return: the bad channels
"""
channels = None
folder = "Dataset\\" + dataset + "\\sub-" + subject + "\\ses-" + dataset + "\\eeg\\"
filepath = folder + "sub-" + subject + "_task-" + dataset
# If nothing was marked yet, plot the data to mark bad segments
if not cleaned:
raw.plot(n_channels=len(raw.ch_names), block=True, scalings=40e-6)
# Get indices of bad annotations
bad_idx = [idx for (idx, annot) in enumerate(raw.annotations) if annot['description'] == "BAD_"]
# If bad intervals were found save
if bad_idx:
raw.annotations[bad_idx].save(filepath + "_badannotations.csv")
if os.path.isfile(filepath + "_badannotations.csv"):
annotations = mne.read_annotations(filepath + "_badannotations.csv")
raw.annotations.append(annotations.onset, annotations.duration, annotations.description)
# Set the bad channels for each subject
if subject == '001':
channels = ['F8'] # Maybe also FP2?
elif subject == '003':
channels = []
elif subject == '014':
channels = []
return channels
def run_ica(raw, dataset, subject, search='manual'):
"""
Runs Independent Component Analysis. Depending on the 'search' mode, it is either used to find bad ICs or to exclude
bad ICs
:param raw: the data to be preprocessed
:param dataset: the dataset currently viewed
:param subject: the subject currently viewed
:param search: default value 'manual': The user views different plots for all ICs found
'eog' : Uses the eog channels to find bad ICs
'done' : Applies the bad ICs that were found
"""
# First filter the data to remove slow drifts - this is done with 1Hz, as proposed by the mne Tutorial at:
# https://mne.tools/dev/auto_tutorials/preprocessing/plot_40_artifact_correction_ica.html#filtering-to-remove-slow-drifts
ica_raw = raw.copy()
ica_raw.filter(l_freq=1, h_freq=None)
# Then run ICA
ica = mne.preprocessing.ICA(method="fastica", random_state=123) # Use a random state for reproducable results
ica.fit(ica_raw, verbose=True)
if search == 'manual':
ica_raw.load_data()
# ica.plot_components(inst=ica_raw, ch_type='eeg', contours=0, topomap_args={'extrapolate': 'head'},
# psd_args={'fmin': 0, 'fmax': 80})
ica.plot_properties(inst=ica_raw, dB=False, topomap_args={'extrapolate': 'head', 'contours': 0},
psd_args={'fmin': 0, 'fmax': 50}, picks=['eeg'])
ica.plot_sources(ica_raw)
elif search == 'eog':
eog_indices, _ = ica.find_bads_eog(raw)
ica.exclude = eog_indices
print('BAD COMPONENTS VIA EOG: ' + str(eog_indices))
ica.plot_overlay(ica_raw, exclude=eog_indices)
elif search == 'done':
exclude = None
if subj == '001':
exclude = [0, 1, 2, 4, 8, 14, 16, 25] # Through eog: 0,1
elif subj == '003':
exclude = [0, 2] # Through eog: 0, 2
elif subj == '014':
exclude = [0, 1, 9] # Through eog: 0,1
# Apply ica to the raw object
raw.load_data()
# ica.plot_overlay(ica_raw,
# title="Signals before (red) applying ICA and after (black) applying ICA on subject 003",
# exclude=exclude)
raw = ica.apply(raw, exclude=exclude)
# Lastly save the ica to a file
folder = "Dataset\\" + dataset + "\\sub-" + subject + "\\ses-" + dataset + "\\eeg\\"
filepath = folder + "sub-" + subject + "_task-" + dataset
ica.save(filepath + "-ica.fif")
return raw
if __name__ == '__main__':
ds = 'N170'
for i in range(1, 41):
subj = "0" + str(i)
if len(str(i)) == 1:
subj = "0" + subj
data = load_subject(subj, ds)
# Load data into memory
data.load_data()
# Filter data with a bandpass filter
filter_data(data)
if subj in ["001", "003", "014"]:
# Manual preprocessing
# Clean the data
b_ch = clean_data(data, subj, ds, True)
# Run ICA
data.set_channel_types({'HEOG_left': 'eog', 'HEOG_right': 'eog', 'VEOG_lower': 'eog'})
data.set_montage('standard_1020', match_case=False)
data = run_ica(data, ds, subj, 'done')
else:
# Provided cleaning and preprocessing information
ann, b_ch, ica_pre, bad_component = load_given_preprocessing_data(subj, ds)
data.annotations.append(ann.onset, ann.duration, ann.description)
data = ica_pre.apply(data, exclude=bad_component)
# Interpolate bad channels
data.interpolate_bads(b_ch)
# Re-Reference the data
data_re = data.copy().set_eeg_reference('average')
# Save preprocessed and cleaned data set
save_subject(data_re, subj, ds)