Initial commit
This commit is contained in:
182
preprocessing_and_cleaning.py
Normal file
182
preprocessing_and_cleaning.py
Normal file
@@ -0,0 +1,182 @@
|
||||
import os
|
||||
import mne
|
||||
|
||||
from mne_bids import (BIDSPath, read_raw_bids)
|
||||
from utils.ccs_eeg_semesterproject import load_precomputed_badData, load_precomputed_ica
|
||||
from utils.ccs_eeg_utils import read_annotations_core
|
||||
|
||||
|
||||
def load_subject(subject, dataset):
    """
    Read the raw EEG recording of one subject from the BIDS dataset.

    :param subject: the subject identifier whose recording is loaded
    :param dataset: the dataset name (also used as the BIDS task and session)
    :return: the raw data object with the dataset annotations attached
    """
    path = BIDSPath(subject=subject,
                    task=dataset,
                    session=dataset,
                    datatype='eeg',
                    suffix='eeg',
                    root='Dataset\\' + dataset)
    subject_raw = read_raw_bids(path)
    # Attach the annotations shipped with the dataset to the raw object
    read_annotations_core(path, subject_raw)
    return subject_raw
|
||||
|
||||
|
||||
def load_given_preprocessing_data(subject, dataset):
    """
    Fetch the precomputed cleaning information for one subject.

    Used for every subject that was not manually preprocessed.

    :param subject: the subject to load the data for
    :param dataset: the dataset currently viewed
    :return: the bad annotations, bad channels, ica object, bad ICs
    """
    root = "Dataset\\" + dataset + "\\"
    # Precomputed 'BAD_' annotations and the list of bad channels
    annotations, bad_channels = load_precomputed_badData(root, subject, dataset)
    # Precomputed ICA decomposition and the components marked as artifacts
    ica, bad_components = load_precomputed_ica(root, subject, dataset)
    return annotations, bad_channels, ica, bad_components
|
||||
|
||||
|
||||
def save_subject(raw, subject, dataset):
    """
    Write a raw object to a .fif file inside the subject's BIDS folder.

    :param raw: the raw object to be saved
    :param subject: the subject, which the raw object belongs to
    :param dataset: the dataset currently viewed
    """
    base = ("Dataset\\" + dataset + "\\sub-" + subject + "\\ses-" + dataset
            + "\\eeg\\sub-" + subject + "_task-" + dataset)
    # Overwrite any file left over from an earlier preprocessing run
    raw.save(base + "_cleaned.fif", overwrite=True)
|
||||
|
||||
|
||||
def filter_data(raw):
    """
    Band-pass filter the data of a single subject.

    The lower edge of 0.5 Hz compensates slow drifts; the upper edge of
    48 Hz attenuates high frequencies, including power-line interference.
    (The previous docstring said 50 Hz, which contradicted the code.)

    :param raw: the data to be filtered
    :return: the raw object after its filter method has been applied
    """
    low_cut, high_cut = 0.5, 48
    raw.filter(low_cut, high_cut, fir_design='firwin')
    return raw
|
||||
|
||||
|
||||
def clean_data(raw, subject, dataset, cleaned=False):
    """
    Clean the data of a single subject: mark bad segments interactively and
    look up the subject's bad channels. Segments marked in an earlier run
    are loaded onto the data from the stored annotation file.

    :param raw: the subject data
    :param subject: the subject which data will be viewed
    :param dataset: the dataset currently viewed
    :param cleaned: if True the data was already viewed; the stored 'BAD_'
                    annotations are loaded instead of plotting
    :return: the list of bad channels, or None for an unknown subject
    """
    folder = "Dataset\\" + dataset + "\\sub-" + subject + "\\ses-" + dataset + "\\eeg\\"
    filepath = folder + "sub-" + subject + "_task-" + dataset

    if not cleaned:
        # Interactive pass: plot the data so bad segments can be marked
        raw.plot(n_channels=len(raw.ch_names), block=True, scalings=40e-6)
        # Indices of the annotations the user marked as 'BAD_'
        bad = [i for i, annot in enumerate(raw.annotations)
               if annot['description'] == "BAD_"]
        if bad:
            # Persist the manually marked intervals for later runs
            raw.annotations[bad].save(filepath + "_badannotations.csv")

    # Load previously stored bad intervals onto the data, if any exist
    if os.path.isfile(filepath + "_badannotations.csv"):
        stored = mne.read_annotations(filepath + "_badannotations.csv")
        raw.annotations.append(stored.onset, stored.duration, stored.description)

    # Bad channels determined by manual inspection, per subject
    bad_channels = {
        '001': ['F8'],  # Maybe also FP2?
        '003': [],
        '014': [],
    }
    return bad_channels.get(subject)
|
||||
|
||||
|
||||
def run_ica(raw, dataset, subject, search='manual'):
    """
    Runs Independent Component Analysis. Depending on the 'search' mode, it is
    either used to find bad ICs or to exclude bad ICs.

    :param raw: the data to be preprocessed
    :param dataset: the dataset currently viewed
    :param subject: the subject currently viewed
    :param search: default value 'manual': The user views different plots for all ICs found
                   'eog' : Uses the eog channels to find bad ICs
                   'done' : Applies the bad ICs that were found
    :return: the raw object; in 'done' mode the ICA-cleaned raw object
    """
    # First filter a copy to remove slow drifts - this is done with 1Hz, as proposed by the mne Tutorial at:
    # https://mne.tools/dev/auto_tutorials/preprocessing/plot_40_artifact_correction_ica.html#filtering-to-remove-slow-drifts
    ica_raw = raw.copy()
    ica_raw.filter(l_freq=1, h_freq=None)

    # Then run ICA; fixed random state for reproducable results
    ica = mne.preprocessing.ICA(method="fastica", random_state=123)
    ica.fit(ica_raw, verbose=True)

    if search == 'manual':
        ica_raw.load_data()
        # Inspect each component's topography, spectrum and sources by eye
        ica.plot_properties(inst=ica_raw, dB=False, topomap_args={'extrapolate': 'head', 'contours': 0},
                            psd_args={'fmin': 0, 'fmax': 50}, picks=['eeg'])
        ica.plot_sources(ica_raw)
    elif search == 'eog':
        # Let mne correlate components against the EOG channels
        eog_indices, _ = ica.find_bads_eog(raw)
        ica.exclude = eog_indices
        print('BAD COMPONENTS VIA EOG: ' + str(eog_indices))
        ica.plot_overlay(ica_raw, exclude=eog_indices)
    elif search == 'done':
        # BUG FIX: the exclusion lookup previously read the module-level
        # variable 'subj' instead of the 'subject' parameter, so calling this
        # function from anywhere but this file's __main__ loop raised a
        # NameError.
        exclude = None
        if subject == '001':
            exclude = [0, 1, 2, 4, 8, 14, 16, 25]  # Through eog: 0,1
        elif subject == '003':
            exclude = [0, 2]  # Through eog: 0, 2
        elif subject == '014':
            exclude = [0, 1, 9]  # Through eog: 0,1
        # Apply ica to the raw object (the 0.5-48Hz filtered data, not the
        # 1Hz-filtered copy used for fitting)
        raw.load_data()
        raw = ica.apply(raw, exclude=exclude)

    # Lastly save the ica to a file
    # NOTE(review): original indentation was lost in this scrape; the save may
    # have belonged inside the 'done' branch only - confirm against history.
    folder = "Dataset\\" + dataset + "\\sub-" + subject + "\\ses-" + dataset + "\\eeg\\"
    filepath = folder + "sub-" + subject + "_task-" + dataset
    ica.save(filepath + "-ica.fif")
    return raw
|
||||
|
||||
|
||||
if __name__ == '__main__':
    ds = 'N170'
    # Process subjects 001 .. 040
    for i in range(1, 41):
        # Zero-pad the subject number to three digits (idiomatic replacement
        # for the previous manual "0" + str(i) padding)
        subj = str(i).zfill(3)
        data = load_subject(subj, ds)
        # Load data into memory
        data.load_data()
        # Filter data with a bandpass filter
        filter_data(data)
        if subj in ["001", "003", "014"]:
            # Manual preprocessing
            # Clean the data (annotations already marked, so cleaned=True)
            b_ch = clean_data(data, subj, ds, True)
            # Run ICA: mark the EOG channels and set a montage first
            data.set_channel_types({'HEOG_left': 'eog', 'HEOG_right': 'eog', 'VEOG_lower': 'eog'})
            data.set_montage('standard_1020', match_case=False)
            data = run_ica(data, ds, subj, 'done')
        else:
            # Provided cleaning and preprocessing information
            ann, b_ch, ica_pre, bad_component = load_given_preprocessing_data(subj, ds)
            data.annotations.append(ann.onset, ann.duration, ann.description)
            data = ica_pre.apply(data, exclude=bad_component)
        # Interpolate bad channels
        data.interpolate_bads(b_ch)
        # Re-Reference the data to the average of all electrodes
        data_re = data.copy().set_eeg_reference('average')
        # Save preprocessed and cleaned data set
        save_subject(data_re, subj, ds)
|
||||
Reference in New Issue
Block a user