Source code for eegunity.unifieddataset
import copy
from eegunity.modules.batch.eeg_batch import EEGBatch
from eegunity.modules.correction.eeg_correction import EEGCorrection
from eegunity.modules.parser.eeg_parser import EEGParser
from eegunity.modules.llm_booster.eeg_llm_booster import EEGLLMBooster
from eegunity._share_attributes import _UDatasetSharedAttributes
[docs]
class UnifiedDataset(_UDatasetSharedAttributes):
"""
This is the kernel class to manage mutiple EEG datasets and associated processing tools.
Attributes:
-----------
dataset_path : str, optional
Path to the dataset (folder). Should not be provided alongside locator_path.
locator_path : str, optional
Path to the locator. Should not be provided alongside dataset_path.
is_unzip : bool, optional
If set to True, any Zip files in the specified dataset will be unzipped. Be aware that unzipping may modify the dataset.
domain_tag : str, optional
The domain tag identifies the dataset name and is required if you specify a dataset path.
verbose : str, optional
Level of verbosity for logging (default is 'CRITICAL').
eeg_parser : EEGParser
EEGParser module
eeg_batch : EEGBatch
EEGBatch module
eeg_correction : EEGCorrection
EEGCorrection module
llm_booster : EEGLLMBooster
EEGLLMBooster module
"""
def __init__(self, domain_tag: str = None, dataset_path: str = None, locator_path: str = None,
is_unzip: bool = True, verbose: str = 'CRITICAL'):
"""
Initialize the class with either dataset_path or locator_path. Only one of
these parameters should be provided. If dataset_path is provided, domain_tag is required.
Parameters:
-----------
domain_tag : str, optional
The domain tag identifies the dataset name. Note: Do not provide domain_tag if you are using locator_path.
dataset_path : str, optional
Path to the dataset (folder). Note: Do not provide dataset_path if you are using locator_path.
locator_path : str, optional
The file path to the locator (a CSV-like file) that stores all metadata for the UnifiedDataset in EEGUnity. Note: Do not provide locator_path if you are using dataset_path.
is_unzip : bool, optional
A flag indicating whether the dataset should be unzipped (default is True).
verbose : str, optional
The verbosity level for logging (default is 'CRITICAL').
Raises:
-------
ValueError
If both dataset_path and locator_path are provided, or neither is provided.
If dataset_path is provided without domain_tag.
"""
super().__init__()
# Ensure only one of dataset_path or locator_path is provided
if dataset_path and locator_path:
raise ValueError("Only one of 'dataset_path' or 'locator_path' can be provided, not both.")
if not dataset_path and not locator_path:
raise ValueError("One of 'dataset_path' or 'locator_path' must be provided.")
# Ensure domain_tag is provided when dataset_path is used
if dataset_path and not domain_tag:
raise ValueError("A 'domain_tag' must be provided when 'dataset_path' is specified.")
# Set attributes
self.set_shared_attr({'dataset_path': dataset_path})
self.set_shared_attr({'locator_path': locator_path})
self.set_shared_attr({'is_unzip': is_unzip})
self.set_shared_attr({'domain_tag': domain_tag})
self.set_shared_attr({'verbose': verbose})
# Initialize associated modules
self.eeg_parser = EEGParser(self)
self.eeg_batch = EEGBatch(self)
self.eeg_correction = EEGCorrection(self)
self.module_eeg_llm_booster = EEGLLMBooster(self)
[docs]
def copy(self):
"""
Create a deep copy of the UnifiedDataset instance.
Returns:
--------
UnifiedDataset
A deep copy of the current UnifiedDataset instance.
"""
return copy.deepcopy(self)
[docs]
def save_locator(self, path):
"""
Save the locator of this UnifiedDataset to a CSV file at the specified path. This file is helpful for checking the current status and metadata after data processing.
You can also reload the UnifiedDataset later by using this locator file, for example:
unified_dataset = UnifiedDataset(locator_path="your_locator_path")
Parameters:
-----------
path : str
The file path where the locator should be saved.
"""
self.get_shared_attr()['locator'].to_csv(path, index=False)
[docs]
def get_locator(self):
"""
Return the locator in DataFrame.
Returns:
--------
pandas.DataFrame
The locator DataFrame associated with the dataset.
"""
return self.get_shared_attr()['locator']
[docs]
def set_locator(self, new_locator):
"""
Set a new locator for this UnifiedDataset instance.
This allows you to update the metadata for the entire dataset without altering the original raw file.
Parameters:
-----------
new_locator : pandas.DataFrame
The new locator DataFrame to associate with the dataset.
"""
self.get_shared_attr()['locator'] = new_locator