import random
from .FolderDataset import FolderDataset
class ClonedFolderDataset(FolderDataset):
    """
    FolderDataset variant that is built directly from an existing list of data plus a
    dictionary of attributes, which makes it convenient for cloning a FolderDataset.

    The initializer does not invoke the parent class initializer; every attribute of the
    instance comes from ``data``, ``meta_data`` and ``kwargs``.

    :param data: (list):
        list of data on which the dataset operates (must not be empty)
    :param meta_data: (dict):
        parameters that correspond to the target dataset's attributes
    :param kwargs: (args):
        variable set of key-value pairs to set as attributes for the dataset;
        applied after ``meta_data``, so a key present in both takes its value from ``kwargs``

    :raises RuntimeError: if ``data`` has length zero
    """

    def __init__(self, data, meta_data, **kwargs):
        item_count = len(data)
        if item_count == 0:
            raise RuntimeError('No data provided')
        print('Initializing with %i data items' % item_count)

        self.data = data

        # Generic attribute initialization from dictionaries, see
        # https://stackoverflow.com/questions/2466191/set-attributes-from-dictionary-in-python
        # meta_data is applied first so that explicit keyword arguments win on conflicts.
        for attributes in (meta_data, kwargs):
            for name in attributes:
                setattr(self, name, attributes[name])
def random_split_dataset(orig_dataset, splitRatio=0.8, random_seed=None):
    """
    Randomly split the given dataset into two ClonedFolderDatasets.

    Every item returned by ``orig_dataset.getdata()`` is assigned independently: it goes to
    the first dataset when a fresh uniform draw is below ``splitRatio`` and to the second
    dataset otherwise. The resulting sizes therefore only approximate the requested ratio.

    Note that this function reseeds the module-level ``random`` generator with
    ``random_seed`` as a side effect.

    :param orig_dataset: (UsefulDataset):
        dataset to split (of type pywick.datasets.UsefulDataset)
    :param splitRatio: (float):
        ratio to use when splitting the data
    :param random_seed: (int):
        random seed for replicability of results
    :return: tuple of split ClonedFolderDatasets
    """
    random.seed(a=random_seed)

    # The meta-data dictionary is not copied: both resulting datasets are
    # initialized from the very same dictionary object.
    shared_meta = orig_dataset.getmeta_data()

    first_part, second_part = [], []
    for sample in orig_dataset.getdata():
        # One draw per item decides which side of the split receives it.
        bucket = first_part if random.random() < splitRatio else second_part
        bucket.append(sample)

    # Either list may end up empty; it is handed to ClonedFolderDataset unchanged.
    return ClonedFolderDataset(first_part, shared_meta), ClonedFolderDataset(second_part, shared_meta)