Skip to content

Assays

Generate snailz assays.

DateTimeEncoder

Bases: JSONEncoder

Encode date and datetime objects as JSON.

Source code in snailz/assays.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
class DateTimeEncoder(json.JSONEncoder):
    '''Encode date and datetime objects as JSON.'''

    def default(self, obj: date|datetime) -> str:
        '''Encode date or datetime.

        Args:
            obj: what to encode.

        Returns:
            String representation.
        '''
        if isinstance(obj, (date, datetime)):
            return obj.isoformat()

default(obj)

Encode date or datetime.

Parameters:

Name Type Description Default
obj date | datetime

what to encode.

required

Returns:

Type Description
str

String representation.

Source code in snailz/assays.py
18
19
20
21
22
23
24
25
26
27
28
def default(self, obj: date|datetime) -> str:
    '''Encode date or datetime.

    Args:
        obj: what to encode.

    Returns:
        String representation.
    '''
    if isinstance(obj, (date, datetime)):
        return obj.isoformat()

assays(options)

Main driver for assay generation.

  1. Load previously-generated samples and staff.
  2. Generate experiments for some samples, each performed by one or more staff.
  3. Save.

Parameters:

Name Type Description Default
options Namespace

options object.

required
Source code in snailz/assays.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
def assays(options: Namespace) -> None:
    '''Drive assay generation from start to finish.

    1.  Load previously-generated samples and staff.
    2.  Generate experiments for some samples, each performed by one or more staff.
    3.  Save.

    On entry `options.params` is compared against `options.outfile`; it is
    then replaced in place by the object returned from `load_params`.

    Args:
        options: options object.
    '''
    # NOTE(review): `assert` statements are removed when Python runs with -O,
    # so this guard is skipped in optimized mode.
    assert options.params != options.outfile, 'Cannot use same filename for options and parameters'

    params = load_params(AssayParams, options.params)
    options.params = params
    # Seed the global generator so the random draws below are repeatable.
    random.seed(params.seed)

    mutants = _reload_mutant_status(options)
    staff = _reload_staff(options)
    generated = _make_experiments(params, mutants, staff)
    _save(options.outfile, generated)

_reload_mutant_status(options)

Re-create sample genomic information.

Parameters:

Name Type Description Default
options Namespace

controlling options.

required

Returns:

Type Description
list

List of Booleans showing which samples are for mutant snails.

Source code in snailz/assays.py
51
52
53
54
55
56
57
58
59
60
61
62
63
64
def _reload_mutant_status(options: Namespace) -> list:
    '''Work out which samples carry the susceptible base.

    Args:
        options: controlling options; `options.genomes` is a JSON file and
            `options.samples` is a CSV file.

    Returns:
        List of Booleans showing which samples are for mutant snails.
    '''
    genome_info = json.loads(Path(options.genomes).read_text())
    locus = genome_info['susceptible_loc']
    base = genome_info['susceptible_base']

    sequences = pl.read_csv(options.samples)['sequence']
    # A sample counts as mutant when its sequence has the susceptible base
    # at the susceptible location.
    flags = []
    for sequence in sequences:
        flags.append(sequence[locus] == base)
    return flags

_reload_staff(options)

Re-load staff information.

Parameters:

Name Type Description Default
options Namespace

controlling options.

required

Returns:

Type Description
list

List of staff IDs read from the staff file.

Source code in snailz/assays.py
67
68
69
70
71
72
73
74
75
76
def _reload_staff(options: Namespace) -> list:
    '''Re-load staff IDs.

    Args:
        options: controlling options; `options.staff` is the CSV file to read.

    Returns:
        List of the values in the `staff_id` column (not a dataframe).
    '''
    staff = pl.read_csv(options.staff)
    return staff['staff_id'].to_list()

_make_experiments(params, mutant_status, staff_ids)

Create experiments and their data.

Parameters:

Name Type Description Default
params AssayParams

assay generation parameters.

required
mutant_status list

list showing which samples are mutants.

required
staff_ids list

list of staff IDs.

required

Returns:

Type Description
dict

Dictionary holding data to serialize.

Source code in snailz/assays.py
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
def _make_experiments(params: AssayParams, mutant_status: list, staff_ids: list) -> dict:
    '''Build the experiment, staffing, plate, and invalidation records.

    Args:
        params: assay generation parameters.
        mutant_status: one entry per sample; only its length is used here.
        staff_ids: list of staff IDs.

    Returns:
        Dictionary holding data to serialize.
    '''

    assay_kinds = list(params.assay_types)
    experiments, performed, plates = [], [], []

    # Pick the subset of sample indices that get an experiment.
    total = len(mutant_status)
    how_many = int(params.fraction * total)
    chosen = set(random.sample(list(range(total)), k=how_many))

    filename_gen = _make_random_filename_generator(params)

    # Visit the chosen indices in ascending order.
    for index in sorted(chosen):
        # Sample IDs are 1-based.
        sample_id = index + 1
        kind = random.choice(assay_kinds)
        started, ended = _random_experiment_duration(params, kind)
        record = {
            'sample_id': sample_id,
            'kind': kind,
            'start': _round_date(started),
            'end': _round_date(ended),
        }
        experiments.append(record)

        # Record which staff members performed this experiment.
        num_staff = random.randint(*params.assay_staff)
        for staff_id in random.sample(staff_ids, num_staff):
            performed.append({'staff_id': staff_id, 'sample_id': sample_id})

        # Unfinished experiments have no plates yet.
        if ended is None:
            continue
        new_plates = _random_plates(params, kind, sample_id, len(plates), started, filename_gen)
        plates.extend(new_plates)

    invalidated = _invalidate_plates(params, staff_ids, plates)

    return {
        'experiment': experiments,
        'performed': performed,
        'plate': plates,
        'invalidated': invalidated,
    }

_invalidate_plates(params, staff_ids, plates)

Invalidate a random set of plates.

Parameters:

Name Type Description Default
params AssayParams

assay generation parameters.

required
staff_ids list

list of staff IDs.

required
plates list

list of generated plates.

required

Returns:

Type Description
list

List of dictionaries describing invalidated plates.

Source code in snailz/assays.py
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
def _invalidate_plates(params: AssayParams, staff_ids: list, plates: list) -> list:
    '''Invalidate a random set of plates.

    Each plate is selected independently with probability `params.invalid`.
    Every selected plate gets a randomly chosen staff member and an
    invalidation date between the plate's own date and `params.enddate`.

    Args:
        params: assay generation parameters.
        staff_ids: list of staff IDs.
        plates: list of generated plates; each is a dictionary with at
            least `plate_id` and `date` keys.

    Returns:
        List of dictionaries describing invalidated plates.
    '''
    # Use the plate's own ID rather than its list index: plate IDs are
    # 1-based, so the index would point at the wrong plate.
    selected = [
        (p['plate_id'], p['date']) for p in plates if random.random() < params.invalid
    ]
    return [
        {
            'plate_id': plate_id,
            'staff_id': random.choice(staff_ids),
            'date': _random_date_interval(exp_date, params.enddate),
        }
        for (plate_id, exp_date) in selected
    ]

_make_random_filename_generator(params)

Create a random filename generator.

Parameters:

Name Type Description Default
params AssayParams

assay generation parameters.

required

Returns:

Type Description
callable

Unique random filename each time generator is invoked.

Source code in snailz/assays.py
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
def _make_random_filename_generator(params: AssayParams) -> callable:
    '''Yield an endless stream of distinct random `.csv` filenames.

    Args:
        params: assay generation parameters; `params.filename_length` is the
            number of characters in each filename stem.

    Returns:
        Unique random filename each time generator is invoked.
    '''
    already_used = set()
    while True:
        # Draw at least one candidate, then redraw until it is unused.
        stem = ''.join(random.choices(string.hexdigits, k=params.filename_length)).lower()
        candidate = f'{stem}.csv'
        while candidate in already_used:
            stem = ''.join(random.choices(string.hexdigits, k=params.filename_length)).lower()
            candidate = f'{stem}.csv'
        already_used.add(candidate)
        yield candidate

_random_experiment_duration(params, kind)

Choose random start date and end date for experiment.

The start date is uniformly selected from the experiment period. The end date is the same as or later than the start date, and None if the experiment hasn't finished.

Parameters:

Name Type Description Default
params AssayParams

assay generation parameters.

required
kind str

experimental procedure used.

required

Returns:

Type Description
tuple

A pair with a start date and either an end date or None.

Source code in snailz/assays.py
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
def _random_experiment_duration(params: AssayParams, kind: str) -> tuple:
    '''Pick a random start and (possibly missing) end for an experiment.

    The start is drawn uniformly between `params.startdate` and
    `params.enddate`. The end is the start plus a whole number of days
    drawn from `params.assay_duration`, or `None` if that falls after
    `params.enddate`.

    Args:
        params: assay generation parameters.
        kind: experimental procedure used (not consulted here).

    Returns:
        A pair with a start date and either an end date or `None`.
    '''
    earliest = params.startdate.timestamp()
    latest = params.enddate.timestamp()
    start = datetime.fromtimestamp(random.uniform(earliest, latest), tz=timezone.utc)

    num_days = random.randint(*params.assay_duration)
    end = start + timedelta(days=num_days)

    # An end past the overall end date means the experiment is unfinished.
    if end > params.enddate:
        return start, None
    return start, end

_random_plates(params, kind, sample_id, start_id, start_date, filename_gen)

Generate random plate data.

Parameters:

Name Type Description Default
params AssayParams

assay generation parameters.

required
kind str

experimental procedure used.

required
sample_id int

sample used in the experiment.

required
start_id int

starting ID of plates.

required
start_date date

when experiment started.

required
filename_gen str

random filename generator

required

Returns:

Type Description
list

List of dictionaries of plate data.

Source code in snailz/assays.py
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
def _random_plates(params: AssayParams, kind: str, sample_id: int, start_id: int, start_date: date, filename_gen: str) -> list:
    '''Create a random number of plate records for one experiment.

    Args:
        params: assay generation parameters.
        kind: experimental procedure used (not consulted here).
        sample_id: sample used in the experiment.
        start_id: starting ID of plates; the first plate gets `start_id + 1`.
        start_date: when experiment started.
        filename_gen: random filename generator, advanced with `next()`.

    Returns:
        List of dictionaries of plate data.
    '''
    num_plates = random.randint(*params.assay_plates)
    plates = []
    for plate_id in range(start_id + 1, start_id + num_plates + 1):
        # Draw the date before the filename to keep the random sequence stable.
        plate_date = _random_date_interval(start_date, params.enddate)
        filename = next(filename_gen)
        plates.append(
            {
                'plate_id': plate_id,
                'sample_id': sample_id,
                'date': plate_date,
                'filename': filename,
            }
        )
    return plates

_random_date_interval(start_date, end_date)

Choose a random date (inclusive).

Parameters:

Name Type Description Default
start_date date

earliest allowed date.

required
end_date date

last possible date.

required

Returns:

Type Description
date

Randomly-selected date.

Source code in snailz/assays.py
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
def _random_date_interval(start_date: date, end_date: date) -> date:
    '''Pick a random date between two dates (inclusive).

    Args:
        start_date: earliest allowed date.
        end_date: last possible date.

    Returns:
        Randomly-selected date.
    '''
    # The span runs from the first instant of the start day to the last
    # instant of the end day, both in UTC.
    earliest = datetime.combine(start_date, time.min).replace(tzinfo=pytz.UTC).timestamp()
    latest = datetime.combine(end_date, time.max).replace(tzinfo=pytz.UTC).timestamp()

    picked = datetime.fromtimestamp(random.uniform(earliest, latest), tz=pytz.UTC)
    return _round_date(picked)

_round_date(raw)

Round time to whole day.

Parameters:

Name Type Description Default
raw datetime | None

starting datetime (or None).

required

Returns:

Type Description
date | None

Input truncated to its calendar date (or None).

Source code in snailz/assays.py
246
247
248
249
250
251
252
253
254
255
def _round_date(raw: datetime|None) -> date|None:
    '''Round time to whole day.

    Args:
        raw: starting datetime (or `None`).

    Returns:
        Input rounded to nearest whole day (or `None`).
    '''
    return None if raw is None else raw.date()

_save(outfile, result)

Save or show generated data.

Parameters:

Name Type Description Default
outfile str

where to write (or None for standard output).

required
result dict

data to write.

required
Source code in snailz/assays.py
257
258
259
260
261
262
263
264
265
266
267
268
def _save(outfile: str, result: dict) -> None:
    '''Write generated data as indented JSON to a file or standard output.

    Args:
        outfile: where to write (or `None` for standard output).
        result: data to write.
    '''
    # DateTimeEncoder serializes the date values in `result`.
    as_text = json.dumps(result, indent=4, cls=DateTimeEncoder)
    if not outfile:
        print(as_text)
        return
    Path(outfile).write_text(as_text)