Skip to content

Mangle

Mangle readings plate files to simulate poor formatting.

mangle(options)

Main driver for mangling readings plates.

  • options.dbfile: path to SQLite database.
  • options.outdir: output directory.
  • options.tidy: directory containing tidy readings files.

Mangled files are written to the specified output directory. The files have the same name as the original (tidy) files.

Parameters:

Name Type Description Default
options Namespace

see above.

required
Source code in snailz/mangle.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
def mangle(options: Namespace) -> None:
    '''Main driver for mangling readings plates.

    -   options.dbfile: path to SQLite database.
    -   options.outdir: output directory.
    -   options.tidy: directory containing tidy readings files.

    Mangled files are written to the specified output directory.
    The files have the same name as the original (tidy) files.

    Args:
        options: see above.
    '''
    con = sqlite3.connect(options.dbfile)
    try:
        con.row_factory = sqlite3.Row
        # Copy rows into plain dicts so nothing depends on the connection
        # once it has been closed.
        records = [dict(r) for r in con.execute(SELECT).fetchall()]
    finally:
        # Always release the database handle, even if the query fails.
        con.close()

    # Seed from the number of rows so that the same database always
    # produces the same mangled output.
    random.seed(len(records))
    for rec in _consolidate(records):
        _mangle_file(options, rec)

_consolidate(records)

Pick a single (plate, staff) pair for each plate.

Parameters:

Name Type Description Default
records list

list of (plate, staff) pairs from database.

required

Returns:

Type Description
list

One (plate, staff) pair for each plate.

Source code in snailz/mangle.py
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
def _consolidate(records: list) -> list:
    '''Reduce the query results to one randomly chosen record per plate.

    Args:
        records: list of (plate, staff) records from the database, each
            indexable by 'plate_id'.

    Returns:
        One record for each distinct plate, in order of first appearance.
    '''
    by_plate = {}
    for entry in records:
        # Plates keep their first-seen order, so the sequence of random
        # draws below is the same for a given seed.
        by_plate.setdefault(entry['plate_id'], []).append(entry)

    return [random.choice(candidates) for candidates in by_plate.values()]

_mangle_file(options, record)

Mangle a single file.

  1. Read file as header and body sections.
  2. Apply randomly-chosen mangling functions to modify in place.
  3. Save result.

Parameters:

Name Type Description Default
options Namespace

see above.

required
record dict

dictionary of database query results for a single record.

required
Source code in snailz/mangle.py
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
def _mangle_file(options: Namespace, record: dict) -> None:
    '''Produce the mangled version of one readings file.

    1.  Load the tidy file and split it into sections.
    2.  Offer the sections to each mangling function in turn; a function
        is applied only when a random draw falls below its `prob` attribute.
    3.  Write the (possibly modified) sections to the output directory.

    Args:
        options: see above.
        record: dictionary of database query results for a single record.
    '''
    filename = record['filename']
    sections = _read_sections(options, filename)

    # Exactly one random draw is made per function, in this fixed order.
    # The indent step runs last, so it also shifts rows added before it.
    manglers = (_do_staff_name, _do_date, _do_footer, _do_indent)
    for mangler in manglers:
        if random.random() < mangler.prob:
            mangler(record, sections)

    _write_sections(options, filename, sections)

_do_date(record, sections)

Mangle data in place by adding date in header.

Parameters:

Name Type Description Default
record dict

entire record data.

required
sections dict

dictionary of header, body, and footer.

required
Source code in snailz/mangle.py
89
90
91
92
93
94
95
96
97
98
99
def _do_date(record: dict, sections: dict) -> None:
    '''Mangle data in place by appending a date row to the header.

    The new row is as wide as the first header row, with 'Date' in the
    first cell and the record's date in the second.

    Args:
        record: entire record data.
        sections: dictionary of header, body, and footer.
    '''
    width = len(sections['header'][0])
    date_row = [''] * width
    date_row[0] = 'Date'
    date_row[1] = record['date']
    sections['header'].append(date_row)

_do_footer(record, sections)

Mangle data in place by adding a footer.

Parameters:

Name Type Description Default
record dict

entire record data.

required
sections dict

dictionary of header, body, and footer.

required
Source code in snailz/mangle.py
103
104
105
106
107
108
109
110
111
112
113
def _do_footer(record: dict, sections: dict) -> None:
    '''Mangle data in place by installing a two-row footer.

    The footer is a blank row followed by a row whose first cell holds the
    record's staff ID; both rows are as wide as the first header row.

    Args:
        record: entire record data.
        sections: dictionary of header, body, and footer.
    '''
    width = len(sections['header'][0])
    spacer = [''] * width
    signature = [''] * width
    signature[0] = record['staff_id']
    sections['footer'] = [spacer, signature]

_do_indent(record, sections)

Mangle data in place by indenting all rows by one space.

Parameters:

Name Type Description Default
record dict

entire record data.

required
sections dict

dictionary of header, body, and footer.

required
Source code in snailz/mangle.py
117
118
119
120
121
122
123
124
125
126
def _do_indent(record: dict, sections: dict) -> None:
    '''Mangle data in place by indenting every row with an empty leading cell.

    Args:
        record: entire record data (not used by this function).
        sections: dictionary of header, body, and footer.
    '''
    for rows in sections.values():
        for cells in rows:
            # Prepend one empty cell so the row's content shifts right.
            cells[:0] = ['']

_do_staff_name(record, sections)

Mangle data in place by adding staff name.

Parameters:

Name Type Description Default
record dict

entire record data.

required
sections dict

dictionary of header, body, and footer.

required
Source code in snailz/mangle.py
130
131
132
133
134
135
136
137
def _do_staff_name(record: dict, sections: dict) -> None:
    '''Mangle data in place by writing the staff member's full name.

    The name (personal then family, separated by a space) overwrites the
    second-to-last cell of the first header row.

    Args:
        record: entire record data.
        sections: dictionary of header, body, and footer.
    '''
    full_name = f'{record["personal"]} {record["family"]}'
    sections['header'][0][-2] = full_name

_read_sections(options, filename)

Read tidy readings file and split into sections.

Parameters:

Name Type Description Default
options Namespace

see above.

required
filename str

file to read from.

required

Returns:

Type Description
dict

Dictionary with header, head-to-body spacing, body, and footer (empty in tidy file).

Source code in snailz/mangle.py
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
def _read_sections(options: Namespace, filename: str) -> dict:
    '''Read tidy readings file and split into sections.

    The first row is the header, the second row is the head-to-body
    spacing, and all remaining rows are the body. A file with fewer rows
    simply yields empty lists for the missing sections.

    Args:
        options: see above (only `options.tidy` is used).
        filename: file to read from, relative to `options.tidy`.

    Returns:
        Dictionary with header, head-to-body spacing, body, and footer (empty in tidy file).
    '''
    # newline='' lets the csv module do its own line-ending handling, so
    # line breaks embedded in quoted fields are read back unchanged.
    with open(Path(options.tidy, filename), 'r', newline='') as raw:
        rows = list(csv.reader(raw))
    return {
        'header': rows[0:1],
        'headspace': rows[1:2],
        'body': rows[2:],
        'footer': [],
    }

_write_sections(options, filename, sections)

Write sections of mangled file to file.

Parameters:

Name Type Description Default
options Namespace

see above.

required
filename str

file to write to.

required
sections dict

dictionary of header, head-to-body spacing, body, and footer.

required
Source code in snailz/mangle.py
161
162
163
164
165
166
167
168
169
170
171
172
def _write_sections(options: Namespace, filename: str, sections: dict) -> None:
    '''Write sections of mangled file to file.

    Sections are written one after another in the dictionary's iteration
    order, with every row terminated by a single newline.

    Args:
        options: see above (only `options.outdir` is used).
        filename: file to write to, relative to `options.outdir`.
        sections: dictionary of header, head-to-body spacing, body, and footer.
    '''
    # newline='' stops the text layer from translating the writer's
    # explicit '\n' terminator into a platform-specific line ending.
    with open(Path(options.outdir, filename), 'w', newline='') as raw:
        writer = csv.writer(raw, lineterminator='\n')
        for section in sections.values():
            writer.writerows(section)