Skip to content

Db

Generate snailz SQLite database from data files.

db(options)

Main driver for database generator.

Parameters:

Name Type Description Default
options Namespace

controlling options.

required
  • dbfile: output database file.
  • assays: generated assay data.
  • samples: generated sample data.
  • sites: site parameters.
  • staff: generated staff data.
  • surveys: survey parameters.
Source code in snailz/db.py
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
def db(options: Namespace) -> None:
    '''Main driver for database generator.

    Args:
        options: controlling options.

    -   dbfile: output database file.
    -   assays: generated assay data.
    -   samples: generated sample data.
    -   sites: site parameters.
    -   staff: generated staff data.
    -   surveys: survey parameters.
    '''
    _create_schema(options.dbfile)

    url = f'sqlite:///{options.dbfile}'

    _csv_to_db(url, 'staff', options.staff)
    _csv_to_db(url, 'sample', options.samples)
    _csv_to_db(url, 'site', options.sites)
    _csv_to_db(url, 'survey', options.surveys, dates=['date'], columns=['survey_id', 'site_id', 'date'])

    assays = json.load(open(options.assays, 'r'))
    _json_to_db(url, assays, 'experiment', dates=['start', 'end'])
    _json_to_db(url, assays, 'performed')
    _json_to_db(url, assays, 'plate', dates=['date'])
    _json_to_db(url, assays, 'invalidated', dates=['date'])

_create_schema(dbfile)

Create database schema.

Parameters:

Name Type Description Default
dbfile str

database file path.

required
Source code in snailz/db.py
105
106
107
108
109
110
111
112
def _create_schema(dbfile: str) -> None:
    '''Create database schema.

    Args:
        dbfile: database file path.
    '''
    connection = sqlite3.connect(dbfile)
    connection.executescript(SCHEMA)

_csv_to_db(url, name, source, dates=[], columns=[])

Fill table from CSV.

Parameters:

Name Type Description Default
url str

connection URL for database.

required
name str

table name.

required
source str

path to CSV source file.

required
dates list

columns to convert to dates.

[]
columns list

subset of columns to store in table.

[]
Source code in snailz/db.py
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
def _csv_to_db(url: str, name: str, source: str, dates: list = [], columns: list = []) -> None:
    '''Fill table from CSV.

    Args:
        url: connection URL for database.
        name: table name.
        source: path to CSV source file.
        dates: columns to convert to dates.
        columns: subset of columns to store in table.
    '''
    df = pl.read_csv(source)
    if columns:
        df = df[columns]
    df = _convert_dates(df, dates)
    df.write_database(name, url, if_table_exists='append')

_json_to_db(url, data, name, dates=[])

Fill table from JSON.

Parameters:

Name Type Description Default
url str

connection URL for database.

required
data dict

JSON data.

required
name str

table name (must be field in data).

required
dates list

columns to convert to dates.

[]
Source code in snailz/db.py
132
133
134
135
136
137
138
139
140
141
142
143
def _json_to_db(url: str, data: dict, name: str, dates: list = []) -> None:
    '''Fill table from JSON.

    Args:
        url: connection URL for database.
        data: JSON data.
        name: table name (must be field in `data`).
        dates: columns to convert to dates.
    '''
    df = pl.DataFrame(data[name])
    df = _convert_dates(df, dates)
    df.write_database(name, url, if_table_exists='append')

_convert_dates(df, dates)

Convert text columns to dates.

Parameters:

Name Type Description Default
df DataFrame

input dataframe.

required
dates list

list of names of columns to convert.

required

Returns:

Type Description
DataFrame

Updated dataframe.

Source code in snailz/db.py
146
147
148
149
150
151
152
153
154
155
156
157
158
def _convert_dates(df: pl.DataFrame, dates: list) -> pl.DataFrame:
    '''Convert text columns to dates.

    Args:
        df: input dataframe.
        dates: list of names of columns to convert.

    Returns:
        Updated dataframe.
    '''
    for colname in dates:
        df = df.with_columns(pl.col(colname).str.to_date(DATE_FORMAT))
    return df