This page shows the technical details of what happened when authorised researcher Christine Cunningham requested one or more actions to be run against real patient data in the project, within a secure environment.
By cross-referencing the indicated Requested Actions with the
Pipeline section below, you can infer the
security level
at which each output was written. Outputs marked as
highly_sensitive
can never be viewed directly by a researcher; a researcher can only
request that code be run against them. Outputs marked as
moderately_sensitive
can be viewed by an approved researcher by logging into a highly
secure environment. Only outputs marked as
moderately_sensitive
can be requested for release to the public, via a controlled
output review service.
Jobs
-
- Job identifier:
-
5qz4wa2ifnsin5lb
Pipeline
Show project.yaml
# Pipeline definition. Each entry under `actions` declares the command to
# execute (`run`), the actions whose outputs it depends on (`needs`), and the
# files it writes (`outputs`), grouped by sensitivity level
# (`highly_sensitive` or `moderately_sensitive`).
version: '3.0'

expectations:
  population_size: 1000

actions:
  # Ethnicity extract; it is the --rhs input of both cohort joins below.
  generate_study_population_report_ethnicity:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition_ethnicity_report
      --output-dir output/report
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/report/input_ethnicity_report.csv.gz

  ### Curation check ###
  # Each curation extract covers a single index date and is consumed only by
  # the dataset_report_* action that follows it.
  curation_monthly:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition_report
      --index-date-range "2019-01-01 to 2019-01-01 by month"
      --param frequency=monthly
      --output-dir=output/curation
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/curation/input_report_2019-01-01.csv.gz

  dataset_report_monthly:
    run: >-
      python:latest python analysis/dataset_report.py
      --input-files output/curation/input_report_2019-01-01.csv.gz
      --output-dir output/curation/
      --granularity "year"
    needs: [curation_monthly]
    outputs:
      moderately_sensitive:
        # Only output the single summary file
        cohort_report: output/curation/input_report_2019-01-01.html

  curation_weekly:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition_report
      --index-date-range "2022-07-01 to 2022-07-01 by week"
      --param frequency=weekly
      --output-dir=output/curation
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/curation/input_report_2022-07-01.csv.gz

  dataset_report_weekly:
    run: >-
      python:latest python analysis/dataset_report.py
      --input-files output/curation/input_report_2022-07-01.csv.gz
      --output-dir output/curation/
      --granularity "day"
    needs: [curation_weekly]
    outputs:
      moderately_sensitive:
        # Only output the single summary file
        cohort_report: output/curation/input_report_2022-07-01.html

  # Unlike the other curation extracts, this one is written as feather and
  # passes no --param frequency.
  curation_diagnostics:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition_diagnostics_curation
      --index-date-range "2019-01-01 to 2019-01-01 by month"
      --output-dir=output/curation
      --output-format=feather
    outputs:
      highly_sensitive:
        cohort: output/curation/input_diagnostics_curation_2019-01-01.feather

  dataset_report_diagnostics:
    run: >-
      python:latest python analysis/dataset_report.py
      --input-files output/curation/input_diagnostics_curation_2019-01-01.feather
      --output-dir output/curation/
      --granularity "day"
    needs: [curation_diagnostics]
    outputs:
      moderately_sensitive:
        # Only output the single summary file
        cohort_report: output/curation/input_diagnostics_curation_2019-01-01.html
  ### End curation check ###

  ### MONTHLY ###
  # One extract action per calendar year (2018-2022), all writing to
  # output/report.
  # NOTE(review): the output globs differ only in how much of the `input_`
  # prefix they spell out (input_*, input*, inpu*, inp*, in*) - presumably so
  # that each action declares a distinct pattern; confirm before tidying them.
  generate_study_population_report_monthly_0:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition_report
      --index-date-range "2018-01-01 to 2018-12-01 by month"
      --param frequency=monthly
      --output-dir=output/report
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/report/input_*-01.csv.gz

  generate_study_population_report_monthly_1:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition_report
      --index-date-range "2019-01-01 to 2019-12-01 by month"
      --param frequency=monthly
      --output-dir=output/report
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/report/input*-01.csv.gz

  generate_study_population_report_monthly_2:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition_report
      --index-date-range "2020-01-01 to 2020-12-01 by month"
      --param frequency=monthly
      --output-dir=output/report
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/report/inpu*-01.csv.gz

  generate_study_population_report_monthly_3:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition_report
      --index-date-range "2021-01-01 to 2021-12-01 by month"
      --param frequency=monthly
      --output-dir=output/report
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/report/inp*-01.csv.gz

  generate_study_population_report_monthly_4:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition_report
      --index-date-range "2022-01-01 to 2022-12-01 by month"
      --param frequency=monthly
      --output-dir=output/report
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/report/in*-01.csv.gz

  # Joins every monthly extract (--lhs) with the ethnicity extract (--rhs)
  # and writes the results to output/report/joined.
  join_cohorts_report:
    run: >
      python:latest python analysis/cohort_joiner.py
      --lhs output/report/input_report_20*.csv.gz
      --rhs output/report/input_ethnicity_report.csv.gz
      --output-dir output/report/joined
    needs:
      - generate_study_population_report_monthly_0
      - generate_study_population_report_monthly_1
      - generate_study_population_report_monthly_2
      - generate_study_population_report_monthly_3
      - generate_study_population_report_monthly_4
      - generate_study_population_report_ethnicity
    outputs:
      highly_sensitive:
        cohort: output/report/joined/input_report_20*.csv.gz

  generate_measures_report:
    run: >-
      cohortextractor:latest generate_measures
      --study-definition study_definition_report
      --output-dir=output/report/joined
    needs: [join_cohorts_report]
    outputs:
      moderately_sensitive:
        measure_csv: output/report/joined/measure_event_*_rate.csv

  # The two join_measures_* actions split the measure files between them:
  # the patterns excluded here are exactly the inputs of the unrounded join.
  join_measures_rounded:
    run: >-
      python:latest python analysis/join_and_round.py
      --input-files output/report/joined/measure_*_rate.csv
      --exclude-files output/report/joined/measure_*practice*_rate.csv
      --exclude-files output/report/joined/measure_event_code*_rate.csv
      --output-dir output/report/results
      --output-name "measure_rounded.csv"
    needs: [generate_measures_report]
    outputs:
      moderately_sensitive:
        # Only output the single summary file
        measure_csv: output/report/results/measure_rounded.csv

  join_measures_unrounded:
    run: >-
      python:latest python analysis/join_and_round.py
      --input-files output/report/joined/measure_*practice*_rate.csv
      --input-files output/report/joined/measure_event_code*_rate.csv
      --output-dir output/report/results
      --output-name "measure_unrounded.csv"
      --allow-practice
      --skip-round
    needs: [generate_measures_report]
    outputs:
      moderately_sensitive:
        # Only output the single summary file
        measure_csv: output/report/results/measure_unrounded.csv

  top_5_table_report:
    run: >
      python:latest python analysis/report/top_5_report.py
      --input-file output/report/results/measure_unrounded.csv
      --output-dir output/report/results
    needs: [join_measures_unrounded]
    outputs:
      moderately_sensitive:
        tables: output/report/results/top_5*.csv
        figures: output/report/results/*top_5_codes_over_time.png
        data: output/report/results/*top_5_codes_over_time.csv

  plot_measure_report:
    run: >
      python:latest python analysis/report/plot_measures_report.py
      --measure-path output/report/results/measure_rounded.csv
      --output-dir output/report/results
    needs: [join_measures_rounded]
    outputs:
      moderately_sensitive:
        measure: output/report/results/*_bar_measures*.jpeg

  # NOTE(review): `*.png` also matches the PNG names declared by other
  # actions writing to output/report/results - confirm this is intended.
  panel_plots:
    run: >
      python:latest python analysis/report/create_panels.py
      --input-file output/report/results/measure_rounded.csv
      --practice-file output/report/results/measure_unrounded.csv
      --output-dir output/report/results
    needs: [join_measures_rounded, join_measures_unrounded]
    outputs:
      moderately_sensitive:
        measure: output/report/results/*.png
        deciles_tables: output/report/results/deciles_table*.csv

  panel_plots_stacked_medications:
    run: >
      python:latest python analysis/report/panel_plots.py
      --input-file output/report/results/measure_rounded.csv
      --measures-list "event_amoxicillin_rate"
      --measures-list "event_azithromycin_rate"
      --measures-list "event_clarithromycin_rate"
      --measures-list "event_erythromycin_rate"
      --measures-list "event_phenoxymethylpenicillin_rate"
      --measures-list "event_cefalexin_rate"
      --measures-list "event_co_amoxiclav_rate"
      --measures-list "event_flucloxacillin_rate"
      --output-dir output/report/results
      --output-name "all_medications_by_year"
      --scale "rate"
      --stack-years
    needs: [join_measures_rounded]
    outputs:
      moderately_sensitive:
        measure: output/report/results/all_medications_by_year.png

  panel_plots_stacked_diagnostics:
    run: >
      python:latest python analysis/report/panel_plots.py
      --input-file output/report/results/measure_rounded.csv
      --measures-list "event_scarlet_fever_rate"
      --measures-list "event_strep_a_sore_throat_rate"
      --measures-list "event_invasive_strep_a_rate"
      --output-dir output/report/results
      --output-name "all_diagnostics_by_year"
      --scale "rate"
      --stack-years
    needs: [join_measures_rounded]
    outputs:
      moderately_sensitive:
        measure: output/report/results/all_diagnostics_by_year.png

  # Reads the joined cohort files directly rather than the measure tables.
  event_counts_report:
    run: >
      python:latest python analysis/report/event_counts.py
      --input-dir="output/report/joined/"
      --output-dir="output/report/results"
      --measures-list="amoxicillin"
      --measures-list="azithromycin"
      --measures-list="clarithromycin"
      --measures-list="erythromycin"
      --measures-list="phenoxymethylpenicillin"
      --measures-list="flucloxacillin"
      --measures-list="cefalexin"
      --measures-list="co_amoxiclav"
      --measures-list="scarlet_fever"
      --measures-list="strep_a_sore_throat"
      --measures-list="invasive_strep_a"
    needs: [join_cohorts_report]
    outputs:
      moderately_sensitive:
        measure: output/report/results/event_counts_*.json

  ### WEEKLY ###
  # Three extract actions covering consecutive date ranges, all writing to
  # output/report/weekly.
  # NOTE(review): as in the monthly section, the output globs differ only in
  # how much of `input_` they spell out (input_*, input*, inpu*) - presumably
  # to keep each action's pattern distinct; confirm.
  generate_study_population_report_weekly_1:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition_report
      --index-date-range "2022-09-01 to 2022-11-09 by week"
      --param frequency=weekly
      --output-dir=output/report/weekly
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/report/weekly/input_*.csv.gz

  generate_study_population_report_weekly_2:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition_report
      --index-date-range "2022-11-10 to 2023-01-25 by week"
      --param frequency=weekly
      --output-dir=output/report/weekly
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/report/weekly/input*.csv.gz

  # Uses study_definition_report_weekly and passes no --param frequency,
  # unlike weekly_1 and weekly_2.
  generate_study_population_report_weekly_3:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition_report_weekly
      --index-date-range "2023-01-26 to 2023-02-01 by week"
      --output-dir=output/report/weekly
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/report/weekly/inpu*.csv.gz

  # Depends on weekly_1 and weekly_2 only; weekly_3 is not listed in `needs`
  # and is consumed directly by generate_measures_report_weekly_1 and
  # event_counts_report_weekly instead.
  join_cohorts_report_weekly:
    run: >
      python:latest python analysis/cohort_joiner.py
      --lhs output/report/weekly/input_report_20*.csv.gz
      --rhs output/report/input_ethnicity_report.csv.gz
      --output-dir output/report/weekly/joined
    needs:
      - generate_study_population_report_weekly_1
      - generate_study_population_report_weekly_2
      - generate_study_population_report_ethnicity
    outputs:
      highly_sensitive:
        cohort: output/report/weekly/joined/input_report_20*.csv.gz

  generate_measures_report_weekly:
    run: >-
      cohortextractor:latest generate_measures
      --study-definition study_definition_report
      --output-dir=output/report/weekly/joined
    needs: [join_cohorts_report_weekly]
    outputs:
      moderately_sensitive:
        measure_csv: output/report/weekly/joined/measure_event_*_rate.csv

  generate_measures_report_weekly_1:
    run: >-
      cohortextractor:latest generate_measures
      --study-definition study_definition_report_weekly
      --output-dir=output/report/weekly
    needs: [generate_study_population_report_weekly_3]
    outputs:
      moderately_sensitive:
        measure_csv: output/report/weekly/measure_event_*_rate.csv

  # Both weekly joins read measure files from two directories:
  # output/report/weekly/joined and output/report/weekly.
  join_measures_rounded_weekly:
    run: >-
      python:latest python analysis/join_and_round.py
      --input-files output/report/weekly/joined/measure_*_rate.csv
      --input-files output/report/weekly/measure_*_rate.csv
      --exclude-files output/report/weekly/joined/measure_*practice*_rate.csv
      --exclude-files output/report/weekly/joined/measure_event_code*_rate.csv
      --exclude-files output/report/weekly/measure_*practice*_rate.csv
      --exclude-files output/report/weekly/measure_event_code*_rate.csv
      --output-dir output/report/weekly/results
      --output-name "measure_rounded_weekly.csv"
    needs: [generate_measures_report_weekly, generate_measures_report_weekly_1]
    outputs:
      moderately_sensitive:
        # Only output the single summary file
        measure_csv: output/report/weekly/results/measure_rounded_weekly.csv

  join_measures_unrounded_weekly:
    run: >-
      python:latest python analysis/join_and_round.py
      --input-files output/report/weekly/joined/measure_*practice*_rate.csv
      --input-files output/report/weekly/joined/measure_event_code*_rate.csv
      --input-files output/report/weekly/measure_*practice*_rate.csv
      --input-files output/report/weekly/measure_event_code*_rate.csv
      --output-dir output/report/weekly/results
      --output-name "measure_unrounded_weekly.csv"
      --allow-practice
      --skip-round
    needs: [generate_measures_report_weekly, generate_measures_report_weekly_1]
    outputs:
      moderately_sensitive:
        # Only output the single summary file
        measure_csv: output/report/weekly/results/measure_unrounded_weekly.csv

  top_5_table_report_weekly:
    run: >
      python:latest python analysis/report/top_5_report.py
      --input-file output/report/weekly/results/measure_unrounded_weekly.csv
      --output-dir output/report/weekly/results
    needs: [join_measures_unrounded_weekly]
    outputs:
      moderately_sensitive:
        tables: output/report/weekly/results/top_5*.csv
        figures: output/report/weekly/results/*top_5_codes_over_time.png
        data: output/report/weekly/results/*top_5_codes_over_time.csv

  plot_measure_report_weekly:
    run: >
      python:latest python analysis/report/plot_measures_report.py
      --measure-path output/report/weekly/results/measure_rounded_weekly.csv
      --output-dir output/report/weekly/results
    needs: [join_measures_rounded_weekly]
    outputs:
      moderately_sensitive:
        measure: output/report/weekly/results/*_bar_measures*.jpeg

  # The deciles pattern here is `deciles_t*.csv`, whereas the monthly
  # panel_plots action declares `deciles_table*.csv`.
  panel_plots_weekly:
    run: >
      python:latest python analysis/report/create_panels.py
      --input-file output/report/weekly/results/measure_rounded_weekly.csv
      --practice-file output/report/weekly/results/measure_unrounded_weekly.csv
      --output-dir output/report/weekly/results
    needs: [join_measures_rounded_weekly, join_measures_unrounded_weekly]
    outputs:
      moderately_sensitive:
        measure: output/report/weekly/results/*.png
        deciles_tables: output/report/weekly/results/deciles_t*.csv

  # Depends only on the weekly_3 extract and passes --latest-period-only.
  event_counts_report_weekly:
    run: >
      python:latest python analysis/report/event_counts.py
      --input-dir="output/report/weekly/"
      --output-dir="output/report/weekly/results"
      --latest-period-only
      --measures-list="amoxicillin"
      --measures-list="azithromycin"
      --measures-list="clarithromycin"
      --measures-list="erythromycin"
      --measures-list="phenoxymethylpenicillin"
      --measures-list="flucloxacillin"
      --measures-list="cefalexin"
      --measures-list="co_amoxiclav"
      --measures-list="scarlet_fever"
      --measures-list="strep_a_sore_throat"
      --measures-list="invasive_strep_a"
    needs: [generate_study_population_report_weekly_3]
    outputs:
      moderately_sensitive:
        measure: output/report/weekly/results/event_counts_*.json

  # Executes report.ipynb and renders it to HTML without input cells.
  generate_notebook:
    run: >-
      jupyter:latest jupyter nbconvert /workspace/analysis/report/report.ipynb
      --execute
      --to html
      --output-dir=/workspace/output/report
      --ExecutePreprocessor.timeout=86400
      --no-input
    needs:
      - event_counts_report
      - top_5_table_report
      - plot_measure_report
      - panel_plots
      - panel_plots_stacked_medications
      - panel_plots_stacked_diagnostics
    outputs:
      moderately_sensitive:
        notebook: output/report/report.html

  # Weekly counterpart of generate_notebook; it writes to the same
  # output/report directory under a different file name.
  generate_notebook_weekly:
    run: >-
      jupyter:latest jupyter nbconvert /workspace/analysis/report/report_weekly.ipynb
      --execute
      --to html
      --output-dir=/workspace/output/report
      --ExecutePreprocessor.timeout=86400
      --no-input
    needs:
      - event_counts_report_weekly
      - top_5_table_report_weekly
      - plot_measure_report_weekly
      - panel_plots_weekly
    outputs:
      moderately_sensitive:
        notebook: output/report/report_weekly.html
Timeline
-
Created:
-
Started:
-
Finished:
-
Runtime: 00:01:43
These timestamps are generated and stored using the UTC timezone on the backend.
Job information
- Status
-
Succeeded
- Backend
- TPP
- Workspace
- strepa_scarlet
- Requested by
- Christine Cunningham
- Branch
- main
- Force run dependencies
- No
- Git commit hash
- 9c7c095
- Requested actions
-
-
generate_notebook_weekly
-