Skip to content

Job request: 14701

Workspace:
strepa_scarlet
ID:
f6yatq3stpxkmwa5

This page shows the technical details of what happened when authorised researcher Louis Fisher requested one or more actions to be run against real patient data in the project, within a secure environment.

By cross-referencing the indicated Requested Actions with the Pipeline section below, you can infer what security level various outputs were written to. Outputs marked as highly_sensitive can never be viewed directly by a researcher; researchers can only request that code be run against them. Outputs marked as moderately_sensitive can be viewed by an approved researcher by logging into a highly secure environment. Only outputs marked as moderately_sensitive can be requested for release to the public, via a controlled output review service.

Jobs

Pipeline

Show project.yaml
# Pipeline schema version; quoted so it is read as a string, not a float.
version: '3.0'

expectations:
  # NOTE(review): presumably the number of dummy patients generated for
  # local runs — confirm against the OpenSAFELY pipeline documentation.
  population_size: 1000

actions:
  # Extracts the cohort for study_definition_ethnicity_report as a gzipped CSV
  # under output/report. Both join_cohorts_* actions use this file as --rhs.
  generate_study_population_report_ethnicity:
    run: >-
      cohortextractor:latest
      generate_cohort
      --study-definition study_definition_ethnicity_report
      --output-dir output/report
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/report/input_ethnicity_report.csv.gz

  ### Curation check ###
  # Single-date extract (2019-01-01, frequency=monthly) of
  # study_definition_report, written to output/curation for the
  # dataset_report_monthly action.
  curation_monthly:
    run: >-
      cohortextractor:latest
      generate_cohort
      --study-definition study_definition_report
      --index-date-range "2019-01-01 to 2019-01-01 by month"
      --param frequency=monthly
      --output-dir=output/curation
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/curation/input_report_2019-01-01.csv.gz

  # Runs analysis/dataset_report.py over the monthly curation extract.
  # Indented with 4 spaces like the other actions in this file.
  dataset_report_monthly:
    run: >-
      python:latest
      python analysis/dataset_report.py
      --input-files output/curation/input_report_2019-01-01.csv.gz
      --output-dir output/curation/
      --granularity "year"
    needs:
      - curation_monthly
    outputs:
      moderately_sensitive:
        # Only the HTML summary is declared as an output.
        cohort_report: output/curation/input_report_2019-01-01.html

  # Single-date extract (2022-07-01, frequency=weekly) of
  # study_definition_report, written to output/curation for the
  # dataset_report_weekly action.
  curation_weekly:
    run: >-
      cohortextractor:latest
      generate_cohort
      --study-definition study_definition_report
      --index-date-range "2022-07-01 to 2022-07-01 by week"
      --param frequency=weekly
      --output-dir=output/curation
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/curation/input_report_2022-07-01.csv.gz

  # Runs analysis/dataset_report.py over the weekly curation extract.
  # Indented with 4 spaces like the other actions in this file.
  dataset_report_weekly:
    run: >-
      python:latest
      python analysis/dataset_report.py
      --input-files output/curation/input_report_2022-07-01.csv.gz
      --output-dir output/curation/
      --granularity "day"
    needs:
      - curation_weekly
    outputs:
      moderately_sensitive:
        # Only the HTML summary is declared as an output.
        cohort_report: output/curation/input_report_2022-07-01.html
  ### End curation check ###

  ### MONTHLY ###
  # Monthly extracts of study_definition_report for index dates
  # 2019-01-01 through 2019-12-01, written to output/report.
  generate_study_population_report_monthly_1:
    run: >-
      cohortextractor:latest
      generate_cohort
      --study-definition study_definition_report
      --index-date-range "2019-01-01 to 2019-12-01 by month"
      --param frequency=monthly
      --output-dir=output/report
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        # NOTE(review): the sibling monthly actions use the same glob with a
        # shorter literal prefix; presumably this keeps each action's output
        # pattern textually distinct — confirm before normalising.
        cohort: output/report/input_*-01.csv.gz

  # Monthly extracts of study_definition_report for index dates
  # 2020-01-01 through 2020-12-01, written to output/report.
  generate_study_population_report_monthly_2:
    run: >-
      cohortextractor:latest
      generate_cohort
      --study-definition study_definition_report
      --index-date-range "2020-01-01 to 2020-12-01 by month"
      --param frequency=monthly
      --output-dir=output/report
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        # NOTE(review): the sibling monthly actions use the same glob with a
        # different literal prefix length; presumably this keeps each action's
        # output pattern textually distinct — confirm before normalising.
        cohort: output/report/input*-01.csv.gz
  
  # Monthly extracts of study_definition_report for index dates
  # 2021-01-01 through 2021-12-01, written to output/report.
  generate_study_population_report_monthly_3:
    run: >-
      cohortextractor:latest
      generate_cohort
      --study-definition study_definition_report
      --index-date-range "2021-01-01 to 2021-12-01 by month"
      --param frequency=monthly
      --output-dir=output/report
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        # NOTE(review): the sibling monthly actions use the same glob with a
        # different literal prefix length; presumably this keeps each action's
        # output pattern textually distinct — confirm before normalising.
        cohort: output/report/inpu*-01.csv.gz

  # Monthly extracts of study_definition_report for index dates
  # 2022-01-01 through 2022-12-01, written to output/report.
  generate_study_population_report_monthly_4:
    run: >-
      cohortextractor:latest
      generate_cohort
      --study-definition study_definition_report
      --index-date-range "2022-01-01 to 2022-12-01 by month"
      --param frequency=monthly
      --output-dir=output/report
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        # NOTE(review): the sibling monthly actions use the same glob with a
        # longer literal prefix; presumably this keeps each action's output
        # pattern textually distinct — confirm before normalising.
        cohort: output/report/inp*-01.csv.gz

  # Joins every monthly extract (--lhs glob) with the ethnicity extract (--rhs)
  # using analysis/cohort_joiner.py, writing results to output/report/joined.
  join_cohorts_report:
    # Every line of the command shares one indent. In a folded (>) scalar,
    # more-indented lines keep their line breaks and leading spaces instead
    # of folding into a single space-separated command line.
    run: >
      python:latest python analysis/cohort_joiner.py
      --lhs output/report/input_report_20*.csv.gz
      --rhs output/report/input_ethnicity_report.csv.gz
      --output-dir output/report/joined
    needs:
      - generate_study_population_report_monthly_1
      - generate_study_population_report_monthly_2
      - generate_study_population_report_monthly_3
      - generate_study_population_report_monthly_4
      - generate_study_population_report_ethnicity
    outputs:
      highly_sensitive:
        cohort: output/report/joined/input_report_20*.csv.gz

  # Runs cohortextractor generate_measures for study_definition_report over
  # the joined monthly cohorts in output/report/joined.
  generate_measures_report:
    run: >-
      cohortextractor:latest
      generate_measures
      --study-definition study_definition_report
      --output-dir=output/report/joined
    needs:
      - join_cohorts_report
    outputs:
      moderately_sensitive:
        measure_csv: output/report/joined/measure_event_*_rate.csv

  # Runs analysis/join_and_round.py over the monthly measure_*_rate.csv files,
  # writing to output/report/results with output name measure_all.csv.
  # Indented with 4 spaces like the other actions in this file.
  join_measures:
    run: >-
      python:latest
      python analysis/join_and_round.py
      --input-files output/report/joined/measure_*_rate.csv
      --output-dir output/report/results
      --output-name "measure_all.csv"
    needs:
      - generate_measures_report
    outputs:
      moderately_sensitive:
        # Two files are declared: the combined file and checking_measure_all.csv.
        measure_csv: output/report/results/measure_all.csv
        measure_csv_checking: output/report/results/checking_measure_all.csv

  # Runs analysis/report/top_5_report.py on measure_all.csv; the top_5*.csv
  # files under output/report/results are declared as outputs.
  top_5_table_report:
    run: >
      python:latest
      python analysis/report/top_5_report.py
      --input-file output/report/results/measure_all.csv
      --output-dir output/report/results
    needs:
      - join_measures
    outputs:
      moderately_sensitive:
        tables: output/report/results/top_5*.csv

  # Runs analysis/report/plot_measures_report.py on measure_all.csv; JPEG files
  # matching *measures* under output/report/results are declared as outputs.
  plot_measure_report:
    run: >
      python:latest
      python analysis/report/plot_measures_report.py
      --measure-path output/report/results/measure_all.csv
      --output-dir output/report/results
    needs:
      - join_measures
    outputs:
      moderately_sensitive:
        measure: output/report/results/*measures*.jpeg

  # Runs analysis/report/create_panels.py on measure_all.csv; every PNG under
  # output/report/results is declared as an output.
  panel_plots:
    run: >
      python:latest
      python analysis/report/create_panels.py
      --input-file output/report/results/measure_all.csv
      --output-dir output/report/results
    needs:
      - join_measures
    outputs:
      moderately_sensitive:
        measure: output/report/results/*.png


  # Runs analysis/report/panel_plots.py on the monthly measures matching
  # event_scarlet_fever_medication_any_2_weeks*rate, producing one PNG.
  panel_plots_event_plus_medication_scarlet:
    run: >
      python:latest
      python analysis/report/panel_plots.py
      --input-file output/report/results/measure_all.csv
      --measures-pattern "event_scarlet_fever_medication_any_2_weeks*rate"
      --output-dir output/report/results
      --output-name "scarlet_fever_plus_medication_by_subgroup"
      --scale "rate"
      --first "event_scarlet_fever_medication_any_2_weeks_rate"
      --exclude-group "Missing"
    needs:
      - join_measures
    outputs:
      moderately_sensitive:
        measure: output/report/results/scarlet_fever_plus_medication_by_subgroup.png

  # Runs analysis/report/panel_plots.py on the monthly measures matching
  # event_invasive_strep_a_medication_any_2_weeks*rate, producing one PNG.
  panel_plots_event_plus_medication_igas:
    run: >
      python:latest
      python analysis/report/panel_plots.py
      --input-file output/report/results/measure_all.csv
      --measures-pattern "event_invasive_strep_a_medication_any_2_weeks*rate"
      --output-dir output/report/results
      --output-name "invasive_strep_a_plus_medication_by_subgroup"
      --scale "rate"
      --first "event_invasive_strep_a_medication_any_2_weeks_rate"
      --exclude-group "Missing"
    needs:
      - join_measures
    outputs:
      moderately_sensitive:
        measure: output/report/results/invasive_strep_a_plus_medication_by_subgroup.png

  # Runs analysis/report/panel_plots.py on the monthly measures matching
  # event_strep_a_sore_throat_medication_any_2_weeks*rate, producing one PNG.
  panel_plots_event_plus_medication_strp:
    run: >
      python:latest
      python analysis/report/panel_plots.py
      --input-file output/report/results/measure_all.csv
      --measures-pattern "event_strep_a_sore_throat_medication_any_2_weeks*rate"
      --output-dir output/report/results
      --output-name "strep_a_sore_throat_plus_medication_by_subgroup"
      --scale "rate"
      --first "event_strep_a_sore_throat_medication_any_2_weeks_rate"
      --exclude-group "Missing"
    needs:
      - join_measures
    outputs:
      moderately_sensitive:
        measure: output/report/results/strep_a_sore_throat_plus_medication_by_subgroup.png

  # Runs analysis/report/event_counts.py over the joined monthly cohorts for
  # the listed measures; event_counts_*.json files are declared as outputs.
  event_counts_report:
    # NOTE(review): "phenoxymethypenicillin" looks like a misspelling of
    # "phenoxymethylpenicillin". It is left as-is because it may have to match
    # a name used elsewhere in the project — confirm before changing.
    run: >
      python:latest
      python analysis/report/event_counts.py
      --input-dir="output/report/joined/"
      --output-dir="output/report/results"
      --measures="amoxicillin,azithromycin,clarithromycin,erythromycin,phenoxymethypenicillin,scarlet_fever,strep_a_sore_throat,invasive_strep_a"
    needs:
      - join_cohorts_report
    outputs:
      moderately_sensitive:
        measure: output/report/results/event_counts_*.json


### WEEKLY ###
  # Weekly extracts of study_definition_report for the range
  # 2022-09-01 to 2022-11-04, written to output/report/weekly.
  # NOTE(review): 2022-09-01 is a Thursday, while 2022-11-04 and the dates in
  # the other weekly actions are Fridays. If the range is stepped in 7-day
  # increments from the start date, the last extract here is 2022-11-03 and
  # the next action resumes at 2022-11-11 — confirm this is intended.
  generate_study_population_report_weekly_1:
    run: >-
      cohortextractor:latest
      generate_cohort
      --study-definition study_definition_report
      --index-date-range "2022-09-01 to 2022-11-04 by week"
      --param frequency=weekly
      --output-dir=output/report/weekly
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        # NOTE(review): the sibling weekly actions use the same glob with a
        # shorter literal prefix; presumably this keeps each action's output
        # pattern textually distinct — confirm before normalising.
        cohort: output/report/weekly/input_*.csv.gz

  # Weekly extracts of study_definition_report for the range
  # 2022-11-11 to 2023-01-06, written to output/report/weekly.
  generate_study_population_report_weekly_2:
    run: >-
      cohortextractor:latest
      generate_cohort
      --study-definition study_definition_report
      --index-date-range "2022-11-11 to 2023-01-06 by week"
      --param frequency=weekly
      --output-dir=output/report/weekly
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        # NOTE(review): the sibling weekly actions use the same glob with a
        # different literal prefix length; presumably this keeps each action's
        # output pattern textually distinct — confirm before normalising.
        cohort: output/report/weekly/input*.csv.gz

  # Weekly extracts of study_definition_report for the range
  # 2023-01-13 to 2023-01-20, written to output/report/weekly.
  generate_study_population_report_weekly_3:
    run: >-
      cohortextractor:latest
      generate_cohort
      --study-definition study_definition_report
      --index-date-range "2023-01-13 to 2023-01-20 by week"
      --param frequency=weekly
      --output-dir=output/report/weekly
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        # NOTE(review): the sibling weekly actions use the same glob with a
        # longer literal prefix; presumably this keeps each action's output
        # pattern textually distinct — confirm before normalising.
        cohort: output/report/weekly/inpu*.csv.gz
      
  # Joins the weekly extracts from weekly_1 and weekly_2 (--lhs glob) with the
  # ethnicity extract (--rhs) using analysis/cohort_joiner.py, writing results
  # to output/report/weekly/joined.
  join_cohorts_report_weekly:
    # Every line of the command shares one indent. In a folded (>) scalar,
    # more-indented lines keep their line breaks and leading spaces instead
    # of folding into a single space-separated command line.
    run: >
      python:latest python analysis/cohort_joiner.py
      --lhs output/report/weekly/input_report_20*.csv.gz
      --rhs output/report/input_ethnicity_report.csv.gz
      --output-dir output/report/weekly/joined
    needs:
      - generate_study_population_report_weekly_1
      - generate_study_population_report_weekly_2
      - generate_study_population_report_ethnicity
    outputs:
      highly_sensitive:
        cohort: output/report/weekly/joined/input_report_20*.csv.gz

  # Joins the weekly extracts from weekly_3 (--lhs glob) with the ethnicity
  # extract (--rhs) using analysis/cohort_joiner.py, writing results to
  # output/report/weekly/joined.
  join_cohorts_report_weekly_2:
    # Every line of the command shares one indent. In a folded (>) scalar,
    # more-indented lines keep their line breaks and leading spaces instead
    # of folding into a single space-separated command line.
    run: >
      python:latest python analysis/cohort_joiner.py
      --lhs output/report/weekly/input_report_2*.csv.gz
      --rhs output/report/input_ethnicity_report.csv.gz
      --output-dir output/report/weekly/joined
    needs:
      - generate_study_population_report_weekly_3
      - generate_study_population_report_ethnicity
    outputs:
      highly_sensitive:
        # NOTE(review): this glob differs from join_cohorts_report_weekly's
        # only by the shorter "2*" prefix; presumably this keeps the two
        # output patterns textually distinct — confirm before normalising.
        cohort: output/report/weekly/joined/input_report_2*.csv.gz


  # Runs cohortextractor generate_measures for study_definition_report over
  # the joined weekly cohorts in output/report/weekly/joined.
  generate_measures_report_weekly:
    run: >-
      cohortextractor:latest
      generate_measures
      --study-definition study_definition_report
      --output-dir=output/report/weekly/joined
    needs:
      - join_cohorts_report_weekly
      - join_cohorts_report_weekly_2
    outputs:
      moderately_sensitive:
        measure_csv: output/report/weekly/joined/measure_event_*_rate.csv


  # Runs analysis/join_and_round.py over the weekly measure_*_rate.csv files,
  # writing to output/report/weekly/results with output name
  # measure_weekly.csv. Indented with 4 spaces like the other actions here.
  join_measures_weekly:
    run: >-
      python:latest
      python analysis/join_and_round.py
      --input-files output/report/weekly/joined/measure_*_rate.csv
      --output-dir output/report/weekly/results
      --output-name "measure_weekly.csv"
    needs:
      - generate_measures_report_weekly
    outputs:
      moderately_sensitive:
        # Only the combined file is declared as an output.
        measure_csv: output/report/weekly/results/measure_weekly.csv

  # Runs analysis/report/top_5_report.py on measure_weekly.csv; the top_5*.csv
  # files under output/report/weekly/results are declared as outputs.
  top_5_table_report_weekly:
    run: >
      python:latest
      python analysis/report/top_5_report.py
      --input-file output/report/weekly/results/measure_weekly.csv
      --output-dir output/report/weekly/results
    needs:
      - join_measures_weekly
    outputs:
      moderately_sensitive:
        tables: output/report/weekly/results/top_5*.csv

  # Runs analysis/report/plot_measures_report.py on measure_weekly.csv; JPEG
  # files matching *measures* under output/report/weekly/results are declared
  # as outputs.
  plot_measure_report_weekly:
    run: >
      python:latest
      python analysis/report/plot_measures_report.py
      --measure-path output/report/weekly/results/measure_weekly.csv
      --output-dir output/report/weekly/results
    needs:
      - join_measures_weekly
    outputs:
      moderately_sensitive:
        measure: output/report/weekly/results/*measures*.jpeg

  # Runs analysis/report/create_panels.py on measure_weekly.csv; every PNG
  # under output/report/weekly/results is declared as an output.
  panel_plots_weekly:
    run: >
      python:latest
      python analysis/report/create_panels.py
      --input-file output/report/weekly/results/measure_weekly.csv
      --output-dir output/report/weekly/results
    needs:
      - join_measures_weekly
    outputs:
      moderately_sensitive:
        measure: output/report/weekly/results/*.png

  # Runs analysis/report/panel_plots.py on the weekly measures matching
  # event_scarlet_fever_medication_any_2_weeks*rate, producing one PNG.
  panel_plots_event_plus_medication_scarlet_weekly:
    run: >
      python:latest
      python analysis/report/panel_plots.py
      --input-file output/report/weekly/results/measure_weekly.csv
      --measures-pattern "event_scarlet_fever_medication_any_2_weeks*rate"
      --output-dir output/report/weekly/results
      --output-name "scarlet_fever_plus_medication_by_subgroup"
      --scale "rate"
      --first "event_scarlet_fever_medication_any_2_weeks_rate"
      --exclude-group "Missing"
    needs:
      - join_measures_weekly
    outputs:
      moderately_sensitive:
        measure: output/report/weekly/results/scarlet_fever_plus_medication_by_subgroup.png

  # Runs analysis/report/panel_plots.py on the weekly measures matching
  # event_invasive_strep_a_medication_any_2_weeks*rate, producing one PNG.
  panel_plots_event_plus_medication_igas_weekly:
    run: >
      python:latest
      python analysis/report/panel_plots.py
      --input-file output/report/weekly/results/measure_weekly.csv
      --measures-pattern "event_invasive_strep_a_medication_any_2_weeks*rate"
      --output-dir output/report/weekly/results
      --output-name "invasive_strep_a_plus_medication_by_subgroup"
      --scale "rate"
      --first "event_invasive_strep_a_medication_any_2_weeks_rate"
      --exclude-group "Missing"
    needs:
      - join_measures_weekly
    outputs:
      moderately_sensitive:
        measure: output/report/weekly/results/invasive_strep_a_plus_medication_by_subgroup.png

  # Runs analysis/report/panel_plots.py on the weekly measures matching
  # event_strep_a_sore_throat_medication_any_2_weeks*rate, producing one PNG.
  panel_plots_event_plus_medication_strp_weekly:
    run: >
      python:latest
      python analysis/report/panel_plots.py
      --input-file output/report/weekly/results/measure_weekly.csv
      --measures-pattern "event_strep_a_sore_throat_medication_any_2_weeks*rate"
      --output-dir output/report/weekly/results
      --output-name "strep_a_sore_throat_plus_medication_by_subgroup"
      --scale "rate"
      --first "event_strep_a_sore_throat_medication_any_2_weeks_rate"
      --exclude-group "Missing"
    needs:
      - join_measures_weekly
    outputs:
      moderately_sensitive:
        measure: output/report/weekly/results/strep_a_sore_throat_plus_medication_by_subgroup.png


  # Runs analysis/report/event_counts.py over the joined weekly cohorts for
  # the listed measures; event_counts_*.json files are declared as outputs.
  event_counts_report_weekly:
    # NOTE(review): "phenoxymethypenicillin" looks like a misspelling of
    # "phenoxymethylpenicillin". It is left as-is because it may have to match
    # a name used elsewhere in the project — confirm before changing.
    run: >
      python:latest
      python analysis/report/event_counts.py
      --input-dir="output/report/weekly/joined/"
      --output-dir="output/report/weekly/results"
      --measures="amoxicillin,azithromycin,clarithromycin,erythromycin,phenoxymethypenicillin,scarlet_fever,strep_a_sore_throat,invasive_strep_a"
    needs:
      - join_cohorts_report_weekly
      - join_cohorts_report_weekly_2
    outputs:
      moderately_sensitive:
        measure: output/report/weekly/results/event_counts_*.json

  # Executes analysis/report/report.ipynb with nbconvert and renders it to
  # HTML (code cells hidden via --no-input) in output/report. Depends on all
  # monthly table and plot actions.
  generate_notebook:
    run: >-
      jupyter:latest
      jupyter nbconvert /workspace/analysis/report/report.ipynb
      --execute
      --to html
      --output-dir=/workspace/output/report
      --ExecutePreprocessor.timeout=86400
      --no-input
    needs:
      - event_counts_report
      - top_5_table_report
      - plot_measure_report
      - panel_plots
      - panel_plots_event_plus_medication_scarlet
      - panel_plots_event_plus_medication_igas
      - panel_plots_event_plus_medication_strp
    outputs:
      moderately_sensitive:
        notebook: output/report/report.html

  # Executes analysis/report/report_weekly.ipynb with nbconvert and renders it
  # to HTML (code cells hidden via --no-input) in output/report. Depends on
  # all weekly table and plot actions.
  generate_notebook_weekly:
    run: >-
      jupyter:latest
      jupyter nbconvert /workspace/analysis/report/report_weekly.ipynb
      --execute
      --to html
      --output-dir=/workspace/output/report
      --ExecutePreprocessor.timeout=86400
      --no-input
    needs:
      - event_counts_report_weekly
      - top_5_table_report_weekly
      - plot_measure_report_weekly
      - panel_plots_weekly
      - panel_plots_event_plus_medication_scarlet_weekly
      - panel_plots_event_plus_medication_igas_weekly
      - panel_plots_event_plus_medication_strp_weekly
    outputs:
      moderately_sensitive:
        notebook: output/report/report_weekly.html

Timeline

  • Created:

  • Started:

  • Finished:

  • Runtime:

These timestamps are generated and stored using the UTC timezone on the backend.

Job information

Status
Succeeded
Backend
TPP
Workspace
strepa_scarlet
Requested by
Louis Fisher
Branch
main
Force run dependencies
No
Git commit hash
8d12834
Requested actions
  • join_measures
  • join_measures_weekly