Skip to content

Job request: 13921

Organisation:
Bennett Institute
Workspace:
medication_reviews
ID:
s4ixwzwmzybc23q5

This page shows the technical details of what happened when the authorised researcher Chris Wood requested one or more actions to be run against real patient data within a secure environment.

By cross-referencing the list of jobs with the pipeline section below, you can infer what security level the outputs were written to.

The output security levels are:

  • highly_sensitive
    • Researchers can never directly view these outputs
    • Researchers can only request that code be run against them
  • moderately_sensitive
    • Can be viewed by an approved researcher by logging into a highly secure environment
    • These are the only outputs that can be requested for public release via a controlled output review service.

Jobs

Pipeline

Show project.yaml
# Pipeline schema version; quoted so it stays a string rather than a float.
version: '3.0'

expectations:
  # NOTE(review): presumably the number of dummy-data rows generated for local
  # runs — confirm against the OpenSAFELY pipeline documentation.
  population_size: 3000

# Each key below is one named action; `needs` lists the actions it depends on.
actions:

  # Runs generate_cohort for study_definition over the monthly index dates
  # 2019-04-01 to 2019-12-01, writing gzipped CSV extracts.
  generate_study_population:
    # Literal block scalar: the indented option lines keep their line breaks.
    run: |
      cohortextractor:latest generate_cohort
        --study-definition study_definition
        --index-date-range "2019-04-01 to 2019-12-01 by month"
        --output-format='csv.gz'
    outputs:
      highly_sensitive:
        cohort: 'output/input_2019*.csv.gz'

  # Runs generate_cohort for study_definition over the monthly index dates
  # 2020-01-01 to 2020-12-01, writing gzipped CSV extracts.
  generate_study_population_range2:
    # Literal block scalar: the indented option lines keep their line breaks.
    run: |
      cohortextractor:latest generate_cohort
        --study-definition study_definition
        --index-date-range "2020-01-01 to 2020-12-01 by month"
        --output-format='csv.gz'
    outputs:
      highly_sensitive:
        cohort: 'output/input_2020*.csv.gz'

  # Runs generate_cohort for study_definition over the monthly index dates
  # 2021-01-01 to 2021-12-01, writing gzipped CSV extracts.
  generate_study_population_range3:
    # Literal block scalar: the indented option lines keep their line breaks.
    run: |
      cohortextractor:latest generate_cohort
        --study-definition study_definition
        --index-date-range "2021-01-01 to 2021-12-01 by month"
        --output-format='csv.gz'
    outputs:
      highly_sensitive:
        cohort: 'output/input_2021*.csv.gz'

  # Runs generate_cohort for study_definition over the monthly index dates
  # 2022-01-01 to 2022-03-01, writing gzipped CSV extracts.
  generate_study_population_range4:
    # Literal block scalar: the indented option lines keep their line breaks.
    run: |
      cohortextractor:latest generate_cohort
        --study-definition study_definition
        --index-date-range "2022-01-01 to 2022-03-01 by month"
        --output-format='csv.gz'
    outputs:
      highly_sensitive:
        cohort: 'output/input_2022*.csv.gz'

  # Runs generate_cohort for study_definition_allmedrev over the monthly index
  # dates 2019-04-01 to 2019-12-01, writing gzipped CSV extracts.
  generate_study_population_allmedrev:
    # Literal block scalar: the indented option lines keep their line breaks.
    run: |
      cohortextractor:latest generate_cohort
        --study-definition study_definition_allmedrev
        --index-date-range "2019-04-01 to 2019-12-01 by month"
        --output-format='csv.gz'
    outputs:
      highly_sensitive:
        cohort: 'output/input_allmedrev_2019*.csv.gz'

  # Runs generate_cohort for study_definition_allmedrev over the monthly index
  # dates 2020-01-01 to 2020-12-01, writing gzipped CSV extracts.
  generate_study_population_allmedrev_range2:
    # Literal block scalar: the indented option lines keep their line breaks.
    run: |
      cohortextractor:latest generate_cohort
        --study-definition study_definition_allmedrev
        --index-date-range "2020-01-01 to 2020-12-01 by month"
        --output-format='csv.gz'
    outputs:
      highly_sensitive:
        cohort2: 'output/input_allmedrev_2020*.csv.gz'

  # Runs generate_cohort for study_definition_allmedrev over the monthly index
  # dates 2021-01-01 to 2021-12-01, writing gzipped CSV extracts.
  generate_study_population_allmedrev_range3:
    # Literal block scalar: the indented option lines keep their line breaks.
    run: |
      cohortextractor:latest generate_cohort
        --study-definition study_definition_allmedrev
        --index-date-range "2021-01-01 to 2021-12-01 by month"
        --output-format='csv.gz'
    outputs:
      highly_sensitive:
        cohort3: 'output/input_allmedrev_2021*.csv.gz'

  # Runs generate_cohort for study_definition_allmedrev over the monthly index
  # dates 2022-01-01 to 2022-03-01, writing gzipped CSV extracts.
  generate_study_population_allmedrev_range4:
    run: >
      cohortextractor:latest generate_cohort
        --study-definition study_definition_allmedrev
        --index-date-range "2022-01-01 to 2022-03-01 by month"
        --output-format='csv.gz'
    outputs:
      highly_sensitive:
        # Output label follows the range suffix (range2 -> cohort2,
        # range3 -> cohort3); this one was previously mislabelled cohort3.
        cohort4: output/input_allmedrev_2022*.csv.gz

  # Runs generate_cohort for study_definition_ethnicity with no index-date
  # range, producing a single gzipped CSV extract.
  generate_ethnicity_cohort:
    # Literal block scalar: the indented option lines keep their line breaks.
    run: |
      cohortextractor:latest generate_cohort
        --study-definition study_definition_ethnicity
        --output-format='csv.gz'
    outputs:
      highly_sensitive:
        cohort: 'output/input_ethnicity.csv.gz'

  # Runs cohort-joiner with the output/input_20* extracts as --lhs and the
  # ethnicity extract as --rhs; results are declared under output/joined.
  join_cohorts:
    # Literal block scalar: the indented option lines keep their line breaks.
    run: |
      cohort-joiner:v0.0.44
        --lhs output/input_20*.csv.gz
        --rhs output/input_ethnicity.csv.gz
        --output-dir output/joined
    needs:
      - generate_study_population
      - generate_study_population_range2
      - generate_study_population_range3
      - generate_study_population_range4
      - generate_ethnicity_cohort
    outputs:
      highly_sensitive:
        cohort: 'output/joined/input_20*.csv.gz'

  # Runs cohort-joiner with the output/input_allmedrev* extracts as --lhs and
  # the ethnicity extract as --rhs; results are declared under output/joined.
  join_cohorts_allmedrev:
    # Literal block scalar: the indented option lines keep their line breaks.
    run: |
      cohort-joiner:v0.0.44
        --lhs output/input_allmedrev*.csv.gz
        --rhs output/input_ethnicity.csv.gz
        --output-dir output/joined
    needs:
      - generate_study_population_allmedrev
      - generate_study_population_allmedrev_range2
      - generate_study_population_allmedrev_range3
      - generate_study_population_allmedrev_range4
      - generate_ethnicity_cohort
    outputs:
      highly_sensitive:
        cohort: 'output/joined/input_allmedrev_*.csv.gz'

# Generate medication review measures (MR/SMR and all reviews), charts and plots

  # Runs cohortextractor generate_measures for study_definition with
  # output/joined as the output directory, once join_cohorts has finished.
  generate_measures_mr_smr:
    # Folded scalar: the three lines below collapse into one command line.
    # Keep them free of trailing spaces, which would end up in the command.
    run: >
      cohortextractor:latest generate_measures
      --study-definition study_definition
      --output-dir=output/joined
    needs: [join_cohorts]
    outputs:
      moderately_sensitive:
        mr_measure_csv: output/joined/measure_mr_*_rate.csv
        mr12m_measure_csv: output/joined/measure_mr12m_*_rate.csv
        smr_measure_csv: output/joined/measure_smr_*_rate.csv
        smr12m_measure_csv: output/joined/measure_smr12m_*_rate.csv

  # Runs cohortextractor generate_measures for study_definition_allmedrev with
  # output/joined as the output directory, once join_cohorts_allmedrev has
  # finished.
  generate_measures_all_reviews:
    # Folded scalar: the three lines below collapse into one command line.
    # Keep them free of trailing spaces, which would end up in the command.
    run: >
      cohortextractor:latest generate_measures
      --study-definition study_definition_allmedrev
      --output-dir=output/joined
    needs: [join_cohorts_allmedrev]
    outputs:
      moderately_sensitive:
        allmedrev_measure_csv: output/joined/measure_allmedrv_*_rate.csv
        allmedrev12m_measure_csv: output/joined/measure_allmedrv12m_*_rate.csv
        allmedrev_measure_asgrouped_csv: output/joined/measure_allmedrv_*_rate_agestandardgrouped.csv
        allmedrev12m_measure_asgrouped_csv: output/joined/measure_allmedrv12m_*_rate_agestandardgrouped.csv

  # Runs deciles-charts over the practice-level measure files in output/joined,
  # after both generate_measures actions.
  generate_deciles_charts:
    # Literal block scalar: the indented option lines keep their line breaks.
    run: |
      deciles-charts:v0.0.33
        --input-files output/joined/measure_*_practice_rate.csv
        --output-dir output/joined
    # Both tables and charts are switched on; outer percentiles are not shown.
    config:
      show_outer_percentiles: false
      tables:
        output: true
      charts:
        output: true
    needs:
      - generate_measures_mr_smr
      - generate_measures_all_reviews
    outputs:
      moderately_sensitive:
        deciles_charts: 'output/joined/deciles_*_*.*'

  # Runs analysis/redact_and_round.py after the measures and code-use actions;
  # its declared outputs live under output/redacted.
  redact_and_round:
    run: >-
      python:latest
      python analysis/redact_and_round.py
    needs:
      - generate_measures_mr_smr
      - generate_codeuse_output
      - generate_allmedrev_codeuse_output
      - generate_measures_all_reviews
    outputs:
      moderately_sensitive:
        cohort: 'output/redacted/redacted_measure_*.csv'
        cohort_codeuse: 'output/redacted/redacted_totalcodeuse*.csv'

  # Runs analysis/plots.py after redact_and_round and generate_deciles_charts;
  # the declared outputs are JPEG figures under output/figures.
  generate_plots:
    run: >-
      python:latest
      python analysis/plots.py
    needs:
      - redact_and_round
      - generate_deciles_charts
    outputs:
      moderately_sensitive:
        percent_cohort: 'output/figures/*_*_rate_percentage.jpeg'
        perthousand_cohort: 'output/figures/*_*_rate_perthousand.jpeg'

  # Runs analysis/table_1.py over the joined allmedrev extracts with the listed
  # demographic columns and the had_anymedrev outcome.
  generate_table_1:
    # Folded scalar (strip): the lines below join with single spaces into the
    # same one-line command as before.
    run: >-
      python:latest python analysis/table_1.py
      --study_def_paths="output/joined/input_allmedrev_*.csv.gz"
      --demographics="age_band,sex,region,imdQ5,ethnicity,learning_disability,care_home_type,addictivemeds_last12m,dmards_last12m,teratogenicmeds_last12m,highriskmeds_last12m"
      --outcome "had_anymedrev"
    needs:
      - join_cohorts_allmedrev
    outputs:
      moderately_sensitive:
        counts: 'output/table_1.csv'
        had_outcome: 'output/table_1_had_outcome.csv'

  # Runs analysis/code_use_summary.py over the joined output/joined/input_20*
  # extracts with the user-chriswood-medication-review.csv codelist file.
  generate_codeuse_output:
    # Folded scalar (strip): the lines below join with single spaces into the
    # same one-line command as before.
    run: >-
      python:latest python analysis/code_use_summary.py
      --study_def_paths="output/joined/input_20*.csv.gz"
      --codelistfile="user-chriswood-medication-review.csv"
      --outputfile="codeuse"
    needs:
      - join_cohorts
    outputs:
      moderately_sensitive:
        code_counts: 'output/codeuse.csv'
        total_code_counts: 'output/totalcodeuse.csv'

  # Runs analysis/code_use_summary.py over the joined allmedrev extracts with
  # the user-chriswood-all-medication-reviews.csv codelist file.
  generate_allmedrev_codeuse_output:
    # Folded scalar (strip): the lines below join with single spaces into the
    # same one-line command as before.
    run: >-
      python:latest python analysis/code_use_summary.py
      --study_def_paths="output/joined/input_allmedrev_*.csv.gz"
      --codelistfile="user-chriswood-all-medication-reviews.csv"
      --outputfile="codeuse_allmedrev"
    needs:
      - join_cohorts_allmedrev
    outputs:
      moderately_sensitive:
        code_counts: 'output/codeuse_allmedrev.csv'
        total_code_counts: 'output/totalcodeuse_allmedrev.csv'

  # Runs pytest verbosely and writes a JUnit XML report to output/pytest.xml.
  # This action declares no dependencies on other actions.
  run_tests:
    run: >-
      python:latest python -m pytest
      --junit-xml=output/pytest.xml
      --verbose
    outputs:
      moderately_sensitive:
        log: 'output/pytest.xml'

Timeline

  • Created:

  • Started:

  • Finished:

  • Runtime: 04:17:56

These timestamps are generated and stored using the UTC timezone on the TPP backend.

Job request

Status
Succeeded
Backend
TPP
Workspace
medication_reviews
Requested by
Chris Wood
Branch
main
Force run dependencies
No
Git commit hash
20957ac
Requested actions
  • generate_deciles_charts
  • redact_and_round
  • generate_plots
  • generate_codeuse_output
  • generate_allmedrev_codeuse_output
  • run_tests

Code comparison

Compare the code used in this job request