Job request: 15626
- Organisation: King's College London
- Workspace: gout
- ID: pavx5cuomvu2g2h7
 
This page shows the technical details of what happened when the authorised researcher Mark Russell requested one or more actions to be run against real patient data within a secure environment.
By cross-referencing the list of jobs with the pipeline section below, you can infer what security level the outputs were written to.
The output security levels are:
- highly_sensitive
  - Researchers can never directly view these outputs
  - Researchers can only request that code be run against them
- moderately_sensitive
  - Can be viewed by an approved researcher by logging into a highly secure environment
  - These are the only outputs that can be requested for public release via a controlled output review service
 
 
Jobs
- Job identifier: se6d2cpatoxgcr7x
- Job identifier: j67kuu3haqgqgdql
 
Pipeline
Show project.yaml
# Pipeline definition (project.yaml), format version 3.0.
# Quoted so the version stays a string rather than being read as the float 3.0.
version: '3.0'
expectations:
  # NOTE(review): presumably the number of dummy patients generated when the
  # pipeline runs without a real data backend - confirm against the pipeline docs.
  population_size: 100000
# Each key below is one runnable action: the command to run, the actions it
# depends on (needs), and the files it writes, grouped by security level.
actions:
  # Runs cohortextractor generate_cohort against study_definition and writes
  # the extract as gzipped CSV. The folded scalar (>-) joins the lines below
  # with single spaces, giving one command string.
  generate_study_population:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/input.csv.gz
  # Runs cohortextractor generate_cohort against study_definition_count and
  # writes the extract as gzipped CSV (input to summary_counts).
  generate_study_population_count:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition_count
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/input_count.csv.gz
  # Runs analysis/002_summary_counts.do under stata-mp once the count extract
  # exists. The only declared output is the log, kept at highly_sensitive.
  summary_counts:
    run: stata-mp:latest analysis/002_summary_counts.do
    needs:
      - generate_study_population_count
    outputs:
      highly_sensitive:
        log1: logs/summary_counts.log
  # Runs cohortextractor generate_cohort against study_definition_allpts and
  # writes the extract as gzipped CSV (input to create_cohorts_allpts).
  generate_study_population_allpts:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition_allpts
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/input_allpts.csv.gz
  # Yearly extract from study_definition_year for index date 2015-07-01,
  # written into output/measures for generate_measures to consume.
  generate_study_population_2015:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition_year
      --index-date-range "2015-07-01"
      --output-dir=output/measures
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/measures/input_year_2015-07-01.csv.gz
  # Yearly extract from study_definition_year for index date 2016-07-01,
  # written into output/measures for generate_measures to consume.
  generate_study_population_2016:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition_year
      --index-date-range "2016-07-01"
      --output-dir=output/measures
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/measures/input_year_2016-07-01.csv.gz
  # Yearly extract from study_definition_year for index date 2017-07-01,
  # written into output/measures for generate_measures to consume.
  generate_study_population_2017:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition_year
      --index-date-range "2017-07-01"
      --output-dir=output/measures
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/measures/input_year_2017-07-01.csv.gz
  # Yearly extract from study_definition_year for index date 2018-07-01,
  # written into output/measures for generate_measures to consume.
  generate_study_population_2018:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition_year
      --index-date-range "2018-07-01"
      --output-dir=output/measures
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/measures/input_year_2018-07-01.csv.gz
  
  # Yearly extract from study_definition_year for index date 2019-07-01,
  # written into output/measures for generate_measures to consume.
  generate_study_population_2019:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition_year
      --index-date-range "2019-07-01"
      --output-dir=output/measures
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/measures/input_year_2019-07-01.csv.gz
  # Yearly extract from study_definition_year for index date 2020-07-01,
  # written into output/measures for generate_measures to consume.
  generate_study_population_2020:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition_year
      --index-date-range "2020-07-01"
      --output-dir=output/measures
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/measures/input_year_2020-07-01.csv.gz
  # Yearly extract from study_definition_year for index date 2021-07-01,
  # written into output/measures for generate_measures to consume.
  generate_study_population_2021:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition_year
      --index-date-range "2021-07-01"
      --output-dir=output/measures
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/measures/input_year_2021-07-01.csv.gz
  # Yearly extract from study_definition_year for index date 2022-07-01,
  # written into output/measures for generate_measures to consume.
  generate_study_population_2022:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition_year
      --index-date-range "2022-07-01"
      --output-dir=output/measures
      --output-format=csv.gz
    outputs:
      highly_sensitive:
        cohort: output/measures/input_year_2022-07-01.csv.gz
    
  # Runs cohortextractor generate_measures for study_definition_year over the
  # yearly extracts in output/measures. Depends on every yearly extract
  # (2015-2022); the measure CSVs are declared moderately_sensitive.
  generate_measures:
    run: >-
      cohortextractor:latest generate_measures
      --study-definition study_definition_year
      --output-dir=output/measures
    needs:
      - generate_study_population_2015
      - generate_study_population_2016
      - generate_study_population_2017
      - generate_study_population_2018
      - generate_study_population_2019
      - generate_study_population_2020
      - generate_study_population_2021
      - generate_study_population_2022
    outputs:
      moderately_sensitive:
        measure_csv: output/measures/measure_*.csv
  
  # Runs analysis/001_define_covariates_allpts.do under stata-mp on the
  # all-patients extract; declares a cleaning log and one .dta dataset.
  create_cohorts_allpts:
    run: stata-mp:latest analysis/001_define_covariates_allpts.do
    needs:
      - generate_study_population_allpts
    outputs:
      highly_sensitive:
        log1: logs/cleaning_dataset_allpts.log
        data1: output/data/file_gout_allpts.dta
  # Runs analysis/000_define_covariates.do under stata-mp after both the main
  # extract and the measures exist; declares a cleaning log and four .dta files.
  create_cohorts:
    run: stata-mp:latest analysis/000_define_covariates.do
    needs:
      - generate_study_population
      - generate_measures
    outputs:
      highly_sensitive:
        log1: logs/cleaning_dataset.log
        data1: output/data/file_gout_all.dta
        data2: output/data/gout_prevalence_sex_long.dta
        data3: output/data/gout_incidence_sex_long.dta
        data4: output/data/gout_admissions_sex_long.dta
  # Runs analysis/101_baseline_characteristics_allpts.do under stata-mp on the
  # all-patients dataset; the log and baseline table are moderately_sensitive.
  run_baseline_tables_allpts:
    run: stata-mp:latest analysis/101_baseline_characteristics_allpts.do
    needs:
      - create_cohorts_allpts
    outputs:
      moderately_sensitive:
        log1: logs/descriptive_tables_allpts.log
        doc1: output/tables/baseline_allpts.csv
  # Runs analysis/100_baseline_characteristics.do under stata-mp on the
  # datasets from create_cohorts; declares the log, CSV tables and SVG figures.
  run_baseline_tables:
    run: stata-mp:latest analysis/100_baseline_characteristics.do
    needs:
      - create_cohorts
    outputs:
      moderately_sensitive:
        log1: logs/descriptive_tables.log
        # Output labels skip doc5 and figure1; labels are kept as declared.
        # NOTE(review): "prevalance" in two paths below looks like a misspelling,
        # but the path presumably has to match what the do-file writes - verify
        # before renaming.
        doc1: output/tables/incidence_year_rounded.csv
        doc2: output/tables/incidence_month_rounded.csv
        doc3: output/tables/prevalance_year_rounded.csv
        doc4: output/tables/incidence_admission_year_rounded.csv
        doc6: output/tables/baseline_bydiagnosis.csv
        doc7: output/tables/baseline_byyear.csv
        doc8: output/tables/ult6m_byyear.csv
        doc9: output/tables/ult6m_byregion.csv
        doc10: output/tables/urate6m_byyear.csv
        doc11: output/tables/urate6m_byregion.csv
        figure2: output/figures/prevalance_year_rounded.svg
        figure3: output/figures/incidence_admission_year_rounded.svg
  
  # Runs analysis/200_itsa_models.do under stata-mp on the datasets from
  # create_cohorts; declares the log and two SVG figures.
  run_itsa_models:
    run: stata-mp:latest analysis/200_itsa_models.do
    needs:
      - create_cohorts
    outputs:
      moderately_sensitive:
        log1: logs/itsa_models.log
        figure1: output/figures/ITSA_ult_newey.svg
        figure2: output/figures/ITSA_360_newey.svg
        # Disabled output, kept for reference:
        # doc1: output/tables/gp_to_appt_ITSA_table.csv
  # Runs analysis/300_box_plots.do under stata-mp on the datasets from
  # create_cohorts; declares the log and six regional SVG figures.
  run_box_plots:
    run: stata-mp:latest analysis/300_box_plots.do
    needs:
      - create_cohorts
    outputs:
      moderately_sensitive:
        log1: logs/box_plots.log
        # Labels were "figure 1" ... "figure 6" (with a space); renamed to match
        # the space-free labels used by every other action in this file.
        # File paths are unchanged.
        figure1: output/figures/regional_ult_overall.svg
        figure2: output/figures/regional_ult_2019.svg
        figure3: output/figures/regional_ult_2020.svg
        figure4: output/figures/regional_ult_2021.svg
        figure5: output/figures/regional_ult_2022.svg
        figure6: output/figures/regional_ult_merged.svg
  # run_redacted_tables:
  #   run: stata-mp:latest analysis/400_redacted_tables.do
  #   needs: [create_cohorts]
  #   outputs:
  #     moderately_sensitive:
  #       log1: logs/redacted_tables.log   
  #       doc1: output/tables/table_1_rounded_bydiag.csv   
  #       doc2: output/tables/table_mean_bydiag_rounded.csv 
  #       doc3: output/tables/table_median_bydiag_rounded.csv   
  #       doc4: output/tables/table_median_bydiag_rounded_to21.csv   
  #       doc5: output/tables/ITSA_tables_appt_delay_rounded.csv   
  #       doc6: output/tables/ITSA_tables_csdmard_delay_rounded.csv 
  #       doc7: output/tables/drug_byyearanddisease_rounded.csv 
  #       doc8: output/tables/first_csdmard_rounded.csv   
  #       doc9: output/tables/drug_byyearandregion_rounded.csv
  #       doc10: output/tables/referral_byregion_rounded.csv
  #       doc11: output/tables/consultation_medium_rounded.csv
  #       doc12: output/tables/table_median_bydiag_rounded_to21_report.csv 
  #       doc13: output/tables/first_csdmard_rounded_report.csv  
  # run_redacted_tables_allpts:
  #   run: stata-mp:latest analysis/401_redacted_tables_allpts.do
  #   needs: [create_cohorts_allpts]
  #   outputs:
  #     moderately_sensitive:
  #       log1: logs/redacted_tables_allpts.log   
  #       doc1: output/tables/table_1_rounded_allpts.csv  
  # convert_image_formats:
  #   run: python:latest python analysis/convert_images.py --input_dir output/figures --output_dir output/figures
  #   needs: [run_baseline_tables, run_itsa_models, run_box_plots, run_redacted_tables]
  #   outputs:
  #     moderately_sensitive:
  #       figures: output/figures/*.png           
  # generate_notebook:
  #   run: jupyter:latest jupyter nbconvert /workspace/analysis/report.ipynb --execute --to html --template basic --output-dir=/workspace/output --ExecutePreprocessor.timeout=86400 --no-input
  #   needs: [convert_image_formats,run_baseline_tables, run_itsa_models, run_box_plots, run_redacted_tables]
  #   outputs:
  #     moderately_sensitive:
  #       notebook: output/report.html
Timeline
- 
  
    
  
  
Created:
 - 
  
    
  
  
Started:
 - 
  
    
  
  
Finished:
 - 
  
  
Runtime: 00:05:58
 
These timestamps are generated and stored using the UTC timezone on the TPP backend.
Code comparison
Compare the code used in this job request
- No previous job request available for comparison