{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Analysis of intronic polyA sites\n", "This notebook demonstrates how to create customized features. The example uses intronic polyA annotations from the polyADB database." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Using /home/hsher/gencode_coords/GRCh38.p13.genome.fa\n", "using /home/hsher/gencode_coords/GRCh38.p13.genome.fa\n", "Using: /home/hsher/gencode_coords/gencode.v33.transcript.gff3\n" ] } ], "source": [ "# set up the files associated with the genome coordinates\n", "import metadensity as md\n", "md.settings.from_config_file('/tscc/nfs/home/hsher/Metadensity/config/hg38-tscc2.ini')\n", "\n", "\n", "# then import the modules\n", "from metadensity.metadensity import *\n", "from metadensity.plotd import *\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "\n", "\n", "# I have a precompiled list of ENCODE data as a CSV that is loaded by this dataloader\n", "import sys\n", "sys.path.append('/home/hsher/Metadensity/scripts')\n", "from dataloader import *\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# use katie's PCF11\n", "katie_data = pd.read_csv('/home/hsher/projects/katie/katie_eclip.csv', index_col = 0)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | uid | \n", "RBP | \n", "Cell line | \n", "bam_0 | \n", "bam_1 | \n", "bam_control | \n", "minus_0 | \n", "minus_1 | \n", "minus_control | \n", "plus_0 | \n", "... | \n", "bed_0 | \n", "bed_1 | \n", "Batch | \n", "prefix | \n", "bam_control_0 | \n", "bam_control_1 | \n", "plus_control_0 | \n", "plus_control_1 | \n", "minus_control_0 | \n", "minus_control_1 | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 28 | \n", "258 | \n", "CPSF6 | \n", "K562 | \n", "/projects/ps-yeolab3/encode/analysis/encode_GR... | \n", "/projects/ps-yeolab3/encode/analysis/encode_GR... | \n", "/projects/ps-yeolab3/encode/analysis/encode_GR... | \n", "/projects/ps-yeolab3/encode/analysis/encode_GR... | \n", "/projects/ps-yeolab3/encode/analysis/encode_GR... | \n", "/projects/ps-yeolab3/encode/analysis/encode_GR... | \n", "/projects/ps-yeolab3/encode/analysis/encode_GR... | \n", "... | \n", "/projects/ps-yeolab5/encode/EVN_eCLIP_analysis... | \n", "/projects/ps-yeolab5/encode/EVN_eCLIP_analysis... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
1 rows × 23 columns
\n", "| \n", " | bam_0 | \n", "bam_control_0 | \n", "minus_0 | \n", "minus_control_0 | \n", "plus_0 | \n", "plus_control_0 | \n", "bed_0 | \n", "bam_1 | \n", "bam_control_1 | \n", "minus_1 | \n", "minus_control_1 | \n", "plus_1 | \n", "plus_control_1 | \n", "bed_1 | \n", "uid | \n", "RBP | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "/home/wjin/projects/CLIP_seq/Kris_Katie/Data/P... | \n", "/home/wjin/projects/CLIP_seq/Kris_Katie/Data/P... | \n", "/home/wjin/projects/CLIP_seq/Kris_Katie/Data/P... | \n", "/home/wjin/projects/CLIP_seq/Kris_Katie/Data/P... | \n", "/home/wjin/projects/CLIP_seq/Kris_Katie/Data/P... | \n", "/home/wjin/projects/CLIP_seq/Kris_Katie/Data/P... | \n", "/home/wjin/projects/CLIP_seq/Kris_Katie/Data/P... | \n", "/home/wjin/projects/CLIP_seq/Kris_Katie/Data/P... | \n", "/home/wjin/projects/CLIP_seq/Kris_Katie/Data/P... | \n", "/home/wjin/projects/CLIP_seq/Kris_Katie/Data/P... | \n", "/home/wjin/projects/CLIP_seq/Kris_Katie/Data/P... | \n", "/home/wjin/projects/CLIP_seq/Kris_Katie/Data/P... | \n", "/home/wjin/projects/CLIP_seq/Kris_Katie/Data/P... | \n", "/home/wjin/projects/CLIP_seq/Kris_Katie/Data/P... | \n", "PCF11 | \n", "PCF11 | \n", "