Advertisement
DenisSergeevitch

DNA 3D visualiser with GPT4.5

Apr 30th, 2024 (edited)
763
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.34 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. from mpl_toolkits.mplot3d import Axes3D
  5.  
  6. # Step 1: Load Data Efficiently
  7. def load_data(filepath):
  8.     headers = ['rsid', 'chromosome', 'position', 'genotype']
  9.     # Specify dtype to improve read performance
  10.     dtypes = {'rsid': str, 'chromosome': 'category', 'position': int, 'genotype': str}
  11.     data = pd.read_csv(filepath, delimiter='\t', names=headers, comment='#', dtype=dtypes, low_memory=False)
  12.     print(data.head())
  13.     return data
  14.  
  15. # Step 2: Preprocess and Encode Data
  16. def preprocess_data(data):
  17.     # Replace '--' with NaN directly in read_csv using na_values
  18.     data['genotype'].replace('--', np.nan, inplace=True)
  19.     data.dropna(subset=['genotype'], inplace=True)
  20.     data['genotype_value'] = data['genotype'].apply(lambda x: len(x))
  21.    
  22.     # Convert 'chromosome' to numeric if possible, otherwise to category which is handled later in plotting
  23.     data['chromosome'] = pd.to_numeric(data['chromosome'], errors='coerce')
  24.     if data['chromosome'].isnull().any():
  25.         data['chromosome'] = pd.Categorical(data['chromosome'].fillna('X')).codes
  26.     return data
  27.  
  28. # Step 3: Optimized Plotting Function
  29. def plot_data(data):
  30.     fig = plt.figure(figsize=(10, 8))
  31.     ax = fig.add_subplot(111, projection='3d')
  32.  
  33.     # Downsampling data for faster rendering; adjust the step size as needed
  34.     step = 10  # Increase step size to speed up or decrease for more detail
  35.     # Adjusting the color map to 'Reds' and normalizing the color range
  36.     norm = plt.Normalize(data['genotype_value'].min(), data['genotype_value'].max())
  37.     scatter = ax.scatter(data['chromosome'][::step], data['position'][::step], data['genotype_value'][::step],
  38.                          c=data['genotype_value'][::step], cmap='Reds', norm=norm,
  39.                          marker='o', alpha=0.6, s=50)
  40.  
  41.     ax.set_xlabel('Chromosome')
  42.     ax.set_ylabel('Position')
  43.     ax.set_zlabel('Genotype Value')
  44.     ax.set_title('3D Visualization of DNA Genotypes')
  45.  
  46.     cbar = fig.colorbar(scatter, ax=ax, pad=0.1)
  47.     cbar.set_label('Genotype Value')
  48.  
  49.     plt.show()
  50.  
  51. # Main function
  52. def main():
  53.     filepath = '/file.txt'  # Specify the actual file path
  54.     data = load_data(filepath)
  55.     processed_data = preprocess_data(data)
  56.     plot_data(processed_data)
  57.  
  58. if __name__ == "__main__":
  59.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement