{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Reading data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Data input\n", "Getting data into and out of our programs will be a key component of our scripts. There are several ways to do this (yet another good/bad aspect of Python). For class we will rely on a few key Python packages:\n", "
\n", " # Open file\n", " f = open('sample.dat', 'r')\n", " # Read and ignore header lines\n", " header1 = f.readline()\n", " header2 = f.readline()\n", " # Loop over lines and extract variables of interest\n", " for line in f:\n", " line = line.strip()\n", " columns = line.split()\n", " month = columns[0]\n", " temp = float(columns[1])\n", " print(month, temp)\n", " f.close()\n", "\n", "
\n", " data = np.loadtxt('sample.dat', delimiter=',', comments='#')\n", "\n", "
\n", " data = np.fromfile('sample2.dat', dtype=float, sep='\\t', count=-1)\n", "\n", "
\n", " data = np.fromregex('sample.dat', r'(\\d+),\\s(\\d+)', float)\n", "\n", "
\n", " data = np.genfromtxt('sample.dat',delimiter=',',skip_header=2)\n", " # or, if the columns have different types:\n", " #1 2.0000 buckle_my_shoe\n", " #3 4.0000 margery_door\n", " data = np.genfromtxt('filename', dtype= None)\n", " # data = [(1, 2.0, 'buckle_my_shoe'), (3, 4.0, 'margery_door')]\n", "\n", "
\n", " data = pd.read_table('sample.dat', sep=',')\n", "\n", "
\n", " data = pd.read_csv('sample.dat', header=1)\n", "\n", "
\n", " x, sr = librosa.load('sample.wav')\n", "\n", "
\n", " wf = wave.open(('sample.wav'), 'rb')\n", "\n", "
\n", " from netCDF4 import Dataset\n", " fh = Dataset('sample.nc', mode='r')\n", " time = fh.variables['time'][:]\n", " lon = fh.variables['lon'][:,:]\n", " lat = fh.variables['lat'][:,:]\n", " temp = fh.variables['temp'][:,:]\n", "\n", "
\n", " import xarray as xr\n", " ds = xr.open_dataset('sample.nc')\n", " df = ds.to_dataframe()\n", "\n", "
\n", " from pydap.client import open_url\n", " import numpy as np\n", " from numpy import *\n", " # set URL from PO.DAAC\n", " dataset = open_url(\"http://opendap-uat.jpl.nasa.gov/thredds/dodsC/ncml_aggregation/OceanTemperature/ghrsst/aggregate__ghrsst_DMI_OI-DMI-L4-GLOB-v1.0.ncml\")\n", " lat = dataset.lat[:]\n", " lon = dataset.lon[:]\n", " time = dataset.time[:]\n", " sst = dataset.analysed_sst.array[0]\n", "\n", "
\n", " from scipy.io import loadmat\n", " fin1 = loadmat('sample.mat',squeeze_me=True)\n", " mtime = fin1['mday']\n", " Tair = fin1['ta_h']\n", " Press = fin1['bpr']\n", "\n", "
\n", " import geopandas as gpd\n", " shape_gpd = gpd.read_file('sample.shp')\n", "\n", "
\n", " shpf = salem.get_demo_file('sample.shp')\n", " gdf = salem.read_shapefile(shpf)\n", "
\n", " | # | \n", "002 | \n", "\\t113 | \n", "\\tTarawa | \n", "Unnamed: 4 | \n", "Bairiki | \n", "\\tKiribati | \n", "\\t1.33200 | \n", "\\t173.01300 | \n", "
---|---|---|---|---|---|---|---|---|---|
0 | \n", "# | \n", "hourly | \n", "sea | \n", "level | \n", "from | \n", "UHSLC | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
1 | \n", "1992 | \n", "12 | \n", "4 | \n", "1 | \n", "2063 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
2 | \n", "1992 | \n", "12 | \n", "4 | \n", "2 | \n", "1997 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
3 | \n", "1992 | \n", "12 | \n", "4 | \n", "3 | \n", "1846 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
4 | \n", "1992 | \n", "12 | \n", "4 | \n", "4 | \n", "1689 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
56 | \n", "1992 | \n", "12 | \n", "6 | \n", "8 | \n", "1135 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
57 | \n", "1992 | \n", "12 | \n", "6 | \n", "9 | \n", "1130 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
58 | \n", "1992 | \n", "12 | \n", "6 | \n", "10 | \n", "1261 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
59 | \n", "1992 | \n", "12 | \n", "6 | \n", "11 | \n", "1527 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
60 | \n", "1992 | \n", "12 | \n", "6 | \n", "12 | \n", "1817 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
61 rows × 9 columns
\n", "