AUC estimation using Mann Whitney U

import pandas as pd
from tqdm import tqdm
import glob
import pylab as plt
import scipy as sp
import numpy as np
from scipy import stats, optimize, interpolate
plt.style.use('ggplot')

def calc_U(y_true, y_score,cb=0.99):
    '''
    Calculate AUC and confidence bounds / pvalue 
    on AUC using U test correspondance
    '''    
    ZALPHA={0.9:1.645,0.95:1.96,.99:2.58,.999:3.27}
    n1 = np.sum(y_true==1)
    n0 = len(y_score)-n1
    order = np.argsort(y_score)
    rank = np.argsort(order)
    rank += 1
    U1 = np.sum(rank[y_true == 1]) - n1*(n1+1)/2
    U0 = np.sum(rank[y_true == 0]) - n0*(n0+1)/2
    AUC1 = U1/ (n1*n0)
    AUC0 = U0/ (n1*n0)
    
    EU1=n0*n1*0.5
    s1=np.sqrt(n0*n1*(n0+n1+1)/12.)
    U1_z= (U1-EU1)/s1
    p = sp.stats.norm.sf(abs(U1_z))*2 #twosided
    
    CF=(ZALPHA[cb]*s1)/(n1*n0)
    
    if AUC1>AUC0:
        return AUC1, p,U1,U1_z,CF
    
    return AUC0, p0,U0,U0_z,CF  

confidence bound with lowess

#dataframe: index (float), csingle column with float


import scipy.stats as stats
import statsmodels.api as sm
from scipy import interpolate
from scipy.interpolate import interp1d

lowess = sm.nonparametric.lowess
df.index=df.index.astype(float)
df=df.sort_index()
S=.8
RUNS=10
DF=df

for i in range(RUNS):
    X=df.sample(int(np.round(S*df.index.size))).sort_index().reset_index().values

    z = lowess(X[:,1], X[:,0], frac= .25)
    f = interp1d(z[:,0], z[:,1])
    Y=f(X[:,0])
    DF=DF.join(pd.DataFrame(Y,X[:,0]),rsuffix=str(i))
DF=DF.interpolate(method='index',order=3,limit_direction='both')
STD=pd.DataFrame(2.62*(DF.std(axis=1)/np.sqrt(RUNS)))
Mf=pd.DataFrame(DF.mean(axis=1)).assign(sm=STD)
Mf.columns=['mn','sm']
ax=Mf.mn.plot()
plt.fill_between(Mf.index,Mf.mn-Mf.sm,Mf.mn+Mf.sm,alpha=.5)
ax.set_xlabel('mean number of items')
ax.set_ylabel('mean accuracy')

ICD10 API

https://clinicaltables.nlm.nih.gov/api/icd10cm/v3/search?sf=code,name&terms=M35

Getting all infection codes in ICD 10

https://clinicaltables.nlm.nih.gov/api/icd10cm/v3/search?sf=code,name&terms=infect&maxList=716
Note: the default maxList is 7

ear

https://clinicaltables.nlm.nih.gov/api/icd10cm/v3/search?sf=code,name&terms=otiti&maxList=716

intestine

https://clinicaltables.nlm.nih.gov/api/icd10cm/v3/search?sf=code,name&terms=intest&maxList=716

Writing Latex Tables from Pandas

import pandas as pd
import numpy as np
DUMMY=False
STRA='L{1in}|L{1.25in}|L{1.25in}|L{1.5in}|L{.3in}|L{.3in}'

def texTable(df,tabname='tmp.tex',FORMAT='%1.2f',INDEX=True,DUMMY=DUMMY,USE_l=False,
             TABFORMAT=None,LNTERM='\\\\\\cline{2-5}\n'):
    '''
        write latex table
    '''
    if DUMMY:
        return
    if INDEX:
        df=df.reset_index()
    columns=df.columns
    df.columns=[x.replace('_','\\_').replace('\_\_','_') for x in columns]
    for col in df.columns:
        if df[col].dtype == 'object':
            df[col]=df[col].str.replace('_','\\_')
    
    if USE_l:
        TABFORMAT='l'*len(df.columns)
    else:
        if TABFORMAT is None:
            TABFORMAT='L{1in}|'*len(df.columns)
            TABFORMAT=TABFORMAT[:-1]
    STR='\\begin{tabular}{'+TABFORMAT+'}\\hline\n'        
    with open(tabname,'w') as f:
        f.write(STR)
    df.to_csv(tabname,float_format=FORMAT,
              line_terminator=LNTERM,
              sep='&',quotechar=' ',index=None,mode='a')
    
    with open(tabname,'a') as f:
        f.write('\\hline\\end{tabular}\n')
        

Saving Publication Quality Figures from Matplotlib

def saveFIG(filename='tmp.pdf',AXIS=False):
    '''
        save fig for publication
    '''
    import pylab as plt
    plt.subplots_adjust(top = 1, bottom = 0, right = 1, left = 0, 
            hspace = 0, wspace = 0)
    plt.margins(0,0)
    if not AXIS:
        plt.gca().xaxis.set_major_locator(plt.NullLocator())
        plt.gca().yaxis.set_major_locator(plt.NullLocator())
    plt.savefig(filename,dpi=300, bbox_inches = 'tight',
                pad_inches = 0,transparent=True) 
    return

ICD CODE LOOKUP

#!/usr/bin/python
import os
import sys
sys.path.append('../../../../pycode/')
from tqdm import tqdm

import pandas as pd
import numpy as np
import glob
import pylab as plt
import subprocess
import urllib
import json
import tempfile
import argparse
from argparse import RawTextHelpFormatter
import re

DUMMY=False
STRA='L{1in}|L{1.25in}|L{1.25in}|L{1.5in}|L{.3in}|L{.3in}'

def texTable(df,tabname='tmp.tex',FORMAT='%1.2f',INDEX=True,DUMMY=DUMMY,USE_l=False):
    '''
        write latex table
    '''
    if DUMMY:
        return
    if INDEX:
        df=df.reset_index()
    columns=df.columns
    df.columns=[str(x).replace('_','\\_') for x in columns]
    for col in df.columns:
        if df[col].dtype == 'object':
            df[col]=df[col].str.replace('_','\\_')
    
    if USE_l:
        TABFORMAT='l'*len(df.columns)
    else:
        TABFORMAT='L{1in}|'*len(df.columns)
        TABFORMAT=TABFORMAT[:-1]
    STR='\\begin{tabular}{'+TABFORMAT+'}\n'        
    with open(tabname,'w') as f:
        f.write(STR)
    df.to_csv(tabname,float_format=FORMAT,
              line_terminator='\\\\\\hline\n',
              sep='&',quotechar=' ',index=None,mode='a')
    
    with open(tabname,'a') as f:
        f.write('\\end{tabular}\n')

        
banner='GENERATE TABLE OF ICD DESCRIZPTIONS IN LATEX FORMAT BY LOOKING UP NAMES FROM WEB'
zed='copyright 2020 zed.uchicago.edu'
def str2bool(v):
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    elif v.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    else:
        raise argparse.ArgumentTypeError('Boolean value expected.')

ICDDESC='../figfiles/ICD9desc'
NAME_TYPE='short_name'

parser = argparse.ArgumentParser(description='########\n'+banner+'\n'+zed,formatter_class=RawTextHelpFormatter)
parser._optionals.title="Program Options"

parser.add_argument('-list', metavar="", nargs='+',dest='CODELIST',
                    action="store", type=str,
                    default=[],
                    help="code list")
parser.add_argument('-codefile', metavar="",dest='CODEFILE',
                    action="store", type=str,
                    default=None,
                    help="code list file")
parser.add_argument('-pref', metavar="", dest='FILEPREF',
                    action="store", type=str,
                    default='tmp',
                    help="filepref")
parser.add_argument('-nametype', metavar="", dest='NAME_TYPE',
                    action="store", type=str,
                    default='short_name',
                    help="short or long names")
parser.add_argument('-fast', metavar="", dest='FAST',
                    action="store", type=str2bool,
                    default=False,
                    help="fast comp no web lookup of codes")
parser.add_argument('-dictsave', metavar="", dest='SAVE',
                    action="store", type=str2bool,
                    default=False,
                    help="save code dict to avoid future lookups")


 
if len(sys.argv[1:])==0:
    parser.print_help()
    parser.exit()
args=parser.parse_args()

fpref=args.FILEPREF
CODELIST=args.CODELIST
NAME_TYPE=args.NAME_TYPE
CODEFILE=args.CODEFILE
SAVE=args.SAVE

if CODEFILE is not None:
    with open(CODEFILE) as fp:
        CODELIST=CODELIST+fp.readline().split()

CODELIST=list(set(CODELIST))


if not os.path.isfile('ICDCODEDICT_.json'):
    ICDCODEDICT={}
else:
    ICDCODEDICT=json.load(open("ICDCODEDICT_.json"))
    
def getICDdesc(code,NUM_MAX=1):
    names=[]
    if code[0] == 'E':
        code=code.replace(".","")

    if code in ICDCODEDICT.keys():
        return ICDCODEDICT[code]
    
    f = tempfile.NamedTemporaryFile()
    url='https://clinicaltables.nlm.nih.gov/api/icd9cm_dx/v3/search?terms='+code+'&ef='+NAME_TYPE
    urllib.urlretrieve(url, filename=f.name)
    with open(f.name) as json_file:  
        data = json.load(json_file)
    if data[0]>0:
        for i in range(min([NUM_MAX, len(data[2][NAME_TYPE])] )):
            names=np.append(names,str(data[2][NAME_TYPE][i]).strip())
    f.close()
    if len(names) > 0:
        if NUM_MAX == 1:
            names=names[0].replace('&','\&').replace('<','less')
    else:
        names=None

    ICDCODEDICT[code]=names
    return names

codeDICT={str(code):getICDdesc(code) for code in tqdm(CODELIST)}
df=pd.DataFrame.from_dict(codeDICT,orient='index')
df.columns=['description']
df.index.name='code'
df.to_csv(fpref+'.csv')
texTable(df,tabname=fpref+'.tex')
if SAVE:
    json.dump(codeDICT, open( "ICDCODEDICT_.json", 'w' ) )

PMID To BIBTEX

#!/usr/bin/python3
# script by Tommy https://www.biostars.org/u/1945/
import requests
import xml.etree.ElementTree as ET
import sys
import calendar

# Parse PubMed IDs from the command line.
pmids = sys.argv[1:]

## Fetch XML data from Entrez.
efetch = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
r = requests.get(
    '{}?db=pubmed&id={}&rettype=abstract'.format(efetch, ','.join(pmids)))
##print(r.text)

## Loop over the PubMed IDs and parse the XML.
root = ET.fromstring(r.text)
for PubmedArticle in root.iter('PubmedArticle'):
    PMID = PubmedArticle.find('./MedlineCitation/PMID')
    ISSN = PubmedArticle.find('./MedlineCitation/Article/Journal/ISSN')
    Volume = PubmedArticle.find('./MedlineCitation/Article/Journal/JournalIssue/Volume')
    Issue = PubmedArticle.find('./MedlineCitation/Article/Journal/JournalIssue/Issue')
    Year = PubmedArticle.find('./MedlineCitation/Article/Journal/JournalIssue/PubDate/Year')
    Month = PubmedArticle.find('./MedlineCitation/Article/Journal/JournalIssue/PubDate/Month')
##    Year = PubmedArticle.find('./MedlineCitation/Article/ArticleDate/Year')
##    Month = PubmedArticle.find('./MedlineCitation/Article/ArticleDate/Month')
    Title = PubmedArticle.find('./MedlineCitation/Article/Journal/Title')
    ArticleTitle = PubmedArticle.find('./MedlineCitation/Article/ArticleTitle')
    MedlinePgn = PubmedArticle.find('./MedlineCitation/Article/Pagination/MedlinePgn')
    Abstract = PubmedArticle.find('./MedlineCitation/Article/Abstract/AbstractText')
    authors = []
    for Author in PubmedArticle.iter('Author'):
        try:
            LastName = Author.find('LastName').text
            ForeName = Author.find('ForeName').text
        except AttributeError:  # e.g. CollectiveName
            continue
        authors.append('{}, {}'.format(LastName, ForeName))
    ## Use InvestigatorList instead of AuthorList
    if len(authors) == 0:
        ## './MedlineCitation/Article/Journal/InvestigatorList'
        for Investigator in PubmedArticle.iter('Investigator'):
            try:
                LastName = Investigator.find('LastName').text
                ForeName = Investigator.find('ForeName').text
            except AttributeError:  # e.g. CollectiveName
                continue
            authors.append('{}, {}'.format(LastName, ForeName))
    if Year is None:
        _ = PubmedArticle.find('./MedlineCitation/Article/Journal/JournalIssue/PubDate/MedlineDate')
        Year = _.text[:4]
        Month = '{:02d}'.format(list(calendar.month_abbr).index(_.text[5:8]))
    else:
        Year = Year.text
        if Month is not None:
            Month = Month.text
    try:
        for _ in (PMID.text, Volume.text, Title.text, ArticleTitle.text, MedlinePgn.text, Abstract.text, ''.join(authors)):
    ##        assert '"' not in _, _
            if _ is None:
                continue
            assert '{' not in _, _
            assert '}' not in _, _
    except AttributeError:
        pass
    ## Print the bibtex formatted output.
    try:
        print('@Article{{{}{}pmid{},'.format(
            authors[0].split(',')[0], Year, PMID.text))
    except IndexError:
        print('IndexError', pmids, file=sys.stderr, flush=True)
    except AttributeError:
        print('AttributeError', pmids, file=sys.stderr, flush=True)
    print(' Author="{}",'.format(' AND '.join(authors)))
    print(' Title={{{}}},'.format(ArticleTitle.text))
    print(' Journal={{{}}},'.format(Title.text))
    print(' Year={{{}}},'.format(Year))
    if Volume is not None:
        print(' Volume={{{}}},'.format(Volume.text))
    if Issue is not None:
        print(' Number={{{}}},'.format(Issue.text))
    if MedlinePgn is not None:
        print(' Pages={{{}}},'.format(MedlinePgn.text))
    if Month is not None:
        print(' Month={{{}}},'.format(Month))
    if Abstract is not None:    
        print(' Abstract={{{}}},'.format(Abstract.text))
    print(' ISSN={{{}}},'.format(ISSN.text))
    print('}')

Fancy Pivot to Latex


def chk(val,FLAG=True):
    if FLAG:
        if not isinstance(val, str):
            val=str(val)
            if len(val) > 4:
                return val[:4]
    return val
        


def fancyPivot(df,INDEX=None,COLUMN=None,
               SUBINDEX=None,STRL=None,FLAG=False,
              HFORMAT='\\bf\\sffamily ',
              FONTSMALL='\\fontsize{8}{8}\\selectfont'):
    vI=df[INDEX].value_counts().index.values
    vC=df[COLUMN].value_counts().index.values
    D={}
    for i in vI:
        for c in vC:
            df_=df[(df[INDEX]==i) & (df[COLUMN]==c)].drop([INDEX,COLUMN],axis=1).set_index(SUBINDEX)
            df_.index.name=SUBINDEX
            df_=df_.sort_index()
            D[(i,c)]=df_
            subcols=len(df_.columns)+1
            subrows=len(df_.index.values)+1
    Tcols=len(vC)*subcols+1
    Trows=len(vI)*subrows
    
    STR='\\begin{tabular}{'
    
    if STRL is None:
        STR=STR+'L{1in}||'
        for i in np.arange(len(vC)):
            STR=STR+'L{.7in}'*(subcols)  +'|'
        STR=STR[:-1]+'}'
    else:
        STR=STR+STRL+'}'
        
    S2='\\hline'
    for i in vC:
        S2=S2+'&'+'\multicolumn{'+str(subcols)+'}{c}{'+HFORMAT+FONTSMALL+' '+i+'}'
    S2=S2+'\\\\\\cline{2-'+str((len(df.columns)-2)*2+1)+'}'
    
    for i in vI:
        S2=S2+'\multirow{'+str(subrows)+'}{*}{'+HFORMAT+FONTSMALL+' '+i+'}&'+'\n'
        for c in vC:
            S2=S2+HFORMAT+SUBINDEX+'&'
            for cc in df_.columns:
                S2=S2+HFORMAT+FONTSMALL+' '+cc+'&'
        S2=S2[:-1]+'\\\\\cline{2-'+str((len(df.columns)-2)*2+1)+'}'
        for r in df_.index:
            S2=S2+'\multirow{'+str(subrows)+'}{*}{}&'
            for c in vC:
                S2=S2+r+'&'
                for cc in df_.columns:
                    S2=S2+"{}".format(chk(D[(i,c)].loc[r,cc],FLAG))+'&'
            S2=S2[:-1]+'\\\\'
        S2=S2+'\\hline'
    
    return STR+S2+'\\end{tabular}'

Example use of fancy-pivot

BF1.columns=['target problem', 'gender', 'subcohort', 'auc', '\% change']
s=fancyPivot(BF1,INDEX='target problem',COLUMN='gender',SUBINDEX='subcohort',STRL="L{.8in}||L{1in}C{.35in}R{.65in}|L{1in}C{.35in}R{.65in}")

with open("../figfiles/tex/Figures/subcohort.tex", "w") as text_file:
    text_file.write("%s" % s)

python geo libraries

ArcGIS - https://lnkd.in/dgC6sKJH

Cartopy - https://lnkd.in/dc8ijXRg

Contextily - https://lnkd.in/dTdQsmKX

Descartes - https://lnkd.in/dCJykxwW

Fiona - https://lnkd.in/d8sJ3Q5a

Folium - https://lnkd.in/dfSsE-MB

Gdal - https://lnkd.in/dYBJBaAY

Geohash - https://lnkd.in/d_NxJ4_M

Geojson - https://lnkd.in/daGs2WYq

Geopandas - https://lnkd.in/dBTFKKV3

Geopy - https://lnkd.in/dfAzR8Xa

Gevent - http://www.gevent.org

H3 - https://h3geo.org/docs/

OSMnx - https://lnkd.in/dm3pHgUS

PyQGIS - https://lnkd.in/dShWyWVr

PySAL - https://pysal.org

Pydeck - https://lnkd.in/dGBFu-iw

Pyproj - https://lnkd.in/dNG9fdkm

RTree - https://lnkd.in/dURMiYpU

Rasterio - https://lnkd.in/dEMC6ve6

Scikit-mobility - https://lnkd.in/dpHhaX2J

Shapely - https://lnkd.in/d568datK