☰
Current Page
Main Menu
Home
Home
Editing
Python Snippets
Edit
Preview
h1
h2
h3
Keybinding
default
vim
emacs
Markup
BibTeX
Markdown
MediaWiki
Org-mode
Plain Text
Pod
RDoc
reStructuredText
Textile
AsciiDoc
Creole
Help 1
Help 1
Help 1
Help 2
Help 3
Help 4
Help 5
Help 6
Help 7
Help 8
Autosaved text is available. Click the button to restore it.
Restore Text
# AUC estimation using Mann Whitney U

```
import pandas as pd
from tqdm import tqdm
import glob
import pylab as plt
import scipy as sp
import numpy as np
from scipy import stats, optimize, interpolate

plt.style.use('ggplot')


def calc_U(y_true, y_score, cb=0.99):
    '''
    Calculate AUC and confidence bounds / pvalue on AUC
    using the Mann-Whitney U test correspondence.

    y_true  : binary labels; entries equal to 1 are the positive class,
              every other entry is counted as negative
    y_score : scores, one per label
    cb      : two-sided confidence level for the bound on AUC

    Returns (AUC, p, U, z, CF) for whichever class yields the larger AUC:
      AUC : U/(n1*n0)
      p   : two-sided p-value from the normal approximation of U
      U   : U statistic of the reported class
      z   : z-score of that U statistic
      CF  : z_alpha*sigma_U/(n1*n0), the half-width of the bound

    Raises ValueError if either class is empty.
    '''
    ZALPHA = {0.9: 1.645, 0.95: 1.96, .99: 2.58, .999: 3.27}

    # accept plain lists as well as arrays
    y_true = np.asarray(y_true)
    y_score = np.asarray(y_score)

    n1 = np.sum(y_true == 1)
    n0 = len(y_score) - n1
    if n1 == 0 or n0 == 0:
        raise ValueError('calc_U needs at least one positive and one negative sample')

    # 1-based mid-ranks: tied scores share their average rank, so a tie
    # between a positive and a negative contributes 1/2 to U
    rank = stats.rankdata(y_score)

    U1 = np.sum(rank[y_true == 1]) - n1*(n1+1)/2
    U0 = n1*n0 - U1          # the two statistics always sum to n1*n0

    AUC1 = U1/(n1*n0)
    AUC0 = U0/(n1*n0)

    EU1 = n0*n1*0.5
    # sigma of U under the null hypothesis (no tie correction applied)
    s1 = np.sqrt(n0*n1*(n0+n1+1)/12.)
    U1_z = (U1-EU1)/s1
    p = stats.norm.sf(abs(U1_z))*2  # twosided

    # tabulated critical values are kept as they were; any other
    # confidence level is taken from the normal quantile function
    z_alpha = ZALPHA.get(cb)
    if z_alpha is None:
        z_alpha = stats.norm.ppf(0.5 + cb/2.)
    CF = (z_alpha*s1)/(n1*n0)

    if AUC1 > AUC0:
        return AUC1, p, U1, U1_z, CF
    # U0 is the mirror image of U1 about its mean: same two-sided p,
    # z-score of opposite sign
    return AUC0, p, U0, -U1_z, CF
```

# confidence bound with lowess

```
#dataframe: index (float), single column with float
# (np, pd and plt are the names imported in the snippet above)
import scipy.stats as stats
import statsmodels.api as sm
from scipy import interpolate
from scipy.interpolate import interp1d

lowess = sm.nonparametric.lowess

df.index=df.index.astype(float)
df=df.sort_index()

S=.8      # fraction of rows drawn in every resample
RUNS=10   # number of resamples
DF=df
for i in range(RUNS):
    # draw a subsample, smooth it with lowess and evaluate the smooth
    # at the sampled index values
    X=df.sample(int(np.round(S*df.index.size))).sort_index().reset_index().values
    z = lowess(X[:,1], X[:,0], frac= .25)
    f = interp1d(z[:,0], z[:,1])
    Y=f(X[:,0])
    DF=DF.join(pd.DataFrame(Y,X[:,0]),rsuffix=str(i))

# rows that were not drawn in a given run are NaN after the join
DF=DF.interpolate(method='index',order=3,limit_direction='both')
STD=pd.DataFrame(2.62*(DF.std(axis=1)/np.sqrt(RUNS)))
Mf=pd.DataFrame(DF.mean(axis=1)).assign(sm=STD)
Mf.columns=['mn','sm']

ax=Mf.mn.plot()
plt.fill_between(Mf.index,Mf.mn-Mf.sm,Mf.mn+Mf.sm,alpha=.5)
ax.set_xlabel('mean number of items')
ax.set_ylabel('mean accuracy')
```

# ICD10 API

```
https://clinicaltables.nlm.nih.gov/api/icd10cm/v3/search?sf=code,name&terms=M35
```

## Getting all infection codes in ICD 10

```
https://clinicaltables.nlm.nih.gov/api/icd10cm/v3/search?sf=code,name&terms=infect&maxList=716
```

Note: the default maxList is 7
## ear

```
https://clinicaltables.nlm.nih.gov/api/icd10cm/v3/search?sf=code,name&terms=otiti&maxList=716
```

## intestine

```
https://clinicaltables.nlm.nih.gov/api/icd10cm/v3/search?sf=code,name&terms=intest&maxList=716
```

# Writing Latex Tables from Pandas

```
import pandas as pd
import numpy as np

DUMMY=False
STRA='L{1in}|L{1.25in}|L{1.25in}|L{1.5in}|L{.3in}|L{.3in}'


def texTable(df,tabname='tmp.tex',FORMAT='%1.2f',INDEX=True,DUMMY=DUMMY,USE_l=False,
             TABFORMAT=None,LNTERM='\\\\\\cline{2-5}\n'):
    '''
    write latex table

    df        : dataframe to write
    tabname   : output file (overwritten)
    FORMAT    : float format passed to DataFrame.to_csv
    INDEX     : if True the index is written as leading column(s)
    DUMMY     : if True nothing is written
    USE_l     : if True every column gets the plain 'l' specifier
    TABFORMAT : tabular column specification; when None (and USE_l is
                False) every column gets 'L{1in}', separated by '|'
    LNTERM    : text written at the end of every table row
    '''
    if DUMMY:
        return

    # always work on a copy: the escaping below must not leak into
    # the caller's dataframe
    df = df.reset_index() if INDEX else df.copy()

    # escape underscores for LaTeX; a doubled underscore in a column
    # name ends up as a single unescaped underscore
    df.columns = [str(x).replace('_','\\_').replace('\\_\\_','_')
                  for x in df.columns]
    for col in df.columns:
        if df[col].dtype == 'object':
            # literal replacement, independent of the pandas regex default
            df[col] = df[col].str.replace('_','\\_',regex=False)

    if USE_l:
        TABFORMAT = 'l'*len(df.columns)
    elif TABFORMAT is None:
        TABFORMAT = '|'.join(['L{1in}']*len(df.columns))

    with open(tabname,'w') as f:
        f.write('\\begin{tabular}{'+TABFORMAT+'}\\hline\n')

    csvargs = dict(float_format=FORMAT, sep='&', quotechar=' ',
                   index=False, mode='a')
    try:
        df.to_csv(tabname, lineterminator=LNTERM, **csvargs)
    except TypeError:
        # pandas < 1.5 only knows the keyword under its old name
        df.to_csv(tabname, line_terminator=LNTERM, **csvargs)

    with open(tabname,'a') as f:
        f.write('\\hline\\end{tabular}\n')
```

# Saving Publication Quality Figures from Matplotlib

```
def saveFIG(filename='tmp.pdf',AXIS=False):
    '''
    save fig for publication

    filename : output file passed to savefig
    AXIS     : if False the major tick locators of both axes of the
               current axes are replaced by NullLocator
    '''
    import pylab as plt
    # remove all padding around and between the subplots
    plt.subplots_adjust(top = 1, bottom = 0, right = 1, left = 0,
                        hspace = 0, wspace = 0)
    plt.margins(0,0)
    if not AXIS:
        plt.gca().xaxis.set_major_locator(plt.NullLocator())
        plt.gca().yaxis.set_major_locator(plt.NullLocator())
    plt.savefig(filename,dpi=300, bbox_inches = 'tight',
                pad_inches = 0,transparent=True)
    return
```

# ICD CODE LOOKUP

```
#!/usr/bin/python
import os
import sys
sys.path.append('../../../../pycode/')
from tqdm import tqdm
import pandas as pd
import numpy as np
import glob
import pylab as plt
import subprocess
import urllib
import json
import tempfile
import argparse
from argparse import RawTextHelpFormatter
import re

DUMMY=False
STRA='L{1in}|L{1.25in}|L{1.25in}|L{1.5in}|L{.3in}|L{.3in}'


def texTable(df,tabname='tmp.tex',FORMAT='%1.2f',INDEX=True,DUMMY=DUMMY,USE_l=False):
    '''
    write latex table

    df      : dataframe to write
    tabname : output file (overwritten)
    FORMAT  : float format passed to DataFrame.to_csv
    INDEX   : if True the index is written as leading column(s)
    DUMMY   : if True nothing is written
    USE_l   : if True every column gets the plain 'l' specifier,
              otherwise 'L{1in}' columns separated by '|'
    '''
    if DUMMY:
        return

    # always work on a copy: the escaping below must not leak into
    # the caller's dataframe
    df = df.reset_index() if INDEX else df.copy()

    df.columns = [str(x).replace('_','\\_') for x in df.columns]
    for col in df.columns:
        if df[col].dtype == 'object':
            # literal replacement, independent of the pandas regex default
            df[col] = df[col].str.replace('_','\\_',regex=False)

    if USE_l:
        TABFORMAT = 'l'*len(df.columns)
    else:
        TABFORMAT = '|'.join(['L{1in}']*len(df.columns))

    with open(tabname,'w') as f:
        f.write('\\begin{tabular}{'+TABFORMAT+'}\n')

    csvargs = dict(float_format=FORMAT, sep='&', quotechar=' ',
                   index=False, mode='a')
    try:
        df.to_csv(tabname, lineterminator='\\\\\\hline\n', **csvargs)
    except TypeError:
        # pandas < 1.5 only knows the keyword under its old name
        df.to_csv(tabname, line_terminator='\\\\\\hline\n', **csvargs)

    with open(tabname,'a') as f:
        f.write('\\end{tabular}\n')


banner='GENERATE TABLE OF ICD DESCRIZPTIONS IN LATEX FORMAT BY LOOKING UP NAMES FROM WEB'
zed='copyright 2020 zed.uchicago.edu'


def str2bool(v):
    '''argparse type: map yes/true/t/y/1 and no/false/f/n/0 (any case) to bool'''
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    elif v.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    else:
        raise argparse.ArgumentTypeError('Boolean value expected.')


ICDDESC='../figfiles/ICD9desc'
NAME_TYPE='short_name'

parser = argparse.ArgumentParser(description='########\n'+banner+'\n'+zed,formatter_class=RawTextHelpFormatter)
parser._optionals.title="Program Options"
parser.add_argument('-list', metavar="", nargs='+',dest='CODELIST', action="store", type=str, default=[], help="code list")
parser.add_argument('-codefile', metavar="",dest='CODEFILE', action="store", type=str, default=None, help="code list file")
parser.add_argument('-pref', metavar="", dest='FILEPREF', action="store", type=str, default='tmp', help="filepref")
parser.add_argument('-nametype', metavar="", dest='NAME_TYPE', action="store", type=str, default='short_name', help="short or long names")
# NOTE: -fast is parsed but nothing below reads args.FAST
parser.add_argument('-fast', metavar="", dest='FAST', action="store", type=str2bool, default=False, help="fast comp no web lookup of codes")
parser.add_argument('-dictsave', metavar="", dest='SAVE', action="store", type=str2bool, default=False, help="save code dict to avoid future lookups")

if len(sys.argv[1:])==0:
    parser.print_help()
    parser.exit()
args=parser.parse_args()

fpref=args.FILEPREF
CODELIST=args.CODELIST
NAME_TYPE=args.NAME_TYPE
CODEFILE=args.CODEFILE
SAVE=args.SAVE

if CODEFILE is not None:
    with open(CODEFILE) as fp:
        # whitespace separated codes; every line of the file is read
        CODELIST=CODELIST+fp.read().split()
# drop duplicates; sorted so the table order is reproducible
CODELIST=sorted(set(CODELIST))

# descriptions cached by an earlier run with -dictsave
if not os.path.isfile('ICDCODEDICT_.json'):
    ICDCODEDICT={}
else:
    with open("ICDCODEDICT_.json") as fp:
        ICDCODEDICT=json.load(fp)

try:
    from urllib.request import urlopen   # Python 3
except ImportError:
    from urllib2 import urlopen          # Python 2


def getICDdesc(code,NUM_MAX=1):
    '''
    Look up the description of an ICD9 code.

    The cache ICDCODEDICT is consulted first; otherwise the
    clinicaltables.nlm.nih.gov icd9cm_dx search API is queried and
    the result is stored in the cache.

    code    : code string; for codes starting with 'E' the '.' is removed
    NUM_MAX : maximum number of matches to use

    Returns None when nothing is found. With NUM_MAX == 1 the first
    name is returned with '&' escaped for LaTeX and '<' replaced by
    'less'; otherwise the list of (unescaped) names is returned.
    '''
    if code.startswith('E'):
        code=code.replace(".","")
    if code in ICDCODEDICT:
        return ICDCODEDICT[code]

    url='https://clinicaltables.nlm.nih.gov/api/icd9cm_dx/v3/search?terms='+code+'&ef='+NAME_TYPE
    response=urlopen(url)
    try:
        data=json.loads(response.read().decode('utf-8'))
    finally:
        response.close()

    # data[0] is compared against 0 as a match count; data[2] holds the
    # fields requested through 'ef'
    found=[]
    if data[0]>0:
        found=[str(x).strip() for x in data[2][NAME_TYPE][:NUM_MAX]]

    if not found:
        names=None
    elif NUM_MAX == 1:
        names=found[0].replace('&','\\&').replace('<','less')
    else:
        names=found

    ICDCODEDICT[code]=names
    return names


codeDICT={str(code):getICDdesc(code) for code in tqdm(CODELIST)}
df=pd.DataFrame.from_dict(codeDICT,orient='index')
df.columns=['description']
df.index.name='code'
df.to_csv(fpref+'.csv')
texTable(df,tabname=fpref+'.tex')

if SAVE:
    # save the complete cache (earlier entries included) under the same
    # keys getICDdesc uses for its lookups
    with open("ICDCODEDICT_.json",'w') as fp:
        json.dump(ICDCODEDICT,fp)
```

# PMID To BIBTEX

```
#!/usr/bin/python3
# script by Tommy https://www.biostars.org/u/1945/
import requests
import xml.etree.ElementTree as ET
import sys
import calendar

# Parse PubMed IDs from the command line.
pmids = sys.argv[1:]

## Fetch XML data from Entrez.
efetch = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
r = requests.get(
    '{}?db=pubmed&id={}&rettype=abstract'.format(efetch, ','.join(pmids)))
# stop on an HTTP error instead of trying to parse an error page as XML
r.raise_for_status()
##print(r.text)

## Loop over the PubMed IDs and parse the XML.
root = ET.fromstring(r.text)


def _text(node):
    '''Text of an element including nested inline markup; None if the element is missing.'''
    if node is None:
        return None
    return ''.join(node.itertext())


for PubmedArticle in root.iter('PubmedArticle'):
    PMID = PubmedArticle.find('./MedlineCitation/PMID')
    ISSN = PubmedArticle.find('./MedlineCitation/Article/Journal/ISSN')
    Volume = PubmedArticle.find('./MedlineCitation/Article/Journal/JournalIssue/Volume')
    Issue = PubmedArticle.find('./MedlineCitation/Article/Journal/JournalIssue/Issue')
    Year = PubmedArticle.find('./MedlineCitation/Article/Journal/JournalIssue/PubDate/Year')
    Month = PubmedArticle.find('./MedlineCitation/Article/Journal/JournalIssue/PubDate/Month')
    ## Year = PubmedArticle.find('./MedlineCitation/Article/ArticleDate/Year')
    ## Month = PubmedArticle.find('./MedlineCitation/Article/ArticleDate/Month')
    Title = PubmedArticle.find('./MedlineCitation/Article/Journal/Title')
    ArticleTitle = PubmedArticle.find('./MedlineCitation/Article/ArticleTitle')
    MedlinePgn = PubmedArticle.find('./MedlineCitation/Article/Pagination/MedlinePgn')
    Abstract = PubmedArticle.find('./MedlineCitation/Article/Abstract/AbstractText')

    if PMID is None:
        print('record without PMID skipped', pmids, file=sys.stderr, flush=True)
        continue

    authors = []
    for Author in PubmedArticle.iter('Author'):
        try:
            LastName = Author.find('LastName').text
            ForeName = Author.find('ForeName').text
        except AttributeError:  # e.g. CollectiveName
            continue
        authors.append('{}, {}'.format(LastName, ForeName))

    ## Use InvestigatorList instead of AuthorList
    if len(authors) == 0:
        ## './MedlineCitation/Article/Journal/InvestigatorList'
        for Investigator in PubmedArticle.iter('Investigator'):
            try:
                LastName = Investigator.find('LastName').text
                ForeName = Investigator.find('ForeName').text
            except AttributeError:  # e.g. CollectiveName
                continue
            authors.append('{}, {}'.format(LastName, ForeName))

    if Year is None:
        # fall back to the free-text MedlineDate: the first four characters
        # are used as year, characters 5-8 as month abbreviation
        medline_date = _text(PubmedArticle.find(
            './MedlineCitation/Article/Journal/JournalIssue/PubDate/MedlineDate')) or ''
        Year = medline_date[:4]
        months = list(calendar.month_abbr)
        abbr = medline_date[5:8]
        # months[0] is the empty string, so it is excluded from the match
        Month = '{:02d}'.format(months.index(abbr)) if abbr in months[1:] else None
    else:
        Year = Year.text
        if Month is not None:
            Month = Month.text

    # braces inside a value would break the BibTeX entry; every field
    # that is present is checked on its own
    checked = [_text(x) for x in (PMID, Volume, Title, ArticleTitle,
                                  MedlinePgn, Abstract)]
    checked.append(''.join(authors))
    for _ in checked:
        if _ is None:
            continue
        if '{' in _ or '}' in _:
            raise ValueError('brace in field of PMID {}: {}'.format(PMID.text, _))

    ## Print the bibtex formatted output.
    if authors:
        key_author = authors[0].split(',')[0]
    else:
        # still emit a complete entry so the output stays valid BibTeX
        print('no authors found for PMID', PMID.text, file=sys.stderr, flush=True)
        key_author = 'Unknown'
    print('@Article{{{}{}pmid{},'.format(key_author, Year, PMID.text))
    print(' Author="{}",'.format(' AND '.join(authors)))
    if ArticleTitle is not None:
        print(' Title={{{}}},'.format(_text(ArticleTitle)))
    if Title is not None:
        print(' Journal={{{}}},'.format(Title.text))
    print(' Year={{{}}},'.format(Year))
    if Volume is not None:
        print(' Volume={{{}}},'.format(Volume.text))
    if Issue is not None:
        print(' Number={{{}}},'.format(Issue.text))
    if MedlinePgn is not None:
        print(' Pages={{{}}},'.format(MedlinePgn.text))
    if Month is not None:
        print(' Month={{{}}},'.format(Month))
    if Abstract is not None:
        print(' Abstract={{{}}},'.format(_text(Abstract)))
    if ISSN is not None:
        print(' ISSN={{{}}},'.format(ISSN.text))
    print('}')
```

# Fancy Pivot to Latex

```
def chk(val,FLAG=True):
    '''With FLAG set, return val as a string cut to at most 4 characters; otherwise return val unchanged.'''
    if FLAG:
        if not isinstance(val, str):
            val=str(val)
        if len(val) > 4:
            return val[:4]
    return val


def fancyPivot(df,INDEX=None,COLUMN=None,
               SUBINDEX=None,STRL=None,FLAG=False,
               HFORMAT='\\bf\\sffamily ',
               FONTSMALL='\\fontsize{8}{8}\\selectfont'):
    '''
    Build a LaTeX tabular with one group of rows per value of INDEX and
    one group of columns per value of COLUMN. Inside every group the
    rows are the values of SUBINDEX and the columns are the remaining
    columns of df.

    df        : long-format dataframe
    INDEX     : column whose values become the row groups
    COLUMN    : column whose values become the column groups
    SUBINDEX  : column whose values label the rows inside a group
    STRL      : tabular column specification; generated when None
    FLAG      : passed to chk (cut cell text to 4 characters)
    HFORMAT   : LaTeX prefix for header cells
    FONTSMALL : LaTeX font switch for header cells

    Row labels and value columns are taken from the last
    (INDEX, COLUMN) group, and every other group is indexed with those
    same labels.

    Returns the tabular as one string.
    '''
    vI=df[INDEX].value_counts().index.values
    vC=df[COLUMN].value_counts().index.values

    D={}
    for i in vI:
        for c in vC:
            df_=df[(df[INDEX]==i) & (df[COLUMN]==c)].drop([INDEX,COLUMN],axis=1).set_index(SUBINDEX)
            df_.index.name=SUBINDEX
            df_=df_.sort_index()
            D[(i,c)]=df_

    # df_ is now the last group; it defines the layout of every group
    subcols=len(df_.columns)+1
    subrows=len(df_.index.values)+1
    Tcols=len(vC)*subcols+1
    # rule under everything but the first (row group) column
    cline='\\cline{2-'+str(Tcols)+'}'

    STR='\\begin{tabular}{'
    if STRL is None:
        STR=STR+'L{1in}||'+('L{.7in}'*subcols+'|')*len(vC)
        STR=STR[:-1]+'}'
    else:
        STR=STR+STRL+'}'

    # first header row: one multicolumn cell per column group
    S2='\\hline'
    for c in vC:
        S2=S2+'&'+'\\multicolumn{'+str(subcols)+'}{c}{'+HFORMAT+FONTSMALL+' '+str(c)+'}'
    S2=S2+'\\\\'+cline

    for i in vI:
        # label of the row group followed by the per-group header row
        S2=S2+'\\multirow{'+str(subrows)+'}{*}{'+HFORMAT+FONTSMALL+' '+str(i)+'}&'+'\n'
        for c in vC:
            S2=S2+HFORMAT+str(SUBINDEX)+'&'
            for cc in df_.columns:
                S2=S2+HFORMAT+FONTSMALL+' '+str(cc)+'&'
        S2=S2[:-1]+'\\\\'+cline

        for r in df_.index:
            S2=S2+'\\multirow{'+str(subrows)+'}{*}{}&'
            for c in vC:
                S2=S2+str(r)+'&'
                for cc in df_.columns:
                    S2=S2+"{}".format(chk(D[(i,c)].loc[r,cc],FLAG))+'&'
            S2=S2[:-1]+'\\\\'
        S2=S2+'\\hline'

    return STR+S2+'\\end{tabular}'
```

## Example use of fancy-pivot

```
BF1.columns=['target problem', 'gender', 'subcohort', 'auc', '\\% change']
s=fancyPivot(BF1,INDEX='target problem',COLUMN='gender',SUBINDEX='subcohort',STRL="L{.8in}||L{1in}C{.35in}R{.65in}|L{1in}C{.35in}R{.65in}")
with open("../figfiles/tex/Figures/subcohort.tex", "w") as text_file:
    text_file.write("%s" % s)
```

## python geo libraries

- ArcGIS - https://lnkd.in/dgC6sKJH
- Cartopy - https://lnkd.in/dc8ijXRg
- Contextily - https://lnkd.in/dTdQsmKX
- Descartes - https://lnkd.in/dCJykxwW
- Fiona - https://lnkd.in/d8sJ3Q5a
- Folium - https://lnkd.in/dfSsE-MB
- Gdal - https://lnkd.in/dYBJBaAY
- Geohash - https://lnkd.in/d_NxJ4_M
- Geojson - https://lnkd.in/daGs2WYq
- Geopandas - https://lnkd.in/dBTFKKV3
- Geopy - https://lnkd.in/dfAzR8Xa
- Gevent - http://www.gevent.org
- H3 - https://h3geo.org/docs/
- OSMnx - https://lnkd.in/dm3pHgUS
- PyQGIS - https://lnkd.in/dShWyWVr
- PySAL - https://pysal.org
- Pydeck - https://lnkd.in/dGBFu-iw
- Pyproj - https://lnkd.in/dNG9fdkm
- RTree - https://lnkd.in/dURMiYpU
- Rasterio - https://lnkd.in/dEMC6ve6
- Scikit-mobility - https://lnkd.in/dpHhaX2J
- Shapely - https://lnkd.in/d568datK
Uploading file...
Header
### Wiki for Zero Knowledge Discovery, University of Kentucky, Division of Biomedical Informatics [zeroknowledgediscovery.org/wiki](http://34.66.189.202:4567/) <img src="../../logo1.png" alt="drawing" style="width:220px;"/> ---
Sidebar
[[_TOC_|levels = 2]]
Edit message:
Cancel