0090-drm-vc4-Add-suport-for-3D-rendering-using-the-V3D-en.patch 163 KB

  1. From d6b537db6d9208d3d512aac65d03a47913787576 Mon Sep 17 00:00:00 2001
  2. From: Eric Anholt <eric@anholt.net>
  3. Date: Mon, 2 Mar 2015 13:01:12 -0800
  4. Subject: [PATCH 090/381] drm/vc4: Add suport for 3D rendering using the V3D
  5. engine.
  6. This is a squash of the out-of-tree development series. Since that
  7. series contained code from the first "get a demo triangle rendered
  8. using a hacked up driver using binary shader code" to "plug the last
  9. known security hole", it's hard to reconstruct a different series of
  10. incremental development that's mergeable without security holes
  11. throughout it.
  12. Signed-off-by: Eric Anholt <eric@anholt.net>
  13. ---
  14. drivers/gpu/drm/vc4/Makefile | 11 +-
  15. drivers/gpu/drm/vc4/vc4_bo.c | 476 +++++++++++++-
  16. drivers/gpu/drm/vc4/vc4_crtc.c | 98 ++-
  17. drivers/gpu/drm/vc4/vc4_debugfs.c | 3 +
  18. drivers/gpu/drm/vc4/vc4_drv.c | 45 +-
  19. drivers/gpu/drm/vc4/vc4_drv.h | 317 ++++++++++
  20. drivers/gpu/drm/vc4/vc4_gem.c | 686 +++++++++++++++++++++
  21. drivers/gpu/drm/vc4/vc4_irq.c | 211 +++++++
  22. drivers/gpu/drm/vc4/vc4_kms.c | 148 ++++-
  23. drivers/gpu/drm/vc4/vc4_packet.h | 384 ++++++++++++
  24. drivers/gpu/drm/vc4/vc4_plane.c | 40 ++
  25. drivers/gpu/drm/vc4/vc4_qpu_defines.h | 268 ++++++++
  26. drivers/gpu/drm/vc4/vc4_render_cl.c | 448 ++++++++++++++
  27. drivers/gpu/drm/vc4/vc4_trace.h | 63 ++
  28. drivers/gpu/drm/vc4/vc4_trace_points.c | 14 +
  29. drivers/gpu/drm/vc4/vc4_v3d.c | 268 ++++++++
  30. drivers/gpu/drm/vc4/vc4_validate.c | 958 +++++++++++++++++++++++++++++
  31. drivers/gpu/drm/vc4/vc4_validate_shaders.c | 521 ++++++++++++++++
  32. include/uapi/drm/vc4_drm.h | 229 +++++++
  33. 19 files changed, 5173 insertions(+), 15 deletions(-)
  34. create mode 100644 drivers/gpu/drm/vc4/vc4_gem.c
  35. create mode 100644 drivers/gpu/drm/vc4/vc4_irq.c
  36. create mode 100644 drivers/gpu/drm/vc4/vc4_packet.h
  37. create mode 100644 drivers/gpu/drm/vc4/vc4_qpu_defines.h
  38. create mode 100644 drivers/gpu/drm/vc4/vc4_render_cl.c
  39. create mode 100644 drivers/gpu/drm/vc4/vc4_trace.h
  40. create mode 100644 drivers/gpu/drm/vc4/vc4_trace_points.c
  41. create mode 100644 drivers/gpu/drm/vc4/vc4_v3d.c
  42. create mode 100644 drivers/gpu/drm/vc4/vc4_validate.c
  43. create mode 100644 drivers/gpu/drm/vc4/vc4_validate_shaders.c
  44. create mode 100644 include/uapi/drm/vc4_drm.h
  45. --- a/drivers/gpu/drm/vc4/Makefile
  46. +++ b/drivers/gpu/drm/vc4/Makefile
  47. @@ -8,10 +8,19 @@ vc4-y := \
  48. vc4_crtc.o \
  49. vc4_drv.o \
  50. vc4_kms.o \
  51. + vc4_gem.o \
  52. vc4_hdmi.o \
  53. vc4_hvs.o \
  54. - vc4_plane.o
  55. + vc4_irq.o \
  56. + vc4_plane.o \
  57. + vc4_render_cl.o \
  58. + vc4_trace_points.o \
  59. + vc4_v3d.o \
  60. + vc4_validate.o \
  61. + vc4_validate_shaders.o
  62. vc4-$(CONFIG_DEBUG_FS) += vc4_debugfs.o
  63. obj-$(CONFIG_DRM_VC4) += vc4.o
  64. +
  65. +CFLAGS_vc4_trace_points.o := -I$(src)
  66. --- a/drivers/gpu/drm/vc4/vc4_bo.c
  67. +++ b/drivers/gpu/drm/vc4/vc4_bo.c
  68. @@ -15,16 +15,174 @@
  69. */
  70. #include "vc4_drv.h"
  71. +#include "uapi/drm/vc4_drm.h"
  72. -struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size)
  73. +static void vc4_bo_stats_dump(struct vc4_dev *vc4)
  74. {
  75. + DRM_INFO("num bos allocated: %d\n",
  76. + vc4->bo_stats.num_allocated);
  77. + DRM_INFO("size bos allocated: %dkb\n",
  78. + vc4->bo_stats.size_allocated / 1024);
  79. + DRM_INFO("num bos used: %d\n",
  80. + vc4->bo_stats.num_allocated - vc4->bo_stats.num_cached);
  81. + DRM_INFO("size bos used: %dkb\n",
  82. + (vc4->bo_stats.size_allocated -
  83. + vc4->bo_stats.size_cached) / 1024);
  84. + DRM_INFO("num bos cached: %d\n",
  85. + vc4->bo_stats.num_cached);
  86. + DRM_INFO("size bos cached: %dkb\n",
  87. + vc4->bo_stats.size_cached / 1024);
  88. +}
  89. +
  90. +static uint32_t bo_page_index(size_t size)
  91. +{
  92. + return (size / PAGE_SIZE) - 1;
  93. +}
  94. +
  95. +/* Must be called with bo_lock held. */
  96. +static void vc4_bo_destroy(struct vc4_bo *bo)
  97. +{
  98. + struct drm_gem_object *obj = &bo->base.base;
  99. + struct vc4_dev *vc4 = to_vc4_dev(obj->dev);
  100. +
  101. + if (bo->validated_shader) {
  102. + kfree(bo->validated_shader->texture_samples);
  103. + kfree(bo->validated_shader);
  104. + bo->validated_shader = NULL;
  105. + }
  106. +
  107. + vc4->bo_stats.num_allocated--;
  108. + vc4->bo_stats.size_allocated -= obj->size;
  109. + drm_gem_cma_free_object(obj);
  110. +}
  111. +
  112. +/* Must be called with bo_lock held. */
  113. +static void vc4_bo_remove_from_cache(struct vc4_bo *bo)
  114. +{
  115. + struct drm_gem_object *obj = &bo->base.base;
  116. + struct vc4_dev *vc4 = to_vc4_dev(obj->dev);
  117. +
  118. + vc4->bo_stats.num_cached--;
  119. + vc4->bo_stats.size_cached -= obj->size;
  120. +
  121. + list_del(&bo->unref_head);
  122. + list_del(&bo->size_head);
  123. +}
  124. +
  125. +static struct list_head *vc4_get_cache_list_for_size(struct drm_device *dev,
  126. + size_t size)
  127. +{
  128. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  129. + uint32_t page_index = bo_page_index(size);
  130. +
  131. + if (vc4->bo_cache.size_list_size <= page_index) {
  132. + uint32_t new_size = max(vc4->bo_cache.size_list_size * 2,
  133. + page_index + 1);
  134. + struct list_head *new_list;
  135. + uint32_t i;
  136. +
  137. + new_list = kmalloc(new_size * sizeof(struct list_head),
  138. + GFP_KERNEL);
  139. + if (!new_list)
  140. + return NULL;
  141. +
  142. + /* Rebase the old cached BO lists to their new list
  143. + * head locations.
  144. + */
  145. + for (i = 0; i < vc4->bo_cache.size_list_size; i++) {
  146. + struct list_head *old_list = &vc4->bo_cache.size_list[i];
  147. + if (list_empty(old_list))
  148. + INIT_LIST_HEAD(&new_list[i]);
  149. + else
  150. + list_replace(old_list, &new_list[i]);
  151. + }
  152. + /* And initialize the brand new BO list heads. */
  153. + for (i = vc4->bo_cache.size_list_size; i < new_size; i++)
  154. + INIT_LIST_HEAD(&new_list[i]);
  155. +
  156. + kfree(vc4->bo_cache.size_list);
  157. + vc4->bo_cache.size_list = new_list;
  158. + vc4->bo_cache.size_list_size = new_size;
  159. + }
  160. +
  161. + return &vc4->bo_cache.size_list[page_index];
  162. +}
  163. +
  164. +void vc4_bo_cache_purge(struct drm_device *dev)
  165. +{
  166. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  167. +
  168. + spin_lock(&vc4->bo_lock);
  169. + while (!list_empty(&vc4->bo_cache.time_list)) {
  170. + struct vc4_bo *bo = list_last_entry(&vc4->bo_cache.time_list,
  171. + struct vc4_bo, unref_head);
  172. + vc4_bo_remove_from_cache(bo);
  173. + vc4_bo_destroy(bo);
  174. + }
  175. + spin_unlock(&vc4->bo_lock);
  176. +}
  177. +
  178. +struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size)
  179. +{
  180. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  181. + uint32_t size = roundup(unaligned_size, PAGE_SIZE);
  182. + uint32_t page_index = bo_page_index(size);
  183. struct drm_gem_cma_object *cma_obj;
  184. + int pass;
  185. - cma_obj = drm_gem_cma_create(dev, size);
  186. - if (IS_ERR(cma_obj))
  187. + if (size == 0)
  188. return NULL;
  189. - else
  190. - return to_vc4_bo(&cma_obj->base);
  191. +
  192. + /* First, try to get a vc4_bo from the kernel BO cache. */
  193. + spin_lock(&vc4->bo_lock);
  194. + if (page_index < vc4->bo_cache.size_list_size &&
  195. + !list_empty(&vc4->bo_cache.size_list[page_index])) {
  196. + struct vc4_bo *bo =
  197. + list_first_entry(&vc4->bo_cache.size_list[page_index],
  198. + struct vc4_bo, size_head);
  199. + vc4_bo_remove_from_cache(bo);
  200. + spin_unlock(&vc4->bo_lock);
  201. + kref_init(&bo->base.base.refcount);
  202. + return bo;
  203. + }
  204. + spin_unlock(&vc4->bo_lock);
  205. +
  206. + /* Otherwise, make a new BO. */
  207. + for (pass = 0; ; pass++) {
  208. + cma_obj = drm_gem_cma_create(dev, size);
  209. + if (!IS_ERR(cma_obj))
  210. + break;
  211. +
  212. + switch (pass) {
  213. + case 0:
  214. + /*
  215. + * If we've run out of CMA memory, kill the cache of
  216. + * CMA allocations we've got laying around and try again.
  217. + */
  218. + vc4_bo_cache_purge(dev);
  219. + break;
  220. + case 1:
  221. + /*
  222. + * Getting desperate, so try to wait for any
  223. + * previous rendering to finish, free its
  224. + * unreferenced BOs to the cache, and then
  225. + * free the cache.
  226. + */
  227. + vc4_wait_for_seqno(dev, vc4->emit_seqno, ~0ull, true);
  228. + vc4_job_handle_completed(vc4);
  229. + vc4_bo_cache_purge(dev);
  230. + break;
  231. + case 3:
  232. + DRM_ERROR("Failed to allocate from CMA:\n");
  233. + vc4_bo_stats_dump(vc4);
  234. + return NULL;
  235. + }
  236. + }
  237. +
  238. + vc4->bo_stats.num_allocated++;
  239. + vc4->bo_stats.size_allocated += size;
  240. +
  241. + return to_vc4_bo(&cma_obj->base);
  242. }
  243. int vc4_dumb_create(struct drm_file *file_priv,
  244. @@ -41,7 +199,129 @@ int vc4_dumb_create(struct drm_file *fil
  245. if (args->size < args->pitch * args->height)
  246. args->size = args->pitch * args->height;
  247. - bo = vc4_bo_create(dev, roundup(args->size, PAGE_SIZE));
  248. + bo = vc4_bo_create(dev, args->size);
  249. + if (!bo)
  250. + return -ENOMEM;
  251. +
  252. + ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
  253. + drm_gem_object_unreference_unlocked(&bo->base.base);
  254. +
  255. + return ret;
  256. +}
  257. +
  258. +static void
  259. +vc4_bo_cache_free_old(struct drm_device *dev)
  260. +{
  261. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  262. + unsigned long expire_time = jiffies - msecs_to_jiffies(1000);
  263. +
  264. + spin_lock(&vc4->bo_lock);
  265. + while (!list_empty(&vc4->bo_cache.time_list)) {
  266. + struct vc4_bo *bo = list_last_entry(&vc4->bo_cache.time_list,
  267. + struct vc4_bo, unref_head);
  268. + if (time_before(expire_time, bo->free_time)) {
  269. + mod_timer(&vc4->bo_cache.time_timer,
  270. + round_jiffies_up(jiffies +
  271. + msecs_to_jiffies(1000)));
  272. + spin_unlock(&vc4->bo_lock);
  273. + return;
  274. + }
  275. +
  276. + vc4_bo_remove_from_cache(bo);
  277. + vc4_bo_destroy(bo);
  278. + }
  279. + spin_unlock(&vc4->bo_lock);
  280. +}
  281. +
  282. +/* Called on the last userspace/kernel unreference of the BO. Returns
  283. + * it to the BO cache if possible, otherwise frees it.
  284. + *
  285. + * Note that this is called with the struct_mutex held.
  286. + */
  287. +void vc4_free_object(struct drm_gem_object *gem_bo)
  288. +{
  289. + struct drm_device *dev = gem_bo->dev;
  290. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  291. + struct vc4_bo *bo = to_vc4_bo(gem_bo);
  292. + struct list_head *cache_list;
  293. +
  294. + /* If the object references someone else's memory, we can't cache it.
  295. + */
  296. + if (gem_bo->import_attach) {
  297. + vc4_bo_destroy(bo);
  298. + return;
  299. + }
  300. +
  301. + /* Don't cache if it was publicly named. */
  302. + if (gem_bo->name) {
  303. + vc4_bo_destroy(bo);
  304. + return;
  305. + }
  306. +
  307. + spin_lock(&vc4->bo_lock);
  308. + cache_list = vc4_get_cache_list_for_size(dev, gem_bo->size);
  309. + if (!cache_list) {
  310. + vc4_bo_destroy(bo);
  311. + spin_unlock(&vc4->bo_lock);
  312. + return;
  313. + }
  314. +
  315. + if (bo->validated_shader) {
  316. + kfree(bo->validated_shader->texture_samples);
  317. + kfree(bo->validated_shader);
  318. + bo->validated_shader = NULL;
  319. + }
  320. +
  321. + bo->free_time = jiffies;
  322. + list_add(&bo->size_head, cache_list);
  323. + list_add(&bo->unref_head, &vc4->bo_cache.time_list);
  324. +
  325. + vc4->bo_stats.num_cached++;
  326. + vc4->bo_stats.size_cached += gem_bo->size;
  327. + spin_unlock(&vc4->bo_lock);
  328. +
  329. + vc4_bo_cache_free_old(dev);
  330. +}
  331. +
  332. +static void vc4_bo_cache_time_work(struct work_struct *work)
  333. +{
  334. + struct vc4_dev *vc4 =
  335. + container_of(work, struct vc4_dev, bo_cache.time_work);
  336. + struct drm_device *dev = vc4->dev;
  337. +
  338. + vc4_bo_cache_free_old(dev);
  339. +}
  340. +
  341. +static void vc4_bo_cache_time_timer(unsigned long data)
  342. +{
  343. + struct drm_device *dev = (struct drm_device *)data;
  344. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  345. +
  346. + schedule_work(&vc4->bo_cache.time_work);
  347. +}
  348. +
  349. +struct dma_buf *
  350. +vc4_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags)
  351. +{
  352. + struct vc4_bo *bo = to_vc4_bo(obj);
  353. +
  354. + if (bo->validated_shader) {
  355. + DRM_ERROR("Attempting to export shader BO\n");
  356. + return ERR_PTR(-EINVAL);
  357. + }
  358. +
  359. + return drm_gem_prime_export(dev, obj, flags);
  360. +}
  361. +
  362. +int
  363. +vc4_create_bo_ioctl(struct drm_device *dev, void *data,
  364. + struct drm_file *file_priv)
  365. +{
  366. + struct drm_vc4_create_bo *args = data;
  367. + struct vc4_bo *bo = NULL;
  368. + int ret;
  369. +
  370. + bo = vc4_bo_create(dev, args->size);
  371. if (!bo)
  372. return -ENOMEM;
  373. @@ -50,3 +330,187 @@ int vc4_dumb_create(struct drm_file *fil
  374. return ret;
  375. }
  376. +
  377. +int
  378. +vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
  379. + struct drm_file *file_priv)
  380. +{
  381. + struct drm_vc4_create_shader_bo *args = data;
  382. + struct vc4_bo *bo = NULL;
  383. + int ret;
  384. +
  385. + if (args->size == 0)
  386. + return -EINVAL;
  387. +
  388. + if (args->size % sizeof(u64) != 0)
  389. + return -EINVAL;
  390. +
  391. + if (args->flags != 0) {
  392. + DRM_INFO("Unknown flags set: 0x%08x\n", args->flags);
  393. + return -EINVAL;
  394. + }
  395. +
  396. + if (args->pad != 0) {
  397. + DRM_INFO("Pad set: 0x%08x\n", args->pad);
  398. + return -EINVAL;
  399. + }
  400. +
  401. + bo = vc4_bo_create(dev, args->size);
  402. + if (!bo)
  403. + return -ENOMEM;
  404. +
  405. + ret = copy_from_user(bo->base.vaddr,
  406. + (void __user *)(uintptr_t)args->data,
  407. + args->size);
  408. + if (ret != 0)
  409. + goto fail;
  410. +
  411. + bo->validated_shader = vc4_validate_shader(&bo->base);
  412. + if (!bo->validated_shader) {
  413. + ret = -EINVAL;
  414. + goto fail;
  415. + }
  416. +
  417. + /* We have to create the handle after validation, to avoid
  418. + * races for users to do doing things like mmap the shader BO.
  419. + */
  420. + ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
  421. +
  422. + fail:
  423. + drm_gem_object_unreference_unlocked(&bo->base.base);
  424. +
  425. + return ret;
  426. +}
  427. +
  428. +int
  429. +vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
  430. + struct drm_file *file_priv)
  431. +{
  432. + struct drm_vc4_mmap_bo *args = data;
  433. + struct drm_gem_object *gem_obj;
  434. +
  435. + gem_obj = drm_gem_object_lookup(dev, file_priv, args->handle);
  436. + if (!gem_obj) {
  437. + DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
  438. + return -EINVAL;
  439. + }
  440. +
  441. + /* The mmap offset was set up at BO allocation time. */
  442. + args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
  443. +
  444. + drm_gem_object_unreference(gem_obj);
  445. + return 0;
  446. +}
  447. +
  448. +int vc4_mmap(struct file *filp, struct vm_area_struct *vma)
  449. +{
  450. + struct drm_gem_object *gem_obj;
  451. + struct vc4_bo *bo;
  452. + int ret;
  453. +
  454. + ret = drm_gem_mmap(filp, vma);
  455. + if (ret)
  456. + return ret;
  457. +
  458. + gem_obj = vma->vm_private_data;
  459. + bo = to_vc4_bo(gem_obj);
  460. +
  461. + if (bo->validated_shader) {
  462. + DRM_ERROR("mmaping of shader BOs not allowed.\n");
  463. + return -EINVAL;
  464. + }
  465. +
  466. + /*
  467. + * Clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the
  468. + * vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map
  469. + * the whole buffer.
  470. + */
  471. + vma->vm_flags &= ~VM_PFNMAP;
  472. + vma->vm_pgoff = 0;
  473. +
  474. + ret = dma_mmap_writecombine(bo->base.base.dev->dev, vma,
  475. + bo->base.vaddr, bo->base.paddr,
  476. + vma->vm_end - vma->vm_start);
  477. + if (ret)
  478. + drm_gem_vm_close(vma);
  479. +
  480. + return ret;
  481. +}
  482. +
  483. +int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
  484. +{
  485. + struct vc4_bo *bo = to_vc4_bo(obj);
  486. +
  487. + if (bo->validated_shader) {
  488. + DRM_ERROR("mmaping of shader BOs not allowed.\n");
  489. + return -EINVAL;
  490. + }
  491. +
  492. + return drm_gem_cma_prime_mmap(obj, vma);
  493. +}
  494. +
  495. +void *vc4_prime_vmap(struct drm_gem_object *obj)
  496. +{
  497. + struct vc4_bo *bo = to_vc4_bo(obj);
  498. +
  499. + if (bo->validated_shader) {
  500. + DRM_ERROR("mmaping of shader BOs not allowed.\n");
  501. + return ERR_PTR(-EINVAL);
  502. + }
  503. +
  504. + return drm_gem_cma_prime_vmap(obj);
  505. +}
  506. +
  507. +void vc4_bo_cache_init(struct drm_device *dev)
  508. +{
  509. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  510. +
  511. + spin_lock_init(&vc4->bo_lock);
  512. +
  513. + INIT_LIST_HEAD(&vc4->bo_cache.time_list);
  514. +
  515. + INIT_WORK(&vc4->bo_cache.time_work, vc4_bo_cache_time_work);
  516. + setup_timer(&vc4->bo_cache.time_timer,
  517. + vc4_bo_cache_time_timer,
  518. + (unsigned long) dev);
  519. +}
  520. +
  521. +void vc4_bo_cache_destroy(struct drm_device *dev)
  522. +{
  523. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  524. +
  525. + del_timer(&vc4->bo_cache.time_timer);
  526. + cancel_work_sync(&vc4->bo_cache.time_work);
  527. +
  528. + vc4_bo_cache_purge(dev);
  529. +
  530. + if (vc4->bo_stats.num_allocated) {
  531. + DRM_ERROR("Destroying BO cache while BOs still allocated:\n");
  532. + vc4_bo_stats_dump(vc4);
  533. + }
  534. +}
  535. +
  536. +#ifdef CONFIG_DEBUG_FS
  537. +int vc4_bo_stats_debugfs(struct seq_file *m, void *unused)
  538. +{
  539. + struct drm_info_node *node = (struct drm_info_node *) m->private;
  540. + struct drm_device *dev = node->minor->dev;
  541. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  542. + struct vc4_bo_stats stats;
  543. +
  544. + spin_lock(&vc4->bo_lock);
  545. + stats = vc4->bo_stats;
  546. + spin_unlock(&vc4->bo_lock);
  547. +
  548. + seq_printf(m, "num bos allocated: %d\n", stats.num_allocated);
  549. + seq_printf(m, "size bos allocated: %dkb\n", stats.size_allocated / 1024);
  550. + seq_printf(m, "num bos used: %d\n", (stats.num_allocated -
  551. + stats.num_cached));
  552. + seq_printf(m, "size bos used: %dkb\n", (stats.size_allocated -
  553. + stats.size_cached) / 1024);
  554. + seq_printf(m, "num bos cached: %d\n", stats.num_cached);
  555. + seq_printf(m, "size bos cached: %dkb\n", stats.size_cached / 1024);
  556. +
  557. + return 0;
  558. +}
  559. +#endif
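The vc4_bo.c changes above implement a kernel-side BO cache: bo_page_index() maps an N-page buffer to size_list[N - 1], so vc4_bo_create() can reuse a cached CMA allocation with an O(1) list lookup and only falls back to drm_gem_cma_create(), purging the cache and waiting for the GPU when CMA runs dry. From user space, the new CREATE_BO and MMAP_BO ioctls are the entry points; a minimal sketch follows, assuming only the size/handle/offset fields visible in the handlers above (the helper name and the trimmed error handling are illustrative, not part of the patch):

    #include <stdint.h>
    #include <sys/mman.h>
    #include <xf86drm.h>
    #include "vc4_drm.h"          /* the uapi header added by this patch */

    /* Illustrative helper, not part of the patch: allocate a vc4 BO and
     * map it.  The size/handle/offset fields match vc4_create_bo_ioctl()
     * and vc4_mmap_bo_ioctl() above; error handling is trimmed. */
    static void *vc4_bo_alloc_and_map(int fd, size_t size, uint32_t *handle)
    {
            struct drm_vc4_create_bo create = { .size = size };
            struct drm_vc4_mmap_bo map = { 0 };

            if (drmIoctl(fd, DRM_IOCTL_VC4_CREATE_BO, &create) != 0)
                    return NULL;

            map.handle = create.handle;
            if (drmIoctl(fd, DRM_IOCTL_VC4_MMAP_BO, &map) != 0)
                    return NULL;

            *handle = create.handle;
            /* map.offset is the fake mmap offset set up at allocation time
             * (drm_vma_node_offset_addr() in the hunk above). */
            return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        fd, (off_t)map.offset);
    }

Note that this path is refused for shader BOs: vc4_mmap() and vc4_prime_mmap() above return -EINVAL when bo->validated_shader is set, so validated shader code cannot be rewritten after the fact.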
  560. --- a/drivers/gpu/drm/vc4/vc4_crtc.c
  561. +++ b/drivers/gpu/drm/vc4/vc4_crtc.c
  562. @@ -35,6 +35,7 @@
  563. #include "drm_atomic_helper.h"
  564. #include "drm_crtc_helper.h"
  565. #include "linux/clk.h"
  566. +#include "drm_fb_cma_helper.h"
  567. #include "linux/component.h"
  568. #include "linux/of_device.h"
  569. #include "vc4_drv.h"
  570. @@ -476,10 +477,105 @@ static irqreturn_t vc4_crtc_irq_handler(
  571. return ret;
  572. }
  573. +struct vc4_async_flip_state {
  574. + struct drm_crtc *crtc;
  575. + struct drm_framebuffer *fb;
  576. + struct drm_pending_vblank_event *event;
  577. +
  578. + struct vc4_seqno_cb cb;
  579. +};
  580. +
  581. +/* Called when the V3D execution for the BO being flipped to is done, so that
  582. + * we can actually update the plane's address to point to it.
  583. + */
  584. +static void
  585. +vc4_async_page_flip_complete(struct vc4_seqno_cb *cb)
  586. +{
  587. + struct vc4_async_flip_state *flip_state =
  588. + container_of(cb, struct vc4_async_flip_state, cb);
  589. + struct drm_crtc *crtc = flip_state->crtc;
  590. + struct drm_device *dev = crtc->dev;
  591. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  592. + struct drm_plane *plane = crtc->primary;
  593. +
  594. + vc4_plane_async_set_fb(plane, flip_state->fb);
  595. + if (flip_state->event) {
  596. + unsigned long flags;
  597. + spin_lock_irqsave(&dev->event_lock, flags);
  598. + drm_crtc_send_vblank_event(crtc, flip_state->event);
  599. + spin_unlock_irqrestore(&dev->event_lock, flags);
  600. + }
  601. +
  602. + drm_framebuffer_unreference(flip_state->fb);
  603. + kfree(flip_state);
  604. +
  605. + up(&vc4->async_modeset);
  606. +}
  607. +
  608. +/* Implements async (non-vblank-synced) page flips.
  609. + *
  610. + * The page flip ioctl needs to return immediately, so we grab the
  611. + * modeset semaphore on the pipe, and queue the address update for
  612. + * when V3D is done with the BO being flipped to.
  613. + */
  614. +static int vc4_async_page_flip(struct drm_crtc *crtc,
  615. + struct drm_framebuffer *fb,
  616. + struct drm_pending_vblank_event *event,
  617. + uint32_t flags)
  618. +{
  619. + struct drm_device *dev = crtc->dev;
  620. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  621. + struct drm_plane *plane = crtc->primary;
  622. + int ret = 0;
  623. + struct vc4_async_flip_state *flip_state;
  624. + struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0);
  625. + struct vc4_bo *bo = to_vc4_bo(&cma_bo->base);
  626. +
  627. + flip_state = kzalloc(sizeof(*flip_state), GFP_KERNEL);
  628. + if (!flip_state)
  629. + return -ENOMEM;
  630. +
  631. + drm_framebuffer_reference(fb);
  632. + flip_state->fb = fb;
  633. + flip_state->crtc = crtc;
  634. + flip_state->event = event;
  635. +
  636. + /* Make sure all other async modesetes have landed. */
  637. + ret = down_interruptible(&vc4->async_modeset);
  638. + if (ret) {
  639. + kfree(flip_state);
  640. + return ret;
  641. + }
  642. +
  643. + /* Immediately update the plane's legacy fb pointer, so that later
  644. + * modeset prep sees the state that will be present when the semaphore
  645. + * is released.
  646. + */
  647. + drm_atomic_set_fb_for_plane(plane->state, fb);
  648. + plane->fb = fb;
  649. +
  650. + vc4_queue_seqno_cb(dev, &flip_state->cb, bo->seqno,
  651. + vc4_async_page_flip_complete);
  652. +
  653. + /* Driver takes ownership of state on successful async commit. */
  654. + return 0;
  655. +}
  656. +
  657. +static int vc4_page_flip(struct drm_crtc *crtc,
  658. + struct drm_framebuffer *fb,
  659. + struct drm_pending_vblank_event *event,
  660. + uint32_t flags)
  661. +{
  662. + if (flags & DRM_MODE_PAGE_FLIP_ASYNC)
  663. + return vc4_async_page_flip(crtc, fb, event, flags);
  664. + else
  665. + return drm_atomic_helper_page_flip(crtc, fb, event, flags);
  666. +}
  667. +
  668. static const struct drm_crtc_funcs vc4_crtc_funcs = {
  669. .set_config = drm_atomic_helper_set_config,
  670. .destroy = vc4_crtc_destroy,
  671. - .page_flip = drm_atomic_helper_page_flip,
  672. + .page_flip = vc4_page_flip,
  673. .set_property = NULL,
  674. .cursor_set = NULL, /* handled by drm_mode_cursor_universal */
  675. .cursor_move = NULL, /* handled by drm_mode_cursor_universal */
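vc4_page_flip() above routes DRM_MODE_PAGE_FLIP_ASYNC requests away from the atomic helper: the ioctl returns immediately, and vc4_async_page_flip_complete() updates the plane address only after the flipped-to BO's last render seqno has passed. A minimal sketch of how a KMS client would request such a flip through libdrm (crtc_id, fb_id and user_data are assumed to come from the client's own modesetting code):

    #include <stdint.h>
    #include <xf86drm.h>
    #include <xf86drmMode.h>

    /* Illustrative only: queue a non-vblank-synced flip; the driver
     * completes it from its seqno callback once V3D has finished
     * rendering to the new framebuffer. */
    static int queue_async_flip(int fd, uint32_t crtc_id, uint32_t fb_id,
                                void *user_data)
    {
            return drmModePageFlip(fd, crtc_id, fb_id,
                                   DRM_MODE_PAGE_FLIP_ASYNC, user_data);
    }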
  676. --- a/drivers/gpu/drm/vc4/vc4_debugfs.c
  677. +++ b/drivers/gpu/drm/vc4/vc4_debugfs.c
  678. @@ -16,11 +16,14 @@
  679. #include "vc4_regs.h"
  680. static const struct drm_info_list vc4_debugfs_list[] = {
  681. + {"bo_stats", vc4_bo_stats_debugfs, 0},
  682. {"hdmi_regs", vc4_hdmi_debugfs_regs, 0},
  683. {"hvs_regs", vc4_hvs_debugfs_regs, 0},
  684. {"crtc0_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)0},
  685. {"crtc1_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)1},
  686. {"crtc2_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)2},
  687. + {"v3d_ident", vc4_v3d_debugfs_ident, 0},
  688. + {"v3d_regs", vc4_v3d_debugfs_regs, 0},
  689. };
  690. #define VC4_DEBUGFS_ENTRIES ARRAY_SIZE(vc4_debugfs_list)
  691. --- a/drivers/gpu/drm/vc4/vc4_drv.c
  692. +++ b/drivers/gpu/drm/vc4/vc4_drv.c
  693. @@ -14,8 +14,10 @@
  694. #include <linux/module.h>
  695. #include <linux/of_platform.h>
  696. #include <linux/platform_device.h>
  697. +#include <soc/bcm2835/raspberrypi-firmware.h>
  698. #include "drm_fb_cma_helper.h"
  699. +#include "uapi/drm/vc4_drm.h"
  700. #include "vc4_drv.h"
  701. #include "vc4_regs.h"
  702. @@ -63,7 +65,7 @@ static const struct file_operations vc4_
  703. .open = drm_open,
  704. .release = drm_release,
  705. .unlocked_ioctl = drm_ioctl,
  706. - .mmap = drm_gem_cma_mmap,
  707. + .mmap = vc4_mmap,
  708. .poll = drm_poll,
  709. .read = drm_read,
  710. #ifdef CONFIG_COMPAT
  711. @@ -73,16 +75,28 @@ static const struct file_operations vc4_
  712. };
  713. static const struct drm_ioctl_desc vc4_drm_ioctls[] = {
  714. + DRM_IOCTL_DEF_DRV(VC4_SUBMIT_CL, vc4_submit_cl_ioctl, 0),
  715. + DRM_IOCTL_DEF_DRV(VC4_WAIT_SEQNO, vc4_wait_seqno_ioctl, 0),
  716. + DRM_IOCTL_DEF_DRV(VC4_WAIT_BO, vc4_wait_bo_ioctl, 0),
  717. + DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0),
  718. + DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0),
  719. + DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0),
  720. };
  721. static struct drm_driver vc4_drm_driver = {
  722. .driver_features = (DRIVER_MODESET |
  723. DRIVER_ATOMIC |
  724. DRIVER_GEM |
  725. + DRIVER_HAVE_IRQ |
  726. DRIVER_PRIME),
  727. .lastclose = vc4_lastclose,
  728. .preclose = vc4_drm_preclose,
  729. + .irq_handler = vc4_irq,
  730. + .irq_preinstall = vc4_irq_preinstall,
  731. + .irq_postinstall = vc4_irq_postinstall,
  732. + .irq_uninstall = vc4_irq_uninstall,
  733. +
  734. .enable_vblank = vc4_enable_vblank,
  735. .disable_vblank = vc4_disable_vblank,
  736. .get_vblank_counter = drm_vblank_count,
  737. @@ -92,18 +106,18 @@ static struct drm_driver vc4_drm_driver
  738. .debugfs_cleanup = vc4_debugfs_cleanup,
  739. #endif
  740. - .gem_free_object = drm_gem_cma_free_object,
  741. + .gem_free_object = vc4_free_object,
  742. .gem_vm_ops = &drm_gem_cma_vm_ops,
  743. .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
  744. .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
  745. .gem_prime_import = drm_gem_prime_import,
  746. - .gem_prime_export = drm_gem_prime_export,
  747. + .gem_prime_export = vc4_prime_export,
  748. .gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table,
  749. .gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table,
  750. - .gem_prime_vmap = drm_gem_cma_prime_vmap,
  751. + .gem_prime_vmap = vc4_prime_vmap,
  752. .gem_prime_vunmap = drm_gem_cma_prime_vunmap,
  753. - .gem_prime_mmap = drm_gem_cma_prime_mmap,
  754. + .gem_prime_mmap = vc4_prime_mmap,
  755. .dumb_create = vc4_dumb_create,
  756. .dumb_map_offset = drm_gem_cma_dumb_map_offset,
  757. @@ -113,6 +127,8 @@ static struct drm_driver vc4_drm_driver
  758. .num_ioctls = ARRAY_SIZE(vc4_drm_ioctls),
  759. .fops = &vc4_drm_fops,
  760. + .gem_obj_size = sizeof(struct vc4_bo),
  761. +
  762. .name = DRIVER_NAME,
  763. .desc = DRIVER_DESC,
  764. .date = DRIVER_DATE,
  765. @@ -153,6 +169,7 @@ static int vc4_drm_bind(struct device *d
  766. struct drm_device *drm;
  767. struct drm_connector *connector;
  768. struct vc4_dev *vc4;
  769. + struct device_node *firmware_node;
  770. int ret = 0;
  771. dev->coherent_dma_mask = DMA_BIT_MASK(32);
  772. @@ -161,6 +178,14 @@ static int vc4_drm_bind(struct device *d
  773. if (!vc4)
  774. return -ENOMEM;
  775. + firmware_node = of_parse_phandle(dev->of_node, "firmware", 0);
  776. + vc4->firmware = rpi_firmware_get(firmware_node);
  777. + if (!vc4->firmware) {
  778. + DRM_DEBUG("Failed to get Raspberry Pi firmware reference.\n");
  779. + return -EPROBE_DEFER;
  780. + }
  781. + of_node_put(firmware_node);
  782. +
  783. drm = drm_dev_alloc(&vc4_drm_driver, dev);
  784. if (!drm)
  785. return -ENOMEM;
  786. @@ -170,13 +195,17 @@ static int vc4_drm_bind(struct device *d
  787. drm_dev_set_unique(drm, dev_name(dev));
  788. + vc4_bo_cache_init(drm);
  789. +
  790. drm_mode_config_init(drm);
  791. if (ret)
  792. goto unref;
  793. + vc4_gem_init(drm);
  794. +
  795. ret = component_bind_all(dev, drm);
  796. if (ret)
  797. - goto unref;
  798. + goto gem_destroy;
  799. ret = drm_dev_register(drm, 0);
  800. if (ret < 0)
  801. @@ -200,8 +229,11 @@ unregister:
  802. drm_dev_unregister(drm);
  803. unbind_all:
  804. component_unbind_all(dev, drm);
  805. +gem_destroy:
  806. + vc4_gem_destroy(drm);
  807. unref:
  808. drm_dev_unref(drm);
  809. + vc4_bo_cache_destroy(drm);
  810. return ret;
  811. }
  812. @@ -228,6 +260,7 @@ static struct platform_driver *const com
  813. &vc4_hdmi_driver,
  814. &vc4_crtc_driver,
  815. &vc4_hvs_driver,
  816. + &vc4_v3d_driver,
  817. };
  818. static int vc4_platform_drm_probe(struct platform_device *pdev)
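Besides the buffer-management ioctls, the table above wires up VC4_SUBMIT_CL for job submission and VC4_WAIT_SEQNO/VC4_WAIT_BO for synchronization. A sketch of waiting on a buffer, under the assumption that struct drm_vc4_wait_bo carries a GEM handle plus a timeout in nanoseconds (its full layout is in the include/uapi/drm/vc4_drm.h hunk, which falls outside this excerpt):

    #include <stdint.h>
    #include <xf86drm.h>
    #include "vc4_drm.h"

    /* Illustrative only: block for up to timeout_ns until the last job
     * that rendered to this BO has retired.  Field names are assumed
     * from the uapi header added by this patch. */
    static int wait_for_bo_idle(int fd, uint32_t handle, uint64_t timeout_ns)
    {
            struct drm_vc4_wait_bo wait = {
                    .handle = handle,
                    .timeout_ns = timeout_ns,
            };

            return drmIoctl(fd, DRM_IOCTL_VC4_WAIT_BO, &wait);
    }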
  819. --- a/drivers/gpu/drm/vc4/vc4_drv.h
  820. +++ b/drivers/gpu/drm/vc4/vc4_drv.h
  821. @@ -15,8 +15,85 @@ struct vc4_dev {
  822. struct vc4_hdmi *hdmi;
  823. struct vc4_hvs *hvs;
  824. struct vc4_crtc *crtc[3];
  825. + struct vc4_v3d *v3d;
  826. struct drm_fbdev_cma *fbdev;
  827. + struct rpi_firmware *firmware;
  828. +
  829. + /* The kernel-space BO cache. Tracks buffers that have been
  830. + * unreferenced by all other users (refcounts of 0!) but not
  831. + * yet freed, so we can do cheap allocations.
  832. + */
  833. + struct vc4_bo_cache {
  834. + /* Array of list heads for entries in the BO cache,
  835. + * based on number of pages, so we can do O(1) lookups
  836. + * in the cache when allocating.
  837. + */
  838. + struct list_head *size_list;
  839. + uint32_t size_list_size;
  840. +
  841. + /* List of all BOs in the cache, ordered by age, so we
  842. + * can do O(1) lookups when trying to free old
  843. + * buffers.
  844. + */
  845. + struct list_head time_list;
  846. + struct work_struct time_work;
  847. + struct timer_list time_timer;
  848. + } bo_cache;
  849. +
  850. + struct vc4_bo_stats {
  851. + u32 num_allocated;
  852. + u32 size_allocated;
  853. + u32 num_cached;
  854. + u32 size_cached;
  855. + } bo_stats;
  856. +
  857. + /* Protects bo_cache and the BO stats. */
  858. + spinlock_t bo_lock;
  859. +
  860. + /* Sequence number for the last job queued in job_list.
  861. + * Starts at 0 (no jobs emitted).
  862. + */
  863. + uint64_t emit_seqno;
  864. +
  865. + /* Sequence number for the last completed job on the GPU.
  866. + * Starts at 0 (no jobs completed).
  867. + */
  868. + uint64_t finished_seqno;
  869. +
  870. + /* List of all struct vc4_exec_info for jobs to be executed.
  871. + * The first job in the list is the one currently programmed
  872. + * into ct0ca/ct1ca for execution.
  873. + */
  874. + struct list_head job_list;
  875. + /* List of the finished vc4_exec_infos waiting to be freed by
  876. + * job_done_work.
  877. + */
  878. + struct list_head job_done_list;
  879. + spinlock_t job_lock;
  880. + wait_queue_head_t job_wait_queue;
  881. + struct work_struct job_done_work;
  882. +
  883. + /* List of struct vc4_seqno_cb for callbacks to be made from a
  884. + * workqueue when the given seqno is passed.
  885. + */
  886. + struct list_head seqno_cb_list;
  887. +
  888. + /* The binner overflow memory that's currently set up in
  889. + * BPOA/BPOS registers. When overflow occurs and a new one is
  890. + * allocated, the previous one will be moved to
  891. + * vc4->current_exec's free list.
  892. + */
  893. + struct vc4_bo *overflow_mem;
  894. + struct work_struct overflow_mem_work;
  895. +
  896. + struct {
  897. + uint32_t last_ct0ca, last_ct1ca;
  898. + struct timer_list timer;
  899. + struct work_struct reset_work;
  900. + } hangcheck;
  901. +
  902. + struct semaphore async_modeset;
  903. };
  904. static inline struct vc4_dev *
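The emit_seqno/finished_seqno pair above is the core of the synchronization model: each submitted job is tagged with the next emit_seqno, and the IRQ path advances finished_seqno as jobs retire, waking job_wait_queue. The completion test that waiters and seqno callbacks build on reduces to a single comparison; a sketch (the helper name is illustrative, not a symbol this patch adds):

    /* Illustrative only: a job tagged with `seqno` is complete once the
     * GPU's retirement counter has caught up with it.  Waiters sleep on
     * job_wait_queue until this becomes true. */
    static bool vc4_job_seqno_passed(const struct vc4_dev *vc4, uint64_t seqno)
    {
            return vc4->finished_seqno >= seqno;
    }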
  905. @@ -27,6 +104,25 @@ to_vc4_dev(struct drm_device *dev)
  906. struct vc4_bo {
  907. struct drm_gem_cma_object base;
  908. +
  909. + /* seqno of the last job to render to this BO. */
  910. + uint64_t seqno;
  911. +
  912. + /* List entry for the BO's position in either
  913. + * vc4_exec_info->unref_list or vc4_dev->bo_cache.time_list
  914. + */
  915. + struct list_head unref_head;
  916. +
  917. + /* Time in jiffies when the BO was put in vc4->bo_cache. */
  918. + unsigned long free_time;
  919. +
  920. + /* List entry for the BO's position in vc4_dev->bo_cache.size_list */
  921. + struct list_head size_head;
  922. +
  923. + /* Struct for shader validation state, if created by
  924. + * DRM_IOCTL_VC4_CREATE_SHADER_BO.
  925. + */
  926. + struct vc4_validated_shader_info *validated_shader;
  927. };
  928. static inline struct vc4_bo *
  929. @@ -35,6 +131,17 @@ to_vc4_bo(struct drm_gem_object *bo)
  930. return (struct vc4_bo *)bo;
  931. }
  932. +struct vc4_seqno_cb {
  933. + struct work_struct work;
  934. + uint64_t seqno;
  935. + void (*func)(struct vc4_seqno_cb *cb);
  936. +};
  937. +
  938. +struct vc4_v3d {
  939. + struct platform_device *pdev;
  940. + void __iomem *regs;
  941. +};
  942. +
  943. struct vc4_hvs {
  944. struct platform_device *pdev;
  945. void __iomem *regs;
  946. @@ -72,9 +179,151 @@ to_vc4_encoder(struct drm_encoder *encod
  947. return container_of(encoder, struct vc4_encoder, base);
  948. }
  949. +#define V3D_READ(offset) readl(vc4->v3d->regs + offset)
  950. +#define V3D_WRITE(offset, val) writel(val, vc4->v3d->regs + offset)
  951. #define HVS_READ(offset) readl(vc4->hvs->regs + offset)
  952. #define HVS_WRITE(offset, val) writel(val, vc4->hvs->regs + offset)
  953. +enum vc4_bo_mode {
  954. + VC4_MODE_UNDECIDED,
  955. + VC4_MODE_RENDER,
  956. + VC4_MODE_SHADER,
  957. +};
  958. +
  959. +struct vc4_bo_exec_state {
  960. + struct drm_gem_cma_object *bo;
  961. + enum vc4_bo_mode mode;
  962. +};
  963. +
  964. +struct vc4_exec_info {
  965. + /* Sequence number for this bin/render job. */
  966. + uint64_t seqno;
  967. +
  968. + /* Kernel-space copy of the ioctl arguments */
  969. + struct drm_vc4_submit_cl *args;
  970. +
  971. + /* This is the array of BOs that were looked up at the start of exec.
  972. + * Command validation will use indices into this array.
  973. + */
  974. + struct vc4_bo_exec_state *bo;
  975. + uint32_t bo_count;
  976. +
  977. + /* Pointers for our position in vc4->job_list */
  978. + struct list_head head;
  979. +
  980. + /* List of other BOs used in the job that need to be released
  981. + * once the job is complete.
  982. + */
  983. + struct list_head unref_list;
  984. +
  985. + /* Current unvalidated indices into @bo loaded by the non-hardware
  986. + * VC4_PACKET_GEM_HANDLES.
  987. + */
  988. + uint32_t bo_index[2];
  989. +
  990. + /* This is the BO where we store the validated command lists, shader
  991. + * records, and uniforms.
  992. + */
  993. + struct drm_gem_cma_object *exec_bo;
  994. +
  995. + /**
  996. + * This tracks the per-shader-record state (packet 64) that
  997. + * determines the length of the shader record and the offset
  998. + * it's expected to be found at. It gets read in from the
  999. + * command lists.
  1000. + */
  1001. + struct vc4_shader_state {
  1002. + uint8_t packet;
  1003. + uint32_t addr;
  1004. + /* Maximum vertex index referenced by any primitive using this
  1005. + * shader state.
  1006. + */
  1007. + uint32_t max_index;
  1008. + } *shader_state;
  1009. +
  1010. + /** How many shader states the user declared they were using. */
  1011. + uint32_t shader_state_size;
  1012. + /** How many shader state records the validator has seen. */
  1013. + uint32_t shader_state_count;
  1014. +
  1015. + bool found_tile_binning_mode_config_packet;
  1016. + bool found_start_tile_binning_packet;
  1017. + bool found_increment_semaphore_packet;
  1018. + uint8_t bin_tiles_x, bin_tiles_y;
  1019. + struct drm_gem_cma_object *tile_bo;
  1020. + uint32_t tile_alloc_offset;
  1021. +
  1022. + /**
  1023. + * Computed addresses pointing into exec_bo where we start the
  1024. + * bin thread (ct0) and render thread (ct1).
  1025. + */
  1026. + uint32_t ct0ca, ct0ea;
  1027. + uint32_t ct1ca, ct1ea;
  1028. +
   1029. + /* Pointers to the shader recs. The paddr gets incremented as CL
  1030. + * packets are relocated in validate_gl_shader_state, and the vaddrs
  1031. + * (u and v) get incremented and size decremented as the shader recs
  1032. + * themselves are validated.
  1033. + */
  1034. + void *shader_rec_u;
  1035. + void *shader_rec_v;
  1036. + uint32_t shader_rec_p;
  1037. + uint32_t shader_rec_size;
  1038. +
  1039. + /* Pointers to the uniform data. These pointers are incremented, and
  1040. + * size decremented, as each batch of uniforms is uploaded.
  1041. + */
  1042. + void *uniforms_u;
  1043. + void *uniforms_v;
  1044. + uint32_t uniforms_p;
  1045. + uint32_t uniforms_size;
  1046. +};
  1047. +
  1048. +static inline struct vc4_exec_info *
  1049. +vc4_first_job(struct vc4_dev *vc4)
  1050. +{
  1051. + if (list_empty(&vc4->job_list))
  1052. + return NULL;
  1053. + return list_first_entry(&vc4->job_list, struct vc4_exec_info, head);
  1054. +}
  1055. +
  1056. +/**
  1057. + * struct vc4_texture_sample_info - saves the offsets into the UBO for texture
  1058. + * setup parameters.
  1059. + *
  1060. + * This will be used at draw time to relocate the reference to the texture
  1061. + * contents in p0, and validate that the offset combined with
  1062. + * width/height/stride/etc. from p1 and p2/p3 doesn't sample outside the BO.
  1063. + * Note that the hardware treats unprovided config parameters as 0, so not all
   1064. + * of them need to be set up for every texture sample, and we'll store ~0 as
  1065. + * the offset to mark the unused ones.
  1066. + *
  1067. + * See the VC4 3D architecture guide page 41 ("Texture and Memory Lookup Unit
  1068. + * Setup") for definitions of the texture parameters.
  1069. + */
  1070. +struct vc4_texture_sample_info {
  1071. + bool is_direct;
  1072. + uint32_t p_offset[4];
  1073. +};
  1074. +
  1075. +/**
  1076. + * struct vc4_validated_shader_info - information about validated shaders that
  1077. + * needs to be used from command list validation.
  1078. + *
  1079. + * For a given shader, each time a shader state record references it, we need
  1080. + * to verify that the shader doesn't read more uniforms than the shader state
  1081. + * record's uniform BO pointer can provide, and we need to apply relocations
  1082. + * and validate the shader state record's uniforms that define the texture
  1083. + * samples.
  1084. + */
   1085. +struct vc4_validated_shader_info {
  1087. + uint32_t uniforms_size;
  1088. + uint32_t uniforms_src_size;
  1089. + uint32_t num_texture_samples;
  1090. + struct vc4_texture_sample_info *texture_samples;
  1091. +};
  1092. +
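A minimal sketch of the kind of per-shader-record check these two structs enable, assuming a hypothetical helper name and a caller that already knows how many bytes of uniforms the shader state record supplies; this is illustrative only, the real validation lives in vc4_validate.c.

	static bool check_shader_record_bounds(
			const struct vc4_validated_shader_info *vs,
			uint32_t uniforms_available)
	{
		uint32_t i, j;

		/* The record must supply at least as many uniform bytes as
		 * the validated shader was seen to read.
		 */
		if (uniforms_available < vs->uniforms_src_size)
			return false;

		/* Each texture sample's p0..p3 offsets must land inside the
		 * supplied uniform stream; ~0 marks parameters that were
		 * never set up for that sample.
		 */
		for (i = 0; i < vs->num_texture_samples; i++) {
			const struct vc4_texture_sample_info *tex =
				&vs->texture_samples[i];

			for (j = 0; j < 4; j++) {
				if (tex->p_offset[j] == ~0u)
					continue;
				if (tex->p_offset[j] + 4 > uniforms_available)
					return false;
			}
		}

		return true;
	}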
  1093. /**
  1094. * _wait_for - magic (register) wait macro
  1095. *
  1096. @@ -111,6 +360,18 @@ int vc4_dumb_create(struct drm_file *fil
  1097. struct drm_mode_create_dumb *args);
  1098. struct dma_buf *vc4_prime_export(struct drm_device *dev,
  1099. struct drm_gem_object *obj, int flags);
  1100. +int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
  1101. + struct drm_file *file_priv);
  1102. +int vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
  1103. + struct drm_file *file_priv);
  1104. +int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
  1105. + struct drm_file *file_priv);
  1106. +int vc4_mmap(struct file *filp, struct vm_area_struct *vma);
  1107. +int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
  1108. +void *vc4_prime_vmap(struct drm_gem_object *obj);
  1109. +void vc4_bo_cache_init(struct drm_device *dev);
  1110. +void vc4_bo_cache_destroy(struct drm_device *dev);
  1111. +int vc4_bo_stats_debugfs(struct seq_file *m, void *arg);
  1112. /* vc4_crtc.c */
  1113. extern struct platform_driver vc4_crtc_driver;
  1114. @@ -126,10 +387,34 @@ void vc4_debugfs_cleanup(struct drm_mino
  1115. /* vc4_drv.c */
  1116. void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index);
  1117. +/* vc4_gem.c */
  1118. +void vc4_gem_init(struct drm_device *dev);
  1119. +void vc4_gem_destroy(struct drm_device *dev);
  1120. +int vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
  1121. + struct drm_file *file_priv);
  1122. +int vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
  1123. + struct drm_file *file_priv);
  1124. +int vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
  1125. + struct drm_file *file_priv);
  1126. +void vc4_submit_next_job(struct drm_device *dev);
  1127. +int vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno,
  1128. + uint64_t timeout_ns, bool interruptible);
  1129. +void vc4_job_handle_completed(struct vc4_dev *vc4);
  1130. +int vc4_queue_seqno_cb(struct drm_device *dev,
  1131. + struct vc4_seqno_cb *cb, uint64_t seqno,
  1132. + void (*func)(struct vc4_seqno_cb *cb));
  1133. +
  1134. /* vc4_hdmi.c */
  1135. extern struct platform_driver vc4_hdmi_driver;
  1136. int vc4_hdmi_debugfs_regs(struct seq_file *m, void *unused);
  1137. +/* vc4_irq.c */
  1138. +irqreturn_t vc4_irq(int irq, void *arg);
  1139. +void vc4_irq_preinstall(struct drm_device *dev);
  1140. +int vc4_irq_postinstall(struct drm_device *dev);
  1141. +void vc4_irq_uninstall(struct drm_device *dev);
  1142. +void vc4_irq_reset(struct drm_device *dev);
  1143. +
  1144. /* vc4_hvs.c */
  1145. extern struct platform_driver vc4_hvs_driver;
  1146. void vc4_hvs_dump_state(struct drm_device *dev);
  1147. @@ -143,3 +428,35 @@ struct drm_plane *vc4_plane_init(struct
  1148. enum drm_plane_type type);
  1149. u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist);
  1150. u32 vc4_plane_dlist_size(struct drm_plane_state *state);
  1151. +void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb);
  1152. +
  1153. +/* vc4_v3d.c */
  1154. +extern struct platform_driver vc4_v3d_driver;
  1155. +int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused);
  1156. +int vc4_v3d_debugfs_regs(struct seq_file *m, void *unused);
  1157. +int vc4_v3d_set_power(struct vc4_dev *vc4, bool on);
  1158. +
  1159. +/* vc4_validate.c */
  1160. +int
  1161. +vc4_validate_bin_cl(struct drm_device *dev,
  1162. + void *validated,
  1163. + void *unvalidated,
  1164. + struct vc4_exec_info *exec);
  1165. +
  1166. +int
  1167. +vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec);
  1168. +
  1169. +struct vc4_validated_shader_info *
  1170. +vc4_validate_shader(struct drm_gem_cma_object *shader_obj);
  1171. +
  1172. +bool vc4_use_bo(struct vc4_exec_info *exec,
  1173. + uint32_t hindex,
  1174. + enum vc4_bo_mode mode,
  1175. + struct drm_gem_cma_object **obj);
  1176. +
  1177. +int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec);
  1178. +
  1179. +bool vc4_check_tex_size(struct vc4_exec_info *exec,
  1180. + struct drm_gem_cma_object *fbo,
  1181. + uint32_t offset, uint8_t tiling_format,
  1182. + uint32_t width, uint32_t height, uint8_t cpp);
  1183. --- /dev/null
  1184. +++ b/drivers/gpu/drm/vc4/vc4_gem.c
  1185. @@ -0,0 +1,686 @@
  1186. +/*
  1187. + * Copyright © 2014 Broadcom
  1188. + *
  1189. + * Permission is hereby granted, free of charge, to any person obtaining a
  1190. + * copy of this software and associated documentation files (the "Software"),
  1191. + * to deal in the Software without restriction, including without limitation
  1192. + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  1193. + * and/or sell copies of the Software, and to permit persons to whom the
  1194. + * Software is furnished to do so, subject to the following conditions:
  1195. + *
  1196. + * The above copyright notice and this permission notice (including the next
  1197. + * paragraph) shall be included in all copies or substantial portions of the
  1198. + * Software.
  1199. + *
  1200. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  1201. + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  1202. + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  1203. + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  1204. + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  1205. + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  1206. + * IN THE SOFTWARE.
  1207. + */
  1208. +
  1209. +#include <linux/module.h>
  1210. +#include <linux/platform_device.h>
  1211. +#include <linux/device.h>
  1212. +#include <linux/io.h>
  1213. +
  1214. +#include "uapi/drm/vc4_drm.h"
  1215. +#include "vc4_drv.h"
  1216. +#include "vc4_regs.h"
  1217. +#include "vc4_trace.h"
  1218. +
  1219. +static void
  1220. +vc4_queue_hangcheck(struct drm_device *dev)
  1221. +{
  1222. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  1223. +
  1224. + mod_timer(&vc4->hangcheck.timer,
  1225. + round_jiffies_up(jiffies + msecs_to_jiffies(100)));
  1226. +}
  1227. +
  1228. +static void
  1229. +vc4_reset(struct drm_device *dev)
  1230. +{
  1231. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  1232. +
  1233. + DRM_INFO("Resetting GPU.\n");
  1234. + vc4_v3d_set_power(vc4, false);
  1235. + vc4_v3d_set_power(vc4, true);
  1236. +
  1237. + vc4_irq_reset(dev);
  1238. +
  1239. + /* Rearm the hangcheck -- another job might have been waiting
  1240. + * for our hung one to get kicked off, and vc4_irq_reset()
  1241. + * would have started it.
  1242. + */
  1243. + vc4_queue_hangcheck(dev);
  1244. +}
  1245. +
  1246. +static void
  1247. +vc4_reset_work(struct work_struct *work)
  1248. +{
  1249. + struct vc4_dev *vc4 =
  1250. + container_of(work, struct vc4_dev, hangcheck.reset_work);
  1251. +
  1252. + vc4_reset(vc4->dev);
  1253. +}
  1254. +
  1255. +static void
  1256. +vc4_hangcheck_elapsed(unsigned long data)
  1257. +{
  1258. + struct drm_device *dev = (struct drm_device *)data;
  1259. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  1260. + uint32_t ct0ca, ct1ca;
  1261. +
  1262. + /* If idle, we can stop watching for hangs. */
  1263. + if (list_empty(&vc4->job_list))
  1264. + return;
  1265. +
  1266. + ct0ca = V3D_READ(V3D_CTNCA(0));
  1267. + ct1ca = V3D_READ(V3D_CTNCA(1));
  1268. +
  1269. + /* If we've made any progress in execution, rearm the timer
  1270. + * and wait.
  1271. + */
  1272. + if (ct0ca != vc4->hangcheck.last_ct0ca ||
  1273. + ct1ca != vc4->hangcheck.last_ct1ca) {
  1274. + vc4->hangcheck.last_ct0ca = ct0ca;
  1275. + vc4->hangcheck.last_ct1ca = ct1ca;
  1276. + vc4_queue_hangcheck(dev);
  1277. + return;
  1278. + }
  1279. +
  1280. + /* We've gone too long with no progress, reset. This has to
  1281. + * be done from a work struct, since resetting can sleep and
  1282. + * this timer hook isn't allowed to.
  1283. + */
  1284. + schedule_work(&vc4->hangcheck.reset_work);
  1285. +}
  1286. +
  1287. +static void
  1288. +submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end)
  1289. +{
  1290. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  1291. +
  1292. + /* Stop any existing thread and set state to "stopped at halt" */
  1293. + V3D_WRITE(V3D_CTNCS(thread), V3D_CTRUN);
  1294. + barrier();
  1295. +
  1296. + V3D_WRITE(V3D_CTNCA(thread), start);
  1297. + barrier();
  1298. +
  1299. + /* Set the end address of the control list. Writing this
  1300. + * register is what starts the job.
  1301. + */
  1302. + V3D_WRITE(V3D_CTNEA(thread), end);
  1303. + barrier();
  1304. +}
  1305. +
  1306. +int
  1307. +vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns,
  1308. + bool interruptible)
  1309. +{
  1310. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  1311. + int ret = 0;
  1312. + unsigned long timeout_expire;
  1313. + DEFINE_WAIT(wait);
  1314. +
  1315. + if (vc4->finished_seqno >= seqno)
  1316. + return 0;
  1317. +
  1318. + if (timeout_ns == 0)
  1319. + return -ETIME;
  1320. +
  1321. + timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns);
  1322. +
  1323. + trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
  1324. + for (;;) {
  1325. + prepare_to_wait(&vc4->job_wait_queue, &wait,
  1326. + interruptible ? TASK_INTERRUPTIBLE :
  1327. + TASK_UNINTERRUPTIBLE);
  1328. +
  1329. + if (interruptible && signal_pending(current)) {
  1330. + ret = -ERESTARTSYS;
  1331. + break;
  1332. + }
  1333. +
  1334. + if (vc4->finished_seqno >= seqno)
  1335. + break;
  1336. +
  1337. + if (timeout_ns != ~0ull) {
  1338. + if (time_after_eq(jiffies, timeout_expire)) {
  1339. + ret = -ETIME;
  1340. + break;
  1341. + }
  1342. + schedule_timeout(timeout_expire - jiffies);
  1343. + } else {
  1344. + schedule();
  1345. + }
  1346. + }
  1347. +
  1348. + finish_wait(&vc4->job_wait_queue, &wait);
  1349. + trace_vc4_wait_for_seqno_end(dev, seqno);
  1350. +
  1351. + if (ret && ret != -ERESTARTSYS) {
  1352. + DRM_ERROR("timeout waiting for render thread idle\n");
  1353. + return ret;
  1354. + }
  1355. +
  1356. + return 0;
  1357. +}
  1358. +
  1359. +static void
  1360. +vc4_flush_caches(struct drm_device *dev)
  1361. +{
  1362. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  1363. +
  1364. + /* Flush the GPU L2 caches. These caches sit on top of system
  1365. + * L3 (the 128kb or so shared with the CPU), and are
  1366. + * non-allocating in the L3.
  1367. + */
  1368. + V3D_WRITE(V3D_L2CACTL,
  1369. + V3D_L2CACTL_L2CCLR);
  1370. +
  1371. + V3D_WRITE(V3D_SLCACTL,
  1372. + VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) |
  1373. + VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) |
  1374. + VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
  1375. + VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC));
  1376. +}
  1377. +
   1378. +/* Sets the registers for the next job to actually be executed in
  1379. + * the hardware.
  1380. + *
  1381. + * The job_lock should be held during this.
  1382. + */
  1383. +void
  1384. +vc4_submit_next_job(struct drm_device *dev)
  1385. +{
  1386. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  1387. + struct vc4_exec_info *exec = vc4_first_job(vc4);
  1388. +
  1389. + if (!exec)
  1390. + return;
  1391. +
  1392. + vc4_flush_caches(dev);
  1393. +
  1394. + /* Disable the binner's pre-loaded overflow memory address */
  1395. + V3D_WRITE(V3D_BPOA, 0);
  1396. + V3D_WRITE(V3D_BPOS, 0);
  1397. +
  1398. + if (exec->ct0ca != exec->ct0ea)
  1399. + submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
  1400. + submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
  1401. +}
  1402. +
  1403. +static void
  1404. +vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
  1405. +{
  1406. + struct vc4_bo *bo;
  1407. + unsigned i;
  1408. +
  1409. + for (i = 0; i < exec->bo_count; i++) {
  1410. + bo = to_vc4_bo(&exec->bo[i].bo->base);
  1411. + bo->seqno = seqno;
  1412. + }
  1413. +
  1414. + list_for_each_entry(bo, &exec->unref_list, unref_head) {
  1415. + bo->seqno = seqno;
  1416. + }
  1417. +}
  1418. +
  1419. +/* Queues a struct vc4_exec_info for execution. If no job is
  1420. + * currently executing, then submits it.
  1421. + *
  1422. + * Unlike most GPUs, our hardware only handles one command list at a
  1423. + * time. To queue multiple jobs at once, we'd need to edit the
  1424. + * previous command list to have a jump to the new one at the end, and
  1425. + * then bump the end address. That's a change for a later date,
  1426. + * though.
  1427. + */
  1428. +static void
  1429. +vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
  1430. +{
  1431. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  1432. + uint64_t seqno = ++vc4->emit_seqno;
  1433. + unsigned long irqflags;
  1434. +
  1435. + exec->seqno = seqno;
  1436. + vc4_update_bo_seqnos(exec, seqno);
  1437. +
  1438. + spin_lock_irqsave(&vc4->job_lock, irqflags);
  1439. + list_add_tail(&exec->head, &vc4->job_list);
  1440. +
  1441. + /* If no job was executing, kick ours off. Otherwise, it'll
  1442. + * get started when the previous job's frame done interrupt
  1443. + * occurs.
  1444. + */
  1445. + if (vc4_first_job(vc4) == exec) {
  1446. + vc4_submit_next_job(dev);
  1447. + vc4_queue_hangcheck(dev);
  1448. + }
  1449. +
  1450. + spin_unlock_irqrestore(&vc4->job_lock, irqflags);
  1451. +}
  1452. +
  1453. +/**
  1454. + * Looks up a bunch of GEM handles for BOs and stores the array for
  1455. + * use in the command validator that actually writes relocated
  1456. + * addresses pointing to them.
  1457. + */
  1458. +static int
  1459. +vc4_cl_lookup_bos(struct drm_device *dev,
  1460. + struct drm_file *file_priv,
  1461. + struct vc4_exec_info *exec)
  1462. +{
  1463. + struct drm_vc4_submit_cl *args = exec->args;
  1464. + uint32_t *handles;
  1465. + int ret = 0;
  1466. + int i;
  1467. +
  1468. + exec->bo_count = args->bo_handle_count;
  1469. +
  1470. + if (!exec->bo_count) {
  1471. + /* See comment on bo_index for why we have to check
  1472. + * this.
  1473. + */
  1474. + DRM_ERROR("Rendering requires BOs to validate\n");
  1475. + return -EINVAL;
  1476. + }
  1477. +
  1478. + exec->bo = kcalloc(exec->bo_count, sizeof(struct vc4_bo_exec_state),
  1479. + GFP_KERNEL);
  1480. + if (!exec->bo) {
  1481. + DRM_ERROR("Failed to allocate validated BO pointers\n");
  1482. + return -ENOMEM;
  1483. + }
  1484. +
  1485. + handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t));
  1486. + if (!handles) {
  1487. + DRM_ERROR("Failed to allocate incoming GEM handles\n");
  1488. + goto fail;
  1489. + }
  1490. +
  1491. + ret = copy_from_user(handles,
  1492. + (void __user *)(uintptr_t)args->bo_handles,
  1493. + exec->bo_count * sizeof(uint32_t));
  1494. + if (ret) {
  1495. + DRM_ERROR("Failed to copy in GEM handles\n");
  1496. + goto fail;
  1497. + }
  1498. +
  1499. + spin_lock(&file_priv->table_lock);
  1500. + for (i = 0; i < exec->bo_count; i++) {
  1501. + struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
  1502. + handles[i]);
  1503. + if (!bo) {
  1504. + DRM_ERROR("Failed to look up GEM BO %d: %d\n",
  1505. + i, handles[i]);
  1506. + ret = -EINVAL;
  1507. + spin_unlock(&file_priv->table_lock);
  1508. + goto fail;
  1509. + }
  1510. + drm_gem_object_reference(bo);
  1511. + exec->bo[i].bo = (struct drm_gem_cma_object *)bo;
  1512. + }
  1513. + spin_unlock(&file_priv->table_lock);
  1514. +
  1515. +fail:
  1516. + kfree(handles);
   1517. + return ret;
  1518. +}
  1519. +
  1520. +static int
  1521. +vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
  1522. +{
  1523. + struct drm_vc4_submit_cl *args = exec->args;
  1524. + void *temp = NULL;
  1525. + void *bin;
  1526. + int ret = 0;
  1527. + uint32_t bin_offset = 0;
  1528. + uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size,
  1529. + 16);
  1530. + uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size;
  1531. + uint32_t exec_size = uniforms_offset + args->uniforms_size;
  1532. + uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
  1533. + args->shader_rec_count);
  1534. + struct vc4_bo *bo;
  1535. +
  1536. + if (uniforms_offset < shader_rec_offset ||
  1537. + exec_size < uniforms_offset ||
  1538. + args->shader_rec_count >= (UINT_MAX /
  1539. + sizeof(struct vc4_shader_state)) ||
  1540. + temp_size < exec_size) {
  1541. + DRM_ERROR("overflow in exec arguments\n");
  1542. + goto fail;
  1543. + }
  1544. +
  1545. + /* Allocate space where we'll store the copied in user command lists
  1546. + * and shader records.
  1547. + *
  1548. + * We don't just copy directly into the BOs because we need to
  1549. + * read the contents back for validation, and I think the
  1550. + * bo->vaddr is uncached access.
  1551. + */
  1552. + temp = kmalloc(temp_size, GFP_KERNEL);
  1553. + if (!temp) {
  1554. + DRM_ERROR("Failed to allocate storage for copying "
  1555. + "in bin/render CLs.\n");
  1556. + ret = -ENOMEM;
  1557. + goto fail;
  1558. + }
  1559. + bin = temp + bin_offset;
  1560. + exec->shader_rec_u = temp + shader_rec_offset;
  1561. + exec->uniforms_u = temp + uniforms_offset;
  1562. + exec->shader_state = temp + exec_size;
  1563. + exec->shader_state_size = args->shader_rec_count;
  1564. +
  1565. + ret = copy_from_user(bin,
  1566. + (void __user *)(uintptr_t)args->bin_cl,
  1567. + args->bin_cl_size);
  1568. + if (ret) {
  1569. + DRM_ERROR("Failed to copy in bin cl\n");
  1570. + goto fail;
  1571. + }
  1572. +
  1573. + ret = copy_from_user(exec->shader_rec_u,
  1574. + (void __user *)(uintptr_t)args->shader_rec,
  1575. + args->shader_rec_size);
  1576. + if (ret) {
  1577. + DRM_ERROR("Failed to copy in shader recs\n");
  1578. + goto fail;
  1579. + }
  1580. +
  1581. + ret = copy_from_user(exec->uniforms_u,
  1582. + (void __user *)(uintptr_t)args->uniforms,
  1583. + args->uniforms_size);
  1584. + if (ret) {
  1585. + DRM_ERROR("Failed to copy in uniforms cl\n");
  1586. + goto fail;
  1587. + }
  1588. +
  1589. + bo = vc4_bo_create(dev, exec_size);
  1590. + if (!bo) {
  1591. + DRM_ERROR("Couldn't allocate BO for binning\n");
   1592. + ret = -ENOMEM;
  1593. + goto fail;
  1594. + }
  1595. + exec->exec_bo = &bo->base;
  1596. +
  1597. + list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
  1598. + &exec->unref_list);
  1599. +
  1600. + exec->ct0ca = exec->exec_bo->paddr + bin_offset;
  1601. +
  1602. + exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
  1603. + exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
  1604. + exec->shader_rec_size = args->shader_rec_size;
  1605. +
  1606. + exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
  1607. + exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
  1608. + exec->uniforms_size = args->uniforms_size;
  1609. +
  1610. + ret = vc4_validate_bin_cl(dev,
  1611. + exec->exec_bo->vaddr + bin_offset,
  1612. + bin,
  1613. + exec);
  1614. + if (ret)
  1615. + goto fail;
  1616. +
  1617. + ret = vc4_validate_shader_recs(dev, exec);
  1618. +
  1619. +fail:
  1620. + kfree(temp);
  1621. + return ret;
  1622. +}
  1623. +
  1624. +static void
  1625. +vc4_complete_exec(struct vc4_exec_info *exec)
  1626. +{
  1627. + unsigned i;
  1628. +
  1629. + if (exec->bo) {
  1630. + for (i = 0; i < exec->bo_count; i++)
  1631. + drm_gem_object_unreference(&exec->bo[i].bo->base);
  1632. + kfree(exec->bo);
  1633. + }
  1634. +
  1635. + while (!list_empty(&exec->unref_list)) {
  1636. + struct vc4_bo *bo = list_first_entry(&exec->unref_list,
  1637. + struct vc4_bo, unref_head);
  1638. + list_del(&bo->unref_head);
  1639. + drm_gem_object_unreference(&bo->base.base);
  1640. + }
  1641. +
  1642. + kfree(exec);
  1643. +}
  1644. +
  1645. +void
  1646. +vc4_job_handle_completed(struct vc4_dev *vc4)
  1647. +{
  1648. + unsigned long irqflags;
  1649. + struct vc4_seqno_cb *cb, *cb_temp;
  1650. +
  1651. + spin_lock_irqsave(&vc4->job_lock, irqflags);
  1652. + while (!list_empty(&vc4->job_done_list)) {
  1653. + struct vc4_exec_info *exec =
  1654. + list_first_entry(&vc4->job_done_list,
  1655. + struct vc4_exec_info, head);
  1656. + list_del(&exec->head);
  1657. +
  1658. + spin_unlock_irqrestore(&vc4->job_lock, irqflags);
  1659. + vc4_complete_exec(exec);
  1660. + spin_lock_irqsave(&vc4->job_lock, irqflags);
  1661. + }
  1662. + spin_unlock_irqrestore(&vc4->job_lock, irqflags);
  1663. +
  1664. + list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) {
  1665. + if (cb->seqno <= vc4->finished_seqno) {
  1666. + list_del_init(&cb->work.entry);
  1667. + schedule_work(&cb->work);
  1668. + }
  1669. + }
  1670. +}
  1671. +
  1672. +static void vc4_seqno_cb_work(struct work_struct *work)
  1673. +{
  1674. + struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work);
  1675. + cb->func(cb);
  1676. +}
  1677. +
  1678. +int vc4_queue_seqno_cb(struct drm_device *dev,
  1679. + struct vc4_seqno_cb *cb, uint64_t seqno,
  1680. + void (*func)(struct vc4_seqno_cb *cb))
  1681. +{
  1682. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  1683. + int ret = 0;
  1684. +
  1685. + cb->func = func;
  1686. + INIT_WORK(&cb->work, vc4_seqno_cb_work);
  1687. +
  1688. + mutex_lock(&dev->struct_mutex);
  1689. + if (seqno > vc4->finished_seqno) {
  1690. + cb->seqno = seqno;
  1691. + list_add_tail(&cb->work.entry, &vc4->seqno_cb_list);
  1692. + } else {
  1693. + schedule_work(&cb->work);
  1694. + }
  1695. + mutex_unlock(&dev->struct_mutex);
  1696. +
  1697. + return ret;
  1698. +}
  1699. +
  1700. +/* Scheduled when any job has been completed, this walks the list of
  1701. + * jobs that had completed and unrefs their BOs and frees their exec
  1702. + * structs.
  1703. + */
  1704. +static void
  1705. +vc4_job_done_work(struct work_struct *work)
  1706. +{
  1707. + struct vc4_dev *vc4 =
  1708. + container_of(work, struct vc4_dev, job_done_work);
  1709. + struct drm_device *dev = vc4->dev;
  1710. +
  1711. + /* Need the struct lock for drm_gem_object_unreference(). */
  1712. + mutex_lock(&dev->struct_mutex);
  1713. + vc4_job_handle_completed(vc4);
  1714. + mutex_unlock(&dev->struct_mutex);
  1715. +}
  1716. +
  1717. +static int
  1718. +vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev,
  1719. + uint64_t seqno,
  1720. + uint64_t *timeout_ns)
  1721. +{
  1722. + unsigned long start = jiffies;
  1723. + int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true);
  1724. +
  1725. + if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) {
  1726. + uint64_t delta = jiffies_to_nsecs(jiffies - start);
  1727. + if (*timeout_ns >= delta)
  1728. + *timeout_ns -= delta;
  1729. + }
  1730. +
  1731. + return ret;
  1732. +}
  1733. +
  1734. +int
  1735. +vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
  1736. + struct drm_file *file_priv)
  1737. +{
  1738. + struct drm_vc4_wait_seqno *args = data;
  1739. +
  1740. + return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno,
  1741. + &args->timeout_ns);
  1742. +}
  1743. +
  1744. +int
  1745. +vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
  1746. + struct drm_file *file_priv)
  1747. +{
  1748. + int ret;
  1749. + struct drm_vc4_wait_bo *args = data;
  1750. + struct drm_gem_object *gem_obj;
  1751. + struct vc4_bo *bo;
  1752. +
  1753. + gem_obj = drm_gem_object_lookup(dev, file_priv, args->handle);
  1754. + if (!gem_obj) {
  1755. + DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
  1756. + return -EINVAL;
  1757. + }
  1758. + bo = to_vc4_bo(gem_obj);
  1759. +
  1760. + ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno, &args->timeout_ns);
  1761. +
  1762. + drm_gem_object_unreference(gem_obj);
  1763. + return ret;
  1764. +}
  1765. +
  1766. +/**
  1767. + * Submits a command list to the VC4.
  1768. + *
  1769. + * This is what is called batchbuffer emitting on other hardware.
  1770. + */
  1771. +int
  1772. +vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
  1773. + struct drm_file *file_priv)
  1774. +{
  1775. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  1776. + struct drm_vc4_submit_cl *args = data;
  1777. + struct vc4_exec_info *exec;
  1778. + int ret;
  1779. +
  1780. + if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
  1781. + DRM_ERROR("Unknown flags: 0x%02x\n", args->flags);
  1782. + return -EINVAL;
  1783. + }
  1784. +
  1785. + exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
  1786. + if (!exec) {
  1787. + DRM_ERROR("malloc failure on exec struct\n");
  1788. + return -ENOMEM;
  1789. + }
  1790. +
  1791. + exec->args = args;
  1792. + INIT_LIST_HEAD(&exec->unref_list);
  1793. +
  1794. + mutex_lock(&dev->struct_mutex);
  1795. +
  1796. + ret = vc4_cl_lookup_bos(dev, file_priv, exec);
  1797. + if (ret)
  1798. + goto fail;
  1799. +
  1800. + if (exec->args->bin_cl_size != 0) {
  1801. + ret = vc4_get_bcl(dev, exec);
  1802. + if (ret)
  1803. + goto fail;
  1804. + } else {
  1805. + exec->ct0ca = exec->ct0ea = 0;
  1806. + }
  1807. +
  1808. + ret = vc4_get_rcl(dev, exec);
  1809. + if (ret)
  1810. + goto fail;
  1811. +
  1812. + /* Clear this out of the struct we'll be putting in the queue,
  1813. + * since it's part of our stack.
  1814. + */
  1815. + exec->args = NULL;
  1816. +
  1817. + vc4_queue_submit(dev, exec);
  1818. +
  1819. + /* Return the seqno for our job. */
  1820. + args->seqno = vc4->emit_seqno;
  1821. +
  1822. + mutex_unlock(&dev->struct_mutex);
  1823. +
  1824. + return 0;
  1825. +
  1826. +fail:
  1827. + vc4_complete_exec(exec);
  1828. +
  1829. + mutex_unlock(&dev->struct_mutex);
  1830. +
  1831. + return ret;
  1832. +}
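A sketch of how userspace might drive this interface, assuming the UAPI struct added by this series in vc4_drm.h, ioctl wrapper macros following the DRM_IOCTL_VC4_* naming used elsewhere in the series, and a libdrm-style drmIoctl(); BO handles and buffer contents are placeholders, and the shader record / uniform streams are left empty for brevity.

	#include <stdint.h>
	#include <xf86drm.h>
	#include "vc4_drm.h"

	static int submit_and_wait(int fd, void *bin_cl, uint32_t bin_cl_size,
				   uint32_t *bo_handles, uint32_t bo_count)
	{
		struct drm_vc4_submit_cl submit = {
			.bin_cl = (uintptr_t)bin_cl,
			.bin_cl_size = bin_cl_size,
			.bo_handles = (uintptr_t)bo_handles,
			.bo_handle_count = bo_count,
			/* shader_rec/uniforms pointers and sizes left zero here */
		};
		struct drm_vc4_wait_seqno wait = { 0 };
		int ret;

		ret = drmIoctl(fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
		if (ret)
			return ret;

		/* The kernel wrote the job's seqno back into the args struct;
		 * block until that seqno has been reached.
		 */
		wait.seqno = submit.seqno;
		wait.timeout_ns = ~0ull;
		return drmIoctl(fd, DRM_IOCTL_VC4_WAIT_SEQNO, &wait);
	}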
  1833. +
  1834. +void
  1835. +vc4_gem_init(struct drm_device *dev)
  1836. +{
  1837. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  1838. +
  1839. + INIT_LIST_HEAD(&vc4->job_list);
  1840. + INIT_LIST_HEAD(&vc4->job_done_list);
  1841. + INIT_LIST_HEAD(&vc4->seqno_cb_list);
  1842. + spin_lock_init(&vc4->job_lock);
  1843. +
  1844. + INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
  1845. + setup_timer(&vc4->hangcheck.timer,
  1846. + vc4_hangcheck_elapsed,
  1847. + (unsigned long) dev);
  1848. +
  1849. + INIT_WORK(&vc4->job_done_work, vc4_job_done_work);
  1850. +}
  1851. +
  1852. +void
  1853. +vc4_gem_destroy(struct drm_device *dev)
  1854. +{
  1855. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  1856. +
  1857. + /* Waiting for exec to finish would need to be done before
  1858. + * unregistering V3D.
  1859. + */
  1860. + WARN_ON(vc4->emit_seqno != vc4->finished_seqno);
  1861. +
  1862. + /* V3D should already have disabled its interrupt and cleared
  1863. + * the overflow allocation registers. Now free the object.
  1864. + */
  1865. + if (vc4->overflow_mem) {
  1866. + drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
  1867. + vc4->overflow_mem = NULL;
  1868. + }
  1869. +
  1870. + vc4_bo_cache_destroy(dev);
  1871. +}
  1872. --- /dev/null
  1873. +++ b/drivers/gpu/drm/vc4/vc4_irq.c
  1874. @@ -0,0 +1,211 @@
  1875. +/*
  1876. + * Copyright © 2014 Broadcom
  1877. + *
  1878. + * Permission is hereby granted, free of charge, to any person obtaining a
  1879. + * copy of this software and associated documentation files (the "Software"),
  1880. + * to deal in the Software without restriction, including without limitation
  1881. + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  1882. + * and/or sell copies of the Software, and to permit persons to whom the
  1883. + * Software is furnished to do so, subject to the following conditions:
  1884. + *
  1885. + * The above copyright notice and this permission notice (including the next
  1886. + * paragraph) shall be included in all copies or substantial portions of the
  1887. + * Software.
  1888. + *
  1889. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  1890. + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  1891. + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  1892. + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  1893. + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  1894. + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  1895. + * IN THE SOFTWARE.
  1896. + */
  1897. +
  1898. +/** DOC: Interrupt management for the V3D engine.
  1899. + *
  1900. + * We have an interrupt status register (V3D_INTCTL) which reports
  1901. + * interrupts, and where writing 1 bits clears those interrupts.
  1902. + * There are also a pair of interrupt registers
  1903. + * (V3D_INTENA/V3D_INTDIS) where writing a 1 to their bits enables or
  1904. + * disables that specific interrupt, and 0s written are ignored
  1905. + * (reading either one returns the set of enabled interrupts).
  1906. + *
  1907. + * When we take a render frame interrupt, we need to wake the
  1908. + * processes waiting for some frame to be done, and get the next frame
  1909. + * submitted ASAP (so the hardware doesn't sit idle when there's work
  1910. + * to do).
  1911. + *
  1912. + * When we take the binner out of memory interrupt, we need to
  1913. + * allocate some new memory and pass it to the binner so that the
  1914. + * current job can make progress.
  1915. + */
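A compact sketch of the register protocol described above, using the V3D_READ/V3D_WRITE accessors from vc4_drv.h; the function name is hypothetical and purely illustrative, since the handlers below are the real users of these registers.

	static void vc4_irq_protocol_example(struct vc4_dev *vc4)
	{
		/* Writing 1s to INTDIS masks sources, e.g. while their
		 * handling is deferred to a workqueue; 0 bits are ignored.
		 */
		V3D_WRITE(V3D_INTDIS, V3D_INT_OUTOMEM);

		/* Writing 1s to INTCTL acknowledges (clears) latched
		 * interrupts.
		 */
		V3D_WRITE(V3D_INTCTL, V3D_INT_OUTOMEM | V3D_INT_FRDONE);

		/* Writing 1s to INTENA unmasks sources; reading it back
		 * returns the currently-enabled set.
		 */
		V3D_WRITE(V3D_INTENA, V3D_INT_FRDONE);
	}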
  1916. +
  1917. +#include "vc4_drv.h"
  1918. +#include "vc4_regs.h"
  1919. +
  1920. +#define V3D_DRIVER_IRQS (V3D_INT_OUTOMEM | \
  1921. + V3D_INT_FRDONE)
  1922. +
  1923. +DECLARE_WAIT_QUEUE_HEAD(render_wait);
  1924. +
  1925. +static void
  1926. +vc4_overflow_mem_work(struct work_struct *work)
  1927. +{
  1928. + struct vc4_dev *vc4 =
  1929. + container_of(work, struct vc4_dev, overflow_mem_work);
  1930. + struct drm_device *dev = vc4->dev;
  1931. + struct vc4_bo *bo;
  1932. +
  1933. + bo = vc4_bo_create(dev, 256 * 1024);
  1934. + if (!bo) {
  1935. + DRM_ERROR("Couldn't allocate binner overflow mem\n");
  1936. + return;
  1937. + }
  1938. +
  1939. + /* If there's a job executing currently, then our previous
  1940. + * overflow allocation is getting used in that job and we need
  1941. + * to queue it to be released when the job is done. But if no
  1942. + * job is executing at all, then we can free the old overflow
   1943. + * object directly.
  1944. + *
  1945. + * No lock necessary for this pointer since we're the only
  1946. + * ones that update the pointer, and our workqueue won't
  1947. + * reenter.
  1948. + */
  1949. + if (vc4->overflow_mem) {
  1950. + struct vc4_exec_info *current_exec;
  1951. + unsigned long irqflags;
  1952. +
  1953. + spin_lock_irqsave(&vc4->job_lock, irqflags);
  1954. + current_exec = vc4_first_job(vc4);
  1955. + if (current_exec) {
  1956. + vc4->overflow_mem->seqno = vc4->finished_seqno + 1;
  1957. + list_add_tail(&vc4->overflow_mem->unref_head,
  1958. + &current_exec->unref_list);
  1959. + vc4->overflow_mem = NULL;
  1960. + }
  1961. + spin_unlock_irqrestore(&vc4->job_lock, irqflags);
  1962. + }
  1963. +
  1964. + if (vc4->overflow_mem) {
  1965. + drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
  1966. + }
  1967. + vc4->overflow_mem = bo;
  1968. +
  1969. + V3D_WRITE(V3D_BPOA, bo->base.paddr);
  1970. + V3D_WRITE(V3D_BPOS, bo->base.base.size);
  1971. + V3D_WRITE(V3D_INTCTL, V3D_INT_OUTOMEM);
  1972. + V3D_WRITE(V3D_INTENA, V3D_INT_OUTOMEM);
  1973. +}
  1974. +
  1975. +static void
  1976. +vc4_irq_finish_job(struct drm_device *dev)
  1977. +{
  1978. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  1979. + struct vc4_exec_info *exec = vc4_first_job(vc4);
  1980. +
  1981. + if (!exec)
  1982. + return;
  1983. +
  1984. + vc4->finished_seqno++;
  1985. + list_move_tail(&exec->head, &vc4->job_done_list);
  1986. + vc4_submit_next_job(dev);
  1987. +
  1988. + wake_up_all(&vc4->job_wait_queue);
  1989. + schedule_work(&vc4->job_done_work);
  1990. +}
  1991. +
  1992. +irqreturn_t
  1993. +vc4_irq(int irq, void *arg)
  1994. +{
  1995. + struct drm_device *dev = arg;
  1996. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  1997. + uint32_t intctl;
  1998. + irqreturn_t status = IRQ_NONE;
  1999. +
  2000. + barrier();
  2001. + intctl = V3D_READ(V3D_INTCTL);
  2002. +
  2003. + /* Acknowledge the interrupts we're handling here. The render
  2004. + * frame done interrupt will be cleared, while OUTOMEM will
  2005. + * stay high until the underlying cause is cleared.
  2006. + */
  2007. + V3D_WRITE(V3D_INTCTL, intctl);
  2008. +
  2009. + if (intctl & V3D_INT_OUTOMEM) {
  2010. + /* Disable OUTOMEM until the work is done. */
  2011. + V3D_WRITE(V3D_INTDIS, V3D_INT_OUTOMEM);
  2012. + schedule_work(&vc4->overflow_mem_work);
  2013. + status = IRQ_HANDLED;
  2014. + }
  2015. +
  2016. + if (intctl & V3D_INT_FRDONE) {
  2017. + spin_lock(&vc4->job_lock);
  2018. + vc4_irq_finish_job(dev);
  2019. + spin_unlock(&vc4->job_lock);
  2020. + status = IRQ_HANDLED;
  2021. + }
  2022. +
  2023. + return status;
  2024. +}
  2025. +
  2026. +void
  2027. +vc4_irq_preinstall(struct drm_device *dev)
  2028. +{
  2029. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  2030. +
  2031. + init_waitqueue_head(&vc4->job_wait_queue);
  2032. + INIT_WORK(&vc4->overflow_mem_work, vc4_overflow_mem_work);
  2033. +
  2034. + /* Clear any pending interrupts someone might have left around
  2035. + * for us.
  2036. + */
  2037. + V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
  2038. +}
  2039. +
  2040. +int
  2041. +vc4_irq_postinstall(struct drm_device *dev)
  2042. +{
  2043. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  2044. +
  2045. + /* Enable both the render done and out of memory interrupts. */
  2046. + V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
  2047. +
  2048. + return 0;
  2049. +}
  2050. +
  2051. +void
  2052. +vc4_irq_uninstall(struct drm_device *dev)
  2053. +{
  2054. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  2055. +
  2056. + /* Disable sending interrupts for our driver's IRQs. */
  2057. + V3D_WRITE(V3D_INTDIS, V3D_DRIVER_IRQS);
  2058. +
  2059. + /* Clear any pending interrupts we might have left. */
  2060. + V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
  2061. +
  2062. + cancel_work_sync(&vc4->overflow_mem_work);
  2063. +}
  2064. +
  2065. +/** Reinitializes interrupt registers when a GPU reset is performed. */
  2066. +void vc4_irq_reset(struct drm_device *dev)
  2067. +{
  2068. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  2069. + unsigned long irqflags;
  2070. +
  2071. + /* Acknowledge any stale IRQs. */
  2072. + V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
  2073. +
  2074. + /*
  2075. + * Turn all our interrupts on. Binner out of memory is the
  2076. + * only one we expect to trigger at this point, since we've
  2077. + * just come from poweron and haven't supplied any overflow
  2078. + * memory yet.
  2079. + */
  2080. + V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
  2081. +
  2082. + spin_lock_irqsave(&vc4->job_lock, irqflags);
  2083. + vc4_irq_finish_job(dev);
  2084. + spin_unlock_irqrestore(&vc4->job_lock, irqflags);
  2085. +}
  2086. --- a/drivers/gpu/drm/vc4/vc4_kms.c
  2087. +++ b/drivers/gpu/drm/vc4/vc4_kms.c
  2088. @@ -15,6 +15,7 @@
  2089. */
  2090. #include "drm_crtc.h"
  2091. +#include "drm_atomic.h"
  2092. #include "drm_atomic_helper.h"
  2093. #include "drm_crtc_helper.h"
  2094. #include "drm_plane_helper.h"
  2095. @@ -29,10 +30,151 @@ static void vc4_output_poll_changed(stru
  2096. drm_fbdev_cma_hotplug_event(vc4->fbdev);
  2097. }
  2098. +struct vc4_commit {
  2099. + struct drm_device *dev;
  2100. + struct drm_atomic_state *state;
  2101. + struct vc4_seqno_cb cb;
  2102. +};
  2103. +
  2104. +static void
  2105. +vc4_atomic_complete_commit(struct vc4_commit *c)
  2106. +{
  2107. + struct drm_atomic_state *state = c->state;
  2108. + struct drm_device *dev = state->dev;
  2109. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  2110. +
  2111. + drm_atomic_helper_commit_modeset_disables(dev, state);
  2112. +
  2113. + drm_atomic_helper_commit_planes(dev, state);
  2114. +
  2115. + drm_atomic_helper_commit_modeset_enables(dev, state);
  2116. +
  2117. + drm_atomic_helper_wait_for_vblanks(dev, state);
  2118. +
  2119. + drm_atomic_helper_cleanup_planes(dev, state);
  2120. +
  2121. + drm_atomic_state_free(state);
  2122. +
  2123. + up(&vc4->async_modeset);
  2124. +
  2125. + kfree(c);
  2126. +}
  2127. +
  2128. +static void
  2129. +vc4_atomic_complete_commit_seqno_cb(struct vc4_seqno_cb *cb)
  2130. +{
  2131. + struct vc4_commit *c = container_of(cb, struct vc4_commit, cb);
  2132. +
  2133. + vc4_atomic_complete_commit(c);
  2134. +}
  2135. +
  2136. +static struct vc4_commit *commit_init(struct drm_atomic_state *state)
  2137. +{
  2138. + struct vc4_commit *c = kzalloc(sizeof(*c), GFP_KERNEL);
  2139. +
  2140. + if (!c)
  2141. + return NULL;
  2142. + c->dev = state->dev;
  2143. + c->state = state;
  2144. +
  2145. + return c;
  2146. +}
  2147. +
  2148. +/**
  2149. + * vc4_atomic_commit - commit validated state object
  2150. + * @dev: DRM device
  2151. + * @state: the driver state object
  2152. + * @async: asynchronous commit
  2153. + *
   2154. + * This function commits a state object that has been pre-validated with
   2155. + * drm_atomic_helper_check(). This can still fail when e.g. the framebuffer
   2156. + * reservation fails. Asynchronous commits are completed later, from a seqno
   + * callback that fires once rendering to the new framebuffers has finished.
  2157. + *
  2158. + * RETURNS
  2159. + * Zero for success or -errno.
  2160. + */
  2161. +static int vc4_atomic_commit(struct drm_device *dev,
  2162. + struct drm_atomic_state *state,
  2163. + bool async)
  2164. +{
  2165. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  2166. + int ret;
  2167. + int i;
  2168. + uint64_t wait_seqno = 0;
  2169. + struct vc4_commit *c;
  2170. +
  2171. + c = commit_init(state);
  2172. + if (!c)
  2173. + return -ENOMEM;
  2174. +
  2175. + /* Make sure that any outstanding modesets have finished. */
  2176. + ret = down_interruptible(&vc4->async_modeset);
  2177. + if (ret) {
  2178. + kfree(c);
  2179. + return ret;
  2180. + }
  2181. +
  2182. + ret = drm_atomic_helper_prepare_planes(dev, state);
  2183. + if (ret) {
  2184. + kfree(c);
  2185. + up(&vc4->async_modeset);
  2186. + return ret;
  2187. + }
  2188. +
  2189. + for (i = 0; i < dev->mode_config.num_total_plane; i++) {
  2190. + struct drm_plane *plane = state->planes[i];
  2191. + struct drm_plane_state *new_state = state->plane_states[i];
  2192. +
  2193. + if (!plane)
  2194. + continue;
  2195. +
  2196. + if ((plane->state->fb != new_state->fb) && new_state->fb) {
  2197. + struct drm_gem_cma_object *cma_bo =
  2198. + drm_fb_cma_get_gem_obj(new_state->fb, 0);
  2199. + struct vc4_bo *bo = to_vc4_bo(&cma_bo->base);
  2200. + wait_seqno = max(bo->seqno, wait_seqno);
  2201. + }
  2202. + }
  2203. +
  2204. + /*
  2205. + * This is the point of no return - everything below never fails except
  2206. + * when the hw goes bonghits. Which means we can commit the new state on
  2207. + * the software side now.
  2208. + */
  2209. +
  2210. + drm_atomic_helper_swap_state(dev, state);
  2211. +
  2212. + /*
  2213. + * Everything below can be run asynchronously without the need to grab
  2214. + * any modeset locks at all under one condition: It must be guaranteed
  2215. + * that the asynchronous work has either been cancelled (if the driver
  2216. + * supports it, which at least requires that the framebuffers get
  2217. + * cleaned up with drm_atomic_helper_cleanup_planes()) or completed
  2218. + * before the new state gets committed on the software side with
  2219. + * drm_atomic_helper_swap_state().
  2220. + *
  2221. + * This scheme allows new atomic state updates to be prepared and
  2222. + * checked in parallel to the asynchronous completion of the previous
  2223. + * update. Which is important since compositors need to figure out the
  2224. + * composition of the next frame right after having submitted the
  2225. + * current layout.
  2226. + */
  2227. +
  2228. + if (async) {
  2229. + vc4_queue_seqno_cb(dev, &c->cb, wait_seqno,
  2230. + vc4_atomic_complete_commit_seqno_cb);
  2231. + } else {
  2232. + vc4_wait_for_seqno(dev, wait_seqno, ~0ull, false);
  2233. + vc4_atomic_complete_commit(c);
  2234. + }
  2235. +
  2236. + return 0;
  2237. +}
  2238. +
  2239. static const struct drm_mode_config_funcs vc4_mode_funcs = {
  2240. .output_poll_changed = vc4_output_poll_changed,
  2241. .atomic_check = drm_atomic_helper_check,
  2242. - .atomic_commit = drm_atomic_helper_commit,
  2243. + .atomic_commit = vc4_atomic_commit,
  2244. .fb_create = drm_fb_cma_create,
  2245. };
  2246. @@ -41,6 +183,8 @@ int vc4_kms_load(struct drm_device *dev)
  2247. struct vc4_dev *vc4 = to_vc4_dev(dev);
  2248. int ret;
  2249. + sema_init(&vc4->async_modeset, 1);
  2250. +
  2251. ret = drm_vblank_init(dev, dev->mode_config.num_crtc);
  2252. if (ret < 0) {
  2253. dev_err(dev->dev, "failed to initialize vblank\n");
  2254. @@ -51,6 +195,8 @@ int vc4_kms_load(struct drm_device *dev)
  2255. dev->mode_config.max_height = 2048;
  2256. dev->mode_config.funcs = &vc4_mode_funcs;
  2257. dev->mode_config.preferred_depth = 24;
  2258. + dev->mode_config.async_page_flip = true;
  2259. +
  2260. dev->vblank_disable_allowed = true;
  2261. drm_mode_config_reset(dev);
  2262. --- /dev/null
  2263. +++ b/drivers/gpu/drm/vc4/vc4_packet.h
  2264. @@ -0,0 +1,384 @@
  2265. +/*
  2266. + * Copyright © 2014 Broadcom
  2267. + *
  2268. + * Permission is hereby granted, free of charge, to any person obtaining a
  2269. + * copy of this software and associated documentation files (the "Software"),
  2270. + * to deal in the Software without restriction, including without limitation
  2271. + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  2272. + * and/or sell copies of the Software, and to permit persons to whom the
  2273. + * Software is furnished to do so, subject to the following conditions:
  2274. + *
  2275. + * The above copyright notice and this permission notice (including the next
  2276. + * paragraph) shall be included in all copies or substantial portions of the
  2277. + * Software.
  2278. + *
  2279. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  2280. + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  2281. + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  2282. + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  2283. + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  2284. + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  2285. + * IN THE SOFTWARE.
  2286. + */
  2287. +
  2288. +#ifndef VC4_PACKET_H
  2289. +#define VC4_PACKET_H
  2290. +
  2291. +#include "vc4_regs.h" /* for VC4_MASK, VC4_GET_FIELD, VC4_SET_FIELD */
  2292. +
  2293. +enum vc4_packet {
  2294. + VC4_PACKET_HALT = 0,
  2295. + VC4_PACKET_NOP = 1,
  2296. +
  2297. + VC4_PACKET_FLUSH = 4,
  2298. + VC4_PACKET_FLUSH_ALL = 5,
  2299. + VC4_PACKET_START_TILE_BINNING = 6,
  2300. + VC4_PACKET_INCREMENT_SEMAPHORE = 7,
  2301. + VC4_PACKET_WAIT_ON_SEMAPHORE = 8,
  2302. +
  2303. + VC4_PACKET_BRANCH = 16,
  2304. + VC4_PACKET_BRANCH_TO_SUB_LIST = 17,
  2305. +
  2306. + VC4_PACKET_STORE_MS_TILE_BUFFER = 24,
  2307. + VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF = 25,
  2308. + VC4_PACKET_STORE_FULL_RES_TILE_BUFFER = 26,
  2309. + VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER = 27,
  2310. + VC4_PACKET_STORE_TILE_BUFFER_GENERAL = 28,
  2311. + VC4_PACKET_LOAD_TILE_BUFFER_GENERAL = 29,
  2312. +
  2313. + VC4_PACKET_GL_INDEXED_PRIMITIVE = 32,
  2314. + VC4_PACKET_GL_ARRAY_PRIMITIVE = 33,
  2315. +
  2316. + VC4_PACKET_COMPRESSED_PRIMITIVE = 48,
  2317. + VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE = 49,
  2318. +
  2319. + VC4_PACKET_PRIMITIVE_LIST_FORMAT = 56,
  2320. +
  2321. + VC4_PACKET_GL_SHADER_STATE = 64,
  2322. + VC4_PACKET_NV_SHADER_STATE = 65,
  2323. + VC4_PACKET_VG_SHADER_STATE = 66,
  2324. +
  2325. + VC4_PACKET_CONFIGURATION_BITS = 96,
  2326. + VC4_PACKET_FLAT_SHADE_FLAGS = 97,
  2327. + VC4_PACKET_POINT_SIZE = 98,
  2328. + VC4_PACKET_LINE_WIDTH = 99,
  2329. + VC4_PACKET_RHT_X_BOUNDARY = 100,
  2330. + VC4_PACKET_DEPTH_OFFSET = 101,
  2331. + VC4_PACKET_CLIP_WINDOW = 102,
  2332. + VC4_PACKET_VIEWPORT_OFFSET = 103,
  2333. + VC4_PACKET_Z_CLIPPING = 104,
  2334. + VC4_PACKET_CLIPPER_XY_SCALING = 105,
  2335. + VC4_PACKET_CLIPPER_Z_SCALING = 106,
  2336. +
  2337. + VC4_PACKET_TILE_BINNING_MODE_CONFIG = 112,
  2338. + VC4_PACKET_TILE_RENDERING_MODE_CONFIG = 113,
  2339. + VC4_PACKET_CLEAR_COLORS = 114,
  2340. + VC4_PACKET_TILE_COORDINATES = 115,
  2341. +
  2342. + /* Not an actual hardware packet -- this is what we use to put
   2343. + * references to GEM BOs in the command stream, since we need the u32
   2344. + * in the actual address packet in order to store the offset from the
   2345. + * start of the BO. (See the illustrative sketch after this enum.)
  2346. + */
  2347. + VC4_PACKET_GEM_HANDLES = 254,
  2348. +} __attribute__ ((__packed__));
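A sketch of what emitting that software-only packet from a userspace CL builder might look like, assuming <stdint.h> and hypothetical helper names; only the layout (one opcode byte followed by two u32 BO-table indices, matching VC4_PACKET_GEM_HANDLES_SIZE and the bo_index[] pair in struct vc4_exec_info) comes from this series.

	static inline void emit_u32(uint8_t **cl, uint32_t v)
	{
		/* Little-endian store, one byte at a time. */
		(*cl)[0] = v & 0xff;
		(*cl)[1] = (v >> 8) & 0xff;
		(*cl)[2] = (v >> 16) & 0xff;
		(*cl)[3] = (v >> 24) & 0xff;
		*cl += 4;
	}

	static inline void emit_gem_handles(uint8_t **cl, uint32_t bo0, uint32_t bo1)
	{
		/* Opcode 254 never reaches the hardware; the kernel consumes
		 * it while validating and relocating the bin CL.
		 */
		*(*cl)++ = VC4_PACKET_GEM_HANDLES;
		emit_u32(cl, bo0);	/* index of the first referenced BO */
		emit_u32(cl, bo1);	/* index of the second referenced BO, if any */
	}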
  2349. +
  2350. +#define VC4_PACKET_HALT_SIZE 1
  2351. +#define VC4_PACKET_NOP_SIZE 1
  2352. +#define VC4_PACKET_FLUSH_SIZE 1
  2353. +#define VC4_PACKET_FLUSH_ALL_SIZE 1
  2354. +#define VC4_PACKET_START_TILE_BINNING_SIZE 1
  2355. +#define VC4_PACKET_INCREMENT_SEMAPHORE_SIZE 1
  2356. +#define VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE 1
  2357. +#define VC4_PACKET_BRANCH_SIZE 5
  2358. +#define VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE 5
  2359. +#define VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE 1
  2360. +#define VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF_SIZE 1
  2361. +#define VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE 5
  2362. +#define VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE 5
  2363. +#define VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE 7
  2364. +#define VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE 7
  2365. +#define VC4_PACKET_GL_INDEXED_PRIMITIVE_SIZE 14
  2366. +#define VC4_PACKET_GL_ARRAY_PRIMITIVE_SIZE 10
  2367. +#define VC4_PACKET_COMPRESSED_PRIMITIVE_SIZE 1
  2368. +#define VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE_SIZE 1
  2369. +#define VC4_PACKET_PRIMITIVE_LIST_FORMAT_SIZE 2
  2370. +#define VC4_PACKET_GL_SHADER_STATE_SIZE 5
  2371. +#define VC4_PACKET_NV_SHADER_STATE_SIZE 5
  2372. +#define VC4_PACKET_VG_SHADER_STATE_SIZE 5
  2373. +#define VC4_PACKET_CONFIGURATION_BITS_SIZE 4
  2374. +#define VC4_PACKET_FLAT_SHADE_FLAGS_SIZE 5
  2375. +#define VC4_PACKET_POINT_SIZE_SIZE 5
  2376. +#define VC4_PACKET_LINE_WIDTH_SIZE 5
  2377. +#define VC4_PACKET_RHT_X_BOUNDARY_SIZE 3
  2378. +#define VC4_PACKET_DEPTH_OFFSET_SIZE 5
  2379. +#define VC4_PACKET_CLIP_WINDOW_SIZE 9
  2380. +#define VC4_PACKET_VIEWPORT_OFFSET_SIZE 5
  2381. +#define VC4_PACKET_Z_CLIPPING_SIZE 9
  2382. +#define VC4_PACKET_CLIPPER_XY_SCALING_SIZE 9
  2383. +#define VC4_PACKET_CLIPPER_Z_SCALING_SIZE 9
  2384. +#define VC4_PACKET_TILE_BINNING_MODE_CONFIG_SIZE 16
  2385. +#define VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE 11
  2386. +#define VC4_PACKET_CLEAR_COLORS_SIZE 14
  2387. +#define VC4_PACKET_TILE_COORDINATES_SIZE 3
  2388. +#define VC4_PACKET_GEM_HANDLES_SIZE 9
  2389. +
  2390. +/** @{
  2391. + * Bits used by packets like VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
  2392. + * VC4_PACKET_TILE_RENDERING_MODE_CONFIG.
  2393. +*/
  2394. +#define VC4_TILING_FORMAT_LINEAR 0
  2395. +#define VC4_TILING_FORMAT_T 1
  2396. +#define VC4_TILING_FORMAT_LT 2
  2397. +/** @} */
  2398. +
  2399. +/** @{
  2400. + *
  2401. + * low bits of VC4_PACKET_STORE_FULL_RES_TILE_BUFFER and
  2402. + * VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER.
  2403. + */
  2404. +#define VC4_LOADSTORE_FULL_RES_EOF (1 << 3)
  2405. +#define VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL (1 << 2)
  2406. +#define VC4_LOADSTORE_FULL_RES_DISABLE_ZS (1 << 1)
  2407. +#define VC4_LOADSTORE_FULL_RES_DISABLE_COLOR (1 << 0)
  2408. +
  2409. +/** @{
  2410. + *
  2411. + * byte 2 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
  2412. + * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL (low bits of the address)
  2413. + */
  2414. +
  2415. +#define VC4_LOADSTORE_TILE_BUFFER_EOF (1 << 3)
  2416. +#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_VG_MASK (1 << 2)
  2417. +#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_ZS (1 << 1)
  2418. +#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_COLOR (1 << 0)
  2419. +
  2420. +/** @} */
  2421. +
  2422. +/** @{
  2423. + *
  2424. + * byte 0-1 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
  2425. + * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
  2426. + */
  2427. +#define VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR (1 << 15)
  2428. +#define VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR (1 << 14)
  2429. +#define VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR (1 << 13)
  2430. +#define VC4_STORE_TILE_BUFFER_DISABLE_SWAP (1 << 12)
  2431. +
  2432. +#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK VC4_MASK(9, 8)
  2433. +#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT 8
  2434. +#define VC4_LOADSTORE_TILE_BUFFER_RGBA8888 0
  2435. +#define VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER 1
  2436. +#define VC4_LOADSTORE_TILE_BUFFER_BGR565 2
  2437. +/** @} */
  2438. +
  2439. +/** @{
  2440. + *
  2441. + * byte 0 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
  2442. + * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
  2443. + */
  2444. +#define VC4_STORE_TILE_BUFFER_MODE_MASK VC4_MASK(7, 6)
  2445. +#define VC4_STORE_TILE_BUFFER_MODE_SHIFT 6
  2446. +#define VC4_STORE_TILE_BUFFER_MODE_SAMPLE0 (0 << 6)
  2447. +#define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X4 (1 << 6)
  2448. +#define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X16 (2 << 6)
  2449. +
  2450. +/** The values of the field are VC4_TILING_FORMAT_* */
  2451. +#define VC4_LOADSTORE_TILE_BUFFER_TILING_MASK VC4_MASK(5, 4)
  2452. +#define VC4_LOADSTORE_TILE_BUFFER_TILING_SHIFT 4
  2453. +
  2454. +#define VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK VC4_MASK(2, 0)
  2455. +#define VC4_LOADSTORE_TILE_BUFFER_BUFFER_SHIFT 0
  2456. +#define VC4_LOADSTORE_TILE_BUFFER_NONE 0
  2457. +#define VC4_LOADSTORE_TILE_BUFFER_COLOR 1
  2458. +#define VC4_LOADSTORE_TILE_BUFFER_ZS 2
  2459. +#define VC4_LOADSTORE_TILE_BUFFER_Z 3
  2460. +#define VC4_LOADSTORE_TILE_BUFFER_VG_MASK 4
  2461. +#define VC4_LOADSTORE_TILE_BUFFER_FULL 5
  2462. +/** @} */
  2463. +
  2464. +#define VC4_INDEX_BUFFER_U8 (0 << 4)
  2465. +#define VC4_INDEX_BUFFER_U16 (1 << 4)
  2466. +
  2467. +/* This flag is only present in NV shader state. */
  2468. +#define VC4_SHADER_FLAG_SHADED_CLIP_COORDS (1 << 3)
  2469. +#define VC4_SHADER_FLAG_ENABLE_CLIPPING (1 << 2)
  2470. +#define VC4_SHADER_FLAG_VS_POINT_SIZE (1 << 1)
  2471. +#define VC4_SHADER_FLAG_FS_SINGLE_THREAD (1 << 0)
  2472. +
  2473. +/** @{ byte 2 of config bits. */
  2474. +#define VC4_CONFIG_BITS_EARLY_Z_UPDATE (1 << 1)
  2475. +#define VC4_CONFIG_BITS_EARLY_Z (1 << 0)
  2476. +/** @} */
  2477. +
  2478. +/** @{ byte 1 of config bits. */
  2479. +#define VC4_CONFIG_BITS_Z_UPDATE (1 << 7)
  2480. +/** same values in this 3-bit field as PIPE_FUNC_* */
  2481. +#define VC4_CONFIG_BITS_DEPTH_FUNC_SHIFT 4
  2482. +#define VC4_CONFIG_BITS_COVERAGE_READ_LEAVE (1 << 3)
  2483. +
  2484. +#define VC4_CONFIG_BITS_COVERAGE_UPDATE_NONZERO (0 << 1)
  2485. +#define VC4_CONFIG_BITS_COVERAGE_UPDATE_ODD (1 << 1)
  2486. +#define VC4_CONFIG_BITS_COVERAGE_UPDATE_OR (2 << 1)
  2487. +#define VC4_CONFIG_BITS_COVERAGE_UPDATE_ZERO (3 << 1)
  2488. +
  2489. +#define VC4_CONFIG_BITS_COVERAGE_PIPE_SELECT (1 << 0)
  2490. +/** @} */
  2491. +
  2492. +/** @{ byte 0 of config bits. */
  2493. +#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_NONE (0 << 6)
  2494. +#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_4X (1 << 6)
  2495. +#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_16X (2 << 6)
  2496. +
  2497. +#define VC4_CONFIG_BITS_AA_POINTS_AND_LINES (1 << 4)
  2498. +#define VC4_CONFIG_BITS_ENABLE_DEPTH_OFFSET (1 << 3)
  2499. +#define VC4_CONFIG_BITS_CW_PRIMITIVES (1 << 2)
  2500. +#define VC4_CONFIG_BITS_ENABLE_PRIM_BACK (1 << 1)
  2501. +#define VC4_CONFIG_BITS_ENABLE_PRIM_FRONT (1 << 0)
  2502. +/** @} */
  2503. +
  2504. +/** @{ bits in the last u8 of VC4_PACKET_TILE_BINNING_MODE_CONFIG */
  2505. +#define VC4_BIN_CONFIG_DB_NON_MS (1 << 7)
  2506. +
  2507. +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK VC4_MASK(6, 5)
  2508. +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_SHIFT 5
  2509. +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32 0
  2510. +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_64 1
  2511. +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128 2
  2512. +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_256 3
  2513. +
  2514. +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK VC4_MASK(4, 3)
  2515. +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_SHIFT 3
  2516. +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32 0
  2517. +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_64 1
  2518. +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_128 2
  2519. +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_256 3
  2520. +
  2521. +#define VC4_BIN_CONFIG_AUTO_INIT_TSDA (1 << 2)
  2522. +#define VC4_BIN_CONFIG_TILE_BUFFER_64BIT (1 << 1)
  2523. +#define VC4_BIN_CONFIG_MS_MODE_4X (1 << 0)
  2524. +/** @} */
  2525. +
  2526. +/** @{ bits in the last u16 of VC4_PACKET_TILE_RENDERING_MODE_CONFIG */
  2527. +#define VC4_RENDER_CONFIG_DB_NON_MS (1 << 12)
  2528. +#define VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE (1 << 11)
  2529. +#define VC4_RENDER_CONFIG_EARLY_Z_DIRECTION_G (1 << 10)
  2530. +#define VC4_RENDER_CONFIG_COVERAGE_MODE (1 << 9)
  2531. +#define VC4_RENDER_CONFIG_ENABLE_VG_MASK (1 << 8)
  2532. +
  2533. +/** The values of the field are VC4_TILING_FORMAT_* */
  2534. +#define VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK VC4_MASK(7, 6)
  2535. +#define VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT 6
  2536. +
  2537. +#define VC4_RENDER_CONFIG_DECIMATE_MODE_1X (0 << 4)
  2538. +#define VC4_RENDER_CONFIG_DECIMATE_MODE_4X (1 << 4)
  2539. +#define VC4_RENDER_CONFIG_DECIMATE_MODE_16X (2 << 4)
  2540. +
  2541. +#define VC4_RENDER_CONFIG_FORMAT_MASK VC4_MASK(3, 2)
  2542. +#define VC4_RENDER_CONFIG_FORMAT_SHIFT 2
  2543. +#define VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED 0
  2544. +#define VC4_RENDER_CONFIG_FORMAT_RGBA8888 1
  2545. +#define VC4_RENDER_CONFIG_FORMAT_BGR565 2
  2546. +
  2547. +#define VC4_RENDER_CONFIG_TILE_BUFFER_64BIT (1 << 1)
  2548. +#define VC4_RENDER_CONFIG_MS_MODE_4X (1 << 0)
  2549. +
  2550. +#define VC4_PRIMITIVE_LIST_FORMAT_16_INDEX (1 << 4)
  2551. +#define VC4_PRIMITIVE_LIST_FORMAT_32_XY (3 << 4)
  2552. +#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_POINTS (0 << 0)
  2553. +#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_LINES (1 << 0)
  2554. +#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES (2 << 0)
  2555. +#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_RHT (3 << 0)
  2556. +
  2557. +enum vc4_texture_data_type {
  2558. + VC4_TEXTURE_TYPE_RGBA8888 = 0,
  2559. + VC4_TEXTURE_TYPE_RGBX8888 = 1,
  2560. + VC4_TEXTURE_TYPE_RGBA4444 = 2,
  2561. + VC4_TEXTURE_TYPE_RGBA5551 = 3,
  2562. + VC4_TEXTURE_TYPE_RGB565 = 4,
  2563. + VC4_TEXTURE_TYPE_LUMINANCE = 5,
  2564. + VC4_TEXTURE_TYPE_ALPHA = 6,
  2565. + VC4_TEXTURE_TYPE_LUMALPHA = 7,
  2566. + VC4_TEXTURE_TYPE_ETC1 = 8,
  2567. + VC4_TEXTURE_TYPE_S16F = 9,
  2568. + VC4_TEXTURE_TYPE_S8 = 10,
  2569. + VC4_TEXTURE_TYPE_S16 = 11,
  2570. + VC4_TEXTURE_TYPE_BW1 = 12,
  2571. + VC4_TEXTURE_TYPE_A4 = 13,
  2572. + VC4_TEXTURE_TYPE_A1 = 14,
  2573. + VC4_TEXTURE_TYPE_RGBA64 = 15,
  2574. + VC4_TEXTURE_TYPE_RGBA32R = 16,
  2575. + VC4_TEXTURE_TYPE_YUV422R = 17,
  2576. +};
  2577. +
  2578. +#define VC4_TEX_P0_OFFSET_MASK VC4_MASK(31, 12)
  2579. +#define VC4_TEX_P0_OFFSET_SHIFT 12
  2580. +#define VC4_TEX_P0_CSWIZ_MASK VC4_MASK(11, 10)
  2581. +#define VC4_TEX_P0_CSWIZ_SHIFT 10
  2582. +#define VC4_TEX_P0_CMMODE_MASK VC4_MASK(9, 9)
  2583. +#define VC4_TEX_P0_CMMODE_SHIFT 9
  2584. +#define VC4_TEX_P0_FLIPY_MASK VC4_MASK(8, 8)
  2585. +#define VC4_TEX_P0_FLIPY_SHIFT 8
  2586. +#define VC4_TEX_P0_TYPE_MASK VC4_MASK(7, 4)
  2587. +#define VC4_TEX_P0_TYPE_SHIFT 4
  2588. +#define VC4_TEX_P0_MIPLVLS_MASK VC4_MASK(3, 0)
  2589. +#define VC4_TEX_P0_MIPLVLS_SHIFT 0
  2590. +
  2591. +#define VC4_TEX_P1_TYPE4_MASK VC4_MASK(31, 31)
  2592. +#define VC4_TEX_P1_TYPE4_SHIFT 31
  2593. +#define VC4_TEX_P1_HEIGHT_MASK VC4_MASK(30, 20)
  2594. +#define VC4_TEX_P1_HEIGHT_SHIFT 20
  2595. +#define VC4_TEX_P1_ETCFLIP_MASK VC4_MASK(19, 19)
  2596. +#define VC4_TEX_P1_ETCFLIP_SHIFT 19
  2597. +#define VC4_TEX_P1_WIDTH_MASK VC4_MASK(18, 8)
  2598. +#define VC4_TEX_P1_WIDTH_SHIFT 8
  2599. +
  2600. +#define VC4_TEX_P1_MAGFILT_MASK VC4_MASK(7, 7)
  2601. +#define VC4_TEX_P1_MAGFILT_SHIFT 7
  2602. +# define VC4_TEX_P1_MAGFILT_LINEAR 0
  2603. +# define VC4_TEX_P1_MAGFILT_NEAREST 1
  2604. +
  2605. +#define VC4_TEX_P1_MINFILT_MASK VC4_MASK(6, 4)
  2606. +#define VC4_TEX_P1_MINFILT_SHIFT 4
  2607. +# define VC4_TEX_P1_MINFILT_LINEAR 0
  2608. +# define VC4_TEX_P1_MINFILT_NEAREST 1
  2609. +# define VC4_TEX_P1_MINFILT_NEAR_MIP_NEAR 2
  2610. +# define VC4_TEX_P1_MINFILT_NEAR_MIP_LIN 3
  2611. +# define VC4_TEX_P1_MINFILT_LIN_MIP_NEAR 4
  2612. +# define VC4_TEX_P1_MINFILT_LIN_MIP_LIN 5
  2613. +
  2614. +#define VC4_TEX_P1_WRAP_T_MASK VC4_MASK(3, 2)
  2615. +#define VC4_TEX_P1_WRAP_T_SHIFT 2
  2616. +#define VC4_TEX_P1_WRAP_S_MASK VC4_MASK(1, 0)
  2617. +#define VC4_TEX_P1_WRAP_S_SHIFT 0
  2618. +# define VC4_TEX_P1_WRAP_REPEAT 0
  2619. +# define VC4_TEX_P1_WRAP_CLAMP 1
  2620. +# define VC4_TEX_P1_WRAP_MIRROR 2
  2621. +# define VC4_TEX_P1_WRAP_BORDER 3
  2622. +
  2623. +#define VC4_TEX_P2_PTYPE_MASK VC4_MASK(31, 30)
  2624. +#define VC4_TEX_P2_PTYPE_SHIFT 30
  2625. +# define VC4_TEX_P2_PTYPE_IGNORED 0
  2626. +# define VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE 1
  2627. +# define VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS 2
  2628. +# define VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS 3
  2629. +
  2630. +/* VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE bits */
  2631. +#define VC4_TEX_P2_CMST_MASK VC4_MASK(29, 12)
  2632. +#define VC4_TEX_P2_CMST_SHIFT 12
  2633. +#define VC4_TEX_P2_BSLOD_MASK VC4_MASK(0, 0)
  2634. +#define VC4_TEX_P2_BSLOD_SHIFT 0
  2635. +
  2636. +/* VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS */
  2637. +#define VC4_TEX_P2_CHEIGHT_MASK VC4_MASK(22, 12)
  2638. +#define VC4_TEX_P2_CHEIGHT_SHIFT 12
  2639. +#define VC4_TEX_P2_CWIDTH_MASK VC4_MASK(10, 0)
  2640. +#define VC4_TEX_P2_CWIDTH_SHIFT 0
  2641. +
  2642. +/* VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS */
  2643. +#define VC4_TEX_P2_CYOFF_MASK VC4_MASK(22, 12)
  2644. +#define VC4_TEX_P2_CYOFF_SHIFT 12
  2645. +#define VC4_TEX_P2_CXOFF_MASK VC4_MASK(10, 0)
  2646. +#define VC4_TEX_P2_CXOFF_SHIFT 0
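/*
 * Illustrative sketch, not part of the patch: unpacking the texture size
 * from parameter word 1 with the field definitions above.  This assumes the
 * VC4_GET_FIELD() helper used elsewhere in this series is in scope; the
 * function name is made up for the example.
 */
static inline void vc4_example_unpack_tex_p1(uint32_t p1,
					      uint32_t *width, uint32_t *height)
{
	*width = VC4_GET_FIELD(p1, VC4_TEX_P1_WIDTH);
	*height = VC4_GET_FIELD(p1, VC4_TEX_P1_HEIGHT);
}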
  2647. +
  2648. +#endif /* VC4_PACKET_H */
  2649. --- a/drivers/gpu/drm/vc4/vc4_plane.c
  2650. +++ b/drivers/gpu/drm/vc4/vc4_plane.c
  2651. @@ -29,6 +29,14 @@ struct vc4_plane_state {
  2652. u32 *dlist;
  2653. u32 dlist_size; /* Number of dwords in allocated for the display list */
  2654. u32 dlist_count; /* Number of used dwords in the display list. */
  2655. +
  2656. + /* Offset in the dlist to pointer word 0. */
  2657. + u32 pw0_offset;
  2658. +
  2659. + /* Offset where the plane's dlist was last stored in the
  2660. + * hardware at vc4_crtc_atomic_flush() time.
  2661. + */
  2662. + u32 *hw_dlist;
  2663. };
  2664. static inline struct vc4_plane_state *
  2665. @@ -207,6 +215,8 @@ static int vc4_plane_mode_set(struct drm
  2666. /* Position Word 3: Context. Written by the HVS. */
  2667. vc4_dlist_write(vc4_state, 0xc0c0c0c0);
  2668. + vc4_state->pw0_offset = vc4_state->dlist_count;
  2669. +
  2670. /* Pointer Word 0: RGB / Y Pointer */
  2671. vc4_dlist_write(vc4_state, bo->paddr + offset);
  2672. @@ -258,6 +268,8 @@ u32 vc4_plane_write_dlist(struct drm_pla
  2673. struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
  2674. int i;
  2675. + vc4_state->hw_dlist = dlist;
  2676. +
  2677. /* Can't memcpy_toio() because it needs to be 32-bit writes. */
  2678. for (i = 0; i < vc4_state->dlist_count; i++)
  2679. writel(vc4_state->dlist[i], &dlist[i]);
  2680. @@ -272,6 +284,34 @@ u32 vc4_plane_dlist_size(struct drm_plan
  2681. return vc4_state->dlist_count;
  2682. }
  2683. +/* Updates the plane to immediately (well, once the FIFO needs
  2684. + * refilling) scan out from a new framebuffer.
  2685. + */
  2686. +void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
  2687. +{
  2688. + struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
  2689. + struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
  2690. + uint32_t addr;
  2691. +
  2692. + /* We're skipping the address adjustment for negative origin,
  2693. + * because this is only called on the primary plane.
  2694. + */
  2695. + WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0);
  2696. + addr = bo->paddr + fb->offsets[0];
  2697. +
  2698. + /* Write the new address into the hardware immediately. The
  2699. + * scanout will start from this address as soon as the FIFO
  2700. + * needs to refill with pixels.
  2701. + */
  2702. + writel(addr, &vc4_state->hw_dlist[vc4_state->pw0_offset]);
  2703. +
  2704. + /* Also update the CPU-side dlist copy, so that any later
  2705. + * atomic updates that don't do a new modeset on our plane
  2706. + * also use our updated address.
  2707. + */
  2708. + vc4_state->dlist[vc4_state->pw0_offset] = addr;
  2709. +}
  2710. +
  2711. static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
  2712. .prepare_fb = NULL,
  2713. .cleanup_fb = NULL,
  2714. --- /dev/null
  2715. +++ b/drivers/gpu/drm/vc4/vc4_qpu_defines.h
  2716. @@ -0,0 +1,268 @@
  2717. +/*
  2718. + * Copyright © 2014 Broadcom
  2719. + *
  2720. + * Permission is hereby granted, free of charge, to any person obtaining a
  2721. + * copy of this software and associated documentation files (the "Software"),
  2722. + * to deal in the Software without restriction, including without limitation
  2723. + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  2724. + * and/or sell copies of the Software, and to permit persons to whom the
  2725. + * Software is furnished to do so, subject to the following conditions:
  2726. + *
  2727. + * The above copyright notice and this permission notice (including the next
  2728. + * paragraph) shall be included in all copies or substantial portions of the
  2729. + * Software.
  2730. + *
  2731. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  2732. + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  2733. + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  2734. + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  2735. + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  2736. + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  2737. + * IN THE SOFTWARE.
  2738. + */
  2739. +
  2740. +#ifndef VC4_QPU_DEFINES_H
  2741. +#define VC4_QPU_DEFINES_H
  2742. +
  2743. +enum qpu_op_add {
  2744. + QPU_A_NOP,
  2745. + QPU_A_FADD,
  2746. + QPU_A_FSUB,
  2747. + QPU_A_FMIN,
  2748. + QPU_A_FMAX,
  2749. + QPU_A_FMINABS,
  2750. + QPU_A_FMAXABS,
  2751. + QPU_A_FTOI,
  2752. + QPU_A_ITOF,
  2753. + QPU_A_ADD = 12,
  2754. + QPU_A_SUB,
  2755. + QPU_A_SHR,
  2756. + QPU_A_ASR,
  2757. + QPU_A_ROR,
  2758. + QPU_A_SHL,
  2759. + QPU_A_MIN,
  2760. + QPU_A_MAX,
  2761. + QPU_A_AND,
  2762. + QPU_A_OR,
  2763. + QPU_A_XOR,
  2764. + QPU_A_NOT,
  2765. + QPU_A_CLZ,
  2766. + QPU_A_V8ADDS = 30,
  2767. + QPU_A_V8SUBS = 31,
  2768. +};
  2769. +
  2770. +enum qpu_op_mul {
  2771. + QPU_M_NOP,
  2772. + QPU_M_FMUL,
  2773. + QPU_M_MUL24,
  2774. + QPU_M_V8MULD,
  2775. + QPU_M_V8MIN,
  2776. + QPU_M_V8MAX,
  2777. + QPU_M_V8ADDS,
  2778. + QPU_M_V8SUBS,
  2779. +};
  2780. +
  2781. +enum qpu_raddr {
  2782. + QPU_R_FRAG_PAYLOAD_ZW = 15, /* W for A file, Z for B file */
  2783. + /* 0-31 are the plain regfile a or b fields */
  2784. + QPU_R_UNIF = 32,
  2785. + QPU_R_VARY = 35,
  2786. + QPU_R_ELEM_QPU = 38,
  2787. + QPU_R_NOP,
  2788. + QPU_R_XY_PIXEL_COORD = 41,
  2789. + QPU_R_MS_REV_FLAGS = 41,
  2790. + QPU_R_VPM = 48,
  2791. + QPU_R_VPM_LD_BUSY,
  2792. + QPU_R_VPM_LD_WAIT,
  2793. + QPU_R_MUTEX_ACQUIRE,
  2794. +};
  2795. +
  2796. +enum qpu_waddr {
  2797. + /* 0-31 are the plain regfile a or b fields */
  2798. + QPU_W_ACC0 = 32, /* aka r0 */
  2799. + QPU_W_ACC1,
  2800. + QPU_W_ACC2,
  2801. + QPU_W_ACC3,
  2802. + QPU_W_TMU_NOSWAP,
  2803. + QPU_W_ACC5,
  2804. + QPU_W_HOST_INT,
  2805. + QPU_W_NOP,
  2806. + QPU_W_UNIFORMS_ADDRESS,
  2807. + QPU_W_QUAD_XY, /* X for regfile a, Y for regfile b */
  2808. + QPU_W_MS_FLAGS = 42,
  2809. + QPU_W_REV_FLAG = 42,
  2810. + QPU_W_TLB_STENCIL_SETUP = 43,
  2811. + QPU_W_TLB_Z,
  2812. + QPU_W_TLB_COLOR_MS,
  2813. + QPU_W_TLB_COLOR_ALL,
  2814. + QPU_W_TLB_ALPHA_MASK,
  2815. + QPU_W_VPM,
  2816. + QPU_W_VPMVCD_SETUP, /* LD for regfile a, ST for regfile b */
  2817. + QPU_W_VPM_ADDR, /* LD for regfile a, ST for regfile b */
  2818. + QPU_W_MUTEX_RELEASE,
  2819. + QPU_W_SFU_RECIP,
  2820. + QPU_W_SFU_RECIPSQRT,
  2821. + QPU_W_SFU_EXP,
  2822. + QPU_W_SFU_LOG,
  2823. + QPU_W_TMU0_S,
  2824. + QPU_W_TMU0_T,
  2825. + QPU_W_TMU0_R,
  2826. + QPU_W_TMU0_B,
  2827. + QPU_W_TMU1_S,
  2828. + QPU_W_TMU1_T,
  2829. + QPU_W_TMU1_R,
  2830. + QPU_W_TMU1_B,
  2831. +};
  2832. +
  2833. +enum qpu_sig_bits {
  2834. + QPU_SIG_SW_BREAKPOINT,
  2835. + QPU_SIG_NONE,
  2836. + QPU_SIG_THREAD_SWITCH,
  2837. + QPU_SIG_PROG_END,
  2838. + QPU_SIG_WAIT_FOR_SCOREBOARD,
  2839. + QPU_SIG_SCOREBOARD_UNLOCK,
  2840. + QPU_SIG_LAST_THREAD_SWITCH,
  2841. + QPU_SIG_COVERAGE_LOAD,
  2842. + QPU_SIG_COLOR_LOAD,
  2843. + QPU_SIG_COLOR_LOAD_END,
  2844. + QPU_SIG_LOAD_TMU0,
  2845. + QPU_SIG_LOAD_TMU1,
  2846. + QPU_SIG_ALPHA_MASK_LOAD,
  2847. + QPU_SIG_SMALL_IMM,
  2848. + QPU_SIG_LOAD_IMM,
  2849. + QPU_SIG_BRANCH
  2850. +};
  2851. +
  2852. +enum qpu_mux {
  2853. + /* hardware mux values */
  2854. + QPU_MUX_R0,
  2855. + QPU_MUX_R1,
  2856. + QPU_MUX_R2,
  2857. + QPU_MUX_R3,
  2858. + QPU_MUX_R4,
  2859. + QPU_MUX_R5,
  2860. + QPU_MUX_A,
  2861. + QPU_MUX_B,
  2862. +
  2863. + /* non-hardware mux values */
  2864. + QPU_MUX_IMM,
  2865. +};
  2866. +
  2867. +enum qpu_cond {
  2868. + QPU_COND_NEVER,
  2869. + QPU_COND_ALWAYS,
  2870. + QPU_COND_ZS,
  2871. + QPU_COND_ZC,
  2872. + QPU_COND_NS,
  2873. + QPU_COND_NC,
  2874. + QPU_COND_CS,
  2875. + QPU_COND_CC,
  2876. +};
  2877. +
  2878. +enum qpu_pack_mul {
  2879. + QPU_PACK_MUL_NOP,
  2880. + QPU_PACK_MUL_8888 = 3, /* replicated to each 8 bits of the 32-bit dst. */
  2881. + QPU_PACK_MUL_8A,
  2882. + QPU_PACK_MUL_8B,
  2883. + QPU_PACK_MUL_8C,
  2884. + QPU_PACK_MUL_8D,
  2885. +};
  2886. +
  2887. +enum qpu_pack_a {
  2888. + QPU_PACK_A_NOP,
  2889. + /* convert to 16 bit float if float input, or to int16. */
  2890. + QPU_PACK_A_16A,
  2891. + QPU_PACK_A_16B,
  2892. + /* replicated to each 8 bits of the 32-bit dst. */
  2893. + QPU_PACK_A_8888,
  2894. + /* Convert to 8-bit unsigned int. */
  2895. + QPU_PACK_A_8A,
  2896. + QPU_PACK_A_8B,
  2897. + QPU_PACK_A_8C,
  2898. + QPU_PACK_A_8D,
  2899. +
  2900. + /* Saturating variants of the previous instructions. */
  2901. + QPU_PACK_A_32_SAT, /* int-only */
  2902. + QPU_PACK_A_16A_SAT, /* int or float */
  2903. + QPU_PACK_A_16B_SAT,
  2904. + QPU_PACK_A_8888_SAT,
  2905. + QPU_PACK_A_8A_SAT,
  2906. + QPU_PACK_A_8B_SAT,
  2907. + QPU_PACK_A_8C_SAT,
  2908. + QPU_PACK_A_8D_SAT,
  2909. +};
  2910. +
  2911. +enum qpu_unpack_r4 {
  2912. + QPU_UNPACK_R4_NOP,
  2913. + QPU_UNPACK_R4_F16A_TO_F32,
  2914. + QPU_UNPACK_R4_F16B_TO_F32,
  2915. + QPU_UNPACK_R4_8D_REP,
  2916. + QPU_UNPACK_R4_8A,
  2917. + QPU_UNPACK_R4_8B,
  2918. + QPU_UNPACK_R4_8C,
  2919. + QPU_UNPACK_R4_8D,
  2920. +};
  2921. +
  2922. +#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
  2923. +/* Using the GNU statement expression extension */
  2924. +#define QPU_SET_FIELD(value, field) \
  2925. + ({ \
  2926. + uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
  2927. + assert((fieldval & ~ field ## _MASK) == 0); \
  2928. + fieldval & field ## _MASK; \
  2929. + })
  2930. +
  2931. +#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))
  2932. +
  2933. +#define QPU_SIG_SHIFT 60
  2934. +#define QPU_SIG_MASK QPU_MASK(63, 60)
  2935. +
  2936. +#define QPU_UNPACK_SHIFT 57
  2937. +#define QPU_UNPACK_MASK QPU_MASK(59, 57)
  2938. +
  2939. +/**
  2940. + * If set, the pack field means PACK_MUL or R4 packing, instead of normal
  2941. + * regfile a packing.
  2942. + */
  2943. +#define QPU_PM ((uint64_t)1 << 56)
  2944. +
  2945. +#define QPU_PACK_SHIFT 52
  2946. +#define QPU_PACK_MASK QPU_MASK(55, 52)
  2947. +
  2948. +#define QPU_COND_ADD_SHIFT 49
  2949. +#define QPU_COND_ADD_MASK QPU_MASK(51, 49)
  2950. +#define QPU_COND_MUL_SHIFT 46
  2951. +#define QPU_COND_MUL_MASK QPU_MASK(48, 46)
  2952. +
  2953. +#define QPU_SF ((uint64_t)1 << 45)
  2954. +
  2955. +#define QPU_WADDR_ADD_SHIFT 38
  2956. +#define QPU_WADDR_ADD_MASK QPU_MASK(43, 38)
  2957. +#define QPU_WADDR_MUL_SHIFT 32
  2958. +#define QPU_WADDR_MUL_MASK QPU_MASK(37, 32)
  2959. +
  2960. +#define QPU_OP_MUL_SHIFT 29
  2961. +#define QPU_OP_MUL_MASK QPU_MASK(31, 29)
  2962. +
  2963. +#define QPU_RADDR_A_SHIFT 18
  2964. +#define QPU_RADDR_A_MASK QPU_MASK(23, 18)
  2965. +#define QPU_RADDR_B_SHIFT 12
  2966. +#define QPU_RADDR_B_MASK QPU_MASK(17, 12)
  2967. +#define QPU_SMALL_IMM_SHIFT 12
  2968. +#define QPU_SMALL_IMM_MASK QPU_MASK(17, 12)
  2969. +
  2970. +#define QPU_ADD_A_SHIFT 9
  2971. +#define QPU_ADD_A_MASK QPU_MASK(11, 9)
  2972. +#define QPU_ADD_B_SHIFT 6
  2973. +#define QPU_ADD_B_MASK QPU_MASK(8, 6)
  2974. +#define QPU_MUL_A_SHIFT 3
  2975. +#define QPU_MUL_A_MASK QPU_MASK(5, 3)
  2976. +#define QPU_MUL_B_SHIFT 0
  2977. +#define QPU_MUL_B_MASK QPU_MASK(2, 0)
  2978. +
  2979. +#define QPU_WS ((uint64_t)1 << 44)
  2980. +
  2981. +#define QPU_OP_ADD_SHIFT 24
  2982. +#define QPU_OP_ADD_MASK QPU_MASK(28, 24)
  2983. +
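/*
 * Illustrative sketch, not part of the patch: packing the ALU halves of a
 * 64-bit QPU instruction with the field helpers above.  The function name is
 * made up for the example; real encodings come from userspace shaders.
 */
static inline uint64_t qpu_example_encode_fadd(void)
{
	uint64_t inst = 0;

	inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
	inst |= QPU_SET_FIELD(QPU_A_FADD, QPU_OP_ADD);
	inst |= QPU_SET_FIELD(QPU_M_NOP, QPU_OP_MUL);
	inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
	inst |= QPU_SET_FIELD(QPU_W_ACC0, QPU_WADDR_ADD);
	inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);

	return inst;
}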
  2984. +#endif /* VC4_QPU_DEFINES_H */
  2985. --- /dev/null
  2986. +++ b/drivers/gpu/drm/vc4/vc4_render_cl.c
  2987. @@ -0,0 +1,448 @@
  2988. +/*
  2989. + * Copyright © 2014-2015 Broadcom
  2990. + *
  2991. + * Permission is hereby granted, free of charge, to any person obtaining a
  2992. + * copy of this software and associated documentation files (the "Software"),
  2993. + * to deal in the Software without restriction, including without limitation
  2994. + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  2995. + * and/or sell copies of the Software, and to permit persons to whom the
  2996. + * Software is furnished to do so, subject to the following conditions:
  2997. + *
  2998. + * The above copyright notice and this permission notice (including the next
  2999. + * paragraph) shall be included in all copies or substantial portions of the
  3000. + * Software.
  3001. + *
  3002. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  3003. + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  3004. + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  3005. + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  3006. + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  3007. + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  3008. + * IN THE SOFTWARE.
  3009. + */
  3010. +
  3011. +/**
  3012. + * DOC: Render command list generation
  3013. + *
  3014. + * In the VC4 driver, render command list generation is performed by the
  3015. + * kernel instead of userspace. We do this because validating a
  3016. + * user-submitted command list is hard to get right and has high CPU overhead,
  3017. + * while the number of valid configurations for render command lists is
  3018. + * actually fairly low.
  3019. + */
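/*
 * Rough flow, in terms of the struct drm_vc4_submit_cl fields consumed
 * below: vc4_get_rcl() validates the tile bounds and the color / Z-S read
 * and write surfaces, then vc4_create_rcl_bo() sizes and emits the render
 * list: a rendering-mode config, an optional clear-color store, and a
 * per-tile loop of load, tile-coordinates, branch-to-bin-sublist and store
 * packets.
 */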
  3020. +
  3021. +#include "uapi/drm/vc4_drm.h"
  3022. +#include "vc4_drv.h"
  3023. +#include "vc4_packet.h"
  3024. +
  3025. +struct vc4_rcl_setup {
  3026. + struct drm_gem_cma_object *color_read;
  3027. + struct drm_gem_cma_object *color_ms_write;
  3028. + struct drm_gem_cma_object *zs_read;
  3029. + struct drm_gem_cma_object *zs_write;
  3030. +
  3031. + struct drm_gem_cma_object *rcl;
  3032. + u32 next_offset;
  3033. +};
  3034. +
  3035. +static inline void rcl_u8(struct vc4_rcl_setup *setup, u8 val)
  3036. +{
  3037. + *(u8 *)(setup->rcl->vaddr + setup->next_offset) = val;
  3038. + setup->next_offset += 1;
  3039. +}
  3040. +
  3041. +static inline void rcl_u16(struct vc4_rcl_setup *setup, u16 val)
  3042. +{
  3043. + *(u16 *)(setup->rcl->vaddr + setup->next_offset) = val;
  3044. + setup->next_offset += 2;
  3045. +}
  3046. +
  3047. +static inline void rcl_u32(struct vc4_rcl_setup *setup, u32 val)
  3048. +{
  3049. + *(u32 *)(setup->rcl->vaddr + setup->next_offset) = val;
  3050. + setup->next_offset += 4;
  3051. +}
  3052. +
  3053. +
  3054. +/*
  3055. + * Emits a no-op STORE_TILE_BUFFER_GENERAL.
  3056. + *
  3057. + * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of
  3058. + * some sort before another load is triggered.
  3059. + */
  3060. +static void vc4_store_before_load(struct vc4_rcl_setup *setup)
  3061. +{
  3062. + rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
  3063. + rcl_u16(setup,
  3064. + VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_NONE,
  3065. + VC4_LOADSTORE_TILE_BUFFER_BUFFER) |
  3066. + VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR |
  3067. + VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR |
  3068. + VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR);
  3069. + rcl_u32(setup, 0); /* no address, since we're in None mode */
  3070. +}
  3071. +
  3072. +/*
  3073. + * Emits a PACKET_TILE_COORDINATES if one isn't already pending.
  3074. + *
  3075. + * The tile coordinates packet triggers a pending load if there is one, is
  3076. + * used for clipping during rendering, and determines where loads/stores happen
  3077. + * relative to their base address.
  3078. + */
  3079. +static void vc4_tile_coordinates(struct vc4_rcl_setup *setup,
  3080. + uint32_t x, uint32_t y)
  3081. +{
  3082. + rcl_u8(setup, VC4_PACKET_TILE_COORDINATES);
  3083. + rcl_u8(setup, x);
  3084. + rcl_u8(setup, y);
  3085. +}
  3086. +
  3087. +static void emit_tile(struct vc4_exec_info *exec,
  3088. + struct vc4_rcl_setup *setup,
  3089. + uint8_t x, uint8_t y, bool first, bool last)
  3090. +{
  3091. + struct drm_vc4_submit_cl *args = exec->args;
  3092. + bool has_bin = args->bin_cl_size != 0;
  3093. +
  3094. + /* Note that the load doesn't actually occur until the
  3095. + * tile coords packet is processed, and only one load
  3096. + * may be outstanding at a time.
  3097. + */
  3098. + if (setup->color_read) {
  3099. + rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
  3100. + rcl_u16(setup, args->color_read.bits);
  3101. + rcl_u32(setup,
  3102. + setup->color_read->paddr + args->color_read.offset);
  3103. + }
  3104. +
  3105. + if (setup->zs_read) {
  3106. + if (setup->color_read) {
  3107. + /* Exec previous load. */
  3108. + vc4_tile_coordinates(setup, x, y);
  3109. + vc4_store_before_load(setup);
  3110. + }
  3111. +
  3112. + rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
  3113. + rcl_u16(setup, args->zs_read.bits);
  3114. + rcl_u32(setup, setup->zs_read->paddr + args->zs_read.offset);
  3115. + }
  3116. +
  3117. + /* Clipping depends on tile coordinates having been
  3118. + * emitted, so we always need one here.
  3119. + */
  3120. + vc4_tile_coordinates(setup, x, y);
  3121. +
  3122. + /* Wait for the binner before jumping to the first
  3123. + * tile's lists.
  3124. + */
  3125. + if (first && has_bin)
  3126. + rcl_u8(setup, VC4_PACKET_WAIT_ON_SEMAPHORE);
  3127. +
  3128. + if (has_bin) {
  3129. + rcl_u8(setup, VC4_PACKET_BRANCH_TO_SUB_LIST);
  3130. + rcl_u32(setup, (exec->tile_bo->paddr +
  3131. + exec->tile_alloc_offset +
  3132. + (y * exec->bin_tiles_x + x) * 32));
  3133. + }
  3134. +
  3135. + if (setup->zs_write) {
  3136. + rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
  3137. + rcl_u16(setup, args->zs_write.bits |
  3138. + (setup->color_ms_write ?
  3139. + VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR : 0));
  3140. + rcl_u32(setup,
  3141. + (setup->zs_write->paddr + args->zs_write.offset) |
  3142. + ((last && !setup->color_ms_write) ?
  3143. + VC4_LOADSTORE_TILE_BUFFER_EOF : 0));
  3144. + }
  3145. +
  3146. + if (setup->color_ms_write) {
  3147. + if (setup->zs_write) {
  3148. + /* Reset after previous store */
  3149. + vc4_tile_coordinates(setup, x, y);
  3150. + }
  3151. +
  3152. + if (last)
  3153. + rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF);
  3154. + else
  3155. + rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER);
  3156. + }
  3157. +}
  3158. +
  3159. +static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
  3160. + struct vc4_rcl_setup *setup)
  3161. +{
  3162. + struct drm_vc4_submit_cl *args = exec->args;
  3163. + bool has_bin = args->bin_cl_size != 0;
  3164. + uint8_t min_x_tile = args->min_x_tile;
  3165. + uint8_t min_y_tile = args->min_y_tile;
  3166. + uint8_t max_x_tile = args->max_x_tile;
  3167. + uint8_t max_y_tile = args->max_y_tile;
  3168. + uint8_t xtiles = max_x_tile - min_x_tile + 1;
  3169. + uint8_t ytiles = max_y_tile - min_y_tile + 1;
  3170. + uint8_t x, y;
  3171. + uint32_t size, loop_body_size;
  3172. +
  3173. + size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE;
  3174. + loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE;
  3175. +
  3176. + if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
  3177. + size += VC4_PACKET_CLEAR_COLORS_SIZE +
  3178. + VC4_PACKET_TILE_COORDINATES_SIZE +
  3179. + VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
  3180. + }
  3181. +
  3182. + if (setup->color_read) {
  3183. + loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;
  3184. + }
  3185. + if (setup->zs_read) {
  3186. + if (setup->color_read) {
  3187. + loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE;
  3188. + loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
  3189. + }
  3190. + loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;
  3191. + }
  3192. +
  3193. + if (has_bin) {
  3194. + size += VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE;
  3195. + loop_body_size += VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE;
  3196. + }
  3197. +
  3198. + if (setup->zs_write)
  3199. + loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
  3200. + if (setup->color_ms_write) {
  3201. + if (setup->zs_write)
  3202. + loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE;
  3203. + loop_body_size += VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE;
  3204. + }
  3205. + size += xtiles * ytiles * loop_body_size;
  3206. +
  3207. + setup->rcl = &vc4_bo_create(dev, size)->base;
  3208. + if (!setup->rcl)
  3209. + return -ENOMEM;
  3210. + list_add_tail(&to_vc4_bo(&setup->rcl->base)->unref_head,
  3211. + &exec->unref_list);
  3212. +
  3213. + rcl_u8(setup, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
  3214. + rcl_u32(setup,
  3215. + (setup->color_ms_write ?
  3216. + (setup->color_ms_write->paddr +
  3217. + args->color_ms_write.offset) :
  3218. + 0));
  3219. + rcl_u16(setup, args->width);
  3220. + rcl_u16(setup, args->height);
  3221. + rcl_u16(setup, args->color_ms_write.bits);
  3222. +
  3223. + /* The tile buffer gets cleared when the previous tile is stored. If
  3224. + * the clear values changed between frames, then the tile buffer has
  3225. + * stale clear values in it, so we have to do a store in None mode (no
  3226. + * writes) so that we trigger the tile buffer clear.
  3227. + */
  3228. + if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
  3229. + rcl_u8(setup, VC4_PACKET_CLEAR_COLORS);
  3230. + rcl_u32(setup, args->clear_color[0]);
  3231. + rcl_u32(setup, args->clear_color[1]);
  3232. + rcl_u32(setup, args->clear_z);
  3233. + rcl_u8(setup, args->clear_s);
  3234. +
  3235. + vc4_tile_coordinates(setup, 0, 0);
  3236. +
  3237. + rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
  3238. + rcl_u16(setup, VC4_LOADSTORE_TILE_BUFFER_NONE);
  3239. + rcl_u32(setup, 0); /* no address, since we're in None mode */
  3240. + }
  3241. +
  3242. + for (y = min_y_tile; y <= max_y_tile; y++) {
  3243. + for (x = min_x_tile; x <= max_x_tile; x++) {
  3244. + bool first = (x == min_x_tile && y == min_y_tile);
  3245. + bool last = (x == max_x_tile && y == max_y_tile);
  3246. + emit_tile(exec, setup, x, y, first, last);
  3247. + }
  3248. + }
  3249. +
  3250. + BUG_ON(setup->next_offset != size);
  3251. + exec->ct1ca = setup->rcl->paddr;
  3252. + exec->ct1ea = setup->rcl->paddr + setup->next_offset;
  3253. +
  3254. + return 0;
  3255. +}
  3256. +
  3257. +static int vc4_rcl_surface_setup(struct vc4_exec_info *exec,
  3258. + struct drm_gem_cma_object **obj,
  3259. + struct drm_vc4_submit_rcl_surface *surf)
  3260. +{
  3261. + uint8_t tiling = VC4_GET_FIELD(surf->bits,
  3262. + VC4_LOADSTORE_TILE_BUFFER_TILING);
  3263. + uint8_t buffer = VC4_GET_FIELD(surf->bits,
  3264. + VC4_LOADSTORE_TILE_BUFFER_BUFFER);
  3265. + uint8_t format = VC4_GET_FIELD(surf->bits,
  3266. + VC4_LOADSTORE_TILE_BUFFER_FORMAT);
  3267. + int cpp;
  3268. +
  3269. + if (surf->pad != 0) {
  3270. + DRM_ERROR("Padding unset\n");
  3271. + return -EINVAL;
  3272. + }
  3273. +
  3274. + if (surf->hindex == ~0)
  3275. + return 0;
  3276. +
  3277. + if (!vc4_use_bo(exec, surf->hindex, VC4_MODE_RENDER, obj))
  3278. + return -EINVAL;
  3279. +
  3280. + if (surf->bits & ~(VC4_LOADSTORE_TILE_BUFFER_TILING_MASK |
  3281. + VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK |
  3282. + VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK)) {
  3283. + DRM_ERROR("Unknown bits in load/store: 0x%04x\n",
  3284. + surf->bits);
  3285. + return -EINVAL;
  3286. + }
  3287. +
  3288. + if (tiling > VC4_TILING_FORMAT_LT) {
  3289. + DRM_ERROR("Bad tiling format\n");
  3290. + return -EINVAL;
  3291. + }
  3292. +
  3293. + if (buffer == VC4_LOADSTORE_TILE_BUFFER_ZS) {
  3294. + if (format != 0) {
  3295. + DRM_ERROR("No color format should be set for ZS\n");
  3296. + return -EINVAL;
  3297. + }
  3298. + cpp = 4;
  3299. + } else if (buffer == VC4_LOADSTORE_TILE_BUFFER_COLOR) {
  3300. + switch (format) {
  3301. + case VC4_LOADSTORE_TILE_BUFFER_BGR565:
  3302. + case VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER:
  3303. + cpp = 2;
  3304. + break;
  3305. + case VC4_LOADSTORE_TILE_BUFFER_RGBA8888:
  3306. + cpp = 4;
  3307. + break;
  3308. + default:
  3309. + DRM_ERROR("Bad tile buffer format\n");
  3310. + return -EINVAL;
  3311. + }
  3312. + } else {
  3313. + DRM_ERROR("Bad load/store buffer %d.\n", buffer);
  3314. + return -EINVAL;
  3315. + }
  3316. +
  3317. + if (surf->offset & 0xf) {
  3318. + DRM_ERROR("load/store buffer must be 16b aligned.\n");
  3319. + return -EINVAL;
  3320. + }
  3321. +
  3322. + if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling,
  3323. + exec->args->width, exec->args->height, cpp)) {
  3324. + return -EINVAL;
  3325. + }
  3326. +
  3327. + return 0;
  3328. +}
  3329. +
  3330. +static int
  3331. +vc4_rcl_ms_surface_setup(struct vc4_exec_info *exec,
  3332. + struct drm_gem_cma_object **obj,
  3333. + struct drm_vc4_submit_rcl_surface *surf)
  3334. +{
  3335. + uint8_t tiling = VC4_GET_FIELD(surf->bits,
  3336. + VC4_RENDER_CONFIG_MEMORY_FORMAT);
  3337. + uint8_t format = VC4_GET_FIELD(surf->bits,
  3338. + VC4_RENDER_CONFIG_FORMAT);
  3339. + int cpp;
  3340. +
  3341. + if (surf->pad != 0) {
  3342. + DRM_ERROR("Padding unset\n");
  3343. + return -EINVAL;
  3344. + }
  3345. +
  3346. + if (surf->bits & ~(VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK |
  3347. + VC4_RENDER_CONFIG_FORMAT_MASK)) {
  3348. + DRM_ERROR("Unknown bits in render config: 0x%04x\n",
  3349. + surf->bits);
  3350. + return -EINVAL;
  3351. + }
  3352. +
  3353. + if (surf->hindex == ~0)
  3354. + return 0;
  3355. +
  3356. + if (!vc4_use_bo(exec, surf->hindex, VC4_MODE_RENDER, obj))
  3357. + return -EINVAL;
  3358. +
  3359. + if (tiling > VC4_TILING_FORMAT_LT) {
  3360. + DRM_ERROR("Bad tiling format\n");
  3361. + return -EINVAL;
  3362. + }
  3363. +
  3364. + switch (format) {
  3365. + case VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED:
  3366. + case VC4_RENDER_CONFIG_FORMAT_BGR565:
  3367. + cpp = 2;
  3368. + break;
  3369. + case VC4_RENDER_CONFIG_FORMAT_RGBA8888:
  3370. + cpp = 4;
  3371. + break;
  3372. + default:
  3373. + DRM_ERROR("Bad tile buffer format\n");
  3374. + return -EINVAL;
  3375. + }
  3376. +
  3377. + if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling,
  3378. + exec->args->width, exec->args->height, cpp)) {
  3379. + return -EINVAL;
  3380. + }
  3381. +
  3382. + return 0;
  3383. +}
  3384. +
  3385. +int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec)
  3386. +{
  3387. + struct vc4_rcl_setup setup = {0};
  3388. + struct drm_vc4_submit_cl *args = exec->args;
  3389. + bool has_bin = args->bin_cl_size != 0;
  3390. + int ret;
  3391. +
  3392. + if (args->min_x_tile > args->max_x_tile ||
  3393. + args->min_y_tile > args->max_y_tile) {
  3394. + DRM_ERROR("Bad render tile set (%d,%d)-(%d,%d)\n",
  3395. + args->min_x_tile, args->min_y_tile,
  3396. + args->max_x_tile, args->max_y_tile);
  3397. + return -EINVAL;
  3398. + }
  3399. +
  3400. + if (has_bin &&
  3401. + (args->max_x_tile > exec->bin_tiles_x ||
  3402. + args->max_y_tile > exec->bin_tiles_y)) {
  3403. + DRM_ERROR("Render tiles (%d,%d) outside of bin config (%d,%d)\n",
  3404. + args->max_x_tile, args->max_y_tile,
  3405. + exec->bin_tiles_x, exec->bin_tiles_y);
  3406. + return -EINVAL;
  3407. + }
  3408. +
  3409. + ret = vc4_rcl_surface_setup(exec, &setup.color_read, &args->color_read);
  3410. + if (ret)
  3411. + return ret;
  3412. +
  3413. + ret = vc4_rcl_ms_surface_setup(exec, &setup.color_ms_write,
  3414. + &args->color_ms_write);
  3415. + if (ret)
  3416. + return ret;
  3417. +
  3418. + ret = vc4_rcl_surface_setup(exec, &setup.zs_read, &args->zs_read);
  3419. + if (ret)
  3420. + return ret;
  3421. +
  3422. + ret = vc4_rcl_surface_setup(exec, &setup.zs_write, &args->zs_write);
  3423. + if (ret)
  3424. + return ret;
  3425. +
  3426. + /* We shouldn't even have the job submitted to us if there's no
  3427. + * surface to write out.
  3428. + */
  3429. + if (!setup.color_ms_write && !setup.zs_write) {
  3430. + DRM_ERROR("RCL requires color or Z/S write\n");
  3431. + return -EINVAL;
  3432. + }
  3433. +
  3434. + return vc4_create_rcl_bo(dev, exec, &setup);
  3435. +}
  3436. --- /dev/null
  3437. +++ b/drivers/gpu/drm/vc4/vc4_trace.h
  3438. @@ -0,0 +1,63 @@
  3439. +/*
  3440. + * Copyright (C) 2015 Broadcom
  3441. + *
  3442. + * This program is free software; you can redistribute it and/or modify
  3443. + * it under the terms of the GNU General Public License version 2 as
  3444. + * published by the Free Software Foundation.
  3445. + */
  3446. +
  3447. +#if !defined(_VC4_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
  3448. +#define _VC4_TRACE_H_
  3449. +
  3450. +#include <linux/stringify.h>
  3451. +#include <linux/types.h>
  3452. +#include <linux/tracepoint.h>
  3453. +
  3454. +#undef TRACE_SYSTEM
  3455. +#define TRACE_SYSTEM vc4
  3456. +#define TRACE_INCLUDE_FILE vc4_trace
  3457. +
  3458. +TRACE_EVENT(vc4_wait_for_seqno_begin,
  3459. + TP_PROTO(struct drm_device *dev, uint64_t seqno, uint64_t timeout),
  3460. + TP_ARGS(dev, seqno, timeout),
  3461. +
  3462. + TP_STRUCT__entry(
  3463. + __field(u32, dev)
  3464. + __field(u64, seqno)
  3465. + __field(u64, timeout)
  3466. + ),
  3467. +
  3468. + TP_fast_assign(
  3469. + __entry->dev = dev->primary->index;
  3470. + __entry->seqno = seqno;
  3471. + __entry->timeout = timeout;
  3472. + ),
  3473. +
  3474. + TP_printk("dev=%u, seqno=%llu, timeout=%llu",
  3475. + __entry->dev, __entry->seqno, __entry->timeout)
  3476. +);
  3477. +
  3478. +TRACE_EVENT(vc4_wait_for_seqno_end,
  3479. + TP_PROTO(struct drm_device *dev, uint64_t seqno),
  3480. + TP_ARGS(dev, seqno),
  3481. +
  3482. + TP_STRUCT__entry(
  3483. + __field(u32, dev)
  3484. + __field(u64, seqno)
  3485. + ),
  3486. +
  3487. + TP_fast_assign(
  3488. + __entry->dev = dev->primary->index;
  3489. + __entry->seqno = seqno;
  3490. + ),
  3491. +
  3492. + TP_printk("dev=%u, seqno=%llu",
  3493. + __entry->dev, __entry->seqno)
  3494. +);
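
/*
 * The TRACE_EVENT() definitions above expand into trace_vc4_wait_for_seqno_begin()
 * and trace_vc4_wait_for_seqno_end() calls; the seqno wait path is expected to
 * bracket its wait with them, roughly:
 *
 *	trace_vc4_wait_for_seqno_begin(dev, seqno, timeout);
 *	... block until the seqno passes or the timeout expires ...
 *	trace_vc4_wait_for_seqno_end(dev, seqno);
 */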
  3495. +
  3496. +#endif /* _VC4_TRACE_H_ */
  3497. +
  3498. +/* This part must be outside protection */
  3499. +#undef TRACE_INCLUDE_PATH
  3500. +#define TRACE_INCLUDE_PATH .
  3501. +#include <trace/define_trace.h>
  3502. --- /dev/null
  3503. +++ b/drivers/gpu/drm/vc4/vc4_trace_points.c
  3504. @@ -0,0 +1,14 @@
  3505. +/*
  3506. + * Copyright (C) 2015 Broadcom
  3507. + *
  3508. + * This program is free software; you can redistribute it and/or modify
  3509. + * it under the terms of the GNU General Public License version 2 as
  3510. + * published by the Free Software Foundation.
  3511. + */
  3512. +
  3513. +#include "vc4_drv.h"
  3514. +
  3515. +#ifndef __CHECKER__
  3516. +#define CREATE_TRACE_POINTS
  3517. +#include "vc4_trace.h"
  3518. +#endif
  3519. --- /dev/null
  3520. +++ b/drivers/gpu/drm/vc4/vc4_v3d.c
  3521. @@ -0,0 +1,268 @@
  3522. +/*
  3523. + * Copyright (c) 2014 The Linux Foundation. All rights reserved.
  3524. + * Copyright (C) 2013 Red Hat
  3525. + * Author: Rob Clark <robdclark@gmail.com>
  3526. + *
  3527. + * This program is free software; you can redistribute it and/or modify it
  3528. + * under the terms of the GNU General Public License version 2 as published by
  3529. + * the Free Software Foundation.
  3530. + *
  3531. + * This program is distributed in the hope that it will be useful, but WITHOUT
  3532. + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  3533. + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  3534. + * more details.
  3535. + *
  3536. + * You should have received a copy of the GNU General Public License along with
  3537. + * this program. If not, see <http://www.gnu.org/licenses/>.
  3538. + */
  3539. +
  3540. +#include "linux/component.h"
  3541. +#include "soc/bcm2835/raspberrypi-firmware.h"
  3542. +#include "vc4_drv.h"
  3543. +#include "vc4_regs.h"
  3544. +
  3545. +#ifdef CONFIG_DEBUG_FS
  3546. +#define REGDEF(reg) { reg, #reg }
  3547. +static const struct {
  3548. + uint32_t reg;
  3549. + const char *name;
  3550. +} vc4_reg_defs[] = {
  3551. + REGDEF(V3D_IDENT0),
  3552. + REGDEF(V3D_IDENT1),
  3553. + REGDEF(V3D_IDENT2),
  3554. + REGDEF(V3D_SCRATCH),
  3555. + REGDEF(V3D_L2CACTL),
  3556. + REGDEF(V3D_SLCACTL),
  3557. + REGDEF(V3D_INTCTL),
  3558. + REGDEF(V3D_INTENA),
  3559. + REGDEF(V3D_INTDIS),
  3560. + REGDEF(V3D_CT0CS),
  3561. + REGDEF(V3D_CT1CS),
  3562. + REGDEF(V3D_CT0EA),
  3563. + REGDEF(V3D_CT1EA),
  3564. + REGDEF(V3D_CT0CA),
  3565. + REGDEF(V3D_CT1CA),
  3566. + REGDEF(V3D_CT00RA0),
  3567. + REGDEF(V3D_CT01RA0),
  3568. + REGDEF(V3D_CT0LC),
  3569. + REGDEF(V3D_CT1LC),
  3570. + REGDEF(V3D_CT0PC),
  3571. + REGDEF(V3D_CT1PC),
  3572. + REGDEF(V3D_PCS),
  3573. + REGDEF(V3D_BFC),
  3574. + REGDEF(V3D_RFC),
  3575. + REGDEF(V3D_BPCA),
  3576. + REGDEF(V3D_BPCS),
  3577. + REGDEF(V3D_BPOA),
  3578. + REGDEF(V3D_BPOS),
  3579. + REGDEF(V3D_BXCF),
  3580. + REGDEF(V3D_SQRSV0),
  3581. + REGDEF(V3D_SQRSV1),
  3582. + REGDEF(V3D_SQCNTL),
  3583. + REGDEF(V3D_SRQPC),
  3584. + REGDEF(V3D_SRQUA),
  3585. + REGDEF(V3D_SRQUL),
  3586. + REGDEF(V3D_SRQCS),
  3587. + REGDEF(V3D_VPACNTL),
  3588. + REGDEF(V3D_VPMBASE),
  3589. + REGDEF(V3D_PCTRC),
  3590. + REGDEF(V3D_PCTRE),
  3591. + REGDEF(V3D_PCTR0),
  3592. + REGDEF(V3D_PCTRS0),
  3593. + REGDEF(V3D_PCTR1),
  3594. + REGDEF(V3D_PCTRS1),
  3595. + REGDEF(V3D_PCTR2),
  3596. + REGDEF(V3D_PCTRS2),
  3597. + REGDEF(V3D_PCTR3),
  3598. + REGDEF(V3D_PCTRS3),
  3599. + REGDEF(V3D_PCTR4),
  3600. + REGDEF(V3D_PCTRS4),
  3601. + REGDEF(V3D_PCTR5),
  3602. + REGDEF(V3D_PCTRS5),
  3603. + REGDEF(V3D_PCTR6),
  3604. + REGDEF(V3D_PCTRS6),
  3605. + REGDEF(V3D_PCTR7),
  3606. + REGDEF(V3D_PCTRS7),
  3607. + REGDEF(V3D_PCTR8),
  3608. + REGDEF(V3D_PCTRS8),
  3609. + REGDEF(V3D_PCTR9),
  3610. + REGDEF(V3D_PCTRS9),
  3611. + REGDEF(V3D_PCTR10),
  3612. + REGDEF(V3D_PCTRS10),
  3613. + REGDEF(V3D_PCTR11),
  3614. + REGDEF(V3D_PCTRS11),
  3615. + REGDEF(V3D_PCTR12),
  3616. + REGDEF(V3D_PCTRS12),
  3617. + REGDEF(V3D_PCTR13),
  3618. + REGDEF(V3D_PCTRS13),
  3619. + REGDEF(V3D_PCTR14),
  3620. + REGDEF(V3D_PCTRS14),
  3621. + REGDEF(V3D_PCTR15),
  3622. + REGDEF(V3D_PCTRS15),
  3623. + REGDEF(V3D_BGE),
  3624. + REGDEF(V3D_FDBGO),
  3625. + REGDEF(V3D_FDBGB),
  3626. + REGDEF(V3D_FDBGR),
  3627. + REGDEF(V3D_FDBGS),
  3628. + REGDEF(V3D_ERRSTAT),
  3629. +};
  3630. +
  3631. +int vc4_v3d_debugfs_regs(struct seq_file *m, void *unused)
  3632. +{
  3633. + struct drm_info_node *node = (struct drm_info_node *) m->private;
  3634. + struct drm_device *dev = node->minor->dev;
  3635. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  3636. + int i;
  3637. +
  3638. + for (i = 0; i < ARRAY_SIZE(vc4_reg_defs); i++) {
  3639. + seq_printf(m, "%s (0x%04x): 0x%08x\n",
  3640. + vc4_reg_defs[i].name, vc4_reg_defs[i].reg,
  3641. + V3D_READ(vc4_reg_defs[i].reg));
  3642. + }
  3643. +
  3644. + return 0;
  3645. +}
  3646. +
  3647. +int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused)
  3648. +{
  3649. + struct drm_info_node *node = (struct drm_info_node *) m->private;
  3650. + struct drm_device *dev = node->minor->dev;
  3651. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  3652. + uint32_t ident1 = V3D_READ(V3D_IDENT1);
  3653. + uint32_t nslc = VC4_GET_FIELD(ident1, V3D_IDENT1_NSLC);
  3654. + uint32_t tups = VC4_GET_FIELD(ident1, V3D_IDENT1_TUPS);
  3655. + uint32_t qups = VC4_GET_FIELD(ident1, V3D_IDENT1_QUPS);
  3656. +
  3657. + seq_printf(m, "Revision: %d\n", VC4_GET_FIELD(ident1, V3D_IDENT1_REV));
  3658. + seq_printf(m, "Slices: %d\n", nslc);
  3659. + seq_printf(m, "TMUs: %d\n", nslc * tups);
  3660. + seq_printf(m, "QPUs: %d\n", nslc * qups);
  3661. + seq_printf(m, "Semaphores: %d\n", VC4_GET_FIELD(ident1, V3D_IDENT1_NSEM));
  3662. +
  3663. + return 0;
  3664. +}
  3665. +#endif /* CONFIG_DEBUG_FS */
  3666. +
  3667. +/*
  3668. + * Asks the firmware to turn on power to the V3D engine.
  3669. + *
  3670. + * This may be doable with just the clocks interface, though this
  3671. + * packet does some other register setup from the firmware, too.
  3672. + */
  3673. +int
  3674. +vc4_v3d_set_power(struct vc4_dev *vc4, bool on)
  3675. +{
  3676. + u32 packet = on;
  3677. +
  3678. + return rpi_firmware_property(vc4->firmware,
  3679. + RPI_FIRMWARE_SET_ENABLE_QPU,
  3680. + &packet, sizeof(packet));
  3681. +}
  3682. +
  3683. +static void vc4_v3d_init_hw(struct drm_device *dev)
  3684. +{
  3685. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  3686. +
  3687. + /* Take all the memory that would have been reserved for user
  3688. + * QPU programs, since we don't have an interface for running
  3689. + * them, anyway.
  3690. + */
  3691. + V3D_WRITE(V3D_VPMBASE, 0);
  3692. +}
  3693. +
  3694. +static int vc4_v3d_bind(struct device *dev, struct device *master, void *data)
  3695. +{
  3696. + struct platform_device *pdev = to_platform_device(dev);
  3697. + struct drm_device *drm = dev_get_drvdata(master);
  3698. + struct vc4_dev *vc4 = to_vc4_dev(drm);
  3699. + struct vc4_v3d *v3d = NULL;
  3700. + int ret;
  3701. +
  3702. + v3d = devm_kzalloc(&pdev->dev, sizeof(*v3d), GFP_KERNEL);
  3703. + if (!v3d)
  3704. + return -ENOMEM;
  3705. +
  3706. + v3d->pdev = pdev;
  3707. +
  3708. + v3d->regs = vc4_ioremap_regs(pdev, 0);
  3709. + if (IS_ERR(v3d->regs))
  3710. + return PTR_ERR(v3d->regs);
  3711. +
  3712. + vc4->v3d = v3d;
  3713. +
  3714. + ret = vc4_v3d_set_power(vc4, true);
  3715. + if (ret)
  3716. + return ret;
  3717. +
  3718. + if (V3D_READ(V3D_IDENT0) != V3D_EXPECTED_IDENT0) {
  3719. + DRM_ERROR("V3D_IDENT0 read 0x%08x instead of 0x%08x\n",
  3720. + V3D_READ(V3D_IDENT0), V3D_EXPECTED_IDENT0);
  3721. + return -EINVAL;
  3722. + }
  3723. +
  3724. + /* Reset the binner overflow address/size at setup, to be sure
  3725. + * we don't reuse an old one.
  3726. + */
  3727. + V3D_WRITE(V3D_BPOA, 0);
  3728. + V3D_WRITE(V3D_BPOS, 0);
  3729. +
  3730. + vc4_v3d_init_hw(drm);
  3731. +
  3732. + ret = drm_irq_install(drm, platform_get_irq(pdev, 0));
  3733. + if (ret) {
  3734. + DRM_ERROR("Failed to install IRQ handler\n");
  3735. + return ret;
  3736. + }
  3737. +
  3738. + return 0;
  3739. +}
  3740. +
  3741. +static void vc4_v3d_unbind(struct device *dev, struct device *master,
  3742. + void *data)
  3743. +{
  3744. + struct drm_device *drm = dev_get_drvdata(master);
  3745. + struct vc4_dev *vc4 = to_vc4_dev(drm);
  3746. +
  3747. + drm_irq_uninstall(drm);
  3748. +
  3749. + /* Disable the binner's overflow memory address, so the next
  3750. + * driver probe (if any) doesn't try to reuse our old
  3751. + * allocation.
  3752. + */
  3753. + V3D_WRITE(V3D_BPOA, 0);
  3754. + V3D_WRITE(V3D_BPOS, 0);
  3755. +
  3756. + vc4_v3d_set_power(vc4, false);
  3757. +
  3758. + vc4->v3d = NULL;
  3759. +}
  3760. +
  3761. +static const struct component_ops vc4_v3d_ops = {
  3762. + .bind = vc4_v3d_bind,
  3763. + .unbind = vc4_v3d_unbind,
  3764. +};
  3765. +
  3766. +static int vc4_v3d_dev_probe(struct platform_device *pdev)
  3767. +{
  3768. + return component_add(&pdev->dev, &vc4_v3d_ops);
  3769. +}
  3770. +
  3771. +static int vc4_v3d_dev_remove(struct platform_device *pdev)
  3772. +{
  3773. + component_del(&pdev->dev, &vc4_v3d_ops);
  3774. + return 0;
  3775. +}
  3776. +
  3777. +static const struct of_device_id vc4_v3d_dt_match[] = {
  3778. + { .compatible = "brcm,vc4-v3d" },
  3779. + {}
  3780. +};
  3781. +
  3782. +struct platform_driver vc4_v3d_driver = {
  3783. + .probe = vc4_v3d_dev_probe,
  3784. + .remove = vc4_v3d_dev_remove,
  3785. + .driver = {
  3786. + .name = "vc4_v3d",
  3787. + .of_match_table = vc4_v3d_dt_match,
  3788. + },
  3789. +};
  3790. --- /dev/null
  3791. +++ b/drivers/gpu/drm/vc4/vc4_validate.c
  3792. @@ -0,0 +1,958 @@
  3793. +/*
  3794. + * Copyright © 2014 Broadcom
  3795. + *
  3796. + * Permission is hereby granted, free of charge, to any person obtaining a
  3797. + * copy of this software and associated documentation files (the "Software"),
  3798. + * to deal in the Software without restriction, including without limitation
  3799. + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  3800. + * and/or sell copies of the Software, and to permit persons to whom the
  3801. + * Software is furnished to do so, subject to the following conditions:
  3802. + *
  3803. + * The above copyright notice and this permission notice (including the next
  3804. + * paragraph) shall be included in all copies or substantial portions of the
  3805. + * Software.
  3806. + *
  3807. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  3808. + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  3809. + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  3810. + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  3811. + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  3812. + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  3813. + * IN THE SOFTWARE.
  3814. + */
  3815. +
  3816. +/**
  3817. + * Command list validator for VC4.
  3818. + *
  3819. + * The VC4 has no IOMMU between it and system memory. So, a user with
  3820. + * access to execute command lists could escalate privilege by
  3821. + * overwriting system memory (drawing to it as a framebuffer) or
  3822. + * reading system memory it shouldn't (reading it as a texture, or
  3823. + * uniform data, or vertex data).
  3824. + *
  3825. + * This validates command lists to ensure that all accesses are within
  3826. + * the bounds of the GEM objects referenced. It explicitly whitelists
  3827. + * packets, and looks at the offsets in any address fields to make
  3828. + * sure they're constrained within the BOs they reference.
  3829. + *
  3830. + * Note that because of the validation that's happening anyway, this
  3831. + * is where GEM relocation processing happens.
  3832. + */
  3833. +
  3834. +#include "uapi/drm/vc4_drm.h"
  3835. +#include "vc4_drv.h"
  3836. +#include "vc4_packet.h"
  3837. +
  3838. +#define VALIDATE_ARGS \
  3839. + struct vc4_exec_info *exec, \
  3840. + void *validated, \
  3841. + void *untrusted
  3842. +
  3843. +
  3844. +/** Return the width in pixels of a 64-byte microtile. */
  3845. +static uint32_t
  3846. +utile_width(int cpp)
  3847. +{
  3848. + switch (cpp) {
  3849. + case 1:
  3850. + case 2:
  3851. + return 8;
  3852. + case 4:
  3853. + return 4;
  3854. + case 8:
  3855. + return 2;
  3856. + default:
  3857. + DRM_ERROR("unknown cpp: %d\n", cpp);
  3858. + return 1;
  3859. + }
  3860. +}
  3861. +
  3862. +/** Return the height in pixels of a 64-byte microtile. */
  3863. +static uint32_t
  3864. +utile_height(int cpp)
  3865. +{
  3866. + switch (cpp) {
  3867. + case 1:
  3868. + return 8;
  3869. + case 2:
  3870. + case 4:
  3871. + case 8:
  3872. + return 4;
  3873. + default:
  3874. + DRM_ERROR("unknown cpp: %d\n", cpp);
  3875. + return 1;
  3876. + }
  3877. +}
  3878. +
  3879. +/**
  3880. + * The texture unit decides what tiling format a particular miplevel is stored
  3881. + * in based on its size, matching this check, so we lay out our miptrees accordingly.
  3882. + */
  3883. +static bool
  3884. +size_is_lt(uint32_t width, uint32_t height, int cpp)
  3885. +{
  3886. + return (width <= 4 * utile_width(cpp) ||
  3887. + height <= 4 * utile_height(cpp));
  3888. +}
  3889. +
  3890. +bool
  3891. +vc4_use_bo(struct vc4_exec_info *exec,
  3892. + uint32_t hindex,
  3893. + enum vc4_bo_mode mode,
  3894. + struct drm_gem_cma_object **obj)
  3895. +{
  3896. + *obj = NULL;
  3897. +
  3898. + if (hindex >= exec->bo_count) {
  3899. + DRM_ERROR("BO index %d greater than BO count %d\n",
  3900. + hindex, exec->bo_count);
  3901. + return false;
  3902. + }
  3903. +
  3904. + if (exec->bo[hindex].mode != mode) {
  3905. + if (exec->bo[hindex].mode == VC4_MODE_UNDECIDED) {
  3906. + exec->bo[hindex].mode = mode;
  3907. + } else {
  3908. + DRM_ERROR("BO index %d reused with mode %d vs %d\n",
  3909. + hindex, exec->bo[hindex].mode, mode);
  3910. + return false;
  3911. + }
  3912. + }
  3913. +
  3914. + *obj = exec->bo[hindex].bo;
  3915. + return true;
  3916. +}
  3917. +
  3918. +static bool
  3919. +vc4_use_handle(struct vc4_exec_info *exec,
  3920. + uint32_t gem_handles_packet_index,
  3921. + enum vc4_bo_mode mode,
  3922. + struct drm_gem_cma_object **obj)
  3923. +{
  3924. + return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index],
  3925. + mode, obj);
  3926. +}
  3927. +
  3928. +static uint32_t
  3929. +gl_shader_rec_size(uint32_t pointer_bits)
  3930. +{
  3931. + uint32_t attribute_count = pointer_bits & 7;
  3932. + bool extended = pointer_bits & 8;
  3933. +
  3934. + if (attribute_count == 0)
  3935. + attribute_count = 8;
  3936. +
  3937. + if (extended)
  3938. + return 100 + attribute_count * 4;
  3939. + else
  3940. + return 36 + attribute_count * 8;
  3941. +}
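/*
 * E.g. a pointer word with low bits 0x3 (three attributes, not extended)
 * describes a 36 + 3 * 8 = 60 byte record, which validate_gl_shader_state()
 * below rounds up to 64 when advancing shader_rec_p.
 */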
  3942. +
  3943. +bool
  3944. +vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
  3945. + uint32_t offset, uint8_t tiling_format,
  3946. + uint32_t width, uint32_t height, uint8_t cpp)
  3947. +{
  3948. + uint32_t aligned_width, aligned_height, stride, size;
  3949. + uint32_t utile_w = utile_width(cpp);
  3950. + uint32_t utile_h = utile_height(cpp);
  3951. +
  3952. + /* The shaded vertex format stores signed 12.4 fixed point
  3953. + * (-2048,2047) offsets from the viewport center, so we should
  3954. + * never have a render target larger than 4096. The texture
  3955. + * unit can only sample from 2048x2048, so it's even more
  3956. + * restricted. This lets us avoid worrying about overflow in
  3957. + * our math.
  3958. + */
  3959. + if (width > 4096 || height > 4096) {
  3960. + DRM_ERROR("Surface dimesions (%d,%d) too large", width, height);
  3961. + return false;
  3962. + }
  3963. +
  3964. + switch (tiling_format) {
  3965. + case VC4_TILING_FORMAT_LINEAR:
  3966. + aligned_width = round_up(width, utile_w);
  3967. + aligned_height = height;
  3968. + break;
  3969. + case VC4_TILING_FORMAT_T:
  3970. + aligned_width = round_up(width, utile_w * 8);
  3971. + aligned_height = round_up(height, utile_h * 8);
  3972. + break;
  3973. + case VC4_TILING_FORMAT_LT:
  3974. + aligned_width = round_up(width, utile_w);
  3975. + aligned_height = round_up(height, utile_h);
  3976. + break;
  3977. + default:
  3978. + DRM_ERROR("buffer tiling %d unsupported\n", tiling_format);
  3979. + return false;
  3980. + }
  3981. +
  3982. + stride = aligned_width * cpp;
  3983. + size = stride * aligned_height;
  3984. +
  3985. + if (size + offset < size ||
  3986. + size + offset > fbo->base.size) {
  3987. + DRM_ERROR("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %d)\n",
  3988. + width, height,
  3989. + aligned_width, aligned_height,
  3990. + size, offset, fbo->base.size);
  3991. + return false;
  3992. + }
  3993. +
  3994. + return true;
  3995. +}
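/*
 * Worked example: a 1920x1080 RGBA8888 target (cpp = 4) in T-format tiling
 * uses 4x4-pixel utiles, so both dimensions round up to multiples of 32
 * pixels.  That gives 1920x1088, a 7680-byte stride and an 8355840-byte
 * (just under 8 MiB) minimum BO size.
 */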
  3996. +
  3997. +static int
  3998. +validate_flush_all(VALIDATE_ARGS)
  3999. +{
  4000. + if (exec->found_increment_semaphore_packet) {
  4001. + DRM_ERROR("VC4_PACKET_FLUSH_ALL after "
  4002. + "VC4_PACKET_INCREMENT_SEMAPHORE\n");
  4003. + return -EINVAL;
  4004. + }
  4005. +
  4006. + return 0;
  4007. +}
  4008. +
  4009. +static int
  4010. +validate_start_tile_binning(VALIDATE_ARGS)
  4011. +{
  4012. + if (exec->found_start_tile_binning_packet) {
  4013. + DRM_ERROR("Duplicate VC4_PACKET_START_TILE_BINNING\n");
  4014. + return -EINVAL;
  4015. + }
  4016. + exec->found_start_tile_binning_packet = true;
  4017. +
  4018. + if (!exec->found_tile_binning_mode_config_packet) {
  4019. + DRM_ERROR("missing VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
  4020. + return -EINVAL;
  4021. + }
  4022. +
  4023. + return 0;
  4024. +}
  4025. +
  4026. +static int
  4027. +validate_increment_semaphore(VALIDATE_ARGS)
  4028. +{
  4029. + if (exec->found_increment_semaphore_packet) {
  4030. + DRM_ERROR("Duplicate VC4_PACKET_INCREMENT_SEMAPHORE\n");
  4031. + return -EINVAL;
  4032. + }
  4033. + exec->found_increment_semaphore_packet = true;
  4034. +
  4035. + /* Once we've found the semaphore increment, there should be one FLUSH
  4036. + * then the end of the command list. The FLUSH actually triggers the
  4037. + * increment, so we only need to make sure no drawing or flushing follows it.
  4038. + */
  4039. +
  4040. + return 0;
  4041. +}
  4042. +
  4043. +static int
  4044. +validate_indexed_prim_list(VALIDATE_ARGS)
  4045. +{
  4046. + struct drm_gem_cma_object *ib;
  4047. + uint32_t length = *(uint32_t *)(untrusted + 1);
  4048. + uint32_t offset = *(uint32_t *)(untrusted + 5);
  4049. + uint32_t max_index = *(uint32_t *)(untrusted + 9);
  4050. + uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 2 : 1;
  4051. + struct vc4_shader_state *shader_state;
  4052. +
  4053. + if (exec->found_increment_semaphore_packet) {
  4054. + DRM_ERROR("Drawing after VC4_PACKET_INCREMENT_SEMAPHORE\n");
  4055. + return -EINVAL;
  4056. + }
  4057. +
  4058. + /* Check overflow condition */
  4059. + if (exec->shader_state_count == 0) {
  4060. + DRM_ERROR("shader state must precede primitives\n");
  4061. + return -EINVAL;
  4062. + }
  4063. + shader_state = &exec->shader_state[exec->shader_state_count - 1];
  4064. +
  4065. + if (max_index > shader_state->max_index)
  4066. + shader_state->max_index = max_index;
  4067. +
  4068. + if (!vc4_use_handle(exec, 0, VC4_MODE_RENDER, &ib))
  4069. + return -EINVAL;
  4070. +
  4071. + if (offset > ib->base.size ||
  4072. + (ib->base.size - offset) / index_size < length) {
  4073. + DRM_ERROR("IB access overflow (%d + %d*%d > %d)\n",
  4074. + offset, length, index_size, ib->base.size);
  4075. + return -EINVAL;
  4076. + }
  4077. +
  4078. + *(uint32_t *)(validated + 5) = ib->paddr + offset;
  4079. +
  4080. + return 0;
  4081. +}
  4082. +
  4083. +static int
  4084. +validate_gl_array_primitive(VALIDATE_ARGS)
  4085. +{
  4086. + uint32_t length = *(uint32_t *)(untrusted + 1);
  4087. + uint32_t base_index = *(uint32_t *)(untrusted + 5);
  4088. + uint32_t max_index;
  4089. + struct vc4_shader_state *shader_state;
  4090. +
  4091. + if (exec->found_increment_semaphore_packet) {
  4092. + DRM_ERROR("Drawing after VC4_PACKET_INCREMENT_SEMAPHORE\n");
  4093. + return -EINVAL;
  4094. + }
  4095. +
  4096. + /* Check overflow condition */
  4097. + if (exec->shader_state_count == 0) {
  4098. + DRM_ERROR("shader state must precede primitives\n");
  4099. + return -EINVAL;
  4100. + }
  4101. + shader_state = &exec->shader_state[exec->shader_state_count - 1];
  4102. +
  4103. + if (length + base_index < length) {
  4104. + DRM_ERROR("primitive vertex count overflow\n");
  4105. + return -EINVAL;
  4106. + }
  4107. + max_index = length + base_index - 1;
  4108. +
  4109. + if (max_index > shader_state->max_index)
  4110. + shader_state->max_index = max_index;
  4111. +
  4112. + return 0;
  4113. +}
  4114. +
  4115. +static int
  4116. +validate_gl_shader_state(VALIDATE_ARGS)
  4117. +{
  4118. + uint32_t i = exec->shader_state_count++;
  4119. +
  4120. + if (i >= exec->shader_state_size) {
  4121. + DRM_ERROR("More requests for shader states than declared\n");
  4122. + return -EINVAL;
  4123. + }
  4124. +
  4125. + exec->shader_state[i].packet = VC4_PACKET_GL_SHADER_STATE;
  4126. + exec->shader_state[i].addr = *(uint32_t *)untrusted;
  4127. + exec->shader_state[i].max_index = 0;
  4128. +
  4129. + if (exec->shader_state[i].addr & ~0xf) {
  4130. + DRM_ERROR("high bits set in GL shader rec reference\n");
  4131. + return -EINVAL;
  4132. + }
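+ /* Only the low 4 bits of the reference are meaningful here: bits 0-2
+  * encode the attribute count (0 meaning 8) and bit 3 the extended
+  * vertex stride flag, which is what validate_shader_rec() (and the
+  * gl_shader_rec_size() helper) size the record from. The actual record
+  * address is the kernel-chosen shader_rec_p, patched in below.
+  */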
  4133. +
  4134. + *(uint32_t *)validated = (exec->shader_rec_p +
  4135. + exec->shader_state[i].addr);
  4136. +
  4137. + exec->shader_rec_p +=
  4138. + roundup(gl_shader_rec_size(exec->shader_state[i].addr), 16);
  4139. +
  4140. + return 0;
  4141. +}
  4142. +
  4143. +static int
  4144. +validate_nv_shader_state(VALIDATE_ARGS)
  4145. +{
  4146. + uint32_t i = exec->shader_state_count++;
  4147. +
  4148. + if (i >= exec->shader_state_size) {
  4149. + DRM_ERROR("More requests for shader states than declared\n");
  4150. + return -EINVAL;
  4151. + }
  4152. +
  4153. + exec->shader_state[i].packet = VC4_PACKET_NV_SHADER_STATE;
  4154. + exec->shader_state[i].addr = *(uint32_t *)untrusted;
  4155. +
  4156. + if (exec->shader_state[i].addr & 15) {
  4157. + DRM_ERROR("NV shader state address 0x%08x misaligned\n",
  4158. + exec->shader_state[i].addr);
  4159. + return -EINVAL;
  4160. + }
  4161. +
  4162. + *(uint32_t *)validated = (exec->shader_state[i].addr +
  4163. + exec->shader_rec_p);
  4164. +
  4165. + return 0;
  4166. +}
  4167. +
  4168. +static int
  4169. +validate_tile_binning_config(VALIDATE_ARGS)
  4170. +{
  4171. + struct drm_device *dev = exec->exec_bo->base.dev;
  4172. + uint8_t flags;
  4173. + uint32_t tile_state_size, tile_alloc_size;
  4174. + uint32_t tile_count;
  4175. +
  4176. + if (exec->found_tile_binning_mode_config_packet) {
  4177. + DRM_ERROR("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
  4178. + return -EINVAL;
  4179. + }
  4180. + exec->found_tile_binning_mode_config_packet = true;
  4181. +
  4182. + exec->bin_tiles_x = *(uint8_t *)(untrusted + 12);
  4183. + exec->bin_tiles_y = *(uint8_t *)(untrusted + 13);
  4184. + tile_count = exec->bin_tiles_x * exec->bin_tiles_y;
  4185. + flags = *(uint8_t *)(untrusted + 14);
  4186. +
  4187. + if (exec->bin_tiles_x == 0 ||
  4188. + exec->bin_tiles_y == 0) {
  4189. + DRM_ERROR("Tile binning config of %dx%d too small\n",
  4190. + exec->bin_tiles_x, exec->bin_tiles_y);
  4191. + return -EINVAL;
  4192. + }
  4193. +
  4194. + if (flags & (VC4_BIN_CONFIG_DB_NON_MS |
  4195. + VC4_BIN_CONFIG_TILE_BUFFER_64BIT |
  4196. + VC4_BIN_CONFIG_MS_MODE_4X)) {
   4197. + DRM_ERROR("unsupported binning config flags 0x%02x\n", flags);
  4198. + return -EINVAL;
  4199. + }
  4200. +
  4201. + /* The tile state data array is 48 bytes per tile, and we put it at
  4202. + * the start of a BO containing both it and the tile alloc.
  4203. + */
  4204. + tile_state_size = 48 * tile_count;
  4205. +
  4206. + /* Since the tile alloc array will follow us, align. */
  4207. + exec->tile_alloc_offset = roundup(tile_state_size, 4096);
  4208. +
  4209. + *(uint8_t *)(validated + 14) =
  4210. + ((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK |
  4211. + VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK)) |
  4212. + VC4_BIN_CONFIG_AUTO_INIT_TSDA |
  4213. + VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32,
  4214. + VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE) |
  4215. + VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128,
  4216. + VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE));
  4217. +
  4218. + /* Initial block size. */
  4219. + tile_alloc_size = 32 * tile_count;
  4220. +
  4221. + /*
  4222. + * The initial allocation gets rounded to the next 256 bytes before
  4223. + * the hardware starts fulfilling further allocations.
  4224. + */
  4225. + tile_alloc_size = roundup(tile_alloc_size, 256);
  4226. +
  4227. + /* Add space for the extra allocations. This is what gets used first,
  4228. + * before overflow memory. It must have at least 4096 bytes, but we
  4229. + * want to avoid overflow memory usage if possible.
  4230. + */
  4231. + tile_alloc_size += 1024 * 1024;
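+ /* Rough worked example (hypothetical 30x17 tile grid): tile_count =
+  * 510, so tile_state_size = 48 * 510 = 24480 bytes, giving a
+  * tile_alloc_offset of 24576; the initial tile alloc is 32 * 510 =
+  * 16320 bytes, rounded up to 16384, plus the extra 1MB, for a BO of
+  * roughly 1.04MB total.
+  */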
  4232. +
  4233. + exec->tile_bo = &vc4_bo_create(dev, exec->tile_alloc_offset +
  4234. + tile_alloc_size)->base;
  4235. + if (!exec->tile_bo)
  4236. + return -ENOMEM;
  4237. + list_add_tail(&to_vc4_bo(&exec->tile_bo->base)->unref_head,
  4238. + &exec->unref_list);
  4239. +
  4240. + /* tile alloc address. */
  4241. + *(uint32_t *)(validated + 0) = (exec->tile_bo->paddr +
  4242. + exec->tile_alloc_offset);
  4243. + /* tile alloc size. */
  4244. + *(uint32_t *)(validated + 4) = tile_alloc_size;
  4245. + /* tile state address. */
  4246. + *(uint32_t *)(validated + 8) = exec->tile_bo->paddr;
  4247. +
  4248. + return 0;
  4249. +}
  4250. +
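+/* VC4_PACKET_GEM_HANDLES never reaches the hardware: it just stashes the
+ * BO handle indices in exec->bo_index so that later packets (the index
+ * buffer lookup in validate_indexed_prim_list(), for instance) can
+ * resolve them through vc4_use_handle(). vc4_validate_bin_cl() below
+ * skips copying it into the validated CL.
+ */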
  4251. +static int
  4252. +validate_gem_handles(VALIDATE_ARGS)
  4253. +{
  4254. + memcpy(exec->bo_index, untrusted, sizeof(exec->bo_index));
  4255. + return 0;
  4256. +}
  4257. +
  4258. +#define VC4_DEFINE_PACKET(packet, name, func) \
  4259. + [packet] = { packet ## _SIZE, name, func }
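+/* So, for example, VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, "flush", NULL)
+ * expands to [VC4_PACKET_FLUSH] = { VC4_PACKET_FLUSH_SIZE, "flush", NULL },
+ * i.e. the table below is indexed directly by packet opcode.
+ */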
  4260. +
  4261. +static const struct cmd_info {
  4262. + uint16_t len;
  4263. + const char *name;
  4264. + int (*func)(struct vc4_exec_info *exec, void *validated,
  4265. + void *untrusted);
  4266. +} cmd_info[] = {
  4267. + VC4_DEFINE_PACKET(VC4_PACKET_HALT, "halt", NULL),
  4268. + VC4_DEFINE_PACKET(VC4_PACKET_NOP, "nop", NULL),
  4269. + VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, "flush", NULL),
  4270. + VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, "flush all state", validate_flush_all),
  4271. + VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING, "start tile binning", validate_start_tile_binning),
  4272. + VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, "increment semaphore", validate_increment_semaphore),
  4273. +
  4274. + VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE, "Indexed Primitive List", validate_indexed_prim_list),
  4275. +
  4276. + VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE, "Vertex Array Primitives", validate_gl_array_primitive),
  4277. +
  4278. + /* This is only used by clipped primitives (packets 48 and 49), which
  4279. + * we don't support parsing yet.
  4280. + */
  4281. + VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, "primitive list format", NULL),
  4282. +
  4283. + VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, "GL Shader State", validate_gl_shader_state),
  4284. + VC4_DEFINE_PACKET(VC4_PACKET_NV_SHADER_STATE, "NV Shader State", validate_nv_shader_state),
  4285. +
  4286. + VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, "configuration bits", NULL),
  4287. + VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, "flat shade flags", NULL),
  4288. + VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, "point size", NULL),
  4289. + VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, "line width", NULL),
  4290. + VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, "RHT X boundary", NULL),
  4291. + VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, "Depth Offset", NULL),
  4292. + VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, "Clip Window", NULL),
  4293. + VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, "Viewport Offset", NULL),
  4294. + VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, "Clipper XY Scaling", NULL),
  4295. + /* Note: The docs say this was also 105, but it was 106 in the
  4296. + * initial userland code drop.
  4297. + */
  4298. + VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, "Clipper Z Scale and Offset", NULL),
  4299. +
  4300. + VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG, "tile binning configuration", validate_tile_binning_config),
  4301. +
  4302. + VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, "GEM handles", validate_gem_handles),
  4303. +};
  4304. +
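+/* Walks the untrusted bin CL one packet at a time: each packet is
+ * length-checked against cmd_info, copied into the validated buffer
+ * (except VC4_PACKET_GEM_HANDLES), and then handed to its validate
+ * function with pointers just past the one-byte opcode. Validation
+ * stops at VC4_PACKET_HALT, and the CL must contain both a
+ * START_TILE_BINNING and an INCREMENT_SEMAPHORE packet.
+ */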
  4305. +int
  4306. +vc4_validate_bin_cl(struct drm_device *dev,
  4307. + void *validated,
  4308. + void *unvalidated,
  4309. + struct vc4_exec_info *exec)
  4310. +{
  4311. + uint32_t len = exec->args->bin_cl_size;
  4312. + uint32_t dst_offset = 0;
  4313. + uint32_t src_offset = 0;
  4314. +
  4315. + while (src_offset < len) {
  4316. + void *dst_pkt = validated + dst_offset;
  4317. + void *src_pkt = unvalidated + src_offset;
  4318. + u8 cmd = *(uint8_t *)src_pkt;
  4319. + const struct cmd_info *info;
  4320. +
   4321. + if (cmd >= ARRAY_SIZE(cmd_info)) {
  4322. + DRM_ERROR("0x%08x: packet %d out of bounds\n",
  4323. + src_offset, cmd);
  4324. + return -EINVAL;
  4325. + }
  4326. +
  4327. + info = &cmd_info[cmd];
  4328. + if (!info->name) {
  4329. + DRM_ERROR("0x%08x: packet %d invalid\n",
  4330. + src_offset, cmd);
  4331. + return -EINVAL;
  4332. + }
  4333. +
  4334. +#if 0
  4335. + DRM_INFO("0x%08x: packet %d (%s) size %d processing...\n",
  4336. + src_offset, cmd, info->name, info->len);
  4337. +#endif
  4338. +
  4339. + if (src_offset + info->len > len) {
  4340. + DRM_ERROR("0x%08x: packet %d (%s) length 0x%08x "
  4341. + "exceeds bounds (0x%08x)\n",
  4342. + src_offset, cmd, info->name, info->len,
  4343. + src_offset + len);
  4344. + return -EINVAL;
  4345. + }
  4346. +
  4347. + if (cmd != VC4_PACKET_GEM_HANDLES)
  4348. + memcpy(dst_pkt, src_pkt, info->len);
  4349. +
  4350. + if (info->func && info->func(exec,
  4351. + dst_pkt + 1,
  4352. + src_pkt + 1)) {
  4353. + DRM_ERROR("0x%08x: packet %d (%s) failed to "
  4354. + "validate\n",
  4355. + src_offset, cmd, info->name);
  4356. + return -EINVAL;
  4357. + }
  4358. +
  4359. + src_offset += info->len;
  4360. + /* GEM handle loading doesn't produce HW packets. */
  4361. + if (cmd != VC4_PACKET_GEM_HANDLES)
  4362. + dst_offset += info->len;
  4363. +
  4364. + /* When the CL hits halt, it'll stop reading anything else. */
  4365. + if (cmd == VC4_PACKET_HALT)
  4366. + break;
  4367. + }
  4368. +
  4369. + exec->ct0ea = exec->ct0ca + dst_offset;
  4370. +
  4371. + if (!exec->found_start_tile_binning_packet) {
  4372. + DRM_ERROR("Bin CL missing VC4_PACKET_START_TILE_BINNING\n");
  4373. + return -EINVAL;
  4374. + }
  4375. +
  4376. + if (!exec->found_increment_semaphore_packet) {
  4377. + DRM_ERROR("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE\n");
  4378. + return -EINVAL;
  4379. + }
  4380. +
  4381. + return 0;
  4382. +}
  4383. +
  4384. +static bool
  4385. +reloc_tex(struct vc4_exec_info *exec,
  4386. + void *uniform_data_u,
  4387. + struct vc4_texture_sample_info *sample,
  4388. + uint32_t texture_handle_index)
  4390. +{
  4391. + struct drm_gem_cma_object *tex;
  4392. + uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]);
  4393. + uint32_t p1 = *(uint32_t *)(uniform_data_u + sample->p_offset[1]);
  4394. + uint32_t p2 = (sample->p_offset[2] != ~0 ?
  4395. + *(uint32_t *)(uniform_data_u + sample->p_offset[2]) : 0);
  4396. + uint32_t p3 = (sample->p_offset[3] != ~0 ?
  4397. + *(uint32_t *)(uniform_data_u + sample->p_offset[3]) : 0);
  4398. + uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0];
  4399. + uint32_t offset = p0 & VC4_TEX_P0_OFFSET_MASK;
  4400. + uint32_t miplevels = VC4_GET_FIELD(p0, VC4_TEX_P0_MIPLVLS);
  4401. + uint32_t width = VC4_GET_FIELD(p1, VC4_TEX_P1_WIDTH);
  4402. + uint32_t height = VC4_GET_FIELD(p1, VC4_TEX_P1_HEIGHT);
  4403. + uint32_t cpp, tiling_format, utile_w, utile_h;
  4404. + uint32_t i;
  4405. + uint32_t cube_map_stride = 0;
  4406. + enum vc4_texture_data_type type;
  4407. +
  4408. + if (!vc4_use_bo(exec, texture_handle_index, VC4_MODE_RENDER, &tex))
  4409. + return false;
  4410. +
  4411. + if (sample->is_direct) {
  4412. + uint32_t remaining_size = tex->base.size - p0;
  4413. + if (p0 > tex->base.size - 4) {
  4414. + DRM_ERROR("UBO offset greater than UBO size\n");
  4415. + goto fail;
  4416. + }
  4417. + if (p1 > remaining_size - 4) {
  4418. + DRM_ERROR("UBO clamp would allow reads outside of UBO\n");
  4419. + goto fail;
  4420. + }
  4421. + *validated_p0 = tex->paddr + p0;
  4422. + return true;
  4423. + }
  4424. +
  4425. + if (width == 0)
  4426. + width = 2048;
  4427. + if (height == 0)
  4428. + height = 2048;
  4429. +
  4430. + if (p0 & VC4_TEX_P0_CMMODE_MASK) {
  4431. + if (VC4_GET_FIELD(p2, VC4_TEX_P2_PTYPE) ==
  4432. + VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE)
  4433. + cube_map_stride = p2 & VC4_TEX_P2_CMST_MASK;
  4434. + if (VC4_GET_FIELD(p3, VC4_TEX_P2_PTYPE) ==
  4435. + VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) {
  4436. + if (cube_map_stride) {
  4437. + DRM_ERROR("Cube map stride set twice\n");
  4438. + goto fail;
  4439. + }
  4440. +
  4441. + cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK;
  4442. + }
  4443. + if (!cube_map_stride) {
  4444. + DRM_ERROR("Cube map stride not set\n");
  4445. + goto fail;
  4446. + }
  4447. + }
  4448. +
  4449. + type = (VC4_GET_FIELD(p0, VC4_TEX_P0_TYPE) |
  4450. + (VC4_GET_FIELD(p1, VC4_TEX_P1_TYPE4) << 4));
  4451. +
  4452. + switch (type) {
  4453. + case VC4_TEXTURE_TYPE_RGBA8888:
  4454. + case VC4_TEXTURE_TYPE_RGBX8888:
  4455. + case VC4_TEXTURE_TYPE_RGBA32R:
  4456. + cpp = 4;
  4457. + break;
  4458. + case VC4_TEXTURE_TYPE_RGBA4444:
  4459. + case VC4_TEXTURE_TYPE_RGBA5551:
  4460. + case VC4_TEXTURE_TYPE_RGB565:
  4461. + case VC4_TEXTURE_TYPE_LUMALPHA:
  4462. + case VC4_TEXTURE_TYPE_S16F:
  4463. + case VC4_TEXTURE_TYPE_S16:
  4464. + cpp = 2;
  4465. + break;
  4466. + case VC4_TEXTURE_TYPE_LUMINANCE:
  4467. + case VC4_TEXTURE_TYPE_ALPHA:
  4468. + case VC4_TEXTURE_TYPE_S8:
  4469. + cpp = 1;
  4470. + break;
  4471. + case VC4_TEXTURE_TYPE_ETC1:
  4472. + case VC4_TEXTURE_TYPE_BW1:
  4473. + case VC4_TEXTURE_TYPE_A4:
  4474. + case VC4_TEXTURE_TYPE_A1:
  4475. + case VC4_TEXTURE_TYPE_RGBA64:
  4476. + case VC4_TEXTURE_TYPE_YUV422R:
  4477. + default:
  4478. + DRM_ERROR("Texture format %d unsupported\n", type);
  4479. + goto fail;
  4480. + }
  4481. + utile_w = utile_width(cpp);
  4482. + utile_h = utile_height(cpp);
  4483. +
  4484. + if (type == VC4_TEXTURE_TYPE_RGBA32R) {
  4485. + tiling_format = VC4_TILING_FORMAT_LINEAR;
  4486. + } else {
  4487. + if (size_is_lt(width, height, cpp))
  4488. + tiling_format = VC4_TILING_FORMAT_LT;
  4489. + else
  4490. + tiling_format = VC4_TILING_FORMAT_T;
  4491. + }
  4492. +
  4493. + if (!vc4_check_tex_size(exec, tex, offset + cube_map_stride * 5,
  4494. + tiling_format, width, height, cpp)) {
  4495. + goto fail;
  4496. + }
  4497. +
  4498. + /* The mipmap levels are stored before the base of the texture. Make
  4499. + * sure there is actually space in the BO.
  4500. + */
  4501. + for (i = 1; i <= miplevels; i++) {
  4502. + uint32_t level_width = max(width >> i, 1u);
  4503. + uint32_t level_height = max(height >> i, 1u);
  4504. + uint32_t aligned_width, aligned_height;
  4505. + uint32_t level_size;
  4506. +
  4507. + /* Once the levels get small enough, they drop from T to LT. */
  4508. + if (tiling_format == VC4_TILING_FORMAT_T &&
  4509. + size_is_lt(level_width, level_height, cpp)) {
  4510. + tiling_format = VC4_TILING_FORMAT_LT;
  4511. + }
  4512. +
  4513. + switch (tiling_format) {
  4514. + case VC4_TILING_FORMAT_T:
  4515. + aligned_width = round_up(level_width, utile_w * 8);
  4516. + aligned_height = round_up(level_height, utile_h * 8);
  4517. + break;
  4518. + case VC4_TILING_FORMAT_LT:
  4519. + aligned_width = round_up(level_width, utile_w);
  4520. + aligned_height = round_up(level_height, utile_h);
  4521. + break;
  4522. + default:
  4523. + aligned_width = round_up(level_width, utile_w);
  4524. + aligned_height = level_height;
  4525. + break;
  4526. + }
  4527. +
  4528. + level_size = aligned_width * cpp * aligned_height;
  4529. +
  4530. + if (offset < level_size) {
  4531. + DRM_ERROR("Level %d (%dx%d -> %dx%d) size %db "
  4532. + "overflowed buffer bounds (offset %d)\n",
  4533. + i, level_width, level_height,
  4534. + aligned_width, aligned_height,
  4535. + level_size, offset);
  4536. + goto fail;
  4537. + }
  4538. +
  4539. + offset -= level_size;
  4540. + }
  4541. +
  4542. + *validated_p0 = tex->paddr + p0;
  4543. +
  4544. + return true;
  4545. + fail:
  4546. + DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0);
  4547. + DRM_INFO("Texture p1 at %d: 0x%08x\n", sample->p_offset[1], p1);
  4548. + DRM_INFO("Texture p2 at %d: 0x%08x\n", sample->p_offset[2], p2);
  4549. + DRM_INFO("Texture p3 at %d: 0x%08x\n", sample->p_offset[3], p3);
  4550. + return false;
  4551. +}
  4552. +
  4553. +static int
  4554. +validate_shader_rec(struct drm_device *dev,
  4555. + struct vc4_exec_info *exec,
  4556. + struct vc4_shader_state *state)
  4557. +{
  4558. + uint32_t *src_handles;
  4559. + void *pkt_u, *pkt_v;
  4560. + enum shader_rec_reloc_type {
  4561. + RELOC_CODE,
  4562. + RELOC_VBO,
  4563. + };
  4564. + struct shader_rec_reloc {
  4565. + enum shader_rec_reloc_type type;
  4566. + uint32_t offset;
  4567. + };
  4568. + static const struct shader_rec_reloc gl_relocs[] = {
  4569. + { RELOC_CODE, 4 }, /* fs */
  4570. + { RELOC_CODE, 16 }, /* vs */
  4571. + { RELOC_CODE, 28 }, /* cs */
  4572. + };
  4573. + static const struct shader_rec_reloc nv_relocs[] = {
  4574. + { RELOC_CODE, 4 }, /* fs */
  4575. + { RELOC_VBO, 12 }
  4576. + };
  4577. + const struct shader_rec_reloc *relocs;
  4578. + struct drm_gem_cma_object *bo[ARRAY_SIZE(gl_relocs) + 8];
  4579. + uint32_t nr_attributes = 0, nr_fixed_relocs, nr_relocs, packet_size;
  4580. + int i;
  4581. + struct vc4_validated_shader_info *validated_shader;
  4582. +
  4583. + if (state->packet == VC4_PACKET_NV_SHADER_STATE) {
  4584. + relocs = nv_relocs;
  4585. + nr_fixed_relocs = ARRAY_SIZE(nv_relocs);
  4586. +
  4587. + packet_size = 16;
  4588. + } else {
  4589. + relocs = gl_relocs;
  4590. + nr_fixed_relocs = ARRAY_SIZE(gl_relocs);
  4591. +
  4592. + nr_attributes = state->addr & 0x7;
  4593. + if (nr_attributes == 0)
  4594. + nr_attributes = 8;
  4595. + packet_size = gl_shader_rec_size(state->addr);
  4596. + }
  4597. + nr_relocs = nr_fixed_relocs + nr_attributes;
  4598. +
  4599. + if (nr_relocs * 4 > exec->shader_rec_size) {
  4600. + DRM_ERROR("overflowed shader recs reading %d handles "
  4601. + "from %d bytes left\n",
  4602. + nr_relocs, exec->shader_rec_size);
  4603. + return -EINVAL;
  4604. + }
  4605. + src_handles = exec->shader_rec_u;
  4606. + exec->shader_rec_u += nr_relocs * 4;
  4607. + exec->shader_rec_size -= nr_relocs * 4;
  4608. +
  4609. + if (packet_size > exec->shader_rec_size) {
  4610. + DRM_ERROR("overflowed shader recs copying %db packet "
  4611. + "from %d bytes left\n",
  4612. + packet_size, exec->shader_rec_size);
  4613. + return -EINVAL;
  4614. + }
  4615. + pkt_u = exec->shader_rec_u;
  4616. + pkt_v = exec->shader_rec_v;
  4617. + memcpy(pkt_v, pkt_u, packet_size);
  4618. + exec->shader_rec_u += packet_size;
  4619. + /* Shader recs have to be aligned to 16 bytes (due to the attribute
  4620. + * flags being in the low bytes), so round the next validated shader
  4621. + * rec address up. This should be safe, since we've got so many
  4622. + * relocations in a shader rec packet.
  4623. + */
  4624. + BUG_ON(roundup(packet_size, 16) - packet_size > nr_relocs * 4);
  4625. + exec->shader_rec_v += roundup(packet_size, 16);
  4626. + exec->shader_rec_size -= packet_size;
  4627. +
  4628. + for (i = 0; i < nr_relocs; i++) {
  4629. + enum vc4_bo_mode mode;
  4630. +
  4631. + if (i < nr_fixed_relocs && relocs[i].type == RELOC_CODE)
  4632. + mode = VC4_MODE_SHADER;
  4633. + else
  4634. + mode = VC4_MODE_RENDER;
  4635. +
  4636. + if (!vc4_use_bo(exec, src_handles[i], mode, &bo[i])) {
   4637. + return -EINVAL;
  4638. + }
  4639. + }
  4640. +
  4641. + for (i = 0; i < nr_fixed_relocs; i++) {
  4642. + uint32_t o = relocs[i].offset;
  4643. + uint32_t src_offset = *(uint32_t *)(pkt_u + o);
  4644. + uint32_t *texture_handles_u;
  4645. + void *uniform_data_u;
  4646. + uint32_t tex;
  4647. +
  4648. + *(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset;
  4649. +
  4650. + switch (relocs[i].type) {
  4651. + case RELOC_CODE:
  4652. + if (src_offset != 0) {
  4653. + DRM_ERROR("Shaders must be at offset 0 of "
  4654. + "the BO.\n");
  4655. + goto fail;
  4656. + }
  4657. +
  4658. + validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader;
  4659. + if (!validated_shader)
  4660. + goto fail;
  4661. +
  4662. + if (validated_shader->uniforms_src_size >
  4663. + exec->uniforms_size) {
  4664. + DRM_ERROR("Uniforms src buffer overflow\n");
  4665. + goto fail;
  4666. + }
  4667. +
  4668. + texture_handles_u = exec->uniforms_u;
  4669. + uniform_data_u = (texture_handles_u +
  4670. + validated_shader->num_texture_samples);
  4671. +
  4672. + memcpy(exec->uniforms_v, uniform_data_u,
  4673. + validated_shader->uniforms_size);
  4674. +
  4675. + for (tex = 0;
  4676. + tex < validated_shader->num_texture_samples;
  4677. + tex++) {
  4678. + if (!reloc_tex(exec,
  4679. + uniform_data_u,
  4680. + &validated_shader->texture_samples[tex],
  4681. + texture_handles_u[tex])) {
  4682. + goto fail;
  4683. + }
  4684. + }
  4685. +
  4686. + *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
  4687. +
  4688. + exec->uniforms_u += validated_shader->uniforms_src_size;
  4689. + exec->uniforms_v += validated_shader->uniforms_size;
  4690. + exec->uniforms_p += validated_shader->uniforms_size;
  4691. +
  4692. + break;
  4693. +
  4694. + case RELOC_VBO:
  4695. + break;
  4696. + }
  4697. + }
  4698. +
  4699. + for (i = 0; i < nr_attributes; i++) {
  4700. + struct drm_gem_cma_object *vbo = bo[nr_fixed_relocs + i];
  4701. + uint32_t o = 36 + i * 8;
  4702. + uint32_t offset = *(uint32_t *)(pkt_u + o + 0);
  4703. + uint32_t attr_size = *(uint8_t *)(pkt_u + o + 4) + 1;
  4704. + uint32_t stride = *(uint8_t *)(pkt_u + o + 5);
  4705. + uint32_t max_index;
  4706. +
  4707. + if (state->addr & 0x8)
  4708. + stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff;
  4709. +
  4710. + if (vbo->base.size < offset ||
  4711. + vbo->base.size - offset < attr_size) {
  4712. + DRM_ERROR("BO offset overflow (%d + %d > %d)\n",
  4713. + offset, attr_size, vbo->base.size);
  4714. + return -EINVAL;
  4715. + }
  4716. +
  4717. + if (stride != 0) {
  4718. + max_index = ((vbo->base.size - offset - attr_size) /
  4719. + stride);
  4720. + if (state->max_index > max_index) {
  4721. + DRM_ERROR("primitives use index %d out of supplied %d\n",
  4722. + state->max_index, max_index);
  4723. + return -EINVAL;
  4724. + }
  4725. + }
  4726. +
  4727. + *(uint32_t *)(pkt_v + o) = vbo->paddr + offset;
  4728. + }
  4729. +
  4730. + return 0;
  4731. +
  4732. +fail:
  4733. + return -EINVAL;
  4734. +}
  4735. +
  4736. +int
  4737. +vc4_validate_shader_recs(struct drm_device *dev,
  4738. + struct vc4_exec_info *exec)
  4739. +{
  4740. + uint32_t i;
  4741. + int ret = 0;
  4742. +
  4743. + for (i = 0; i < exec->shader_state_count; i++) {
  4744. + ret = validate_shader_rec(dev, exec, &exec->shader_state[i]);
  4745. + if (ret)
  4746. + return ret;
  4747. + }
  4748. +
  4749. + return ret;
  4750. +}
  4751. --- /dev/null
  4752. +++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
  4753. @@ -0,0 +1,521 @@
  4754. +/*
  4755. + * Copyright © 2014 Broadcom
  4756. + *
  4757. + * Permission is hereby granted, free of charge, to any person obtaining a
  4758. + * copy of this software and associated documentation files (the "Software"),
  4759. + * to deal in the Software without restriction, including without limitation
  4760. + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  4761. + * and/or sell copies of the Software, and to permit persons to whom the
  4762. + * Software is furnished to do so, subject to the following conditions:
  4763. + *
  4764. + * The above copyright notice and this permission notice (including the next
  4765. + * paragraph) shall be included in all copies or substantial portions of the
  4766. + * Software.
  4767. + *
  4768. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  4769. + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  4770. + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  4771. + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  4772. + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  4773. + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  4774. + * IN THE SOFTWARE.
  4775. + */
  4776. +
  4777. +/**
  4778. + * DOC: Shader validator for VC4.
  4779. + *
  4780. + * The VC4 has no IOMMU between it and system memory. So, a user with access
  4781. + * to execute shaders could escalate privilege by overwriting system memory
  4782. + * (using the VPM write address register in the general-purpose DMA mode) or
  4783. + * reading system memory it shouldn't (reading it as a texture, or uniform
  4784. + * data, or vertex data).
  4785. + *
  4786. + * This walks over a shader starting from some offset within a BO, ensuring
  4787. + * that its accesses are appropriately bounded, and recording how many texture
  4788. + * accesses are made and where so that we can do relocations for them in the
  4789. + * uniform stream.
  4790. + *
  4791. + * The kernel API has shaders stored in user-mapped BOs. The BOs will be
  4792. + * forcibly unmapped from the process before validation, and any cache of
  4793. + * validated state will be flushed if the mapping is faulted back in.
  4794. + *
  4795. + * Storing the shaders in BOs means that the validation process will be slow
  4796. + * due to uncached reads, but since shaders are long-lived and shader BOs are
  4797. + * never actually modified, this shouldn't be a problem.
  4798. + */
  4799. +
  4800. +#include "vc4_drv.h"
  4801. +#include "vc4_qpu_defines.h"
  4802. +
  4803. +struct vc4_shader_validation_state {
  4804. + struct vc4_texture_sample_info tmu_setup[2];
  4805. + int tmu_write_count[2];
  4806. +
  4807. + /* For registers that were last written to by a MIN instruction with
  4808. + * one argument being a uniform, the address of the uniform.
  4809. + * Otherwise, ~0.
  4810. + *
  4811. + * This is used for the validation of direct address memory reads.
  4812. + */
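+ /* Indexing below follows waddr_to_live_reg_index(): 0-31 are register
+  * file A, 32-63 register file B, and 64-67 the accumulators r0-r3.
+  */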
  4813. + uint32_t live_min_clamp_offsets[32 + 32 + 4];
  4814. + bool live_max_clamp_regs[32 + 32 + 4];
  4815. +};
  4816. +
  4817. +static uint32_t
  4818. +waddr_to_live_reg_index(uint32_t waddr, bool is_b)
  4819. +{
  4820. + if (waddr < 32) {
  4821. + if (is_b)
  4822. + return 32 + waddr;
  4823. + else
  4824. + return waddr;
  4825. + } else if (waddr <= QPU_W_ACC3) {
  4827. + return 64 + waddr - QPU_W_ACC0;
  4828. + } else {
  4829. + return ~0;
  4830. + }
  4831. +}
  4832. +
  4833. +static uint32_t
  4834. +raddr_add_a_to_live_reg_index(uint64_t inst)
  4835. +{
  4836. + uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
  4837. + uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
  4838. + uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
  4839. + uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
  4840. +
  4841. + if (add_a == QPU_MUX_A) {
  4842. + return raddr_a;
  4843. + } else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM) {
  4844. + return 32 + raddr_b;
  4845. + } else if (add_a <= QPU_MUX_R3) {
  4846. + return 64 + add_a;
  4847. + } else {
  4848. + return ~0;
  4849. + }
  4850. +}
  4851. +
  4852. +static bool
  4853. +is_tmu_submit(uint32_t waddr)
  4854. +{
  4855. + return (waddr == QPU_W_TMU0_S ||
  4856. + waddr == QPU_W_TMU1_S);
  4857. +}
  4858. +
  4859. +static bool
  4860. +is_tmu_write(uint32_t waddr)
  4861. +{
  4862. + return (waddr >= QPU_W_TMU0_S &&
  4863. + waddr <= QPU_W_TMU1_B);
  4864. +}
  4865. +
  4866. +static bool
  4867. +record_validated_texture_sample(struct vc4_validated_shader_info *validated_shader,
  4868. + struct vc4_shader_validation_state *validation_state,
  4869. + int tmu)
  4870. +{
  4871. + uint32_t s = validated_shader->num_texture_samples;
  4872. + int i;
  4873. + struct vc4_texture_sample_info *temp_samples;
  4874. +
  4875. + temp_samples = krealloc(validated_shader->texture_samples,
  4876. + (s + 1) * sizeof(*temp_samples),
  4877. + GFP_KERNEL);
  4878. + if (!temp_samples)
  4879. + return false;
  4880. +
  4881. + memcpy(&temp_samples[s],
  4882. + &validation_state->tmu_setup[tmu],
  4883. + sizeof(*temp_samples));
  4884. +
  4885. + validated_shader->num_texture_samples = s + 1;
  4886. + validated_shader->texture_samples = temp_samples;
  4887. +
  4888. + for (i = 0; i < 4; i++)
  4889. + validation_state->tmu_setup[tmu].p_offset[i] = ~0;
  4890. +
  4891. + return true;
  4892. +}
  4893. +
  4894. +static bool
  4895. +check_tmu_write(uint64_t inst,
  4896. + struct vc4_validated_shader_info *validated_shader,
  4897. + struct vc4_shader_validation_state *validation_state,
  4898. + bool is_mul)
  4899. +{
  4900. + uint32_t waddr = (is_mul ?
  4901. + QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
  4902. + QPU_GET_FIELD(inst, QPU_WADDR_ADD));
  4903. + uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
  4904. + uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
  4905. + int tmu = waddr > QPU_W_TMU0_B;
  4906. + bool submit = is_tmu_submit(waddr);
  4907. + bool is_direct = submit && validation_state->tmu_write_count[tmu] == 0;
  4908. + uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
  4909. +
  4910. + if (is_direct) {
  4911. + uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
  4912. + uint32_t clamp_reg, clamp_offset;
  4913. +
  4914. + if (sig == QPU_SIG_SMALL_IMM) {
  4915. + DRM_ERROR("direct TMU read used small immediate\n");
  4916. + return false;
  4917. + }
  4918. +
  4919. + /* Make sure that this texture load is an add of the base
  4920. + * address of the UBO to a clamped offset within the UBO.
  4921. + */
  4922. + if (is_mul ||
  4923. + QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
  4924. + DRM_ERROR("direct TMU load wasn't an add\n");
  4925. + return false;
  4926. + }
  4927. +
   4928. + /* We assert that the clamped address is the first
  4929. + * argument, and the UBO base address is the second argument.
  4930. + * This is arbitrary, but simpler than supporting flipping the
  4931. + * two either way.
  4932. + */
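+ /* i.e. the pattern being matched is roughly:
+  *     add tmuN_s, rX, unif
+  * where rX was previously produced by a min(..., unif) recorded in
+  * live_min_clamp_offsets[] (see track_live_clamps()).
+  */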
  4933. + clamp_reg = raddr_add_a_to_live_reg_index(inst);
  4934. + if (clamp_reg == ~0) {
  4935. + DRM_ERROR("direct TMU load wasn't clamped\n");
  4936. + return false;
  4937. + }
  4938. +
  4939. + clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg];
  4940. + if (clamp_offset == ~0) {
  4941. + DRM_ERROR("direct TMU load wasn't clamped\n");
  4942. + return false;
  4943. + }
  4944. +
  4945. + /* Store the clamp value's offset in p1 (see reloc_tex() in
  4946. + * vc4_validate.c).
  4947. + */
  4948. + validation_state->tmu_setup[tmu].p_offset[1] =
  4949. + clamp_offset;
  4950. +
  4951. + if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
  4952. + !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
  4953. + DRM_ERROR("direct TMU load didn't add to a uniform\n");
  4954. + return false;
  4955. + }
  4956. +
  4957. + validation_state->tmu_setup[tmu].is_direct = true;
  4958. + } else {
  4959. + if (raddr_a == QPU_R_UNIF || (sig != QPU_SIG_SMALL_IMM &&
  4960. + raddr_b == QPU_R_UNIF)) {
  4961. + DRM_ERROR("uniform read in the same instruction as "
  4962. + "texture setup.\n");
  4963. + return false;
  4964. + }
  4965. + }
  4966. +
  4967. + if (validation_state->tmu_write_count[tmu] >= 4) {
  4968. + DRM_ERROR("TMU%d got too many parameters before dispatch\n",
  4969. + tmu);
  4970. + return false;
  4971. + }
  4972. + validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] =
  4973. + validated_shader->uniforms_size;
  4974. + validation_state->tmu_write_count[tmu]++;
  4975. + /* Since direct uses a RADDR uniform reference, it will get counted in
  4976. + * check_instruction_reads()
  4977. + */
  4978. + if (!is_direct)
  4979. + validated_shader->uniforms_size += 4;
  4980. +
  4981. + if (submit) {
  4982. + if (!record_validated_texture_sample(validated_shader,
  4983. + validation_state, tmu)) {
  4984. + return false;
  4985. + }
  4986. +
  4987. + validation_state->tmu_write_count[tmu] = 0;
  4988. + }
  4989. +
  4990. + return true;
  4991. +}
  4992. +
  4993. +static bool
  4994. +check_register_write(uint64_t inst,
  4995. + struct vc4_validated_shader_info *validated_shader,
  4996. + struct vc4_shader_validation_state *validation_state,
  4997. + bool is_mul)
  4998. +{
  4999. + uint32_t waddr = (is_mul ?
  5000. + QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
  5001. + QPU_GET_FIELD(inst, QPU_WADDR_ADD));
  5002. +
  5003. + switch (waddr) {
  5004. + case QPU_W_UNIFORMS_ADDRESS:
  5005. + /* XXX: We'll probably need to support this for reladdr, but
  5006. + * it's definitely a security-related one.
  5007. + */
  5008. + DRM_ERROR("uniforms address load unsupported\n");
  5009. + return false;
  5010. +
  5011. + case QPU_W_TLB_COLOR_MS:
  5012. + case QPU_W_TLB_COLOR_ALL:
  5013. + case QPU_W_TLB_Z:
  5014. + /* These only interact with the tile buffer, not main memory,
  5015. + * so they're safe.
  5016. + */
  5017. + return true;
  5018. +
  5019. + case QPU_W_TMU0_S:
  5020. + case QPU_W_TMU0_T:
  5021. + case QPU_W_TMU0_R:
  5022. + case QPU_W_TMU0_B:
  5023. + case QPU_W_TMU1_S:
  5024. + case QPU_W_TMU1_T:
  5025. + case QPU_W_TMU1_R:
  5026. + case QPU_W_TMU1_B:
  5027. + return check_tmu_write(inst, validated_shader, validation_state,
  5028. + is_mul);
  5029. +
  5030. + case QPU_W_HOST_INT:
  5031. + case QPU_W_TMU_NOSWAP:
  5032. + case QPU_W_TLB_ALPHA_MASK:
  5033. + case QPU_W_MUTEX_RELEASE:
  5034. + /* XXX: I haven't thought about these, so don't support them
  5035. + * for now.
  5036. + */
  5037. + DRM_ERROR("Unsupported waddr %d\n", waddr);
  5038. + return false;
  5039. +
  5040. + case QPU_W_VPM_ADDR:
  5041. + DRM_ERROR("General VPM DMA unsupported\n");
  5042. + return false;
  5043. +
  5044. + case QPU_W_VPM:
  5045. + case QPU_W_VPMVCD_SETUP:
  5046. + /* We allow VPM setup in general, even including VPM DMA
  5047. + * configuration setup, because the (unsafe) DMA can only be
  5048. + * triggered by QPU_W_VPM_ADDR writes.
  5049. + */
  5050. + return true;
  5051. +
  5052. + case QPU_W_TLB_STENCIL_SETUP:
  5053. + return true;
  5054. + }
  5055. +
  5056. + return true;
  5057. +}
  5058. +
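+/* Tracks the two-instruction clamp pattern that direct TMU reads rely
+ * on, roughly:
+ *     max rA, x, 0       ; live_max_clamp_regs[rA] set
+ *     min rB, rA, unif   ; live_min_clamp_offsets[rB] = offset of unif
+ * so that check_tmu_write() can later verify rB really is bounded to
+ * [0, uniform] before it is added to a UBO base address.
+ */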
  5059. +static void
  5060. +track_live_clamps(uint64_t inst,
  5061. + struct vc4_validated_shader_info *validated_shader,
  5062. + struct vc4_shader_validation_state *validation_state)
  5063. +{
  5064. + uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
  5065. + uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
  5066. + uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
  5067. + uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
  5068. + uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
  5069. + uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
  5070. + uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
  5071. + uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
  5072. + uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
  5073. + bool ws = inst & QPU_WS;
  5074. + uint32_t lri_add_a, lri_add, lri_mul;
  5075. + bool add_a_is_min_0;
  5076. +
   5077. + /* Check whether OP_ADD's A argument comes from a live MAX(x, 0),
  5078. + * before we clear previous live state.
  5079. + */
  5080. + lri_add_a = raddr_add_a_to_live_reg_index(inst);
  5081. + add_a_is_min_0 = (lri_add_a != ~0 &&
  5082. + validation_state->live_max_clamp_regs[lri_add_a]);
  5083. +
  5084. + /* Clear live state for registers written by our instruction. */
  5085. + lri_add = waddr_to_live_reg_index(waddr_add, ws);
  5086. + lri_mul = waddr_to_live_reg_index(waddr_mul, !ws);
  5087. + if (lri_mul != ~0) {
  5088. + validation_state->live_max_clamp_regs[lri_mul] = false;
  5089. + validation_state->live_min_clamp_offsets[lri_mul] = ~0;
  5090. + }
  5091. + if (lri_add != ~0) {
  5092. + validation_state->live_max_clamp_regs[lri_add] = false;
  5093. + validation_state->live_min_clamp_offsets[lri_add] = ~0;
  5094. + } else {
  5095. + /* Nothing further to do for live tracking, since only ADDs
  5096. + * generate new live clamp registers.
  5097. + */
  5098. + return;
  5099. + }
  5100. +
  5101. + /* Now, handle remaining live clamp tracking for the ADD operation. */
  5102. +
  5103. + if (cond_add != QPU_COND_ALWAYS)
  5104. + return;
  5105. +
  5106. + if (op_add == QPU_A_MAX) {
  5107. + /* Track live clamps of a value to a minimum of 0 (in either
  5108. + * arg).
  5109. + */
  5110. + if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 ||
  5111. + (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) {
  5112. + return;
  5113. + }
  5114. +
  5115. + validation_state->live_max_clamp_regs[lri_add] = true;
   5116. + } else if (op_add == QPU_A_MIN) {
  5117. + /* Track live clamps of a value clamped to a minimum of 0 and
  5118. + * a maximum of some uniform's offset.
  5119. + */
  5120. + if (!add_a_is_min_0)
  5121. + return;
  5122. +
  5123. + if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
  5124. + !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF &&
  5125. + sig != QPU_SIG_SMALL_IMM)) {
  5126. + return;
  5127. + }
  5128. +
  5129. + validation_state->live_min_clamp_offsets[lri_add] =
  5130. + validated_shader->uniforms_size;
  5131. + }
  5132. +}
  5133. +
  5134. +static bool
  5135. +check_instruction_writes(uint64_t inst,
  5136. + struct vc4_validated_shader_info *validated_shader,
  5137. + struct vc4_shader_validation_state *validation_state)
  5138. +{
  5139. + uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
  5140. + uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
  5141. + bool ok;
  5142. +
  5143. + if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) {
  5144. + DRM_ERROR("ADD and MUL both set up textures\n");
  5145. + return false;
  5146. + }
  5147. +
  5148. + ok = (check_register_write(inst, validated_shader, validation_state, false) &&
  5149. + check_register_write(inst, validated_shader, validation_state, true));
  5150. +
  5151. + track_live_clamps(inst, validated_shader, validation_state);
  5152. +
  5153. + return ok;
  5154. +}
  5155. +
  5156. +static bool
  5157. +check_instruction_reads(uint64_t inst,
  5158. + struct vc4_validated_shader_info *validated_shader)
  5159. +{
  5160. + uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
  5161. + uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
  5162. + uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
  5163. +
  5164. + if (raddr_a == QPU_R_UNIF ||
  5165. + (raddr_b == QPU_R_UNIF && sig != QPU_SIG_SMALL_IMM)) {
  5166. + /* This can't overflow the uint32_t, because we're reading 8
  5167. + * bytes of instruction to increment by 4 here, so we'd
  5168. + * already be OOM.
  5169. + */
  5170. + validated_shader->uniforms_size += 4;
  5171. + }
  5172. +
  5173. + return true;
  5174. +}
  5175. +
  5176. +struct vc4_validated_shader_info *
  5177. +vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
  5178. +{
  5179. + bool found_shader_end = false;
  5180. + int shader_end_ip = 0;
  5181. + uint32_t ip, max_ip;
  5182. + uint64_t *shader;
  5183. + struct vc4_validated_shader_info *validated_shader;
  5184. + struct vc4_shader_validation_state validation_state;
  5185. + int i;
  5186. +
  5187. + memset(&validation_state, 0, sizeof(validation_state));
  5188. +
  5189. + for (i = 0; i < 8; i++)
  5190. + validation_state.tmu_setup[i / 4].p_offset[i % 4] = ~0;
  5191. + for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++)
  5192. + validation_state.live_min_clamp_offsets[i] = ~0;
  5193. +
  5194. + shader = shader_obj->vaddr;
  5195. + max_ip = shader_obj->base.size / sizeof(uint64_t);
  5196. +
   5197. + validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
  5198. + if (!validated_shader)
  5199. + return NULL;
  5200. +
  5201. + for (ip = 0; ip < max_ip; ip++) {
  5202. + uint64_t inst = shader[ip];
  5203. + uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
  5204. +
  5205. + switch (sig) {
  5206. + case QPU_SIG_NONE:
  5207. + case QPU_SIG_WAIT_FOR_SCOREBOARD:
  5208. + case QPU_SIG_SCOREBOARD_UNLOCK:
  5209. + case QPU_SIG_COLOR_LOAD:
  5210. + case QPU_SIG_LOAD_TMU0:
  5211. + case QPU_SIG_LOAD_TMU1:
  5212. + case QPU_SIG_PROG_END:
  5213. + case QPU_SIG_SMALL_IMM:
  5214. + if (!check_instruction_writes(inst, validated_shader,
  5215. + &validation_state)) {
  5216. + DRM_ERROR("Bad write at ip %d\n", ip);
  5217. + goto fail;
  5218. + }
  5219. +
  5220. + if (!check_instruction_reads(inst, validated_shader))
  5221. + goto fail;
  5222. +
  5223. + if (sig == QPU_SIG_PROG_END) {
  5224. + found_shader_end = true;
  5225. + shader_end_ip = ip;
  5226. + }
  5227. +
  5228. + break;
  5229. +
  5230. + case QPU_SIG_LOAD_IMM:
  5231. + if (!check_instruction_writes(inst, validated_shader,
  5232. + &validation_state)) {
  5233. + DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip);
  5234. + goto fail;
  5235. + }
  5236. + break;
  5237. +
  5238. + default:
  5239. + DRM_ERROR("Unsupported QPU signal %d at "
  5240. + "instruction %d\n", sig, ip);
  5241. + goto fail;
  5242. + }
  5243. +
  5244. + /* There are two delay slots after program end is signaled
  5245. + * that are still executed, then we're finished.
  5246. + */
  5247. + if (found_shader_end && ip == shader_end_ip + 2)
  5248. + break;
  5249. + }
  5250. +
  5251. + if (ip == max_ip) {
  5252. + DRM_ERROR("shader failed to terminate before "
  5253. + "shader BO end at %d\n",
  5254. + shader_obj->base.size);
  5255. + goto fail;
  5256. + }
  5257. +
  5258. + /* Again, no chance of integer overflow here because the worst case
  5259. + * scenario is 8 bytes of uniforms plus handles per 8-byte
  5260. + * instruction.
  5261. + */
  5262. + validated_shader->uniforms_src_size =
  5263. + (validated_shader->uniforms_size +
  5264. + 4 * validated_shader->num_texture_samples);
  5265. +
  5266. + return validated_shader;
  5267. +
  5268. +fail:
  5269. + if (validated_shader) {
  5270. + kfree(validated_shader->texture_samples);
  5271. + kfree(validated_shader);
  5272. + }
  5273. + return NULL;
  5274. +}
  5275. --- /dev/null
  5276. +++ b/include/uapi/drm/vc4_drm.h
  5277. @@ -0,0 +1,229 @@
  5278. +/*
  5279. + * Copyright © 2014-2015 Broadcom
  5280. + *
  5281. + * Permission is hereby granted, free of charge, to any person obtaining a
  5282. + * copy of this software and associated documentation files (the "Software"),
  5283. + * to deal in the Software without restriction, including without limitation
  5284. + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  5285. + * and/or sell copies of the Software, and to permit persons to whom the
  5286. + * Software is furnished to do so, subject to the following conditions:
  5287. + *
  5288. + * The above copyright notice and this permission notice (including the next
  5289. + * paragraph) shall be included in all copies or substantial portions of the
  5290. + * Software.
  5291. + *
  5292. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  5293. + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  5294. + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  5295. + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  5296. + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  5297. + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  5298. + * IN THE SOFTWARE.
  5299. + */
  5300. +
  5301. +#ifndef _UAPI_VC4_DRM_H_
  5302. +#define _UAPI_VC4_DRM_H_
  5303. +
  5304. +#include <drm/drm.h>
  5305. +
  5306. +#define DRM_VC4_SUBMIT_CL 0x00
  5307. +#define DRM_VC4_WAIT_SEQNO 0x01
  5308. +#define DRM_VC4_WAIT_BO 0x02
  5309. +#define DRM_VC4_CREATE_BO 0x03
  5310. +#define DRM_VC4_MMAP_BO 0x04
  5311. +#define DRM_VC4_CREATE_SHADER_BO 0x05
  5312. +
   5313. +#define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
   5314. +#define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
   5315. +#define DRM_IOCTL_VC4_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_BO, struct drm_vc4_wait_bo)
   5316. +#define DRM_IOCTL_VC4_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo)
   5317. +#define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo)
   5318. +#define DRM_IOCTL_VC4_CREATE_SHADER_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo)
  5319. +
  5320. +struct drm_vc4_submit_rcl_surface {
  5321. + uint32_t hindex; /* Handle index, or ~0 if not present. */
  5322. + uint32_t offset; /* Offset to start of buffer. */
  5323. + /*
  5324. + * Bits for either render config (color_ms_write) or load/store packet.
  5325. + */
  5326. + uint16_t bits;
  5327. + uint16_t pad;
  5328. +};
  5329. +
  5330. +/**
  5331. + * struct drm_vc4_submit_cl - ioctl argument for submitting commands to the 3D
  5332. + * engine.
  5333. + *
  5334. + * Drivers typically use GPU BOs to store batchbuffers / command lists and
  5335. + * their associated state. However, because the VC4 lacks an MMU, we have to
  5336. + * do validation of memory accesses by the GPU commands. If we were to store
  5337. + * our commands in BOs, we'd need to do uncached readback from them to do the
  5338. + * validation process, which is too expensive. Instead, userspace accumulates
  5339. + * commands and associated state in plain memory, then the kernel copies the
  5340. + * data to its own address space, and then validates and stores it in a GPU
  5341. + * BO.
  5342. + */
  5343. +struct drm_vc4_submit_cl {
  5344. + /* Pointer to the binner command list.
  5345. + *
  5346. + * This is the first set of commands executed, which runs the
  5347. + * coordinate shader to determine where primitives land on the screen,
  5348. + * then writes out the state updates and draw calls necessary per tile
  5349. + * to the tile allocation BO.
  5350. + */
  5351. + uint64_t bin_cl;
  5352. +
  5353. + /* Pointer to the shader records.
  5354. + *
  5355. + * Shader records are the structures read by the hardware that contain
  5356. + * pointers to uniforms, shaders, and vertex attributes. The
  5357. + * reference to the shader record has enough information to determine
  5358. + * how many pointers are necessary (fixed number for shaders/uniforms,
  5359. + * and an attribute count), so those BO indices into bo_handles are
  5360. + * just stored as uint32_ts before each shader record passed in.
  5361. + */
  5362. + uint64_t shader_rec;
  5363. +
  5364. + /* Pointer to uniform data and texture handles for the textures
  5365. + * referenced by the shader.
  5366. + *
  5367. + * For each shader state record, there is a set of uniform data in the
  5368. + * order referenced by the record (FS, VS, then CS). Each set of
  5369. + * uniform data has a uint32_t index into bo_handles per texture
  5370. + * sample operation, in the order the QPU_W_TMUn_S writes appear in
  5371. + * the program. Following the texture BO handle indices is the actual
  5372. + * uniform data.
  5373. + *
  5374. + * The individual uniform state blocks don't have sizes passed in,
  5375. + * because the kernel has to determine the sizes anyway during shader
  5376. + * code validation.
  5377. + */
  5378. + uint64_t uniforms;
  5379. + uint64_t bo_handles;
  5380. +
  5381. + /* Size in bytes of the binner command list. */
  5382. + uint32_t bin_cl_size;
  5383. + /* Size in bytes of the set of shader records. */
  5384. + uint32_t shader_rec_size;
  5385. + /* Number of shader records.
  5386. + *
  5387. + * This could just be computed from the contents of shader_records and
  5388. + * the address bits of references to them from the bin CL, but it
  5389. + * keeps the kernel from having to resize some allocations it makes.
  5390. + */
  5391. + uint32_t shader_rec_count;
  5392. + /* Size in bytes of the uniform state. */
  5393. + uint32_t uniforms_size;
  5394. +
  5395. + /* Number of BO handles passed in (size is that times 4). */
  5396. + uint32_t bo_handle_count;
  5397. +
  5398. + /* RCL setup: */
  5399. + uint16_t width;
  5400. + uint16_t height;
  5401. + uint8_t min_x_tile;
  5402. + uint8_t min_y_tile;
  5403. + uint8_t max_x_tile;
  5404. + uint8_t max_y_tile;
  5405. + struct drm_vc4_submit_rcl_surface color_read;
  5406. + struct drm_vc4_submit_rcl_surface color_ms_write;
  5407. + struct drm_vc4_submit_rcl_surface zs_read;
  5408. + struct drm_vc4_submit_rcl_surface zs_write;
  5409. + uint32_t clear_color[2];
  5410. + uint32_t clear_z;
  5411. + uint8_t clear_s;
  5412. +
  5413. + uint32_t pad:24;
  5414. +
  5415. +#define VC4_SUBMIT_CL_USE_CLEAR_COLOR (1 << 0)
  5416. + uint32_t flags;
  5417. +
  5418. + /* Returned value of the seqno of this render job (for the
  5419. + * wait ioctl).
  5420. + */
  5421. + uint64_t seqno;
  5422. +};
  5423. +
  5424. +/**
  5425. + * struct drm_vc4_wait_seqno - ioctl argument for waiting for
  5426. + * DRM_VC4_SUBMIT_CL completion using its returned seqno.
  5427. + *
  5428. + * timeout_ns is the timeout in nanoseconds, where "0" means "don't
  5429. + * block, just return the status."
  5430. + */
  5431. +struct drm_vc4_wait_seqno {
  5432. + uint64_t seqno;
  5433. + uint64_t timeout_ns;
  5434. +};
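+
+/* Illustrative user-space flow (a hedged sketch, not part of the ABI):
+ * "fd" is assumed to be an open handle to the vc4 DRM node, the command
+ * list buffers are assumed to be built by userspace, and libdrm's
+ * drmIoctl() is used for the calls.
+ *
+ *     struct drm_vc4_submit_cl submit = { 0 };
+ *     struct drm_vc4_wait_seqno wait = { 0 };
+ *
+ *     submit.bin_cl = (uintptr_t)bin_cl;
+ *     submit.bin_cl_size = bin_cl_size;
+ *     ... shader_rec, uniforms, bo_handles, RCL setup ...
+ *     drmIoctl(fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
+ *
+ *     wait.seqno = submit.seqno;
+ *     wait.timeout_ns = UINT64_MAX;
+ *     drmIoctl(fd, DRM_IOCTL_VC4_WAIT_SEQNO, &wait);
+ */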
  5435. +
  5436. +/**
  5437. + * struct drm_vc4_wait_bo - ioctl argument for waiting for
  5438. + * completion of the last DRM_VC4_SUBMIT_CL on a BO.
  5439. + *
  5440. + * This is useful for cases where multiple processes might be
  5441. + * rendering to a BO and you want to wait for all rendering to be
  5442. + * completed.
  5443. + */
  5444. +struct drm_vc4_wait_bo {
  5445. + uint32_t handle;
  5446. + uint32_t pad;
  5447. + uint64_t timeout_ns;
  5448. +};
  5449. +
  5450. +/**
  5451. + * struct drm_vc4_create_bo - ioctl argument for creating VC4 BOs.
  5452. + *
  5453. + * There are currently no values for the flags argument, but it may be
  5454. + * used in a future extension.
  5455. + */
  5456. +struct drm_vc4_create_bo {
  5457. + uint32_t size;
  5458. + uint32_t flags;
  5459. + /** Returned GEM handle for the BO. */
  5460. + uint32_t handle;
  5461. + uint32_t pad;
  5462. +};
  5463. +
  5464. +/**
  5465. + * struct drm_vc4_create_shader_bo - ioctl argument for creating VC4
  5466. + * shader BOs.
  5467. + *
  5468. + * Since allowing a shader to be overwritten while it's also being
   4469. + * executed from would allow privilege escalation, shaders must be
  5470. + * created using this ioctl, and they can't be mmapped later.
  5471. + */
  5472. +struct drm_vc4_create_shader_bo {
  5473. + /* Size of the data argument. */
  5474. + uint32_t size;
  5475. + /* Flags, currently must be 0. */
  5476. + uint32_t flags;
  5477. +
  5478. + /* Pointer to the data. */
  5479. + uint64_t data;
  5480. +
  5481. + /** Returned GEM handle for the BO. */
  5482. + uint32_t handle;
  5483. + /* Pad, must be 0. */
  5484. + uint32_t pad;
  5485. +};
  5486. +
  5487. +/**
  5488. + * struct drm_vc4_mmap_bo - ioctl argument for mapping VC4 BOs.
  5489. + *
  5490. + * This doesn't actually perform an mmap. Instead, it returns the
  5491. + * offset you need to use in an mmap on the DRM device node. This
  5492. + * means that tools like valgrind end up knowing about the mapped
  5493. + * memory.
  5494. + *
  5495. + * There are currently no values for the flags argument, but it may be
  5496. + * used in a future extension.
  5497. + */
  5498. +struct drm_vc4_mmap_bo {
  5499. + /** Handle for the object being mapped. */
  5500. + uint32_t handle;
  5501. + uint32_t flags;
  5502. + /** offset into the drm node to use for subsequent mmap call. */
  5503. + uint64_t offset;
  5504. +};
  5505. +
  5506. +#endif /* _UAPI_VC4_DRM_H_ */