hallo alle miteinander!
meine programmiererfahrungen sind etwas eingerostet, daher macht mir diese aufgabe probleme:
ich habe XML-Dokumente in folgendem Format:
---------------------------
<?xml version="1.0" encoding="ISO-8859-1"?>
<!DOCTYPE DefaultDIAsDEMvolume SYSTEM "DefaultDIAsDEMvolume.dtd">
<DefaultDIAsDEMvolume NumberOfDocuments="10">
<DefaultDIAsDEMdocument NumberOfTextUnitsLayers="1">
<MetaData>
<Name>DiasdemDocumentID</Name>
<Content>C:\Dokumente und Einstellungen\Hope\Desktop\DA\Donnerstag\DIAsDEM.workbench21\data\Hoffnung\hopecollect\volume100001.xml:0</Content>
</MetaData>
<MetaData>
<Name>SourceFile</Name>
<Content>C:\Dokumente und Einstellungen\Hope\Desktop\DA\Donnerstag\DIAsDEM.workbench21\data\samples\de\case2\file10010.txt</Content>
</MetaData>
<OriginalText>Die BANA Immobilien GmbH, Sitz Frankfurt/Main, ist als persönlich haftende Gesellschafterin eingetreten. Sie ist von der Vertretung der Gesellschaft ausgeschlossen.</OriginalText>
<TextUnitsLayer TextUnitsLayerID="0" TextUnitsDescription="Algorithm: HEURISTIC_SENTENCE_IDENTIFIER">
<OriginalTextUnits>
<OriginalTextUnit TextUnitID="0" BeginIndex="0" EndIndex="104">Die BANA Immobilien GmbH, Sitz Frankfurt/Main, ist als persönlich haftende Gesellschafterin eingetreten.</OriginalTextUnit>
<OriginalTextUnit TextUnitID="1" BeginIndex="105" EndIndex="164">Sie ist von der Vertretung der Gesellschaft ausgeschlossen.</OriginalTextUnit>
</OriginalTextUnits>
<ProcessedTextUnits>
<ProcessedTextUnit TextUnitID="0">
<NeRef NeID="2" />
, persönlich haftend Gesellschafterin eintreten .
</ProcessedTextUnit>
<ProcessedTextUnit TextUnitID="1">Vertretung ausschließen .</ProcessedTextUnit>
</ProcessedTextUnits>
<RollbackTextUnits RollbackID="0">
<ProcessedTextUnit TextUnitID="0">
<NeRef NeID="2" />
, persönlich haftend Gesellschafterin eintreten .
</ProcessedTextUnit>
<ProcessedTextUnit TextUnitID="1">Vertretung ausschließen .</ProcessedTextUnit>
</RollbackTextUnits>
<NamedEntities>
<NamedEntity NeID="0" NeType="organization">BANA Immobilien GmbH</NamedEntity>
<NamedEntity NeID="1" NeType="place">Frankfurt / Main</NamedEntity>
<NamedEntity NeID="2" NeType="company">2|null|company|null|BANA Immobilien GmbH|null|null|Frankfurt / Main|null</NamedEntity>
</NamedEntities>
</TextUnitsLayer>
</DefaultDIAsDEMdocument>
</DefaultDIAsDEMvolume>
---------------------------
brauche aber zur weiteren bearbeitung ein anderes xml-Format:
---------------------------
<?xml version = '1.0' encoding = 'UTF-8'?>
<ParDoc stage="1" source="F:\in_mb\test5\web_parta\news_story.cfm_StoryID=10000120&amp;full=1&amp;print=1.html" content-domain="testtesttest">
<front>
<downloadtime>Sat, 23 Apr 2005 09:41:37 GMT</downloadtime>
<modtime>Thu, 21 Apr 2005 22:56:04 GMT</modtime>
<ontologyVersion></ontologyVersion>
<title>
<tok id="t1" pos="NIL" lem="american" lookup="NIL" orth="capitalized" zone="title" sepAfter=" ">American</tok>
<tok id="t2" pos="NIL" lem="foundation" lookup="NIL" orth="capitalized" zone="title" sepAfter=" ">Foundation</tok>
<tok id="t3" pos="NIL" lem="for" lookup="NIL" orth="capitalized" zone="title" sepAfter=" ">For</tok>
<tok id="t4" pos="NIL" lem="urologic" lookup="NIL" orth="capitalized" zone="title" sepAfter=" ">Urologic</tok>
<tok id="t5" pos="NIL" lem="disease" lookup="NIL" orth="capitalized" zone="title" sepAfter=" ">Disease</tok>
<tok id="t6" pos="NIL" lem="(" lookup="NIL" orth="bracket" zone="title" sepAfter="AFUD">(</tok>
<tok id="t7" pos="NIL" lem="afud" lookup="NIL" orth="uppercase" zone="title" sepAfter=")">AFUD</tok>
<tok id="t8" pos="NIL" lem=")" lookup="NIL" orth="bracket" zone="title" sepAfter=" ">)</tok>
<tok id="t9" pos="NIL" lem="release" lookup="NIL" orth="capitalized" zone="title" sepAfter=":">Release</tok>
<tok id="t10" pos="NIL" lem=":" lookup="NIL" orth="punct" zone="title" sepAfter=" ">:</tok>
<tok id="t11" pos="NIL" lem="boxers" lookup="NIL" orth="capitalized" zone="title" sepAfter=" ">Boxers</tok>
<tok id="t12" pos="NIL" lem="or" lookup="NIL" orth="capitalized" zone="title" sepAfter=" ">Or</tok>
<tok id="t13" pos="NIL" lem="briefs" lookup="NIL" orth="capitalized" zone="title" sepAfter="?">Briefs</tok>
<tok id="t14" pos="NIL" lem="?" lookup="NIL" orth="punct" zone="title" sepAfter=" ">?</tok>
<tok id="t15" pos="NIL" lem="." lookup="NIL" orth="punct" zone="title" sepAfter=".">.</tok>
<tok id="t16" pos="NIL" lem="." lookup="NIL" orth="punct" zone="title" sepAfter=".">.</tok>
<tok id="t17" pos="NIL" lem="." lookup="NIL" orth="punct" zone="title" sepAfter="\n">.</tok>
</title>
</front>
<body>
<sec>
<p>
<s id="sen1">
<tok id="t18" pos="NIL" lem="american" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">American</tok>
<tok id="t19" pos="NIL" lem="foundation" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Foundation</tok>
<tok id="t20" pos="NIL" lem="for" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">For</tok>
<tok id="t21" pos="NIL" lem="urologic" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Urologic</tok>
<tok id="t22" pos="NIL" lem="disease" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Disease</tok>
<tok id="t23" pos="NIL" lem="(" lookup="NIL" orth="bracket" zone="body" sepAfter="AFUD">(</tok>
<tok id="t24" pos="NIL" lem="afud" lookup="NIL" orth="uppercase" zone="body" sepAfter=")">AFUD</tok>
<tok id="t25" pos="NIL" lem=")" lookup="NIL" orth="bracket" zone="body" sepAfter="Release">)</tok>
<tok id="t26" pos="NIL" lem="release" lookup="NIL" orth="capitalized" zone="body" sepAfter=":">Release</tok>
<tok id="t27" pos="NIL" lem=":" lookup="NIL" orth="punct" zone="body" sepAfter=" ">:</tok>
<tok id="t28" pos="NIL" lem="boxers" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Boxers</tok>
<tok id="t29" pos="NIL" lem="or" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Or</tok>
<tok id="t30" pos="NIL" lem="briefs" lookup="NIL" orth="capitalized" zone="body" sepAfter="?">Briefs</tok>
<tok id="t31" pos="NIL" lem="?" lookup="NIL" orth="punct" zone="body" sepAfter=" ">?</tok>
<tok id="t32" pos="NIL" lem="just" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Just</tok>
<tok id="t33" pos="NIL" lem="one" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">One</tok>
<tok id="t34" pos="NIL" lem="of" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Of</tok>
<tok id="t35" pos="NIL" lem="the" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">The</tok>
<tok id="t36" pos="NIL" lem="questions" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Questions</tok>
<tok id="t37" pos="NIL" lem="facing" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Facing</tok>
<tok id="t38" pos="NIL" lem="the" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">The</tok>
<tok id="t39" pos="NIL" lem="millions" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Millions</tok>
<tok id="t40" pos="NIL" lem="of" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Of</tok>
<tok id="t41" pos="NIL" lem="american" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">American</tok>
<tok id="t42" pos="NIL" lem="men" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Men</tok>
<tok id="t43" pos="NIL" lem="struggling" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Struggling</tok>
<tok id="t44" pos="NIL" lem="with" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">With</tok>
<tok id="t45" pos="NIL" lem="infertility" lookup="NIL" orth="capitalized" zone="body" sepAfter="\n">Infertility</tok>
</s>
</p>
<p>
<s id="sen2">
<tok id="t46" pos="NIL" lem="american" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">American</tok>
<tok id="t47" pos="NIL" lem="foundation" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Foundation</tok>
<tok id="t48" pos="NIL" lem="for" lookup="NIL" orth="lowercase" zone="body" sepAfter=" ">for</tok>
<tok id="t49" pos="NIL" lem="urologic" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Urologic</tok>
<tok id="t50" pos="NIL" lem="disease" lookup="NIL" orth="capitalized" zone="body" sepAfter=" ">Disease</tok>
<tok id="t51" pos="NIL" lem="sheds" lookup="NIL" orth="lowercase" zone="body" sepAfter=" ">sheds</tok>
<tok id="t52" pos="NIL" lem="light" lookup="NIL" orth="lowercase" zone="body" sepAfter=" ">light</tok>
<tok id="t53" pos="NIL" lem="on" lookup="NIL" orth="lowercase" zone="body" sepAfter=" ">on</tok>
<tok id="t54" pos="NIL" lem="male" lookup="NIL" orth="lowercase" zone="body" sepAfter=" ">male</tok>
<tok id="t55" pos="NIL" lem="infertility" lookup="NIL" orth="lowercase" zone="body" sepAfter="\n">infertility</tok>
</s>
</p>
</sec>
</body>
<back>
<ParLex/>
<ParCon/>
</back>
</ParDoc>
---------------------------
sorry, dass das posting so groß ist.
kann mir jemand helfen und mir erklären, wie ich aus dem einen format das andere bekomme? wenn möglich detailliert.
jeder tipp ist hilfreich !
danke schon mal im voraus.